From 5def65008ff92519a828e1ba403e9a46836ca802 Mon Sep 17 00:00:00 2001 From: Oran Agra Date: Wed, 21 Feb 2018 11:04:13 +0200 Subject: [PATCH 001/304] Fix zrealloc to behave similarly to je_realloc when size is 0 According to C11, the behavior of realloc with size 0 is now deprecated. it can either behave as free(ptr) and return NULL, or return a valid pointer. but in zmalloc it can lead to zmalloc_oom_handler and panic. and that can affect modules that use it. It looks like both glibc allocator and jemalloc behave like so: realloc(malloc(32),0) returns NULL realloc(NULL,0) returns a valid pointer This commit changes zmalloc to behave the same --- src/zmalloc.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/zmalloc.c b/src/zmalloc.c index 094dd80f..01ac8c79 100644 --- a/src/zmalloc.c +++ b/src/zmalloc.c @@ -147,6 +147,10 @@ void *zrealloc(void *ptr, size_t size) { size_t oldsize; void *newptr; + if (size == 0 && ptr!=NULL) { + zfree(ptr); + return NULL; + } if (ptr == NULL) return zmalloc(size); #ifdef HAVE_MALLOC_SIZE oldsize = zmalloc_size(ptr); From 17c5f17686354b28c715b6f16c9c4e8eb2239df4 Mon Sep 17 00:00:00 2001 From: Oran Agra Date: Mon, 13 Aug 2018 17:43:29 +0300 Subject: [PATCH 002/304] Add log when server dies of SIGTERM during loading this is very confusing to see the server disappears as if it got SIGKILL when it was not the case. --- src/server.c | 1 + 1 file changed, 1 insertion(+) diff --git a/src/server.c b/src/server.c index b537ee04..0c665033 100644 --- a/src/server.c +++ b/src/server.c @@ -3780,6 +3780,7 @@ static void sigShutdownHandler(int sig) { rdbRemoveTempFile(getpid()); exit(1); /* Exit with an error since this was not a clean shutdown. */ } else if (server.loading) { + serverLogFromHandler(LL_WARNING, "Received shutdown signal during loading, exiting now."); exit(0); } From 6b818efeb62356358ea5a5187fd02889940d4bd0 Mon Sep 17 00:00:00 2001 From: Bruce Merry Date: Mon, 31 Dec 2018 11:51:03 +0200 Subject: [PATCH 003/304] Make dbSwapDatabases take args as long This prevents an integer overflow bug. Closes #5737. --- src/db.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/db.c b/src/db.c index 62c8aa13..bac15a6b 100644 --- a/src/db.c +++ b/src/db.c @@ -997,7 +997,7 @@ void scanDatabaseForReadyLists(redisDb *db) { * * Returns C_ERR if at least one of the DB ids are out of range, otherwise * C_OK is returned. 
*/ -int dbSwapDatabases(int id1, int id2) { +int dbSwapDatabases(long id1, long id2) { if (id1 < 0 || id1 >= server.dbnum || id2 < 0 || id2 >= server.dbnum) return C_ERR; if (id1 == id2) return C_OK; From b00e1891c04bd375bb7d9877397c675335f8b40c Mon Sep 17 00:00:00 2001 From: Madelyn Olson Date: Tue, 15 Jan 2019 07:26:19 +0000 Subject: [PATCH 004/304] Fixed a rounding bug in geo.tcl --- tests/unit/geo.tcl | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/tests/unit/geo.tcl b/tests/unit/geo.tcl index 604697be..49e421ee 100644 --- a/tests/unit/geo.tcl +++ b/tests/unit/geo.tcl @@ -61,6 +61,7 @@ set regression_vectors { {939895 151 59.149620271823181 65.204186651485145} {1412 156 149.29737817929004 15.95807862745508} {564862 149 84.062063109158544 -65.685403922426232} + {1546032440391 16751 -1.8175081637769495 20.665668878082954} } set rv_idx 0 @@ -274,8 +275,19 @@ start_server {tags {"geo"}} { foreach place $diff { set mydist [geo_distance $lon $lat $search_lon $search_lat] set mydist [expr $mydist/1000] - if {($mydist / $radius_km) > 0.999} {incr rounding_errors} + if {($mydist / $radius_km) > 0.999} { + incr rounding_errors + continue + } + if {$mydist < $radius_m} { + # This is a false positive for redis since given the + # same points the higher precision calculation provided + # by TCL shows the point within range + incr rounding_errors + continue + } } + # Make sure this is a real error and not a rounidng issue. if {[llength $diff] == $rounding_errors} { set res $res2; # Error silenced From 4cc43a96f67e155a3526a29816510536ef9380c7 Mon Sep 17 00:00:00 2001 From: vattezhang Date: Mon, 18 Feb 2019 22:48:55 +0800 Subject: [PATCH 005/304] benchmark: add auth check in benchmark When we run benchmark but forget to set the right requirepass, benchmark should return error. 
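A minimal sketch of the failure mode this patch detects, written against the public hiredis reply API rather than the benchmark's internal reader buffer (illustrative only, not code from this patch): when the server is configured with requirepass and the client never authenticates, every command returns an error reply whose text starts with NOAUTH, and the run should abort instead of looping forever.

    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>
    #include <hiredis/hiredis.h>

    /* Hedged sketch: connect without AUTH to a server started with
     * "requirepass", then abort on the NOAUTH error reply. */
    int main(void) {
        redisContext *c = redisConnect("127.0.0.1", 6379);
        if (c == NULL || c->err) exit(1);
        redisReply *reply = redisCommand(c, "PING");
        if (reply && reply->type == REDIS_REPLY_ERROR &&
            !strncmp(reply->str, "NOAUTH", 6))
        {
            fprintf(stderr, "Error: %s\n", reply->str);
            exit(1);
        }
        if (reply) freeReplyObject(reply);
        redisFree(c);
        return 0;
    }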
--- src/redis-benchmark.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/redis-benchmark.c b/src/redis-benchmark.c index 31f91eb0..4f0f3404 100644 --- a/src/redis-benchmark.c +++ b/src/redis-benchmark.c @@ -204,6 +204,12 @@ static void readHandler(aeEventLoop *el, int fd, void *privdata, int mask) { if (redisBufferRead(c->context) != REDIS_OK) { fprintf(stderr,"Error: %s\n",c->context->errstr); exit(1); + } + else if (strlen(c->context->reader->buf)>=32 + && !strncmp(c->context->reader->buf,"-NOAUTH Authentication required.", 32)) + { + fprintf(stderr,"Error: %s\n",c->context->reader->buf); + exit(1); } else { while(c->pending) { if (redisGetReply(c->context,&reply) != REDIS_OK) { From 0f0f787a37e6411a02d9a992ecc7bb8af7decf9a Mon Sep 17 00:00:00 2001 From: vattezhang Date: Wed, 27 Feb 2019 21:20:00 +0800 Subject: [PATCH 006/304] fix: fix sentinel command table and new flags format --- src/sentinel.c | 13 +++++++++---- src/server.h | 1 + 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/src/sentinel.c b/src/sentinel.c index 4d03c9c1..92ea7543 100644 --- a/src/sentinel.c +++ b/src/sentinel.c @@ -450,11 +450,11 @@ struct redisCommand sentinelcmds[] = { {"punsubscribe",punsubscribeCommand,-1,"",0,NULL,0,0,0,0,0}, {"publish",sentinelPublishCommand,3,"",0,NULL,0,0,0,0,0}, {"info",sentinelInfoCommand,-1,"",0,NULL,0,0,0,0,0}, - {"role",sentinelRoleCommand,1,"l",0,NULL,0,0,0,0,0}, - {"client",clientCommand,-2,"rs",0,NULL,0,0,0,0,0}, + {"role",sentinelRoleCommand,1,"ok-loading",0,NULL,0,0,0,0,0}, + {"client",clientCommand,-2,"read-only no-script",0,NULL,0,0,0,0,0}, {"shutdown",shutdownCommand,-1,"",0,NULL,0,0,0,0,0}, - {"auth",authCommand,2,"sltF",0,NULL,0,0,0,0,0}, - {"hello",helloCommand,-2,"sF",0,NULL,0,0,0,0,0} + {"auth",authCommand,2,"no-script ok-loading ok-stale fast",0,NULL,0,0,0,0,0}, + {"hello",helloCommand,-2,"no-script fast",0,NULL,0,0,0,0,0} }; /* This function overwrites a few normal Redis config default with Sentinel @@ -477,6 +477,11 @@ void initSentinel(void) { retval = dictAdd(server.commands, sdsnew(cmd->name), cmd); serverAssert(retval == DICT_OK); + + /* Translate the command string flags description into an actual + * set of flags. */ + if (populateCommandTableParseFlags(cmd,cmd->sflags) == C_ERR) + serverPanic("Unsupported command flag"); } /* Initialize various data structures. */ diff --git a/src/server.h b/src/server.h index 99495265..c29a40b6 100644 --- a/src/server.h +++ b/src/server.h @@ -2264,6 +2264,7 @@ void serverLogHexDump(int level, char *descr, void *value, size_t len); int memtest_preserving_test(unsigned long *m, size_t bytes, int passes); void mixDigest(unsigned char *digest, void *ptr, size_t len); void xorDigest(unsigned char *digest, void *ptr, size_t len); +int populateCommandTableParseFlags(struct redisCommand *c, char *strflags); #define redisDebug(fmt, ...) 
\ printf("DEBUG %s:%d > " fmt "\n", __FILE__, __LINE__, __VA_ARGS__) From ad223e204222dca7758eb540a455bca93e62b861 Mon Sep 17 00:00:00 2001 From: vattezhang Date: Wed, 13 Mar 2019 20:46:33 +0800 Subject: [PATCH 007/304] fix: fix benchmark cannot exit when NOAUTH err happens --- src/redis-benchmark.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/redis-benchmark.c b/src/redis-benchmark.c index 2c53bc93..edeaf3a2 100644 --- a/src/redis-benchmark.c +++ b/src/redis-benchmark.c @@ -419,11 +419,10 @@ static void readHandler(aeEventLoop *el, int fd, void *privdata, int mask) { fprintf(stderr,"Error: %s\n",c->context->errstr); exit(1); } - else if (strlen(c->context->reader->buf)>=32 - && !strncmp(c->context->reader->buf,"-NOAUTH Authentication required.", 32)) + else if (NULL != strstr(c->context->reader->buf,"NOAUTH")) { fprintf(stderr,"Error: %s\n",c->context->reader->buf); - exit(1); + exit(1); } else { while(c->pending) { if (redisGetReply(c->context,&reply) != REDIS_OK) { From 283d6cfd58600a8c9e90584a67f1ca90d85f5669 Mon Sep 17 00:00:00 2001 From: Jim Brunner Date: Wed, 13 Mar 2019 16:31:24 +0000 Subject: [PATCH 008/304] Addition of OnUnload function --- src/module.c | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/src/module.c b/src/module.c index 5ad99975..ba38ed8d 100644 --- a/src/module.c +++ b/src/module.c @@ -4799,6 +4799,23 @@ int moduleUnload(sds name) { errno = EBUSY; return REDISMODULE_ERR; } + + /* Give module a chance to clean up. */ + int (*onunload)(void *); + onunload = (int (*)(void *))(unsigned long) dlsym(module->handle, "RedisModule_OnUnload"); + if (onunload) { + RedisModuleCtx ctx = REDISMODULE_CTX_INIT; + ctx.module = module; + ctx.client = moduleFreeContextReusedClient; + int unload_status = onunload((void*)&ctx); + moduleFreeContext(&ctx); + + if (unload_status == REDISMODULE_ERR) { + serverLog(LL_WARNING, "Module %s OnUnload failed. Unload canceled.", name); + errno = ECANCELED; + return REDISMODULE_ERR; + } + } moduleUnregisterCommands(module); From 68fd59056b19c930b7b118d1f531f25947e026b8 Mon Sep 17 00:00:00 2001 From: Yossi Gottlieb Date: Sun, 3 Jun 2018 15:37:48 +0300 Subject: [PATCH 009/304] Add RedisModule_Assert() API call. --- src/module.c | 10 ++++++++++ src/redismodule.h | 4 ++++ 2 files changed, 14 insertions(+) diff --git a/src/module.c b/src/module.c index 8954fcdf..c6e6e598 100644 --- a/src/module.c +++ b/src/module.c @@ -3461,6 +3461,15 @@ void RM_LogIOError(RedisModuleIO *io, const char *levelstr, const char *fmt, ... va_end(ap); } +/* Redis-like assert function. + * + * A failed assertion will shut down the server and produce logging information + * that looks identical to information generated by Redis itself. 
+ */ +void RM__Assert(const char *estr, const char *file, int line) { + _serverAssert(estr, file, line); +} + /* -------------------------------------------------------------------------- * Blocking clients from modules * -------------------------------------------------------------------------- */ @@ -4993,6 +5002,7 @@ void moduleRegisterCoreAPI(void) { REGISTER_API(EmitAOF); REGISTER_API(Log); REGISTER_API(LogIOError); + REGISTER_API(_Assert); REGISTER_API(StringAppendBuffer); REGISTER_API(RetainString); REGISTER_API(StringCompare); diff --git a/src/redismodule.h b/src/redismodule.h index d18c3888..db32df04 100644 --- a/src/redismodule.h +++ b/src/redismodule.h @@ -271,6 +271,7 @@ void REDISMODULE_API_FUNC(RedisModule_SaveFloat)(RedisModuleIO *io, float value) float REDISMODULE_API_FUNC(RedisModule_LoadFloat)(RedisModuleIO *io); void REDISMODULE_API_FUNC(RedisModule_Log)(RedisModuleCtx *ctx, const char *level, const char *fmt, ...); void REDISMODULE_API_FUNC(RedisModule_LogIOError)(RedisModuleIO *io, const char *levelstr, const char *fmt, ...); +void REDISMODULE_API_FUNC(RedisModule__Assert)(const char *estr, const char *file, int line); int REDISMODULE_API_FUNC(RedisModule_StringAppendBuffer)(RedisModuleCtx *ctx, RedisModuleString *str, const char *buf, size_t len); void REDISMODULE_API_FUNC(RedisModule_RetainString)(RedisModuleCtx *ctx, RedisModuleString *str); int REDISMODULE_API_FUNC(RedisModule_StringCompare)(RedisModuleString *a, RedisModuleString *b); @@ -433,6 +434,7 @@ static int RedisModule_Init(RedisModuleCtx *ctx, const char *name, int ver, int REDISMODULE_GET_API(EmitAOF); REDISMODULE_GET_API(Log); REDISMODULE_GET_API(LogIOError); + REDISMODULE_GET_API(_Assert); REDISMODULE_GET_API(StringAppendBuffer); REDISMODULE_GET_API(RetainString); REDISMODULE_GET_API(StringCompare); @@ -499,6 +501,8 @@ static int RedisModule_Init(RedisModuleCtx *ctx, const char *name, int ver, int return REDISMODULE_OK; } +#define RedisModule_Assert(_e) ((_e)?(void)0 : (RedisModule__Assert(#_e,__FILE__,__LINE__),exit(1))) + #else /* Things only defined for the modules core, not exported to modules From a88264d934744b23c02d92a3ba3fccbe070af0b4 Mon Sep 17 00:00:00 2001 From: Yossi Gottlieb Date: Wed, 30 Nov 2016 21:47:02 +0200 Subject: [PATCH 010/304] Add RedisModule_GetKeyNameFromIO(). --- src/aof.c | 2 +- src/cluster.c | 10 +++++----- src/module.c | 9 +++++++++ src/rdb.c | 14 +++++++------- src/rdb.h | 4 ++-- src/redis-check-rdb.c | 2 +- src/redismodule.h | 2 ++ src/server.h | 4 +++- 8 files changed, 30 insertions(+), 17 deletions(-) diff --git a/src/aof.c b/src/aof.c index cafcf961..615eebd0 100644 --- a/src/aof.c +++ b/src/aof.c @@ -1239,7 +1239,7 @@ int rewriteModuleObject(rio *r, robj *key, robj *o) { RedisModuleIO io; moduleValue *mv = o->ptr; moduleType *mt = mv->type; - moduleInitIOContext(io,mt,r); + moduleInitIOContext(io,mt,r,key); mt->aof_rewrite(&io,key,mv->value); if (io.ctx) { moduleFreeContext(io.ctx); diff --git a/src/cluster.c b/src/cluster.c index 50a9ae68..c85e3791 100644 --- a/src/cluster.c +++ b/src/cluster.c @@ -4776,7 +4776,7 @@ NULL /* Generates a DUMP-format representation of the object 'o', adding it to the * io stream pointed by 'rio'. This function can't fail. */ -void createDumpPayload(rio *payload, robj *o) { +void createDumpPayload(rio *payload, robj *o, robj *key) { unsigned char buf[2]; uint64_t crc; @@ -4784,7 +4784,7 @@ void createDumpPayload(rio *payload, robj *o) { * byte followed by the serialized object. This is understood by RESTORE. 
*/ rioInitWithBuffer(payload,sdsempty()); serverAssert(rdbSaveObjectType(payload,o)); - serverAssert(rdbSaveObject(payload,o)); + serverAssert(rdbSaveObject(payload,o,key)); /* Write the footer, this is how it looks like: * ----------------+---------------------+---------------+ @@ -4842,7 +4842,7 @@ void dumpCommand(client *c) { } /* Create the DUMP encoded representation. */ - createDumpPayload(&payload,o); + createDumpPayload(&payload,o,c->argv[1]); /* Transfer to the client */ dumpobj = createObject(OBJ_STRING,payload.io.buffer.ptr); @@ -4915,7 +4915,7 @@ void restoreCommand(client *c) { rioInitWithBuffer(&payload,c->argv[3]->ptr); if (((type = rdbLoadObjectType(&payload)) == -1) || - ((obj = rdbLoadObject(type,&payload)) == NULL)) + ((obj = rdbLoadObject(type,&payload,c->argv[1])) == NULL)) { addReplyError(c,"Bad data format"); return; @@ -5203,7 +5203,7 @@ try_again: /* Emit the payload argument, that is the serialized object using * the DUMP format. */ - createDumpPayload(&payload,ov[j]); + createDumpPayload(&payload,ov[j],kv[j]); serverAssertWithInfo(c,NULL, rioWriteBulkString(&cmd,payload.io.buffer.ptr, sdslen(payload.io.buffer.ptr))); diff --git a/src/module.c b/src/module.c index e69d3dc6..e1ffd731 100644 --- a/src/module.c +++ b/src/module.c @@ -3438,6 +3438,14 @@ RedisModuleCtx *RM_GetContextFromIO(RedisModuleIO *io) { return io->ctx; } +/* Returns a RedisModuleString with the name of the key currently saving or + * loading, when an IO data type callback is called. There is no guarantee + * that the key name is always available, so this may return NULL. + */ +const RedisModuleString *RM_GetKeyNameFromIO(RedisModuleIO *io) { + return io->key; +} + /* -------------------------------------------------------------------------- * Logging * -------------------------------------------------------------------------- */ @@ -5164,6 +5172,7 @@ void moduleRegisterCoreAPI(void) { REGISTER_API(RetainString); REGISTER_API(StringCompare); REGISTER_API(GetContextFromIO); + REGISTER_API(GetKeyNameFromIO); REGISTER_API(BlockClient); REGISTER_API(UnblockClient); REGISTER_API(IsBlockedReplyRequest); diff --git a/src/rdb.c b/src/rdb.c index 52dddf21..95e4766e 100644 --- a/src/rdb.c +++ b/src/rdb.c @@ -751,7 +751,7 @@ size_t rdbSaveStreamConsumers(rio *rdb, streamCG *cg) { /* Save a Redis object. * Returns -1 on error, number of bytes written on success. */ -ssize_t rdbSaveObject(rio *rdb, robj *o) { +ssize_t rdbSaveObject(rio *rdb, robj *o, robj *key) { ssize_t n = 0, nwritten = 0; if (o->type == OBJ_STRING) { @@ -966,7 +966,7 @@ ssize_t rdbSaveObject(rio *rdb, robj *o) { RedisModuleIO io; moduleValue *mv = o->ptr; moduleType *mt = mv->type; - moduleInitIOContext(io,mt,rdb); + moduleInitIOContext(io,mt,rdb,key); /* Write the "module" identifier as prefix, so that we'll be able * to call the right module during loading. */ @@ -996,7 +996,7 @@ ssize_t rdbSaveObject(rio *rdb, robj *o) { * this length with very little changes to the code. In the future * we could switch to a faster solution. 
*/ size_t rdbSavedObjectLen(robj *o) { - ssize_t len = rdbSaveObject(NULL,o); + ssize_t len = rdbSaveObject(NULL,o,NULL); serverAssertWithInfo(NULL,o,len != -1); return len; } @@ -1038,7 +1038,7 @@ int rdbSaveKeyValuePair(rio *rdb, robj *key, robj *val, long long expiretime) { /* Save type, key, value */ if (rdbSaveObjectType(rdb,val) == -1) return -1; if (rdbSaveStringObject(rdb,key) == -1) return -1; - if (rdbSaveObject(rdb,val) == -1) return -1; + if (rdbSaveObject(rdb,val,key) == -1) return -1; return 1; } @@ -1380,7 +1380,7 @@ robj *rdbLoadCheckModuleValue(rio *rdb, char *modulename) { /* Load a Redis object of the specified type from the specified file. * On success a newly allocated object is returned, otherwise NULL. */ -robj *rdbLoadObject(int rdbtype, rio *rdb) { +robj *rdbLoadObject(int rdbtype, rio *rdb, robj *key) { robj *o = NULL, *ele, *dec; uint64_t len; unsigned int i; @@ -1767,7 +1767,7 @@ robj *rdbLoadObject(int rdbtype, rio *rdb) { exit(1); } RedisModuleIO io; - moduleInitIOContext(io,mt,rdb); + moduleInitIOContext(io,mt,rdb,key); io.ver = (rdbtype == RDB_TYPE_MODULE) ? 1 : 2; /* Call the rdb_load method of the module providing the 10 bit * encoding version in the lower 10 bits of the module ID. */ @@ -2023,7 +2023,7 @@ int rdbLoadRio(rio *rdb, rdbSaveInfo *rsi, int loading_aof) { /* Read key */ if ((key = rdbLoadStringObject(rdb)) == NULL) goto eoferr; /* Read value */ - if ((val = rdbLoadObject(type,rdb)) == NULL) goto eoferr; + if ((val = rdbLoadObject(type,rdb,key)) == NULL) goto eoferr; /* Check if the key already expired. This function is used when loading * an RDB file from disk, either at startup, or when an RDB was * received from the master. In the latter case, the master is diff --git a/src/rdb.h b/src/rdb.h index 7b948616..0acddf9a 100644 --- a/src/rdb.h +++ b/src/rdb.h @@ -140,9 +140,9 @@ int rdbSaveBackground(char *filename, rdbSaveInfo *rsi); int rdbSaveToSlavesSockets(rdbSaveInfo *rsi); void rdbRemoveTempFile(pid_t childpid); int rdbSave(char *filename, rdbSaveInfo *rsi); -ssize_t rdbSaveObject(rio *rdb, robj *o); +ssize_t rdbSaveObject(rio *rdb, robj *o, robj *key); size_t rdbSavedObjectLen(robj *o); -robj *rdbLoadObject(int type, rio *rdb); +robj *rdbLoadObject(int type, rio *rdb, robj *key); void backgroundSaveDoneHandler(int exitcode, int bysignal); int rdbSaveKeyValuePair(rio *rdb, robj *key, robj *val, long long expiretime); robj *rdbLoadStringObject(rio *rdb); diff --git a/src/redis-check-rdb.c b/src/redis-check-rdb.c index 8de1d8f4..ec00ee71 100644 --- a/src/redis-check-rdb.c +++ b/src/redis-check-rdb.c @@ -285,7 +285,7 @@ int redis_check_rdb(char *rdbfilename, FILE *fp) { rdbstate.keys++; /* Read value */ rdbstate.doing = RDB_CHECK_DOING_READ_OBJECT_VALUE; - if ((val = rdbLoadObject(type,&rdb)) == NULL) goto eoferr; + if ((val = rdbLoadObject(type,&rdb,key)) == NULL) goto eoferr; /* Check if the key already expired. 
*/ if (expiretime != -1 && expiretime < now) rdbstate.already_expired++; diff --git a/src/redismodule.h b/src/redismodule.h index 272da08d..02941aa9 100644 --- a/src/redismodule.h +++ b/src/redismodule.h @@ -278,6 +278,7 @@ int REDISMODULE_API_FUNC(RedisModule_StringAppendBuffer)(RedisModuleCtx *ctx, Re void REDISMODULE_API_FUNC(RedisModule_RetainString)(RedisModuleCtx *ctx, RedisModuleString *str); int REDISMODULE_API_FUNC(RedisModule_StringCompare)(RedisModuleString *a, RedisModuleString *b); RedisModuleCtx *REDISMODULE_API_FUNC(RedisModule_GetContextFromIO)(RedisModuleIO *io); +const RedisModuleString *REDISMODULE_API_FUNC(RedisModule_GetKeyNameFromIO)(RedisModuleIO *io); long long REDISMODULE_API_FUNC(RedisModule_Milliseconds)(void); void REDISMODULE_API_FUNC(RedisModule_DigestAddStringBuffer)(RedisModuleDigest *md, unsigned char *ele, size_t len); void REDISMODULE_API_FUNC(RedisModule_DigestAddLongLong)(RedisModuleDigest *md, long long ele); @@ -442,6 +443,7 @@ static int RedisModule_Init(RedisModuleCtx *ctx, const char *name, int ver, int REDISMODULE_GET_API(RetainString); REDISMODULE_GET_API(StringCompare); REDISMODULE_GET_API(GetContextFromIO); + REDISMODULE_GET_API(GetKeyNameFromIO); REDISMODULE_GET_API(Milliseconds); REDISMODULE_GET_API(DigestAddStringBuffer); REDISMODULE_GET_API(DigestAddLongLong); diff --git a/src/server.h b/src/server.h index 56c3b67d..b888266a 100644 --- a/src/server.h +++ b/src/server.h @@ -578,16 +578,18 @@ typedef struct RedisModuleIO { int ver; /* Module serialization version: 1 (old), * 2 (current version with opcodes annotation). */ struct RedisModuleCtx *ctx; /* Optional context, see RM_GetContextFromIO()*/ + struct redisObject *key; /* Optional name of key processed */ } RedisModuleIO; /* Macro to initialize an IO context. Note that the 'ver' field is populated * inside rdb.c according to the version of the value to load. */ -#define moduleInitIOContext(iovar,mtype,rioptr) do { \ +#define moduleInitIOContext(iovar,mtype,rioptr,keyptr) do { \ iovar.rio = rioptr; \ iovar.type = mtype; \ iovar.bytes = 0; \ iovar.error = 0; \ iovar.ver = 0; \ + iovar.key = keyptr; \ iovar.ctx = NULL; \ } while(0); From 8ea906a3e8f3e125baa9cf54f6027921d3822b02 Mon Sep 17 00:00:00 2001 From: antirez Date: Sat, 16 Mar 2019 09:15:12 +0100 Subject: [PATCH 011/304] HyperLogLog: fix comment in hllCount(). --- src/hyperloglog.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/hyperloglog.c b/src/hyperloglog.c index 1e7ce3dc..e01ea604 100644 --- a/src/hyperloglog.c +++ b/src/hyperloglog.c @@ -1014,8 +1014,8 @@ uint64_t hllCount(struct hllhdr *hdr, int *invalid) { double m = HLL_REGISTERS; double E; int j; - /* Note that reghisto could be just HLL_Q+1, becuase this is the - * maximum frequency of the "000...1" sequence the hash function is + /* Note that reghisto size could be just HLL_Q+2, becuase HLL_Q+1 is + * the maximum frequency of the "000...1" sequence the hash function is * able to return. However it is slow to check for sanity of the * input: instead we history array at a safe size: overflows will * just write data to wrong, but correctly allocated, places. */ From b78ac354f41e370a4dc21ac01981cb0ccd0a1b7d Mon Sep 17 00:00:00 2001 From: antirez Date: Mon, 18 Mar 2019 11:15:39 +0100 Subject: [PATCH 012/304] redis-check-aof: fix potential overflow. Bug signaled by @vattezhang in PR #5940 but fixed differently. 
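A short standalone illustration of the overflow being guarded against (not code from this patch): ERROR() formats a caller-supplied message into one 1024-byte stack buffer and then copies it, with a prefix, into another; sprintf() lets a long enough message write past both buffers, while snprintf() bounds each write at the destination size.

    #include <stdio.h>

    int main(void) {
        char buf[1024];
        char error[1024];
        long long epos = 42;            /* stand-in for the AOF offset */
        const char *msg = "possibly very long diagnostic text ...";

        /* With sprintf() both of these writes could overflow their buffers;
         * snprintf() truncates the output instead. */
        snprintf(buf, sizeof(buf), "%s", msg);
        snprintf(error, sizeof(error), "0x%16llx: %s", epos, buf);
        puts(error);
        return 0;
    }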
--- src/redis-check-aof.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/redis-check-aof.c b/src/redis-check-aof.c index c4d5a225..54ed85f0 100644 --- a/src/redis-check-aof.c +++ b/src/redis-check-aof.c @@ -33,8 +33,8 @@ #define ERROR(...) { \ char __buf[1024]; \ - sprintf(__buf, __VA_ARGS__); \ - sprintf(error, "0x%16llx: %s", (long long)epos, __buf); \ + snprintf(__buf, sizeof(__buf), __VA_ARGS__); \ + snprintf(error, sizeof(error), "0x%16llx: %s", (long long)epos, __buf); \ } static char error[1024]; From 14b17c3615108fdbca5e7fe4d2c3f0e8b7454521 Mon Sep 17 00:00:00 2001 From: antirez Date: Mon, 18 Mar 2019 11:34:40 +0100 Subject: [PATCH 013/304] replicaofCommand() refactoring: stay into 80 cols. --- src/replication.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/replication.c b/src/replication.c index 3c30999a..f2adc799 100644 --- a/src/replication.c +++ b/src/replication.c @@ -2053,8 +2053,11 @@ void replicaofCommand(client *c) { /* Check if we are already attached to the specified slave */ if (server.masterhost && !strcasecmp(server.masterhost,c->argv[1]->ptr) && server.masterport == port) { - serverLog(LL_NOTICE,"REPLICAOF would result into synchronization with the master we are already connected with. No operation performed."); - addReplySds(c,sdsnew("+OK Already connected to specified master\r\n")); + serverLog(LL_NOTICE,"REPLICAOF would result into synchronization " + "with the master we are already connected " + "with. No operation performed."); + addReplySds(c,sdsnew("+OK Already connected to specified " + "master\r\n")); return; } /* There was no previous master or the user specified a different one, From c3e187190b5e48e69f666c8faa2100253a9b536e Mon Sep 17 00:00:00 2001 From: Yossi Gottlieb Date: Fri, 23 Feb 2018 16:19:37 +0200 Subject: [PATCH 014/304] Initial command filter experiment. --- src/module.c | 76 +++++++++++++++++++++++++++++++++++++++ src/modules/Makefile | 6 +++- src/modules/hellofilter.c | 69 +++++++++++++++++++++++++++++++++++ src/redismodule.h | 8 +++++ src/server.c | 2 ++ src/server.h | 2 +- 6 files changed, 161 insertions(+), 2 deletions(-) create mode 100644 src/modules/hellofilter.c diff --git a/src/module.c b/src/module.c index e69d3dc6..1780342e 100644 --- a/src/module.c +++ b/src/module.c @@ -270,6 +270,28 @@ typedef struct RedisModuleDictIter { raxIterator ri; } RedisModuleDictIter; +/* Information about the command to be executed, as passed to and from a + * filter. 
*/ +typedef struct RedisModuleFilteredCommand { + RedisModuleString **argv; + int argc; +} RedisModuleFilteredCommand; + +typedef void (*RedisModuleCommandFilterFunc) (RedisModuleCtx *ctx, RedisModuleFilteredCommand *cmd); + +typedef struct RedisModuleCommandFilter { + /* The module that registered the filter */ + RedisModule *module; + /* Filter callback function */ + RedisModuleCommandFilterFunc callback; + /* Indicates a filter is active, avoid reentrancy */ + int active; +} RedisModuleCommandFilter; + +/* Registered filters */ +static list *moduleCommandFilters; + + /* -------------------------------------------------------------------------- * Prototypes * -------------------------------------------------------------------------- */ @@ -4770,6 +4792,56 @@ int moduleUnregisterUsedAPI(RedisModule *module) { return count; } +/* -------------------------------------------------------------------------- + * Module Command Filter API + * -------------------------------------------------------------------------- */ + +/* Register a new command filter function. Filters get executed by Redis + * before processing an inbound command and can be used to manipulate the + * behavior of standard Redis commands. Filters must not attempt to + * perform Redis commands or operate on the dataset, and must restrict + * themselves to manipulation of the arguments. + */ + +int RM_RegisterCommandFilter(RedisModuleCtx *ctx, RedisModuleCommandFilterFunc callback) { + RedisModuleCommandFilter *filter = zmalloc(sizeof(*filter)); + filter->module = ctx->module; + filter->callback = callback; + filter->active = 0; + + listAddNodeTail(moduleCommandFilters, filter); + return REDISMODULE_OK; +} + +void moduleCallCommandFilters(client *c) { + if (listLength(moduleCommandFilters) == 0) return; + + listIter li; + listNode *ln; + listRewind(moduleCommandFilters,&li); + + RedisModuleFilteredCommand cmd = { + .argv = c->argv, + .argc = c->argc + }; + + while((ln = listNext(&li))) { + RedisModuleCommandFilter *filter = ln->value; + if (filter->active) continue; + + RedisModuleCtx ctx = REDISMODULE_CTX_INIT; + ctx.module = filter->module; + + filter->active = 1; + filter->callback(&ctx, &cmd); + filter->active = 0; + moduleFreeContext(&ctx); + } + + c->argv = cmd.argv; + c->argc = cmd.argc; +} + /* -------------------------------------------------------------------------- * Modules API internals * -------------------------------------------------------------------------- */ @@ -4816,6 +4888,9 @@ void moduleInitModulesSystem(void) { moduleFreeContextReusedClient->flags |= CLIENT_MODULE; moduleFreeContextReusedClient->user = NULL; /* root user. */ + /* Set up filter list */ + moduleCommandFilters = listCreate(); + moduleRegisterCoreAPI(); if (pipe(server.module_blocked_pipe) == -1) { serverLog(LL_WARNING, @@ -5219,4 +5294,5 @@ void moduleRegisterCoreAPI(void) { REGISTER_API(DictCompare); REGISTER_API(ExportSharedAPI); REGISTER_API(GetSharedAPI); + REGISTER_API(RegisterCommandFilter); } diff --git a/src/modules/Makefile b/src/modules/Makefile index 51ffac17..537aa0da 100644 --- a/src/modules/Makefile +++ b/src/modules/Makefile @@ -13,7 +13,7 @@ endif .SUFFIXES: .c .so .xo .o -all: helloworld.so hellotype.so helloblock.so testmodule.so hellocluster.so hellotimer.so hellodict.so +all: helloworld.so hellotype.so helloblock.so testmodule.so hellocluster.so hellotimer.so hellodict.so hellofilter.so .c.xo: $(CC) -I. 
$(CFLAGS) $(SHOBJ_CFLAGS) -fPIC -c $< -o $@ @@ -46,6 +46,10 @@ hellotimer.so: hellotimer.xo hellodict.xo: ../redismodule.h hellodict.so: hellodict.xo + +hellofilter.xo: ../redismodule.h + +hellofilter.so: hellofilter.xo $(LD) -o $@ $< $(SHOBJ_LDFLAGS) $(LIBS) -lc testmodule.xo: ../redismodule.h diff --git a/src/modules/hellofilter.c b/src/modules/hellofilter.c new file mode 100644 index 00000000..c9e33158 --- /dev/null +++ b/src/modules/hellofilter.c @@ -0,0 +1,69 @@ +#define REDISMODULE_EXPERIMENTAL_API +#include "../redismodule.h" + +static RedisModuleString *log_key_name; + +static const char log_command_name[] = "hellofilter.log"; + +int HelloFilter_LogCommand(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) +{ + RedisModuleString *s = RedisModule_CreateStringFromString(ctx, argv[0]); + + int i; + for (i = 1; i < argc; i++) { + size_t arglen; + const char *arg = RedisModule_StringPtrLen(argv[i], &arglen); + + RedisModule_StringAppendBuffer(ctx, s, " ", 1); + RedisModule_StringAppendBuffer(ctx, s, arg, arglen); + } + + RedisModuleKey *log = RedisModule_OpenKey(ctx, log_key_name, REDISMODULE_WRITE|REDISMODULE_READ); + RedisModule_ListPush(log, REDISMODULE_LIST_HEAD, s); + RedisModule_CloseKey(log); + RedisModule_FreeString(ctx, s); + + size_t cmdlen; + const char *cmdname = RedisModule_StringPtrLen(argv[1], &cmdlen); + RedisModuleCallReply *reply = RedisModule_Call(ctx, cmdname, "v", &argv[2], argc - 2); + if (reply) { + RedisModule_ReplyWithCallReply(ctx, reply); + RedisModule_FreeCallReply(reply); + } else { + RedisModule_ReplyWithSimpleString(ctx, "Unknown command or invalid arguments"); + } + return REDISMODULE_OK; +} + +void HelloFilter_CommandFilter(RedisModuleCtx *ctx, RedisModuleFilteredCommand *cmd) +{ + cmd->argv = RedisModule_Realloc(cmd->argv, (cmd->argc+1)*sizeof(RedisModuleString *)); + int i; + + for (i = cmd->argc; i > 0; i--) { + cmd->argv[i] = cmd->argv[i-1]; + } + cmd->argv[0] = RedisModule_CreateString(ctx, log_command_name, sizeof(log_command_name)-1); + cmd->argc++; +} + +int RedisModule_OnLoad(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) { + if (RedisModule_Init(ctx,"hellofilter",1,REDISMODULE_APIVER_1) + == REDISMODULE_ERR) return REDISMODULE_ERR; + + if (argc != 1) { + RedisModule_Log(ctx, "warning", "Log key name not specified"); + return REDISMODULE_ERR; + } + + log_key_name = RedisModule_CreateStringFromString(ctx, argv[0]); + + if (RedisModule_CreateCommand(ctx,log_command_name, + HelloFilter_LogCommand,"write deny-oom",1,1,1) == REDISMODULE_ERR) + return REDISMODULE_ERR; + + if (RedisModule_RegisterCommandFilter(ctx, HelloFilter_CommandFilter) + == REDISMODULE_ERR) return REDISMODULE_ERR; + + return REDISMODULE_OK; +} diff --git a/src/redismodule.h b/src/redismodule.h index 272da08d..54ce99d9 100644 --- a/src/redismodule.h +++ b/src/redismodule.h @@ -163,6 +163,12 @@ typedef void (*RedisModuleTypeFreeFunc)(void *value); typedef void (*RedisModuleClusterMessageReceiver)(RedisModuleCtx *ctx, const char *sender_id, uint8_t type, const unsigned char *payload, uint32_t len); typedef void (*RedisModuleTimerProc)(RedisModuleCtx *ctx, void *data); +typedef struct RedisModuleFilteredCommand { + RedisModuleString **argv; + int argc; +} RedisModuleFilteredCommand; +typedef void (*RedisModuleCommandFilterFunc) (RedisModuleCtx *ctx, RedisModuleFilteredCommand *cmd); + #define REDISMODULE_TYPE_METHOD_VERSION 1 typedef struct RedisModuleTypeMethods { uint64_t version; @@ -337,6 +343,7 @@ void 
REDISMODULE_API_FUNC(RedisModule_SetDisconnectCallback)(RedisModuleBlockedC void REDISMODULE_API_FUNC(RedisModule_SetClusterFlags)(RedisModuleCtx *ctx, uint64_t flags); int REDISMODULE_API_FUNC(RedisModule_ExportSharedAPI)(RedisModuleCtx *ctx, const char *apiname, void *func); void *REDISMODULE_API_FUNC(RedisModule_GetSharedAPI)(RedisModuleCtx *ctx, const char *apiname); +int REDISMODULE_API_FUNC(RedisModule_RegisterCommandFilter)(RedisModuleCtx *ctx, RedisModuleCommandFilterFunc cb); #endif /* This is included inline inside each Redis module. */ @@ -499,6 +506,7 @@ static int RedisModule_Init(RedisModuleCtx *ctx, const char *name, int ver, int REDISMODULE_GET_API(SetClusterFlags); REDISMODULE_GET_API(ExportSharedAPI); REDISMODULE_GET_API(GetSharedAPI); + REDISMODULE_GET_API(RegisterCommandFilter); #endif if (RedisModule_IsModuleNameBusy && RedisModule_IsModuleNameBusy(name)) return REDISMODULE_ERR; diff --git a/src/server.c b/src/server.c index 712cda1b..66e79dea 100644 --- a/src/server.c +++ b/src/server.c @@ -3268,6 +3268,8 @@ void call(client *c, int flags) { * other operations can be performed by the caller. Otherwise * if C_ERR is returned the client was destroyed (i.e. after QUIT). */ int processCommand(client *c) { + moduleCallCommandFilters(c); + /* The QUIT command is handled separately. Normal command procs will * go through checking for replication and QUIT will cause trouble * when FORCE_REPLICATION is enabled and would be implemented in diff --git a/src/server.h b/src/server.h index 56c3b67d..f55213bf 100644 --- a/src/server.h +++ b/src/server.h @@ -1489,7 +1489,7 @@ size_t moduleCount(void); void moduleAcquireGIL(void); void moduleReleaseGIL(void); void moduleNotifyKeyspaceEvent(int type, const char *event, robj *key, int dbid); - +void moduleCallCommandFilters(client *c); /* Utils */ long long ustime(void); From a5af648fdddaf93e89735a8577b56f12379d1dd2 Mon Sep 17 00:00:00 2001 From: antirez Date: Mon, 18 Mar 2019 15:38:43 +0100 Subject: [PATCH 015/304] MANIFESTO v2. --- MANIFESTO | 47 +++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 41 insertions(+), 6 deletions(-) diff --git a/MANIFESTO b/MANIFESTO index 2b719057..d43a5889 100644 --- a/MANIFESTO +++ b/MANIFESTO @@ -34,7 +34,21 @@ Redis Manifesto so that the complexity is obvious and more complex operations can be performed as the sum of the basic operations. -4 - Code is like a poem; it's not just something we write to reach some +4 - We believe in code efficiency. Computers get faster and faster, yet we + believe that abusing computing capabilities is not wise: the amount of + operations you can do for a given amount of energy remains anyway a + significant parameter: it allows to do more with less computers and, at + the same time, having a smaller environmental impact. Similarly Redis is + able to "scale down" to smaller devices. It is perfectly usable in a + Raspberry Pi and other small ARM based computers. Faster code having + just the layers of abstractions that are really needed will also result, + often, in more predictable performances. We think likewise about memory + usage, one of the fundamental goals of the Redis project is to + incrementally build more and more memory efficient data structures, so that + problems that were not approachable in RAM in the past will be perfectly + fine to handle in the future. + +5 - Code is like a poem; it's not just something we write to reach some practical result. 
Sometimes people that are far from the Redis philosophy suggest using other code written by other authors (frequently in other languages) in order to implement something Redis currently lacks. But to us @@ -45,23 +59,44 @@ Redis Manifesto when needed. At the same time, when writing the Redis story we're trying to write smaller stories that will fit in to other code. -5 - We're against complexity. We believe designing systems is a fight against +6 - We're against complexity. We believe designing systems is a fight against complexity. We'll accept to fight the complexity when it's worthwhile but we'll try hard to recognize when a small feature is not worth 1000s of lines of code. Most of the time the best way to fight complexity is by not creating it at all. -6 - Two levels of API. The Redis API has two levels: 1) a subset of the API fits +7 - Threading is not a silver bullet. Instead of making Redis threaded we + believe on the idea of an efficient (mostly) single threaded Redis core. + Multiple of such cores, that may run in the same computer or may run + in multiple computers, are abstracted away as a single big system by + higher order protocols and features: Redis Cluster and the upcoming + Redis Proxy are our main goals. A shared nothing approach is not just + much simpler (see the previous point in this document), is also optimal + in NUMA systems. In the specific case of Redis it allows for each instance + to have a more limited amount of data, making the Redis persist-by-fork + approach more sounding. In the future we may explore parallelism only for + I/O, which is the low hanging fruit: minimal complexity could provide an + improved single process experience. + +8 - Two levels of API. The Redis API has two levels: 1) a subset of the API fits naturally into a distributed version of Redis and 2) a more complex API that supports multi-key operations. Both are useful if used judiciously but there's no way to make the more complex multi-keys API distributed in an opaque way without violating our other principles. We don't want to provide the illusion of something that will work magically when actually it can't in all cases. Instead we'll provide commands to quickly migrate keys from one - instance to another to perform multi-key operations and expose the tradeoffs - to the user. + instance to another to perform multi-key operations and expose the + trade-offs to the user. -7 - We optimize for joy. We believe writing code is a lot of hard work, and the +9 - We optimize for joy. We believe writing code is a lot of hard work, and the only way it can be worth is by enjoying it. When there is no longer joy in writing code, the best thing to do is stop. To prevent this, we'll avoid taking paths that will make Redis less of a joy to develop. + +10 - All the above points are put together in what we call opportunistic + programming: trying to get the most for the user with minimal increases + in complexity (hanging fruits). Solve 95% of the problem with 5% of the + code when it is acceptable. Avoid a fixed schedule but follow the flow of + user requests, inspiration, Redis internal readiness for certain features + (sometimes many past changes reach a critical point making a previously + complex feature very easy to obtain). From 3eaa2cdc44a9b0742f0695f44911b92547995836 Mon Sep 17 00:00:00 2001 From: antirez Date: Mon, 18 Mar 2019 15:49:52 +0100 Subject: [PATCH 016/304] MANIFESTO: simplicity and lock-in. 
--- MANIFESTO | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/MANIFESTO b/MANIFESTO index d43a5889..37278946 100644 --- a/MANIFESTO +++ b/MANIFESTO @@ -63,7 +63,11 @@ Redis Manifesto complexity. We'll accept to fight the complexity when it's worthwhile but we'll try hard to recognize when a small feature is not worth 1000s of lines of code. Most of the time the best way to fight complexity is by not - creating it at all. + creating it at all. Complexity is also a form of lock-in: code that is + very hard to understand cannot be modified by users in an independent way + regardless of the license. One of the main Redis goals is to remain + understandable, enough for a single programmer to have a clear idea of how + it works in detail just reading the source code for a couple of weeks. 7 - Threading is not a silver bullet. Instead of making Redis threaded we believe on the idea of an efficient (mostly) single threaded Redis core. From 67111320835ee46498ea0e4de07dab5cb59584da Mon Sep 17 00:00:00 2001 From: Yossi Gottlieb Date: Mon, 18 Mar 2019 18:36:46 +0200 Subject: [PATCH 017/304] Add command filtering argument handling API. --- src/module.c | 81 +++++++++++++++++++++++++++++++++++++++ src/modules/hellofilter.c | 46 ++++++++++++++++++---- src/redismodule.h | 18 ++++++--- 3 files changed, 132 insertions(+), 13 deletions(-) diff --git a/src/module.c b/src/module.c index 1780342e..741c546b 100644 --- a/src/module.c +++ b/src/module.c @@ -291,6 +291,10 @@ typedef struct RedisModuleCommandFilter { /* Registered filters */ static list *moduleCommandFilters; +typedef struct RedisModuleCommandFilterCtx { + RedisModuleString **argv; + int argc; +} RedisModuleCommandFilterCtx; /* -------------------------------------------------------------------------- * Prototypes @@ -4842,6 +4846,78 @@ void moduleCallCommandFilters(client *c) { c->argc = cmd.argc; } +/* Return the number of arguments a filtered command has. The number of + * arguments include the command itself. + */ +int RM_CommandFilterArgsCount(RedisModuleCommandFilterCtx *filter) +{ + return filter->argc; +} + +/* Return the specified command argument. The first argument (position 0) is + * the command itself, and the rest are user-provided args. + */ +const RedisModuleString *RM_CommandFilterArgGet(RedisModuleCommandFilterCtx *filter, int pos) +{ + if (pos < 0 || pos >= filter->argc) return NULL; + return filter->argv[pos]; +} + +/* Modify the filtered command by inserting a new argument at the specified + * position. The specified RedisModuleString argument may be used by Redis + * after the filter context is destroyed, so it must not be auto-memory + * allocated, freed or used elsewhere. + */ + +int RM_CommandFilterArgInsert(RedisModuleCommandFilterCtx *filter, int pos, RedisModuleString *arg) +{ + int i; + + if (pos < 0 || pos > filter->argc) return REDISMODULE_ERR; + + filter->argv = zrealloc(filter->argv, (filter->argc+1)*sizeof(RedisModuleString *)); + for (i = filter->argc; i > pos; i--) { + filter->argv[i] = filter->argv[i-1]; + } + filter->argv[pos] = arg; + filter->argc++; + + return REDISMODULE_OK; +} + +/* Modify the filtered command by replacing an existing argument with a new one. + * The specified RedisModuleString argument may be used by Redis after the + * filter context is destroyed, so it must not be auto-memory allocated, freed + * or used elsewhere. 
+ */ + +int RM_CommandFilterArgReplace(RedisModuleCommandFilterCtx *filter, int pos, RedisModuleString *arg) +{ + if (pos < 0 || pos >= filter->argc) return REDISMODULE_ERR; + + decrRefCount(filter->argv[pos]); + filter->argv[pos] = arg; + + return REDISMODULE_OK; +} + +/* Modify the filtered command by deleting an argument at the specified + * position. + */ +int RM_CommandFilterArgDelete(RedisModuleCommandFilterCtx *filter, int pos) +{ + int i; + if (pos < 0 || pos >= filter->argc) return REDISMODULE_ERR; + + decrRefCount(filter->argv[pos]); + for (i = pos; i < filter->argc-1; i++) { + filter->argv[i] = filter->argv[i+1]; + } + filter->argc--; + + return REDISMODULE_OK; +} + /* -------------------------------------------------------------------------- * Modules API internals * -------------------------------------------------------------------------- */ @@ -5295,4 +5371,9 @@ void moduleRegisterCoreAPI(void) { REGISTER_API(ExportSharedAPI); REGISTER_API(GetSharedAPI); REGISTER_API(RegisterCommandFilter); + REGISTER_API(CommandFilterArgsCount); + REGISTER_API(CommandFilterArgGet); + REGISTER_API(CommandFilterArgInsert); + REGISTER_API(CommandFilterArgReplace); + REGISTER_API(CommandFilterArgDelete); } diff --git a/src/modules/hellofilter.c b/src/modules/hellofilter.c index c9e33158..84eb02c3 100644 --- a/src/modules/hellofilter.c +++ b/src/modules/hellofilter.c @@ -1,6 +1,8 @@ #define REDISMODULE_EXPERIMENTAL_API #include "../redismodule.h" +#include + static RedisModuleString *log_key_name; static const char log_command_name[] = "hellofilter.log"; @@ -35,16 +37,46 @@ int HelloFilter_LogCommand(RedisModuleCtx *ctx, RedisModuleString **argv, int ar return REDISMODULE_OK; } -void HelloFilter_CommandFilter(RedisModuleCtx *ctx, RedisModuleFilteredCommand *cmd) +void HelloFilter_CommandFilter(RedisModuleCtx *ctx, RedisModuleCommandFilterCtx *filter) { - cmd->argv = RedisModule_Realloc(cmd->argv, (cmd->argc+1)*sizeof(RedisModuleString *)); - int i; + (void) ctx; - for (i = cmd->argc; i > 0; i--) { - cmd->argv[i] = cmd->argv[i-1]; + /* Fun manipulations: + * - Remove @delme + * - Replace @replaceme + * - Append @insertbefore or @insertafter + * - Prefix with Log command if @log encounterd + */ + int log = 0; + int pos = 0; + while (pos < RedisModule_CommandFilterArgsCount(filter)) { + const RedisModuleString *arg = RedisModule_CommandFilterArgGet(filter, pos); + size_t arg_len; + const char *arg_str = RedisModule_StringPtrLen(arg, &arg_len); + + if (arg_len == 6 && !memcmp(arg_str, "@delme", 6)) { + RedisModule_CommandFilterArgDelete(filter, pos); + continue; + } + if (arg_len == 10 && !memcmp(arg_str, "@replaceme", 10)) { + RedisModule_CommandFilterArgReplace(filter, pos, + RedisModule_CreateString(NULL, "--replaced--", 12)); + } else if (arg_len == 13 && !memcmp(arg_str, "@insertbefore", 13)) { + RedisModule_CommandFilterArgInsert(filter, pos, + RedisModule_CreateString(NULL, "--inserted-before--", 19)); + pos++; + } else if (arg_len == 12 && !memcmp(arg_str, "@insertafter", 12)) { + RedisModule_CommandFilterArgInsert(filter, pos + 1, + RedisModule_CreateString(NULL, "--inserted-after--", 18)); + pos++; + } else if (arg_len == 4 && !memcmp(arg_str, "@log", 4)) { + log = 1; + } + pos++; } - cmd->argv[0] = RedisModule_CreateString(ctx, log_command_name, sizeof(log_command_name)-1); - cmd->argc++; + + if (log) RedisModule_CommandFilterArgInsert(filter, 0, + RedisModule_CreateString(NULL, log_command_name, sizeof(log_command_name)-1)); } int RedisModule_OnLoad(RedisModuleCtx *ctx, RedisModuleString 
**argv, int argc) { diff --git a/src/redismodule.h b/src/redismodule.h index 54ce99d9..426a6df6 100644 --- a/src/redismodule.h +++ b/src/redismodule.h @@ -150,6 +150,7 @@ typedef struct RedisModuleBlockedClient RedisModuleBlockedClient; typedef struct RedisModuleClusterInfo RedisModuleClusterInfo; typedef struct RedisModuleDict RedisModuleDict; typedef struct RedisModuleDictIter RedisModuleDictIter; +typedef struct RedisModuleCommandFilterCtx RedisModuleCommandFilterCtx; typedef int (*RedisModuleCmdFunc)(RedisModuleCtx *ctx, RedisModuleString **argv, int argc); typedef void (*RedisModuleDisconnectFunc)(RedisModuleCtx *ctx, RedisModuleBlockedClient *bc); @@ -162,12 +163,7 @@ typedef void (*RedisModuleTypeDigestFunc)(RedisModuleDigest *digest, void *value typedef void (*RedisModuleTypeFreeFunc)(void *value); typedef void (*RedisModuleClusterMessageReceiver)(RedisModuleCtx *ctx, const char *sender_id, uint8_t type, const unsigned char *payload, uint32_t len); typedef void (*RedisModuleTimerProc)(RedisModuleCtx *ctx, void *data); - -typedef struct RedisModuleFilteredCommand { - RedisModuleString **argv; - int argc; -} RedisModuleFilteredCommand; -typedef void (*RedisModuleCommandFilterFunc) (RedisModuleCtx *ctx, RedisModuleFilteredCommand *cmd); +typedef void (*RedisModuleCommandFilterFunc) (RedisModuleCtx *ctx, RedisModuleCommandFilterCtx *filter); #define REDISMODULE_TYPE_METHOD_VERSION 1 typedef struct RedisModuleTypeMethods { @@ -344,6 +340,11 @@ void REDISMODULE_API_FUNC(RedisModule_SetClusterFlags)(RedisModuleCtx *ctx, uint int REDISMODULE_API_FUNC(RedisModule_ExportSharedAPI)(RedisModuleCtx *ctx, const char *apiname, void *func); void *REDISMODULE_API_FUNC(RedisModule_GetSharedAPI)(RedisModuleCtx *ctx, const char *apiname); int REDISMODULE_API_FUNC(RedisModule_RegisterCommandFilter)(RedisModuleCtx *ctx, RedisModuleCommandFilterFunc cb); +int REDISMODULE_API_FUNC(RedisModule_CommandFilterArgsCount)(RedisModuleCommandFilterCtx *filter); +const RedisModuleString *REDISMODULE_API_FUNC(RedisModule_CommandFilterArgGet)(RedisModuleCommandFilterCtx *filter, int pos); +int REDISMODULE_API_FUNC(RedisModule_CommandFilterArgInsert)(RedisModuleCommandFilterCtx *filter, int pos, RedisModuleString *arg); +int REDISMODULE_API_FUNC(RedisModule_CommandFilterArgReplace)(RedisModuleCommandFilterCtx *filter, int pos, RedisModuleString *arg); +int REDISMODULE_API_FUNC(RedisModule_CommandFilterArgDelete)(RedisModuleCommandFilterCtx *filter, int pos); #endif /* This is included inline inside each Redis module. */ @@ -507,6 +508,11 @@ static int RedisModule_Init(RedisModuleCtx *ctx, const char *name, int ver, int REDISMODULE_GET_API(ExportSharedAPI); REDISMODULE_GET_API(GetSharedAPI); REDISMODULE_GET_API(RegisterCommandFilter); + REDISMODULE_GET_API(CommandFilterArgsCount); + REDISMODULE_GET_API(CommandFilterArgGet); + REDISMODULE_GET_API(CommandFilterArgInsert); + REDISMODULE_GET_API(CommandFilterArgReplace); + REDISMODULE_GET_API(CommandFilterArgDelete); #endif if (RedisModule_IsModuleNameBusy && RedisModule_IsModuleNameBusy(name)) return REDISMODULE_ERR; From 9095e4dc9bbb8c0311e0df2af556295ca6ce92ca Mon Sep 17 00:00:00 2001 From: Yossi Gottlieb Date: Mon, 18 Mar 2019 19:34:52 +0200 Subject: [PATCH 018/304] Add command filter Module API tests. 
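For readers following the API these tests exercise, a minimal hedged sketch of a filter module (hypothetical names, not part of this commit; it uses the two-argument callback signature in effect at this point in the series): the filter rewrites the hypothetical command name "oldcmd" to "newcmd" before Redis performs command lookup.

    #define REDISMODULE_EXPERIMENTAL_API
    #include "redismodule.h"
    #include <string.h>

    /* Hypothetical example module, assumed names only. */
    static void RenameFilter(RedisModuleCtx *ctx, RedisModuleCommandFilterCtx *filter) {
        (void) ctx;
        const RedisModuleString *name = RedisModule_CommandFilterArgGet(filter, 0);
        size_t len;
        const char *s = RedisModule_StringPtrLen(name, &len);
        if (len == 6 && !memcmp(s, "oldcmd", 6)) {
            RedisModule_CommandFilterArgReplace(filter, 0,
                RedisModule_CreateString(NULL, "newcmd", 6));
        }
    }

    int RedisModule_OnLoad(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) {
        (void) argv; (void) argc;
        if (RedisModule_Init(ctx, "renamefilter", 1, REDISMODULE_APIVER_1) == REDISMODULE_ERR)
            return REDISMODULE_ERR;
        if (RedisModule_RegisterCommandFilter(ctx, RenameFilter) == REDISMODULE_ERR)
            return REDISMODULE_ERR;
        return REDISMODULE_OK;
    }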
--- tests/modules/commandfilter.tcl | 27 +++++++++++++++++++++++++++ tests/test_helper.tcl | 1 + 2 files changed, 28 insertions(+) create mode 100644 tests/modules/commandfilter.tcl diff --git a/tests/modules/commandfilter.tcl b/tests/modules/commandfilter.tcl new file mode 100644 index 00000000..f0d96b25 --- /dev/null +++ b/tests/modules/commandfilter.tcl @@ -0,0 +1,27 @@ +set testmodule [file normalize src/modules/hellofilter.so] + +start_server {tags {"modules"}} { + r module load $testmodule log-key + + test {Command Filter handles redirected commands} { + r set mykey @log + r lrange log-key 0 -1 + } "{hellofilter.log set mykey @log}" + + test {Command Filter can call RedisModule_CommandFilterArgDelete} { + r rpush mylist elem1 @delme elem2 + r lrange mylist 0 -1 + } {elem1 elem2} + + test {Command Filter can call RedisModule_CommandFilterArgInsert} { + r del mylist + r rpush mylist elem1 @insertbefore elem2 @insertafter elem3 + r lrange mylist 0 -1 + } {elem1 --inserted-before-- @insertbefore elem2 @insertafter --inserted-after-- elem3} + + test {Command Filter can call RedisModule_CommandFilterArgReplace} { + r del mylist + r rpush mylist elem1 @replaceme elem2 + r lrange mylist 0 -1 + } {elem1 --replaced-- elem2} +} diff --git a/tests/test_helper.tcl b/tests/test_helper.tcl index 568eacde..d2f28152 100644 --- a/tests/test_helper.tcl +++ b/tests/test_helper.tcl @@ -63,6 +63,7 @@ set ::all_tests { unit/lazyfree unit/wait unit/pendingquerybuf + modules/commandfilter } # Index to the next test to run in the ::all_tests list. set ::next_test 0 From 2a5aeef79f894b80024d49ec1036ac03ae7ac5c5 Mon Sep 17 00:00:00 2001 From: Yossi Gottlieb Date: Mon, 18 Mar 2019 23:05:52 +0200 Subject: [PATCH 019/304] CommandFilter API: More cleanup. --- src/module.c | 37 +++++++++---------------------------- src/redismodule.h | 2 +- 2 files changed, 10 insertions(+), 29 deletions(-) diff --git a/src/module.c b/src/module.c index 741c546b..c6cb8a0c 100644 --- a/src/module.c +++ b/src/module.c @@ -270,32 +270,23 @@ typedef struct RedisModuleDictIter { raxIterator ri; } RedisModuleDictIter; -/* Information about the command to be executed, as passed to and from a - * filter. */ -typedef struct RedisModuleFilteredCommand { +typedef struct RedisModuleCommandFilterCtx { RedisModuleString **argv; int argc; -} RedisModuleFilteredCommand; +} RedisModuleCommandFilterCtx; -typedef void (*RedisModuleCommandFilterFunc) (RedisModuleCtx *ctx, RedisModuleFilteredCommand *cmd); +typedef void (*RedisModuleCommandFilterFunc) (RedisModuleCommandFilterCtx *filter); typedef struct RedisModuleCommandFilter { /* The module that registered the filter */ RedisModule *module; /* Filter callback function */ RedisModuleCommandFilterFunc callback; - /* Indicates a filter is active, avoid reentrancy */ - int active; } RedisModuleCommandFilter; /* Registered filters */ static list *moduleCommandFilters; -typedef struct RedisModuleCommandFilterCtx { - RedisModuleString **argv; - int argc; -} RedisModuleCommandFilterCtx; - /* -------------------------------------------------------------------------- * Prototypes * -------------------------------------------------------------------------- */ @@ -4802,16 +4793,13 @@ int moduleUnregisterUsedAPI(RedisModule *module) { /* Register a new command filter function. Filters get executed by Redis * before processing an inbound command and can be used to manipulate the - * behavior of standard Redis commands. 
Filters must not attempt to - * perform Redis commands or operate on the dataset, and must restrict - * themselves to manipulation of the arguments. + * behavior of standard Redis commands. */ int RM_RegisterCommandFilter(RedisModuleCtx *ctx, RedisModuleCommandFilterFunc callback) { RedisModuleCommandFilter *filter = zmalloc(sizeof(*filter)); filter->module = ctx->module; filter->callback = callback; - filter->active = 0; listAddNodeTail(moduleCommandFilters, filter); return REDISMODULE_OK; @@ -4824,26 +4812,19 @@ void moduleCallCommandFilters(client *c) { listNode *ln; listRewind(moduleCommandFilters,&li); - RedisModuleFilteredCommand cmd = { + RedisModuleCommandFilterCtx filter = { .argv = c->argv, .argc = c->argc }; while((ln = listNext(&li))) { - RedisModuleCommandFilter *filter = ln->value; - if (filter->active) continue; + RedisModuleCommandFilter *f = ln->value; - RedisModuleCtx ctx = REDISMODULE_CTX_INIT; - ctx.module = filter->module; - - filter->active = 1; - filter->callback(&ctx, &cmd); - filter->active = 0; - moduleFreeContext(&ctx); + f->callback(&filter); } - c->argv = cmd.argv; - c->argc = cmd.argc; + c->argv = filter.argv; + c->argc = filter.argc; } /* Return the number of arguments a filtered command has. The number of diff --git a/src/redismodule.h b/src/redismodule.h index 426a6df6..5df83ae6 100644 --- a/src/redismodule.h +++ b/src/redismodule.h @@ -163,7 +163,7 @@ typedef void (*RedisModuleTypeDigestFunc)(RedisModuleDigest *digest, void *value typedef void (*RedisModuleTypeFreeFunc)(void *value); typedef void (*RedisModuleClusterMessageReceiver)(RedisModuleCtx *ctx, const char *sender_id, uint8_t type, const unsigned char *payload, uint32_t len); typedef void (*RedisModuleTimerProc)(RedisModuleCtx *ctx, void *data); -typedef void (*RedisModuleCommandFilterFunc) (RedisModuleCtx *ctx, RedisModuleCommandFilterCtx *filter); +typedef void (*RedisModuleCommandFilterFunc) (RedisModuleCommandFilterCtx *filter); #define REDISMODULE_TYPE_METHOD_VERSION 1 typedef struct RedisModuleTypeMethods { From 325fc1cb2e2e15a99e5d012184d177dc19257036 Mon Sep 17 00:00:00 2001 From: Yossi Gottlieb Date: Mon, 18 Mar 2019 23:06:38 +0200 Subject: [PATCH 020/304] CommandFilter API: Support Lua and RM_call() flows. --- src/module.c | 20 +++++++++++++------- src/scripting.c | 5 +++++ 2 files changed, 18 insertions(+), 7 deletions(-) diff --git a/src/module.c b/src/module.c index c6cb8a0c..17accfb7 100644 --- a/src/module.c +++ b/src/module.c @@ -2741,12 +2741,6 @@ RedisModuleCallReply *RM_Call(RedisModuleCtx *ctx, const char *cmdname, const ch RedisModuleCallReply *reply = NULL; int replicate = 0; /* Replicate this command? */ - cmd = lookupCommandByCString((char*)cmdname); - if (!cmd) { - errno = EINVAL; - return NULL; - } - /* Create the client and dispatch the command. */ va_start(ap, fmt); c = createClient(-1); @@ -2760,11 +2754,23 @@ RedisModuleCallReply *RM_Call(RedisModuleCtx *ctx, const char *cmdname, const ch c->db = ctx->client->db; c->argv = argv; c->argc = argc; - c->cmd = c->lastcmd = cmd; /* We handle the above format error only when the client is setup so that * we can free it normally. */ if (argv == NULL) goto cleanup; + /* Call command filters */ + moduleCallCommandFilters(c); + + /* Lookup command now, after filters had a chance to make modifications + * if necessary. + */ + cmd = lookupCommand(c->argv[0]->ptr); + if (!cmd) { + errno = EINVAL; + goto cleanup; + } + c->cmd = c->lastcmd = cmd; + /* Basic arity checks. 
*/ if ((cmd->arity > 0 && cmd->arity != argc) || (argc < -cmd->arity)) { errno = EINVAL; diff --git a/src/scripting.c b/src/scripting.c index cbbf43fb..032bfdf1 100644 --- a/src/scripting.c +++ b/src/scripting.c @@ -462,6 +462,11 @@ int luaRedisGenericCommand(lua_State *lua, int raise_error) { c->argc = argc; c->user = server.lua_caller->user; + /* Process module hooks */ + moduleCallCommandFilters(c); + argv = c->argv; + argc = c->argc; + /* Log the command if debugging is active. */ if (ldb.active && ldb.step) { sds cmdlog = sdsnew(""); From a9a6a894e82442600f11d97d23f70b90316ca0a4 Mon Sep 17 00:00:00 2001 From: Yossi Gottlieb Date: Mon, 18 Mar 2019 23:07:28 +0200 Subject: [PATCH 021/304] CommandFilter API: hellofilter and tests. --- src/modules/hellofilter.c | 32 ++++++++++++++++++++++++++++---- tests/modules/commandfilter.tcl | 20 +++++++++++++++++++- 2 files changed, 47 insertions(+), 5 deletions(-) diff --git a/src/modules/hellofilter.c b/src/modules/hellofilter.c index 84eb02c3..d5dd405a 100644 --- a/src/modules/hellofilter.c +++ b/src/modules/hellofilter.c @@ -6,17 +6,32 @@ static RedisModuleString *log_key_name; static const char log_command_name[] = "hellofilter.log"; +static const char ping_command_name[] = "hellofilter.ping"; +static int in_module = 0; + +int HelloFilter_PingCommand(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) +{ + RedisModuleCallReply *reply = RedisModule_Call(ctx, "ping", "c", "@log"); + if (reply) { + RedisModule_ReplyWithCallReply(ctx, reply); + RedisModule_FreeCallReply(reply); + } else { + RedisModule_ReplyWithSimpleString(ctx, "Unknown command or invalid arguments"); + } + + return REDISMODULE_OK; +} int HelloFilter_LogCommand(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) { - RedisModuleString *s = RedisModule_CreateStringFromString(ctx, argv[0]); + RedisModuleString *s = RedisModule_CreateString(ctx, "", 0); int i; for (i = 1; i < argc; i++) { size_t arglen; const char *arg = RedisModule_StringPtrLen(argv[i], &arglen); - RedisModule_StringAppendBuffer(ctx, s, " ", 1); + if (i > 1) RedisModule_StringAppendBuffer(ctx, s, " ", 1); RedisModule_StringAppendBuffer(ctx, s, arg, arglen); } @@ -25,6 +40,8 @@ int HelloFilter_LogCommand(RedisModuleCtx *ctx, RedisModuleString **argv, int ar RedisModule_CloseKey(log); RedisModule_FreeString(ctx, s); + in_module = 1; + size_t cmdlen; const char *cmdname = RedisModule_StringPtrLen(argv[1], &cmdlen); RedisModuleCallReply *reply = RedisModule_Call(ctx, cmdname, "v", &argv[2], argc - 2); @@ -34,12 +51,15 @@ int HelloFilter_LogCommand(RedisModuleCtx *ctx, RedisModuleString **argv, int ar } else { RedisModule_ReplyWithSimpleString(ctx, "Unknown command or invalid arguments"); } + + in_module = 0; + return REDISMODULE_OK; } -void HelloFilter_CommandFilter(RedisModuleCtx *ctx, RedisModuleCommandFilterCtx *filter) +void HelloFilter_CommandFilter(RedisModuleCommandFilterCtx *filter) { - (void) ctx; + if (in_module) return; /* don't process our own RM_Call() */ /* Fun manipulations: * - Remove @delme @@ -94,6 +114,10 @@ int RedisModule_OnLoad(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) HelloFilter_LogCommand,"write deny-oom",1,1,1) == REDISMODULE_ERR) return REDISMODULE_ERR; + if (RedisModule_CreateCommand(ctx,ping_command_name, + HelloFilter_PingCommand,"write deny-oom",1,1,1) == REDISMODULE_ERR) + return REDISMODULE_ERR; + if (RedisModule_RegisterCommandFilter(ctx, HelloFilter_CommandFilter) == REDISMODULE_ERR) return REDISMODULE_ERR; diff --git a/tests/modules/commandfilter.tcl 
b/tests/modules/commandfilter.tcl index f0d96b25..47d9c302 100644 --- a/tests/modules/commandfilter.tcl +++ b/tests/modules/commandfilter.tcl @@ -6,7 +6,7 @@ start_server {tags {"modules"}} { test {Command Filter handles redirected commands} { r set mykey @log r lrange log-key 0 -1 - } "{hellofilter.log set mykey @log}" + } "{set mykey @log}" test {Command Filter can call RedisModule_CommandFilterArgDelete} { r rpush mylist elem1 @delme elem2 @@ -24,4 +24,22 @@ start_server {tags {"modules"}} { r rpush mylist elem1 @replaceme elem2 r lrange mylist 0 -1 } {elem1 --replaced-- elem2} + + test {Command Filter applies on RM_Call() commands} { + r del log-key + r hellofilter.ping + r lrange log-key 0 -1 + } "{ping @log}" + + test {Command Filter applies on Lua redis.call()} { + r del log-key + r eval "redis.call('ping', '@log')" 0 + r lrange log-key 0 -1 + } "{ping @log}" + + test {Command Filter applies on Lua redis.call() that calls a module} { + r del log-key + r eval "redis.call('hellofilter.ping')" 0 + r lrange log-key 0 -1 + } "{ping @log}" } From 8620a434a058aa5c66cccf2cc571e4337c73d12b Mon Sep 17 00:00:00 2001 From: Dvir Volk Date: Tue, 19 Mar 2019 13:11:37 +0200 Subject: [PATCH 022/304] Added keyspace miss notifications support --- src/db.c | 7 ++++++- src/modules/testmodule.c | 29 ++++++++++++++++++++++------- 2 files changed, 28 insertions(+), 8 deletions(-) diff --git a/src/db.c b/src/db.c index 7950d507..8c904708 100644 --- a/src/db.c +++ b/src/db.c @@ -83,6 +83,7 @@ robj *lookupKey(redisDb *db, robj *key, int flags) { * 1. A key gets expired if it reached it's TTL. * 2. The key last access time is updated. * 3. The global keys hits/misses stats are updated (reported in INFO). + * 4. If keyspace notifications are enabled, a "miss" notification is fired. * * This API should not be used when we write to the key after obtaining * the object linked to the key, but only for read only operations. @@ -106,6 +107,7 @@ robj *lookupKeyReadWithFlags(redisDb *db, robj *key, int flags) { * to return NULL ASAP. */ if (server.masterhost == NULL) { server.stat_keyspace_misses++; + notifyKeyspaceEvent(NOTIFY_GENERIC, "miss", key, db->id); return NULL; } @@ -127,12 +129,15 @@ robj *lookupKeyReadWithFlags(redisDb *db, robj *key, int flags) { server.current_client->cmd->flags & CMD_READONLY) { server.stat_keyspace_misses++; + notifyKeyspaceEvent(NOTIFY_GENERIC, "miss", key, db->id); return NULL; } } val = lookupKey(db,key,flags); - if (val == NULL) + if (val == NULL) { server.stat_keyspace_misses++; + notifyKeyspaceEvent(NOTIFY_GENERIC, "miss", key, db->id); + } else server.stat_keyspace_hits++; return val; diff --git a/src/modules/testmodule.c b/src/modules/testmodule.c index 67a86170..826dd9a7 100644 --- a/src/modules/testmodule.c +++ b/src/modules/testmodule.c @@ -109,9 +109,9 @@ int TestStringPrintf(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) { if (argc < 3) { return RedisModule_WrongArity(ctx); } - RedisModuleString *s = RedisModule_CreateStringPrintf(ctx, - "Got %d args. argv[1]: %s, argv[2]: %s", - argc, + RedisModuleString *s = RedisModule_CreateStringPrintf(ctx, + "Got %d args. 
argv[1]: %s, argv[2]: %s", + argc, RedisModule_StringPtrLen(argv[1], NULL), RedisModule_StringPtrLen(argv[2], NULL) ); @@ -133,7 +133,7 @@ int TestUnlink(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) { RedisModuleKey *k = RedisModule_OpenKey(ctx, RedisModule_CreateStringPrintf(ctx, "unlinked"), REDISMODULE_WRITE | REDISMODULE_READ); if (!k) return failTest(ctx, "Could not create key"); - + if (REDISMODULE_ERR == RedisModule_StringSet(k, RedisModule_CreateStringPrintf(ctx, "Foobar"))) { return failTest(ctx, "Could not set string value"); } @@ -152,7 +152,7 @@ int TestUnlink(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) { return failTest(ctx, "Could not verify key to be unlinked"); } return RedisModule_ReplyWithSimpleString(ctx, "OK"); - + } int NotifyCallback(RedisModuleCtx *ctx, int type, const char *event, @@ -188,6 +188,10 @@ int TestNotifications(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) { RedisModule_Call(ctx, "LPUSH", "cc", "l", "y"); RedisModule_Call(ctx, "LPUSH", "cc", "l", "y"); + /* Miss some keys intentionally so we will get a "miss" notification. */ + RedisModule_Call(ctx, "GET", "c", "nosuchkey"); + RedisModule_Call(ctx, "SMEMBERS", "c", "nosuchkey"); + size_t sz; const char *rep; RedisModuleCallReply *r = RedisModule_Call(ctx, "HGET", "cc", "notifications", "foo"); @@ -225,6 +229,16 @@ int TestNotifications(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) { FAIL("Wrong reply for l"); } + r = RedisModule_Call(ctx, "HGET", "cc", "notifications", "nosuchkey"); + if (r == NULL || RedisModule_CallReplyType(r) != REDISMODULE_REPLY_STRING) { + FAIL("Wrong or no reply for nosuchkey"); + } else { + rep = RedisModule_CallReplyStringPtr(r, &sz); + if (sz != 1 || *rep != '2') { + FAIL("Got reply '%.*s'. expected '2'", sz, rep); + } + } + RedisModule_Call(ctx, "FLUSHDB", ""); return RedisModule_ReplyWithSimpleString(ctx, "OK"); @@ -423,7 +437,7 @@ int RedisModule_OnLoad(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) if (RedisModule_CreateCommand(ctx,"test.ctxflags", TestCtxFlags,"readonly",1,1,1) == REDISMODULE_ERR) return REDISMODULE_ERR; - + if (RedisModule_CreateCommand(ctx,"test.unlink", TestUnlink,"write deny-oom",1,1,1) == REDISMODULE_ERR) return REDISMODULE_ERR; @@ -435,7 +449,8 @@ int RedisModule_OnLoad(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) RedisModule_SubscribeToKeyspaceEvents(ctx, REDISMODULE_NOTIFY_HASH | REDISMODULE_NOTIFY_SET | - REDISMODULE_NOTIFY_STRING, + REDISMODULE_NOTIFY_STRING | + REDISMODULE_NOTIFY_GENERIC, NotifyCallback); if (RedisModule_CreateCommand(ctx,"test.notify", TestNotifications,"write deny-oom",1,1,1) == REDISMODULE_ERR) From dd8b4be46baf86dc4f5e2c787a72b9d31faecdc0 Mon Sep 17 00:00:00 2001 From: Yossi Gottlieb Date: Tue, 19 Mar 2019 19:48:47 +0200 Subject: [PATCH 023/304] CommandFilter API: Extend documentation. --- src/module.c | 48 +++++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 43 insertions(+), 5 deletions(-) diff --git a/src/module.c b/src/module.c index 17accfb7..ee284022 100644 --- a/src/module.c +++ b/src/module.c @@ -4797,9 +4797,47 @@ int moduleUnregisterUsedAPI(RedisModule *module) { * Module Command Filter API * -------------------------------------------------------------------------- */ -/* Register a new command filter function. Filters get executed by Redis - * before processing an inbound command and can be used to manipulate the - * behavior of standard Redis commands. +/* Register a new command filter function. 
+ * + * Command filtering makes it possible for modules to extend Redis by plugging + * into the execution flow of all commands. + * + * A registered filter gets called before Redis executes *any* command. This + * includes both core Redis commands and commands registered by any module. The + * filter applies in all execution paths including: + * + * 1. Invocation by a client. + * 2. Invocation through `RedisModule_Call()` by any module. + * 3. Invocation through Lua 'redis.call()`. + * 4. Replication of a command from a master. + * + * The filter executes in a special filter context, which is different and more + * limited than a RedisModuleCtx. Because the filter affects any command, it + * must be implemented in a very efficient way to reduce the performance impact + * on Redis. All Redis Module API calls that require a valid context (such as + * `RedisModule_Call()`, `RedisModule_OpenKey()`, etc.) are not supported in a + * filter context. + * + * The `RedisModuleCommandFilterCtx` can be used to inspect or modify the + * executed command and its arguments. As the filter executes before Redis + * begins processing the command, any change will affect the way the command is + * processed. For example, a module can override Redis commands this way: + * + * 1. Register a `MODULE.SET` command which implements an extended version of + * the Redis `SET` command. + * 2. Register a command filter which detects invocation of `SET` on a specific + * pattern of keys. Once detected, the filter will replace the first + * argument from `SET` to `MODULE.SET`. + * 3. When filter execution is complete, Redis considers the new command name + * and therefore executes the module's own command. + * + * Note that in the above use case, if `MODULE.SET` itself uses + * `RedisModule_Call()` the filter will be applied on that call as well. If + * that is not desired, the module itself is responsible for maintaining a flag + * to identify and avoid this form of re-entrancy. + * + * If multiple filters are registered (by the same or different modules), they + * are executed in the order of registration. */ int RM_RegisterCommandFilter(RedisModuleCtx *ctx, RedisModuleCommandFilterFunc callback) { @@ -4881,7 +4919,7 @@ int RM_CommandFilterArgInsert(RedisModuleCommandFilterCtx *filter, int pos, Redi int RM_CommandFilterArgReplace(RedisModuleCommandFilterCtx *filter, int pos, RedisModuleString *arg) { if (pos < 0 || pos >= filter->argc) return REDISMODULE_ERR; - + decrRefCount(filter->argv[pos]); filter->argv[pos] = arg; @@ -4901,7 +4939,7 @@ int RM_CommandFilterArgDelete(RedisModuleCommandFilterCtx *filter, int pos) filter->argv[i] = filter->argv[i+1]; } filter->argc--; - + return REDISMODULE_OK; } From 385f6190a3a9f8d2d5775bd058aaa2173dc05c8c Mon Sep 17 00:00:00 2001 From: Oran Agra Date: Thu, 27 Sep 2018 18:12:31 +0300 Subject: [PATCH 024/304] getKeysFromCommand for TOUCH only extracted the first key. also, airty for COMMAND command was wrong. 
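To make the SET/MODULE.SET use case described in the filter documentation above concrete, a minimal sketch of such a filter callback could look like the following. It is illustrative only: MODULE.SET is a hypothetical command name, and registration at this point in the series is simply RedisModule_RegisterCommandFilter(ctx, RedirectSetFilter) (a later patch in this series returns a filter handle and adds a flags argument).

    #define REDISMODULE_EXPERIMENTAL_API
    #include "redismodule.h"
    #include <strings.h>

    /* Sketch: redirect SET to a hypothetical MODULE.SET registered by the
     * same module. The filter runs before Redis looks up the command, so
     * changing argv[0] changes which command gets executed. */
    void RedirectSetFilter(RedisModuleCommandFilterCtx *fctx) {
        if (RedisModule_CommandFilterArgsCount(fctx) < 3) return; /* SET key value */

        size_t len;
        const RedisModuleString *arg0 = RedisModule_CommandFilterArgGet(fctx, 0);
        const char *cmd = RedisModule_StringPtrLen(arg0, &len);
        if (len != 3 || strncasecmp(cmd, "set", 3) != 0) return;

        /* Replace argv[0]; the filter takes ownership of the new string,
         * which is created here with a NULL context (no automatic memory). */
        RedisModule_CommandFilterArgReplace(fctx, 0,
            RedisModule_CreateString(NULL, "module.set", 10));
    }
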
--- src/server.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/server.c b/src/server.c index 712cda1b..1341ab40 100644 --- a/src/server.c +++ b/src/server.c @@ -715,7 +715,7 @@ struct redisCommand redisCommandTable[] = { {"touch",touchCommand,-2, "read-only fast @keyspace", - 0,NULL,1,1,1,0,0,0}, + 0,NULL,1,-1,1,0,0,0}, {"pttl",pttlCommand,2, "read-only fast random @keyspace", @@ -863,7 +863,7 @@ struct redisCommand redisCommandTable[] = { "no-script @keyspace", 0,NULL,0,0,0,0,0,0}, - {"command",commandCommand,0, + {"command",commandCommand,-1, "ok-loading ok-stale random @connection", 0,NULL,0,0,0,0,0,0}, From 747174388f305148b0832dd97b9754e2a64bdfef Mon Sep 17 00:00:00 2001 From: Oran Agra Date: Thu, 27 Sep 2018 18:03:47 +0300 Subject: [PATCH 025/304] change SORT and SPOP to use lookupKeyWrite rather than lookupKeyRead like in SUNIONSTORE etc, commands that perform writes are expected to open all keys, even input keys, with lookupKeyWrite --- src/sort.c | 55 ++++++++++++++++++++++++++++++----------------------- src/t_set.c | 2 +- 2 files changed, 32 insertions(+), 25 deletions(-) diff --git a/src/sort.c b/src/sort.c index 8608cd8b..db26da15 100644 --- a/src/sort.c +++ b/src/sort.c @@ -58,7 +58,7 @@ redisSortOperation *createSortOperation(int type, robj *pattern) { * * The returned object will always have its refcount increased by 1 * when it is non-NULL. */ -robj *lookupKeyByPattern(redisDb *db, robj *pattern, robj *subst) { +robj *lookupKeyByPattern(redisDb *db, robj *pattern, robj *subst, int writeflag) { char *p, *f, *k; sds spat, ssub; robj *keyobj, *fieldobj = NULL, *o; @@ -106,7 +106,10 @@ robj *lookupKeyByPattern(redisDb *db, robj *pattern, robj *subst) { decrRefCount(subst); /* Incremented by decodeObject() */ /* Lookup substituted key */ - o = lookupKeyRead(db,keyobj); + if (!writeflag) + o = lookupKeyRead(db,keyobj); + else + o = lookupKeyWrite(db,keyobj); if (o == NULL) goto noobj; if (fieldobj) { @@ -198,30 +201,12 @@ void sortCommand(client *c) { robj *sortval, *sortby = NULL, *storekey = NULL; redisSortObject *vector; /* Resulting vector to sort */ - /* Lookup the key to sort. It must be of the right types */ - sortval = lookupKeyRead(c->db,c->argv[1]); - if (sortval && sortval->type != OBJ_SET && - sortval->type != OBJ_LIST && - sortval->type != OBJ_ZSET) - { - addReply(c,shared.wrongtypeerr); - return; - } - /* Create a list of operations to perform for every sorted element. * Operations can be GET */ operations = listCreate(); listSetFreeMethod(operations,zfree); j = 2; /* options start at argv[2] */ - /* Now we need to protect sortval incrementing its count, in the future - * SORT may have options able to overwrite/delete keys during the sorting - * and the sorted key itself may get destroyed */ - if (sortval) - incrRefCount(sortval); - else - sortval = createQuicklistObject(); - /* The SORT command has an SQL-alike syntax, parse it */ while(j < c->argc) { int leftargs = c->argc-j-1; @@ -280,11 +265,33 @@ void sortCommand(client *c) { /* Handle syntax errors set during options parsing. */ if (syntax_error) { - decrRefCount(sortval); listRelease(operations); return; } + /* Lookup the key to sort. 
It must be of the right types */ + if (storekey) + sortval = lookupKeyRead(c->db,c->argv[1]); + else + sortval = lookupKeyWrite(c->db,c->argv[1]); + if (sortval && sortval->type != OBJ_SET && + sortval->type != OBJ_LIST && + sortval->type != OBJ_ZSET) + { + listRelease(operations); + addReply(c,shared.wrongtypeerr); + return; + } + + /* Now we need to protect sortval incrementing its count, in the future + * SORT may have options able to overwrite/delete keys during the sorting + * and the sorted key itself may get destroyed */ + if (sortval) + incrRefCount(sortval); + else + sortval = createQuicklistObject(); + + /* When sorting a set with no sort specified, we must sort the output * so the result is consistent across scripting and replication. * @@ -452,7 +459,7 @@ void sortCommand(client *c) { robj *byval; if (sortby) { /* lookup value to sort by */ - byval = lookupKeyByPattern(c->db,sortby,vector[j].obj); + byval = lookupKeyByPattern(c->db,sortby,vector[j].obj,storekey!=NULL); if (!byval) continue; } else { /* use object itself to sort by */ @@ -515,7 +522,7 @@ void sortCommand(client *c) { while((ln = listNext(&li))) { redisSortOperation *sop = ln->value; robj *val = lookupKeyByPattern(c->db,sop->pattern, - vector[j].obj); + vector[j].obj,storekey!=NULL); if (sop->type == SORT_OP_GET) { if (!val) { @@ -545,7 +552,7 @@ void sortCommand(client *c) { while((ln = listNext(&li))) { redisSortOperation *sop = ln->value; robj *val = lookupKeyByPattern(c->db,sop->pattern, - vector[j].obj); + vector[j].obj,storekey!=NULL); if (sop->type == SORT_OP_GET) { if (!val) val = createStringObject("",0); diff --git a/src/t_set.c b/src/t_set.c index cbe55aaa..05d9ee24 100644 --- a/src/t_set.c +++ b/src/t_set.c @@ -415,7 +415,7 @@ void spopWithCountCommand(client *c) { /* Make sure a key with the name inputted exists, and that it's type is * indeed a set. Otherwise, return nil */ - if ((set = lookupKeyReadOrReply(c,c->argv[1],shared.null[c->resp])) + if ((set = lookupKeyWriteOrReply(c,c->argv[1],shared.null[c->resp])) == NULL || checkType(c,set,OBJ_SET)) return; /* If count is zero, serve an empty multibulk ASAP to avoid special From c9e2900efc1ed33727356df114fb716442ae2ce6 Mon Sep 17 00:00:00 2001 From: oranagra Date: Thu, 23 Feb 2017 03:13:44 -0800 Subject: [PATCH 026/304] bugfix to restartAOF, exit will never happen since retry will get negative. also reduce an excess sleep --- src/replication.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/src/replication.c b/src/replication.c index f2adc799..59e42e56 100644 --- a/src/replication.c +++ b/src/replication.c @@ -1091,12 +1091,13 @@ void replicationCreateMasterClient(int fd, int dbid) { } void restartAOF() { - int retry = 10; - while (retry-- && startAppendOnly() == C_ERR) { + unsigned int tries, max_tries = 10; + for (tries = 0; tries < max_tries; ++tries) { + if (tries) sleep(1); + if (startAppendOnly() == C_OK) break; serverLog(LL_WARNING,"Failed enabling the AOF after successful master synchronization! Trying it again in one second."); - sleep(1); } - if (!retry) { + if (tries == max_tries) { serverLog(LL_WARNING,"FATAL: this replica instance finished the synchronization with its master, but the AOF can't be turned on. 
Exiting now."); exit(1); } From b2e03f83292e65602a6c7dcaad1f6977f39f0b30 Mon Sep 17 00:00:00 2001 From: Oran Agra Date: Wed, 20 Mar 2019 17:46:19 +0200 Subject: [PATCH 027/304] diskless replication - notify slave when rdb transfer failed in diskless replication - master was not notifing the slave that rdb transfer terminated on error, and lets slave wait for replication timeout --- src/replication.c | 1 + 1 file changed, 1 insertion(+) diff --git a/src/replication.c b/src/replication.c index f2adc799..8f0d6791 100644 --- a/src/replication.c +++ b/src/replication.c @@ -593,6 +593,7 @@ int startBgsaveForReplication(int mincapa) { client *slave = ln->value; if (slave->replstate == SLAVE_STATE_WAIT_BGSAVE_START) { + slave->replstate = REPL_STATE_NONE; slave->flags &= ~CLIENT_SLAVE; listDelNode(server.slaves,ln); addReplyError(slave, From 99c2fe0bcf9876daf774fa7df4939cadc7972129 Mon Sep 17 00:00:00 2001 From: Dvir Volk Date: Thu, 21 Mar 2019 11:47:14 +0200 Subject: [PATCH 028/304] added special flag for keyspace miss notifications --- src/db.c | 6 +++--- src/modules/testmodule.c | 2 +- src/notify.c | 6 ++++-- src/redismodule.h | 1 + src/server.h | 4 +++- 5 files changed, 12 insertions(+), 7 deletions(-) diff --git a/src/db.c b/src/db.c index 8c904708..afe18128 100644 --- a/src/db.c +++ b/src/db.c @@ -107,7 +107,7 @@ robj *lookupKeyReadWithFlags(redisDb *db, robj *key, int flags) { * to return NULL ASAP. */ if (server.masterhost == NULL) { server.stat_keyspace_misses++; - notifyKeyspaceEvent(NOTIFY_GENERIC, "miss", key, db->id); + notifyKeyspaceEvent(NOTIFY_KEY_MISS, "miss", key, db->id); return NULL; } @@ -129,14 +129,14 @@ robj *lookupKeyReadWithFlags(redisDb *db, robj *key, int flags) { server.current_client->cmd->flags & CMD_READONLY) { server.stat_keyspace_misses++; - notifyKeyspaceEvent(NOTIFY_GENERIC, "miss", key, db->id); + notifyKeyspaceEvent(NOTIFY_KEY_MISS, "miss", key, db->id); return NULL; } } val = lookupKey(db,key,flags); if (val == NULL) { server.stat_keyspace_misses++; - notifyKeyspaceEvent(NOTIFY_GENERIC, "miss", key, db->id); + notifyKeyspaceEvent(NOTIFY_KEY_MISS, "miss", key, db->id); } else server.stat_keyspace_hits++; diff --git a/src/modules/testmodule.c b/src/modules/testmodule.c index 826dd9a7..af78d21d 100644 --- a/src/modules/testmodule.c +++ b/src/modules/testmodule.c @@ -450,7 +450,7 @@ int RedisModule_OnLoad(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) REDISMODULE_NOTIFY_HASH | REDISMODULE_NOTIFY_SET | REDISMODULE_NOTIFY_STRING | - REDISMODULE_NOTIFY_GENERIC, + REDISMODULE_NOTIFY_KEY_MISS, NotifyCallback); if (RedisModule_CreateCommand(ctx,"test.notify", TestNotifications,"write deny-oom",1,1,1) == REDISMODULE_ERR) diff --git a/src/notify.c b/src/notify.c index 1afb36fc..d6c3ad40 100644 --- a/src/notify.c +++ b/src/notify.c @@ -55,6 +55,7 @@ int keyspaceEventsStringToFlags(char *classes) { case 'K': flags |= NOTIFY_KEYSPACE; break; case 'E': flags |= NOTIFY_KEYEVENT; break; case 't': flags |= NOTIFY_STREAM; break; + case 'm': flags |= NOTIFY_KEY_MISS; break; default: return -1; } } @@ -81,6 +82,7 @@ sds keyspaceEventsFlagsToString(int flags) { if (flags & NOTIFY_EXPIRED) res = sdscatlen(res,"x",1); if (flags & NOTIFY_EVICTED) res = sdscatlen(res,"e",1); if (flags & NOTIFY_STREAM) res = sdscatlen(res,"t",1); + if (flags & NOTIFY_KEY_MISS) res = sdscatlen(res,"m",1); } if (flags & NOTIFY_KEYSPACE) res = sdscatlen(res,"K",1); if (flags & NOTIFY_KEYEVENT) res = sdscatlen(res,"E",1); @@ -100,12 +102,12 @@ void notifyKeyspaceEvent(int type, char *event, robj 
*key, int dbid) { int len = -1; char buf[24]; - /* If any modules are interested in events, notify the module system now. + /* If any modules are interested in events, notify the module system now. * This bypasses the notifications configuration, but the module engine * will only call event subscribers if the event type matches the types * they are interested in. */ moduleNotifyKeyspaceEvent(type, event, key, dbid); - + /* If notifications for this class of events are off, return ASAP. */ if (!(server.notify_keyspace_events & type)) return; diff --git a/src/redismodule.h b/src/redismodule.h index 272da08d..681bd600 100644 --- a/src/redismodule.h +++ b/src/redismodule.h @@ -98,6 +98,7 @@ #define REDISMODULE_NOTIFY_EXPIRED (1<<8) /* x */ #define REDISMODULE_NOTIFY_EVICTED (1<<9) /* e */ #define REDISMODULE_NOTIFY_STREAM (1<<10) /* t */ +#define REDISMODULE_NOTIFY_KEY_MISS (1<<11) /* m */ #define REDISMODULE_NOTIFY_ALL (REDISMODULE_NOTIFY_GENERIC | REDISMODULE_NOTIFY_STRING | REDISMODULE_NOTIFY_LIST | REDISMODULE_NOTIFY_SET | REDISMODULE_NOTIFY_HASH | REDISMODULE_NOTIFY_ZSET | REDISMODULE_NOTIFY_EXPIRED | REDISMODULE_NOTIFY_EVICTED | REDISMODULE_NOTIFY_STREAM) /* A */ diff --git a/src/server.h b/src/server.h index 56c3b67d..0b433039 100644 --- a/src/server.h +++ b/src/server.h @@ -468,7 +468,9 @@ typedef long long mstime_t; /* millisecond time type. */ #define NOTIFY_EXPIRED (1<<8) /* x */ #define NOTIFY_EVICTED (1<<9) /* e */ #define NOTIFY_STREAM (1<<10) /* t */ -#define NOTIFY_ALL (NOTIFY_GENERIC | NOTIFY_STRING | NOTIFY_LIST | NOTIFY_SET | NOTIFY_HASH | NOTIFY_ZSET | NOTIFY_EXPIRED | NOTIFY_EVICTED | NOTIFY_STREAM) /* A flag */ +#define NOTIFY_KEY_MISS (1<<11) /* m */ + +#define NOTIFY_ALL (NOTIFY_GENERIC | NOTIFY_STRING | NOTIFY_LIST | NOTIFY_SET | NOTIFY_HASH | NOTIFY_ZSET | NOTIFY_EXPIRED | NOTIFY_EVICTED | NOTIFY_STREAM | NOTIFY_KEY_MISS) /* A flag */ /* Get the first bind addr or NULL */ #define NET_FIRST_BIND_ADDR (server.bindaddr_count ? 
server.bindaddr[0] : NULL) From 4a0ee5c6ac7908ee41e69c2d7ace55f698d94418 Mon Sep 17 00:00:00 2001 From: Dvir Volk Date: Thu, 21 Mar 2019 12:47:51 +0200 Subject: [PATCH 029/304] Added missing REDISMODULE_NOTIFY_KEY_MISS flag to REDISMODULE_NOTIFY_ALL --- src/redismodule.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/redismodule.h b/src/redismodule.h index 681bd600..70011f93 100644 --- a/src/redismodule.h +++ b/src/redismodule.h @@ -99,7 +99,7 @@ #define REDISMODULE_NOTIFY_EVICTED (1<<9) /* e */ #define REDISMODULE_NOTIFY_STREAM (1<<10) /* t */ #define REDISMODULE_NOTIFY_KEY_MISS (1<<11) /* m */ -#define REDISMODULE_NOTIFY_ALL (REDISMODULE_NOTIFY_GENERIC | REDISMODULE_NOTIFY_STRING | REDISMODULE_NOTIFY_LIST | REDISMODULE_NOTIFY_SET | REDISMODULE_NOTIFY_HASH | REDISMODULE_NOTIFY_ZSET | REDISMODULE_NOTIFY_EXPIRED | REDISMODULE_NOTIFY_EVICTED | REDISMODULE_NOTIFY_STREAM) /* A */ +#define REDISMODULE_NOTIFY_ALL (REDISMODULE_NOTIFY_GENERIC | REDISMODULE_NOTIFY_STRING | REDISMODULE_NOTIFY_LIST | REDISMODULE_NOTIFY_SET | REDISMODULE_NOTIFY_HASH | REDISMODULE_NOTIFY_ZSET | REDISMODULE_NOTIFY_EXPIRED | REDISMODULE_NOTIFY_EVICTED | REDISMODULE_NOTIFY_STREAM | REDISMODULE_NOTIFY_KEY_MISS) /* A */ /* A special pointer that we can use between the core and the module to signal From bc269c85e1d7ff15a377aff5197a1a670c65aab9 Mon Sep 17 00:00:00 2001 From: Dvir Volk Date: Thu, 21 Mar 2019 12:48:37 +0200 Subject: [PATCH 030/304] remove extra linebreak --- src/server.h | 1 - 1 file changed, 1 deletion(-) diff --git a/src/server.h b/src/server.h index 0b433039..b090a637 100644 --- a/src/server.h +++ b/src/server.h @@ -469,7 +469,6 @@ typedef long long mstime_t; /* millisecond time type. */ #define NOTIFY_EVICTED (1<<9) /* e */ #define NOTIFY_STREAM (1<<10) /* t */ #define NOTIFY_KEY_MISS (1<<11) /* m */ - #define NOTIFY_ALL (NOTIFY_GENERIC | NOTIFY_STRING | NOTIFY_LIST | NOTIFY_SET | NOTIFY_HASH | NOTIFY_ZSET | NOTIFY_EXPIRED | NOTIFY_EVICTED | NOTIFY_STREAM | NOTIFY_KEY_MISS) /* A flag */ /* Get the first bind addr or NULL */ From 9dabbd1ab072f3abced48b4995d9ef3e745f0608 Mon Sep 17 00:00:00 2001 From: antirez Date: Thu, 21 Mar 2019 12:18:55 +0100 Subject: [PATCH 031/304] Alter coding style in #4696 to conform to Redis code base. --- src/zmalloc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/zmalloc.c b/src/zmalloc.c index 4c40a778..5e601027 100644 --- a/src/zmalloc.c +++ b/src/zmalloc.c @@ -148,7 +148,7 @@ void *zrealloc(void *ptr, size_t size) { size_t oldsize; void *newptr; - if (size == 0 && ptr!=NULL) { + if (size == 0 && ptr != NULL) { zfree(ptr); return NULL; } From e2626f69eccc7addf9283285a6849f798e882af8 Mon Sep 17 00:00:00 2001 From: Yossi Gottlieb Date: Thu, 21 Mar 2019 14:44:49 +0200 Subject: [PATCH 032/304] CommandFilter API: Add unregister option. A filter handle is returned and can be used to unregister a filter. In the future it can also be used to further configure or manipulate the filter. Filters are now automatically unregistered when a module unloads. --- src/module.c | 94 +++++++++++++++++++++++++-------- src/modules/hellofilter.c | 27 ++++++++-- src/redismodule.h | 15 +++--- tests/modules/commandfilter.tcl | 22 ++++++++ 4 files changed, 126 insertions(+), 32 deletions(-) diff --git a/src/module.c b/src/module.c index ee284022..ad7bba2e 100644 --- a/src/module.c +++ b/src/module.c @@ -49,6 +49,7 @@ struct RedisModule { list *types; /* Module data types. */ list *usedby; /* List of modules using APIs from this one. 
*/ list *using; /* List of modules we use some APIs of. */ + list *filters; /* List of filters the module has registered. */ }; typedef struct RedisModule RedisModule; @@ -748,6 +749,7 @@ void RM_SetModuleAttribs(RedisModuleCtx *ctx, const char *name, int ver, int api module->types = listCreate(); module->usedby = listCreate(); module->using = listCreate(); + module->filters = listCreate(); ctx->module = module; } @@ -4793,6 +4795,28 @@ int moduleUnregisterUsedAPI(RedisModule *module) { return count; } +/* Unregister all filters registered by a module. + * This is called when a module is being unloaded. + * + * Returns the number of filters unregistered. */ +int moduleUnregisterFilters(RedisModule *module) { + listIter li; + listNode *ln; + int count = 0; + + listRewind(module->filters,&li); + while((ln = listNext(&li))) { + RedisModuleCommandFilter *filter = ln->value; + listNode *ln = listSearchKey(moduleCommandFilters,filter); + if (ln) { + listDelNode(moduleCommandFilters,ln); + count++; + } + zfree(filter); + } + return count; +} + /* -------------------------------------------------------------------------- * Module Command Filter API * -------------------------------------------------------------------------- */ @@ -4840,12 +4864,33 @@ int moduleUnregisterUsedAPI(RedisModule *module) { * are executed in the order of registration. */ -int RM_RegisterCommandFilter(RedisModuleCtx *ctx, RedisModuleCommandFilterFunc callback) { +RedisModuleCommandFilter *RM_RegisterCommandFilter(RedisModuleCtx *ctx, RedisModuleCommandFilterFunc callback) { RedisModuleCommandFilter *filter = zmalloc(sizeof(*filter)); filter->module = ctx->module; filter->callback = callback; listAddNodeTail(moduleCommandFilters, filter); + listAddNodeTail(ctx->module->filters, filter); + return filter; +} + +/* Unregister a command filter. + */ +int RM_UnregisterCommandFilter(RedisModuleCtx *ctx, RedisModuleCommandFilter *filter) { + listNode *ln; + + /* A module can only remove its own filters */ + if (filter->module != ctx->module) return REDISMODULE_ERR; + + ln = listSearchKey(moduleCommandFilters,filter); + if (!ln) return REDISMODULE_ERR; + listDelNode(moduleCommandFilters,ln); + + ln = listSearchKey(ctx->module->filters,filter); + if (ln) { + listDelNode(moduleCommandFilters,ln); + } + return REDISMODULE_OK; } @@ -4874,18 +4919,18 @@ void moduleCallCommandFilters(client *c) { /* Return the number of arguments a filtered command has. The number of * arguments include the command itself. */ -int RM_CommandFilterArgsCount(RedisModuleCommandFilterCtx *filter) +int RM_CommandFilterArgsCount(RedisModuleCommandFilterCtx *fctx) { - return filter->argc; + return fctx->argc; } /* Return the specified command argument. The first argument (position 0) is * the command itself, and the rest are user-provided args. */ -const RedisModuleString *RM_CommandFilterArgGet(RedisModuleCommandFilterCtx *filter, int pos) +const RedisModuleString *RM_CommandFilterArgGet(RedisModuleCommandFilterCtx *fctx, int pos) { - if (pos < 0 || pos >= filter->argc) return NULL; - return filter->argv[pos]; + if (pos < 0 || pos >= fctx->argc) return NULL; + return fctx->argv[pos]; } /* Modify the filtered command by inserting a new argument at the specified @@ -4894,18 +4939,18 @@ const RedisModuleString *RM_CommandFilterArgGet(RedisModuleCommandFilterCtx *fil * allocated, freed or used elsewhere. 
*/ -int RM_CommandFilterArgInsert(RedisModuleCommandFilterCtx *filter, int pos, RedisModuleString *arg) +int RM_CommandFilterArgInsert(RedisModuleCommandFilterCtx *fctx, int pos, RedisModuleString *arg) { int i; - if (pos < 0 || pos > filter->argc) return REDISMODULE_ERR; + if (pos < 0 || pos > fctx->argc) return REDISMODULE_ERR; - filter->argv = zrealloc(filter->argv, (filter->argc+1)*sizeof(RedisModuleString *)); - for (i = filter->argc; i > pos; i--) { - filter->argv[i] = filter->argv[i-1]; + fctx->argv = zrealloc(fctx->argv, (fctx->argc+1)*sizeof(RedisModuleString *)); + for (i = fctx->argc; i > pos; i--) { + fctx->argv[i] = fctx->argv[i-1]; } - filter->argv[pos] = arg; - filter->argc++; + fctx->argv[pos] = arg; + fctx->argc++; return REDISMODULE_OK; } @@ -4916,12 +4961,12 @@ int RM_CommandFilterArgInsert(RedisModuleCommandFilterCtx *filter, int pos, Redi * or used elsewhere. */ -int RM_CommandFilterArgReplace(RedisModuleCommandFilterCtx *filter, int pos, RedisModuleString *arg) +int RM_CommandFilterArgReplace(RedisModuleCommandFilterCtx *fctx, int pos, RedisModuleString *arg) { - if (pos < 0 || pos >= filter->argc) return REDISMODULE_ERR; + if (pos < 0 || pos >= fctx->argc) return REDISMODULE_ERR; - decrRefCount(filter->argv[pos]); - filter->argv[pos] = arg; + decrRefCount(fctx->argv[pos]); + fctx->argv[pos] = arg; return REDISMODULE_OK; } @@ -4929,16 +4974,16 @@ int RM_CommandFilterArgReplace(RedisModuleCommandFilterCtx *filter, int pos, Red /* Modify the filtered command by deleting an argument at the specified * position. */ -int RM_CommandFilterArgDelete(RedisModuleCommandFilterCtx *filter, int pos) +int RM_CommandFilterArgDelete(RedisModuleCommandFilterCtx *fctx, int pos) { int i; - if (pos < 0 || pos >= filter->argc) return REDISMODULE_ERR; + if (pos < 0 || pos >= fctx->argc) return REDISMODULE_ERR; - decrRefCount(filter->argv[pos]); - for (i = pos; i < filter->argc-1; i++) { - filter->argv[i] = filter->argv[i+1]; + decrRefCount(fctx->argv[pos]); + for (i = pos; i < fctx->argc-1; i++) { + fctx->argv[i] = fctx->argv[i+1]; } - filter->argc--; + fctx->argc--; return REDISMODULE_OK; } @@ -5041,6 +5086,7 @@ void moduleLoadFromQueue(void) { void moduleFreeModuleStructure(struct RedisModule *module) { listRelease(module->types); + listRelease(module->filters); sdsfree(module->name); zfree(module); } @@ -5132,6 +5178,7 @@ int moduleUnload(sds name) { moduleUnregisterCommands(module); moduleUnregisterSharedAPI(module); moduleUnregisterUsedAPI(module); + moduleUnregisterFilters(module); /* Remove any notification subscribers this module might have */ moduleUnsubscribeNotifications(module); @@ -5396,6 +5443,7 @@ void moduleRegisterCoreAPI(void) { REGISTER_API(ExportSharedAPI); REGISTER_API(GetSharedAPI); REGISTER_API(RegisterCommandFilter); + REGISTER_API(UnregisterCommandFilter); REGISTER_API(CommandFilterArgsCount); REGISTER_API(CommandFilterArgGet); REGISTER_API(CommandFilterArgInsert); diff --git a/src/modules/hellofilter.c b/src/modules/hellofilter.c index d5dd405a..9cd440df 100644 --- a/src/modules/hellofilter.c +++ b/src/modules/hellofilter.c @@ -7,10 +7,27 @@ static RedisModuleString *log_key_name; static const char log_command_name[] = "hellofilter.log"; static const char ping_command_name[] = "hellofilter.ping"; +static const char unregister_command_name[] = "hellofilter.unregister"; static int in_module = 0; +static RedisModuleCommandFilter *filter = NULL; + +int HelloFilter_UnregisterCommand(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) +{ + (void) argc; + (void) 
argv; + + RedisModule_ReplyWithLongLong(ctx, + RedisModule_UnregisterCommandFilter(ctx, filter)); + + return REDISMODULE_OK; +} + int HelloFilter_PingCommand(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) { + (void) argc; + (void) argv; + RedisModuleCallReply *reply = RedisModule_Call(ctx, "ping", "c", "@log"); if (reply) { RedisModule_ReplyWithCallReply(ctx, reply); @@ -115,11 +132,15 @@ int RedisModule_OnLoad(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) return REDISMODULE_ERR; if (RedisModule_CreateCommand(ctx,ping_command_name, - HelloFilter_PingCommand,"write deny-oom",1,1,1) == REDISMODULE_ERR) + HelloFilter_PingCommand,"deny-oom",1,1,1) == REDISMODULE_ERR) return REDISMODULE_ERR; - if (RedisModule_RegisterCommandFilter(ctx, HelloFilter_CommandFilter) - == REDISMODULE_ERR) return REDISMODULE_ERR; + if (RedisModule_CreateCommand(ctx,unregister_command_name, + HelloFilter_UnregisterCommand,"write deny-oom",1,1,1) == REDISMODULE_ERR) + return REDISMODULE_ERR; + + if ((filter = RedisModule_RegisterCommandFilter(ctx, HelloFilter_CommandFilter)) + == NULL) return REDISMODULE_ERR; return REDISMODULE_OK; } diff --git a/src/redismodule.h b/src/redismodule.h index 5df83ae6..37b7d0d5 100644 --- a/src/redismodule.h +++ b/src/redismodule.h @@ -151,6 +151,7 @@ typedef struct RedisModuleClusterInfo RedisModuleClusterInfo; typedef struct RedisModuleDict RedisModuleDict; typedef struct RedisModuleDictIter RedisModuleDictIter; typedef struct RedisModuleCommandFilterCtx RedisModuleCommandFilterCtx; +typedef struct RedisModuleCommandFilter RedisModuleCommandFilter; typedef int (*RedisModuleCmdFunc)(RedisModuleCtx *ctx, RedisModuleString **argv, int argc); typedef void (*RedisModuleDisconnectFunc)(RedisModuleCtx *ctx, RedisModuleBlockedClient *bc); @@ -339,12 +340,13 @@ void REDISMODULE_API_FUNC(RedisModule_SetDisconnectCallback)(RedisModuleBlockedC void REDISMODULE_API_FUNC(RedisModule_SetClusterFlags)(RedisModuleCtx *ctx, uint64_t flags); int REDISMODULE_API_FUNC(RedisModule_ExportSharedAPI)(RedisModuleCtx *ctx, const char *apiname, void *func); void *REDISMODULE_API_FUNC(RedisModule_GetSharedAPI)(RedisModuleCtx *ctx, const char *apiname); -int REDISMODULE_API_FUNC(RedisModule_RegisterCommandFilter)(RedisModuleCtx *ctx, RedisModuleCommandFilterFunc cb); -int REDISMODULE_API_FUNC(RedisModule_CommandFilterArgsCount)(RedisModuleCommandFilterCtx *filter); -const RedisModuleString *REDISMODULE_API_FUNC(RedisModule_CommandFilterArgGet)(RedisModuleCommandFilterCtx *filter, int pos); -int REDISMODULE_API_FUNC(RedisModule_CommandFilterArgInsert)(RedisModuleCommandFilterCtx *filter, int pos, RedisModuleString *arg); -int REDISMODULE_API_FUNC(RedisModule_CommandFilterArgReplace)(RedisModuleCommandFilterCtx *filter, int pos, RedisModuleString *arg); -int REDISMODULE_API_FUNC(RedisModule_CommandFilterArgDelete)(RedisModuleCommandFilterCtx *filter, int pos); +RedisModuleCommandFilter *REDISMODULE_API_FUNC(RedisModule_RegisterCommandFilter)(RedisModuleCtx *ctx, RedisModuleCommandFilterFunc cb); +int REDISMODULE_API_FUNC(RedisModule_UnregisterCommandFilter)(RedisModuleCtx *ctx, RedisModuleCommandFilter *filter); +int REDISMODULE_API_FUNC(RedisModule_CommandFilterArgsCount)(RedisModuleCommandFilterCtx *fctx); +const RedisModuleString *REDISMODULE_API_FUNC(RedisModule_CommandFilterArgGet)(RedisModuleCommandFilterCtx *fctx, int pos); +int REDISMODULE_API_FUNC(RedisModule_CommandFilterArgInsert)(RedisModuleCommandFilterCtx *fctx, int pos, RedisModuleString *arg); +int 
REDISMODULE_API_FUNC(RedisModule_CommandFilterArgReplace)(RedisModuleCommandFilterCtx *fctx, int pos, RedisModuleString *arg); +int REDISMODULE_API_FUNC(RedisModule_CommandFilterArgDelete)(RedisModuleCommandFilterCtx *fctx, int pos); #endif /* This is included inline inside each Redis module. */ @@ -508,6 +510,7 @@ static int RedisModule_Init(RedisModuleCtx *ctx, const char *name, int ver, int REDISMODULE_GET_API(ExportSharedAPI); REDISMODULE_GET_API(GetSharedAPI); REDISMODULE_GET_API(RegisterCommandFilter); + REDISMODULE_GET_API(UnregisterCommandFilter); REDISMODULE_GET_API(CommandFilterArgsCount); REDISMODULE_GET_API(CommandFilterArgGet); REDISMODULE_GET_API(CommandFilterArgInsert); diff --git a/tests/modules/commandfilter.tcl b/tests/modules/commandfilter.tcl index 47d9c302..8645d827 100644 --- a/tests/modules/commandfilter.tcl +++ b/tests/modules/commandfilter.tcl @@ -42,4 +42,26 @@ start_server {tags {"modules"}} { r eval "redis.call('hellofilter.ping')" 0 r lrange log-key 0 -1 } "{ping @log}" + + test {Command Filter is unregistered implicitly on module unload} { + r del log-key + r module unload hellofilter + r set mykey @log + r lrange log-key 0 -1 + } {} + + r module load $testmodule log-key-2 + + test {Command Filter unregister works as expected} { + # Validate reloading succeeded + r set mykey @log + assert_equal "{set mykey @log}" [r lrange log-key-2 0 -1] + + # Unregister + r hellofilter.unregister + r del log-key-2 + + r set mykey @log + r lrange log-key-2 0 -1 + } {} } From 9588fd52ac3333d0bf3243523ec9a165fa18f87e Mon Sep 17 00:00:00 2001 From: antirez Date: Thu, 21 Mar 2019 17:18:24 +0100 Subject: [PATCH 033/304] Mostly aesthetic changes to restartAOF(). See #3829. --- src/replication.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/src/replication.c b/src/replication.c index 59e42e56..c25e7fa6 100644 --- a/src/replication.c +++ b/src/replication.c @@ -1093,12 +1093,16 @@ void replicationCreateMasterClient(int fd, int dbid) { void restartAOF() { unsigned int tries, max_tries = 10; for (tries = 0; tries < max_tries; ++tries) { - if (tries) sleep(1); if (startAppendOnly() == C_OK) break; - serverLog(LL_WARNING,"Failed enabling the AOF after successful master synchronization! Trying it again in one second."); + serverLog(LL_WARNING, + "Failed enabling the AOF after successful master synchronization! " + "Trying it again in one second."); + sleep(1); } if (tries == max_tries) { - serverLog(LL_WARNING,"FATAL: this replica instance finished the synchronization with its master, but the AOF can't be turned on. Exiting now."); + serverLog(LL_WARNING, + "FATAL: this replica instance finished the synchronization with " + "its master, but the AOF can't be turned on. Exiting now."); exit(1); } } From b3408e9a9b1bdf8ea59bf80d715c695a113820f3 Mon Sep 17 00:00:00 2001 From: antirez Date: Thu, 21 Mar 2019 17:21:25 +0100 Subject: [PATCH 034/304] More sensible name for function: restartAOFAfterSYNC(). Related to #3829. 
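The unregister API introduced above follows a simple pattern: keep the handle returned at registration time and pass it back later; anything still registered is removed automatically when the module unloads. A hedged sketch of that lifecycle, mirroring what the hellofilter test module in this series does (names are illustrative, and a following patch adds a flags argument to registration):

    #define REDISMODULE_EXPERIMENTAL_API
    #include "redismodule.h"

    static RedisModuleCommandFilter *the_filter = NULL;

    static void MyFilter(RedisModuleCommandFilterCtx *fctx) {
        (void) fctx; /* no-op placeholder callback */
    }

    /* Called from RedisModule_OnLoad(): register and keep the handle. */
    static int register_my_filter(RedisModuleCtx *ctx) {
        the_filter = RedisModule_RegisterCommandFilter(ctx, MyFilter);
        return the_filter ? REDISMODULE_OK : REDISMODULE_ERR;
    }

    /* Called from a module command: unregister explicitly. Only the module
     * that registered a filter may unregister it; the module unload path
     * cleans up any filters that are still registered. */
    static int unregister_my_filter(RedisModuleCtx *ctx) {
        if (the_filter == NULL) return REDISMODULE_ERR;
        if (RedisModule_UnregisterCommandFilter(ctx, the_filter) != REDISMODULE_OK)
            return REDISMODULE_ERR;
        the_filter = NULL;
        return REDISMODULE_OK;
    }
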
--- src/replication.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/src/replication.c b/src/replication.c index c25e7fa6..a27c29a3 100644 --- a/src/replication.c +++ b/src/replication.c @@ -1090,7 +1090,11 @@ void replicationCreateMasterClient(int fd, int dbid) { if (dbid != -1) selectDb(server.master,dbid); } -void restartAOF() { +/* This function will try to re-enable the AOF file after the + * master-replica synchronization: if it fails after multiple attempts + * the replica cannot be considered reliable and exists with an + * error. */ +void restartAOFAfterSYNC() { unsigned int tries, max_tries = 10; for (tries = 0; tries < max_tries; ++tries) { if (startAppendOnly() == C_OK) break; @@ -1289,7 +1293,7 @@ void readSyncBulkPayload(aeEventLoop *el, int fd, void *privdata, int mask) { cancelReplicationHandshake(); /* Re-enable the AOF if we disabled it earlier, in order to restore * the original configuration. */ - if (aof_is_enabled) restartAOF(); + if (aof_is_enabled) restartAOFAfterSYNC(); return; } /* Final setup of the connected slave <- master link */ @@ -1314,7 +1318,7 @@ void readSyncBulkPayload(aeEventLoop *el, int fd, void *privdata, int mask) { /* Restart the AOF subsystem now that we finished the sync. This * will trigger an AOF rewrite, and when done will start appending * to the new file. */ - if (aof_is_enabled) restartAOF(); + if (aof_is_enabled) restartAOFAfterSYNC(); } return; From 4ea3ed896b286c8f2bf192e07e1c36802a3a1c38 Mon Sep 17 00:00:00 2001 From: Yossi Gottlieb Date: Thu, 21 Mar 2019 19:45:41 +0200 Subject: [PATCH 035/304] CommandFilter API: fix UnregisterCommandFilter. --- src/module.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/module.c b/src/module.c index ad7bba2e..a54bd1ad 100644 --- a/src/module.c +++ b/src/module.c @@ -4887,9 +4887,8 @@ int RM_UnregisterCommandFilter(RedisModuleCtx *ctx, RedisModuleCommandFilter *fi listDelNode(moduleCommandFilters,ln); ln = listSearchKey(ctx->module->filters,filter); - if (ln) { - listDelNode(moduleCommandFilters,ln); - } + if (!ln) return REDISMODULE_ERR; /* Shouldn't happen */ + listDelNode(ctx->module->filters,ln); return REDISMODULE_OK; } From 6c0a5fde3d0d90c85f086ca955f9473fe41797b3 Mon Sep 17 00:00:00 2001 From: Yossi Gottlieb Date: Thu, 21 Mar 2019 19:47:43 +0200 Subject: [PATCH 036/304] CommandFilter API: REDISMODULE_CMDFILTER_NOSELF. Add a flag to automatically protect filters from being called recursively by their own module. --- src/module.c | 28 +++++++++++++++++++++++++--- src/modules/hellofilter.c | 15 +++++++++------ src/redismodule.h | 7 ++++++- tests/modules/commandfilter.tcl | 27 ++++++++++++++++++++++----- 4 files changed, 62 insertions(+), 15 deletions(-) diff --git a/src/module.c b/src/module.c index a54bd1ad..4ff865d2 100644 --- a/src/module.c +++ b/src/module.c @@ -50,6 +50,7 @@ struct RedisModule { list *usedby; /* List of modules using APIs from this one. */ list *using; /* List of modules we use some APIs of. */ list *filters; /* List of filters the module has registered. 
*/ + int in_call; /* RM_Call() nesting level */ }; typedef struct RedisModule RedisModule; @@ -283,6 +284,8 @@ typedef struct RedisModuleCommandFilter { RedisModule *module; /* Filter callback function */ RedisModuleCommandFilterFunc callback; + /* REDISMODULE_CMDFILTER_* flags */ + int flags; } RedisModuleCommandFilter; /* Registered filters */ @@ -2756,6 +2759,8 @@ RedisModuleCallReply *RM_Call(RedisModuleCtx *ctx, const char *cmdname, const ch c->db = ctx->client->db; c->argv = argv; c->argc = argc; + if (ctx->module) ctx->module->in_call++; + /* We handle the above format error only when the client is setup so that * we can free it normally. */ if (argv == NULL) goto cleanup; @@ -2822,6 +2827,7 @@ RedisModuleCallReply *RM_Call(RedisModuleCtx *ctx, const char *cmdname, const ch autoMemoryAdd(ctx,REDISMODULE_AM_REPLY,reply); cleanup: + if (ctx->module) ctx->module->in_call--; freeClient(c); return reply; } @@ -4857,17 +4863,27 @@ int moduleUnregisterFilters(RedisModule *module) { * * Note that in the above use case, if `MODULE.SET` itself uses * `RedisModule_Call()` the filter will be applied on that call as well. If - * that is not desired, the module itself is responsible for maintaining a flag - * to identify and avoid this form of re-entrancy. + * that is not desired, the `REDISMODULE_CMDFILTER_NOSELF` flag can be set when + * registering the filter. + * + * The `REDISMODULE_CMDFILTER_NOSELF` flag prevents execution flows that + * originate from the module's own `RM_Call()` from reaching the filter. This + * flag is effective for all execution flows, including nested ones, as long as + * the execution begins from the module's command context or a thread-safe + * context that is associated with a blocking command. + * + * Detached thread-safe contexts are *not* associated with the module and cannot + * be protected by this flag. * * If multiple filters are registered (by the same or different modules), they * are executed in the order of registration. */ -RedisModuleCommandFilter *RM_RegisterCommandFilter(RedisModuleCtx *ctx, RedisModuleCommandFilterFunc callback) { +RedisModuleCommandFilter *RM_RegisterCommandFilter(RedisModuleCtx *ctx, RedisModuleCommandFilterFunc callback, int flags) { RedisModuleCommandFilter *filter = zmalloc(sizeof(*filter)); filter->module = ctx->module; filter->callback = callback; + filter->flags = flags; listAddNodeTail(moduleCommandFilters, filter); listAddNodeTail(ctx->module->filters, filter); @@ -4908,6 +4924,12 @@ void moduleCallCommandFilters(client *c) { while((ln = listNext(&li))) { RedisModuleCommandFilter *f = ln->value; + /* Skip filter if REDISMODULE_CMDFILTER_NOSELF is set and module is + * currently processing a command. 
+ */ + if ((f->flags & REDISMODULE_CMDFILTER_NOSELF) && f->module->in_call) continue; + + /* Call filter */ f->callback(&filter); } diff --git a/src/modules/hellofilter.c b/src/modules/hellofilter.c index 9cd440df..448e1298 100644 --- a/src/modules/hellofilter.c +++ b/src/modules/hellofilter.c @@ -8,7 +8,7 @@ static RedisModuleString *log_key_name; static const char log_command_name[] = "hellofilter.log"; static const char ping_command_name[] = "hellofilter.ping"; static const char unregister_command_name[] = "hellofilter.unregister"; -static int in_module = 0; +static int in_log_command = 0; static RedisModuleCommandFilter *filter = NULL; @@ -57,7 +57,7 @@ int HelloFilter_LogCommand(RedisModuleCtx *ctx, RedisModuleString **argv, int ar RedisModule_CloseKey(log); RedisModule_FreeString(ctx, s); - in_module = 1; + in_log_command = 1; size_t cmdlen; const char *cmdname = RedisModule_StringPtrLen(argv[1], &cmdlen); @@ -69,14 +69,14 @@ int HelloFilter_LogCommand(RedisModuleCtx *ctx, RedisModuleString **argv, int ar RedisModule_ReplyWithSimpleString(ctx, "Unknown command or invalid arguments"); } - in_module = 0; + in_log_command = 0; return REDISMODULE_OK; } void HelloFilter_CommandFilter(RedisModuleCommandFilterCtx *filter) { - if (in_module) return; /* don't process our own RM_Call() */ + if (in_log_command) return; /* don't process our own RM_Call() from HelloFilter_LogCommand() */ /* Fun manipulations: * - Remove @delme @@ -120,12 +120,14 @@ int RedisModule_OnLoad(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) if (RedisModule_Init(ctx,"hellofilter",1,REDISMODULE_APIVER_1) == REDISMODULE_ERR) return REDISMODULE_ERR; - if (argc != 1) { + if (argc != 2) { RedisModule_Log(ctx, "warning", "Log key name not specified"); return REDISMODULE_ERR; } + long long noself = 0; log_key_name = RedisModule_CreateStringFromString(ctx, argv[0]); + RedisModule_StringToLongLong(argv[1], &noself); if (RedisModule_CreateCommand(ctx,log_command_name, HelloFilter_LogCommand,"write deny-oom",1,1,1) == REDISMODULE_ERR) @@ -139,7 +141,8 @@ int RedisModule_OnLoad(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) HelloFilter_UnregisterCommand,"write deny-oom",1,1,1) == REDISMODULE_ERR) return REDISMODULE_ERR; - if ((filter = RedisModule_RegisterCommandFilter(ctx, HelloFilter_CommandFilter)) + if ((filter = RedisModule_RegisterCommandFilter(ctx, HelloFilter_CommandFilter, + noself ? REDISMODULE_CMDFILTER_NOSELF : 0)) == NULL) return REDISMODULE_ERR; return REDISMODULE_OK; diff --git a/src/redismodule.h b/src/redismodule.h index 37b7d0d5..e567743a 100644 --- a/src/redismodule.h +++ b/src/redismodule.h @@ -132,6 +132,11 @@ * of timers that are going to expire, sorted by expire time. */ typedef uint64_t RedisModuleTimerID; +/* CommandFilter Flags */ + +/* Do filter RedisModule_Call() commands initiated by module itself. 
*/ +#define REDISMODULE_CMDFILTER_NOSELF (1<<0) + /* ------------------------- End of common defines ------------------------ */ #ifndef REDISMODULE_CORE @@ -340,7 +345,7 @@ void REDISMODULE_API_FUNC(RedisModule_SetDisconnectCallback)(RedisModuleBlockedC void REDISMODULE_API_FUNC(RedisModule_SetClusterFlags)(RedisModuleCtx *ctx, uint64_t flags); int REDISMODULE_API_FUNC(RedisModule_ExportSharedAPI)(RedisModuleCtx *ctx, const char *apiname, void *func); void *REDISMODULE_API_FUNC(RedisModule_GetSharedAPI)(RedisModuleCtx *ctx, const char *apiname); -RedisModuleCommandFilter *REDISMODULE_API_FUNC(RedisModule_RegisterCommandFilter)(RedisModuleCtx *ctx, RedisModuleCommandFilterFunc cb); +RedisModuleCommandFilter *REDISMODULE_API_FUNC(RedisModule_RegisterCommandFilter)(RedisModuleCtx *ctx, RedisModuleCommandFilterFunc cb, int flags); int REDISMODULE_API_FUNC(RedisModule_UnregisterCommandFilter)(RedisModuleCtx *ctx, RedisModuleCommandFilter *filter); int REDISMODULE_API_FUNC(RedisModule_CommandFilterArgsCount)(RedisModuleCommandFilterCtx *fctx); const RedisModuleString *REDISMODULE_API_FUNC(RedisModule_CommandFilterArgGet)(RedisModuleCommandFilterCtx *fctx, int pos); diff --git a/tests/modules/commandfilter.tcl b/tests/modules/commandfilter.tcl index 8645d827..1e5c41d2 100644 --- a/tests/modules/commandfilter.tcl +++ b/tests/modules/commandfilter.tcl @@ -1,7 +1,7 @@ set testmodule [file normalize src/modules/hellofilter.so] start_server {tags {"modules"}} { - r module load $testmodule log-key + r module load $testmodule log-key 0 test {Command Filter handles redirected commands} { r set mykey @log @@ -50,18 +50,35 @@ start_server {tags {"modules"}} { r lrange log-key 0 -1 } {} - r module load $testmodule log-key-2 + r module load $testmodule log-key 0 test {Command Filter unregister works as expected} { # Validate reloading succeeded + r del log-key r set mykey @log - assert_equal "{set mykey @log}" [r lrange log-key-2 0 -1] + assert_equal "{set mykey @log}" [r lrange log-key 0 -1] # Unregister r hellofilter.unregister - r del log-key-2 + r del log-key r set mykey @log - r lrange log-key-2 0 -1 + r lrange log-key 0 -1 } {} + + r module unload hellofilter + r module load $testmodule log-key 1 + + test {Command Filter REDISMODULE_CMDFILTER_NOSELF works as expected} { + r set mykey @log + assert_equal "{set mykey @log}" [r lrange log-key 0 -1] + + r del log-key + r hellofilter.ping + assert_equal {} [r lrange log-key 0 -1] + + r eval "redis.call('hellofilter.ping')" 0 + assert_equal {} [r lrange log-key 0 -1] + } + } From 29b0a5769576327af24b592f5e2d745fe884c73c Mon Sep 17 00:00:00 2001 From: Oran Agra Date: Tue, 21 Mar 2017 07:20:02 -0700 Subject: [PATCH 037/304] diskless fork kept streaming RDB to a disconnected slave --- src/networking.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/src/networking.c b/src/networking.c index c08f43e6..09cbff38 100644 --- a/src/networking.c +++ b/src/networking.c @@ -911,6 +911,16 @@ void unlinkClient(client *c) { c->client_list_node = NULL; } + /* In the case of diskless replication the fork is writing to the + * sockets and just closing the fd isn't enough, if we don't also + * shutdown the socket the fork will continue to write to the slave + * and the salve will only find out that it was disconnected when + * it will finish reading the rdb. */ + if ((c->flags & CLIENT_SLAVE) && + (c->replstate == SLAVE_STATE_WAIT_BGSAVE_END)) { + shutdown(c->fd, SHUT_RDWR); + } + /* Unregister async I/O handlers and close the socket. 
*/ aeDeleteFileEvent(server.el,c->fd,AE_READABLE); aeDeleteFileEvent(server.el,c->fd,AE_WRITABLE); From 040e52c77f870be19792b28c73690dbe8b655b9d Mon Sep 17 00:00:00 2001 From: Dvir Volk Date: Thu, 21 Mar 2019 20:33:11 +0200 Subject: [PATCH 038/304] Renamed event name from "miss" to "keymiss" --- src/db.c | 8 ++++---- src/modules/testmodule.c | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/db.c b/src/db.c index afe18128..b537a29a 100644 --- a/src/db.c +++ b/src/db.c @@ -83,7 +83,7 @@ robj *lookupKey(redisDb *db, robj *key, int flags) { * 1. A key gets expired if it reached it's TTL. * 2. The key last access time is updated. * 3. The global keys hits/misses stats are updated (reported in INFO). - * 4. If keyspace notifications are enabled, a "miss" notification is fired. + * 4. If keyspace notifications are enabled, a "keymiss" notification is fired. * * This API should not be used when we write to the key after obtaining * the object linked to the key, but only for read only operations. @@ -107,7 +107,7 @@ robj *lookupKeyReadWithFlags(redisDb *db, robj *key, int flags) { * to return NULL ASAP. */ if (server.masterhost == NULL) { server.stat_keyspace_misses++; - notifyKeyspaceEvent(NOTIFY_KEY_MISS, "miss", key, db->id); + notifyKeyspaceEvent(NOTIFY_KEY_MISS, "keymiss", key, db->id); return NULL; } @@ -129,14 +129,14 @@ robj *lookupKeyReadWithFlags(redisDb *db, robj *key, int flags) { server.current_client->cmd->flags & CMD_READONLY) { server.stat_keyspace_misses++; - notifyKeyspaceEvent(NOTIFY_KEY_MISS, "miss", key, db->id); + notifyKeyspaceEvent(NOTIFY_KEY_MISS, "keymiss", key, db->id); return NULL; } } val = lookupKey(db,key,flags); if (val == NULL) { server.stat_keyspace_misses++; - notifyKeyspaceEvent(NOTIFY_KEY_MISS, "miss", key, db->id); + notifyKeyspaceEvent(NOTIFY_KEY_MISS, "keymiss", key, db->id); } else server.stat_keyspace_hits++; diff --git a/src/modules/testmodule.c b/src/modules/testmodule.c index af78d21d..5381380e 100644 --- a/src/modules/testmodule.c +++ b/src/modules/testmodule.c @@ -188,7 +188,7 @@ int TestNotifications(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) { RedisModule_Call(ctx, "LPUSH", "cc", "l", "y"); RedisModule_Call(ctx, "LPUSH", "cc", "l", "y"); - /* Miss some keys intentionally so we will get a "miss" notification. */ + /* Miss some keys intentionally so we will get a "keymiss" notification. */ RedisModule_Call(ctx, "GET", "c", "nosuchkey"); RedisModule_Call(ctx, "SMEMBERS", "c", "nosuchkey"); From 822a992f913484162ce508fdb073d8f2ddb6d7c8 Mon Sep 17 00:00:00 2001 From: Yossi Gottlieb Date: Sun, 24 Mar 2019 12:00:33 +0200 Subject: [PATCH 039/304] fix: missing initialization. 
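Taken together, the keyspace-miss patches above add a new notification class (NOTIFY_KEY_MISS / the 'm' character in notify-keyspace-events) and fire a "keymiss" event on read misses. A minimal sketch of a module consuming only that class, with illustrative names, is shown below; module subscribers receive the events regardless of the notify-keyspace-events setting, while pub/sub delivery of the same events would additionally require 'm' together with 'K' and/or 'E' in that config, per the notify.c change above.

    #include "redismodule.h"

    /* Callback signature matches NotifyCallback in testmodule.c above; only
     * "keymiss" events arrive because that is the only class subscribed to. */
    static int MissCallback(RedisModuleCtx *ctx, int type, const char *event,
                            RedisModuleString *key) {
        (void) type;
        RedisModule_Log(ctx, "notice", "%s on key %s", event,
                        RedisModule_StringPtrLen(key, NULL));
        return REDISMODULE_OK;
    }

    int RedisModule_OnLoad(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) {
        (void) argv; (void) argc;
        if (RedisModule_Init(ctx, "misslogger", 1, REDISMODULE_APIVER_1)
            == REDISMODULE_ERR) return REDISMODULE_ERR;
        if (RedisModule_SubscribeToKeyspaceEvents(ctx,
            REDISMODULE_NOTIFY_KEY_MISS, MissCallback) == REDISMODULE_ERR)
            return REDISMODULE_ERR;
        return REDISMODULE_OK;
    }
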
--- src/module.c | 1 + src/modules/hellofilter.c => tests/modules/commandfilter.c | 0 tests/{modules => unit/moduleapi}/commandfilter.tcl | 0 3 files changed, 1 insertion(+) rename src/modules/hellofilter.c => tests/modules/commandfilter.c (100%) rename tests/{modules => unit/moduleapi}/commandfilter.tcl (100%) diff --git a/src/module.c b/src/module.c index ff7f27cd..f468d499 100644 --- a/src/module.c +++ b/src/module.c @@ -753,6 +753,7 @@ void RM_SetModuleAttribs(RedisModuleCtx *ctx, const char *name, int ver, int api module->usedby = listCreate(); module->using = listCreate(); module->filters = listCreate(); + module->in_call = 0; ctx->module = module; } diff --git a/src/modules/hellofilter.c b/tests/modules/commandfilter.c similarity index 100% rename from src/modules/hellofilter.c rename to tests/modules/commandfilter.c diff --git a/tests/modules/commandfilter.tcl b/tests/unit/moduleapi/commandfilter.tcl similarity index 100% rename from tests/modules/commandfilter.tcl rename to tests/unit/moduleapi/commandfilter.tcl From ec0b6bd2c35a617101a2e874307be8ae9b504ac0 Mon Sep 17 00:00:00 2001 From: Yossi Gottlieb Date: Sun, 24 Mar 2019 12:03:03 +0200 Subject: [PATCH 040/304] Add runtest-moduleapi with commandfilter coverage. --- runtest-moduleapi | 16 +++++++++++++++ src/modules/Makefile | 7 +------ tests/modules/Makefile | 24 ++++++++++++++++++++++ tests/modules/commandfilter.c | 28 +++++++++++++------------- tests/test_helper.tcl | 1 - tests/unit/moduleapi/commandfilter.tcl | 16 +++++++-------- 6 files changed, 63 insertions(+), 29 deletions(-) create mode 100755 runtest-moduleapi create mode 100644 tests/modules/Makefile diff --git a/runtest-moduleapi b/runtest-moduleapi new file mode 100755 index 00000000..84cdb9bb --- /dev/null +++ b/runtest-moduleapi @@ -0,0 +1,16 @@ +#!/bin/sh +TCL_VERSIONS="8.5 8.6" +TCLSH="" + +for VERSION in $TCL_VERSIONS; do + TCL=`which tclsh$VERSION 2>/dev/null` && TCLSH=$TCL +done + +if [ -z $TCLSH ] +then + echo "You need tcl 8.5 or newer in order to run the Redis test" + exit 1 +fi + +make -C tests/modules && \ +$TCLSH tests/test_helper.tcl --single unit/moduleapi/commandfilter "${@}" diff --git a/src/modules/Makefile b/src/modules/Makefile index 537aa0da..4f6b50f2 100644 --- a/src/modules/Makefile +++ b/src/modules/Makefile @@ -13,7 +13,7 @@ endif .SUFFIXES: .c .so .xo .o -all: helloworld.so hellotype.so helloblock.so testmodule.so hellocluster.so hellotimer.so hellodict.so hellofilter.so +all: helloworld.so hellotype.so helloblock.so testmodule.so hellocluster.so hellotimer.so hellodict.so .c.xo: $(CC) -I. 
$(CFLAGS) $(SHOBJ_CFLAGS) -fPIC -c $< -o $@ @@ -47,11 +47,6 @@ hellodict.xo: ../redismodule.h hellodict.so: hellodict.xo -hellofilter.xo: ../redismodule.h - -hellofilter.so: hellofilter.xo - $(LD) -o $@ $< $(SHOBJ_LDFLAGS) $(LIBS) -lc - testmodule.xo: ../redismodule.h testmodule.so: testmodule.xo diff --git a/tests/modules/Makefile b/tests/modules/Makefile new file mode 100644 index 00000000..014d20af --- /dev/null +++ b/tests/modules/Makefile @@ -0,0 +1,24 @@ + +# find the OS +uname_S := $(shell sh -c 'uname -s 2>/dev/null || echo not') + +# Compile flags for linux / osx +ifeq ($(uname_S),Linux) + SHOBJ_CFLAGS ?= -W -Wall -fno-common -g -ggdb -std=c99 -O2 + SHOBJ_LDFLAGS ?= -shared +else + SHOBJ_CFLAGS ?= -W -Wall -dynamic -fno-common -g -ggdb -std=c99 -O2 + SHOBJ_LDFLAGS ?= -bundle -undefined dynamic_lookup +endif + +.SUFFIXES: .c .so .xo .o + +all: commandfilter.so + +.c.xo: + $(CC) -I../../src $(CFLAGS) $(SHOBJ_CFLAGS) -fPIC -c $< -o $@ + +commandfilter.xo: ../../src/redismodule.h + +commandfilter.so: commandfilter.xo + $(LD) -o $@ $< $(SHOBJ_LDFLAGS) $(LIBS) -lc diff --git a/tests/modules/commandfilter.c b/tests/modules/commandfilter.c index 448e1298..d25d49c4 100644 --- a/tests/modules/commandfilter.c +++ b/tests/modules/commandfilter.c @@ -1,18 +1,18 @@ #define REDISMODULE_EXPERIMENTAL_API -#include "../redismodule.h" +#include "redismodule.h" #include static RedisModuleString *log_key_name; -static const char log_command_name[] = "hellofilter.log"; -static const char ping_command_name[] = "hellofilter.ping"; -static const char unregister_command_name[] = "hellofilter.unregister"; +static const char log_command_name[] = "commandfilter.log"; +static const char ping_command_name[] = "commandfilter.ping"; +static const char unregister_command_name[] = "commandfilter.unregister"; static int in_log_command = 0; static RedisModuleCommandFilter *filter = NULL; -int HelloFilter_UnregisterCommand(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) +int CommandFilter_UnregisterCommand(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) { (void) argc; (void) argv; @@ -23,7 +23,7 @@ int HelloFilter_UnregisterCommand(RedisModuleCtx *ctx, RedisModuleString **argv, return REDISMODULE_OK; } -int HelloFilter_PingCommand(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) +int CommandFilter_PingCommand(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) { (void) argc; (void) argv; @@ -39,7 +39,7 @@ int HelloFilter_PingCommand(RedisModuleCtx *ctx, RedisModuleString **argv, int a return REDISMODULE_OK; } -int HelloFilter_LogCommand(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) +int CommandFilter_LogCommand(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) { RedisModuleString *s = RedisModule_CreateString(ctx, "", 0); @@ -74,9 +74,9 @@ int HelloFilter_LogCommand(RedisModuleCtx *ctx, RedisModuleString **argv, int ar return REDISMODULE_OK; } -void HelloFilter_CommandFilter(RedisModuleCommandFilterCtx *filter) +void CommandFilter_CommandFilter(RedisModuleCommandFilterCtx *filter) { - if (in_log_command) return; /* don't process our own RM_Call() from HelloFilter_LogCommand() */ + if (in_log_command) return; /* don't process our own RM_Call() from CommandFilter_LogCommand() */ /* Fun manipulations: * - Remove @delme @@ -117,7 +117,7 @@ void HelloFilter_CommandFilter(RedisModuleCommandFilterCtx *filter) } int RedisModule_OnLoad(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) { - if (RedisModule_Init(ctx,"hellofilter",1,REDISMODULE_APIVER_1) + if 
(RedisModule_Init(ctx,"commandfilter",1,REDISMODULE_APIVER_1) == REDISMODULE_ERR) return REDISMODULE_ERR; if (argc != 2) { @@ -130,18 +130,18 @@ int RedisModule_OnLoad(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) RedisModule_StringToLongLong(argv[1], &noself); if (RedisModule_CreateCommand(ctx,log_command_name, - HelloFilter_LogCommand,"write deny-oom",1,1,1) == REDISMODULE_ERR) + CommandFilter_LogCommand,"write deny-oom",1,1,1) == REDISMODULE_ERR) return REDISMODULE_ERR; if (RedisModule_CreateCommand(ctx,ping_command_name, - HelloFilter_PingCommand,"deny-oom",1,1,1) == REDISMODULE_ERR) + CommandFilter_PingCommand,"deny-oom",1,1,1) == REDISMODULE_ERR) return REDISMODULE_ERR; if (RedisModule_CreateCommand(ctx,unregister_command_name, - HelloFilter_UnregisterCommand,"write deny-oom",1,1,1) == REDISMODULE_ERR) + CommandFilter_UnregisterCommand,"write deny-oom",1,1,1) == REDISMODULE_ERR) return REDISMODULE_ERR; - if ((filter = RedisModule_RegisterCommandFilter(ctx, HelloFilter_CommandFilter, + if ((filter = RedisModule_RegisterCommandFilter(ctx, CommandFilter_CommandFilter, noself ? REDISMODULE_CMDFILTER_NOSELF : 0)) == NULL) return REDISMODULE_ERR; diff --git a/tests/test_helper.tcl b/tests/test_helper.tcl index d2f28152..568eacde 100644 --- a/tests/test_helper.tcl +++ b/tests/test_helper.tcl @@ -63,7 +63,6 @@ set ::all_tests { unit/lazyfree unit/wait unit/pendingquerybuf - modules/commandfilter } # Index to the next test to run in the ::all_tests list. set ::next_test 0 diff --git a/tests/unit/moduleapi/commandfilter.tcl b/tests/unit/moduleapi/commandfilter.tcl index 1e5c41d2..6078f64f 100644 --- a/tests/unit/moduleapi/commandfilter.tcl +++ b/tests/unit/moduleapi/commandfilter.tcl @@ -1,4 +1,4 @@ -set testmodule [file normalize src/modules/hellofilter.so] +set testmodule [file normalize tests/modules/commandfilter.so] start_server {tags {"modules"}} { r module load $testmodule log-key 0 @@ -27,7 +27,7 @@ start_server {tags {"modules"}} { test {Command Filter applies on RM_Call() commands} { r del log-key - r hellofilter.ping + r commandfilter.ping r lrange log-key 0 -1 } "{ping @log}" @@ -39,13 +39,13 @@ start_server {tags {"modules"}} { test {Command Filter applies on Lua redis.call() that calls a module} { r del log-key - r eval "redis.call('hellofilter.ping')" 0 + r eval "redis.call('commandfilter.ping')" 0 r lrange log-key 0 -1 } "{ping @log}" test {Command Filter is unregistered implicitly on module unload} { r del log-key - r module unload hellofilter + r module unload commandfilter r set mykey @log r lrange log-key 0 -1 } {} @@ -59,14 +59,14 @@ start_server {tags {"modules"}} { assert_equal "{set mykey @log}" [r lrange log-key 0 -1] # Unregister - r hellofilter.unregister + r commandfilter.unregister r del log-key r set mykey @log r lrange log-key 0 -1 } {} - r module unload hellofilter + r module unload commandfilter r module load $testmodule log-key 1 test {Command Filter REDISMODULE_CMDFILTER_NOSELF works as expected} { @@ -74,10 +74,10 @@ start_server {tags {"modules"}} { assert_equal "{set mykey @log}" [r lrange log-key 0 -1] r del log-key - r hellofilter.ping + r commandfilter.ping assert_equal {} [r lrange log-key 0 -1] - r eval "redis.call('hellofilter.ping')" 0 + r eval "redis.call('commandfilter.ping')" 0 assert_equal {} [r lrange log-key 0 -1] } From acba2fc9b4c8082e5344d2d53e51dc4c1c37942c Mon Sep 17 00:00:00 2001 From: Oran Agra Date: Sun, 24 Mar 2019 13:10:55 +0200 Subject: [PATCH 041/304] slave corrupts replication stream when module blocked client uses 
large reply (or POSTPONED_ARRAY) when redis appends the blocked client reply list to the real client, it didn't bother to check if it is in fact the master client. so a slave executing that module command will send replies to the master, causing the master to send the slave error responses, which will mess up the replication offset (slave will advance it's replication offset, and the master does not) --- src/module.c | 7 +------ src/networking.c | 13 +++++++++++++ src/server.h | 1 + 3 files changed, 15 insertions(+), 6 deletions(-) diff --git a/src/module.c b/src/module.c index ff7f27cd..0c8197ac 100644 --- a/src/module.c +++ b/src/module.c @@ -3747,12 +3747,7 @@ void moduleHandleBlockedClients(void) { * We need to glue such replies to the client output buffer and * free the temporary client we just used for the replies. */ if (c) { - if (bc->reply_client->bufpos) - addReplyProto(c,bc->reply_client->buf, - bc->reply_client->bufpos); - if (listLength(bc->reply_client->reply)) - listJoin(c->reply,bc->reply_client->reply); - c->reply_bytes += bc->reply_client->reply_bytes; + AddReplyFromClient(c, bc->reply_client); } freeClient(bc->reply_client); diff --git a/src/networking.c b/src/networking.c index 09cbff38..7fdd1984 100644 --- a/src/networking.c +++ b/src/networking.c @@ -744,6 +744,19 @@ void addReplySubcommandSyntaxError(client *c) { sdsfree(cmd); } +/* Append 'src' client output buffers into 'dst' client output buffers. + * This function clears the output buffers of 'src' */ +void AddReplyFromClient(client *dst, client *src) { + if (prepareClientToWrite(dst) != C_OK) + return; + addReplyProto(dst,src->buf, src->bufpos); + if (listLength(src->reply)) + listJoin(dst->reply,src->reply); + dst->reply_bytes += src->reply_bytes; + src->reply_bytes = 0; + src->bufpos = 0; +} + /* Copy 'src' client output buffers into 'dst' client output buffers. * The function takes care of freeing the old output buffers of the * destination client. */ diff --git a/src/server.h b/src/server.h index 95e0355a..dfd9f769 100644 --- a/src/server.h +++ b/src/server.h @@ -1529,6 +1529,7 @@ void addReplyNullArray(client *c); void addReplyBool(client *c, int b); void addReplyVerbatim(client *c, const char *s, size_t len, const char *ext); void addReplyProto(client *c, const char *s, size_t len); +void AddReplyFromClient(client *c, client *src); void addReplyBulk(client *c, robj *obj); void addReplyBulkCString(client *c, const char *s); void addReplyBulkCBuffer(client *c, const void *p, size_t len); From 4de88828d9d64b7d64b5ee75f9fb8d25aa1dfaa5 Mon Sep 17 00:00:00 2001 From: David Carlier Date: Thu, 28 Mar 2019 06:38:16 +0000 Subject: [PATCH 042/304] build fix --- src/networking.c | 1 + 1 file changed, 1 insertion(+) diff --git a/src/networking.c b/src/networking.c index 09cbff38..02fc44e7 100644 --- a/src/networking.c +++ b/src/networking.c @@ -29,6 +29,7 @@ #include "server.h" #include "atomicvar.h" +#include #include #include #include From 75648f99a5ba41812c115f83f8b668f030acfaee Mon Sep 17 00:00:00 2001 From: antirez Date: Mon, 8 Apr 2019 17:39:22 +0200 Subject: [PATCH 043/304] Fix assert comparison in fetchClusterSlotsConfiguration(). 
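The broken assertion used a single '=' and therefore assigned REDIS_REPLY_ARRAY
into r->type, so the check always passed instead of validating the reply type.
A minimal standalone sketch of this bug class (the struct and constant below are
illustrative stand-ins, not the hiredis definitions; building with -Wall usually
makes gcc/clang warn about an assignment used as a truth value, though whether
the warning fires inside assert() depends on how the macro expands):

    #include <assert.h>

    #define REPLY_ARRAY 2               /* stand-in for REDIS_REPLY_ARRAY */
    struct reply { int type; };

    int main(void) {
        struct reply r = { .type = 0 }; /* deliberately the wrong type */

        /* Buggy form: '=' stores 2 into r.type, the expression is non-zero,
         * and the assert never fires even though the type was wrong. */
        assert(r.type = REPLY_ARRAY);

        /* Intended form: '==' compares; used in place of the line above it
         * would have aborted on the bad type. */
        assert(r.type == REPLY_ARRAY);
        return 0;
    }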
--- src/redis-benchmark.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/redis-benchmark.c b/src/redis-benchmark.c index 12e9f7e4..4e2662f2 100644 --- a/src/redis-benchmark.c +++ b/src/redis-benchmark.c @@ -1192,7 +1192,7 @@ static int fetchClusterSlotsConfiguration(client c) { assert(reply->type == REDIS_REPLY_ARRAY); for (i = 0; i < reply->elements; i++) { redisReply *r = reply->element[i]; - assert(r->type = REDIS_REPLY_ARRAY); + assert(r->type == REDIS_REPLY_ARRAY); assert(r->elements >= 3); int from, to, slot; from = r->element[0]->integer; From f8a9708aa705b6493ef63a82e42ed428997b817a Mon Sep 17 00:00:00 2001 From: antirez Date: Mon, 8 Apr 2019 18:06:50 +0200 Subject: [PATCH 044/304] ACL: regression test for #5998. --- tests/unit/acl.tcl | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/tests/unit/acl.tcl b/tests/unit/acl.tcl index 82c75f82..90f2c9bb 100644 --- a/tests/unit/acl.tcl +++ b/tests/unit/acl.tcl @@ -108,4 +108,11 @@ start_server {tags {"acl"}} { assert_match {*+debug|segfault*} $cmdstr assert_match {*+acl*} $cmdstr } + + test {ACL regression: memory leaks adding / removing subcommands} { + r AUTH default "" + r ACL setuser newuser reset -debug +debug|a +debug|b +debug|c + r ACL setuser newuser -debug + # The test framework will detect a leak if any. + } } From c24e32041b91ac32626e8d8eee1c062942e25f27 Mon Sep 17 00:00:00 2001 From: antirez Date: Mon, 8 Apr 2019 18:08:37 +0200 Subject: [PATCH 045/304] ACL: Fix memory leak in ACLResetSubcommandsForCommand(). This commit fixes bug reported at #5998. Thanks to @tomcat1102. --- src/acl.c | 2 ++ tests/unit/acl.tcl | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/src/acl.c b/src/acl.c index d9f431f4..0205e51a 100644 --- a/src/acl.c +++ b/src/acl.c @@ -542,6 +542,8 @@ struct redisCommand *ACLLookupCommand(const char *name) { * and command ID. */ void ACLResetSubcommandsForCommand(user *u, unsigned long id) { if (u->allowed_subcommands && u->allowed_subcommands[id]) { + for (int i = 0; u->allowed_subcommands[id][i]; i++) + sdsfree(u->allowed_subcommands[id][i]); zfree(u->allowed_subcommands[id]); u->allowed_subcommands[id] = NULL; } diff --git a/tests/unit/acl.tcl b/tests/unit/acl.tcl index 90f2c9bb..05844143 100644 --- a/tests/unit/acl.tcl +++ b/tests/unit/acl.tcl @@ -109,7 +109,7 @@ start_server {tags {"acl"}} { assert_match {*+acl*} $cmdstr } - test {ACL regression: memory leaks adding / removing subcommands} { + test {ACL #5998 regression: memory leaks adding / removing subcommands} { r AUTH default "" r ACL setuser newuser reset -debug +debug|a +debug|b +debug|c r ACL setuser newuser -debug From d490752d58ecd0a815bdbbb350b550919fcc7a4a Mon Sep 17 00:00:00 2001 From: yongman Date: Tue, 9 Apr 2019 09:24:22 +0800 Subject: [PATCH 046/304] Fix memleak in bitfieldCommand --- src/bitops.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/src/bitops.c b/src/bitops.c index 8d03a769..ee1ce046 100644 --- a/src/bitops.c +++ b/src/bitops.c @@ -994,12 +994,18 @@ void bitfieldCommand(client *c) { /* Lookup for read is ok if key doesn't exit, but errors * if it's not a string. */ o = lookupKeyRead(c->db,c->argv[1]); - if (o != NULL && checkType(c,o,OBJ_STRING)) return; + if (o != NULL && checkType(c,o,OBJ_STRING)) { + zfree(ops); + return; + } } else { /* Lookup by making room up to the farest bit reached by * this operation. 
*/ if ((o = lookupStringForBitCommand(c, - highest_write_offset)) == NULL) return; + highest_write_offset)) == NULL) { + zfree(ops); + return; + } } addReplyArrayLen(c,numops); From 9e67691ffb4709535b56a089a973c3f89bfbe231 Mon Sep 17 00:00:00 2001 From: antirez Date: Wed, 10 Apr 2019 18:53:27 +0200 Subject: [PATCH 047/304] Aesthetic change to #5962 to conform to Redis style. --- src/module.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/module.c b/src/module.c index 0c8197ac..1e7c0eca 100644 --- a/src/module.c +++ b/src/module.c @@ -3746,9 +3746,7 @@ void moduleHandleBlockedClients(void) { * replies to send to the client in a thread safe context. * We need to glue such replies to the client output buffer and * free the temporary client we just used for the replies. */ - if (c) { - AddReplyFromClient(c, bc->reply_client); - } + if (c) AddReplyFromClient(c, bc->reply_client); freeClient(bc->reply_client); if (c != NULL) { From 3ccdcbc0880e4f6a6e577bed29826064d03c9509 Mon Sep 17 00:00:00 2001 From: James Rouzier Date: Thu, 11 Apr 2019 12:19:02 -0400 Subject: [PATCH 048/304] Fix start and end key initialize --- src/t_stream.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/t_stream.c b/src/t_stream.c index f4ace87a..9e7d3d12 100644 --- a/src/t_stream.c +++ b/src/t_stream.c @@ -492,14 +492,14 @@ void streamIteratorStart(streamIterator *si, stream *s, streamID *start, streamI streamEncodeID(si->start_key,start); } else { si->start_key[0] = 0; - si->start_key[0] = 0; + si->start_key[1] = 0; } if (end) { streamEncodeID(si->end_key,end); } else { si->end_key[0] = UINT64_MAX; - si->end_key[0] = UINT64_MAX; + si->end_key[1] = UINT64_MAX; } /* Seek the correct node in the radix tree. */ From 487601d85d95acf71414dee8328e65e8b4fafe08 Mon Sep 17 00:00:00 2001 From: antirez Date: Mon, 15 Apr 2019 16:50:26 +0200 Subject: [PATCH 049/304] Test: disable module testing for now. --- tests/test_helper.tcl | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/test_helper.tcl b/tests/test_helper.tcl index d2f28152..568eacde 100644 --- a/tests/test_helper.tcl +++ b/tests/test_helper.tcl @@ -63,7 +63,6 @@ set ::all_tests { unit/lazyfree unit/wait unit/pendingquerybuf - modules/commandfilter } # Index to the next test to run in the ::all_tests list. set ::next_test 0 From 26d9d4ec2c38c12dd99733db4e415a8bcfd01dc0 Mon Sep 17 00:00:00 2001 From: Itamar Haber Date: Tue, 16 Apr 2019 17:15:23 +0300 Subject: [PATCH 050/304] Adds RedisModule_ReplyWithCString Signed-off-by: Itamar Haber --- src/module.c | 11 +++++++++++ src/redismodule.h | 2 ++ 2 files changed, 13 insertions(+) diff --git a/src/module.c b/src/module.c index c2952167..ed4613af 100644 --- a/src/module.c +++ b/src/module.c @@ -1242,6 +1242,17 @@ int RM_ReplyWithStringBuffer(RedisModuleCtx *ctx, const char *buf, size_t len) { return REDISMODULE_OK; } +/* Reply with a bulk string, taking in input a C buffer pointer that is + * assumed to be null-terminated. + * + * The function always returns REDISMODULE_OK. */ +int RM_ReplyWithCString(RedisModuleCtx *ctx, const char *buf) { + client *c = moduleGetReplyClient(ctx); + if (c == NULL) return REDISMODULE_OK; + addReplyBulkCBuffer(c,(char*)buf,strlen(buf)); + return REDISMODULE_OK; +} + /* Reply with a bulk string, taking in input a RedisModuleString object. * * The function always returns REDISMODULE_OK. 
*/ diff --git a/src/redismodule.h b/src/redismodule.h index 259a5f1d..5c7643de 100644 --- a/src/redismodule.h +++ b/src/redismodule.h @@ -226,6 +226,7 @@ int REDISMODULE_API_FUNC(RedisModule_ReplyWithSimpleString)(RedisModuleCtx *ctx, int REDISMODULE_API_FUNC(RedisModule_ReplyWithArray)(RedisModuleCtx *ctx, long len); void REDISMODULE_API_FUNC(RedisModule_ReplySetArrayLength)(RedisModuleCtx *ctx, long len); int REDISMODULE_API_FUNC(RedisModule_ReplyWithStringBuffer)(RedisModuleCtx *ctx, const char *buf, size_t len); +int REDISMODULE_API_FUNC(RedisModule_ReplyWithCString)(RedisModuleCtx *ctx, const char *buf); int REDISMODULE_API_FUNC(RedisModule_ReplyWithString)(RedisModuleCtx *ctx, RedisModuleString *str); int REDISMODULE_API_FUNC(RedisModule_ReplyWithNull)(RedisModuleCtx *ctx); int REDISMODULE_API_FUNC(RedisModule_ReplyWithDouble)(RedisModuleCtx *ctx, double d); @@ -376,6 +377,7 @@ static int RedisModule_Init(RedisModuleCtx *ctx, const char *name, int ver, int REDISMODULE_GET_API(ReplyWithArray); REDISMODULE_GET_API(ReplySetArrayLength); REDISMODULE_GET_API(ReplyWithStringBuffer); + REDISMODULE_GET_API(ReplyWithCString); REDISMODULE_GET_API(ReplyWithString); REDISMODULE_GET_API(ReplyWithNull); REDISMODULE_GET_API(ReplyWithCallReply); From c184b32d4c8a59614b29d11c72ba2007c8d147d5 Mon Sep 17 00:00:00 2001 From: Itamar Haber Date: Tue, 16 Apr 2019 17:38:33 +0300 Subject: [PATCH 051/304] Uses addReplyBulkCString Signed-off-by: Itamar Haber --- src/module.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/module.c b/src/module.c index ed4613af..d46f484c 100644 --- a/src/module.c +++ b/src/module.c @@ -1249,7 +1249,7 @@ int RM_ReplyWithStringBuffer(RedisModuleCtx *ctx, const char *buf, size_t len) { int RM_ReplyWithCString(RedisModuleCtx *ctx, const char *buf) { client *c = moduleGetReplyClient(ctx); if (c == NULL) return REDISMODULE_OK; - addReplyBulkCBuffer(c,(char*)buf,strlen(buf)); + addReplyBulkCString(c,(char*)buf); return REDISMODULE_OK; } From 52686f48664e8a01e556e6a7ee52013816514a26 Mon Sep 17 00:00:00 2001 From: Itamar Haber Date: Tue, 16 Apr 2019 22:16:12 +0300 Subject: [PATCH 052/304] Adds a "Modules" section to `INFO` Fixes #6012. As long as "INFO is broken", this should be adequate IMO. Once we rework `INFO`, perhaps into RESP3, this implementation should be revisited. --- src/module.c | 19 +++++++++++++++++++ src/server.c | 7 +++++++ src/server.h | 1 + 3 files changed, 27 insertions(+) diff --git a/src/module.c b/src/module.c index c2952167..60c9a046 100644 --- a/src/module.c +++ b/src/module.c @@ -5244,6 +5244,25 @@ void addReplyLoadedModules(client *c) { dictReleaseIterator(di); } +/* Helper function for the INFO command: adds loaded modules as to info's + * output. + * + * After the call, the passed sds info string is no longer valid and all the + * references must be substituted with the new pointer returned by the call. */ +sds genModulesInfoString(sds info) { + dictIterator *di = dictGetIterator(modules); + dictEntry *de; + + while ((de = dictNext(di)) != NULL) { + sds name = dictGetKey(de); + struct RedisModule *module = dictGetVal(de); + + info = sdscatprintf(info, "module:name=%s,ver=%d\r\n", name, module->ver); + } + dictReleaseIterator(di); + return info; +} + /* Redis MODULE command. * * MODULE LOAD [args...] 
*/ diff --git a/src/server.c b/src/server.c index fb5d679c..49a65ef5 100644 --- a/src/server.c +++ b/src/server.c @@ -4291,6 +4291,13 @@ sds genRedisInfoString(char *section) { (long)c_ru.ru_utime.tv_sec, (long)c_ru.ru_utime.tv_usec); } + /* Modules */ + if (allsections || defsections || !strcasecmp(section,"modules")) { + if (sections++) info = sdscat(info,"\r\n"); + info = sdscatprintf(info,"# Modules\r\n"); + info = genModulesInfoString(info); + } + /* Command statistics */ if (allsections || !strcasecmp(section,"commandstats")) { if (sections++) info = sdscat(info,"\r\n"); diff --git a/src/server.h b/src/server.h index dfd9f769..d832c646 100644 --- a/src/server.h +++ b/src/server.h @@ -2268,6 +2268,7 @@ void bugReportStart(void); void serverLogObjectDebugInfo(const robj *o); void sigsegvHandler(int sig, siginfo_t *info, void *secret); sds genRedisInfoString(char *section); +sds genModulesInfoString(sds info); void enableWatchdog(int period); void disableWatchdog(void); void watchdogScheduleSignal(int period); From 11f2c6b115e108202bd9c3c7e94038d4afd029a8 Mon Sep 17 00:00:00 2001 From: chendianqiang Date: Wed, 17 Apr 2019 21:20:10 +0800 Subject: [PATCH 053/304] stop ping when client pause --- src/replication.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/replication.c b/src/replication.c index 9175bb42..237103d9 100644 --- a/src/replication.c +++ b/src/replication.c @@ -2579,7 +2579,7 @@ void replicationCron(void) { /* First, send PING according to ping_slave_period. */ if ((replication_cron_loops % server.repl_ping_slave_period) == 0 && - listLength(server.slaves)) + listLength(server.slaves) && !clientsArePaused()) { ping_argv[0] = createStringObject("PING",4); replicationFeedSlaves(server.slaves, server.slaveseldb, From fa97ef34ca5899ef482c543d7122a41fba8e4509 Mon Sep 17 00:00:00 2001 From: git-hulk Date: Tue, 23 Apr 2019 20:08:14 +0800 Subject: [PATCH 054/304] FIX: core dump in redis-benchmark when the `-r` is the last arg --- src/redis-benchmark.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/redis-benchmark.c b/src/redis-benchmark.c index 4e2662f2..2759e6a3 100644 --- a/src/redis-benchmark.c +++ b/src/redis-benchmark.c @@ -1294,7 +1294,7 @@ int parseOptions(int argc, const char **argv) { if (*p < '0' || *p > '9') goto invalid; } config.randomkeys = 1; - config.randomkeys_keyspacelen = atoi(argv[++i]); + config.randomkeys_keyspacelen = atoi(next); if (config.randomkeys_keyspacelen < 0) config.randomkeys_keyspacelen = 0; } else if (!strcmp(argv[i],"-q")) { From bc36404c79b50e80583d4a98a7211bd239fc88f9 Mon Sep 17 00:00:00 2001 From: vattezhang Date: Fri, 26 Apr 2019 18:50:51 +0800 Subject: [PATCH 055/304] update --- src/redis-benchmark.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/src/redis-benchmark.c b/src/redis-benchmark.c index 7bac6fdd..4e2662f2 100644 --- a/src/redis-benchmark.c +++ b/src/redis-benchmark.c @@ -418,11 +418,6 @@ static void readHandler(aeEventLoop *el, int fd, void *privdata, int mask) { if (redisBufferRead(c->context) != REDIS_OK) { fprintf(stderr,"Error: %s\n",c->context->errstr); exit(1); - } - else if (NULL != strstr(c->context->reader->buf,"NOAUTH")) - { - fprintf(stderr,"Error: %s\n",c->context->reader->buf); - exit(1); } else { while(c->pending) { if (redisGetReply(c->context,&reply) != REDIS_OK) { From 4e38ced4886446efb70f96685a6a6dfa344095d4 Mon Sep 17 00:00:00 2001 From: vattezhang Date: Fri, 26 Apr 2019 19:47:07 +0800 Subject: [PATCH 056/304] fix: benchmark auth fails when server have 
requirepass --- src/redis-benchmark.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/src/redis-benchmark.c b/src/redis-benchmark.c index 4e2662f2..e4134c9e 100644 --- a/src/redis-benchmark.c +++ b/src/redis-benchmark.c @@ -254,6 +254,19 @@ static redisConfig *getRedisConfig(const char *ip, int port, else fprintf(stderr,"%s: %s\n",hostsocket,err); goto fail; } + + if(config.auth){ + void *authReply = NULL; + redisAppendCommand(c, "AUTH %s", config.auth); + if (REDIS_OK != redisGetReply(c, &authReply)) goto fail; + if (reply) freeReplyObject(reply); + reply = ((redisReply *) authReply); + if (reply->type == REDIS_REPLY_ERROR) { + fprintf(stderr, "ERROR: %s\n", reply->str); + goto fail; + } + } + redisAppendCommand(c, "CONFIG GET %s", "save"); redisAppendCommand(c, "CONFIG GET %s", "appendonly"); int i = 0; From 162208f94d57b95ef57a3615549f244691213234 Mon Sep 17 00:00:00 2001 From: abhay Date: Thu, 25 Apr 2019 13:50:25 +0530 Subject: [PATCH 057/304] removed obsolete warning as per - https://github.com/antirez/redis/issues/5291 --- redis.conf | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/redis.conf b/redis.conf index 5ea91590..06051076 100644 --- a/redis.conf +++ b/redis.conf @@ -942,13 +942,7 @@ aof-use-rdb-preamble yes lua-time-limit 5000 ################################ REDIS CLUSTER ############################### -# -# ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ -# WARNING EXPERIMENTAL: Redis Cluster is considered to be stable code, however -# in order to mark it as "mature" we need to wait for a non trivial percentage -# of users to deploy it in production. -# ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ -# + # Normal Redis instances can't be part of a Redis Cluster; only nodes that are # started as cluster nodes can. In order to start a Redis instance as a # cluster node enable the cluster support uncommenting the following: From bcac165fabcbec43843800e3f2fcb69a201d8b50 Mon Sep 17 00:00:00 2001 From: "zhaozhao.zz" Date: Mon, 29 Apr 2019 14:38:28 +0800 Subject: [PATCH 058/304] aof: enhance AOF_FSYNC_EVERYSEC, more details in #5985 --- src/aof.c | 34 +++++++++++++++++++++++++++++++--- src/server.h | 1 + 2 files changed, 32 insertions(+), 3 deletions(-) diff --git a/src/aof.c b/src/aof.c index 615eebd0..4744847d 100644 --- a/src/aof.c +++ b/src/aof.c @@ -197,6 +197,12 @@ ssize_t aofRewriteBufferWrite(int fd) { * AOF file implementation * ------------------------------------------------------------------------- */ +/* Return true if an AOf fsync is currently already in progress in a + * BIO thread. */ +int aofFsyncInProgress(void) { + return bioPendingJobsOfType(BIO_AOF_FSYNC) != 0; +} + /* Starts a background task that performs fsync() against the specified * file descriptor (the one of the AOF file) in another thread. */ void aof_background_fsync(int fd) { @@ -335,10 +341,24 @@ void flushAppendOnlyFile(int force) { int sync_in_progress = 0; mstime_t latency; - if (sdslen(server.aof_buf) == 0) return; + if (sdslen(server.aof_buf) == 0) { + /* Check if we need to do fsync even the aof buffer is empty, + * because previously in AOF_FSYNC_EVERYSEC mode, fsync is + * called only when aof buffer is not empty, so if users + * stop write commands before fsync called in one second, + * the data in page cache cannot be flushed in time. 
*/ + if (server.aof_fsync == AOF_FSYNC_EVERYSEC && + server.aof_fsync_offset != server.aof_current_size && + server.unixtime > server.aof_last_fsync && + !(sync_in_progress = aofFsyncInProgress())) { + goto try_fsync; + } else { + return; + } + } if (server.aof_fsync == AOF_FSYNC_EVERYSEC) - sync_in_progress = bioPendingJobsOfType(BIO_AOF_FSYNC) != 0; + sync_in_progress = aofFsyncInProgress(); if (server.aof_fsync == AOF_FSYNC_EVERYSEC && !force) { /* With this append fsync policy we do background fsyncing. @@ -470,6 +490,7 @@ void flushAppendOnlyFile(int force) { server.aof_buf = sdsempty(); } +try_fsync: /* Don't fsync if no-appendfsync-on-rewrite is set to yes and there are * children doing I/O in the background. */ if (server.aof_no_fsync_on_rewrite && @@ -484,10 +505,14 @@ void flushAppendOnlyFile(int force) { redis_fsync(server.aof_fd); /* Let's try to get this data on the disk */ latencyEndMonitor(latency); latencyAddSampleIfNeeded("aof-fsync-always",latency); + server.aof_fsync_offset = server.aof_current_size; server.aof_last_fsync = server.unixtime; } else if ((server.aof_fsync == AOF_FSYNC_EVERYSEC && server.unixtime > server.aof_last_fsync)) { - if (!sync_in_progress) aof_background_fsync(server.aof_fd); + if (!sync_in_progress) { + aof_background_fsync(server.aof_fd); + server.aof_fsync_offset = server.aof_current_size; + } server.aof_last_fsync = server.unixtime; } } @@ -694,6 +719,7 @@ int loadAppendOnlyFile(char *filename) { * operation is received. */ if (fp && redis_fstat(fileno(fp),&sb) != -1 && sb.st_size == 0) { server.aof_current_size = 0; + server.aof_fsync_offset = server.aof_current_size; fclose(fp); return C_ERR; } @@ -832,6 +858,7 @@ loaded_ok: /* DB loaded, cleanup and return C_OK to the caller. */ stopLoading(); aofUpdateCurrentSize(); server.aof_rewrite_base_size = server.aof_current_size; + server.aof_fsync_offset = server.aof_current_size; return C_OK; readerr: /* Read error. If feof(fp) is true, fall through to unexpected EOF. */ @@ -1741,6 +1768,7 @@ void backgroundRewriteDoneHandler(int exitcode, int bysignal) { server.aof_selected_db = -1; /* Make sure SELECT is re-issued */ aofUpdateCurrentSize(); server.aof_rewrite_base_size = server.aof_current_size; + server.aof_current_size = server.aof_current_size; /* Clear regular AOF buffer since its contents was just written to * the new AOF from the background rewrite buffer. */ diff --git a/src/server.h b/src/server.h index dfd9f769..e7f01b2e 100644 --- a/src/server.h +++ b/src/server.h @@ -1140,6 +1140,7 @@ struct redisServer { off_t aof_rewrite_min_size; /* the AOF file is at least N bytes. */ off_t aof_rewrite_base_size; /* AOF size on latest startup or rewrite. */ off_t aof_current_size; /* AOF current size. */ + off_t aof_fsync_offset; /* AOF offset which is already synced to disk. */ int aof_rewrite_scheduled; /* Rewrite once BGSAVE terminates. */ pid_t aof_child_pid; /* PID if rewriting process */ list *aof_rewrite_buf_blocks; /* Hold changes during an AOF rewrite. */ From ba809f26d4bd81d23fa929d0c018f235ab298564 Mon Sep 17 00:00:00 2001 From: Oran Agra Date: Sun, 5 May 2019 08:19:52 +0300 Subject: [PATCH 059/304] make replication tests more stable on slow machines solving few replication related tests race conditions which fail on slow machines bugfix in slave buffers test: since the test is executed twice, each time with a different commands count, the threshold for the delta can't be a constant. 
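the fix for the online-state race is essentially a bounded poll-and-retry loop:
keep re-checking the master's view of the replica (instead of a single read that
can run before the master has noticed the reconnect) and only fail after the
retry budget is exhausted. a minimal standalone C sketch of that pattern, not
Redis code (replica_is_online() is an illustrative stand-in for matching
"state=online" in the master's INFO output):

    #include <stdio.h>
    #include <unistd.h>

    /* Stand-in predicate: pretend the replica comes online on the 3rd poll. */
    static int replica_is_online(int attempt) { return attempt >= 3; }

    int main(void) {
        int retries = 5000;             /* 5000 polls * 100 ms sleep budget */
        int attempt = 0, ok = 0;
        while (retries-- > 0) {
            if (replica_is_online(attempt++)) { ok = 1; break; }
            usleep(100 * 1000);         /* POSIX usleep: wait 100 ms, then poll again */
        }
        printf(ok ? "replica online\n" : "timed out waiting for replica\n");
        return ok ? 0 : 1;
    }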
--- tests/integration/psync2.tcl | 5 ++++- tests/integration/replication-psync.tcl | 26 +++++++++++++++++++++++++ tests/unit/maxmemory.tcl | 7 ++++--- 3 files changed, 34 insertions(+), 4 deletions(-) diff --git a/tests/integration/psync2.tcl b/tests/integration/psync2.tcl index 8663d6fc..d1212b64 100644 --- a/tests/integration/psync2.tcl +++ b/tests/integration/psync2.tcl @@ -166,12 +166,15 @@ start_server {} { # Pick a random slave set slave_id [expr {($master_id+1)%5}] set sync_count [status $R($master_id) sync_full] + set sync_partial [status $R($master_id) sync_partial_ok] catch { $R($slave_id) config rewrite $R($slave_id) debug restart } + # note: just waiting for connected_slaves==4 has a race condition since + # we might do the check before the master realized that the slave disconnected wait_for_condition 50 1000 { - [status $R($master_id) connected_slaves] == 4 + [status $R($master_id) sync_partial_ok] == $sync_partial + 1 } else { fail "Replica not reconnecting" } diff --git a/tests/integration/replication-psync.tcl b/tests/integration/replication-psync.tcl index a3bce2a4..bf868244 100644 --- a/tests/integration/replication-psync.tcl +++ b/tests/integration/replication-psync.tcl @@ -79,6 +79,32 @@ proc test_psync {descr duration backlog_size backlog_ttl delay cond diskless rec stop_bg_complex_data $load_handle0 stop_bg_complex_data $load_handle1 stop_bg_complex_data $load_handle2 + + # Wait for the slave to reach the "online" + # state from the POV of the master. + set retry 5000 + while {$retry} { + set info [$master info] + if {[string match {*slave0:*state=online*} $info]} { + break + } else { + incr retry -1 + after 100 + } + } + if {$retry == 0} { + error "assertion:Slave not correctly synchronized" + } + + # Wait that slave acknowledge it is online so + # we are sure that DBSIZE and DEBUG DIGEST will not + # fail because of timing issues. 
(-LOADING error) + wait_for_condition 5000 100 { + [lindex [$slave role] 3] eq {connected} + } else { + fail "Slave still not connected after some time" + } + set retry 10 while {$retry && ([$master debug digest] ne [$slave debug digest])}\ { diff --git a/tests/unit/maxmemory.tcl b/tests/unit/maxmemory.tcl index 1def57af..0f64ddc1 100644 --- a/tests/unit/maxmemory.tcl +++ b/tests/unit/maxmemory.tcl @@ -161,7 +161,7 @@ proc test_slave_buffers {test_name cmd_count payload_len limit_memory pipeline} } # make sure master doesn't disconnect slave because of timeout - $master config set repl-timeout 300 ;# 5 minutes + $master config set repl-timeout 1200 ;# 20 minutes (for valgrind and slow machines) $master config set maxmemory-policy allkeys-random $master config set client-output-buffer-limit "replica 100000000 100000000 300" $master config set repl-backlog-size [expr {10*1024}] @@ -212,7 +212,8 @@ proc test_slave_buffers {test_name cmd_count payload_len limit_memory pipeline} assert {[$master dbsize] == 100} assert {$slave_buf > 2*1024*1024} ;# some of the data may have been pushed to the OS buffers - assert {$delta < 50*1024 && $delta > -50*1024} ;# 1 byte unaccounted for, with 1M commands will consume some 1MB + set delta_max [expr {$cmd_count / 2}] ;# 1 byte unaccounted for, with 1M commands will consume some 1MB + assert {$delta < $delta_max && $delta > -$delta_max} $master client kill type slave set killed_used [s -1 used_memory] @@ -221,7 +222,7 @@ proc test_slave_buffers {test_name cmd_count payload_len limit_memory pipeline} set killed_used_no_repl [expr {$killed_used - $killed_mem_not_counted_for_evict}] set delta_no_repl [expr {$killed_used_no_repl - $used_no_repl}] assert {$killed_slave_buf == 0} - assert {$delta_no_repl > -50*1024 && $delta_no_repl < 50*1024} ;# 1 byte unaccounted for, with 1M commands will consume some 1MB + assert {$delta_no_repl > -$delta_max && $delta_no_repl < $delta_max} } # unfreeze slave process (after the 'test' succeeded or failed, but before we attempt to terminate the server From 9f3679880a7cebc3ce73142e2e19ae3e1150f457 Mon Sep 17 00:00:00 2001 From: Yossi Gottlieb Date: Sun, 5 May 2019 20:32:53 +0300 Subject: [PATCH 060/304] Preserve client->id for blocked clients. --- src/module.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/module.c b/src/module.c index c2952167..7dee7e77 100644 --- a/src/module.c +++ b/src/module.c @@ -3866,7 +3866,10 @@ RedisModuleCtx *RM_GetThreadSafeContext(RedisModuleBlockedClient *bc) { * in order to keep things like the currently selected database and similar * things. */ ctx->client = createClient(-1); - if (bc) selectDb(ctx->client,bc->dbid); + if (bc) { + selectDb(ctx->client,bc->dbid); + ctx->client->id = bc->client->id; + } return ctx; } From 1c0913dc4e22701726b3a39386a17a83058ad24c Mon Sep 17 00:00:00 2001 From: WuYunlong Date: Mon, 6 May 2019 11:46:07 +0800 Subject: [PATCH 061/304] Do not active expire keys in the background when the switch is off. --- src/server.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/src/server.c b/src/server.c index fb5d679c..eaa7172a 100644 --- a/src/server.c +++ b/src/server.c @@ -1674,10 +1674,12 @@ void clientsCron(void) { void databasesCron(void) { /* Expire keys by random sampling. Not required for slaves * as master will synthesize DELs for us. 
*/ - if (server.active_expire_enabled && server.masterhost == NULL) { - activeExpireCycle(ACTIVE_EXPIRE_CYCLE_SLOW); - } else if (server.masterhost != NULL) { - expireSlaveKeys(); + if (server.active_expire_enabled) { + if (server.masterhost == NULL) { + activeExpireCycle(ACTIVE_EXPIRE_CYCLE_SLOW); + } else { + expireSlaveKeys(); + } } /* Defrag keys gradually. */ From b1c7e3393d8c6bfd357c981b11b7a84426ccff0d Mon Sep 17 00:00:00 2001 From: liaotonglang Date: Mon, 6 May 2019 17:15:49 +0800 Subject: [PATCH 062/304] delete sdsTest() from REDIS_TEST sdsTest() defined in sds.c dit not match the call in server.c. remove it from REDIS_TEST, since test-sds defined in Makefile. --- src/server.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/server.c b/src/server.c index fb5d679c..674eef20 100644 --- a/src/server.c +++ b/src/server.c @@ -4718,8 +4718,6 @@ int main(int argc, char **argv) { return sha1Test(argc, argv); } else if (!strcasecmp(argv[2], "util")) { return utilTest(argc, argv); - } else if (!strcasecmp(argv[2], "sds")) { - return sdsTest(argc, argv); } else if (!strcasecmp(argv[2], "endianconv")) { return endianconvTest(argc, argv); } else if (!strcasecmp(argv[2], "crc64")) { From f468e653b5e683f945b0a4a6665c3155cc768a45 Mon Sep 17 00:00:00 2001 From: antirez Date: Tue, 24 Oct 2017 08:35:05 +0200 Subject: [PATCH 063/304] Threaded IO: implement handleClientsWithPendingWritesUsingThreads(). This is just an experiment for now, there are a couple of race conditions, mostly harmless for the performance gain experiment that this commit represents so far. The general idea here is to take Redis single threaded and instead fan-out on expansive kernel calls: write(2) in this case, but the same concept could be easily implemented for read(2) and protcol parsing. However just threading writes like in this commit, is enough to evaluate if the approach is sounding. --- src/networking.c | 156 +++++++++++++++++++++++++++++++++++++++++++++-- src/server.c | 11 ++-- src/server.h | 4 ++ 3 files changed, 162 insertions(+), 9 deletions(-) diff --git a/src/networking.c b/src/networking.c index ffb43562..3958e4f5 100644 --- a/src/networking.c +++ b/src/networking.c @@ -1065,9 +1065,17 @@ void freeClient(client *c) { * a context where calling freeClient() is not possible, because the client * should be valid for the continuation of the flow of the program. */ void freeClientAsync(client *c) { + /* We need to handle concurrent access to the server.clients_to_close list + * only in the freeClientAsync() function, since it's the only function that + * may access the list while Redis uses I/O threads. All the other accesses + * are in the context of the main thread while the other threads are + * idle. */ + static pthread_mutex_t async_free_queue_mutex = PTHREAD_MUTEX_INITIALIZER; if (c->flags & CLIENT_CLOSE_ASAP || c->flags & CLIENT_LUA) return; c->flags |= CLIENT_CLOSE_ASAP; + pthread_mutex_lock(&async_free_queue_mutex); listAddNodeTail(server.clients_to_close,c); + pthread_mutex_unlock(&async_free_queue_mutex); } void freeClientsInAsyncFreeQueue(void) { @@ -1091,7 +1099,12 @@ client *lookupClientByID(uint64_t id) { } /* Write data in output buffers to client. Return C_OK if the client - * is still valid after the call, C_ERR if it was freed. */ + * is still valid after the call, C_ERR if it was freed because of some + * error. + * + * This function is called by threads, but always with handler_installed + * set to 0. So when handler_installed is set to 0 the function must be + * thread safe. 
*/ int writeToClient(int fd, client *c, int handler_installed) { ssize_t nwritten = 0, totwritten = 0; size_t objlen; @@ -1153,14 +1166,15 @@ int writeToClient(int fd, client *c, int handler_installed) { zmalloc_used_memory() < server.maxmemory) && !(c->flags & CLIENT_SLAVE)) break; } + /* FIXME: Fixme, use atomic var for this. */ server.stat_net_output_bytes += totwritten; if (nwritten == -1) { if (errno == EAGAIN) { nwritten = 0; } else { - serverLog(LL_VERBOSE, - "Error writing to client: %s", strerror(errno)); - freeClient(c); + // serverLog(LL_VERBOSE, + // "Error writing to client: %s", strerror(errno)); + freeClientAsync(c); return C_ERR; } } @@ -1173,11 +1187,15 @@ int writeToClient(int fd, client *c, int handler_installed) { } if (!clientHasPendingReplies(c)) { c->sentlen = 0; + /* Note that writeToClient() is called in a threaded way, but + * adDeleteFileEvent() is not thread safe: however writeToClient() + * is always called with handler_installed set to 0 from threads + * so we are fine. */ if (handler_installed) aeDeleteFileEvent(server.el,c->fd,AE_WRITABLE); /* Close connection after entire reply has been sent. */ if (c->flags & CLIENT_CLOSE_AFTER_REPLY) { - freeClient(c); + freeClientAsync(c); return C_ERR; } } @@ -2452,3 +2470,131 @@ int processEventsWhileBlocked(void) { } return count; } + +/* ============================================================================= + * Threaded I/O + * =========================================================================== */ + +#define SERVER_MAX_IO_THREADS 32 + +pthread_t io_threads[SERVER_MAX_IO_THREADS]; +pthread_mutex_t io_threads_done_mutex = PTHREAD_MUTEX_INITIALIZER; +pthread_cond_t io_threads_done_cond = PTHREAD_COND_INITIALIZER; +pthread_mutex_t io_threads_idle_mutex = PTHREAD_MUTEX_INITIALIZER; +pthread_cond_t io_threads_idle_cond = PTHREAD_COND_INITIALIZER; +pthread_cond_t io_threads_start_cond = PTHREAD_COND_INITIALIZER; +int io_threads_done = 0; /* Number of threads that completed the work. */ +int io_threads_idle = 0; /* Number of threads in idle state ready to go. */ +list *io_threads_list[SERVER_MAX_IO_THREADS]; + +void *IOThreadMain(void *myid) { + /* The ID is the thread number (from 0 to server.iothreads_num-1), and is + * used by the thread to just manipulate a single sub-array of clients. */ + long id = (unsigned long)myid; + + while(1) { + /* ... Wait for start ... */ + pthread_mutex_lock(&io_threads_idle_mutex); + io_threads_idle++; + pthread_cond_signal(&io_threads_idle_cond); + printf("[%ld] Waiting start...\n", id); + pthread_cond_wait(&io_threads_start_cond,&io_threads_idle_mutex); + printf("[%ld] Started\n", id); + pthread_mutex_unlock(&io_threads_idle_mutex); + printf("%d to handle\n", (int)listLength(io_threads_list[id])); + + /* ... Process ... */ + listIter li; + listNode *ln; + listRewind(io_threads_list[id],&li); + while((ln = listNext(&li))) { + client *c = listNodeValue(ln); + writeToClient(c->fd,c,0); + } + listEmpty(io_threads_list[id]); + + /* Report success. */ + pthread_mutex_lock(&io_threads_done_mutex); + io_threads_done++; + pthread_cond_signal(&io_threads_done_cond); + pthread_mutex_unlock(&io_threads_done_mutex); + printf("[%ld] Done\n", id); + } +} + +/* Initialize the data structures needed for threaded I/O. 
*/ +void initThreadedIO(void) { + pthread_t tid; + + server.io_threads_num = 4; + for (int i = 0; i < server.io_threads_num; i++) { + if (pthread_create(&tid,NULL,IOThreadMain,(void*)(long)i) != 0) { + serverLog(LL_WARNING,"Fatal: Can't initialize IO thread."); + exit(1); + } + io_threads[i] = tid; + io_threads_list[i] = listCreate(); + } +} + +int handleClientsWithPendingWritesUsingThreads(void) { + int processed = listLength(server.clients_pending_write); + if (processed == 0) return 0; /* Return ASAP if there are no clients. */ + + printf("%d TOTAL\n", processed); + + /* Wait for all threads to be ready. */ + pthread_mutex_lock(&io_threads_idle_mutex); + while(io_threads_idle < server.io_threads_num) { + pthread_cond_wait(&io_threads_idle_cond,&io_threads_idle_mutex); + } + printf("All threads are idle: %d\n", io_threads_idle); + io_threads_idle = 0; + pthread_mutex_unlock(&io_threads_idle_mutex); + + /* Distribute the clients across N different lists. */ + listIter li; + listNode *ln; + listRewind(server.clients_pending_write,&li); + int item_id = 0; + while((ln = listNext(&li))) { + client *c = listNodeValue(ln); + c->flags &= ~CLIENT_PENDING_WRITE; + int target_id = item_id % server.io_threads_num; + listAddNodeTail(io_threads_list[target_id],c); + item_id++; + } + + /* Start all threads. */ + printf("Send start condition\n"); + pthread_mutex_lock(&io_threads_done_mutex); + io_threads_done = 0; + pthread_cond_broadcast(&io_threads_start_cond); + pthread_mutex_unlock(&io_threads_done_mutex); + + /* Wait for all threads to end their work. */ + pthread_mutex_lock(&io_threads_done_mutex); + while(io_threads_done < server.io_threads_num) { + pthread_cond_wait(&io_threads_done_cond,&io_threads_done_mutex); + } + pthread_mutex_unlock(&io_threads_done_mutex); + printf("All threads finshed\n"); + + /* Run the list of clients again to install the write handler where + * needed. */ + listRewind(server.clients_pending_write,&li); + while((ln = listNext(&li))) { + client *c = listNodeValue(ln); + + /* Install the write handler if there are pending writes in some + * of the clients. */ + if (clientHasPendingReplies(c) && + aeCreateFileEvent(server.el, c->fd, AE_WRITABLE, + sendReplyToClient, c) == AE_ERR) + { + freeClientAsync(c); + } + } + listEmpty(server.clients_pending_write); + return processed; +} diff --git a/src/server.c b/src/server.c index fb5d679c..c437880d 100644 --- a/src/server.c +++ b/src/server.c @@ -1981,9 +1981,6 @@ int serverCron(struct aeEventLoop *eventLoop, long long id, void *clientData) { flushAppendOnlyFile(0); } - /* Close clients that need to be closed asynchronous */ - freeClientsInAsyncFreeQueue(); - /* Clear the paused clients flag if needed. */ clientsArePaused(); /* Don't check return value, just use the side effect.*/ @@ -2075,7 +2072,12 @@ void beforeSleep(struct aeEventLoop *eventLoop) { flushAppendOnlyFile(0); /* Handle writes with pending output buffers. */ - handleClientsWithPendingWrites(); + /* XXX: Put a condition based on number of waiting clients: if we + * have less than a given number of clients, use non threaded code. */ + handleClientsWithPendingWritesUsingThreads(); + + /* Close clients that need to be closed asynchronous */ + freeClientsInAsyncFreeQueue(); /* Before we are going to sleep, let the threads access the dataset by * releasing the GIL. 
Redis main thread will not touch anything at this @@ -2861,6 +2863,7 @@ void initServer(void) { slowlogInit(); latencyMonitorInit(); bioInit(); + initThreadedIO(); server.initial_memory_usage = zmalloc_used_memory(); } diff --git a/src/server.h b/src/server.h index dfd9f769..d2a563c9 100644 --- a/src/server.h +++ b/src/server.h @@ -1062,6 +1062,8 @@ struct redisServer { int protected_mode; /* Don't accept external connections. */ int gopher_enabled; /* If true the server will reply to gopher queries. Will still serve RESP2 queries. */ + int io_threads_num; /* Number of IO threads to use. */ + /* RDB / AOF loading information */ int loading; /* We are loading data from disk if true */ off_t loading_total_bytes; @@ -1576,12 +1578,14 @@ void pauseClients(mstime_t duration); int clientsArePaused(void); int processEventsWhileBlocked(void); int handleClientsWithPendingWrites(void); +int handleClientsWithPendingWritesUsingThreads(void); int clientHasPendingReplies(client *c); void unlinkClient(client *c); int writeToClient(int fd, client *c, int handler_installed); void linkClient(client *c); void protectClient(client *c); void unprotectClient(client *c); +void initThreadedIO(void); #ifdef __GNUC__ void addReplyErrorFormat(client *c, const char *fmt, ...) From a2dbd9bd977b814ed69500538c3125c51c4963b5 Mon Sep 17 00:00:00 2001 From: antirez Date: Mon, 25 Mar 2019 12:16:13 +0100 Subject: [PATCH 064/304] Threaded IO: allow to disable debug printf. --- src/networking.c | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/src/networking.c b/src/networking.c index 3958e4f5..5dfaaf8b 100644 --- a/src/networking.c +++ b/src/networking.c @@ -2471,9 +2471,11 @@ int processEventsWhileBlocked(void) { return count; } -/* ============================================================================= +/* ========================================================================== * Threaded I/O - * =========================================================================== */ + * ========================================================================== */ + +int tio_debug = 0; #define SERVER_MAX_IO_THREADS 32 @@ -2497,11 +2499,11 @@ void *IOThreadMain(void *myid) { pthread_mutex_lock(&io_threads_idle_mutex); io_threads_idle++; pthread_cond_signal(&io_threads_idle_cond); - printf("[%ld] Waiting start...\n", id); + if (tio_debug) printf("[%ld] Waiting start...\n", id); pthread_cond_wait(&io_threads_start_cond,&io_threads_idle_mutex); - printf("[%ld] Started\n", id); + if (tio_debug) printf("[%ld] Started\n", id); pthread_mutex_unlock(&io_threads_idle_mutex); - printf("%d to handle\n", (int)listLength(io_threads_list[id])); + if (tio_debug) printf("%d to handle\n", (int)listLength(io_threads_list[id])); /* ... Process ... */ listIter li; @@ -2518,7 +2520,7 @@ void *IOThreadMain(void *myid) { io_threads_done++; pthread_cond_signal(&io_threads_done_cond); pthread_mutex_unlock(&io_threads_done_mutex); - printf("[%ld] Done\n", id); + if (tio_debug) printf("[%ld] Done\n", id); } } @@ -2541,14 +2543,14 @@ int handleClientsWithPendingWritesUsingThreads(void) { int processed = listLength(server.clients_pending_write); if (processed == 0) return 0; /* Return ASAP if there are no clients. */ - printf("%d TOTAL\n", processed); + if (tio_debug) printf("%d TOTAL\n", processed); /* Wait for all threads to be ready. 
*/ pthread_mutex_lock(&io_threads_idle_mutex); while(io_threads_idle < server.io_threads_num) { pthread_cond_wait(&io_threads_idle_cond,&io_threads_idle_mutex); } - printf("All threads are idle: %d\n", io_threads_idle); + if (tio_debug) printf("All threads are idle: %d\n", io_threads_idle); io_threads_idle = 0; pthread_mutex_unlock(&io_threads_idle_mutex); @@ -2566,7 +2568,7 @@ int handleClientsWithPendingWritesUsingThreads(void) { } /* Start all threads. */ - printf("Send start condition\n"); + if (tio_debug) printf("Send start condition\n"); pthread_mutex_lock(&io_threads_done_mutex); io_threads_done = 0; pthread_cond_broadcast(&io_threads_start_cond); @@ -2578,7 +2580,7 @@ int handleClientsWithPendingWritesUsingThreads(void) { pthread_cond_wait(&io_threads_done_cond,&io_threads_done_mutex); } pthread_mutex_unlock(&io_threads_done_mutex); - printf("All threads finshed\n"); + if (tio_debug) printf("All threads finshed\n"); /* Run the list of clients again to install the write handler where * needed. */ From 6f4f36c0fb9498cee4289655036f6dd12a0bbebb Mon Sep 17 00:00:00 2001 From: antirez Date: Mon, 25 Mar 2019 12:56:48 +0100 Subject: [PATCH 065/304] Threaded IO: second attempt without signaling conditions. --- src/networking.c | 104 +++++++++++++++++++++++++---------------------- src/server.c | 2 - 2 files changed, 55 insertions(+), 51 deletions(-) diff --git a/src/networking.c b/src/networking.c index 5dfaaf8b..cd241dac 100644 --- a/src/networking.c +++ b/src/networking.c @@ -2480,13 +2480,9 @@ int tio_debug = 0; #define SERVER_MAX_IO_THREADS 32 pthread_t io_threads[SERVER_MAX_IO_THREADS]; -pthread_mutex_t io_threads_done_mutex = PTHREAD_MUTEX_INITIALIZER; -pthread_cond_t io_threads_done_cond = PTHREAD_COND_INITIALIZER; -pthread_mutex_t io_threads_idle_mutex = PTHREAD_MUTEX_INITIALIZER; -pthread_cond_t io_threads_idle_cond = PTHREAD_COND_INITIALIZER; -pthread_cond_t io_threads_start_cond = PTHREAD_COND_INITIALIZER; -int io_threads_done = 0; /* Number of threads that completed the work. */ -int io_threads_idle = 0; /* Number of threads in idle state ready to go. */ +pthread_mutex_t io_threads_mutex[SERVER_MAX_IO_THREADS]; +_Atomic unsigned long io_threads_pending[SERVER_MAX_IO_THREADS]; +int io_threads_active; list *io_threads_list[SERVER_MAX_IO_THREADS]; void *IOThreadMain(void *myid) { @@ -2496,30 +2492,23 @@ void *IOThreadMain(void *myid) { while(1) { /* ... Wait for start ... */ - pthread_mutex_lock(&io_threads_idle_mutex); - io_threads_idle++; - pthread_cond_signal(&io_threads_idle_cond); - if (tio_debug) printf("[%ld] Waiting start...\n", id); - pthread_cond_wait(&io_threads_start_cond,&io_threads_idle_mutex); - if (tio_debug) printf("[%ld] Started\n", id); - pthread_mutex_unlock(&io_threads_idle_mutex); - if (tio_debug) printf("%d to handle\n", (int)listLength(io_threads_list[id])); + pthread_mutex_lock(&io_threads_mutex[id]); + if (io_threads_pending[id]) { + if (tio_debug) printf("[%ld] %d to handle\n", id, (int)listLength(io_threads_list[id])); - /* ... Process ... */ - listIter li; - listNode *ln; - listRewind(io_threads_list[id],&li); - while((ln = listNext(&li))) { - client *c = listNodeValue(ln); - writeToClient(c->fd,c,0); + /* ... Process ... */ + listIter li; + listNode *ln; + listRewind(io_threads_list[id],&li); + while((ln = listNext(&li))) { + client *c = listNodeValue(ln); + writeToClient(c->fd,c,0); + io_threads_pending[id]--; + } + listEmpty(io_threads_list[id]); } - listEmpty(io_threads_list[id]); - /* Report success. 
*/ - pthread_mutex_lock(&io_threads_done_mutex); - io_threads_done++; - pthread_cond_signal(&io_threads_done_cond); - pthread_mutex_unlock(&io_threads_done_mutex); + pthread_mutex_unlock(&io_threads_mutex[id]); if (tio_debug) printf("[%ld] Done\n", id); } } @@ -2529,30 +2518,50 @@ void initThreadedIO(void) { pthread_t tid; server.io_threads_num = 4; + io_threads_active = 0; /* We start with threads not active. */ for (int i = 0; i < server.io_threads_num; i++) { + pthread_mutex_init(&io_threads_mutex[i],NULL); + io_threads_pending[i] = 0; + io_threads_list[i] = listCreate(); + pthread_mutex_lock(&io_threads_mutex[i]); /* Thread will be stopped. */ if (pthread_create(&tid,NULL,IOThreadMain,(void*)(long)i) != 0) { serverLog(LL_WARNING,"Fatal: Can't initialize IO thread."); exit(1); } io_threads[i] = tid; - io_threads_list[i] = listCreate(); } } +void startThreadedIO(void) { + if (tio_debug) printf("--- STARTING THREADED IO ---\n"); + serverAssert(io_threads_active == 0); + for (int j = 0; j < server.io_threads_num; j++) + pthread_mutex_unlock(&io_threads_mutex[j]); + io_threads_active = 1; +} + +void stopThreadedIO(void) { + if (tio_debug) printf("--- STOPPING THREADED IO ---\n"); + serverAssert(io_threads_active == 1); + for (int j = 0; j < server.io_threads_num; j++) + pthread_mutex_lock(&io_threads_mutex[j]); + io_threads_active = 0; +} + int handleClientsWithPendingWritesUsingThreads(void) { int processed = listLength(server.clients_pending_write); if (processed == 0) return 0; /* Return ASAP if there are no clients. */ - if (tio_debug) printf("%d TOTAL\n", processed); - - /* Wait for all threads to be ready. */ - pthread_mutex_lock(&io_threads_idle_mutex); - while(io_threads_idle < server.io_threads_num) { - pthread_cond_wait(&io_threads_idle_cond,&io_threads_idle_mutex); + /* If we have just a few clients to serve, don't use I/O threads, but the + * boring synchronous code. */ + if (processed < (server.io_threads_num*2)) { + if (io_threads_active) stopThreadedIO(); + return handleClientsWithPendingWrites(); + } else { + if (!io_threads_active) startThreadedIO(); } - if (tio_debug) printf("All threads are idle: %d\n", io_threads_idle); - io_threads_idle = 0; - pthread_mutex_unlock(&io_threads_idle_mutex); + + if (tio_debug) printf("%d TOTAL pending clients\n", processed); /* Distribute the clients across N different lists. */ listIter li; @@ -2563,23 +2572,20 @@ int handleClientsWithPendingWritesUsingThreads(void) { client *c = listNodeValue(ln); c->flags &= ~CLIENT_PENDING_WRITE; int target_id = item_id % server.io_threads_num; + pthread_mutex_lock(&io_threads_mutex[target_id]); listAddNodeTail(io_threads_list[target_id],c); + io_threads_pending[target_id]++; + pthread_mutex_unlock(&io_threads_mutex[target_id]); item_id++; } - /* Start all threads. */ - if (tio_debug) printf("Send start condition\n"); - pthread_mutex_lock(&io_threads_done_mutex); - io_threads_done = 0; - pthread_cond_broadcast(&io_threads_start_cond); - pthread_mutex_unlock(&io_threads_done_mutex); - /* Wait for all threads to end their work. 
*/ - pthread_mutex_lock(&io_threads_done_mutex); - while(io_threads_done < server.io_threads_num) { - pthread_cond_wait(&io_threads_done_cond,&io_threads_done_mutex); + while(1) { + unsigned long pending = 0; + for (int j = 0; j < server.io_threads_num; j++) + pending += io_threads_pending[j]; + if (pending == 0) break; } - pthread_mutex_unlock(&io_threads_done_mutex); if (tio_debug) printf("All threads finshed\n"); /* Run the list of clients again to install the write handler where diff --git a/src/server.c b/src/server.c index c437880d..de5a814d 100644 --- a/src/server.c +++ b/src/server.c @@ -2072,8 +2072,6 @@ void beforeSleep(struct aeEventLoop *eventLoop) { flushAppendOnlyFile(0); /* Handle writes with pending output buffers. */ - /* XXX: Put a condition based on number of waiting clients: if we - * have less than a given number of clients, use non threaded code. */ handleClientsWithPendingWritesUsingThreads(); /* Close clients that need to be closed asynchronous */ From ea35a81c42a738a73ec4505b69e1b0d16e31fb34 Mon Sep 17 00:00:00 2001 From: antirez Date: Mon, 25 Mar 2019 16:33:23 +0100 Subject: [PATCH 066/304] Threaded IO: 3rd version: use the mutex only to stop the thread. --- src/networking.c | 52 ++++++++++++++++++++++++++++++------------------ 1 file changed, 33 insertions(+), 19 deletions(-) diff --git a/src/networking.c b/src/networking.c index cd241dac..17d6b186 100644 --- a/src/networking.c +++ b/src/networking.c @@ -2491,24 +2491,34 @@ void *IOThreadMain(void *myid) { long id = (unsigned long)myid; while(1) { - /* ... Wait for start ... */ - pthread_mutex_lock(&io_threads_mutex[id]); - if (io_threads_pending[id]) { - if (tio_debug) printf("[%ld] %d to handle\n", id, (int)listLength(io_threads_list[id])); - - /* ... Process ... */ - listIter li; - listNode *ln; - listRewind(io_threads_list[id],&li); - while((ln = listNext(&li))) { - client *c = listNodeValue(ln); - writeToClient(c->fd,c,0); - io_threads_pending[id]--; - } - listEmpty(io_threads_list[id]); + /* Wait for start */ + for (int j = 0; j < 1000000; j++) { + if (io_threads_pending[id] != 0) break; } - pthread_mutex_unlock(&io_threads_mutex[id]); + /* Give the main thread a chance to stop this thread. */ + if (io_threads_pending[id] == 0) { + pthread_mutex_lock(&io_threads_mutex[id]); + pthread_mutex_unlock(&io_threads_mutex[id]); + continue; + } + + serverAssert(io_threads_pending[id] != 0); + + if (tio_debug) printf("[%ld] %d to handle\n", id, (int)listLength(io_threads_list[id])); + + /* Process: note that the main thread will never touch our list + * before we drop the pending count to 0. */ + listIter li; + listNode *ln; + listRewind(io_threads_list[id],&li); + while((ln = listNext(&li))) { + client *c = listNodeValue(ln); + writeToClient(c->fd,c,0); + } + listEmpty(io_threads_list[id]); + io_threads_pending[id] = 0; + if (tio_debug) printf("[%ld] Done\n", id); } } @@ -2572,13 +2582,17 @@ int handleClientsWithPendingWritesUsingThreads(void) { client *c = listNodeValue(ln); c->flags &= ~CLIENT_PENDING_WRITE; int target_id = item_id % server.io_threads_num; - pthread_mutex_lock(&io_threads_mutex[target_id]); listAddNodeTail(io_threads_list[target_id],c); - io_threads_pending[target_id]++; - pthread_mutex_unlock(&io_threads_mutex[target_id]); item_id++; } + /* Give the start condition to the waiting threads, by setting the + * start condition atomic var. 
*/ + for (int j = 0; j < server.io_threads_num; j++) { + int count = listLength(io_threads_list[j]); + io_threads_pending[j] = count; + } + /* Wait for all threads to end their work. */ while(1) { unsigned long pending = 0; From 9bf7f302a77e69bad40c3d13639537049ece433c Mon Sep 17 00:00:00 2001 From: Ubuntu Date: Mon, 25 Mar 2019 17:05:06 +0000 Subject: [PATCH 067/304] Threaded IO: stop threads when no longer needed + C11 in Makefile. Now threads are stopped even when the connections drop immediately to zero, not allowing the networking code to detect the condition and stop the threads. serverCron() will handle that. --- src/Makefile | 2 +- src/networking.c | 29 ++++++++++++++++++++++++----- src/server.c | 3 +++ src/server.h | 1 + 4 files changed, 29 insertions(+), 6 deletions(-) diff --git a/src/Makefile b/src/Makefile index 93cfdc28..1c80e547 100644 --- a/src/Makefile +++ b/src/Makefile @@ -20,7 +20,7 @@ DEPENDENCY_TARGETS=hiredis linenoise lua NODEPS:=clean distclean # Default settings -STD=-std=c99 -pedantic -DREDIS_STATIC='' +1TD=-std=c11 -pedantic -DREDIS_STATIC='' ifneq (,$(findstring clang,$(CC))) ifneq (,$(findstring FreeBSD,$(uname_S))) STD+=-Wno-c11-extensions diff --git a/src/networking.c b/src/networking.c index 17d6b186..d61e1f04 100644 --- a/src/networking.c +++ b/src/networking.c @@ -2527,7 +2527,7 @@ void *IOThreadMain(void *myid) { void initThreadedIO(void) { pthread_t tid; - server.io_threads_num = 4; + server.io_threads_num = 8; io_threads_active = 0; /* We start with threads not active. */ for (int i = 0; i < server.io_threads_num; i++) { pthread_mutex_init(&io_threads_mutex[i],NULL); @@ -2543,6 +2543,7 @@ void initThreadedIO(void) { } void startThreadedIO(void) { + printf("S"); fflush(stdout); if (tio_debug) printf("--- STARTING THREADED IO ---\n"); serverAssert(io_threads_active == 0); for (int j = 0; j < server.io_threads_num; j++) @@ -2551,6 +2552,7 @@ void startThreadedIO(void) { } void stopThreadedIO(void) { + printf("E"); fflush(stdout); if (tio_debug) printf("--- STOPPING THREADED IO ---\n"); serverAssert(io_threads_active == 1); for (int j = 0; j < server.io_threads_num; j++) @@ -2558,19 +2560,36 @@ void stopThreadedIO(void) { io_threads_active = 0; } +/* This function checks if there are not enough pending clients to justify + * taking the I/O threads active: in that case I/O threads are stopped if + * currently active. + * + * The function returns 0 if the I/O threading should be used becuase there + * are enough active threads, otherwise 1 is returned and the I/O threads + * could be possibly stopped (if already active) as a side effect. */ +int stopThreadedIOIfNeeded(void) { + int pending = listLength(server.clients_pending_write); + if (pending < (server.io_threads_num*2)) { + if (io_threads_active) stopThreadedIO(); + return 1; + } else { + return 0; + } +} + int handleClientsWithPendingWritesUsingThreads(void) { int processed = listLength(server.clients_pending_write); if (processed == 0) return 0; /* Return ASAP if there are no clients. */ /* If we have just a few clients to serve, don't use I/O threads, but the * boring synchronous code. */ - if (processed < (server.io_threads_num*2)) { - if (io_threads_active) stopThreadedIO(); + if (stopThreadedIOIfNeeded()) { return handleClientsWithPendingWrites(); - } else { - if (!io_threads_active) startThreadedIO(); } + /* Start threads if needed. */ + if (!io_threads_active) startThreadedIO(); + if (tio_debug) printf("%d TOTAL pending clients\n", processed); /* Distribute the clients across N different lists. 
*/ diff --git a/src/server.c b/src/server.c index de5a814d..325c9010 100644 --- a/src/server.c +++ b/src/server.c @@ -2001,6 +2001,9 @@ int serverCron(struct aeEventLoop *eventLoop, long long id, void *clientData) { migrateCloseTimedoutSockets(); } + /* Stop the I/O threads if we don't have enough pending work. */ + stopThreadedIOIfNeeded(); + /* Start a scheduled BGSAVE if the corresponding flag is set. This is * useful when we are forced to postpone a BGSAVE because an AOF * rewrite is in progress. diff --git a/src/server.h b/src/server.h index d2a563c9..96ee3788 100644 --- a/src/server.h +++ b/src/server.h @@ -1579,6 +1579,7 @@ int clientsArePaused(void); int processEventsWhileBlocked(void); int handleClientsWithPendingWrites(void); int handleClientsWithPendingWritesUsingThreads(void); +int stopThreadedIOIfNeeded(void); int clientHasPendingReplies(client *c); void unlinkClient(client *c); int writeToClient(int fd, client *c, int handler_installed); From 30091dc29f5a1aa9e751ab5cbec0b525cd4d0f49 Mon Sep 17 00:00:00 2001 From: antirez Date: Wed, 27 Mar 2019 18:39:13 +0100 Subject: [PATCH 068/304] Threaded IO: use main thread if num of threads is 1. --- src/networking.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/src/networking.c b/src/networking.c index d61e1f04..916f29eb 100644 --- a/src/networking.c +++ b/src/networking.c @@ -2525,11 +2525,16 @@ void *IOThreadMain(void *myid) { /* Initialize the data structures needed for threaded I/O. */ void initThreadedIO(void) { - pthread_t tid; - server.io_threads_num = 8; io_threads_active = 0; /* We start with threads not active. */ + + /* Don't spawn any thread if the user selected a single thread: + * we'll handle I/O directly from the main thread. */ + if (server.io_threads_num == 1) return; + + /* Spawn the I/O threads. */ for (int i = 0; i < server.io_threads_num; i++) { + pthread_t tid; pthread_mutex_init(&io_threads_mutex[i],NULL); io_threads_pending[i] = 0; io_threads_list[i] = listCreate(); @@ -2569,6 +2574,10 @@ void stopThreadedIO(void) { * could be possibly stopped (if already active) as a side effect. */ int stopThreadedIOIfNeeded(void) { int pending = listLength(server.clients_pending_write); + + /* Return ASAP if IO threads are disabled (single threaded mode). */ + if (server.io_threads_num == 1) return 0; + if (pending < (server.io_threads_num*2)) { if (io_threads_active) stopThreadedIO(); return 1; From 9814b2a5f3e91eafb21ff1fe865a161abf71045f Mon Sep 17 00:00:00 2001 From: antirez Date: Wed, 27 Mar 2019 18:58:45 +0100 Subject: [PATCH 069/304] Threaded IO: make num of I/O threads configurable. 
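
With this patch the number of threads becomes a startup option. A minimal
configuration example, using only what the config.c hunk below accepts (the
directive name and the 1..512 range come from that hunk; the surrounding
comment text is just an illustration, not part of the patch):

    # Use 4 I/O threads. A value of 1 keeps the traditional single
    # threaded behavior; values outside 1..512 are rejected at startup.
    io-threads 4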
--- src/config.c | 7 +++++++ src/networking.c | 3 +-- src/server.c | 1 + src/server.h | 1 + 4 files changed, 10 insertions(+), 2 deletions(-) diff --git a/src/config.c b/src/config.c index 1e052559..c4a18f3b 100644 --- a/src/config.c +++ b/src/config.c @@ -313,6 +313,11 @@ void loadServerConfigFromString(char *config) { if (server.dbnum < 1) { err = "Invalid number of databases"; goto loaderr; } + } else if (!strcasecmp(argv[0],"io-threads") && argc == 2) { + server.io_threads_num = atoi(argv[1]); + if (server.io_threads_num < 1 || server.io_threads_num > 512) { + err = "Invalid number of I/O threads"; goto loaderr; + } } else if (!strcasecmp(argv[0],"include") && argc == 2) { loadServerConfig(argv[1],NULL); } else if (!strcasecmp(argv[0],"maxclients") && argc == 2) { @@ -1426,6 +1431,7 @@ void configGetCommand(client *c) { config_get_numerical_field("cluster-announce-bus-port",server.cluster_announce_bus_port); config_get_numerical_field("tcp-backlog",server.tcp_backlog); config_get_numerical_field("databases",server.dbnum); + config_get_numerical_field("io-threads",server.io_threads_num); config_get_numerical_field("repl-ping-slave-period",server.repl_ping_slave_period); config_get_numerical_field("repl-ping-replica-period",server.repl_ping_slave_period); config_get_numerical_field("repl-timeout",server.repl_timeout); @@ -2239,6 +2245,7 @@ int rewriteConfig(char *path) { rewriteConfigSaveOption(state); rewriteConfigUserOption(state); rewriteConfigNumericalOption(state,"databases",server.dbnum,CONFIG_DEFAULT_DBNUM); + rewriteConfigNumericalOption(state,"io-threads",server.dbnum,CONFIG_DEFAULT_IO_THREADS_NUM); rewriteConfigYesNoOption(state,"stop-writes-on-bgsave-error",server.stop_writes_on_bgsave_err,CONFIG_DEFAULT_STOP_WRITES_ON_BGSAVE_ERROR); rewriteConfigYesNoOption(state,"rdbcompression",server.rdb_compression,CONFIG_DEFAULT_RDB_COMPRESSION); rewriteConfigYesNoOption(state,"rdbchecksum",server.rdb_checksum,CONFIG_DEFAULT_RDB_CHECKSUM); diff --git a/src/networking.c b/src/networking.c index 916f29eb..275338a6 100644 --- a/src/networking.c +++ b/src/networking.c @@ -2525,7 +2525,6 @@ void *IOThreadMain(void *myid) { /* Initialize the data structures needed for threaded I/O. */ void initThreadedIO(void) { - server.io_threads_num = 8; io_threads_active = 0; /* We start with threads not active. */ /* Don't spawn any thread if the user selected a single thread: @@ -2576,7 +2575,7 @@ int stopThreadedIOIfNeeded(void) { int pending = listLength(server.clients_pending_write); /* Return ASAP if IO threads are disabled (single threaded mode). */ - if (server.io_threads_num == 1) return 0; + if (server.io_threads_num == 1) return 1; if (pending < (server.io_threads_num*2)) { if (io_threads_active) stopThreadedIO(); diff --git a/src/server.c b/src/server.c index 325c9010..f6d2b47f 100644 --- a/src/server.c +++ b/src/server.c @@ -2317,6 +2317,7 @@ void initServerConfig(void) { server.lazyfree_lazy_server_del = CONFIG_DEFAULT_LAZYFREE_LAZY_SERVER_DEL; server.always_show_logo = CONFIG_DEFAULT_ALWAYS_SHOW_LOGO; server.lua_time_limit = LUA_SCRIPT_TIME_LIMIT; + server.io_threads_num = CONFIG_DEFAULT_IO_THREADS_NUM; unsigned int lruclock = getLRUClock(); atomicSet(server.lruclock,lruclock); diff --git a/src/server.h b/src/server.h index 96ee3788..2e4de2bb 100644 --- a/src/server.h +++ b/src/server.h @@ -87,6 +87,7 @@ typedef long long mstime_t; /* millisecond time type. */ #define CONFIG_DEFAULT_TCP_BACKLOG 511 /* TCP listen backlog. 
*/ #define CONFIG_DEFAULT_CLIENT_TIMEOUT 0 /* Default client timeout: infinite */ #define CONFIG_DEFAULT_DBNUM 16 +#define CONFIG_DEFAULT_IO_THREADS_NUM 1 /* Single threaded by default */ #define CONFIG_MAX_LINE 1024 #define CRON_DBS_PER_CALL 16 #define NET_MAX_WRITES_PER_EVENT (1024*64) From 74591fb5bddc995dfaa51c05b3362e7675187b0f Mon Sep 17 00:00:00 2001 From: antirez Date: Wed, 27 Mar 2019 18:59:39 +0100 Subject: [PATCH 070/304] Threaded IO: hide more debugging printfs under conditional. --- src/networking.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/networking.c b/src/networking.c index 275338a6..caffd3be 100644 --- a/src/networking.c +++ b/src/networking.c @@ -2547,7 +2547,7 @@ void initThreadedIO(void) { } void startThreadedIO(void) { - printf("S"); fflush(stdout); + if (tio_debug) printf("S"); fflush(stdout); if (tio_debug) printf("--- STARTING THREADED IO ---\n"); serverAssert(io_threads_active == 0); for (int j = 0; j < server.io_threads_num; j++) @@ -2556,7 +2556,7 @@ void startThreadedIO(void) { } void stopThreadedIO(void) { - printf("E"); fflush(stdout); + if (tio_debug) printf("E"); fflush(stdout); if (tio_debug) printf("--- STOPPING THREADED IO ---\n"); serverAssert(io_threads_active == 1); for (int j = 0; j < server.io_threads_num; j++) From dd5b105c73a02389987e457cebbeaa801ba16977 Mon Sep 17 00:00:00 2001 From: antirez Date: Sat, 30 Mar 2019 11:26:58 +0100 Subject: [PATCH 071/304] Threaded IO: read side WIP. --- src/evict.c | 2 +- src/networking.c | 61 ++++++++++++++++++++++++++++++++++++++---------- src/server.c | 30 ++++++++++-------------- src/server.h | 28 +++++++++++----------- 4 files changed, 75 insertions(+), 46 deletions(-) diff --git a/src/evict.c b/src/evict.c index 773916ce..176f4c36 100644 --- a/src/evict.c +++ b/src/evict.c @@ -78,7 +78,7 @@ unsigned int getLRUClock(void) { unsigned int LRU_CLOCK(void) { unsigned int lruclock; if (1000/server.hz <= LRU_CLOCK_RESOLUTION) { - atomicGet(server.lruclock,lruclock); + lruclock = server.lruclock; } else { lruclock = getLRUClock(); } diff --git a/src/networking.c b/src/networking.c index caffd3be..fd4e990f 100644 --- a/src/networking.c +++ b/src/networking.c @@ -35,6 +35,7 @@ #include static void setProtocolError(const char *errstr, client *c); +int postponeClientRead(client *c); /* Return the size consumed from the allocator, for the specified SDS string, * including internal fragmentation. This function is used in order to compute @@ -105,8 +106,7 @@ client *createClient(int fd) { } selectDb(c,0); - uint64_t client_id; - atomicGetIncr(server.next_client_id,client_id,1); + uint64_t client_id = ++server.next_client_id; c->id = client_id; c->resp = 2; c->fd = fd; @@ -950,6 +950,14 @@ void unlinkClient(client *c) { c->flags &= ~CLIENT_PENDING_WRITE; } + /* Remove from the list of pending reads if needed. */ + if (c->flags & CLIENT_PENDING_READ) { + ln = listSearchKey(server.clients_pending_read,c); + serverAssert(ln != NULL); + listDelNode(server.clients_pending_read,ln); + c->flags &= ~CLIENT_PENDING_READ; + } + /* When client was just unblocked because of a blocking operation, * remove it from the list of unblocked clients. */ if (c->flags & CLIENT_UNBLOCKED) { @@ -1642,13 +1650,19 @@ void processInputBuffer(client *c) { } /* This is a wrapper for processInputBuffer that also cares about handling - * the replication forwarding to the sub-slaves, in case the client 'c' + * the replication forwarding to the sub-replicas, in case the client 'c' * is flagged as master. 
Usually you want to call this instead of the * raw processInputBuffer(). */ void processInputBufferAndReplicate(client *c) { if (!(c->flags & CLIENT_MASTER)) { processInputBuffer(c); } else { + /* If the client is a master we need to compute the difference + * between the applied offset before and after processing the buffer, + * to understand how much of the replication stream was actually + * applied to the master state: this quantity, and its corresponding + * part of the replication stream, will be propagated to the + * sub-replicas and to the replication backlog. */ size_t prev_offset = c->reploff; processInputBuffer(c); size_t applied = c->reploff - prev_offset; @@ -1667,6 +1681,10 @@ void readQueryFromClient(aeEventLoop *el, int fd, void *privdata, int mask) { UNUSED(el); UNUSED(mask); + /* Check if we want to read from the client later when exiting from + * the event loop. This is the case if threaded I/O is enabled. */ + if (postponeClientRead(c)) return; + readlen = PROTO_IOBUF_LEN; /* If this is a multi bulk request, and we are processing a bulk reply * that is large enough, try to maximize the probability that the query @@ -1716,20 +1734,21 @@ void readQueryFromClient(aeEventLoop *el, int fd, void *privdata, int mask) { sds ci = catClientInfoString(sdsempty(),c), bytes = sdsempty(); bytes = sdscatrepr(bytes,c->querybuf,64); - serverLog(LL_WARNING,"Closing client that reached max query buffer length: %s (qbuf initial bytes: %s)", ci, bytes); +// FIXME: This may be called from an I/O thread and it is not safe to +// log from there for now. +// serverLog(LL_WARNING,"Closing client that reached max query buffer length: %s (qbuf initial bytes: %s)", ci, bytes); sdsfree(ci); sdsfree(bytes); freeClient(c); return; } - /* Time to process the buffer. If the client is a master we need to - * compute the difference between the applied offset before and after - * processing the buffer, to understand how much of the replication stream - * was actually applied to the master state: this quantity, and its - * corresponding part of the replication stream, will be propagated to - * the sub-slaves and to the replication backlog. */ - processInputBufferAndReplicate(c); + /* There is more data in the client input buffer, continue parsing it + * in case to check if there is a full command to execute. + * Don't do it if the client is flagged as CLIENT_PENDING_READ: it means + * we are currently in the context of an I/O thread. */ + if (!(c->flags & CLIENT_PENDING_READ)) + processInputBufferAndReplicate(c); } void getClientsMaxBuffers(unsigned long *longest_output_list, @@ -2566,7 +2585,9 @@ void stopThreadedIO(void) { /* This function checks if there are not enough pending clients to justify * taking the I/O threads active: in that case I/O threads are stopped if - * currently active. + * currently active. We track the pending writes as a measure of clients + * we need to handle in parallel, however the I/O threading is disabled + * globally for reads as well if we have too little pending clients. * * The function returns 0 if the I/O threading should be used becuase there * are enough active threads, otherwise 1 is returned and the I/O threads @@ -2647,3 +2668,19 @@ int handleClientsWithPendingWritesUsingThreads(void) { listEmpty(server.clients_pending_write); return processed; } + +/* Return 1 if we want to handle the client read later using threaded I/O. + * This is called by the readable handler of the event loop. 
+ * As a side effect of calling this function the client is put in the + * pending read clients and flagged as such. */ +int postponeClientRead(client *c) { + if (io_threads_active && + !(c->flags & (CLIENT_MASTER|CLIENT_SLAVE|CLIENT_PENDING_READ))) + { + c->flags |= CLIENT_PENDING_READ; + listAddNodeHead(server.clients_pending_read,c); + return 1; + } else { + return 0; + } +} diff --git a/src/server.c b/src/server.c index f6d2b47f..ef6b85c4 100644 --- a/src/server.c +++ b/src/server.c @@ -1728,16 +1728,17 @@ void databasesCron(void) { * every object access, and accuracy is not needed. To access a global var is * a lot faster than calling time(NULL) */ void updateCachedTime(void) { - time_t unixtime = time(NULL); - atomicSet(server.unixtime,unixtime); + server.unixtime = time(NULL); server.mstime = mstime(); - /* To get information about daylight saving time, we need to call localtime_r - * and cache the result. However calling localtime_r in this context is safe - * since we will never fork() while here, in the main thread. The logging - * function will call a thread safe version of localtime that has no locks. */ + /* To get information about daylight saving time, we need to call + * localtime_r and cache the result. However calling localtime_r in this + * context is safe since we will never fork() while here, in the main + * thread. The logging function will call a thread safe version of + * localtime that has no locks. */ struct tm tm; - localtime_r(&server.unixtime,&tm); + time_t ut = server.unixtime; + localtime_r(&ut,&tm); server.daylight_active = tm.tm_isdst; } @@ -1807,8 +1808,7 @@ int serverCron(struct aeEventLoop *eventLoop, long long id, void *clientData) { * * Note that you can change the resolution altering the * LRU_CLOCK_RESOLUTION define. */ - unsigned long lruclock = getLRUClock(); - atomicSet(server.lruclock,lruclock); + server.lruclock = getLRUClock(); /* Record the max memory used since the server was started. */ if (zmalloc_used_memory() > server.stat_peak_memory) @@ -2202,10 +2202,6 @@ void createSharedObjects(void) { void initServerConfig(void) { int j; - pthread_mutex_init(&server.next_client_id_mutex,NULL); - pthread_mutex_init(&server.lruclock_mutex,NULL); - pthread_mutex_init(&server.unixtime_mutex,NULL); - updateCachedTime(); getRandomHexChars(server.runid,CONFIG_RUN_ID_SIZE); server.runid[CONFIG_RUN_ID_SIZE] = '\0'; @@ -2319,8 +2315,7 @@ void initServerConfig(void) { server.lua_time_limit = LUA_SCRIPT_TIME_LIMIT; server.io_threads_num = CONFIG_DEFAULT_IO_THREADS_NUM; - unsigned int lruclock = getLRUClock(); - atomicSet(server.lruclock,lruclock); + server.lruclock = getLRUClock(); resetServerSaveParams(); appendServerSaveParams(60*60,1); /* save after 1 hour and 1 change */ @@ -2718,6 +2713,7 @@ void initServer(void) { server.slaves = listCreate(); server.monitors = listCreate(); server.clients_pending_write = listCreate(); + server.clients_pending_read = listCreate(); server.slaveseldb = -1; /* Force to emit the first SELECT command. */ server.unblocked_clients = listCreate(); server.ready_keys = listCreate(); @@ -3821,8 +3817,6 @@ sds genRedisInfoString(char *section) { call_uname = 0; } - unsigned int lruclock; - atomicGet(server.lruclock,lruclock); info = sdscatprintf(info, "# Server\r\n" "redis_version:%s\r\n" @@ -3866,7 +3860,7 @@ sds genRedisInfoString(char *section) { (intmax_t)(uptime/(3600*24)), server.hz, server.config_hz, - (unsigned long) lruclock, + (unsigned long) server.lruclock, server.executable ? server.executable : "", server.configfile ? 
server.configfile : ""); } diff --git a/src/server.h b/src/server.h index 2e4de2bb..dcfcb55f 100644 --- a/src/server.h +++ b/src/server.h @@ -285,6 +285,9 @@ typedef long long mstime_t; /* millisecond time type. */ #define CLIENT_LUA_DEBUG_SYNC (1<<26) /* EVAL debugging without fork() */ #define CLIENT_MODULE (1<<27) /* Non connected client used by some module. */ #define CLIENT_PROTECTED (1<<28) /* Client should not be freed for now. */ +#define CLIENT_PENDING_READ (1<<29) /* The client has pending reads and was put + in the list of clients we can read + from. */ /* Client block type (btype field in client structure) * if CLIENT_BLOCKED flag is set. */ @@ -1018,7 +1021,7 @@ struct redisServer { dict *commands; /* Command table */ dict *orig_commands; /* Command table before command renaming. */ aeEventLoop *el; - unsigned int lruclock; /* Clock for LRU eviction */ + _Atomic unsigned int lruclock; /* Clock for LRU eviction */ int shutdown_asap; /* SHUTDOWN needed ASAP */ int activerehashing; /* Incremental rehash in serverCron() */ int active_defrag_running; /* Active defragmentation running (holds current scan aggressiveness) */ @@ -1052,6 +1055,7 @@ struct redisServer { list *clients; /* List of active clients */ list *clients_to_close; /* Clients to close asynchronously */ list *clients_pending_write; /* There is to write or install handler. */ + list *clients_pending_read; /* Client has pending read socket buffers. */ list *slaves, *monitors; /* List of slaves and MONITORs */ client *current_client; /* Current client, only used on crash report */ rax *clients_index; /* Active clients dictionary by client ID. */ @@ -1059,7 +1063,7 @@ struct redisServer { mstime_t clients_pause_end_time; /* Time when we undo clients_paused */ char neterr[ANET_ERR_LEN]; /* Error buffer for anet.c */ dict *migrate_cached_sockets;/* MIGRATE cached sockets */ - uint64_t next_client_id; /* Next client unique ID. Incremental. */ + _Atomic uint64_t next_client_id; /* Next client unique ID. Incremental. */ int protected_mode; /* Don't accept external connections. */ int gopher_enabled; /* If true the server will reply to gopher queries. Will still serve RESP2 queries. */ @@ -1104,8 +1108,8 @@ struct redisServer { long long slowlog_log_slower_than; /* SLOWLOG time limit (to get logged) */ unsigned long slowlog_max_len; /* SLOWLOG max number of items logged */ struct malloc_stats cron_malloc_stats; /* sampled in serverCron(). */ - long long stat_net_input_bytes; /* Bytes read from network. */ - long long stat_net_output_bytes; /* Bytes written to network. */ + _Atomic long long stat_net_input_bytes; /* Bytes read from network. */ + _Atomic long long stat_net_output_bytes; /* Bytes written to network. */ size_t stat_rdb_cow_bytes; /* Copy on write bytes during RDB saving. */ size_t stat_aof_cow_bytes; /* Copy on write bytes during AOF rewrite. */ /* The following two are used to track instantaneous metrics, like @@ -1128,7 +1132,7 @@ struct redisServer { int active_defrag_cycle_min; /* minimal effort for defrag in CPU percentage */ int active_defrag_cycle_max; /* maximal effort for defrag in CPU percentage */ unsigned long active_defrag_max_scan_fields; /* maximum number of fields of set/hash/zset/list to process from within the main dict scan */ - size_t client_max_querybuf_len; /* Limit for client query buffer length */ + _Atomic size_t client_max_querybuf_len; /* Limit for client query buffer length */ int dbnum; /* Total number of configured DBs */ int supervised; /* 1 if supervised, 0 otherwise. 
*/ int supervised_mode; /* See SUPERVISED_* */ @@ -1297,10 +1301,10 @@ struct redisServer { int list_max_ziplist_size; int list_compress_depth; /* time cache */ - time_t unixtime; /* Unix time sampled every cron cycle. */ - time_t timezone; /* Cached timezone. As set by tzset(). */ - int daylight_active; /* Currently in daylight saving time. */ - long long mstime; /* Like 'unixtime' but with milliseconds resolution. */ + _Atomic time_t unixtime; /* Unix time sampled every cron cycle. */ + time_t timezone; /* Cached timezone. As set by tzset(). */ + int daylight_active; /* Currently in daylight saving time. */ + long long mstime; /* 'unixtime' with milliseconds resolution. */ /* Pubsub */ dict *pubsub_channels; /* Map channels to list of subscribed clients */ list *pubsub_patterns; /* A list of pubsub_patterns */ @@ -1360,12 +1364,6 @@ struct redisServer { int watchdog_period; /* Software watchdog period in ms. 0 = off */ /* System hardware info */ size_t system_memory_size; /* Total memory in system as reported by OS */ - - /* Mutexes used to protect atomic variables when atomic builtins are - * not available. */ - pthread_mutex_t lruclock_mutex; - pthread_mutex_t next_client_id_mutex; - pthread_mutex_t unixtime_mutex; }; typedef struct pubsubPattern { From a2245f8ff146629159d8c52d60713a262fa1b69a Mon Sep 17 00:00:00 2001 From: antirez Date: Sun, 31 Mar 2019 15:58:54 +0200 Subject: [PATCH 072/304] Threaded IO: read side WIP 2. --- src/networking.c | 30 +++++++++++++++++++++++------- 1 file changed, 23 insertions(+), 7 deletions(-) diff --git a/src/networking.c b/src/networking.c index fd4e990f..7d847048 100644 --- a/src/networking.c +++ b/src/networking.c @@ -2496,13 +2496,16 @@ int processEventsWhileBlocked(void) { int tio_debug = 0; -#define SERVER_MAX_IO_THREADS 32 +#define IO_THREADS_MAX_NUM 128 +#define IO_THREADS_OP_READ 0 +#define IO_THREADS_OP_WRITE 1 -pthread_t io_threads[SERVER_MAX_IO_THREADS]; -pthread_mutex_t io_threads_mutex[SERVER_MAX_IO_THREADS]; -_Atomic unsigned long io_threads_pending[SERVER_MAX_IO_THREADS]; -int io_threads_active; -list *io_threads_list[SERVER_MAX_IO_THREADS]; +pthread_t io_threads[IO_THREADS_MAX_NUM]; +pthread_mutex_t io_threads_mutex[IO_THREADS_MAX_NUM]; +_Atomic unsigned long io_threads_pending[IO_THREADS_MAX_NUM]; +int io_threads_active; /* Are the threads currently spinning waiting I/O? */ +int io_threads_op; /* IO_THREADS_OP_WRITE or IO_THREADS_OP_READ. */ +list *io_threads_list[IO_THREADS_MAX_NUM]; void *IOThreadMain(void *myid) { /* The ID is the thread number (from 0 to server.iothreads_num-1), and is @@ -2533,7 +2536,11 @@ void *IOThreadMain(void *myid) { listRewind(io_threads_list[id],&li); while((ln = listNext(&li))) { client *c = listNodeValue(ln); - writeToClient(c->fd,c,0); + if (io_threads_op == IO_THREADS_OP_WRITE) { + writeToClient(c->fd,c,0); + } else { + readQueryFromClient(NULL,c->fd,c,0); + } } listEmpty(io_threads_list[id]); io_threads_pending[id] = 0; @@ -2550,6 +2557,12 @@ void initThreadedIO(void) { * we'll handle I/O directly from the main thread. */ if (server.io_threads_num == 1) return; + if (server.io_threads_num > IO_THREADS_MAX_NUM) { + serverLog(LL_WARNING,"Fatal: too many I/O threads configured. " + "The maximum number is %d.", IO_THREADS_MAX_NUM); + exit(1); + } + /* Spawn the I/O threads. 
*/ for (int i = 0; i < server.io_threads_num; i++) { pthread_t tid; @@ -2684,3 +2697,6 @@ int postponeClientRead(client *c) { return 0; } } + +int handleClientsWithPendingReadsUsingThreads(void) { +} From 63a0ffd36a99083b909e2110a7604fe335656a8d Mon Sep 17 00:00:00 2001 From: antirez Date: Sun, 31 Mar 2019 21:59:50 +0200 Subject: [PATCH 073/304] Threaded IO: read side WIP 3. --- src/networking.c | 59 +++++++++++++++++++++++++++++++++++++++++++----- src/server.c | 1 + src/server.h | 1 + 3 files changed, 55 insertions(+), 6 deletions(-) diff --git a/src/networking.c b/src/networking.c index 7d847048..3a36badb 100644 --- a/src/networking.c +++ b/src/networking.c @@ -1711,12 +1711,12 @@ void readQueryFromClient(aeEventLoop *el, int fd, void *privdata, int mask) { return; } else { serverLog(LL_VERBOSE, "Reading from client: %s",strerror(errno)); - freeClient(c); + freeClientAsync(c); return; } } else if (nread == 0) { serverLog(LL_VERBOSE, "Client closed connection"); - freeClient(c); + freeClientAsync(c); return; } else if (c->flags & CLIENT_MASTER) { /* Append the query buffer to the pending (not applied) buffer @@ -1739,7 +1739,7 @@ void readQueryFromClient(aeEventLoop *el, int fd, void *privdata, int mask) { // serverLog(LL_WARNING,"Closing client that reached max query buffer length: %s (qbuf initial bytes: %s)", ci, bytes); sdsfree(ci); sdsfree(bytes); - freeClient(c); + freeClientAsync(c); return; } @@ -2538,8 +2538,10 @@ void *IOThreadMain(void *myid) { client *c = listNodeValue(ln); if (io_threads_op == IO_THREADS_OP_WRITE) { writeToClient(c->fd,c,0); - } else { + } else if (io_threads_op == IO_THREADS_OP_READ) { readQueryFromClient(NULL,c->fd,c,0); + } else { + serverPanic("io_threads_op value is unknown"); } } listEmpty(io_threads_list[id]); @@ -2632,7 +2634,7 @@ int handleClientsWithPendingWritesUsingThreads(void) { /* Start threads if needed. */ if (!io_threads_active) startThreadedIO(); - if (tio_debug) printf("%d TOTAL pending clients\n", processed); + if (tio_debug) printf("%d TOTAL WRITE pending clients\n", processed); /* Distribute the clients across N different lists. */ listIter li; @@ -2649,6 +2651,7 @@ int handleClientsWithPendingWritesUsingThreads(void) { /* Give the start condition to the waiting threads, by setting the * start condition atomic var. */ + io_threads_op = IO_THREADS_OP_WRITE; for (int j = 0; j < server.io_threads_num; j++) { int count = listLength(io_threads_list[j]); io_threads_pending[j] = count; @@ -2661,7 +2664,7 @@ int handleClientsWithPendingWritesUsingThreads(void) { pending += io_threads_pending[j]; if (pending == 0) break; } - if (tio_debug) printf("All threads finshed\n"); + if (tio_debug) printf("I/O WRITE All threads finshed\n"); /* Run the list of clients again to install the write handler where * needed. */ @@ -2699,4 +2702,48 @@ int postponeClientRead(client *c) { } int handleClientsWithPendingReadsUsingThreads(void) { + if (!io_threads_active) return 0; + int processed = listLength(server.clients_pending_read); + if (processed == 0) return 0; + + if (tio_debug) printf("%d TOTAL READ pending clients\n", processed); + + /* Distribute the clients across N different lists. 
*/ + listIter li; + listNode *ln; + listRewind(server.clients_pending_read,&li); + int item_id = 0; + while((ln = listNext(&li))) { + client *c = listNodeValue(ln); + int target_id = item_id % server.io_threads_num; + listAddNodeTail(io_threads_list[target_id],c); + item_id++; + } + + /* Give the start condition to the waiting threads, by setting the + * start condition atomic var. */ + io_threads_op = IO_THREADS_OP_READ; + for (int j = 0; j < server.io_threads_num; j++) { + int count = listLength(io_threads_list[j]); + io_threads_pending[j] = count; + } + + /* Wait for all threads to end their work. */ + while(1) { + unsigned long pending = 0; + for (int j = 0; j < server.io_threads_num; j++) + pending += io_threads_pending[j]; + if (pending == 0) break; + } + if (tio_debug) printf("I/O READ All threads finshed\n"); + + /* Run the list of clients again to process the new buffers. */ + listRewind(server.clients_pending_read,&li); + while((ln = listNext(&li))) { + client *c = listNodeValue(ln); + c->flags &= ~CLIENT_PENDING_READ; + processInputBufferAndReplicate(c); + } + listEmpty(server.clients_pending_read); + return processed; } diff --git a/src/server.c b/src/server.c index ef6b85c4..e0c48b09 100644 --- a/src/server.c +++ b/src/server.c @@ -2092,6 +2092,7 @@ void beforeSleep(struct aeEventLoop *eventLoop) { void afterSleep(struct aeEventLoop *eventLoop) { UNUSED(eventLoop); if (moduleCount()) moduleAcquireGIL(); + handleClientsWithPendingReadsUsingThreads(); } /* =========================== Server initialization ======================== */ diff --git a/src/server.h b/src/server.h index dcfcb55f..0d788241 100644 --- a/src/server.h +++ b/src/server.h @@ -1578,6 +1578,7 @@ int clientsArePaused(void); int processEventsWhileBlocked(void); int handleClientsWithPendingWrites(void); int handleClientsWithPendingWritesUsingThreads(void); +int handleClientsWithPendingReadsUsingThreads(void); int stopThreadedIOIfNeeded(void); int clientHasPendingReplies(client *c); void unlinkClient(client *c); From 8d7d2be24fb74234603667e8da4de2d2f466aff1 Mon Sep 17 00:00:00 2001 From: antirez Date: Sun, 31 Mar 2019 22:06:00 +0200 Subject: [PATCH 074/304] Threaded IO: process read queue before stopping threads. --- src/networking.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/networking.c b/src/networking.c index 3a36badb..29a56e98 100644 --- a/src/networking.c +++ b/src/networking.c @@ -2590,8 +2590,13 @@ void startThreadedIO(void) { } void stopThreadedIO(void) { + /* We may have still clients with pending reads when this function + * is called: handle them before stopping the threads. */ + handleClientsWithPendingReadsUsingThreads(); if (tio_debug) printf("E"); fflush(stdout); - if (tio_debug) printf("--- STOPPING THREADED IO ---\n"); + if (tio_debug) printf("--- STOPPING THREADED IO [R%d] [W%d] ---\n", + (int) listLength(server.clients_pending_read), + (int) listLength(server.clients_pending_write)); serverAssert(io_threads_active == 1); for (int j = 0; j < server.io_threads_num; j++) pthread_mutex_lock(&io_threads_mutex[j]); From 463ccf86642ae35e18cf0c84be4e8e9e7c905c70 Mon Sep 17 00:00:00 2001 From: antirez Date: Mon, 8 Apr 2019 13:12:10 +0200 Subject: [PATCH 075/304] Threaded IO: logging should be safe in I/O threads. Potentially it is possible that we get interleaved writes, even if serverLog() makes sure to write into a buffer and then use printf(), so even this should be ok. 
However in general POSIX guarantees that writing to the same file pointer object from multiple threads is safe. Anyway currently we *reopen* the file at each call, but for the standard output logging. The logging functions actually also access global configuration while performing the log (for instance in order to check the log level, the log filename and so forth), however dunring the I/O threads execution we cannot alter such shared state in any way. --- src/networking.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/networking.c b/src/networking.c index 29a56e98..0e11e1f3 100644 --- a/src/networking.c +++ b/src/networking.c @@ -1174,14 +1174,13 @@ int writeToClient(int fd, client *c, int handler_installed) { zmalloc_used_memory() < server.maxmemory) && !(c->flags & CLIENT_SLAVE)) break; } - /* FIXME: Fixme, use atomic var for this. */ server.stat_net_output_bytes += totwritten; if (nwritten == -1) { if (errno == EAGAIN) { nwritten = 0; } else { - // serverLog(LL_VERBOSE, - // "Error writing to client: %s", strerror(errno)); + serverLog(LL_VERBOSE, + "Error writing to client: %s", strerror(errno)); freeClientAsync(c); return C_ERR; } From 647a66ebba5d12d461e830f174a1c90a4e96c5cd Mon Sep 17 00:00:00 2001 From: antirez Date: Fri, 12 Apr 2019 17:18:10 +0200 Subject: [PATCH 076/304] Threaded IO: parsing WIP 1: set current_client in a better scoped way. --- src/networking.c | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/src/networking.c b/src/networking.c index 0e11e1f3..3faaf4a1 100644 --- a/src/networking.c +++ b/src/networking.c @@ -1563,7 +1563,7 @@ int processMultibulkBuffer(client *c) { * or because a client was blocked and later reactivated, so there could be * pending query buffer, already representing a full command, to process. */ void processInputBuffer(client *c) { - server.current_client = c; + int deadclient = 0; /* Keep processing while there is something in the input buffer */ while(c->qb_pos < sdslen(c->querybuf)) { @@ -1619,6 +1619,7 @@ void processInputBuffer(client *c) { resetClient(c); } else { /* Only reset the client when the command was executed. */ + server.current_client = c; if (processCommand(c) == C_OK) { if (c->flags & CLIENT_MASTER && !(c->flags & CLIENT_MULTI)) { /* Update the applied replication offset of our master. */ @@ -1629,23 +1630,26 @@ void processInputBuffer(client *c) { * module blocking command, so that the reply callback will * still be able to access the client argv and argc field. * The client will be reset in unblockClientFromModule(). */ - if (!(c->flags & CLIENT_BLOCKED) || c->btype != BLOCKED_MODULE) + if (!(c->flags & CLIENT_BLOCKED) || + c->btype != BLOCKED_MODULE) + { resetClient(c); + } } + if (server.current_client == NULL) deadclient = 1; + server.current_client = NULL; /* freeMemoryIfNeeded may flush slave output buffers. This may * result into a slave, that may be the active client, to be * freed. */ - if (server.current_client == NULL) break; + if (deadclient) break; } } /* Trim to pos */ - if (server.current_client != NULL && c->qb_pos) { + if (!deadclient && c->qb_pos) { sdsrange(c->querybuf,c->qb_pos,-1); c->qb_pos = 0; } - - server.current_client = NULL; } /* This is a wrapper for processInputBuffer that also cares about handling @@ -1743,11 +1747,8 @@ void readQueryFromClient(aeEventLoop *el, int fd, void *privdata, int mask) { } /* There is more data in the client input buffer, continue parsing it - * in case to check if there is a full command to execute. 
- * Don't do it if the client is flagged as CLIENT_PENDING_READ: it means - * we are currently in the context of an I/O thread. */ - if (!(c->flags & CLIENT_PENDING_READ)) - processInputBufferAndReplicate(c); + * in case to check if there is a full command to execute. */ + processInputBufferAndReplicate(c); } void getClientsMaxBuffers(unsigned long *longest_output_list, From 6ab6a97fe6991d1496a3c8efa52280db3a3df3eb Mon Sep 17 00:00:00 2001 From: antirez Date: Fri, 26 Apr 2019 19:29:50 +0200 Subject: [PATCH 077/304] Threaded IO: parsing WIP 2: refactoring to parse from thread. --- src/networking.c | 89 ++++++++++++++++++++++++++++++++---------------- src/server.h | 1 + 2 files changed, 61 insertions(+), 29 deletions(-) diff --git a/src/networking.c b/src/networking.c index 3faaf4a1..4361ab1a 100644 --- a/src/networking.c +++ b/src/networking.c @@ -1558,13 +1558,47 @@ int processMultibulkBuffer(client *c) { return C_ERR; } +/* This function calls processCommand(), but also performs a few sub tasks + * that are useful in that context: + * + * 1. It sets the current client to the client 'c'. + * 2. In the case of master clients, the replication offset is updated. + * 3. The client is reset unless there are reasons to avoid doing it. + * + * The function returns C_ERR in case the client was freed as a side effect + * of processing the command, otherwise C_OK is returned. */ +int processCommandAndResetClient(client *c) { + int deadclient = 0; + server.current_client = c; + if (processCommand(c) == C_OK) { + if (c->flags & CLIENT_MASTER && !(c->flags & CLIENT_MULTI)) { + /* Update the applied replication offset of our master. */ + c->reploff = c->read_reploff - sdslen(c->querybuf) + c->qb_pos; + } + + /* Don't reset the client structure for clients blocked in a + * module blocking command, so that the reply callback will + * still be able to access the client argv and argc field. + * The client will be reset in unblockClientFromModule(). */ + if (!(c->flags & CLIENT_BLOCKED) || + c->btype != BLOCKED_MODULE) + { + resetClient(c); + } + } + if (server.current_client == NULL) deadclient = 1; + server.current_client = NULL; + /* freeMemoryIfNeeded may flush slave output buffers. This may + * result into a slave, that may be the active client, to be + * freed. */ + return deadclient ? C_ERR : C_OK; +} + /* This function is called every time, in the client structure 'c', there is * more query buffer to process, because we read more data from the socket * or because a client was blocked and later reactivated, so there could be * pending query buffer, already representing a full command, to process. */ void processInputBuffer(client *c) { - int deadclient = 0; - /* Keep processing while there is something in the input buffer */ while(c->qb_pos < sdslen(c->querybuf)) { /* Return if clients are paused. */ @@ -1573,6 +1607,10 @@ void processInputBuffer(client *c) { /* Immediately abort if the client is in the middle of something. */ if (c->flags & CLIENT_BLOCKED) break; + /* Don't process more buffers from clients that have already pending + * commands to execute in c->argv. */ + if (c->flags & CLIENT_PENDING_COMMAND) break; + /* Don't process input from the master while there is a busy script * condition on the slave. We want just to accumulate the replication * stream (instead of replying -BUSY like we do with other clients) and @@ -1618,35 +1656,26 @@ void processInputBuffer(client *c) { if (c->argc == 0) { resetClient(c); } else { - /* Only reset the client when the command was executed. 
*/ - server.current_client = c; - if (processCommand(c) == C_OK) { - if (c->flags & CLIENT_MASTER && !(c->flags & CLIENT_MULTI)) { - /* Update the applied replication offset of our master. */ - c->reploff = c->read_reploff - sdslen(c->querybuf) + c->qb_pos; - } - - /* Don't reset the client structure for clients blocked in a - * module blocking command, so that the reply callback will - * still be able to access the client argv and argc field. - * The client will be reset in unblockClientFromModule(). */ - if (!(c->flags & CLIENT_BLOCKED) || - c->btype != BLOCKED_MODULE) - { - resetClient(c); - } + /* If we are in the context of an I/O thread, we can't really + * execute the command here. All we can do is to flag the client + * as one that needs to process the command. */ + if (c->flags & CLIENT_PENDING_READ) { + c->flags |= CLIENT_PENDING_COMMAND; + break; + } + + /* We are finally ready to execute the command. */ + if (processCommandAndResetClient(c) == C_ERR) { + /* If the client is no longer valid, we avoid exiting this + * loop and trimming the client buffer later. So we return + * ASAP in that case. */ + return; } - if (server.current_client == NULL) deadclient = 1; - server.current_client = NULL; - /* freeMemoryIfNeeded may flush slave output buffers. This may - * result into a slave, that may be the active client, to be - * freed. */ - if (deadclient) break; } } /* Trim to pos */ - if (!deadclient && c->qb_pos) { + if (c->qb_pos) { sdsrange(c->querybuf,c->qb_pos,-1); c->qb_pos = 0; } @@ -1737,9 +1766,7 @@ void readQueryFromClient(aeEventLoop *el, int fd, void *privdata, int mask) { sds ci = catClientInfoString(sdsempty(),c), bytes = sdsempty(); bytes = sdscatrepr(bytes,c->querybuf,64); -// FIXME: This may be called from an I/O thread and it is not safe to -// log from there for now. -// serverLog(LL_WARNING,"Closing client that reached max query buffer length: %s (qbuf initial bytes: %s)", ci, bytes); + serverLog(LL_WARNING,"Closing client that reached max query buffer length: %s (qbuf initial bytes: %s)", ci, bytes); sdsfree(ci); sdsfree(bytes); freeClientAsync(c); @@ -2747,6 +2774,10 @@ int handleClientsWithPendingReadsUsingThreads(void) { while((ln = listNext(&li))) { client *c = listNodeValue(ln); c->flags &= ~CLIENT_PENDING_READ; + if (c->flags & CLIENT_PENDING_COMMAND) { + c->flags &= ~ CLIENT_PENDING_COMMAND; + processCommandAndResetClient(c); + } processInputBufferAndReplicate(c); } listEmpty(server.clients_pending_read); diff --git a/src/server.h b/src/server.h index 0d788241..c088d356 100644 --- a/src/server.h +++ b/src/server.h @@ -288,6 +288,7 @@ typedef long long mstime_t; /* millisecond time type. */ #define CLIENT_PENDING_READ (1<<29) /* The client has pending reads and was put in the list of clients we can read from. */ +#define CLIENT_PENDING_COMMAND (1<<30) /* */ /* Client block type (btype field in client structure) * if CLIENT_BLOCKED flag is set. */ From 90d720e7a5777ec34c93258d97592d8c6b439988 Mon Sep 17 00:00:00 2001 From: antirez Date: Mon, 29 Apr 2019 12:46:23 +0200 Subject: [PATCH 078/304] Threaded IO: put fflush() inside tio_debug conditional. 
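
The underlying C pitfall, shown with a minimal sketch that mirrors the diff
below (nothing here is new API): without braces the if only guards the first
statement, so the fflush() used to run on every call even with tio_debug off.

    if (tio_debug) printf("S"); fflush(stdout);      /* fflush() always runs */
    if (tio_debug) { printf("S"); fflush(stdout); }  /* both calls guarded   */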
--- src/networking.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/networking.c b/src/networking.c index 4361ab1a..74bd0f13 100644 --- a/src/networking.c +++ b/src/networking.c @@ -2608,7 +2608,7 @@ void initThreadedIO(void) { } void startThreadedIO(void) { - if (tio_debug) printf("S"); fflush(stdout); + if (tio_debug) { printf("S"); fflush(stdout); } if (tio_debug) printf("--- STARTING THREADED IO ---\n"); serverAssert(io_threads_active == 0); for (int j = 0; j < server.io_threads_num; j++) @@ -2620,7 +2620,7 @@ void stopThreadedIO(void) { /* We may have still clients with pending reads when this function * is called: handle them before stopping the threads. */ handleClientsWithPendingReadsUsingThreads(); - if (tio_debug) printf("E"); fflush(stdout); + if (tio_debug) { printf("E"); fflush(stdout); } if (tio_debug) printf("--- STOPPING THREADED IO [R%d] [W%d] ---\n", (int) listLength(server.clients_pending_read), (int) listLength(server.clients_pending_write)); From 1c0c436757f278565b400c36e763531d073ef4bb Mon Sep 17 00:00:00 2001 From: antirez Date: Tue, 30 Apr 2019 15:39:27 +0200 Subject: [PATCH 079/304] Threaded IO: ability to disable reads from threaded path. --- src/networking.c | 3 ++- src/server.c | 1 + src/server.h | 2 ++ 3 files changed, 5 insertions(+), 1 deletion(-) diff --git a/src/networking.c b/src/networking.c index 74bd0f13..651dbdb8 100644 --- a/src/networking.c +++ b/src/networking.c @@ -2723,6 +2723,7 @@ int handleClientsWithPendingWritesUsingThreads(void) { * pending read clients and flagged as such. */ int postponeClientRead(client *c) { if (io_threads_active && + server.io_threads_do_reads && !(c->flags & (CLIENT_MASTER|CLIENT_SLAVE|CLIENT_PENDING_READ))) { c->flags |= CLIENT_PENDING_READ; @@ -2734,7 +2735,7 @@ int postponeClientRead(client *c) { } int handleClientsWithPendingReadsUsingThreads(void) { - if (!io_threads_active) return 0; + if (!io_threads_active || !server.io_threads_do_reads) return 0; int processed = listLength(server.clients_pending_read); if (processed == 0) return 0; diff --git a/src/server.c b/src/server.c index e0c48b09..2643d726 100644 --- a/src/server.c +++ b/src/server.c @@ -2315,6 +2315,7 @@ void initServerConfig(void) { server.always_show_logo = CONFIG_DEFAULT_ALWAYS_SHOW_LOGO; server.lua_time_limit = LUA_SCRIPT_TIME_LIMIT; server.io_threads_num = CONFIG_DEFAULT_IO_THREADS_NUM; + server.io_threads_do_reads = CONFIG_DEFAULT_IO_THREADS_DO_READS; server.lruclock = getLRUClock(); resetServerSaveParams(); diff --git a/src/server.h b/src/server.h index c088d356..3987ab5f 100644 --- a/src/server.h +++ b/src/server.h @@ -88,6 +88,7 @@ typedef long long mstime_t; /* millisecond time type. */ #define CONFIG_DEFAULT_CLIENT_TIMEOUT 0 /* Default client timeout: infinite */ #define CONFIG_DEFAULT_DBNUM 16 #define CONFIG_DEFAULT_IO_THREADS_NUM 1 /* Single threaded by default */ +#define CONFIG_DEFAULT_IO_THREADS_DO_READS 0 /* Read + parse from threads? */ #define CONFIG_MAX_LINE 1024 #define CRON_DBS_PER_CALL 16 #define NET_MAX_WRITES_PER_EVENT (1024*64) @@ -1069,6 +1070,7 @@ struct redisServer { int gopher_enabled; /* If true the server will reply to gopher queries. Will still serve RESP2 queries. */ int io_threads_num; /* Number of IO threads to use. */ + int io_threads_do_reads; /* Read and parse from IO threads? 
*/ /* RDB / AOF loading information */ int loading; /* We are loading data from disk if true */ From 5baeb14cf3ca8eb345a7a7352bf482542168728e Mon Sep 17 00:00:00 2001 From: antirez Date: Tue, 30 Apr 2019 15:55:02 +0200 Subject: [PATCH 080/304] Threaded IO: configuration directive for turning on/off reads. --- src/config.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/config.c b/src/config.c index c4a18f3b..1686743a 100644 --- a/src/config.c +++ b/src/config.c @@ -318,6 +318,10 @@ void loadServerConfigFromString(char *config) { if (server.io_threads_num < 1 || server.io_threads_num > 512) { err = "Invalid number of I/O threads"; goto loaderr; } + } else if (!strcasecmp(argv[0],"io-threads-do-reads") && argc == 2) { + if ((server.io_threads_do_reads = yesnotoi(argv[1])) == -1) { + err = "argument must be 'yes' or 'no'"; goto loaderr; + } } else if (!strcasecmp(argv[0],"include") && argc == 2) { loadServerConfig(argv[1],NULL); } else if (!strcasecmp(argv[0],"maxclients") && argc == 2) { @@ -1485,6 +1489,7 @@ void configGetCommand(client *c) { config_get_bool_field("activedefrag", server.active_defrag_enabled); config_get_bool_field("protected-mode", server.protected_mode); config_get_bool_field("gopher-enabled", server.gopher_enabled); + config_get_bool_field("io-threads-do-reads", server.io_threads_do_reads); config_get_bool_field("repl-disable-tcp-nodelay", server.repl_disable_tcp_nodelay); config_get_bool_field("repl-diskless-sync", @@ -2316,6 +2321,7 @@ int rewriteConfig(char *path) { rewriteConfigYesNoOption(state,"activedefrag",server.active_defrag_enabled,CONFIG_DEFAULT_ACTIVE_DEFRAG); rewriteConfigYesNoOption(state,"protected-mode",server.protected_mode,CONFIG_DEFAULT_PROTECTED_MODE); rewriteConfigYesNoOption(state,"gopher-enabled",server.gopher_enabled,CONFIG_DEFAULT_GOPHER_ENABLED); + rewriteConfigYesNoOption(state,"io-threads-do-reads",server.io_threads_do_reads,CONFIG_DEFAULT_IO_THREADS_DO_READS); rewriteConfigClientoutputbufferlimitOption(state); rewriteConfigNumericalOption(state,"hz",server.config_hz,CONFIG_DEFAULT_HZ); rewriteConfigYesNoOption(state,"aof-rewrite-incremental-fsync",server.aof_rewrite_incremental_fsync,CONFIG_DEFAULT_AOF_REWRITE_INCREMENTAL_FSYNC); From 3d053dbb6dea51d59709913e9c0e9f96cc1d24f8 Mon Sep 17 00:00:00 2001 From: antirez Date: Tue, 30 Apr 2019 15:59:23 +0200 Subject: [PATCH 081/304] Threaded IO: handleClientsWithPendingReadsUsingThreads top comment. --- src/networking.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/networking.c b/src/networking.c index 651dbdb8..6fec9760 100644 --- a/src/networking.c +++ b/src/networking.c @@ -2734,6 +2734,12 @@ int postponeClientRead(client *c) { } } +/* When threaded I/O is also enabled for the reading + parsing side, the + * readable handler will just put normal clients into a queue of clients to + * process (instead of serving them synchronously). This function runs + * the queue using the I/O threads, and process them in order to accumulate + * the reads in the buffers, and also parse the first command available + * rendering it in the client structures. 
*/ int handleClientsWithPendingReadsUsingThreads(void) { if (!io_threads_active || !server.io_threads_do_reads) return 0; int processed = listLength(server.clients_pending_read); From 340a723b87eff9df140bfb1de239ea65e318fee1 Mon Sep 17 00:00:00 2001 From: "zhaozhao.zz" Date: Tue, 7 May 2019 13:35:27 +0800 Subject: [PATCH 082/304] Makefile: 1TD -> STD --- src/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Makefile b/src/Makefile index 1c80e547..f35685ef 100644 --- a/src/Makefile +++ b/src/Makefile @@ -20,7 +20,7 @@ DEPENDENCY_TARGETS=hiredis linenoise lua NODEPS:=clean distclean # Default settings -1TD=-std=c11 -pedantic -DREDIS_STATIC='' +STD=-std=c11 -pedantic -DREDIS_STATIC='' ifneq (,$(findstring clang,$(CC))) ifneq (,$(findstring FreeBSD,$(uname_S))) STD+=-Wno-c11-extensions From d9d3d3065ba3f6fc941fc6886da8392193d0cb41 Mon Sep 17 00:00:00 2001 From: stan011 Date: Tue, 7 May 2019 14:22:40 +0800 Subject: [PATCH 083/304] change the comments there may have a mis type --- src/t_list.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/t_list.c b/src/t_list.c index 45d2e331..54e4959b 100644 --- a/src/t_list.c +++ b/src/t_list.c @@ -617,7 +617,7 @@ void rpoplpushCommand(client *c) { * the AOF and replication channel. * * The argument 'where' is LIST_TAIL or LIST_HEAD, and indicates if the - * 'value' element was popped fron the head (BLPOP) or tail (BRPOP) so that + * 'value' element was popped from the head (BLPOP) or tail (BRPOP) so that * we can propagate the command properly. * * The function returns C_OK if we are able to serve the client, otherwise From 48d591a010bbe4cf0c09a4d84a55ee3f31e5664f Mon Sep 17 00:00:00 2001 From: "zhaozhao.zz" Date: Tue, 7 May 2019 15:59:16 +0800 Subject: [PATCH 084/304] fix memory leak when rewrite config file --- src/config.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/src/config.c b/src/config.c index 1686743a..7f0e9af8 100644 --- a/src/config.c +++ b/src/config.c @@ -1711,12 +1711,11 @@ void rewriteConfigMarkAsProcessed(struct rewriteConfigState *state, const char * * If the old file does not exist at all, an empty state is returned. */ struct rewriteConfigState *rewriteConfigReadOldFile(char *path) { FILE *fp = fopen(path,"r"); - struct rewriteConfigState *state = zmalloc(sizeof(*state)); - char buf[CONFIG_MAX_LINE+1]; - int linenum = -1; - if (fp == NULL && errno != ENOENT) return NULL; + char buf[CONFIG_MAX_LINE+1]; + int linenum = -1; + struct rewriteConfigState *state = zmalloc(sizeof(*state)); state->option_to_line = dictCreate(&optionToLineDictType,NULL); state->rewritten = dictCreate(&optionSetDictType,NULL); state->numlines = 0; From 842dd85b264f7d77a12273f8b2e7700ce99dd610 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=94=90=E6=9D=83?= Date: Wed, 8 May 2019 12:53:56 +0800 Subject: [PATCH 085/304] Update ziplist.c Hi, @antirez In the code, to get the size of ziplist, "unsigned int bytes = ZIPLIST_HEADER_SIZE+1;" is correct, but why not make it more readable and easy to understand --- src/ziplist.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/ziplist.c b/src/ziplist.c index 1579d110..ef40d6aa 100644 --- a/src/ziplist.c +++ b/src/ziplist.c @@ -576,7 +576,7 @@ void zipEntry(unsigned char *p, zlentry *e) { /* Create a new empty ziplist. 
*/ unsigned char *ziplistNew(void) { - unsigned int bytes = ZIPLIST_HEADER_SIZE+1; + unsigned int bytes = ZIPLIST_HEADER_SIZE+ZIPLIST_END_SIZE; unsigned char *zl = zmalloc(bytes); ZIPLIST_BYTES(zl) = intrev32ifbe(bytes); ZIPLIST_TAIL_OFFSET(zl) = intrev32ifbe(ZIPLIST_HEADER_SIZE); From fb4ee7f0c5ad81cae88a10587d5246a1b4f4dd84 Mon Sep 17 00:00:00 2001 From: yongman Date: Wed, 8 May 2019 16:13:42 +0800 Subject: [PATCH 086/304] Fix uint64_t hash value in active defrag --- src/defrag.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/defrag.c b/src/defrag.c index d67b6e25..ecf0255d 100644 --- a/src/defrag.c +++ b/src/defrag.c @@ -47,7 +47,7 @@ int je_get_defrag_hint(void* ptr, int *bin_util, int *run_util); /* forward declarations*/ void defragDictBucketCallback(void *privdata, dictEntry **bucketref); -dictEntry* replaceSateliteDictKeyPtrAndOrDefragDictEntry(dict *d, sds oldkey, sds newkey, unsigned int hash, long *defragged); +dictEntry* replaceSateliteDictKeyPtrAndOrDefragDictEntry(dict *d, sds oldkey, sds newkey, uint64_t hash, long *defragged); /* Defrag helper for generic allocations. * @@ -355,7 +355,7 @@ long activeDefragSdsListAndDict(list *l, dict *d, int dict_val_type) { sdsele = ln->value; if ((newsds = activeDefragSds(sdsele))) { /* When defragging an sds value, we need to update the dict key */ - unsigned int hash = dictGetHash(d, sdsele); + uint64_t hash = dictGetHash(d, sdsele); replaceSateliteDictKeyPtrAndOrDefragDictEntry(d, sdsele, newsds, hash, &defragged); ln->value = newsds; defragged++; @@ -392,7 +392,7 @@ long activeDefragSdsListAndDict(list *l, dict *d, int dict_val_type) { * moved. Return value is the the dictEntry if found, or NULL if not found. * NOTE: this is very ugly code, but it let's us avoid the complication of * doing a scan on another dict. 
 */
-dictEntry* replaceSateliteDictKeyPtrAndOrDefragDictEntry(dict *d, sds oldkey, sds newkey, unsigned int hash, long *defragged) {
+dictEntry* replaceSateliteDictKeyPtrAndOrDefragDictEntry(dict *d, sds oldkey, sds newkey, uint64_t hash, long *defragged) {
     dictEntry **deref = dictFindEntryRefByPtrAndHash(d, oldkey, hash);
     if (deref) {
         dictEntry *de = *deref;

From bea09a7fa6c2d332d6b298fb7a91cc04099faf47 Mon Sep 17 00:00:00 2001
From: Angus Pearson
Date: Wed, 8 May 2019 11:36:31 +0100
Subject: [PATCH 087/304] Add include to deps/hiredis/read.c to fix Implicit Declaration of strcasecmp warning

---
 deps/hiredis/read.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/deps/hiredis/read.c b/deps/hiredis/read.c
index c75c3435..cc0f3cc7 100644
--- a/deps/hiredis/read.c
+++ b/deps/hiredis/read.c
@@ -31,6 +31,7 @@
 #include "fmacros.h"
 #include
+#include
 #include
 #ifndef _MSC_VER
 #include

From a1fb0be1d7c1228c7b1c076426cbdf30f9489077 Mon Sep 17 00:00:00 2001
From: Angus Pearson
Date: Wed, 8 May 2019 12:13:45 +0100
Subject: [PATCH 088/304] Enlarge error buffer in redis-check-aof.c to remove compiler warning of output truncation through snprintf format string

---
 src/redis-check-aof.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/redis-check-aof.c b/src/redis-check-aof.c
index 54ed85f0..eedb09db 100644
--- a/src/redis-check-aof.c
+++ b/src/redis-check-aof.c
@@ -37,7 +37,7 @@
         snprintf(error, sizeof(error), "0x%16llx: %s", (long long)epos, __buf); \
 }
 
-static char error[1024];
+static char error[1044];
 static off_t epos;
 
 int consumeNewline(char *buf) {

From c64aec9ce7ead321f7b29f6a2a29649b36d4a4a0 Mon Sep 17 00:00:00 2001
From: "zhaozhao.zz"
Date: Fri, 10 May 2019 16:27:25 +0800
Subject: [PATCH 089/304] test cases: skiptill -> skip-till

---
 tests/test_helper.tcl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/test_helper.tcl b/tests/test_helper.tcl
index 568eacde..1442067f 100644
--- a/tests/test_helper.tcl
+++ b/tests/test_helper.tcl
@@ -503,7 +503,7 @@ for {set j 0} {$j < [llength $argv]} {incr j} {
     } elseif {$opt eq {--only}} {
         lappend ::only_tests $arg
         incr j
-    } elseif {$opt eq {--skiptill}} {
+    } elseif {$opt eq {--skip-till}} {
         set ::skip_till $arg
         incr j
     } elseif {$opt eq {--list-tests}} {

From 4f4676a1420a446e4233c04a80f2009aa819b21f Mon Sep 17 00:00:00 2001
From: antirez
Date: Mon, 13 May 2019 17:27:06 +0200
Subject: [PATCH 090/304] Fix test false positive introduced by threaded I/O.

Now clients that are ready to be terminated asynchronously are processed
more often in beforeSleep() instead of being processed in serverCron().
This means that the test will not be able to catch the moment the client
was terminated, also note that the 'omem' figure now changes in big steps,
because of the new client output buffers layout.

So we have to change the test range in order to accommodate for that.
Yet the test is useful enough to be worth keeping, even if its precision is
reduced by this commit. Probably if we get more problems, a thing that makes
sense is just to check that the limit is < 200k. That's more than enough
actually. 
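
For reference, a sketch of the looser check mentioned above, in the same Tcl
style as the test below (this is the alternative discussed in the message, not
what the patch applies):

    assert {$omem < 200000}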
---
 tests/unit/obuf-limits.tcl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/unit/obuf-limits.tcl b/tests/unit/obuf-limits.tcl
index 5d625cf4..c45bf8e8 100644
--- a/tests/unit/obuf-limits.tcl
+++ b/tests/unit/obuf-limits.tcl
@@ -15,7 +15,7 @@ start_server {tags {"obuf-limits"}} {
             if {![regexp {omem=([0-9]+)} $c - omem]} break
             if {$omem > 200000} break
         }
-        assert {$omem >= 90000 && $omem < 200000}
+        assert {$omem >= 70000 && $omem < 200000}
         $rd1 close
     }
 
From 9724ca4e22678038c511d312a78df6e8d9d29374 Mon Sep 17 00:00:00 2001
From: antirez
Date: Mon, 13 May 2019 17:30:02 +0200
Subject: [PATCH 091/304] Make comment in getClientOutputBufferMemoryUsage() describe the present.

---
 src/networking.c | 9 +--------
 1 file changed, 1 insertion(+), 8 deletions(-)

diff --git a/src/networking.c b/src/networking.c
index 6fec9760..4bc22120 100644
--- a/src/networking.c
+++ b/src/networking.c
@@ -2295,15 +2295,8 @@ void rewriteClientCommandArgument(client *c, int i, robj *newval) {
     }
 }
 
-/* This function returns the number of bytes that Redis is virtually
+/* This function returns the number of bytes that Redis is
  * using to store the reply still not read by the client.
- * It is "virtual" since the reply output list may contain objects that
- * are shared and are not really using additional memory.
- *
- * The function returns the total sum of the length of all the objects
- * stored in the output list, plus the memory used to allocate every
- * list node. The static reply buffer is not taken into account since it
- * is allocated anyway.
+ *
  * Note: this function is very fast so can be called as many time as
  * the caller wishes. The main usage of this function currently is
 
From 1a1ba483900005dab36e6749b112735593561f30 Mon Sep 17 00:00:00 2001
From: antirez
Date: Tue, 14 May 2019 16:54:59 +0200
Subject: [PATCH 092/304] Test: fix slowlog test false positive.

In fast systems "SLOWLOG RESET" is fast enough not to be logged even when
the time limit is "1" sometimes. Leading to false positives such as:

[err]: SLOWLOG - can be disabled in tests/unit/slowlog.tcl
Expected '1' to be equal to '0'
---
 tests/unit/slowlog.tcl | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/tests/unit/slowlog.tcl b/tests/unit/slowlog.tcl
index dbd7a154..22f08810 100644
--- a/tests/unit/slowlog.tcl
+++ b/tests/unit/slowlog.tcl
@@ -80,9 +80,11 @@ start_server {tags {"slowlog"} overrides {slowlog-log-slower-than 1000000}} {
     }
 
     test {SLOWLOG - can be disabled} {
+        r config set slowlog-max-len 1
         r config set slowlog-log-slower-than 1
         r slowlog reset
-        assert_equal [r slowlog len] 1
+        r debug sleep 0.2
+        assert_equal [r slowlog len] 1
         r config set slowlog-log-slower-than -1
         r slowlog reset
         r debug sleep 0.2

From 074d24df1e82b5c794a5da9fb6cda5b77b60b27b Mon Sep 17 00:00:00 2001
From: antirez
Date: Wed, 15 May 2019 12:16:43 +0200
Subject: [PATCH 093/304] Narrow the effects of PR #6029 to the exact state.

CLIENT PAUSE may be used, in other contexts, for a long time, making all the
slaves time out. Better for now to be more specific about what should disable
sending PINGs. An alternative to that would be to virtually refresh the slave
interactions when clients are paused, however for now I went for this more
conservative solution. 
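
A rough sketch of the alternative mentioned above (virtually refreshing the
replica interactions while clients are paused), kept here only for comparison:
it is not what the patch below implements, and the field names are assumptions
based on the replication code of this era.

    /* Hypothetical alternative: while a pause is active, pretend the
     * replicas interacted recently so that they cannot time out. */
    if (clientsArePaused()) {
        listIter li;
        listNode *ln;
        listRewind(server.slaves,&li);
        while ((ln = listNext(&li))) {
            client *slave = ln->value;
            slave->repl_ack_time = server.unixtime;
        }
    }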
--- src/replication.c | 22 +++++++++++++++++----- 1 file changed, 17 insertions(+), 5 deletions(-) diff --git a/src/replication.c b/src/replication.c index bfe50c92..63a67a06 100644 --- a/src/replication.c +++ b/src/replication.c @@ -30,6 +30,7 @@ #include "server.h" +#include "cluster.h" #include #include @@ -2601,12 +2602,23 @@ void replicationCron(void) { /* First, send PING according to ping_slave_period. */ if ((replication_cron_loops % server.repl_ping_slave_period) == 0 && - listLength(server.slaves) && !clientsArePaused()) + listLength(server.slaves)) { - ping_argv[0] = createStringObject("PING",4); - replicationFeedSlaves(server.slaves, server.slaveseldb, - ping_argv, 1); - decrRefCount(ping_argv[0]); + /* Note that we don't send the PING if the clients are paused during + * a Redis Cluster manual failover: the PING we send will otherwise + * alter the replication offsets of master and slave, and will no longer + * match the one stored into 'mf_master_offset' state. */ + int manual_failover_in_progress = + server.cluster_enabled && + server.cluster->mf_end && + clientsArePaused(); + + if (!manual_failover_in_progress) { + ping_argv[0] = createStringObject("PING",4); + replicationFeedSlaves(server.slaves, server.slaveseldb, + ping_argv, 1); + decrRefCount(ping_argv[0]); + } } /* Second, send a newline to all the slaves in pre-synchronization From fd9407dfc8dcf61628a060c6af7f92749169a5d3 Mon Sep 17 00:00:00 2001 From: antirez Date: Wed, 15 May 2019 12:46:01 +0200 Subject: [PATCH 094/304] Update CONTRIBUTING with present info. --- CONTRIBUTING | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/CONTRIBUTING b/CONTRIBUTING index 7dee24c7..5fb038e4 100644 --- a/CONTRIBUTING +++ b/CONTRIBUTING @@ -14,9 +14,7 @@ each source file that you contribute. PLEASE DO NOT POST GENERAL QUESTIONS that are not about bugs or suspected bugs in the Github issues system. We'll be very happy to help you and provide - all the support at the Reddit sub: - - http://reddit.com/r/redis + all the support in the mainling list. There is also an active community of Redis users at Stack Overflow: @@ -24,7 +22,12 @@ each source file that you contribute. # How to provide a patch for a new feature -1. If it is a major feature or a semantical change, please post it as a new submission in r/redis on Reddit at http://reddit.com/r/redis. Try to be passionate about why the feature is needed, make users upvote your proposal to gain traction and so forth. Read feedbacks about the community. But in this first step **please don't write code yet**. +1. If it is a major feature or a semantical change, please don't start coding +straight away: if your feature is not a conceptual fit you'll lose a lot of +time writing the code without any reason. Start by posting in the mailing list +and creating an issue at Github with the description of, excatly, what you want +to accomplish and why. Use cases are important for features to be accepted. +Here you'll see if there is consensus about your idea. 2. If in step 1 you get an acknowledgment from the project leaders, use the following procedure to submit a patch: @@ -35,6 +38,13 @@ each source file that you contribute. d. Initiate a pull request on github ( https://help.github.com/articles/creating-a-pull-request/ ) e. Done :) -For minor fixes just open a pull request on Github. +3. Keep in mind that we are very overloaded, so issues and PRs sometimes wait +for a *very* long time. 
However this is not lack of interest, as the project +gets more and more users, we find ourselves in a constant need to prioritize +certain issues/PRs over others. If you think your issue/PR is very important +try to popularize it, have other users commenting and sharing their point of +view and so forth. This helps. + +4. For minor fixes just open a pull request on Github. Thanks! From 7fababd44e8b8b2df53cad0906ec5ab7c678dd44 Mon Sep 17 00:00:00 2001 From: Christian Zeller Date: Wed, 15 May 2019 16:10:48 +0200 Subject: [PATCH 095/304] Typo fixes in CONTRIBUTING --- CONTRIBUTING | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/CONTRIBUTING b/CONTRIBUTING index 5fb038e4..000edbea 100644 --- a/CONTRIBUTING +++ b/CONTRIBUTING @@ -14,7 +14,7 @@ each source file that you contribute. PLEASE DO NOT POST GENERAL QUESTIONS that are not about bugs or suspected bugs in the Github issues system. We'll be very happy to help you and provide - all the support in the mainling list. + all the support in the mailing list. There is also an active community of Redis users at Stack Overflow: @@ -25,7 +25,7 @@ each source file that you contribute. 1. If it is a major feature or a semantical change, please don't start coding straight away: if your feature is not a conceptual fit you'll lose a lot of time writing the code without any reason. Start by posting in the mailing list -and creating an issue at Github with the description of, excatly, what you want +and creating an issue at Github with the description of, exactly, what you want to accomplish and why. Use cases are important for features to be accepted. Here you'll see if there is consensus about your idea. From bf963253ecfd367b49081a26c1b5c410558aecfc Mon Sep 17 00:00:00 2001 From: Angus Pearson Date: Wed, 22 May 2019 16:39:04 +0100 Subject: [PATCH 096/304] Implement `SCAN cursor [TYPE type]` modifier suggested in issue #6107. Add tests to check basic functionality of this optional keyword, and also tested with a module (redisgraph). Checked quickly with valgrind, no issues. Copies name the type name canonicalisation code from `typeCommand`, perhaps this would be better factored out to prevent the two diverging and both needing to be edited to add new `OBJ_*` types, but this is a little fiddly with C strings. The [redis-doc](https://github.com/antirez/redis-doc/blob/master/commands.json) repo will need to be updated with this new arg if accepted. A quirk to be aware of here is that the GEO commands are backed by zsets not their own type, so they're not distinguishable from other zsets. Additionally, for sparse types this has the same behaviour as `MATCH` in that it may return many empty results before giving something, even for large `COUNT`s. --- src/db.c | 32 +++++++++++++++++++++++++++++++- tests/unit/scan.tcl | 45 +++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 76 insertions(+), 1 deletion(-) diff --git a/src/db.c b/src/db.c index b537a29a..6623f7f2 100644 --- a/src/db.c +++ b/src/db.c @@ -613,7 +613,7 @@ int parseScanCursorOrReply(client *c, robj *o, unsigned long *cursor) { } /* This command implements SCAN, HSCAN and SSCAN commands. - * If object 'o' is passed, then it must be a Hash or Set object, otherwise + * If object 'o' is passed, then it must be a Hash, Set or Zset object, otherwise * if 'o' is NULL the command will operate on the dictionary associated with * the current database. 
* @@ -629,6 +629,7 @@ void scanGenericCommand(client *c, robj *o, unsigned long cursor) { listNode *node, *nextnode; long count = 10; sds pat = NULL; + sds typename = NULL; int patlen = 0, use_pattern = 0; dict *ht; @@ -665,6 +666,10 @@ void scanGenericCommand(client *c, robj *o, unsigned long cursor) { use_pattern = !(pat[0] == '*' && patlen == 1); i += 2; + } else if (!strcasecmp(c->argv[i]->ptr, "type") && o == NULL && j >= 2) { + /* SCAN for a particular type only applies to the db dict */ + typename = c->argv[i+1]->ptr; + i+= 2; } else { addReply(c,shared.syntaxerr); goto cleanup; @@ -759,6 +764,31 @@ void scanGenericCommand(client *c, robj *o, unsigned long cursor) { } } + /* Filter an element if it isn't the type we want. */ + if (!filter && o == NULL && typename){ + robj* typecheck; + char *type; + typecheck = lookupKeyReadWithFlags(c->db, kobj, LOOKUP_NOTOUCH); + if (typecheck == NULL) { + type = "none"; + } else { + switch(typecheck->type) { + case OBJ_STRING: type = "string"; break; + case OBJ_LIST: type = "list"; break; + case OBJ_SET: type = "set"; break; + case OBJ_ZSET: type = "zset"; break; + case OBJ_HASH: type = "hash"; break; + case OBJ_STREAM: type = "stream"; break; + case OBJ_MODULE: { + moduleValue *mv = typecheck->ptr; + type = mv->type->name; + }; break; + default: type = "unknown"; break; + } + } + if (strcasecmp((char*) typename, type)) filter = 1; + } + /* Filter element if it is an expired key. */ if (!filter && o == NULL && expireIfNeeded(c->db, kobj)) filter = 1; diff --git a/tests/unit/scan.tcl b/tests/unit/scan.tcl index c0f4349d..9f9ff4df 100644 --- a/tests/unit/scan.tcl +++ b/tests/unit/scan.tcl @@ -53,6 +53,51 @@ start_server {tags {"scan"}} { assert_equal 100 [llength $keys] } + test "SCAN TYPE" { + r flushdb + # populate only creates strings + r debug populate 1000 + + # Check non-strings are excluded + set cur 0 + set keys {} + while 1 { + set res [r scan $cur type "list"] + set cur [lindex $res 0] + set k [lindex $res 1] + lappend keys {*}$k + if {$cur == 0} break + } + + assert_equal 0 [llength $keys] + + # Check strings are included + set cur 0 + set keys {} + while 1 { + set res [r scan $cur type "string"] + set cur [lindex $res 0] + set k [lindex $res 1] + lappend keys {*}$k + if {$cur == 0} break + } + + assert_equal 1000 [llength $keys] + + # Check all three args work together + set cur 0 + set keys {} + while 1 { + set res [r scan $cur type "string" match "key:*" count 10] + set cur [lindex $res 0] + set k [lindex $res 1] + lappend keys {*}$k + if {$cur == 0} break + } + + assert_equal 1000 [llength $keys] + } + foreach enc {intset hashtable} { test "SSCAN with encoding $enc" { # Create the Set From 2fec7d9c6c630db3bcb13a07a08c39404abad447 Mon Sep 17 00:00:00 2001 From: Oran Agra Date: Thu, 30 May 2019 11:51:58 +0300 Subject: [PATCH 097/304] Jemalloc: Avoid blocking on background thread lock for stats. Background threads may run for a long time, especially when the # of dirty pages is high. Avoid blocking stats calls because of this (which may cause latency spikes). 
see https://github.com/jemalloc/jemalloc/issues/1502 cherry picked from commit 1a71533511027dbe3f9d989659efeec446915d6b --- deps/jemalloc/src/background_thread.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/deps/jemalloc/src/background_thread.c b/deps/jemalloc/src/background_thread.c index 3517a3bb..457669c9 100644 --- a/deps/jemalloc/src/background_thread.c +++ b/deps/jemalloc/src/background_thread.c @@ -787,7 +787,13 @@ background_thread_stats_read(tsdn_t *tsdn, background_thread_stats_t *stats) { nstime_init(&stats->run_interval, 0); for (unsigned i = 0; i < max_background_threads; i++) { background_thread_info_t *info = &background_thread_info[i]; - malloc_mutex_lock(tsdn, &info->mtx); + if (malloc_mutex_trylock(tsdn, &info->mtx)) { + /* + * Each background thread run may take a long time; + * avoid waiting on the stats if the thread is active. + */ + continue; + } if (info->state != background_thread_stopped) { num_runs += info->tot_n_runs; nstime_add(&stats->run_interval, &info->tot_sleep_time); From 09f99c2a925a0351985e799c106614082d6053cf Mon Sep 17 00:00:00 2001 From: Oran Agra Date: Thu, 30 May 2019 12:51:32 +0300 Subject: [PATCH 098/304] make redis purge jemalloc after flush, and enable background purging thread jemalloc 5 doesn't immediately release memory back to the OS, instead there's a decaying mechanism, which doesn't work when there's no traffic (no allocations). this is most evident if there's no traffic after flushdb, the RSS will remain high. 1) enable jemalloc background purging 2) explicitly purge in flushdb --- src/config.c | 9 ++++++++ src/db.c | 14 ++++++++++++ src/debug.c | 62 +++++++++++++++++++++++++++++++++++++++++++++++++++ src/server.c | 2 ++ src/server.h | 1 + src/zmalloc.c | 34 ++++++++++++++++++++++++++++ src/zmalloc.h | 2 ++ 7 files changed, 124 insertions(+) diff --git a/src/config.c b/src/config.c index 7f0e9af8..16850a1f 100644 --- a/src/config.c +++ b/src/config.c @@ -474,6 +474,10 @@ void loadServerConfigFromString(char *config) { err = "active defrag can't be enabled without proper jemalloc support"; goto loaderr; #endif } + } else if (!strcasecmp(argv[0],"jemalloc-bg-thread") && argc == 2) { + if ((server.jemalloc_bg_thread = yesnotoi(argv[1])) == -1) { + err = "argument must be 'yes' or 'no'"; goto loaderr; + } } else if (!strcasecmp(argv[0],"daemonize") && argc == 2) { if ((server.daemonize = yesnotoi(argv[1])) == -1) { err = "argument must be 'yes' or 'no'"; goto loaderr; @@ -1152,6 +1156,9 @@ void configSetCommand(client *c) { return; } #endif + } config_set_bool_field( + "jemalloc-bg-thread",server.jemalloc_bg_thread) { + set_jemalloc_bg_thread(server.jemalloc_bg_thread); } config_set_bool_field( "protected-mode",server.protected_mode) { } config_set_bool_field( @@ -1487,6 +1494,7 @@ void configGetCommand(client *c) { config_get_bool_field("rdbchecksum", server.rdb_checksum); config_get_bool_field("activerehashing", server.activerehashing); config_get_bool_field("activedefrag", server.active_defrag_enabled); + config_get_bool_field("jemalloc-bg-thread", server.jemalloc_bg_thread); config_get_bool_field("protected-mode", server.protected_mode); config_get_bool_field("gopher-enabled", server.gopher_enabled); config_get_bool_field("io-threads-do-reads", server.io_threads_do_reads); @@ -2318,6 +2326,7 @@ int rewriteConfig(char *path) { rewriteConfigNumericalOption(state,"hll-sparse-max-bytes",server.hll_sparse_max_bytes,CONFIG_DEFAULT_HLL_SPARSE_MAX_BYTES); 
rewriteConfigYesNoOption(state,"activerehashing",server.activerehashing,CONFIG_DEFAULT_ACTIVE_REHASHING); rewriteConfigYesNoOption(state,"activedefrag",server.active_defrag_enabled,CONFIG_DEFAULT_ACTIVE_DEFRAG); + rewriteConfigYesNoOption(state,"jemalloc-bg-thread",server.jemalloc_bg_thread,1); rewriteConfigYesNoOption(state,"protected-mode",server.protected_mode,CONFIG_DEFAULT_PROTECTED_MODE); rewriteConfigYesNoOption(state,"gopher-enabled",server.gopher_enabled,CONFIG_DEFAULT_GOPHER_ENABLED); rewriteConfigYesNoOption(state,"io-threads-do-reads",server.io_threads_do_reads,CONFIG_DEFAULT_IO_THREADS_DO_READS); diff --git a/src/db.c b/src/db.c index b537a29a..50e23d6b 100644 --- a/src/db.c +++ b/src/db.c @@ -441,6 +441,13 @@ void flushdbCommand(client *c) { signalFlushedDb(c->db->id); server.dirty += emptyDb(c->db->id,flags,NULL); addReply(c,shared.ok); +#if defined(USE_JEMALLOC) + /* jemalloc 5 doesn't release pages back to the OS when there's no traffic. + * for large databases, flushdb blocks for long anyway, so a bit more won't + * harm and this way the flush and purge will be synchroneus. */ + if (!(flags & EMPTYDB_ASYNC)) + jemalloc_purge(); +#endif } /* FLUSHALL [ASYNC] @@ -464,6 +471,13 @@ void flushallCommand(client *c) { server.dirty = saved_dirty; } server.dirty++; +#if defined(USE_JEMALLOC) + /* jemalloc 5 doesn't release pages back to the OS when there's no traffic. + * for large databases, flushdb blocks for long anyway, so a bit more won't + * harm and this way the flush and purge will be synchroneus. */ + if (!(flags & EMPTYDB_ASYNC)) + jemalloc_purge(); +#endif } /* This command implements DEL and LAZYDEL. */ diff --git a/src/debug.c b/src/debug.c index 0c6b5630..c82c99b1 100644 --- a/src/debug.c +++ b/src/debug.c @@ -297,6 +297,56 @@ void computeDatasetDigest(unsigned char *final) { } } +#ifdef USE_JEMALLOC +void mallctl_int(client *c, robj **argv, int argc) { + int ret; + /* start with the biggest size (int64), and if that fails, try smaller sizes (int32, bool) */ + int64_t old = 0, val; + if (argc > 1) { + long long ll; + if (getLongLongFromObjectOrReply(c, argv[1], &ll, NULL) != C_OK) + return; + val = ll; + } + size_t sz = sizeof(old); + while (sz > 0) { + if ((ret=je_mallctl(argv[0]->ptr, &old, &sz, argc > 1? &val: NULL, argc > 1?sz: 0))) { + if (ret==EINVAL) { + /* size might be wrong, try a smaller one */ + sz /= 2; +#if BYTE_ORDER == BIG_ENDIAN + val <<= 8*sz; +#endif + continue; + } + addReplyErrorFormat(c,"%s", strerror(ret)); + return; + } else { +#if BYTE_ORDER == BIG_ENDIAN + old >>= 64 - 8*sz; +#endif + addReplyLongLong(c, old); + return; + } + } + addReplyErrorFormat(c,"%s", strerror(EINVAL)); +} + +void mallctl_string(client *c, robj **argv, int argc) { + int ret; + char *old; + size_t sz = sizeof(old); + /* for strings, it seems we need to first get the old value, before overriding it. 
*/ + if ((ret=je_mallctl(argv[0]->ptr, &old, &sz, NULL, 0))) { + addReplyErrorFormat(c,"%s", strerror(ret)); + return; + } + addReplyBulkCString(c, old); + if(argc > 1) + je_mallctl(argv[0]->ptr, NULL, 0, &argv[1]->ptr, sizeof(char*)); +} +#endif + void debugCommand(client *c) { if (c->argc == 2 && !strcasecmp(c->argv[1]->ptr,"help")) { const char *help[] = { @@ -323,6 +373,10 @@ void debugCommand(client *c) { "STRUCTSIZE -- Return the size of different Redis core C structures.", "ZIPLIST -- Show low level info about the ziplist encoding.", "STRINGMATCH-TEST -- Run a fuzz tester against the stringmatchlen() function.", +#ifdef USE_JEMALLOC +"MALLCTL [] -- Get or set a malloc tunning integer.", +"MALLCTL-STR [] -- Get or set a malloc tunning string.", +#endif NULL }; addReplyHelp(c, help); @@ -676,6 +730,14 @@ NULL { stringmatchlen_fuzz_test(); addReplyStatus(c,"Apparently Redis did not crash: test passed"); +#ifdef USE_JEMALLOC + } else if(!strcasecmp(c->argv[1]->ptr,"mallctl") && c->argc >= 3) { + mallctl_int(c, c->argv+2, c->argc-2); + return; + } else if(!strcasecmp(c->argv[1]->ptr,"mallctl-str") && c->argc >= 3) { + mallctl_string(c, c->argv+2, c->argc-2); + return; +#endif } else { addReplySubcommandSyntaxError(c); return; diff --git a/src/server.c b/src/server.c index 4b87b6ac..fa2c7b1e 100644 --- a/src/server.c +++ b/src/server.c @@ -2230,6 +2230,7 @@ void initServerConfig(void) { server.maxidletime = CONFIG_DEFAULT_CLIENT_TIMEOUT; server.tcpkeepalive = CONFIG_DEFAULT_TCP_KEEPALIVE; server.active_expire_enabled = 1; + server.jemalloc_bg_thread = 1; server.active_defrag_enabled = CONFIG_DEFAULT_ACTIVE_DEFRAG; server.active_defrag_ignore_bytes = CONFIG_DEFAULT_DEFRAG_IGNORE_BYTES; server.active_defrag_threshold_lower = CONFIG_DEFAULT_DEFRAG_THRESHOLD_LOWER; @@ -2866,6 +2867,7 @@ void initServer(void) { latencyMonitorInit(); bioInit(); initThreadedIO(); + set_jemalloc_bg_thread(server.jemalloc_bg_thread); server.initial_memory_usage = zmalloc_used_memory(); } diff --git a/src/server.h b/src/server.h index 0813f8bd..4ae079ff 100644 --- a/src/server.h +++ b/src/server.h @@ -1129,6 +1129,7 @@ struct redisServer { int tcpkeepalive; /* Set SO_KEEPALIVE if non-zero. */ int active_expire_enabled; /* Can be disabled for testing purposes. 
*/ int active_defrag_enabled; + int jemalloc_bg_thread; /* Enable jemalloc background thread */ size_t active_defrag_ignore_bytes; /* minimum amount of fragmentation waste to start active defrag */ int active_defrag_threshold_lower; /* minimum percentage of fragmentation to start active defrag */ int active_defrag_threshold_upper; /* maximum percentage of fragmentation at which we use maximum effort */ diff --git a/src/zmalloc.c b/src/zmalloc.c index 5e601027..58896a72 100644 --- a/src/zmalloc.c +++ b/src/zmalloc.c @@ -306,6 +306,7 @@ size_t zmalloc_get_rss(void) { #endif #if defined(USE_JEMALLOC) + int zmalloc_get_allocator_info(size_t *allocated, size_t *active, size_t *resident) { @@ -327,13 +328,46 @@ int zmalloc_get_allocator_info(size_t *allocated, je_mallctl("stats.allocated", allocated, &sz, NULL, 0); return 1; } + +void set_jemalloc_bg_thread(int enable) { + /* let jemalloc do purging asynchronously, required when there's no traffic + * after flushdb */ + if (enable) { + char val = 1; + je_mallctl("background_thread", NULL, 0, &val, 1); + } +} + +int jemalloc_purge() { + /* return all unused (reserved) pages to the OS */ + char tmp[32]; + unsigned narenas = 0; + size_t sz = sizeof(unsigned); + if (!je_mallctl("arenas.narenas", &narenas, &sz, NULL, 0)) { + sprintf(tmp, "arena.%d.purge", narenas); + if (!je_mallctl(tmp, NULL, 0, NULL, 0)) + return 0; + } + return -1; +} + #else + int zmalloc_get_allocator_info(size_t *allocated, size_t *active, size_t *resident) { *allocated = *resident = *active = 0; return 1; } + +void set_jemalloc_bg_thread(int enable) { + ((void)(enable)); +} + +int jemalloc_purge() { + return 0; +} + #endif /* Get the sum of the specified field (converted form kb to bytes) in diff --git a/src/zmalloc.h b/src/zmalloc.h index 6fb19b04..b136a910 100644 --- a/src/zmalloc.h +++ b/src/zmalloc.h @@ -86,6 +86,8 @@ size_t zmalloc_used_memory(void); void zmalloc_set_oom_handler(void (*oom_handler)(size_t)); size_t zmalloc_get_rss(void); int zmalloc_get_allocator_info(size_t *allocated, size_t *active, size_t *resident); +void set_jemalloc_bg_thread(int enable); +int jemalloc_purge(); size_t zmalloc_get_private_dirty(long pid); size_t zmalloc_get_smap_bytes_by_field(char *field, long pid); size_t zmalloc_get_memory_size(void); From d3ed53f11b66d8af4b2334e8651a2eded880d086 Mon Sep 17 00:00:00 2001 From: artix Date: Wed, 5 Jun 2019 16:34:55 +0200 Subject: [PATCH 099/304] Redis Benchmark: prevent CONFIG failure from exiting program --- src/redis-benchmark.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/src/redis-benchmark.c b/src/redis-benchmark.c index 2785167a..1d16fa4e 100644 --- a/src/redis-benchmark.c +++ b/src/redis-benchmark.c @@ -1540,7 +1540,10 @@ int main(int argc, const char **argv) { if (node->name) printf("%s ", node->name); printf("%s:%d\n", node->ip, node->port); node->redis_config = getRedisConfig(node->ip, node->port, NULL); - if (node->redis_config == NULL) exit(1); + if (node->redis_config == NULL) { + fprintf(stderr, "WARN: could not fetch node CONFIG %s:%d\n", + node->ip, node->port); + } } printf("\n"); /* Automatically set thread number to node count if not specified @@ -1550,7 +1553,8 @@ int main(int argc, const char **argv) { } else { config.redis_config = getRedisConfig(config.hostip, config.hostport, config.hostsocket); - if (config.redis_config == NULL) exit(1); + if (config.redis_config == NULL) + fprintf(stderr, "WARN: could not fetch server CONFIG\n"); } if (config.num_threads > 0) { From 
dd5f4378718c95eeb753ddc5cb083cd76de5694a Mon Sep 17 00:00:00 2001 From: Madelyn Olson Date: Fri, 7 Jun 2019 13:20:22 -0700 Subject: [PATCH 100/304] Fixed some spelling issues in ACL codepath including user facing error --- src/acl.c | 22 +++++++++++----------- src/server.c | 2 +- 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/src/acl.c b/src/acl.c index 0205e51a..a2ee65dd 100644 --- a/src/acl.c +++ b/src/acl.c @@ -295,7 +295,7 @@ int ACLGetCommandBitCoordinates(uint64_t id, uint64_t *word, uint64_t *bit) { * Note that this function does not check the ALLCOMMANDS flag of the user * but just the lowlevel bitmask. * - * If the bit overflows the user internal represetation, zero is returned + * If the bit overflows the user internal representation, zero is returned * in order to disallow the execution of the command in such edge case. */ int ACLGetUserCommandBit(user *u, unsigned long id) { uint64_t word, bit; @@ -311,7 +311,7 @@ int ACLUserCanExecuteFutureCommands(user *u) { } /* Set the specified command bit for the specified user to 'value' (0 or 1). - * If the bit overflows the user internal represetation, no operation + * If the bit overflows the user internal representation, no operation * is performed. As a side effect of calling this function with a value of * zero, the user flag ALLCOMMANDS is cleared since it is no longer possible * to skip the command bit explicit test. */ @@ -350,7 +350,7 @@ int ACLSetUserCommandBitsForCategory(user *u, const char *category, int value) { /* Return the number of commands allowed (on) and denied (off) for the user 'u' * in the subset of commands flagged with the specified category name. - * If the categoty name is not valid, C_ERR is returend, otherwise C_OK is + * If the category name is not valid, C_ERR is returned, otherwise C_OK is * returned and on and off are populated by reference. */ int ACLCountCategoryBitsForUser(user *u, unsigned long *on, unsigned long *off, const char *category) @@ -626,7 +626,7 @@ void ACLAddAllowedSubcommand(user *u, unsigned long id, const char *sub) { * It is possible to specify multiple patterns. * allkeys Alias for ~* * resetkeys Flush the list of allowed keys patterns. - * > Add this passowrd to the list of valid password for the user. + * > Add this password to the list of valid password for the user. * For example >mypass will add "mypass" to the list. * This directive clears the "nopass" flag (see later). * < Remove this password from the list of valid passwords. @@ -949,9 +949,9 @@ user *ACLGetUserByName(const char *name, size_t namelen) { return myuser; } -/* Check if the command ready to be excuted in the client 'c', and already - * referenced by c->cmd, can be executed by this client according to the - * ACls associated to the client user c->user. +/* Check if the command is ready to be executed in the client 'c', already + * referenced by c->cmd, and can be executed by this client according to the + * ACLs associated to the client user c->user. * * If the user can execute the command ACL_OK is returned, otherwise * ACL_DENIED_CMD or ACL_DENIED_KEY is returned: the first in case the @@ -1122,7 +1122,7 @@ int ACLLoadConfiguredUsers(void) { } /* This function loads the ACL from the specified filename: every line - * is validated and shold be either empty or in the format used to specify + * is validated and should be either empty or in the format used to specify * users in the redis.conf configuration or in the ACL file, that is: * * user ... rules ... 
@@ -1172,7 +1172,7 @@ sds ACLLoadFromFile(const char *filename) { * to the real user mentioned in the ACL line. */ user *fakeuser = ACLCreateUnlinkedUser(); - /* We do all the loading in a fresh insteance of the Users radix tree, + /* We do all the loading in a fresh instance of the Users radix tree, * so if there are errors loading the ACL file we can rollback to the * old version. */ rax *old_users = Users; @@ -1248,7 +1248,7 @@ sds ACLLoadFromFile(const char *filename) { } /* Note that the same rules already applied to the fake user, so - * we just assert that everything goess well: it should. */ + * we just assert that everything goes well: it should. */ for (j = 2; j < argc; j++) serverAssert(ACLSetUser(u,argv[j],sdslen(argv[j])) == C_OK); @@ -1611,7 +1611,7 @@ void addReplyCommandCategories(client *c, struct redisCommand *cmd) { setDeferredSetLen(c, flaglen, flagcount); } -/* AUTH +/* AUTH * AUTH (Redis >= 6.0 form) * * When the user is omitted it means that we are trying to authenticate diff --git a/src/server.c b/src/server.c index 2643d726..e4df0469 100644 --- a/src/server.c +++ b/src/server.c @@ -3325,7 +3325,7 @@ int processCommand(client *c) { if (acl_retval == ACL_DENIED_CMD) addReplyErrorFormat(c, "-NOPERM this user has no permissions to run " - "the '%s' command or its subcommnad", c->cmd->name); + "the '%s' command or its subcommand", c->cmd->name); else addReplyErrorFormat(c, "-NOPERM this user has no permissions to access " From e2adea21884260dc983242cc483f4602104146e5 Mon Sep 17 00:00:00 2001 From: Angus Pearson Date: Mon, 10 Jun 2019 17:41:44 +0100 Subject: [PATCH 101/304] Add char* typeNameCanonicalize(robj*) to remove duplicate code between SCAN and TYPE commands, and to keep OBJ_* enum to string canonicalization in one place. --- src/db.c | 37 +++++++++++-------------------------- src/server.h | 6 ++++++ 2 files changed, 17 insertions(+), 26 deletions(-) diff --git a/src/db.c b/src/db.c index 6623f7f2..6557ddc3 100644 --- a/src/db.c +++ b/src/db.c @@ -766,26 +766,8 @@ void scanGenericCommand(client *c, robj *o, unsigned long cursor) { /* Filter an element if it isn't the type we want. 
*/ if (!filter && o == NULL && typename){ - robj* typecheck; - char *type; - typecheck = lookupKeyReadWithFlags(c->db, kobj, LOOKUP_NOTOUCH); - if (typecheck == NULL) { - type = "none"; - } else { - switch(typecheck->type) { - case OBJ_STRING: type = "string"; break; - case OBJ_LIST: type = "list"; break; - case OBJ_SET: type = "set"; break; - case OBJ_ZSET: type = "zset"; break; - case OBJ_HASH: type = "hash"; break; - case OBJ_STREAM: type = "stream"; break; - case OBJ_MODULE: { - moduleValue *mv = typecheck->ptr; - type = mv->type->name; - }; break; - default: type = "unknown"; break; - } - } + robj* typecheck = lookupKeyReadWithFlags(c->db, kobj, LOOKUP_NOTOUCH); + char* type = typeNameCanonicalize(typecheck); if (strcasecmp((char*) typename, type)) filter = 1; } @@ -845,11 +827,8 @@ void lastsaveCommand(client *c) { addReplyLongLong(c,server.lastsave); } -void typeCommand(client *c) { - robj *o; - char *type; - - o = lookupKeyReadWithFlags(c->db,c->argv[1],LOOKUP_NOTOUCH); +char* typeNameCanonicalize(robj *o) { + char* type; if (o == NULL) { type = "none"; } else { @@ -867,7 +846,13 @@ void typeCommand(client *c) { default: type = "unknown"; break; } } - addReplyStatus(c,type); + return type; +} + +void typeCommand(client *c) { + robj *o; + o = lookupKeyReadWithFlags(c->db,c->argv[1],LOOKUP_NOTOUCH); + addReplyStatus(c, typeNameCanonicalize(o)); } void shutdownCommand(client *c) { diff --git a/src/server.h b/src/server.h index 0813f8bd..06d0611f 100644 --- a/src/server.h +++ b/src/server.h @@ -646,6 +646,12 @@ typedef struct redisObject { void *ptr; } robj; +/* The 'cannonical' name for a type as enumerated above is given by the + * below function. Native types are checked against the OBJ_STRING, + * OBJ_LIST, OBJ_* defines, and Module types have their registered name + * returned.*/ +char* typeNameCanonicalize(robj*); + /* Macro used to initialize a Redis object allocated on the stack. * Note that this macro is taken near the structure definition to make sure * we'll update it when the structure is changed, to avoid bugs like From f54d95e803cb9dc017e1d40e1a0051101d5cb7dd Mon Sep 17 00:00:00 2001 From: swilly22 Date: Wed, 12 Jun 2019 15:37:19 +0300 Subject: [PATCH 102/304] Extend REDISMODULE_CTX_FLAGS to indicate if redis is currently loading from either RDB or AOF --- src/module.c | 3 +++ src/redismodule.h | 2 ++ 2 files changed, 5 insertions(+) diff --git a/src/module.c b/src/module.c index 7dee7e77..1cdd94d1 100644 --- a/src/module.c +++ b/src/module.c @@ -1455,6 +1455,9 @@ int RM_GetContextFlags(RedisModuleCtx *ctx) { if (server.cluster_enabled) flags |= REDISMODULE_CTX_FLAGS_CLUSTER; + if (server.loading) + flags |= REDISMODULE_CTX_FLAGS_LOADING; + /* Maxmemory and eviction policy */ if (server.maxmemory > 0) { flags |= REDISMODULE_CTX_FLAGS_MAXMEMORY; diff --git a/src/redismodule.h b/src/redismodule.h index 259a5f1d..16b8c193 100644 --- a/src/redismodule.h +++ b/src/redismodule.h @@ -87,6 +87,8 @@ #define REDISMODULE_CTX_FLAGS_OOM_WARNING (1<<11) /* The command was sent over the replication link. */ #define REDISMODULE_CTX_FLAGS_REPLICATED (1<<12) +/* Redis is currently loading either from AOF or RDB. 
*/ +#define REDISMODULE_CTX_FLAGS_LOADING (1<<13) #define REDISMODULE_NOTIFY_GENERIC (1<<2) /* g */ From 38cd5fd9f66aecc0d9a09892701cb938a48d61b1 Mon Sep 17 00:00:00 2001 From: Angus Pearson Date: Thu, 13 Jun 2019 17:49:33 +0100 Subject: [PATCH 103/304] Spelling cannonical -> canonical --- src/server.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/server.h b/src/server.h index 06d0611f..dc02edb5 100644 --- a/src/server.h +++ b/src/server.h @@ -646,7 +646,7 @@ typedef struct redisObject { void *ptr; } robj; -/* The 'cannonical' name for a type as enumerated above is given by the +/* The 'canonical' name for a type as enumerated above is given by the * below function. Native types are checked against the OBJ_STRING, * OBJ_LIST, OBJ_* defines, and Module types have their registered name * returned.*/ From dd51fc5a4aa90ccd6897ed268883d4f72b15ed53 Mon Sep 17 00:00:00 2001 From: Madelyn Olson Date: Fri, 31 May 2019 12:05:18 -0700 Subject: [PATCH 104/304] Refactored yesno configs so there was less duplication --- src/config.c | 354 +++++++++++++-------------------------------------- 1 file changed, 89 insertions(+), 265 deletions(-) diff --git a/src/config.c b/src/config.c index 7f0e9af8..2e6e9a6b 100644 --- a/src/config.c +++ b/src/config.c @@ -98,6 +98,48 @@ clientBufferLimitsConfig clientBufferLimitsDefaults[CLIENT_TYPE_OBUF_COUNT] = { {1024*1024*32, 1024*1024*8, 60} /* pubsub */ }; +/* Configuration values that require no special handling to set, get, load or + * rewrite. */ +typedef struct configYesNo { + const char *name; /* The user visible name of this config */ + const char *alias; /* An alias that can also be used for this config */ + int *config; /* The pointer to the server config this value is stored in */ + const int modifiable; /* Can this value be updated by CONFIG SET? 
*/ + const int default_value; /* The default value of the config on rewrite */ +} configYesNo; + +configYesNo configs_yesno[] = { + /* Non-Modifiable */ + {"rdbchecksum",NULL,&server.rdb_checksum,0,CONFIG_DEFAULT_RDB_CHECKSUM}, + {"daemonize",NULL,&server.daemonize,0,0}, + {"io-threads-do-reads",NULL,&server.io_threads_do_reads, 0, CONFIG_DEFAULT_IO_THREADS_DO_READS}, + {"always-show-logo",NULL,&server.always_show_logo,0,CONFIG_DEFAULT_ALWAYS_SHOW_LOGO}, + /* Modifiable */ + {"protected-mode",NULL,&server.protected_mode,1,CONFIG_DEFAULT_PROTECTED_MODE}, + {"rdbcompression",NULL,&server.rdb_compression,1,CONFIG_DEFAULT_RDB_COMPRESSION}, + {"activerehashing",NULL,&server.activerehashing,1,CONFIG_DEFAULT_ACTIVE_REHASHING}, + {"stop-writes-on-bgsave-error",NULL,&server.stop_writes_on_bgsave_err,1,CONFIG_DEFAULT_STOP_WRITES_ON_BGSAVE_ERROR}, + {"dynamic-hz",NULL,&server.dynamic_hz,1,CONFIG_DEFAULT_DYNAMIC_HZ}, + {"lazyfree-lazy-eviction",NULL,&server.lazyfree_lazy_eviction,1,CONFIG_DEFAULT_LAZYFREE_LAZY_EVICTION}, + {"lazyfree-lazy-expire",NULL,&server.lazyfree_lazy_expire,1,CONFIG_DEFAULT_LAZYFREE_LAZY_EXPIRE}, + {"lazyfree-lazy-server-del",NULL,&server.lazyfree_lazy_server_del,1,CONFIG_DEFAULT_LAZYFREE_LAZY_SERVER_DEL}, + {"repl-disable-tcp-nodelay",NULL,&server.repl_disable_tcp_nodelay,1,CONFIG_DEFAULT_REPL_DISABLE_TCP_NODELAY}, + {"repl-diskless-sync",NULL,&server.repl_diskless_sync,1,CONFIG_DEFAULT_REPL_DISKLESS_SYNC}, + {"gopher-enabled",NULL,&server.gopher_enabled,1,CONFIG_DEFAULT_GOPHER_ENABLED}, + {"aof-rewrite-incremental-fsync",NULL,&server.aof_rewrite_incremental_fsync,1,CONFIG_DEFAULT_AOF_REWRITE_INCREMENTAL_FSYNC}, + {"no-appendfsync-on-rewrite",NULL,&server.aof_no_fsync_on_rewrite,1,CONFIG_DEFAULT_AOF_NO_FSYNC_ON_REWRITE}, + {"cluster-require-full-coverage",NULL,&server.cluster_require_full_coverage,CLUSTER_DEFAULT_REQUIRE_FULL_COVERAGE}, + {"rdb-save-incremental-fsync",NULL,&server.rdb_save_incremental_fsync,1,CONFIG_DEFAULT_RDB_SAVE_INCREMENTAL_FSYNC}, + {"aof-load-truncated",NULL,&server.aof_load_truncated,1,CONFIG_DEFAULT_AOF_LOAD_TRUNCATED}, + {"aof-use-rdb-preamble",NULL,&server.aof_use_rdb_preamble,1,CONFIG_DEFAULT_AOF_USE_RDB_PREAMBLE}, + {"cluster-replica-no-failover","cluster-slave-no-failover",&server.cluster_slave_no_failover,1,CLUSTER_DEFAULT_SLAVE_NO_FAILOVER}, + {"replica-lazy-flush","slave-lazy-flush",&server.repl_slave_lazy_flush,1,CONFIG_DEFAULT_SLAVE_LAZY_FLUSH}, + {"replica-serve-stale-data","slave-serve-stale-data",&server.repl_serve_stale_data,1,CONFIG_DEFAULT_SLAVE_SERVE_STALE_DATA}, + {"replica-read-only","slave-read-only",&server.repl_slave_ro,1,CONFIG_DEFAULT_SLAVE_READ_ONLY}, + {"replica-ignore-maxmemory","slave-ignore-maxmemory",&server.repl_slave_ignore_maxmemory,1,CONFIG_DEFAULT_SLAVE_IGNORE_MAXMEMORY}, + {NULL, NULL, 0, 0} +}; + /*----------------------------------------------------------------------------- * Enum access functions *----------------------------------------------------------------------------*/ @@ -201,6 +243,26 @@ void loadServerConfigFromString(char *config) { } sdstolower(argv[0]); + /* Iterate the configs that are standard */ + int match = 0; + for (configYesNo *config = configs_yesno; config->name != NULL; config++) { + if ((!strcasecmp(argv[0],config->name) || + (config->alias && !strcasecmp(argv[0],config->alias))) && + (argc == 2)) + { + if ((*(config->config) = yesnotoi(argv[1])) == -1) { + err = "argument must be 'yes' or 'no'"; goto loaderr; + } + match = 1; + break; + } + } + + if (match) { + sdsfreesplitres(argv,argc); + 
continue; + } + /* Execute config directives */ if (!strcasecmp(argv[0],"timeout") && argc == 2) { server.maxidletime = atoi(argv[1]); @@ -212,14 +274,6 @@ void loadServerConfigFromString(char *config) { if (server.tcpkeepalive < 0) { err = "Invalid tcp-keepalive value"; goto loaderr; } - } else if (!strcasecmp(argv[0],"protected-mode") && argc == 2) { - if ((server.protected_mode = yesnotoi(argv[1])) == -1) { - err = "argument must be 'yes' or 'no'"; goto loaderr; - } - } else if (!strcasecmp(argv[0],"gopher-enabled") && argc == 2) { - if ((server.gopher_enabled = yesnotoi(argv[1])) == -1) { - err = "argument must be 'yes' or 'no'"; goto loaderr; - } } else if (!strcasecmp(argv[0],"port") && argc == 2) { server.port = atoi(argv[1]); if (server.port < 0 || server.port > 65535) { @@ -290,10 +344,6 @@ void loadServerConfigFromString(char *config) { } else if (!strcasecmp(argv[0],"aclfile") && argc == 2) { zfree(server.acl_filename); server.acl_filename = zstrdup(argv[1]); - } else if (!strcasecmp(argv[0],"always-show-logo") && argc == 2) { - if ((server.always_show_logo = yesnotoi(argv[1])) == -1) { - err = "argument must be 'yes' or 'no'"; goto loaderr; - } } else if (!strcasecmp(argv[0],"syslog-enabled") && argc == 2) { if ((server.syslog_enabled = yesnotoi(argv[1])) == -1) { err = "argument must be 'yes' or 'no'"; goto loaderr; @@ -318,10 +368,6 @@ void loadServerConfigFromString(char *config) { if (server.io_threads_num < 1 || server.io_threads_num > 512) { err = "Invalid number of I/O threads"; goto loaderr; } - } else if (!strcasecmp(argv[0],"io-threads-do-reads") && argc == 2) { - if ((server.io_threads_do_reads = yesnotoi(argv[1])) == -1) { - err = "argument must be 'yes' or 'no'"; goto loaderr; - } } else if (!strcasecmp(argv[0],"include") && argc == 2) { loadServerConfig(argv[1],NULL); } else if (!strcasecmp(argv[0],"maxclients") && argc == 2) { @@ -381,14 +427,6 @@ void loadServerConfigFromString(char *config) { err = "repl-timeout must be 1 or greater"; goto loaderr; } - } else if (!strcasecmp(argv[0],"repl-disable-tcp-nodelay") && argc==2) { - if ((server.repl_disable_tcp_nodelay = yesnotoi(argv[1])) == -1) { - err = "argument must be 'yes' or 'no'"; goto loaderr; - } - } else if (!strcasecmp(argv[0],"repl-diskless-sync") && argc==2) { - if ((server.repl_diskless_sync = yesnotoi(argv[1])) == -1) { - err = "argument must be 'yes' or 'no'"; goto loaderr; - } } else if (!strcasecmp(argv[0],"repl-diskless-sync-delay") && argc==2) { server.repl_diskless_sync_delay = atoi(argv[1]); if (server.repl_diskless_sync_delay < 0) { @@ -414,57 +452,6 @@ void loadServerConfigFromString(char *config) { } else if (!strcasecmp(argv[0],"masterauth") && argc == 2) { zfree(server.masterauth); server.masterauth = argv[1][0] ? 
zstrdup(argv[1]) : NULL; - } else if ((!strcasecmp(argv[0],"slave-serve-stale-data") || - !strcasecmp(argv[0],"replica-serve-stale-data")) - && argc == 2) - { - if ((server.repl_serve_stale_data = yesnotoi(argv[1])) == -1) { - err = "argument must be 'yes' or 'no'"; goto loaderr; - } - } else if ((!strcasecmp(argv[0],"slave-read-only") || - !strcasecmp(argv[0],"replica-read-only")) - && argc == 2) - { - if ((server.repl_slave_ro = yesnotoi(argv[1])) == -1) { - err = "argument must be 'yes' or 'no'"; goto loaderr; - } - } else if ((!strcasecmp(argv[0],"slave-ignore-maxmemory") || - !strcasecmp(argv[0],"replica-ignore-maxmemory")) - && argc == 2) - { - if ((server.repl_slave_ignore_maxmemory = yesnotoi(argv[1])) == -1) { - err = "argument must be 'yes' or 'no'"; goto loaderr; - } - } else if (!strcasecmp(argv[0],"rdbcompression") && argc == 2) { - if ((server.rdb_compression = yesnotoi(argv[1])) == -1) { - err = "argument must be 'yes' or 'no'"; goto loaderr; - } - } else if (!strcasecmp(argv[0],"rdbchecksum") && argc == 2) { - if ((server.rdb_checksum = yesnotoi(argv[1])) == -1) { - err = "argument must be 'yes' or 'no'"; goto loaderr; - } - } else if (!strcasecmp(argv[0],"activerehashing") && argc == 2) { - if ((server.activerehashing = yesnotoi(argv[1])) == -1) { - err = "argument must be 'yes' or 'no'"; goto loaderr; - } - } else if (!strcasecmp(argv[0],"lazyfree-lazy-eviction") && argc == 2) { - if ((server.lazyfree_lazy_eviction = yesnotoi(argv[1])) == -1) { - err = "argument must be 'yes' or 'no'"; goto loaderr; - } - } else if (!strcasecmp(argv[0],"lazyfree-lazy-expire") && argc == 2) { - if ((server.lazyfree_lazy_expire = yesnotoi(argv[1])) == -1) { - err = "argument must be 'yes' or 'no'"; goto loaderr; - } - } else if (!strcasecmp(argv[0],"lazyfree-lazy-server-del") && argc == 2){ - if ((server.lazyfree_lazy_server_del = yesnotoi(argv[1])) == -1) { - err = "argument must be 'yes' or 'no'"; goto loaderr; - } - } else if ((!strcasecmp(argv[0],"slave-lazy-flush") || - !strcasecmp(argv[0],"replica-lazy-flush")) && argc == 2) - { - if ((server.repl_slave_lazy_flush = yesnotoi(argv[1])) == -1) { - err = "argument must be 'yes' or 'no'"; goto loaderr; - } } else if (!strcasecmp(argv[0],"activedefrag") && argc == 2) { if ((server.active_defrag_enabled = yesnotoi(argv[1])) == -1) { err = "argument must be 'yes' or 'no'"; goto loaderr; @@ -474,14 +461,6 @@ void loadServerConfigFromString(char *config) { err = "active defrag can't be enabled without proper jemalloc support"; goto loaderr; #endif } - } else if (!strcasecmp(argv[0],"daemonize") && argc == 2) { - if ((server.daemonize = yesnotoi(argv[1])) == -1) { - err = "argument must be 'yes' or 'no'"; goto loaderr; - } - } else if (!strcasecmp(argv[0],"dynamic-hz") && argc == 2) { - if ((server.dynamic_hz = yesnotoi(argv[1])) == -1) { - err = "argument must be 'yes' or 'no'"; goto loaderr; - } } else if (!strcasecmp(argv[0],"hz") && argc == 2) { server.config_hz = atoi(argv[1]); if (server.config_hz < CONFIG_MIN_HZ) server.config_hz = CONFIG_MIN_HZ; @@ -500,11 +479,6 @@ void loadServerConfigFromString(char *config) { } zfree(server.aof_filename); server.aof_filename = zstrdup(argv[1]); - } else if (!strcasecmp(argv[0],"no-appendfsync-on-rewrite") - && argc == 2) { - if ((server.aof_no_fsync_on_rewrite= yesnotoi(argv[1])) == -1) { - err = "argument must be 'yes' or 'no'"; goto loaderr; - } } else if (!strcasecmp(argv[0],"appendfsync") && argc == 2) { server.aof_fsync = configEnumGetValue(aof_fsync_enum,argv[1]); if (server.aof_fsync == 
INT_MIN) { @@ -523,28 +497,6 @@ void loadServerConfigFromString(char *config) { argc == 2) { server.aof_rewrite_min_size = memtoll(argv[1],NULL); - } else if (!strcasecmp(argv[0],"aof-rewrite-incremental-fsync") && - argc == 2) - { - if ((server.aof_rewrite_incremental_fsync = - yesnotoi(argv[1])) == -1) { - err = "argument must be 'yes' or 'no'"; goto loaderr; - } - } else if (!strcasecmp(argv[0],"rdb-save-incremental-fsync") && - argc == 2) - { - if ((server.rdb_save_incremental_fsync = - yesnotoi(argv[1])) == -1) { - err = "argument must be 'yes' or 'no'"; goto loaderr; - } - } else if (!strcasecmp(argv[0],"aof-load-truncated") && argc == 2) { - if ((server.aof_load_truncated = yesnotoi(argv[1])) == -1) { - err = "argument must be 'yes' or 'no'"; goto loaderr; - } - } else if (!strcasecmp(argv[0],"aof-use-rdb-preamble") && argc == 2) { - if ((server.aof_use_rdb_preamble = yesnotoi(argv[1])) == -1) { - err = "argument must be 'yes' or 'no'"; goto loaderr; - } } else if (!strcasecmp(argv[0],"requirepass") && argc == 2) { if (strlen(argv[1]) > CONFIG_AUTHPASS_MAX_LEN) { err = "Password is longer than CONFIG_AUTHPASS_MAX_LEN"; @@ -678,13 +630,6 @@ void loadServerConfigFromString(char *config) { { err = "Invalid port"; goto loaderr; } - } else if (!strcasecmp(argv[0],"cluster-require-full-coverage") && - argc == 2) - { - if ((server.cluster_require_full_coverage = yesnotoi(argv[1])) == -1) - { - err = "argument must be 'yes' or 'no'"; goto loaderr; - } } else if (!strcasecmp(argv[0],"cluster-node-timeout") && argc == 2) { server.cluster_node_timeout = strtoll(argv[1],NULL,10); if (server.cluster_node_timeout <= 0) { @@ -707,15 +652,6 @@ void loadServerConfigFromString(char *config) { err = "cluster replica validity factor must be zero or positive"; goto loaderr; } - } else if ((!strcasecmp(argv[0],"cluster-slave-no-failover") || - !strcasecmp(argv[0],"cluster-replica-no-failover")) && - argc == 2) - { - server.cluster_slave_no_failover = yesnotoi(argv[1]); - if (server.cluster_slave_no_failover == -1) { - err = "argument must be 'yes' or 'no'"; - goto loaderr; - } } else if (!strcasecmp(argv[0],"lua-time-limit") && argc == 2) { server.lua_time_limit = strtoll(argv[1],NULL,10); } else if (!strcasecmp(argv[0],"lua-replicate-commands") && argc == 2) { @@ -756,11 +692,6 @@ void loadServerConfigFromString(char *config) { server.client_obuf_limits[class].hard_limit_bytes = hard; server.client_obuf_limits[class].soft_limit_bytes = soft; server.client_obuf_limits[class].soft_limit_seconds = soft_seconds; - } else if (!strcasecmp(argv[0],"stop-writes-on-bgsave-error") && - argc == 2) { - if ((server.stop_writes_on_bgsave_err = yesnotoi(argv[1])) == -1) { - err = "argument must be 'yes' or 'no'"; goto loaderr; - } } else if ((!strcasecmp(argv[0],"slave-priority") || !strcasecmp(argv[0],"replica-priority")) && argc == 2) { @@ -941,6 +872,19 @@ void configSetCommand(client *c) { serverAssertWithInfo(c,c->argv[3],sdsEncodedObject(c->argv[3])); o = c->argv[3]; + /* Iterate the configs that are standard */ + for (configYesNo *config = configs_yesno; config->name != NULL; config++) { + if(config->modifiable && (!strcasecmp(c->argv[2]->ptr,config->name) || + (config->alias && !strcasecmp(c->argv[2]->ptr,config->alias)))) + { + int yn = yesnotoi(o->ptr); + if (yn == -1) goto badfmt; + *(config->config) = yn; + addReply(c,shared.ok); + return; + } + } + if (0) { /* this starts the config_set macros else-if chain. */ /* Special fields that can't be handled with general macros. 
*/ @@ -1105,40 +1049,6 @@ void configSetCommand(client *c) { /* Boolean fields. * config_set_bool_field(name,var). */ - } config_set_bool_field( - "rdbcompression", server.rdb_compression) { - } config_set_bool_field( - "repl-disable-tcp-nodelay",server.repl_disable_tcp_nodelay) { - } config_set_bool_field( - "repl-diskless-sync",server.repl_diskless_sync) { - } config_set_bool_field( - "cluster-require-full-coverage",server.cluster_require_full_coverage) { - } config_set_bool_field( - "cluster-slave-no-failover",server.cluster_slave_no_failover) { - } config_set_bool_field( - "cluster-replica-no-failover",server.cluster_slave_no_failover) { - } config_set_bool_field( - "aof-rewrite-incremental-fsync",server.aof_rewrite_incremental_fsync) { - } config_set_bool_field( - "rdb-save-incremental-fsync",server.rdb_save_incremental_fsync) { - } config_set_bool_field( - "aof-load-truncated",server.aof_load_truncated) { - } config_set_bool_field( - "aof-use-rdb-preamble",server.aof_use_rdb_preamble) { - } config_set_bool_field( - "slave-serve-stale-data",server.repl_serve_stale_data) { - } config_set_bool_field( - "replica-serve-stale-data",server.repl_serve_stale_data) { - } config_set_bool_field( - "slave-read-only",server.repl_slave_ro) { - } config_set_bool_field( - "replica-read-only",server.repl_slave_ro) { - } config_set_bool_field( - "slave-ignore-maxmemory",server.repl_slave_ignore_maxmemory) { - } config_set_bool_field( - "replica-ignore-maxmemory",server.repl_slave_ignore_maxmemory) { - } config_set_bool_field( - "activerehashing",server.activerehashing) { } config_set_bool_field( "activedefrag",server.active_defrag_enabled) { #ifndef HAVE_DEFRAG @@ -1152,27 +1062,6 @@ void configSetCommand(client *c) { return; } #endif - } config_set_bool_field( - "protected-mode",server.protected_mode) { - } config_set_bool_field( - "gopher-enabled",server.gopher_enabled) { - } config_set_bool_field( - "stop-writes-on-bgsave-error",server.stop_writes_on_bgsave_err) { - } config_set_bool_field( - "lazyfree-lazy-eviction",server.lazyfree_lazy_eviction) { - } config_set_bool_field( - "lazyfree-lazy-expire",server.lazyfree_lazy_expire) { - } config_set_bool_field( - "lazyfree-lazy-server-del",server.lazyfree_lazy_server_del) { - } config_set_bool_field( - "slave-lazy-flush",server.repl_slave_lazy_flush) { - } config_set_bool_field( - "replica-lazy-flush",server.repl_slave_lazy_flush) { - } config_set_bool_field( - "no-appendfsync-on-rewrite",server.aof_no_fsync_on_rewrite) { - } config_set_bool_field( - "dynamic-hz",server.dynamic_hz) { - /* Numerical fields. 
* config_set_numerical_field(name,var,min,max) */ } config_set_numerical_field( @@ -1460,60 +1349,15 @@ void configGetCommand(client *c) { config_get_numerical_field("tcp-keepalive",server.tcpkeepalive); /* Bool (yes/no) values */ - config_get_bool_field("cluster-require-full-coverage", - server.cluster_require_full_coverage); - config_get_bool_field("cluster-slave-no-failover", - server.cluster_slave_no_failover); - config_get_bool_field("cluster-replica-no-failover", - server.cluster_slave_no_failover); - config_get_bool_field("no-appendfsync-on-rewrite", - server.aof_no_fsync_on_rewrite); - config_get_bool_field("slave-serve-stale-data", - server.repl_serve_stale_data); - config_get_bool_field("replica-serve-stale-data", - server.repl_serve_stale_data); - config_get_bool_field("slave-read-only", - server.repl_slave_ro); - config_get_bool_field("replica-read-only", - server.repl_slave_ro); - config_get_bool_field("slave-ignore-maxmemory", - server.repl_slave_ignore_maxmemory); - config_get_bool_field("replica-ignore-maxmemory", - server.repl_slave_ignore_maxmemory); - config_get_bool_field("stop-writes-on-bgsave-error", - server.stop_writes_on_bgsave_err); - config_get_bool_field("daemonize", server.daemonize); - config_get_bool_field("rdbcompression", server.rdb_compression); - config_get_bool_field("rdbchecksum", server.rdb_checksum); - config_get_bool_field("activerehashing", server.activerehashing); + /* Iterate the configs that are standard */ + for (configYesNo *config = configs_yesno; config->name != NULL; config++) { + config_get_bool_field(config->name, *(config->config)); + if (config->alias) { + config_get_bool_field(config->alias, *(config->config)); + } + } + config_get_bool_field("activedefrag", server.active_defrag_enabled); - config_get_bool_field("protected-mode", server.protected_mode); - config_get_bool_field("gopher-enabled", server.gopher_enabled); - config_get_bool_field("io-threads-do-reads", server.io_threads_do_reads); - config_get_bool_field("repl-disable-tcp-nodelay", - server.repl_disable_tcp_nodelay); - config_get_bool_field("repl-diskless-sync", - server.repl_diskless_sync); - config_get_bool_field("aof-rewrite-incremental-fsync", - server.aof_rewrite_incremental_fsync); - config_get_bool_field("rdb-save-incremental-fsync", - server.rdb_save_incremental_fsync); - config_get_bool_field("aof-load-truncated", - server.aof_load_truncated); - config_get_bool_field("aof-use-rdb-preamble", - server.aof_use_rdb_preamble); - config_get_bool_field("lazyfree-lazy-eviction", - server.lazyfree_lazy_eviction); - config_get_bool_field("lazyfree-lazy-expire", - server.lazyfree_lazy_expire); - config_get_bool_field("lazyfree-lazy-server-del", - server.lazyfree_lazy_server_del); - config_get_bool_field("slave-lazy-flush", - server.repl_slave_lazy_flush); - config_get_bool_field("replica-lazy-flush", - server.repl_slave_lazy_flush); - config_get_bool_field("dynamic-hz", - server.dynamic_hz); /* Enum values */ config_get_enum_field("maxmemory-policy", @@ -1858,7 +1702,7 @@ void rewriteConfigBytesOption(struct rewriteConfigState *state, char *option, lo } /* Rewrite a yes/no option. */ -void rewriteConfigYesNoOption(struct rewriteConfigState *state, char *option, int value, int defvalue) { +void rewriteConfigYesNoOption(struct rewriteConfigState *state, const char *option, int value, int defvalue) { int force = value != defvalue; sds line = sdscatprintf(sdsempty(),"%s %s",option, value ? 
"yes" : "no"); @@ -2228,7 +2072,11 @@ int rewriteConfig(char *path) { /* Step 2: rewrite every single option, replacing or appending it inside * the rewrite state. */ - rewriteConfigYesNoOption(state,"daemonize",server.daemonize,0); + /* Iterate the configs that are standard */ + for (configYesNo *config = configs_yesno; config->name != NULL; config++) { + rewriteConfigYesNoOption(state,config->name,*(config->config),config->default_value); + } + rewriteConfigStringOption(state,"pidfile",server.pidfile,CONFIG_DEFAULT_PID_FILE); rewriteConfigNumericalOption(state,"port",server.port,CONFIG_DEFAULT_SERVER_PORT); rewriteConfigNumericalOption(state,"cluster-announce-port",server.cluster_announce_port,CONFIG_DEFAULT_CLUSTER_ANNOUNCE_PORT); @@ -2250,9 +2098,6 @@ int rewriteConfig(char *path) { rewriteConfigUserOption(state); rewriteConfigNumericalOption(state,"databases",server.dbnum,CONFIG_DEFAULT_DBNUM); rewriteConfigNumericalOption(state,"io-threads",server.dbnum,CONFIG_DEFAULT_IO_THREADS_NUM); - rewriteConfigYesNoOption(state,"stop-writes-on-bgsave-error",server.stop_writes_on_bgsave_err,CONFIG_DEFAULT_STOP_WRITES_ON_BGSAVE_ERROR); - rewriteConfigYesNoOption(state,"rdbcompression",server.rdb_compression,CONFIG_DEFAULT_RDB_COMPRESSION); - rewriteConfigYesNoOption(state,"rdbchecksum",server.rdb_checksum,CONFIG_DEFAULT_RDB_CHECKSUM); rewriteConfigStringOption(state,"dbfilename",server.rdb_filename,CONFIG_DEFAULT_RDB_FILENAME); rewriteConfigDirOption(state); rewriteConfigSlaveofOption(state,"replicaof"); @@ -2260,15 +2105,10 @@ int rewriteConfig(char *path) { rewriteConfigStringOption(state,"masteruser",server.masteruser,NULL); rewriteConfigStringOption(state,"masterauth",server.masterauth,NULL); rewriteConfigStringOption(state,"cluster-announce-ip",server.cluster_announce_ip,NULL); - rewriteConfigYesNoOption(state,"replica-serve-stale-data",server.repl_serve_stale_data,CONFIG_DEFAULT_SLAVE_SERVE_STALE_DATA); - rewriteConfigYesNoOption(state,"replica-read-only",server.repl_slave_ro,CONFIG_DEFAULT_SLAVE_READ_ONLY); - rewriteConfigYesNoOption(state,"replica-ignore-maxmemory",server.repl_slave_ignore_maxmemory,CONFIG_DEFAULT_SLAVE_IGNORE_MAXMEMORY); rewriteConfigNumericalOption(state,"repl-ping-replica-period",server.repl_ping_slave_period,CONFIG_DEFAULT_REPL_PING_SLAVE_PERIOD); rewriteConfigNumericalOption(state,"repl-timeout",server.repl_timeout,CONFIG_DEFAULT_REPL_TIMEOUT); rewriteConfigBytesOption(state,"repl-backlog-size",server.repl_backlog_size,CONFIG_DEFAULT_REPL_BACKLOG_SIZE); rewriteConfigBytesOption(state,"repl-backlog-ttl",server.repl_backlog_time_limit,CONFIG_DEFAULT_REPL_BACKLOG_TIME_LIMIT); - rewriteConfigYesNoOption(state,"repl-disable-tcp-nodelay",server.repl_disable_tcp_nodelay,CONFIG_DEFAULT_REPL_DISABLE_TCP_NODELAY); - rewriteConfigYesNoOption(state,"repl-diskless-sync",server.repl_diskless_sync,CONFIG_DEFAULT_REPL_DISKLESS_SYNC); rewriteConfigNumericalOption(state,"repl-diskless-sync-delay",server.repl_diskless_sync_delay,CONFIG_DEFAULT_REPL_DISKLESS_SYNC_DELAY); rewriteConfigNumericalOption(state,"replica-priority",server.slave_priority,CONFIG_DEFAULT_SLAVE_PRIORITY); rewriteConfigNumericalOption(state,"min-replicas-to-write",server.repl_min_slaves_to_write,CONFIG_DEFAULT_MIN_SLAVES_TO_WRITE); @@ -2291,14 +2131,11 @@ int rewriteConfig(char *path) { rewriteConfigYesNoOption(state,"appendonly",server.aof_state != AOF_OFF,0); rewriteConfigStringOption(state,"appendfilename",server.aof_filename,CONFIG_DEFAULT_AOF_FILENAME); 
rewriteConfigEnumOption(state,"appendfsync",server.aof_fsync,aof_fsync_enum,CONFIG_DEFAULT_AOF_FSYNC); - rewriteConfigYesNoOption(state,"no-appendfsync-on-rewrite",server.aof_no_fsync_on_rewrite,CONFIG_DEFAULT_AOF_NO_FSYNC_ON_REWRITE); rewriteConfigNumericalOption(state,"auto-aof-rewrite-percentage",server.aof_rewrite_perc,AOF_REWRITE_PERC); rewriteConfigBytesOption(state,"auto-aof-rewrite-min-size",server.aof_rewrite_min_size,AOF_REWRITE_MIN_SIZE); rewriteConfigNumericalOption(state,"lua-time-limit",server.lua_time_limit,LUA_SCRIPT_TIME_LIMIT); rewriteConfigYesNoOption(state,"cluster-enabled",server.cluster_enabled,0); rewriteConfigStringOption(state,"cluster-config-file",server.cluster_configfile,CONFIG_DEFAULT_CLUSTER_CONFIG_FILE); - rewriteConfigYesNoOption(state,"cluster-require-full-coverage",server.cluster_require_full_coverage,CLUSTER_DEFAULT_REQUIRE_FULL_COVERAGE); - rewriteConfigYesNoOption(state,"cluster-replica-no-failover",server.cluster_slave_no_failover,CLUSTER_DEFAULT_SLAVE_NO_FAILOVER); rewriteConfigNumericalOption(state,"cluster-node-timeout",server.cluster_node_timeout,CLUSTER_DEFAULT_NODE_TIMEOUT); rewriteConfigNumericalOption(state,"cluster-migration-barrier",server.cluster_migration_barrier,CLUSTER_DEFAULT_MIGRATION_BARRIER); rewriteConfigNumericalOption(state,"cluster-replica-validity-factor",server.cluster_slave_validity_factor,CLUSTER_DEFAULT_SLAVE_VALIDITY); @@ -2316,23 +2153,10 @@ int rewriteConfig(char *path) { rewriteConfigNumericalOption(state,"zset-max-ziplist-entries",server.zset_max_ziplist_entries,OBJ_ZSET_MAX_ZIPLIST_ENTRIES); rewriteConfigNumericalOption(state,"zset-max-ziplist-value",server.zset_max_ziplist_value,OBJ_ZSET_MAX_ZIPLIST_VALUE); rewriteConfigNumericalOption(state,"hll-sparse-max-bytes",server.hll_sparse_max_bytes,CONFIG_DEFAULT_HLL_SPARSE_MAX_BYTES); - rewriteConfigYesNoOption(state,"activerehashing",server.activerehashing,CONFIG_DEFAULT_ACTIVE_REHASHING); rewriteConfigYesNoOption(state,"activedefrag",server.active_defrag_enabled,CONFIG_DEFAULT_ACTIVE_DEFRAG); - rewriteConfigYesNoOption(state,"protected-mode",server.protected_mode,CONFIG_DEFAULT_PROTECTED_MODE); - rewriteConfigYesNoOption(state,"gopher-enabled",server.gopher_enabled,CONFIG_DEFAULT_GOPHER_ENABLED); - rewriteConfigYesNoOption(state,"io-threads-do-reads",server.io_threads_do_reads,CONFIG_DEFAULT_IO_THREADS_DO_READS); rewriteConfigClientoutputbufferlimitOption(state); rewriteConfigNumericalOption(state,"hz",server.config_hz,CONFIG_DEFAULT_HZ); - rewriteConfigYesNoOption(state,"aof-rewrite-incremental-fsync",server.aof_rewrite_incremental_fsync,CONFIG_DEFAULT_AOF_REWRITE_INCREMENTAL_FSYNC); - rewriteConfigYesNoOption(state,"rdb-save-incremental-fsync",server.rdb_save_incremental_fsync,CONFIG_DEFAULT_RDB_SAVE_INCREMENTAL_FSYNC); - rewriteConfigYesNoOption(state,"aof-load-truncated",server.aof_load_truncated,CONFIG_DEFAULT_AOF_LOAD_TRUNCATED); - rewriteConfigYesNoOption(state,"aof-use-rdb-preamble",server.aof_use_rdb_preamble,CONFIG_DEFAULT_AOF_USE_RDB_PREAMBLE); rewriteConfigEnumOption(state,"supervised",server.supervised_mode,supervised_mode_enum,SUPERVISED_NONE); - rewriteConfigYesNoOption(state,"lazyfree-lazy-eviction",server.lazyfree_lazy_eviction,CONFIG_DEFAULT_LAZYFREE_LAZY_EVICTION); - rewriteConfigYesNoOption(state,"lazyfree-lazy-expire",server.lazyfree_lazy_expire,CONFIG_DEFAULT_LAZYFREE_LAZY_EXPIRE); - rewriteConfigYesNoOption(state,"lazyfree-lazy-server-del",server.lazyfree_lazy_server_del,CONFIG_DEFAULT_LAZYFREE_LAZY_SERVER_DEL); - 
rewriteConfigYesNoOption(state,"replica-lazy-flush",server.repl_slave_lazy_flush,CONFIG_DEFAULT_SLAVE_LAZY_FLUSH); - rewriteConfigYesNoOption(state,"dynamic-hz",server.dynamic_hz,CONFIG_DEFAULT_DYNAMIC_HZ); /* Rewrite Sentinel config if in Sentinel mode. */ if (server.sentinel_mode) rewriteConfigSentinelOption(state); From 2d236d7aecbf12373618191b7ef4b2365d487c6f Mon Sep 17 00:00:00 2001 From: "zheng.ren01@mljr.com" Date: Tue, 25 Jun 2019 18:34:35 +0800 Subject: [PATCH 105/304] =?UTF-8?q?fix=20readme.md=EF=BC=8CRedis=20data=20?= =?UTF-8?q?types=20should=20add=20`t=5Fstream.c`.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 6c9435b5..3442659e 100644 --- a/README.md +++ b/README.md @@ -406,7 +406,7 @@ replicas, or to continue the replication after a disconnection. Other C files --- -* `t_hash.c`, `t_list.c`, `t_set.c`, `t_string.c` and `t_zset.c` contains the implementation of the Redis data types. They implement both an API to access a given data type, and the client commands implementations for these data types. +* `t_hash.c`, `t_list.c`, `t_set.c`, `t_string.c`, `t_zset.c` and `t_stream.c` contains the implementation of the Redis data types. They implement both an API to access a given data type, and the client commands implementations for these data types. * `ae.c` implements the Redis event loop, it's a self contained library which is simple to read and understand. * `sds.c` is the Redis string library, check http://github.com/antirez/sds for more information. * `anet.c` is a library to use POSIX networking in a simpler way compared to the raw interface exposed by the kernel. From a28d7918d7ba229666670f29f8202cc67f4f3a1a Mon Sep 17 00:00:00 2001 From: antirez Date: Sat, 29 Jun 2019 09:09:38 -0400 Subject: [PATCH 106/304] Client side caching: add new file and description. 
--- src/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Makefile b/src/Makefile index f35685ef..e608309f 100644 --- a/src/Makefile +++ b/src/Makefile @@ -164,7 +164,7 @@ endif REDIS_SERVER_NAME=redis-server REDIS_SENTINEL_NAME=redis-sentinel -REDIS_SERVER_OBJ=adlist.o quicklist.o ae.o anet.o dict.o server.o sds.o zmalloc.o lzf_c.o lzf_d.o pqsort.o zipmap.o sha1.o ziplist.o release.o networking.o util.o object.o db.o replication.o rdb.o t_string.o t_list.o t_set.o t_zset.o t_hash.o config.o aof.o pubsub.o multi.o debug.o sort.o intset.o syncio.o cluster.o crc16.o endianconv.o slowlog.o scripting.o bio.o rio.o rand.o memtest.o crc64.o bitops.o sentinel.o notify.o setproctitle.o blocked.o hyperloglog.o latency.o sparkline.o redis-check-rdb.o redis-check-aof.o geo.o lazyfree.o module.o evict.o expire.o geohash.o geohash_helper.o childinfo.o defrag.o siphash.o rax.o t_stream.o listpack.o localtime.o lolwut.o lolwut5.o acl.o gopher.o +REDIS_SERVER_OBJ=adlist.o quicklist.o ae.o anet.o dict.o server.o sds.o zmalloc.o lzf_c.o lzf_d.o pqsort.o zipmap.o sha1.o ziplist.o release.o networking.o util.o object.o db.o replication.o rdb.o t_string.o t_list.o t_set.o t_zset.o t_hash.o config.o aof.o pubsub.o multi.o debug.o sort.o intset.o syncio.o cluster.o crc16.o endianconv.o slowlog.o scripting.o bio.o rio.o rand.o memtest.o crc64.o bitops.o sentinel.o notify.o setproctitle.o blocked.o hyperloglog.o latency.o sparkline.o redis-check-rdb.o redis-check-aof.o geo.o lazyfree.o module.o evict.o expire.o geohash.o geohash_helper.o childinfo.o defrag.o siphash.o rax.o t_stream.o listpack.o localtime.o lolwut.o lolwut5.o acl.o gopher.o tracking.c REDIS_CLI_NAME=redis-cli REDIS_CLI_OBJ=anet.o adlist.o dict.o redis-cli.o zmalloc.o release.o anet.o ae.o crc64.o siphash.o crc16.o REDIS_BENCHMARK_NAME=redis-benchmark From 45d64f229eaa0fa17e56bf0589da78d81065344c Mon Sep 17 00:00:00 2001 From: antirez Date: Sat, 29 Jun 2019 20:08:41 -0400 Subject: [PATCH 107/304] Client side caching: fields and flags for tracking mode. --- src/networking.c | 5 +++++ src/server.h | 21 ++++++++++++++++++--- 2 files changed, 23 insertions(+), 3 deletions(-) diff --git a/src/networking.c b/src/networking.c index 4bc22120..44979770 100644 --- a/src/networking.c +++ b/src/networking.c @@ -158,6 +158,7 @@ client *createClient(int fd) { c->pubsub_patterns = listCreate(); c->peerid = NULL; c->client_list_node = NULL; + c->client_tracking_redirection = 0; listSetFreeMethod(c->pubsub_patterns,decrRefCountVoid); listSetMatchMethod(c->pubsub_patterns,listMatchObjects); if (fd != -1) linkClient(c); @@ -966,6 +967,9 @@ void unlinkClient(client *c) { listDelNode(server.unblocked_clients,ln); c->flags &= ~CLIENT_UNBLOCKED; } + + /* Clear the tracking status. */ + if (c->flags & CLIENT_TRACKING) disableTracking(c); } void freeClient(client *c) { @@ -1849,6 +1853,7 @@ sds catClientInfoString(sds s, client *client) { if (client->flags & CLIENT_PUBSUB) *p++ = 'P'; if (client->flags & CLIENT_MULTI) *p++ = 'x'; if (client->flags & CLIENT_BLOCKED) *p++ = 'b'; + if (client->flags & CLIENT_TRACKING) *p++ = 't'; if (client->flags & CLIENT_DIRTY_CAS) *p++ = 'd'; if (client->flags & CLIENT_CLOSE_AFTER_REPLY) *p++ = 'c'; if (client->flags & CLIENT_UNBLOCKED) *p++ = 'u'; diff --git a/src/server.h b/src/server.h index 0813f8bd..a6c6a4da 100644 --- a/src/server.h +++ b/src/server.h @@ -254,8 +254,8 @@ typedef long long mstime_t; /* millisecond time type. 
*/ #define AOF_WAIT_REWRITE 2 /* AOF waits rewrite to start appending */ /* Client flags */ -#define CLIENT_SLAVE (1<<0) /* This client is a slave server */ -#define CLIENT_MASTER (1<<1) /* This client is a master server */ +#define CLIENT_SLAVE (1<<0) /* This client is a repliaca */ +#define CLIENT_MASTER (1<<1) /* This client is a master */ #define CLIENT_MONITOR (1<<2) /* This client is a slave monitor, see MONITOR */ #define CLIENT_MULTI (1<<3) /* This client is in a MULTI context */ #define CLIENT_BLOCKED (1<<4) /* The client is waiting in a blocking operation */ @@ -289,7 +289,12 @@ typedef long long mstime_t; /* millisecond time type. */ #define CLIENT_PENDING_READ (1<<29) /* The client has pending reads and was put in the list of clients we can read from. */ -#define CLIENT_PENDING_COMMAND (1<<30) /* */ +#define CLIENT_PENDING_COMMAND (1<<30) /* Used in threaded I/O to signal after + we return single threaded that the + client has already pending commands + to be executed. */ +#define CLIENT_TRACKING (1<<31) /* Client enabled keys tracking in order to + perform client side caching. */ /* Client block type (btype field in client structure) * if CLIENT_BLOCKED flag is set. */ @@ -845,6 +850,11 @@ typedef struct client { sds peerid; /* Cached peer ID. */ listNode *client_list_node; /* list node in client list */ + /* If this client is in tracking mode and this field is non zero, + * invalidation messages for keys fetched by this client will be send to + * the specified client ID. */ + uint64_t client_tracking_redirection; + /* Response buffer */ int bufpos; char buf[PROTO_REPLY_CHUNK_BYTES]; @@ -1286,6 +1296,8 @@ struct redisServer { unsigned int blocked_clients_by_type[BLOCKED_NUM]; list *unblocked_clients; /* list of clients to unblock before next loop */ list *ready_keys; /* List of readyList structures for BLPOP & co */ + /* Client side caching. */ + unsigned int tracking_clients; /* # of clients with tracking enabled.*/ /* Sort parameters - qsort_r() is only available under BSD so we * have to take this state global, in order to pass it to sortCompare() */ int sort_desc; @@ -1602,6 +1614,9 @@ void addReplyErrorFormat(client *c, const char *fmt, ...); void addReplyStatusFormat(client *c, const char *fmt, ...); #endif +/* Client side caching (tracking mode) */ +void disableTracking(client *c); + /* List data type */ void listTypeTryConversion(robj *subject, robj *value); void listTypePush(robj *subject, robj *value, int where); From c29f3bcf2e8d9fa4ebc11834ac4504eaf0393cf7 Mon Sep 17 00:00:00 2001 From: antirez Date: Sun, 30 Jun 2019 06:19:04 -0400 Subject: [PATCH 108/304] Client side caching: enable tracking mode. 
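Per client, the tracking state is little more than the CLIENT_TRACKING flag, the optional ID of the connection that invalidation messages should be redirected to, and the server-wide tracking_clients counter that lets write paths skip all invalidation work when nobody is tracking. A simplified sketch of what the enable/disable helpers end up doing (the real implementation is added by a later patch in this series; the struct below is a stand-in, not the actual client structure):

    #include <stdint.h>

    #define CLIENT_TRACKING (1ULL<<31)         /* same bit as the patch */

    typedef struct client {
        uint64_t flags;
        uint64_t tracking_redirection;         /* 0 means "no redirection" */
    } client;

    static unsigned int tracking_clients;      /* server-wide counter */

    void enableTrackingSketch(client *c, uint64_t redirect_to) {
        if (c->flags & CLIENT_TRACKING) return;    /* already enabled */
        c->flags |= CLIENT_TRACKING;
        c->tracking_redirection = redirect_to;
        tracking_clients++;
    }

    void disableTrackingSketch(client *c) {
        if (!(c->flags & CLIENT_TRACKING)) return;
        c->flags &= ~CLIENT_TRACKING;
        tracking_clients--;
    }

Keeping the counter on the server object means a plain integer check (tracking_clients != 0) is enough for the key-modification paths to make the feature cost nothing when no client has turned it on.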
--- src/server.h | 1 + 1 file changed, 1 insertion(+) diff --git a/src/server.h b/src/server.h index a6c6a4da..8c97f83f 100644 --- a/src/server.h +++ b/src/server.h @@ -1615,6 +1615,7 @@ void addReplyStatusFormat(client *c, const char *fmt, ...); #endif /* Client side caching (tracking mode) */ +void enableTracking(client *c, uint64_t redirect_to); void disableTracking(client *c); /* List data type */ From 593f6656c1c6ce71eccd71496bb0e62c5f0741b7 Mon Sep 17 00:00:00 2001 From: Angus Pearson Date: Tue, 2 Jul 2019 14:28:48 +0100 Subject: [PATCH 109/304] RESP3 double representation for -infinity is `,-inf\r\n`, not `-inf\r\n` --- src/networking.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/networking.c b/src/networking.c index 4bc22120..6225229c 100644 --- a/src/networking.c +++ b/src/networking.c @@ -506,7 +506,7 @@ void addReplyDouble(client *c, double d) { if (c->resp == 2) { addReplyBulkCString(c, d > 0 ? "inf" : "-inf"); } else { - addReplyProto(c, d > 0 ? ",inf\r\n" : "-inf\r\n", + addReplyProto(c, d > 0 ? ",inf\r\n" : ",-inf\r\n", d > 0 ? 6 : 7); } } else { From db16a861a1ca2f43f340c68c00a47b69a15a6f03 Mon Sep 17 00:00:00 2001 From: antirez Date: Wed, 3 Jul 2019 11:58:20 +0200 Subject: [PATCH 110/304] Client side caching: CLIENT TRACKING subcommand. --- src/networking.c | 41 +++++++++++++++++++++++++++++++++++++++-- 1 file changed, 39 insertions(+), 2 deletions(-) diff --git a/src/networking.c b/src/networking.c index 44979770..18506026 100644 --- a/src/networking.c +++ b/src/networking.c @@ -1966,6 +1966,7 @@ void clientCommand(client *c) { "reply (on|off|skip) -- Control the replies sent to the current connection.", "setname -- Assign the name to the current connection.", "unblock [TIMEOUT|ERROR] -- Unblock the specified blocked client.", +"tracking (on|off) [REDIRECT ] -- Enable client keys tracking for client side caching.", NULL }; addReplyHelp(c, help); @@ -2122,20 +2123,56 @@ NULL addReply(c,shared.czero); } } else if (!strcasecmp(c->argv[1]->ptr,"setname") && c->argc == 3) { + /* CLIENT SETNAME */ if (clientSetNameOrReply(c,c->argv[2]) == C_OK) addReply(c,shared.ok); } else if (!strcasecmp(c->argv[1]->ptr,"getname") && c->argc == 2) { + /* CLIENT GETNAME */ if (c->name) addReplyBulk(c,c->name); else addReplyNull(c); } else if (!strcasecmp(c->argv[1]->ptr,"pause") && c->argc == 3) { + /* CLIENT PAUSE */ long long duration; - if (getTimeoutFromObjectOrReply(c,c->argv[2],&duration,UNIT_MILLISECONDS) - != C_OK) return; + if (getTimeoutFromObjectOrReply(c,c->argv[2],&duration, + UNIT_MILLISECONDS) != C_OK) return; pauseClients(duration); addReply(c,shared.ok); + } else if (!strcasecmp(c->argv[1]->ptr,"tracking") && + (c->argc == 3 || c->argc == 5)) + { + /* CLIENT TRACKING (on|off) [REDIRECT ] */ + long long redir = 0; + + /* Parse the redirection option: we'll require the client with + * the specified ID to exist right now, even if it is possible + * it will get disconnected later. 
*/ + if (c->argc == 5) { + if (strcasecmp(c->argv[3]->ptr,"redirect") != 0) { + addReply(c,shared.syntaxerr); + return; + } else { + if (getLongLongFromObjectOrReply(c,c->argv[4],&redir,NULL) != + C_OK) return; + if (lookupClientByID(redir) == NULL) { + addReplyError(c,"The client ID you want redirect to " + "does not exist"); + return; + } + } + } + + if (!strcasecmp(c->argv[2]->ptr,"on")) { + enableTracking(c,redir); + } else if (!strcasecmp(c->argv[2]->ptr,"off")) { + disableTracking(c); + } else { + addReply(c,shared.syntaxerr); + return; + } + addReply(c,shared.ok); } else { addReplyErrorFormat(c, "Unknown subcommand or wrong number of arguments for '%s'. Try CLIENT HELP", (char*)c->argv[1]->ptr); } From 506764b3f87789bfe219da0b07b79f01917bff9f Mon Sep 17 00:00:00 2001 From: antirez Date: Wed, 3 Jul 2019 12:42:16 +0200 Subject: [PATCH 111/304] Client side caching: hook inside call() for tracking. --- src/server.c | 11 +++++++++++ src/server.h | 1 + 2 files changed, 12 insertions(+) diff --git a/src/server.c b/src/server.c index 4b87b6ac..bb891594 100644 --- a/src/server.c +++ b/src/server.c @@ -3194,6 +3194,7 @@ void call(client *c, int flags) { latencyAddSampleIfNeeded(latency_event,duration/1000); slowlogPushEntryIfNeeded(c,c->argv,c->argc,duration); } + if (flags & CMD_CALL_STATS) { /* use the real command that was executed (cmd and lastamc) may be * different, in case of MULTI-EXEC or re-written commands such as @@ -3261,6 +3262,16 @@ void call(client *c, int flags) { redisOpArrayFree(&server.also_propagate); } server.also_propagate = prev_also_propagate; + + /* If the client has keys tracking enabled for client side caching, + * make sure to remember the keys it fetched via this command. */ + if (c->cmd->flags & CMD_READONLY) { + client *caller = (c->flags & CLIENT_LUA && server.lua_caller) ? + server.lua_caller : c; + if (caller->flags & CLIENT_TRACKING) + trackingRememberKeys(caller); + } + server.stat_numcommands++; } diff --git a/src/server.h b/src/server.h index 8c97f83f..022e4830 100644 --- a/src/server.h +++ b/src/server.h @@ -1617,6 +1617,7 @@ void addReplyStatusFormat(client *c, const char *fmt, ...); /* Client side caching (tracking mode) */ void enableTracking(client *c, uint64_t redirect_to); void disableTracking(client *c); +void trackingRememberKeys(client *c); /* List data type */ void listTypeTryConversion(robj *subject, robj *value); From 46edb55de94cd5d8ebf2dd910092ddaf53181fb8 Mon Sep 17 00:00:00 2001 From: antirez Date: Wed, 3 Jul 2019 19:16:20 +0200 Subject: [PATCH 112/304] Client side caching: implement trackingInvalidateKey(). 
--- src/db.c | 1 + src/debug.c | 2 +- src/expire.c | 1 + src/networking.c | 1 + src/server.h | 5 +- src/tracking.c | 162 +++++++++++++++++++++++++++++++++++++++++++++++ 6 files changed, 170 insertions(+), 2 deletions(-) create mode 100644 src/tracking.c diff --git a/src/db.c b/src/db.c index b537a29a..4977873e 100644 --- a/src/db.c +++ b/src/db.c @@ -399,6 +399,7 @@ int selectDb(client *c, int id) { void signalModifiedKey(redisDb *db, robj *key) { touchWatchedKey(db,key); + if (server.tracking_clients) trackingInvalidateKey(key); } void signalFlushedDb(int dbid) { diff --git a/src/debug.c b/src/debug.c index 0c6b5630..1f1157d4 100644 --- a/src/debug.c +++ b/src/debug.c @@ -702,7 +702,7 @@ void _serverAssertPrintClientInfo(const client *c) { bugReportStart(); serverLog(LL_WARNING,"=== ASSERTION FAILED CLIENT CONTEXT ==="); - serverLog(LL_WARNING,"client->flags = %d", c->flags); + serverLog(LL_WARNING,"client->flags = %llu", (unsigned long long)c->flags); serverLog(LL_WARNING,"client->fd = %d", c->fd); serverLog(LL_WARNING,"client->argc = %d", c->argc); for (j=0; j < c->argc; j++) { diff --git a/src/expire.c b/src/expire.c index 0b92ee3f..b23117a3 100644 --- a/src/expire.c +++ b/src/expire.c @@ -64,6 +64,7 @@ int activeExpireCycleTryExpire(redisDb *db, dictEntry *de, long long now) { dbSyncDelete(db,keyobj); notifyKeyspaceEvent(NOTIFY_EXPIRED, "expired",keyobj,db->id); + if (server.tracking_clients) trackingInvalidateKey(keyobj); decrRefCount(keyobj); server.stat_expiredkeys++; return 1; diff --git a/src/networking.c b/src/networking.c index 18506026..716b3585 100644 --- a/src/networking.c +++ b/src/networking.c @@ -1854,6 +1854,7 @@ sds catClientInfoString(sds s, client *client) { if (client->flags & CLIENT_MULTI) *p++ = 'x'; if (client->flags & CLIENT_BLOCKED) *p++ = 'b'; if (client->flags & CLIENT_TRACKING) *p++ = 't'; + if (client->flags & CLIENT_TRACKING_BROKEN_REDIR) *p++ = 'R'; if (client->flags & CLIENT_DIRTY_CAS) *p++ = 'd'; if (client->flags & CLIENT_CLOSE_AFTER_REPLY) *p++ = 'c'; if (client->flags & CLIENT_UNBLOCKED) *p++ = 'u'; diff --git a/src/server.h b/src/server.h index 022e4830..cd665225 100644 --- a/src/server.h +++ b/src/server.h @@ -295,6 +295,7 @@ typedef long long mstime_t; /* millisecond time type. */ to be executed. */ #define CLIENT_TRACKING (1<<31) /* Client enabled keys tracking in order to perform client side caching. */ +#define CLIENT_TRACKING_BROKEN_REDIR (1ULL<<32) /* Target client is invalid. */ /* Client block type (btype field in client structure) * if CLIENT_BLOCKED flag is set. */ @@ -821,7 +822,7 @@ typedef struct client { time_t ctime; /* Client creation time. */ time_t lastinteraction; /* Time of the last interaction, used for timeout */ time_t obuf_soft_limit_reached_time; - int flags; /* Client flags: CLIENT_* macros. */ + uint64_t flags; /* Client flags: CLIENT_* macros. */ int authenticated; /* Needed when the default user requires auth. */ int replstate; /* Replication state if this is a slave. */ int repl_put_online_on_ack; /* Install slave write handler on ACK. */ @@ -1603,6 +1604,7 @@ void linkClient(client *c); void protectClient(client *c); void unprotectClient(client *c); void initThreadedIO(void); +client *lookupClientByID(uint64_t id); #ifdef __GNUC__ void addReplyErrorFormat(client *c, const char *fmt, ...) 
@@ -1618,6 +1620,7 @@ void addReplyStatusFormat(client *c, const char *fmt, ...); void enableTracking(client *c, uint64_t redirect_to); void disableTracking(client *c); void trackingRememberKeys(client *c); +void trackingInvalidateKey(robj *keyobj); /* List data type */ void listTypeTryConversion(robj *subject, robj *value); diff --git a/src/tracking.c b/src/tracking.c new file mode 100644 index 00000000..66615ed9 --- /dev/null +++ b/src/tracking.c @@ -0,0 +1,162 @@ +/* tracking.c - Client side caching: keys tracking and invalidation + * + * Copyright (c) 2019, Salvatore Sanfilippo + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Redis nor the names of its contributors may be used + * to endorse or promote products derived from this software without + * specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include "server.h" + +/* The tracking table is constituted by 2^24 radix trees (each tree, and the + * table itself, are allocated in a lazy way only when needed) tracking + * clients that may have certain keys in their local, client side, cache. + * + * Keys are grouped into 2^24 slots, in a way similar to Redis Cluster hash + * slots, however here the function we use is crc64, taking the least + * significant 24 bits of the output. + * + * When a client enables tracking with "CLIENT TRACKING on", each key served to + * the client is hashed to one of such slots, and Redis will remember what + * client may have keys about such slot. Later, when a key in a given slot is + * modified, all the clients that may have local copies of keys in that slot + * will receive an invalidation message. There is no distinction of database + * number: a single table is used. + * + * Clients will normally take frequently requested objects in memory, removing + * them when invalidation messages are received. A strategy clients may use is + * to just cache objects in a dictionary, associating to each cached object + * some incremental epoch, or just a timestamp. 
When invalidation messages are + * received clients may store, in a different table, the timestamp (or epoch) + * of the invalidation of such given slot: later when accessing objects, the + * eviction of stale objects may be performed in a lazy way by checking if the + * cached object timestamp is older than the invalidation timestamp for such + * objects. + * + * The output of the 24 bit hash function is very large (more than 16 million + * possible slots), so clients that may want to use less resources may only + * use the most significant bits instead of the full 24 bits. */ +#define TRACKING_TABLE_SIZE (1<<24) +rax **TrackingTable = NULL; + +/* Remove the tracking state from the client 'c'. Note that there is not much + * to do for us here, if not to decrement the counter of the clients in + * tracking mode, because we just store the ID of the client in the tracking + * table, so we'll remove the ID reference in a lazy way. Otherwise when a + * client with many entries in the table is removed, it would cost a lot of + * time to do the cleanup. */ +void disableTracking(client *c) { + if (c->flags & CLIENT_TRACKING) { + server.tracking_clients--; + c->flags &= ~(CLIENT_TRACKING|CLIENT_TRACKING_BROKEN_REDIR); + } +} + +/* Enable the tracking state for the client 'c', and as a side effect allocates + * the tracking table if needed. If the 'redirect_to' argument is non zero, the + * invalidation messages for this client will be sent to the client ID + * specified by the 'redirect_to' argument. Note that if such client will + * eventually get freed, we'll send a message to the original client to + * inform it of the condition. Multiple clients can redirect the invalidation + * messages to the same client ID. */ +void enableTracking(client *c, uint64_t redirect_to) { + if (c->flags & CLIENT_TRACKING) return; + c->flags |= CLIENT_TRACKING; + c->flags &= ~CLIENT_TRACKING_BROKEN_REDIR; + c->client_tracking_redirection = redirect_to; + server.tracking_clients++; + if (TrackingTable == NULL) + TrackingTable = zcalloc(sizeof(rax*) * TRACKING_TABLE_SIZE); +} + +/* This function is called after the excution of a readonly command in the + * case the client 'c' has keys tracking enabled. It will populate the + * tracking ivalidation table according to the keys the user fetched, so that + * Redis will know what are the clients that should receive an invalidation + * message with certain groups of keys are modified. */ +void trackingRememberKeys(client *c) { + int numkeys; + int *keys = getKeysFromCommand(c->cmd,c->argv,c->argc,&numkeys); + if (keys == NULL) return; + + for(int j = 0; j < numkeys; j++) { + int idx = keys[j]; + sds sdskey = c->argv[idx]->ptr; + uint64_t hash = crc64(0, + (unsigned char*)sdskey,sdslen(sdskey))&(TRACKING_TABLE_SIZE-1); + if (TrackingTable[hash] == NULL) + TrackingTable[hash] = raxNew(); + raxTryInsert(TrackingTable[hash], + (unsigned char*)&c->id,sizeof(c->id),NULL,NULL); + } + getKeysFreeResult(keys); +} + +/* This function is called from signalModifiedKey() or other places in Redis + * when a key changes value. In the context of keys tracking, our task here is + * to send a notification to every client that may have keys about such . 
*/ +void trackingInvalidateKey(robj *keyobj) { + sds sdskey = keyobj->ptr; + uint64_t hash = crc64(0, + (unsigned char*)sdskey,sdslen(sdskey))&(TRACKING_TABLE_SIZE-1); + if (TrackingTable == NULL || TrackingTable[hash] == NULL) return; + + raxIterator ri; + raxStart(&ri,TrackingTable[hash]); + raxSeek(&ri,"^",NULL,0); + while(raxNext(&ri)) { + uint64_t id; + memcpy(&id,ri.key,ri.key_len); + client *c = lookupClientByID(id); + if (c->client_tracking_redirection) { + client *redir = lookupClientByID(c->client_tracking_redirection); + if (!redir) { + /* We need to signal to the original connection that we + * are unable to send invalidation messages to the redirected + * connection, because the client no longer exist. */ + if (c->resp > 2) { + addReplyPushLen(c,3); + addReplyBulkCBuffer(c,"tracking-redir-broken",21); + addReplyLongLong(c,c->client_tracking_redirection); + } + continue; + } + c = redir; + } + + /* Only send such info for clients in RESP version 3 or more. */ + if (c->resp > 2) { + addReplyPushLen(c,3); + addReplyBulkCBuffer(c,"invalidate",10); + addReplyBulk(c,keyobj); + } + } + raxStop(&ri); + + /* Free the tracking table: we'll create the radix tree and populate it + * again if more keys will be modified in this hash slot. */ + raxFree(TrackingTable[hash]); + TrackingTable[hash] = NULL; +} From 3e43aed1acd222806afb7fc51907a0a4358f0f38 Mon Sep 17 00:00:00 2001 From: antirez Date: Thu, 4 Jul 2019 14:06:44 +0200 Subject: [PATCH 113/304] Client side caching: fix invalidate message len and content. --- src/tracking.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/tracking.c b/src/tracking.c index 66615ed9..aade137c 100644 --- a/src/tracking.c +++ b/src/tracking.c @@ -148,9 +148,9 @@ void trackingInvalidateKey(robj *keyobj) { /* Only send such info for clients in RESP version 3 or more. */ if (c->resp > 2) { - addReplyPushLen(c,3); + addReplyPushLen(c,2); addReplyBulkCBuffer(c,"invalidate",10); - addReplyBulk(c,keyobj); + addReplyLongLong(c,hash); } } raxStop(&ri); From 41c5ebcab6b1b2f9eabe78af88e75de841f8e0e5 Mon Sep 17 00:00:00 2001 From: Oran Agra Date: Thu, 4 Jul 2019 10:02:26 +0300 Subject: [PATCH 114/304] missing per-skiplist overheads in MEMORY USAGE these had severe impact for small zsets, for instance ones with just one element that is longer than 64 (causing it not to be ziplist encoded) --- src/object.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/object.c b/src/object.c index 234e11f8..10209a6c 100644 --- a/src/object.c +++ b/src/object.c @@ -834,7 +834,9 @@ size_t objectComputeSize(robj *o, size_t sample_size) { d = ((zset*)o->ptr)->dict; zskiplist *zsl = ((zset*)o->ptr)->zsl; zskiplistNode *znode = zsl->header->level[0].forward; - asize = sizeof(*o)+sizeof(zset)+(sizeof(struct dictEntry*)*dictSlots(d)); + asize = sizeof(*o)+sizeof(zset)+sizeof(zskiplist)+sizeof(dict)+ + (sizeof(struct dictEntry*)*dictSlots(d))+ + zmalloc_size(zsl->header); while(znode != NULL && samples < sample_size) { elesize += sdsAllocSize(znode->ele); elesize += sizeof(struct dictEntry) + zmalloc_size(znode); From 6b29f2d83d0b15878b2de603a62d2fcb01a2ddc0 Mon Sep 17 00:00:00 2001 From: antirez Date: Fri, 5 Jul 2019 12:24:28 +0200 Subject: [PATCH 115/304] Client side caching: RESP2 support. 
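With redirection, even a RESP2 client can consume invalidations: a second connection subscribes to the __redis__:invalidate channel and the data connection asks for its invalidations to be redirected there, so they arrive as ordinary Pub/Sub messages. A possible client-side setup, sketched with hiredis (the hiredis API is an assumption of this sketch; the channel name and the CLIENT TRACKING syntax are the ones introduced by the patches above):

    #include <stdio.h>
    #include <hiredis/hiredis.h>

    int main(void) {
        redisContext *inv  = redisConnect("127.0.0.1", 6379); /* invalidations */
        redisContext *data = redisConnect("127.0.0.1", 6379); /* normal traffic */
        if (!inv || inv->err || !data || data->err) return 1;

        /* Learn the ID of the invalidation connection. */
        redisReply *r = redisCommand(inv, "CLIENT ID");
        char idbuf[32];
        snprintf(idbuf, sizeof(idbuf), "%lld", r->integer);
        freeReplyObject(r);

        /* The invalidation connection enters Pub/Sub mode on the channel
         * the server uses for RESP2 clients. */
        r = redisCommand(inv, "SUBSCRIBE __redis__:invalidate");
        freeReplyObject(r);

        /* The data connection enables tracking, redirected to 'inv'. */
        r = redisCommand(data, "CLIENT TRACKING on REDIRECT %s", idbuf);
        freeReplyObject(r);

        /* Keys read here are now remembered by the server; when they are
         * modified, a message carrying the invalidated slot shows up on 'inv'. */
        r = redisCommand(data, "GET user:1");
        freeReplyObject(r);

        redisReply *msg;
        while (redisGetReply(inv, (void **)&msg) == REDIS_OK) {
            if (msg->type == REDIS_REPLY_ARRAY && msg->elements == 3)
                printf("invalidated slot: %s\n", msg->element[2]->str);
            freeReplyObject(msg);
        }

        redisFree(inv);
        redisFree(data);
        return 0;
    }

RESP3 clients do not need the second connection at all, since the same information is delivered as a push reply on the data connection itself.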
--- src/server.h | 1 + src/tracking.c | 16 ++++++++++++++-- 2 files changed, 15 insertions(+), 2 deletions(-) diff --git a/src/server.h b/src/server.h index cd665225..cb70b93a 100644 --- a/src/server.h +++ b/src/server.h @@ -1946,6 +1946,7 @@ int pubsubUnsubscribeAllPatterns(client *c, int notify); void freePubsubPattern(void *p); int listMatchPubsubPattern(void *a, void *b); int pubsubPublishMessage(robj *channel, robj *message); +void addReplyPubsubMessage(client *c, robj *channel, robj *msg); /* Keyspace events notification */ void notifyKeyspaceEvent(int type, char *event, robj *key, int dbid); diff --git a/src/tracking.c b/src/tracking.c index aade137c..9d9585c9 100644 --- a/src/tracking.c +++ b/src/tracking.c @@ -60,6 +60,7 @@ * use the most significant bits instead of the full 24 bits. */ #define TRACKING_TABLE_SIZE (1<<24) rax **TrackingTable = NULL; +robj *TrackingChannelName; /* Remove the tracking state from the client 'c'. Note that there is not much * to do for us here, if not to decrement the counter of the clients in @@ -87,8 +88,10 @@ void enableTracking(client *c, uint64_t redirect_to) { c->flags &= ~CLIENT_TRACKING_BROKEN_REDIR; c->client_tracking_redirection = redirect_to; server.tracking_clients++; - if (TrackingTable == NULL) + if (TrackingTable == NULL) { TrackingTable = zcalloc(sizeof(rax*) * TRACKING_TABLE_SIZE); + TrackingChannelName = createStringObject("__redis__:invalidate",20); + } } /* This function is called after the excution of a readonly command in the @@ -130,6 +133,7 @@ void trackingInvalidateKey(robj *keyobj) { uint64_t id; memcpy(&id,ri.key,ri.key_len); client *c = lookupClientByID(id); + int using_redirection = 0; if (c->client_tracking_redirection) { client *redir = lookupClientByID(c->client_tracking_redirection); if (!redir) { @@ -144,13 +148,21 @@ void trackingInvalidateKey(robj *keyobj) { continue; } c = redir; + using_redirection = 1; } - /* Only send such info for clients in RESP version 3 or more. */ + /* Only send such info for clients in RESP version 3 or more. However + * if redirection is active, and the connection we redirect to is + * in Pub/Sub mode, we can support the feature with RESP 2 as well, + * by sending Pub/Sub messages in the __redis__:invalidate channel. 
*/ if (c->resp > 2) { addReplyPushLen(c,2); addReplyBulkCBuffer(c,"invalidate",10); addReplyLongLong(c,hash); + } else if (using_redirection && c->flags & CLIENT_PUBSUB) { + robj *msg = createStringObjectFromLongLong(hash); + addReplyPubsubMessage(c,TrackingChannelName,msg); + decrRefCount(msg); } } raxStop(&ri); From 2d07883cab2bc301f20ceb291904dfafa359f2f6 Mon Sep 17 00:00:00 2001 From: Guy Korland Date: Sun, 7 Jul 2019 18:28:15 +0300 Subject: [PATCH 116/304] fix build tracking.c should be tracking.o thanks to @rafie --- src/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Makefile b/src/Makefile index e608309f..b6cc69e2 100644 --- a/src/Makefile +++ b/src/Makefile @@ -164,7 +164,7 @@ endif REDIS_SERVER_NAME=redis-server REDIS_SENTINEL_NAME=redis-sentinel -REDIS_SERVER_OBJ=adlist.o quicklist.o ae.o anet.o dict.o server.o sds.o zmalloc.o lzf_c.o lzf_d.o pqsort.o zipmap.o sha1.o ziplist.o release.o networking.o util.o object.o db.o replication.o rdb.o t_string.o t_list.o t_set.o t_zset.o t_hash.o config.o aof.o pubsub.o multi.o debug.o sort.o intset.o syncio.o cluster.o crc16.o endianconv.o slowlog.o scripting.o bio.o rio.o rand.o memtest.o crc64.o bitops.o sentinel.o notify.o setproctitle.o blocked.o hyperloglog.o latency.o sparkline.o redis-check-rdb.o redis-check-aof.o geo.o lazyfree.o module.o evict.o expire.o geohash.o geohash_helper.o childinfo.o defrag.o siphash.o rax.o t_stream.o listpack.o localtime.o lolwut.o lolwut5.o acl.o gopher.o tracking.c +REDIS_SERVER_OBJ=adlist.o quicklist.o ae.o anet.o dict.o server.o sds.o zmalloc.o lzf_c.o lzf_d.o pqsort.o zipmap.o sha1.o ziplist.o release.o networking.o util.o object.o db.o replication.o rdb.o t_string.o t_list.o t_set.o t_zset.o t_hash.o config.o aof.o pubsub.o multi.o debug.o sort.o intset.o syncio.o cluster.o crc16.o endianconv.o slowlog.o scripting.o bio.o rio.o rand.o memtest.o crc64.o bitops.o sentinel.o notify.o setproctitle.o blocked.o hyperloglog.o latency.o sparkline.o redis-check-rdb.o redis-check-aof.o geo.o lazyfree.o module.o evict.o expire.o geohash.o geohash_helper.o childinfo.o defrag.o siphash.o rax.o t_stream.o listpack.o localtime.o lolwut.o lolwut5.o acl.o gopher.o tracking.o REDIS_CLI_NAME=redis-cli REDIS_CLI_OBJ=anet.o adlist.o dict.o redis-cli.o zmalloc.o release.o anet.o ae.o crc64.o siphash.o crc16.o REDIS_BENCHMARK_NAME=redis-benchmark From 6eb52e200ce3af68433b69e50e2a5044f7074b08 Mon Sep 17 00:00:00 2001 From: Angus Pearson Date: Mon, 8 Jul 2019 11:04:37 +0100 Subject: [PATCH 117/304] Change typeNameCanonicalize -> getObjectTypeName, and other style changes --- src/db.c | 6 +++--- src/server.h | 9 ++++----- 2 files changed, 7 insertions(+), 8 deletions(-) diff --git a/src/db.c b/src/db.c index 6557ddc3..bb53081f 100644 --- a/src/db.c +++ b/src/db.c @@ -767,7 +767,7 @@ void scanGenericCommand(client *c, robj *o, unsigned long cursor) { /* Filter an element if it isn't the type we want. 
*/ if (!filter && o == NULL && typename){ robj* typecheck = lookupKeyReadWithFlags(c->db, kobj, LOOKUP_NOTOUCH); - char* type = typeNameCanonicalize(typecheck); + char* type = getObjectTypeName(typecheck); if (strcasecmp((char*) typename, type)) filter = 1; } @@ -827,7 +827,7 @@ void lastsaveCommand(client *c) { addReplyLongLong(c,server.lastsave); } -char* typeNameCanonicalize(robj *o) { +char* getObjectTypeName(robj *o) { char* type; if (o == NULL) { type = "none"; @@ -852,7 +852,7 @@ char* typeNameCanonicalize(robj *o) { void typeCommand(client *c) { robj *o; o = lookupKeyReadWithFlags(c->db,c->argv[1],LOOKUP_NOTOUCH); - addReplyStatus(c, typeNameCanonicalize(o)); + addReplyStatus(c, getObjectTypeName(o)); } void shutdownCommand(client *c) { diff --git a/src/server.h b/src/server.h index dc02edb5..19ef1ac5 100644 --- a/src/server.h +++ b/src/server.h @@ -646,11 +646,10 @@ typedef struct redisObject { void *ptr; } robj; -/* The 'canonical' name for a type as enumerated above is given by the - * below function. Native types are checked against the OBJ_STRING, - * OBJ_LIST, OBJ_* defines, and Module types have their registered name - * returned.*/ -char* typeNameCanonicalize(robj*); +/* The a string name for an object's type as listed above + * Native types are checked against the OBJ_STRING, OBJ_LIST, OBJ_* defines, + * and Module types have their registered name returned. */ +char *getObjectTypeName(robj*); /* Macro used to initialize a Redis object allocated on the stack. * Note that this macro is taken near the structure definition to make sure From 2de544cfcc6d1aa7cf6d0c75a6116f7fc27b6fd6 Mon Sep 17 00:00:00 2001 From: Oran Agra Date: Mon, 1 Jul 2019 15:22:29 +0300 Subject: [PATCH 118/304] diskless replication on slave side (don't store rdb to file), plus some other related fixes The implementation of the diskless replication was currently diskless only on the master side. The slave side was still storing the received rdb file to the disk before loading it back in and parsing it. This commit adds two modes to load rdb directly from socket: 1) when-empty 2) using "swapdb" the third mode of using diskless slave by flushdb is risky and currently not included. other changes: -------------- distinguish between aof configuration and state so that we can re-enable aof only when sync eventually succeeds (and not when exiting from readSyncBulkPayload after a failed attempt) also a CONFIG GET and INFO during rdb loading would have lied When loading rdb from the network, don't kill the server on short read (that can be a network error) Fix rdb check when performed on preamble AOF tests: run replication tests for diskless slave too make replication test a bit more aggressive Add test for diskless load swapdb --- redis.conf | 16 ++ src/anet.c | 14 + src/anet.h | 1 + src/aof.c | 2 +- src/config.c | 38 ++- src/db.c | 23 +- src/rdb.c | 40 ++- src/redis-check-rdb.c | 4 +- src/replication.c | 340 +++++++++++++++--------- src/rio.c | 109 +++++++- src/rio.h | 10 + src/server.c | 7 +- src/server.h | 19 +- tests/integration/replication-4.tcl | 9 - tests/integration/replication-psync.tcl | 40 ++- tests/integration/replication.tcl | 216 ++++++++++----- tests/support/util.tcl | 12 + 17 files changed, 648 insertions(+), 252 deletions(-) diff --git a/redis.conf b/redis.conf index 06051076..74b6c018 100644 --- a/redis.conf +++ b/redis.conf @@ -377,6 +377,22 @@ repl-diskless-sync no # it entirely just set it to 0 seconds and the transfer will start ASAP. 
repl-diskless-sync-delay 5 +# Replica can load the rdb it reads from the replication link directly from the +# socket, or store the rdb to a file and read that file after it was completely +# recived from the master. +# In many cases the disk is slower than the network, and storing and loading +# the rdb file may increase replication time (and even increase the master's +# Copy on Write memory and salve buffers). +# However, parsing the rdb file directly from the socket may mean that we have +# to flush the contents of the current database before the full rdb was received. +# for this reason we have the following options: +# "disabled" - Don't use diskless load (store the rdb file to the disk first) +# "on-empty-db" - Use diskless load only when it is completely safe. +# "swapdb" - Keep a copy of the current db contents in RAM while parsing +# the data directly from the socket. note that this requires +# sufficient memory, if you don't have it, you risk an OOM kill. +repl-diskless-load disabled + # Replicas send PINGs to server in a predefined interval. It's possible to change # this interval with the repl_ping_replica_period option. The default value is 10 # seconds. diff --git a/src/anet.c b/src/anet.c index 2981fca1..2088f4fb 100644 --- a/src/anet.c +++ b/src/anet.c @@ -193,6 +193,20 @@ int anetSendTimeout(char *err, int fd, long long ms) { return ANET_OK; } +/* Set the socket receive timeout (SO_RCVTIMEO socket option) to the specified + * number of milliseconds, or disable it if the 'ms' argument is zero. */ +int anetRecvTimeout(char *err, int fd, long long ms) { + struct timeval tv; + + tv.tv_sec = ms/1000; + tv.tv_usec = (ms%1000)*1000; + if (setsockopt(fd, SOL_SOCKET, SO_RCVTIMEO, &tv, sizeof(tv)) == -1) { + anetSetError(err, "setsockopt SO_RCVTIMEO: %s", strerror(errno)); + return ANET_ERR; + } + return ANET_OK; +} + /* anetGenericResolve() is called by anetResolve() and anetResolveIP() to * do the actual work. It resolves the hostname "host" and set the string * representation of the IP address into the buffer pointed by "ipbuf". diff --git a/src/anet.h b/src/anet.h index 7142f78d..dd735240 100644 --- a/src/anet.h +++ b/src/anet.h @@ -70,6 +70,7 @@ int anetEnableTcpNoDelay(char *err, int fd); int anetDisableTcpNoDelay(char *err, int fd); int anetTcpKeepAlive(char *err, int fd); int anetSendTimeout(char *err, int fd, long long ms); +int anetRecvTimeout(char *err, int fd, long long ms); int anetPeerToString(int fd, char *ip, size_t ip_len, int *port); int anetKeepAlive(char *err, int fd, int interval); int anetSockName(int fd, char *ip, size_t ip_len, int *port); diff --git a/src/aof.c b/src/aof.c index 4744847d..565ee807 100644 --- a/src/aof.c +++ b/src/aof.c @@ -729,7 +729,7 @@ int loadAppendOnlyFile(char *filename) { server.aof_state = AOF_OFF; fakeClient = createFakeClient(); - startLoading(fp); + startLoadingFile(fp, filename); /* Check if this AOF file has an RDB preamble. In that case we need to * load the RDB file and later continue loading the AOF tail. */ diff --git a/src/config.c b/src/config.c index 2e6e9a6b..fde00ddf 100644 --- a/src/config.c +++ b/src/config.c @@ -91,6 +91,13 @@ configEnum aof_fsync_enum[] = { {NULL, 0} }; +configEnum repl_diskless_load_enum[] = { + {"disabled", REPL_DISKLESS_LOAD_DISABLED}, + {"on-empty-db", REPL_DISKLESS_LOAD_WHEN_DB_EMPTY}, + {"swapdb", REPL_DISKLESS_LOAD_SWAPDB}, + {NULL, 0} +}; + /* Output buffer limits presets. 
*/ clientBufferLimitsConfig clientBufferLimitsDefaults[CLIENT_TYPE_OBUF_COUNT] = { {0, 0, 0}, /* normal */ @@ -427,6 +434,11 @@ void loadServerConfigFromString(char *config) { err = "repl-timeout must be 1 or greater"; goto loaderr; } + } else if (!strcasecmp(argv[0],"repl-diskless-load") && argc==2) { + server.repl_diskless_load = configEnumGetValue(repl_diskless_load_enum,argv[1]); + if (server.repl_diskless_load == INT_MIN) { + err = "argument must be 'disabled', 'on-empty-db', 'swapdb' or 'flushdb'"; + } } else if (!strcasecmp(argv[0],"repl-diskless-sync-delay") && argc==2) { server.repl_diskless_sync_delay = atoi(argv[1]); if (server.repl_diskless_sync_delay < 0) { @@ -466,12 +478,10 @@ void loadServerConfigFromString(char *config) { if (server.config_hz < CONFIG_MIN_HZ) server.config_hz = CONFIG_MIN_HZ; if (server.config_hz > CONFIG_MAX_HZ) server.config_hz = CONFIG_MAX_HZ; } else if (!strcasecmp(argv[0],"appendonly") && argc == 2) { - int yes; - - if ((yes = yesnotoi(argv[1])) == -1) { + if ((server.aof_enabled = yesnotoi(argv[1])) == -1) { err = "argument must be 'yes' or 'no'"; goto loaderr; } - server.aof_state = yes ? AOF_ON : AOF_OFF; + server.aof_state = server.aof_enabled ? AOF_ON : AOF_OFF; } else if (!strcasecmp(argv[0],"appendfilename") && argc == 2) { if (!pathIsBaseName(argv[1])) { err = "appendfilename can't be a path, just a filename"; @@ -497,6 +507,12 @@ void loadServerConfigFromString(char *config) { argc == 2) { server.aof_rewrite_min_size = memtoll(argv[1],NULL); + } else if (!strcasecmp(argv[0],"rdb-key-save-delay") && argc==2) { + server.rdb_key_save_delay = atoi(argv[1]); + if (server.rdb_key_save_delay < 0) { + err = "rdb-key-save-delay can't be negative"; + goto loaderr; + } } else if (!strcasecmp(argv[0],"requirepass") && argc == 2) { if (strlen(argv[1]) > CONFIG_AUTHPASS_MAX_LEN) { err = "Password is longer than CONFIG_AUTHPASS_MAX_LEN"; @@ -942,6 +958,7 @@ void configSetCommand(client *c) { int enable = yesnotoi(o->ptr); if (enable == -1) goto badfmt; + server.aof_enabled = enable; if (enable == 0 && server.aof_state != AOF_OFF) { stopAppendOnly(); } else if (enable && server.aof_state == AOF_OFF) { @@ -1132,6 +1149,8 @@ void configSetCommand(client *c) { "slave-priority",server.slave_priority,0,INT_MAX) { } config_set_numerical_field( "replica-priority",server.slave_priority,0,INT_MAX) { + } config_set_numerical_field( + "rdb-key-save-delay",server.rdb_key_save_delay,0,LLONG_MAX) { } config_set_numerical_field( "slave-announce-port",server.slave_announce_port,0,65535) { } config_set_numerical_field( @@ -1199,6 +1218,8 @@ void configSetCommand(client *c) { "maxmemory-policy",server.maxmemory_policy,maxmemory_policy_enum) { } config_set_enum_field( "appendfsync",server.aof_fsync,aof_fsync_enum) { + } config_set_enum_field( + "repl-diskless-load",server.repl_diskless_load,repl_diskless_load_enum) { /* Everyhing else is an error... 
*/ } config_set_else { @@ -1346,6 +1367,7 @@ void configGetCommand(client *c) { config_get_numerical_field("cluster-slave-validity-factor",server.cluster_slave_validity_factor); config_get_numerical_field("cluster-replica-validity-factor",server.cluster_slave_validity_factor); config_get_numerical_field("repl-diskless-sync-delay",server.repl_diskless_sync_delay); + config_get_numerical_field("rdb-key-save-delay",server.rdb_key_save_delay); config_get_numerical_field("tcp-keepalive",server.tcpkeepalive); /* Bool (yes/no) values */ @@ -1370,12 +1392,14 @@ void configGetCommand(client *c) { server.aof_fsync,aof_fsync_enum); config_get_enum_field("syslog-facility", server.syslog_facility,syslog_facility_enum); + config_get_enum_field("repl-diskless-load", + server.repl_diskless_load,repl_diskless_load_enum); /* Everything we can't handle with macros follows. */ if (stringmatch(pattern,"appendonly",1)) { addReplyBulkCString(c,"appendonly"); - addReplyBulkCString(c,server.aof_state == AOF_OFF ? "no" : "yes"); + addReplyBulkCString(c,server.aof_enabled ? "yes" : "no"); matches++; } if (stringmatch(pattern,"dir",1)) { @@ -2109,6 +2133,7 @@ int rewriteConfig(char *path) { rewriteConfigNumericalOption(state,"repl-timeout",server.repl_timeout,CONFIG_DEFAULT_REPL_TIMEOUT); rewriteConfigBytesOption(state,"repl-backlog-size",server.repl_backlog_size,CONFIG_DEFAULT_REPL_BACKLOG_SIZE); rewriteConfigBytesOption(state,"repl-backlog-ttl",server.repl_backlog_time_limit,CONFIG_DEFAULT_REPL_BACKLOG_TIME_LIMIT); + rewriteConfigEnumOption(state,"repl-diskless-load",server.repl_diskless_load,repl_diskless_load_enum,CONFIG_DEFAULT_REPL_DISKLESS_LOAD); rewriteConfigNumericalOption(state,"repl-diskless-sync-delay",server.repl_diskless_sync_delay,CONFIG_DEFAULT_REPL_DISKLESS_SYNC_DELAY); rewriteConfigNumericalOption(state,"replica-priority",server.slave_priority,CONFIG_DEFAULT_SLAVE_PRIORITY); rewriteConfigNumericalOption(state,"min-replicas-to-write",server.repl_min_slaves_to_write,CONFIG_DEFAULT_MIN_SLAVES_TO_WRITE); @@ -2128,7 +2153,7 @@ int rewriteConfig(char *path) { rewriteConfigNumericalOption(state,"active-defrag-cycle-min",server.active_defrag_cycle_min,CONFIG_DEFAULT_DEFRAG_CYCLE_MIN); rewriteConfigNumericalOption(state,"active-defrag-cycle-max",server.active_defrag_cycle_max,CONFIG_DEFAULT_DEFRAG_CYCLE_MAX); rewriteConfigNumericalOption(state,"active-defrag-max-scan-fields",server.active_defrag_max_scan_fields,CONFIG_DEFAULT_DEFRAG_MAX_SCAN_FIELDS); - rewriteConfigYesNoOption(state,"appendonly",server.aof_state != AOF_OFF,0); + rewriteConfigYesNoOption(state,"appendonly",server.aof_enabled,0); rewriteConfigStringOption(state,"appendfilename",server.aof_filename,CONFIG_DEFAULT_AOF_FILENAME); rewriteConfigEnumOption(state,"appendfsync",server.aof_fsync,aof_fsync_enum,CONFIG_DEFAULT_AOF_FSYNC); rewriteConfigNumericalOption(state,"auto-aof-rewrite-percentage",server.aof_rewrite_perc,AOF_REWRITE_PERC); @@ -2157,6 +2182,7 @@ int rewriteConfig(char *path) { rewriteConfigClientoutputbufferlimitOption(state); rewriteConfigNumericalOption(state,"hz",server.config_hz,CONFIG_DEFAULT_HZ); rewriteConfigEnumOption(state,"supervised",server.supervised_mode,supervised_mode_enum,SUPERVISED_NONE); + rewriteConfigNumericalOption(state,"rdb-key-save-delay",server.rdb_key_save_delay,CONFIG_DEFAULT_RDB_KEY_SAVE_DELAY); /* Rewrite Sentinel config if in Sentinel mode. 
*/ if (server.sentinel_mode) rewriteConfigSentinelOption(state); diff --git a/src/db.c b/src/db.c index 07051dad..8b765680 100644 --- a/src/db.c +++ b/src/db.c @@ -344,7 +344,7 @@ robj *dbUnshareStringValue(redisDb *db, robj *key, robj *o) { * On success the fuction returns the number of keys removed from the * database(s). Otherwise -1 is returned in the specific case the * DB number is out of range, and errno is set to EINVAL. */ -long long emptyDb(int dbnum, int flags, void(callback)(void*)) { +long long emptyDbGeneric(redisDb *dbarray, int dbnum, int flags, void(callback)(void*)) { int async = (flags & EMPTYDB_ASYNC); long long removed = 0; @@ -362,12 +362,12 @@ long long emptyDb(int dbnum, int flags, void(callback)(void*)) { } for (int j = startdb; j <= enddb; j++) { - removed += dictSize(server.db[j].dict); + removed += dictSize(dbarray[j].dict); if (async) { - emptyDbAsync(&server.db[j]); + emptyDbAsync(&dbarray[j]); } else { - dictEmpty(server.db[j].dict,callback); - dictEmpty(server.db[j].expires,callback); + dictEmpty(dbarray[j].dict,callback); + dictEmpty(dbarray[j].expires,callback); } } if (server.cluster_enabled) { @@ -381,6 +381,10 @@ long long emptyDb(int dbnum, int flags, void(callback)(void*)) { return removed; } +long long emptyDb(int dbnum, int flags, void(callback)(void*)) { + return emptyDbGeneric(server.db, dbnum, flags, callback); +} + int selectDb(client *c, int id) { if (id < 0 || id >= server.dbnum) return C_ERR; @@ -388,6 +392,15 @@ int selectDb(client *c, int id) { return C_OK; } +long long dbTotalServerKeyCount() { + long long total = 0; + int j; + for (j = 0; j < server.dbnum; j++) { + total += dictSize(server.db[j].dict); + } + return total; +} + /*----------------------------------------------------------------------------- * Hooks for key space changes. * diff --git a/src/rdb.c b/src/rdb.c index 95e4766e..c566378f 100644 --- a/src/rdb.c +++ b/src/rdb.c @@ -44,6 +44,7 @@ #define rdbExitReportCorruptRDB(...) rdbCheckThenExit(__LINE__,__VA_ARGS__) +char* rdbFileBeingLoaded = NULL; /* used for rdb checking on read error */ extern int rdbCheckMode; void rdbCheckError(const char *fmt, ...); void rdbCheckSetError(const char *fmt, ...); @@ -61,11 +62,17 @@ void rdbCheckThenExit(int linenum, char *reason, ...) { if (!rdbCheckMode) { serverLog(LL_WARNING, "%s", msg); - char *argv[2] = {"",server.rdb_filename}; - redis_check_rdb_main(2,argv,NULL); + if (rdbFileBeingLoaded) { + char *argv[2] = {"",rdbFileBeingLoaded}; + redis_check_rdb_main(2,argv,NULL); + } else { + serverLog(LL_WARNING, "Failure loading rdb format from socket, assuming connection error, resuming operation."); + return; + } } else { rdbCheckError("%s",msg); } + serverLog(LL_WARNING, "Terminating server after rdb file reading failure."); exit(1); } @@ -1039,6 +1046,11 @@ int rdbSaveKeyValuePair(rio *rdb, robj *key, robj *val, long long expiretime) { if (rdbSaveObjectType(rdb,val) == -1) return -1; if (rdbSaveStringObject(rdb,key) == -1) return -1; if (rdbSaveObject(rdb,val,key) == -1) return -1; + + /* Delay return if required (for testing) */ + if (server.rdb_key_save_delay) + usleep(server.rdb_key_save_delay); + return 1; } @@ -1800,18 +1812,23 @@ robj *rdbLoadObject(int rdbtype, rio *rdb, robj *key) { /* Mark that we are loading in the global state and setup the fields * needed to provide loading stats. 
*/ -void startLoading(FILE *fp) { - struct stat sb; - +void startLoading(size_t size) { /* Load the DB */ server.loading = 1; server.loading_start_time = time(NULL); server.loading_loaded_bytes = 0; - if (fstat(fileno(fp), &sb) == -1) { - server.loading_total_bytes = 0; - } else { - server.loading_total_bytes = sb.st_size; - } + server.loading_total_bytes = size; +} + +/* Mark that we are loading in the global state and setup the fields + * needed to provide loading stats. + * 'filename' is optional and used for rdb-check on error */ +void startLoadingFile(FILE *fp, char* filename) { + struct stat sb; + if (fstat(fileno(fp), &sb) == -1) + sb.st_size = 0; + rdbFileBeingLoaded = filename; + startLoading(sb.st_size); } /* Refresh the loading progress info */ @@ -1824,6 +1841,7 @@ void loadingProgress(off_t pos) { /* Loading finished */ void stopLoading(void) { server.loading = 0; + rdbFileBeingLoaded = NULL; } /* Track loading progress in order to serve client's from time to time @@ -2089,7 +2107,7 @@ int rdbLoad(char *filename, rdbSaveInfo *rsi) { int retval; if ((fp = fopen(filename,"r")) == NULL) return C_ERR; - startLoading(fp); + startLoadingFile(fp, filename); rioInitWithFile(&rdb,fp); retval = rdbLoadRio(&rdb,rsi,0); fclose(fp); diff --git a/src/redis-check-rdb.c b/src/redis-check-rdb.c index ec00ee71..e2d71b5a 100644 --- a/src/redis-check-rdb.c +++ b/src/redis-check-rdb.c @@ -202,7 +202,7 @@ int redis_check_rdb(char *rdbfilename, FILE *fp) { } expiretime = -1; - startLoading(fp); + startLoadingFile(fp, rdbfilename); while(1) { robj *key, *val; @@ -314,6 +314,7 @@ int redis_check_rdb(char *rdbfilename, FILE *fp) { } if (closefile) fclose(fp); + stopLoading(); return 0; eoferr: /* unexpected end of file is handled here with a fatal exit */ @@ -324,6 +325,7 @@ eoferr: /* unexpected end of file is handled here with a fatal exit */ } err: if (closefile) fclose(fp); + stopLoading(); return 1; } diff --git a/src/replication.c b/src/replication.c index 63a67a06..e2bac08b 100644 --- a/src/replication.c +++ b/src/replication.c @@ -1113,11 +1113,22 @@ void restartAOFAfterSYNC() { } } +static int useDisklessLoad() { + /* compute boolean decision to use diskless load */ + return server.repl_diskless_load == REPL_DISKLESS_LOAD_SWAPDB || + (server.repl_diskless_load == REPL_DISKLESS_LOAD_WHEN_DB_EMPTY && dbTotalServerKeyCount()==0); +} + + /* Asynchronously read the SYNC payload we receive from a master */ #define REPL_MAX_WRITTEN_BEFORE_FSYNC (1024*1024*8) /* 8 MB */ void readSyncBulkPayload(aeEventLoop *el, int fd, void *privdata, int mask) { char buf[4096]; ssize_t nread, readlen, nwritten; + int use_diskless_load; + redisDb *diskless_load_backup = NULL; + int empty_db_flags = server.repl_slave_lazy_flush ? EMPTYDB_ASYNC : EMPTYDB_NO_FLAGS; + int i; off_t left; UNUSED(el); UNUSED(privdata); @@ -1173,90 +1184,177 @@ void readSyncBulkPayload(aeEventLoop *el, int fd, void *privdata, int mask) { * at the next call. */ server.repl_transfer_size = 0; serverLog(LL_NOTICE, - "MASTER <-> REPLICA sync: receiving streamed RDB from master"); + "MASTER <-> REPLICA sync: receiving streamed RDB from master with EOF %s", + useDisklessLoad()? "to parser":"to disk"); } else { usemark = 0; server.repl_transfer_size = strtol(buf+1,NULL,10); serverLog(LL_NOTICE, - "MASTER <-> REPLICA sync: receiving %lld bytes from master", - (long long) server.repl_transfer_size); + "MASTER <-> REPLICA sync: receiving %lld bytes from master %s", + (long long) server.repl_transfer_size, + useDisklessLoad()? 
"to parser":"to disk"); } return; } - /* Read bulk data */ - if (usemark) { - readlen = sizeof(buf); - } else { - left = server.repl_transfer_size - server.repl_transfer_read; - readlen = (left < (signed)sizeof(buf)) ? left : (signed)sizeof(buf); - } + use_diskless_load = useDisklessLoad(); + if (!use_diskless_load) { - nread = read(fd,buf,readlen); - if (nread <= 0) { - serverLog(LL_WARNING,"I/O error trying to sync with MASTER: %s", - (nread == -1) ? strerror(errno) : "connection lost"); - cancelReplicationHandshake(); - return; - } - server.stat_net_input_bytes += nread; - - /* When a mark is used, we want to detect EOF asap in order to avoid - * writing the EOF mark into the file... */ - int eof_reached = 0; - - if (usemark) { - /* Update the last bytes array, and check if it matches our delimiter.*/ - if (nread >= CONFIG_RUN_ID_SIZE) { - memcpy(lastbytes,buf+nread-CONFIG_RUN_ID_SIZE,CONFIG_RUN_ID_SIZE); + /* read the data from the socket, store it to a file and search for the EOF */ + if (usemark) { + readlen = sizeof(buf); } else { - int rem = CONFIG_RUN_ID_SIZE-nread; - memmove(lastbytes,lastbytes+nread,rem); - memcpy(lastbytes+rem,buf,nread); + left = server.repl_transfer_size - server.repl_transfer_read; + readlen = (left < (signed)sizeof(buf)) ? left : (signed)sizeof(buf); } - if (memcmp(lastbytes,eofmark,CONFIG_RUN_ID_SIZE) == 0) eof_reached = 1; - } - server.repl_transfer_lastio = server.unixtime; - if ((nwritten = write(server.repl_transfer_fd,buf,nread)) != nread) { - serverLog(LL_WARNING,"Write error or short write writing to the DB dump file needed for MASTER <-> REPLICA synchronization: %s", - (nwritten == -1) ? strerror(errno) : "short write"); - goto error; - } - server.repl_transfer_read += nread; + nread = read(fd,buf,readlen); + if (nread <= 0) { + serverLog(LL_WARNING,"I/O error trying to sync with MASTER: %s", + (nread == -1) ? strerror(errno) : "connection lost"); + cancelReplicationHandshake(); + return; + } + server.stat_net_input_bytes += nread; - /* Delete the last 40 bytes from the file if we reached EOF. */ - if (usemark && eof_reached) { - if (ftruncate(server.repl_transfer_fd, - server.repl_transfer_read - CONFIG_RUN_ID_SIZE) == -1) - { - serverLog(LL_WARNING,"Error truncating the RDB file received from the master for SYNC: %s", strerror(errno)); + /* When a mark is used, we want to detect EOF asap in order to avoid + * writing the EOF mark into the file... */ + int eof_reached = 0; + + if (usemark) { + /* Update the last bytes array, and check if it matches our delimiter.*/ + if (nread >= CONFIG_RUN_ID_SIZE) { + memcpy(lastbytes,buf+nread-CONFIG_RUN_ID_SIZE,CONFIG_RUN_ID_SIZE); + } else { + int rem = CONFIG_RUN_ID_SIZE-nread; + memmove(lastbytes,lastbytes+nread,rem); + memcpy(lastbytes+rem,buf,nread); + } + if (memcmp(lastbytes,eofmark,CONFIG_RUN_ID_SIZE) == 0) eof_reached = 1; + } + + server.repl_transfer_lastio = server.unixtime; + if ((nwritten = write(server.repl_transfer_fd,buf,nread)) != nread) { + serverLog(LL_WARNING,"Write error or short write writing to the DB dump file needed for MASTER <-> REPLICA synchronization: %s", + (nwritten == -1) ? strerror(errno) : "short write"); goto error; } + server.repl_transfer_read += nread; + + /* Delete the last 40 bytes from the file if we reached EOF. 
*/ + if (usemark && eof_reached) { + if (ftruncate(server.repl_transfer_fd, + server.repl_transfer_read - CONFIG_RUN_ID_SIZE) == -1) + { + serverLog(LL_WARNING,"Error truncating the RDB file received from the master for SYNC: %s", strerror(errno)); + goto error; + } + } + + /* Sync data on disk from time to time, otherwise at the end of the transfer + * we may suffer a big delay as the memory buffers are copied into the + * actual disk. */ + if (server.repl_transfer_read >= + server.repl_transfer_last_fsync_off + REPL_MAX_WRITTEN_BEFORE_FSYNC) + { + off_t sync_size = server.repl_transfer_read - + server.repl_transfer_last_fsync_off; + rdb_fsync_range(server.repl_transfer_fd, + server.repl_transfer_last_fsync_off, sync_size); + server.repl_transfer_last_fsync_off += sync_size; + } + + /* Check if the transfer is now complete */ + if (!usemark) { + if (server.repl_transfer_read == server.repl_transfer_size) + eof_reached = 1; + } + if (!eof_reached) + return; } - /* Sync data on disk from time to time, otherwise at the end of the transfer - * we may suffer a big delay as the memory buffers are copied into the - * actual disk. */ - if (server.repl_transfer_read >= - server.repl_transfer_last_fsync_off + REPL_MAX_WRITTEN_BEFORE_FSYNC) - { - off_t sync_size = server.repl_transfer_read - - server.repl_transfer_last_fsync_off; - rdb_fsync_range(server.repl_transfer_fd, - server.repl_transfer_last_fsync_off, sync_size); - server.repl_transfer_last_fsync_off += sync_size; + /* We reach here when the slave is using diskless replication, + * or when we are done reading from the socket to the rdb file. */ + serverLog(LL_NOTICE, "MASTER <-> REPLICA sync: Flushing old data"); + /* We need to stop any AOFRW fork before flusing and parsing + * RDB, otherwise we'll create a copy-on-write disaster. */ + if (server.aof_state != AOF_OFF) stopAppendOnly(); + signalFlushedDb(-1); + if (use_diskless_load && server.repl_diskless_load == REPL_DISKLESS_LOAD_SWAPDB) { + /* create a backup of the current db */ + diskless_load_backup = zmalloc(sizeof(redisDb)*server.dbnum); + for (i=0; i REPLICA sync: Loading DB in memory"); + rdbSaveInfo rsi = RDB_SAVE_INFO_INIT; + if (use_diskless_load) { + rio rdb; + rioInitWithFd(&rdb,fd,server.repl_transfer_size); + /* Put the socket in blocking mode to simplify RDB transfer. + * We'll restore it when the RDB is received. */ + anetBlock(NULL,fd); + anetRecvTimeout(NULL,fd,server.repl_timeout*1000); - /* Check if the transfer is now complete */ - if (!usemark) { - if (server.repl_transfer_read == server.repl_transfer_size) - eof_reached = 1; - } - - if (eof_reached) { - int aof_is_enabled = server.aof_state != AOF_OFF; - + startLoading(server.repl_transfer_size); + if (rdbLoadRio(&rdb,&rsi,0) != C_OK) { + /* rdbloading failed */ + stopLoading(); + serverLog(LL_WARNING,"Failed trying to load the MASTER synchronization DB from socket"); + cancelReplicationHandshake(); + rioFreeFd(&rdb, NULL); + if (server.repl_diskless_load == REPL_DISKLESS_LOAD_SWAPDB) { + /* restore the backed up db */ + emptyDbGeneric(server.db,-1,empty_db_flags,replicationEmptyDbCallback); + for (i=0; i REPLICA synchronization: %s", - server.rdb_filename, strerror(errno)); + server.rdb_filename, strerror(errno)); cancelReplicationHandshake(); return; } - serverLog(LL_NOTICE, "MASTER <-> REPLICA sync: Flushing old data"); - /* We need to stop any AOFRW fork before flusing and parsing - * RDB, otherwise we'll create a copy-on-write disaster. 
*/ - if(aof_is_enabled) stopAppendOnly(); - signalFlushedDb(-1); - emptyDb( - -1, - server.repl_slave_lazy_flush ? EMPTYDB_ASYNC : EMPTYDB_NO_FLAGS, - replicationEmptyDbCallback); - /* Before loading the DB into memory we need to delete the readable - * handler, otherwise it will get called recursively since - * rdbLoad() will call the event loop to process events from time to - * time for non blocking loading. */ - aeDeleteFileEvent(server.el,server.repl_transfer_s,AE_READABLE); - serverLog(LL_NOTICE, "MASTER <-> REPLICA sync: Loading DB in memory"); - rdbSaveInfo rsi = RDB_SAVE_INFO_INIT; if (rdbLoad(server.rdb_filename,&rsi) != C_OK) { serverLog(LL_WARNING,"Failed trying to load the MASTER synchronization DB from disk"); cancelReplicationHandshake(); - /* Re-enable the AOF if we disabled it earlier, in order to restore - * the original configuration. */ - if (aof_is_enabled) restartAOFAfterSYNC(); + /* Note that there's no point in restarting the AOF on sync failure, + it'll be restarted when sync succeeds or slave promoted. */ return; } - /* Final setup of the connected slave <- master link */ zfree(server.repl_transfer_tmpfile); close(server.repl_transfer_fd); - replicationCreateMasterClient(server.repl_transfer_s,rsi.repl_stream_db); - server.repl_state = REPL_STATE_CONNECTED; - server.repl_down_since = 0; - /* After a full resynchroniziation we use the replication ID and - * offset of the master. The secondary ID / offset are cleared since - * we are starting a new history. */ - memcpy(server.replid,server.master->replid,sizeof(server.replid)); - server.master_repl_offset = server.master->reploff; - clearReplicationId2(); - /* Let's create the replication backlog if needed. Slaves need to - * accumulate the backlog regardless of the fact they have sub-slaves - * or not, in order to behave correctly if they are promoted to - * masters after a failover. */ - if (server.repl_backlog == NULL) createReplicationBacklog(); - - serverLog(LL_NOTICE, "MASTER <-> REPLICA sync: Finished with success"); - /* Restart the AOF subsystem now that we finished the sync. This - * will trigger an AOF rewrite, and when done will start appending - * to the new file. */ - if (aof_is_enabled) restartAOFAfterSYNC(); + server.repl_transfer_fd = -1; + server.repl_transfer_tmpfile = NULL; } + /* Final setup of the connected slave <- master link */ + replicationCreateMasterClient(server.repl_transfer_s,rsi.repl_stream_db); + server.repl_state = REPL_STATE_CONNECTED; + server.repl_down_since = 0; + /* After a full resynchroniziation we use the replication ID and + * offset of the master. The secondary ID / offset are cleared since + * we are starting a new history. */ + memcpy(server.replid,server.master->replid,sizeof(server.replid)); + server.master_repl_offset = server.master->reploff; + clearReplicationId2(); + /* Let's create the replication backlog if needed. Slaves need to + * accumulate the backlog regardless of the fact they have sub-slaves + * or not, in order to behave correctly if they are promoted to + * masters after a failover. */ + if (server.repl_backlog == NULL) createReplicationBacklog(); + + serverLog(LL_NOTICE, "MASTER <-> REPLICA sync: Finished with success"); + /* Restart the AOF subsystem now that we finished the sync. This + * will trigger an AOF rewrite, and when done will start appending + * to the new file. 
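+     * Note that the check below is on the configured aof_enabled flag
+     * rather than on aof_state: stopAppendOnly() earlier in this
+     * function already forced aof_state to AOF_OFF for the duration of
+     * the sync.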
*/ + if (server.aof_enabled) restartAOFAfterSYNC(); return; error: @@ -1845,16 +1928,20 @@ void syncWithMaster(aeEventLoop *el, int fd, void *privdata, int mask) { } /* Prepare a suitable temp file for bulk transfer */ - while(maxtries--) { - snprintf(tmpfile,256, - "temp-%d.%ld.rdb",(int)server.unixtime,(long int)getpid()); - dfd = open(tmpfile,O_CREAT|O_WRONLY|O_EXCL,0644); - if (dfd != -1) break; - sleep(1); - } - if (dfd == -1) { - serverLog(LL_WARNING,"Opening the temp file needed for MASTER <-> REPLICA synchronization: %s",strerror(errno)); - goto error; + if (!useDisklessLoad()) { + while(maxtries--) { + snprintf(tmpfile,256, + "temp-%d.%ld.rdb",(int)server.unixtime,(long int)getpid()); + dfd = open(tmpfile,O_CREAT|O_WRONLY|O_EXCL,0644); + if (dfd != -1) break; + sleep(1); + } + if (dfd == -1) { + serverLog(LL_WARNING,"Opening the temp file needed for MASTER <-> REPLICA synchronization: %s",strerror(errno)); + goto error; + } + server.repl_transfer_tmpfile = zstrdup(tmpfile); + server.repl_transfer_fd = dfd; } /* Setup the non blocking download of the bulk file. */ @@ -1871,15 +1958,19 @@ void syncWithMaster(aeEventLoop *el, int fd, void *privdata, int mask) { server.repl_transfer_size = -1; server.repl_transfer_read = 0; server.repl_transfer_last_fsync_off = 0; - server.repl_transfer_fd = dfd; server.repl_transfer_lastio = server.unixtime; - server.repl_transfer_tmpfile = zstrdup(tmpfile); return; error: aeDeleteFileEvent(server.el,fd,AE_READABLE|AE_WRITABLE); if (dfd != -1) close(dfd); close(fd); + if (server.repl_transfer_fd != -1) + close(server.repl_transfer_fd); + if (server.repl_transfer_tmpfile) + zfree(server.repl_transfer_tmpfile); + server.repl_transfer_tmpfile = NULL; + server.repl_transfer_fd = -1; server.repl_transfer_s = -1; server.repl_state = REPL_STATE_CONNECT; return; @@ -1933,9 +2024,13 @@ void undoConnectWithMaster(void) { void replicationAbortSyncTransfer(void) { serverAssert(server.repl_state == REPL_STATE_TRANSFER); undoConnectWithMaster(); - close(server.repl_transfer_fd); - unlink(server.repl_transfer_tmpfile); - zfree(server.repl_transfer_tmpfile); + if (server.repl_transfer_fd!=-1) { + close(server.repl_transfer_fd); + unlink(server.repl_transfer_tmpfile); + zfree(server.repl_transfer_tmpfile); + server.repl_transfer_tmpfile = NULL; + server.repl_transfer_fd = -1; + } } /* This function aborts a non blocking replication attempt if there is one @@ -2045,6 +2140,9 @@ void replicaofCommand(client *c) { serverLog(LL_NOTICE,"MASTER MODE enabled (user request from '%s')", client); sdsfree(client); + /* Restart the AOF subsystem in case we shut it down during a sync when + * we were still a slave. */ + if (server.aof_enabled && server.aof_state == AOF_OFF) restartAOFAfterSYNC(); } } else { long port; diff --git a/src/rio.c b/src/rio.c index c9c76b8f..993768b5 100644 --- a/src/rio.c +++ b/src/rio.c @@ -157,6 +157,113 @@ void rioInitWithFile(rio *r, FILE *fp) { r->io.file.autosync = 0; } +/* ------------------- File descriptor implementation ------------------- */ + +static size_t rioFdWrite(rio *r, const void *buf, size_t len) { + UNUSED(r); + UNUSED(buf); + UNUSED(len); + return 0; /* Error, this target does not yet support writing. */ +} + +/* Returns 1 or 0 for success/failure. 
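+ * For this fd-based reader, success means the full 'len' bytes were
+ * buffered and copied into 'buf'; on failure zero is returned and errno
+ * is set (for instance ETIMEDOUT when the read would block past the
+ * configured timeout, or EOVERFLOW when the read limit is exceeded).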
*/ +static size_t rioFdRead(rio *r, void *buf, size_t len) { + size_t avail = sdslen(r->io.fd.buf)-r->io.fd.pos; + + /* if the buffer is too small for the entire request: realloc */ + if (sdslen(r->io.fd.buf) + sdsavail(r->io.fd.buf) < len) + r->io.fd.buf = sdsMakeRoomFor(r->io.fd.buf, len - sdslen(r->io.fd.buf)); + + /* if the remaining unused buffer is not large enough: memmove so that we can read the rest */ + if (len > avail && sdsavail(r->io.fd.buf) < len - avail) { + sdsrange(r->io.fd.buf, r->io.fd.pos, -1); + r->io.fd.pos = 0; + } + + /* if we don't already have all the data in the sds, read more */ + while (len > sdslen(r->io.fd.buf) - r->io.fd.pos) { + size_t buffered = sdslen(r->io.fd.buf) - r->io.fd.pos; + size_t toread = len - buffered; + /* read either what's missing, or PROTO_IOBUF_LEN, the bigger of the two */ + if (toread < PROTO_IOBUF_LEN) + toread = PROTO_IOBUF_LEN; + if (toread > sdsavail(r->io.fd.buf)) + toread = sdsavail(r->io.fd.buf); + if (r->io.fd.read_limit != 0 && + r->io.fd.read_so_far + buffered + toread > r->io.fd.read_limit) { + if (r->io.fd.read_limit >= r->io.fd.read_so_far - buffered) + toread = r->io.fd.read_limit - r->io.fd.read_so_far - buffered; + else { + errno = EOVERFLOW; + return 0; + } + } + int retval = read(r->io.fd.fd, (char*)r->io.fd.buf + sdslen(r->io.fd.buf), toread); + if (retval <= 0) { + if (errno == EWOULDBLOCK) errno = ETIMEDOUT; + return 0; + } + sdsIncrLen(r->io.fd.buf, retval); + } + + memcpy(buf, (char*)r->io.fd.buf + r->io.fd.pos, len); + r->io.fd.read_so_far += len; + r->io.fd.pos += len; + return len; +} + +/* Returns read/write position in file. */ +static off_t rioFdTell(rio *r) { + return r->io.fd.read_so_far; +} + +/* Flushes any buffer to target device if applicable. Returns 1 on success + * and 0 on failures. */ +static int rioFdFlush(rio *r) { + /* Our flush is implemented by the write method, that recognizes a + * buffer set to NULL with a count of zero as a flush request. */ + return rioFdWrite(r,NULL,0); +} + +static const rio rioFdIO = { + rioFdRead, + rioFdWrite, + rioFdTell, + rioFdFlush, + NULL, /* update_checksum */ + 0, /* current checksum */ + 0, /* bytes read or written */ + 0, /* read/write chunk size */ + { { NULL, 0 } } /* union for io-specific vars */ +}; + +/* create an rio that implements a buffered read from an fd + * read_limit argument stops buffering when the reaching the limit */ +void rioInitWithFd(rio *r, int fd, size_t read_limit) { + *r = rioFdIO; + r->io.fd.fd = fd; + r->io.fd.pos = 0; + r->io.fd.read_limit = read_limit; + r->io.fd.read_so_far = 0; + r->io.fd.buf = sdsnewlen(NULL, PROTO_IOBUF_LEN); + sdsclear(r->io.fd.buf); +} + +/* release the rio stream. + * optionally returns the unread buffered data. */ +void rioFreeFd(rio *r, sds* out_remainingBufferedData) { + if(out_remainingBufferedData && (size_t)r->io.fd.pos < sdslen(r->io.fd.buf)) { + if (r->io.fd.pos > 0) + sdsrange(r->io.fd.buf, r->io.fd.pos, -1); + *out_remainingBufferedData = r->io.fd.buf; + } else { + sdsfree(r->io.fd.buf); + if (out_remainingBufferedData) + *out_remainingBufferedData = NULL; + } + r->io.fd.buf = NULL; +} + /* ------------------- File descriptors set implementation ------------------- */ /* Returns 1 or 0 for success/failure. @@ -300,7 +407,7 @@ void rioGenericUpdateChecksum(rio *r, const void *buf, size_t len) { * disk I/O concentrated in very little time. When we fsync in an explicit * way instead the I/O pressure is more distributed across time. 
*/ void rioSetAutoSync(rio *r, off_t bytes) { - serverAssert(r->read == rioFileIO.read); + if(r->write != rioFileIO.write) return; r->io.file.autosync = bytes; } diff --git a/src/rio.h b/src/rio.h index c996c54f..beea0688 100644 --- a/src/rio.h +++ b/src/rio.h @@ -73,6 +73,14 @@ struct _rio { off_t buffered; /* Bytes written since last fsync. */ off_t autosync; /* fsync after 'autosync' bytes written. */ } file; + /* file descriptor */ + struct { + int fd; /* File descriptor. */ + off_t pos; /* pos in buf that was returned */ + sds buf; /* buffered data */ + size_t read_limit; /* don't allow to buffer/read more than that */ + size_t read_so_far; /* amount of data read from the rio (not buffered) */ + } fd; /* Multiple FDs target (used to write to N sockets). */ struct { int *fds; /* File descriptors. */ @@ -126,9 +134,11 @@ static inline int rioFlush(rio *r) { void rioInitWithFile(rio *r, FILE *fp); void rioInitWithBuffer(rio *r, sds s); +void rioInitWithFd(rio *r, int fd, size_t read_limit); void rioInitWithFdset(rio *r, int *fds, int numfds); void rioFreeFdset(rio *r); +void rioFreeFd(rio *r, sds* out_remainingBufferedData); size_t rioWriteBulkCount(rio *r, char prefix, long count); size_t rioWriteBulkString(rio *r, const char *buf, size_t len); diff --git a/src/server.c b/src/server.c index 78b8d8f1..8ed5b591 100644 --- a/src/server.c +++ b/src/server.c @@ -2265,6 +2265,7 @@ void initServerConfig(void) { server.aof_flush_postponed_start = 0; server.aof_rewrite_incremental_fsync = CONFIG_DEFAULT_AOF_REWRITE_INCREMENTAL_FSYNC; server.rdb_save_incremental_fsync = CONFIG_DEFAULT_RDB_SAVE_INCREMENTAL_FSYNC; + server.rdb_key_save_delay = CONFIG_DEFAULT_RDB_KEY_SAVE_DELAY; server.aof_load_truncated = CONFIG_DEFAULT_AOF_LOAD_TRUNCATED; server.aof_use_rdb_preamble = CONFIG_DEFAULT_AOF_USE_RDB_PREAMBLE; server.pidfile = NULL; @@ -2334,6 +2335,9 @@ void initServerConfig(void) { server.cached_master = NULL; server.master_initial_offset = -1; server.repl_state = REPL_STATE_NONE; + server.repl_transfer_tmpfile = NULL; + server.repl_transfer_fd = -1; + server.repl_transfer_s = -1; server.repl_syncio_timeout = CONFIG_REPL_SYNCIO_TIMEOUT; server.repl_serve_stale_data = CONFIG_DEFAULT_SLAVE_SERVE_STALE_DATA; server.repl_slave_ro = CONFIG_DEFAULT_SLAVE_READ_ONLY; @@ -2342,6 +2346,7 @@ void initServerConfig(void) { server.repl_down_since = 0; /* Never connected, repl is down since EVER. */ server.repl_disable_tcp_nodelay = CONFIG_DEFAULT_REPL_DISABLE_TCP_NODELAY; server.repl_diskless_sync = CONFIG_DEFAULT_REPL_DISKLESS_SYNC; + server.repl_diskless_load = CONFIG_DEFAULT_REPL_DISKLESS_LOAD; server.repl_diskless_sync_delay = CONFIG_DEFAULT_REPL_DISKLESS_SYNC_DELAY; server.repl_ping_slave_period = CONFIG_DEFAULT_REPL_PING_SLAVE_PERIOD; server.repl_timeout = CONFIG_DEFAULT_REPL_TIMEOUT; @@ -4053,7 +4058,7 @@ sds genRedisInfoString(char *section) { (server.aof_last_write_status == C_OK) ? "ok" : "err", server.stat_aof_cow_bytes); - if (server.aof_state != AOF_OFF) { + if (server.aof_enabled) { info = sdscatprintf(info, "aof_current_size:%lld\r\n" "aof_base_size:%lld\r\n" diff --git a/src/server.h b/src/server.h index 8686994f..f81b1010 100644 --- a/src/server.h +++ b/src/server.h @@ -132,6 +132,7 @@ typedef long long mstime_t; /* millisecond time type. 
*/ #define CONFIG_DEFAULT_RDB_FILENAME "dump.rdb" #define CONFIG_DEFAULT_REPL_DISKLESS_SYNC 0 #define CONFIG_DEFAULT_REPL_DISKLESS_SYNC_DELAY 5 +#define CONFIG_DEFAULT_RDB_KEY_SAVE_DELAY 0 #define CONFIG_DEFAULT_SLAVE_SERVE_STALE_DATA 1 #define CONFIG_DEFAULT_SLAVE_READ_ONLY 1 #define CONFIG_DEFAULT_SLAVE_IGNORE_MAXMEMORY 1 @@ -394,6 +395,12 @@ typedef long long mstime_t; /* millisecond time type. */ #define AOF_FSYNC_EVERYSEC 2 #define CONFIG_DEFAULT_AOF_FSYNC AOF_FSYNC_EVERYSEC +/* Replication diskless load defines */ +#define REPL_DISKLESS_LOAD_DISABLED 0 +#define REPL_DISKLESS_LOAD_WHEN_DB_EMPTY 1 +#define REPL_DISKLESS_LOAD_SWAPDB 2 +#define CONFIG_DEFAULT_REPL_DISKLESS_LOAD REPL_DISKLESS_LOAD_DISABLED + /* Zipped structures related defaults */ #define OBJ_HASH_MAX_ZIPLIST_ENTRIES 512 #define OBJ_HASH_MAX_ZIPLIST_VALUE 64 @@ -1158,6 +1165,7 @@ struct redisServer { int daemonize; /* True if running as a daemon */ clientBufferLimitsConfig client_obuf_limits[CLIENT_TYPE_OBUF_COUNT]; /* AOF persistence */ + int aof_enabled; /* AOF configuration */ int aof_state; /* AOF_(ON|OFF|WAIT_REWRITE) */ int aof_fsync; /* Kind of fsync() policy */ char *aof_filename; /* Name of the AOF file */ @@ -1214,6 +1222,8 @@ struct redisServer { int stop_writes_on_bgsave_err; /* Don't allow writes if can't BGSAVE */ int rdb_pipe_write_result_to_parent; /* RDB pipes used to return the state */ int rdb_pipe_read_result_from_child; /* of each slave in diskless SYNC. */ + int rdb_key_save_delay; /* Delay in microseconds between keys while + * writing the RDB. (for testings) */ /* Pipe and data structures for child -> parent info sharing. */ int child_info_pipe[2]; /* Pipe used to write the child_info_data. */ struct { @@ -1249,7 +1259,9 @@ struct redisServer { int repl_min_slaves_to_write; /* Min number of slaves to write. */ int repl_min_slaves_max_lag; /* Max lag of slaves to write. */ int repl_good_slaves_count; /* Number of slaves with lag <= max_lag. */ - int repl_diskless_sync; /* Send RDB to slaves sockets directly. */ + int repl_diskless_sync; /* Master send RDB to slaves sockets directly. */ + int repl_diskless_load; /* Slave parse RDB directly from the socket. + * see REPL_DISKLESS_LOAD_* enum */ int repl_diskless_sync_delay; /* Delay to start a diskless repl BGSAVE. */ /* Replication (slave) */ char *masteruser; /* AUTH with this user and masterauth with master */ @@ -1739,7 +1751,8 @@ void replicationCacheMasterUsingMyself(void); void feedReplicationBacklog(void *ptr, size_t len); /* Generic persistence functions */ -void startLoading(FILE *fp); +void startLoadingFile(FILE* fp, char* filename); +void startLoading(size_t size); void loadingProgress(off_t pos); void stopLoading(void); @@ -1996,6 +2009,8 @@ robj *dbUnshareStringValue(redisDb *db, robj *key, robj *o); #define EMPTYDB_NO_FLAGS 0 /* No flags. */ #define EMPTYDB_ASYNC (1<<0) /* Reclaim memory in another thread. 
*/ long long emptyDb(int dbnum, int flags, void(callback)(void*)); +long long emptyDbGeneric(redisDb *dbarray, int dbnum, int flags, void(callback)(void*)); +long long dbTotalServerKeyCount(); int selectDb(client *c, int id); void signalModifiedKey(redisDb *db, robj *key); diff --git a/tests/integration/replication-4.tcl b/tests/integration/replication-4.tcl index 3c6df52a..54891151 100644 --- a/tests/integration/replication-4.tcl +++ b/tests/integration/replication-4.tcl @@ -1,12 +1,3 @@ -proc start_bg_complex_data {host port db ops} { - set tclsh [info nameofexecutable] - exec $tclsh tests/helpers/bg_complex_data.tcl $host $port $db $ops & -} - -proc stop_bg_complex_data {handle} { - catch {exec /bin/kill -9 $handle} -} - start_server {tags {"repl"}} { start_server {} { diff --git a/tests/integration/replication-psync.tcl b/tests/integration/replication-psync.tcl index bf868244..3c98723a 100644 --- a/tests/integration/replication-psync.tcl +++ b/tests/integration/replication-psync.tcl @@ -1,12 +1,3 @@ -proc start_bg_complex_data {host port db ops} { - set tclsh [info nameofexecutable] - exec $tclsh tests/helpers/bg_complex_data.tcl $host $port $db $ops & -} - -proc stop_bg_complex_data {handle} { - catch {exec /bin/kill -9 $handle} -} - # Creates a master-slave pair and breaks the link continuously to force # partial resyncs attempts, all this while flooding the master with # write queries. @@ -17,7 +8,7 @@ proc stop_bg_complex_data {handle} { # If reconnect is > 0, the test actually try to break the connection and # reconnect with the master, otherwise just the initial synchronization is # checked for consistency. -proc test_psync {descr duration backlog_size backlog_ttl delay cond diskless reconnect} { +proc test_psync {descr duration backlog_size backlog_ttl delay cond mdl sdl reconnect} { start_server {tags {"repl"}} { start_server {} { @@ -28,8 +19,9 @@ proc test_psync {descr duration backlog_size backlog_ttl delay cond diskless rec $master config set repl-backlog-size $backlog_size $master config set repl-backlog-ttl $backlog_ttl - $master config set repl-diskless-sync $diskless + $master config set repl-diskless-sync $mdl $master config set repl-diskless-sync-delay 1 + $slave config set repl-diskless-load $sdl set load_handle0 [start_bg_complex_data $master_host $master_port 9 100000] set load_handle1 [start_bg_complex_data $master_host $master_port 11 100000] @@ -54,7 +46,7 @@ proc test_psync {descr duration backlog_size backlog_ttl delay cond diskless rec } } - test "Test replication partial resync: $descr (diskless: $diskless, reconnect: $reconnect)" { + test "Test replication partial resync: $descr (diskless: $mdl, $sdl, reconnect: $reconnect)" { # Now while the clients are writing data, break the maste-slave # link multiple times. 
if ($reconnect) { @@ -132,23 +124,25 @@ proc test_psync {descr duration backlog_size backlog_ttl delay cond diskless rec } } -foreach diskless {no yes} { - test_psync {no reconnection, just sync} 6 1000000 3600 0 { - } $diskless 0 +foreach mdl {no yes} { + foreach sdl {disabled swapdb} { + test_psync {no reconnection, just sync} 6 1000000 3600 0 { + } $mdl $sdl 0 - test_psync {ok psync} 6 100000000 3600 0 { + test_psync {ok psync} 6 100000000 3600 0 { assert {[s -1 sync_partial_ok] > 0} - } $diskless 1 + } $mdl $sdl 1 - test_psync {no backlog} 6 100 3600 0.5 { + test_psync {no backlog} 6 100 3600 0.5 { assert {[s -1 sync_partial_err] > 0} - } $diskless 1 + } $mdl $sdl 1 - test_psync {ok after delay} 3 100000000 3600 3 { + test_psync {ok after delay} 3 100000000 3600 3 { assert {[s -1 sync_partial_ok] > 0} - } $diskless 1 + } $mdl $sdl 1 - test_psync {backlog expired} 3 100000000 1 3 { + test_psync {backlog expired} 3 100000000 1 3 { assert {[s -1 sync_partial_err] > 0} - } $diskless 1 + } $mdl $sdl 1 + } } diff --git a/tests/integration/replication.tcl b/tests/integration/replication.tcl index 0e50c20a..d69a1761 100644 --- a/tests/integration/replication.tcl +++ b/tests/integration/replication.tcl @@ -183,85 +183,92 @@ start_server {tags {"repl"}} { } } -foreach dl {no yes} { - start_server {tags {"repl"}} { - set master [srv 0 client] - $master config set repl-diskless-sync $dl - set master_host [srv 0 host] - set master_port [srv 0 port] - set slaves {} - set load_handle0 [start_write_load $master_host $master_port 3] - set load_handle1 [start_write_load $master_host $master_port 5] - set load_handle2 [start_write_load $master_host $master_port 20] - set load_handle3 [start_write_load $master_host $master_port 8] - set load_handle4 [start_write_load $master_host $master_port 4] - start_server {} { - lappend slaves [srv 0 client] +foreach mdl {no yes} { + foreach sdl {disabled swapdb} { + start_server {tags {"repl"}} { + set master [srv 0 client] + $master config set repl-diskless-sync $mdl + $master config set repl-diskless-sync-delay 1 + set master_host [srv 0 host] + set master_port [srv 0 port] + set slaves {} + set load_handle0 [start_bg_complex_data $master_host $master_port 9 100000000] + set load_handle1 [start_bg_complex_data $master_host $master_port 11 100000000] + set load_handle2 [start_bg_complex_data $master_host $master_port 12 100000000] + set load_handle3 [start_write_load $master_host $master_port 8] + set load_handle4 [start_write_load $master_host $master_port 4] + after 5000 ;# wait for some data to accumulate so that we have RDB part for the fork start_server {} { lappend slaves [srv 0 client] start_server {} { lappend slaves [srv 0 client] - test "Connect multiple replicas at the same time (issue #141), diskless=$dl" { - # Send SLAVEOF commands to slaves - [lindex $slaves 0] slaveof $master_host $master_port - [lindex $slaves 1] slaveof $master_host $master_port - [lindex $slaves 2] slaveof $master_host $master_port + start_server {} { + lappend slaves [srv 0 client] + test "Connect multiple replicas at the same time (issue #141), master diskless=$mdl, replica diskless=$sdl" { + # Send SLAVEOF commands to slaves + [lindex $slaves 0] config set repl-diskless-load $sdl + [lindex $slaves 1] config set repl-diskless-load $sdl + [lindex $slaves 2] config set repl-diskless-load $sdl + [lindex $slaves 0] slaveof $master_host $master_port + [lindex $slaves 1] slaveof $master_host $master_port + [lindex $slaves 2] slaveof $master_host $master_port - # Wait for all the three 
slaves to reach the "online" - # state from the POV of the master. - set retry 500 - while {$retry} { - set info [r -3 info] - if {[string match {*slave0:*state=online*slave1:*state=online*slave2:*state=online*} $info]} { - break - } else { - incr retry -1 - after 100 + # Wait for all the three slaves to reach the "online" + # state from the POV of the master. + set retry 500 + while {$retry} { + set info [r -3 info] + if {[string match {*slave0:*state=online*slave1:*state=online*slave2:*state=online*} $info]} { + break + } else { + incr retry -1 + after 100 + } + } + if {$retry == 0} { + error "assertion:Slaves not correctly synchronized" } - } - if {$retry == 0} { - error "assertion:Replicas not correctly synchronized" - } - # Wait that slaves acknowledge they are online so - # we are sure that DBSIZE and DEBUG DIGEST will not - # fail because of timing issues. - wait_for_condition 500 100 { - [lindex [[lindex $slaves 0] role] 3] eq {connected} && - [lindex [[lindex $slaves 1] role] 3] eq {connected} && - [lindex [[lindex $slaves 2] role] 3] eq {connected} - } else { - fail "Replicas still not connected after some time" + # Wait that slaves acknowledge they are online so + # we are sure that DBSIZE and DEBUG DIGEST will not + # fail because of timing issues. + wait_for_condition 500 100 { + [lindex [[lindex $slaves 0] role] 3] eq {connected} && + [lindex [[lindex $slaves 1] role] 3] eq {connected} && + [lindex [[lindex $slaves 2] role] 3] eq {connected} + } else { + fail "Slaves still not connected after some time" + } + + # Stop the write load + stop_bg_complex_data $load_handle0 + stop_bg_complex_data $load_handle1 + stop_bg_complex_data $load_handle2 + stop_write_load $load_handle3 + stop_write_load $load_handle4 + + # Make sure that slaves and master have same + # number of keys + wait_for_condition 500 100 { + [$master dbsize] == [[lindex $slaves 0] dbsize] && + [$master dbsize] == [[lindex $slaves 1] dbsize] && + [$master dbsize] == [[lindex $slaves 2] dbsize] + } else { + fail "Different number of keys between master and replica after too long time." + } + + # Check digests + set digest [$master debug digest] + set digest0 [[lindex $slaves 0] debug digest] + set digest1 [[lindex $slaves 1] debug digest] + set digest2 [[lindex $slaves 2] debug digest] + assert {$digest ne 0000000000000000000000000000000000000000} + assert {$digest eq $digest0} + assert {$digest eq $digest1} + assert {$digest eq $digest2} } - - # Stop the write load - stop_write_load $load_handle0 - stop_write_load $load_handle1 - stop_write_load $load_handle2 - stop_write_load $load_handle3 - stop_write_load $load_handle4 - - # Make sure that slaves and master have same - # number of keys - wait_for_condition 500 100 { - [$master dbsize] == [[lindex $slaves 0] dbsize] && - [$master dbsize] == [[lindex $slaves 1] dbsize] && - [$master dbsize] == [[lindex $slaves 2] dbsize] - } else { - fail "Different number of keys between masted and replica after too long time." 
- } - - # Check digests - set digest [$master debug digest] - set digest0 [[lindex $slaves 0] debug digest] - set digest1 [[lindex $slaves 1] debug digest] - set digest2 [[lindex $slaves 2] debug digest] - assert {$digest ne 0000000000000000000000000000000000000000} - assert {$digest eq $digest0} - assert {$digest eq $digest1} - assert {$digest eq $digest2} - } - } + } + } } } } @@ -309,3 +316,70 @@ start_server {tags {"repl"}} { } } } + +test {slave fails full sync and diskless load swapdb recoveres it} { + start_server {tags {"repl"}} { + set slave [srv 0 client] + set slave_host [srv 0 host] + set slave_port [srv 0 port] + set slave_log [srv 0 stdout] + start_server {} { + set master [srv 0 client] + set master_host [srv 0 host] + set master_port [srv 0 port] + + # Put different data sets on the master and slave + # we need to put large keys on the master since the slave replies to info only once in 2mb + $slave debug populate 2000 slave 10 + $master debug populate 200 master 100000 + $master config set rdbcompression no + + # Set master and slave to use diskless replication + $master config set repl-diskless-sync yes + $master config set repl-diskless-sync-delay 0 + $slave config set repl-diskless-load swapdb + + # Set master with a slow rdb generation, so that we can easily disconnect it mid sync + # 10ms per key, with 200 keys is 2 seconds + $master config set rdb-key-save-delay 10000 + + # Start the replication process... + $slave slaveof $master_host $master_port + + # wait for the slave to start reading the rdb + wait_for_condition 50 100 { + [s -1 loading] eq 1 + } else { + fail "Replica didn't get into loading mode" + } + + # make sure that next sync will not start immediately so that we can catch the slave in betweeen syncs + $master config set repl-diskless-sync-delay 5 + # for faster server shutdown, make rdb saving fast again (the fork is already uses the slow one) + $master config set rdb-key-save-delay 0 + + # waiting slave to do flushdb (key count drop) + wait_for_condition 50 100 { + 2000 != [scan [regexp -inline {keys\=([\d]*)} [$slave info keyspace]] keys=%d] + } else { + fail "Replica didn't flush" + } + + # make sure we're still loading + assert_equal [s -1 loading] 1 + + # kill the slave connection on the master + set killed [$master client kill type slave] + + # wait for loading to stop (fail) + wait_for_condition 50 100 { + [s -1 loading] eq 0 + } else { + fail "Replica didn't disconnect" + } + + # make sure the original keys were restored + assert_equal [$slave dbsize] 2000 + } + } +} diff --git a/tests/support/util.tcl b/tests/support/util.tcl index 74f491e4..41cc5612 100644 --- a/tests/support/util.tcl +++ b/tests/support/util.tcl @@ -399,3 +399,15 @@ proc lshuffle {list} { } return $slist } + +# Execute a background process writing complex data for the specified number +# of ops to the specified Redis instance. +proc start_bg_complex_data {host port db ops} { + set tclsh [info nameofexecutable] + exec $tclsh tests/helpers/bg_complex_data.tcl $host $port $db $ops & +} + +# Stop a process generating write load executed with start_bg_complex_data. +proc stop_bg_complex_data {handle} { + catch {exec /bin/kill -9 $handle} +} From 81b18fa3a0926b60a59083eee144cbf3d0e2fd64 Mon Sep 17 00:00:00 2001 From: antirez Date: Mon, 8 Jul 2019 18:32:47 +0200 Subject: [PATCH 119/304] Diskless replica: a few aesthetic changes to replication.c. 
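As context for this area of the code: with the diskless-load work in place, a
replica-side socket load can be exercised roughly as in the sketch below. This
is an illustrative snippet only (not part of this commit); it relies on the
start_server/srv helpers used by the test suite and on the repl-diskless-sync
and repl-diskless-load options introduced earlier in the series:

    start_server {tags {"repl"}} {
        set replica [srv 0 client]
        start_server {} {
            set master [srv 0 client]
            set master_host [srv 0 host]
            set master_port [srv 0 port]

            # The master streams the RDB straight into the replica's
            # socket instead of writing a dump.rdb on disk first.
            $master config set repl-diskless-sync yes
            $master config set repl-diskless-sync-delay 0

            # The replica parses the RDB directly from the socket,
            # keeping a backup of its current DBs (swapdb mode) until
            # the load is known to have succeeded.
            $replica config set repl-diskless-load swapdb

            $replica slaveof $master_host $master_port
            wait_for_condition 50 100 {
                [lindex [$replica role] 3] eq {connected}
            } else {
                fail "Replica did not reach the connected state"
            }
        }
    }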
--- src/replication.c | 126 ++++++++++++++++++++++++++++++++-------------- src/rio.c | 12 +++-- 2 files changed, 96 insertions(+), 42 deletions(-) diff --git a/src/replication.c b/src/replication.c index e2bac08b..a7c1c0d6 100644 --- a/src/replication.c +++ b/src/replication.c @@ -1127,7 +1127,8 @@ void readSyncBulkPayload(aeEventLoop *el, int fd, void *privdata, int mask) { ssize_t nread, readlen, nwritten; int use_diskless_load; redisDb *diskless_load_backup = NULL; - int empty_db_flags = server.repl_slave_lazy_flush ? EMPTYDB_ASYNC : EMPTYDB_NO_FLAGS; + int empty_db_flags = server.repl_slave_lazy_flush ? EMPTYDB_ASYNC : + EMPTYDB_NO_FLAGS; int i; off_t left; UNUSED(el); @@ -1199,8 +1200,8 @@ void readSyncBulkPayload(aeEventLoop *el, int fd, void *privdata, int mask) { use_diskless_load = useDisklessLoad(); if (!use_diskless_load) { - - /* read the data from the socket, store it to a file and search for the EOF */ + /* Read the data from the socket, store it to a file and search + * for the EOF. */ if (usemark) { readlen = sizeof(buf); } else { @@ -1222,20 +1223,28 @@ void readSyncBulkPayload(aeEventLoop *el, int fd, void *privdata, int mask) { int eof_reached = 0; if (usemark) { - /* Update the last bytes array, and check if it matches our delimiter.*/ + /* Update the last bytes array, and check if it matches our + * delimiter. */ if (nread >= CONFIG_RUN_ID_SIZE) { - memcpy(lastbytes,buf+nread-CONFIG_RUN_ID_SIZE,CONFIG_RUN_ID_SIZE); + memcpy(lastbytes,buf+nread-CONFIG_RUN_ID_SIZE, + CONFIG_RUN_ID_SIZE); } else { int rem = CONFIG_RUN_ID_SIZE-nread; memmove(lastbytes,lastbytes+nread,rem); memcpy(lastbytes+rem,buf,nread); } - if (memcmp(lastbytes,eofmark,CONFIG_RUN_ID_SIZE) == 0) eof_reached = 1; + if (memcmp(lastbytes,eofmark,CONFIG_RUN_ID_SIZE) == 0) + eof_reached = 1; } + /* Update the last I/O time for the replication transfer (used in + * order to detect timeouts during replication), and write what we + * got from the socket to the dump file on disk. */ server.repl_transfer_lastio = server.unixtime; if ((nwritten = write(server.repl_transfer_fd,buf,nread)) != nread) { - serverLog(LL_WARNING,"Write error or short write writing to the DB dump file needed for MASTER <-> REPLICA synchronization: %s", + serverLog(LL_WARNING, + "Write error or short write writing to the DB dump file " + "needed for MASTER <-> REPLICA synchronization: %s", (nwritten == -1) ? strerror(errno) : "short write"); goto error; } @@ -1246,14 +1255,16 @@ void readSyncBulkPayload(aeEventLoop *el, int fd, void *privdata, int mask) { if (ftruncate(server.repl_transfer_fd, server.repl_transfer_read - CONFIG_RUN_ID_SIZE) == -1) { - serverLog(LL_WARNING,"Error truncating the RDB file received from the master for SYNC: %s", strerror(errno)); + serverLog(LL_WARNING, + "Error truncating the RDB file received from the master " + "for SYNC: %s", strerror(errno)); goto error; } } - /* Sync data on disk from time to time, otherwise at the end of the transfer - * we may suffer a big delay as the memory buffers are copied into the - * actual disk. */ + /* Sync data on disk from time to time, otherwise at the end of the + * transfer we may suffer a big delay as the memory buffers are copied + * into the actual disk. 
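+         * The threshold used here is REPL_MAX_WRITTEN_BEFORE_FSYNC
+         * bytes written since the last recorded fsync offset.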
*/ if (server.repl_transfer_read >= server.repl_transfer_last_fsync_off + REPL_MAX_WRITTEN_BEFORE_FSYNC) { @@ -1269,19 +1280,34 @@ void readSyncBulkPayload(aeEventLoop *el, int fd, void *privdata, int mask) { if (server.repl_transfer_read == server.repl_transfer_size) eof_reached = 1; } - if (!eof_reached) - return; + + /* If the transfer is yet not complete, we need to read more, so + * return ASAP and wait for the handler to be called again. */ + if (!eof_reached) return; } - /* We reach here when the slave is using diskless replication, - * or when we are done reading from the socket to the rdb file. */ + /* We reach this point in one of the following cases: + * + * 1. The replica is using diskless replication, that is, it reads data + * directly from the socket to the Redis memory, without using + * a temporary RDB file on disk. In that case we just block and + * read everything from the socket. + * + * 2. Or when we are done reading from the socket to the RDB file, in + * such case we want just to read the RDB file in memory. */ serverLog(LL_NOTICE, "MASTER <-> REPLICA sync: Flushing old data"); - /* We need to stop any AOFRW fork before flusing and parsing - * RDB, otherwise we'll create a copy-on-write disaster. */ + + /* We need to stop any AOF rewriting child before flusing and parsing + * the RDB, otherwise we'll create a copy-on-write disaster. */ if (server.aof_state != AOF_OFF) stopAppendOnly(); signalFlushedDb(-1); - if (use_diskless_load && server.repl_diskless_load == REPL_DISKLESS_LOAD_SWAPDB) { - /* create a backup of the current db */ + + /* When diskless RDB loading is used by replicas, it may be configured + * in order to save the current DB instead of throwing it away, + * so that we can restore it in case of failed transfer. */ + if (use_diskless_load && + server.repl_diskless_load == REPL_DISKLESS_LOAD_SWAPDB) + { diskless_load_backup = zmalloc(sizeof(redisDb)*server.dbnum); for (i=0; i REPLICA synchronization: %s", + serverLog(LL_WARNING, + "Failed trying to rename the temp DB into %s in " + "MASTER <-> REPLICA synchronization: %s", server.rdb_filename, strerror(errno)); cancelReplicationHandshake(); return; } if (rdbLoad(server.rdb_filename,&rsi) != C_OK) { - serverLog(LL_WARNING,"Failed trying to load the MASTER synchronization DB from disk"); + serverLog(LL_WARNING, + "Failed trying to load the MASTER synchronization " + "DB from disk"); cancelReplicationHandshake(); /* Note that there's no point in restarting the AOF on sync failure, - it'll be restarted when sync succeeds or slave promoted. */ + it'll be restarted when sync succeeds or replica promoted. */ return; } + + /* Cleanup. */ zfree(server.repl_transfer_tmpfile); close(server.repl_transfer_fd); server.repl_transfer_fd = -1; server.repl_transfer_tmpfile = NULL; } + /* Final setup of the connected slave <- master link */ replicationCreateMasterClient(server.repl_transfer_s,rsi.repl_stream_db); server.repl_state = REPL_STATE_CONNECTED; server.repl_down_since = 0; + /* After a full resynchroniziation we use the replication ID and * offset of the master. The secondary ID / offset are cleared since * we are starting a new history. */ memcpy(server.replid,server.master->replid,sizeof(server.replid)); server.master_repl_offset = server.master->reploff; clearReplicationId2(); + /* Let's create the replication backlog if needed. Slaves need to * accumulate the backlog regardless of the fact they have sub-slaves * or not, in order to behave correctly if they are promoted to * masters after a failover. 
*/ if (server.repl_backlog == NULL) createReplicationBacklog(); - serverLog(LL_NOTICE, "MASTER <-> REPLICA sync: Finished with success"); + /* Restart the AOF subsystem now that we finished the sync. This * will trigger an AOF rewrite, and when done will start appending * to the new file. */ diff --git a/src/rio.c b/src/rio.c index 993768b5..9327c17a 100644 --- a/src/rio.c +++ b/src/rio.c @@ -173,13 +173,13 @@ static size_t rioFdRead(rio *r, void *buf, size_t len) { /* if the buffer is too small for the entire request: realloc */ if (sdslen(r->io.fd.buf) + sdsavail(r->io.fd.buf) < len) r->io.fd.buf = sdsMakeRoomFor(r->io.fd.buf, len - sdslen(r->io.fd.buf)); - + /* if the remaining unused buffer is not large enough: memmove so that we can read the rest */ if (len > avail && sdsavail(r->io.fd.buf) < len - avail) { sdsrange(r->io.fd.buf, r->io.fd.pos, -1); r->io.fd.pos = 0; } - + /* if we don't already have all the data in the sds, read more */ while (len > sdslen(r->io.fd.buf) - r->io.fd.pos) { size_t buffered = sdslen(r->io.fd.buf) - r->io.fd.pos; @@ -251,8 +251,10 @@ void rioInitWithFd(rio *r, int fd, size_t read_limit) { /* release the rio stream. * optionally returns the unread buffered data. */ -void rioFreeFd(rio *r, sds* out_remainingBufferedData) { - if(out_remainingBufferedData && (size_t)r->io.fd.pos < sdslen(r->io.fd.buf)) { +void rioFreeFd(rio *r, sds *out_remainingBufferedData) { + if (out_remainingBufferedData && + (size_t)r->io.fd.pos < sdslen(r->io.fd.buf)) + { if (r->io.fd.pos > 0) sdsrange(r->io.fd.buf, r->io.fd.pos, -1); *out_remainingBufferedData = r->io.fd.buf; @@ -264,7 +266,7 @@ void rioFreeFd(rio *r, sds* out_remainingBufferedData) { r->io.fd.buf = NULL; } -/* ------------------- File descriptors set implementation ------------------- */ +/* ------------------- File descriptors set implementation ------------------ */ /* Returns 1 or 0 for success/failure. * The function returns success as long as we are able to correctly write From dfcbeaf11572516e13d619db9a82e2bfebe392a5 Mon Sep 17 00:00:00 2001 From: antirez Date: Mon, 8 Jul 2019 18:39:59 +0200 Subject: [PATCH 120/304] Diskless replica: a few aesthetic changes to rio.c --- src/rio.c | 57 +++++++++++++++++++++++++++++++------------------------ 1 file changed, 32 insertions(+), 25 deletions(-) diff --git a/src/rio.c b/src/rio.c index 9327c17a..1a53992a 100644 --- a/src/rio.c +++ b/src/rio.c @@ -157,7 +157,11 @@ void rioInitWithFile(rio *r, FILE *fp) { r->io.file.autosync = 0; } -/* ------------------- File descriptor implementation ------------------- */ +/* ------------------- File descriptor implementation ------------------- + * We use this RIO implemetnation when reading an RDB file directly from + * the socket to the memory via rdbLoadRio(), thus this implementation + * only implements reading from a file descriptor that is, normally, + * just a socket. */ static size_t rioFdWrite(rio *r, const void *buf, size_t len) { UNUSED(r); @@ -170,27 +174,28 @@ static size_t rioFdWrite(rio *r, const void *buf, size_t len) { static size_t rioFdRead(rio *r, void *buf, size_t len) { size_t avail = sdslen(r->io.fd.buf)-r->io.fd.pos; - /* if the buffer is too small for the entire request: realloc */ + /* If the buffer is too small for the entire request: realloc. 
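+     * (That is, grow the sds so that it can hold at least 'len' bytes
+     * in total before buffering more data from the socket.)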
*/ if (sdslen(r->io.fd.buf) + sdsavail(r->io.fd.buf) < len) r->io.fd.buf = sdsMakeRoomFor(r->io.fd.buf, len - sdslen(r->io.fd.buf)); - /* if the remaining unused buffer is not large enough: memmove so that we can read the rest */ + /* If the remaining unused buffer is not large enough: memmove so that we + * can read the rest. */ if (len > avail && sdsavail(r->io.fd.buf) < len - avail) { sdsrange(r->io.fd.buf, r->io.fd.pos, -1); r->io.fd.pos = 0; } - /* if we don't already have all the data in the sds, read more */ + /* If we don't already have all the data in the sds, read more */ while (len > sdslen(r->io.fd.buf) - r->io.fd.pos) { size_t buffered = sdslen(r->io.fd.buf) - r->io.fd.pos; size_t toread = len - buffered; - /* read either what's missing, or PROTO_IOBUF_LEN, the bigger of the two */ - if (toread < PROTO_IOBUF_LEN) - toread = PROTO_IOBUF_LEN; - if (toread > sdsavail(r->io.fd.buf)) - toread = sdsavail(r->io.fd.buf); + /* Read either what's missing, or PROTO_IOBUF_LEN, the bigger of + * the two. */ + if (toread < PROTO_IOBUF_LEN) toread = PROTO_IOBUF_LEN; + if (toread > sdsavail(r->io.fd.buf)) toread = sdsavail(r->io.fd.buf); if (r->io.fd.read_limit != 0 && - r->io.fd.read_so_far + buffered + toread > r->io.fd.read_limit) { + r->io.fd.read_so_far + buffered + toread > r->io.fd.read_limit) + { if (r->io.fd.read_limit >= r->io.fd.read_so_far - buffered) toread = r->io.fd.read_limit - r->io.fd.read_so_far - buffered; else { @@ -198,7 +203,9 @@ static size_t rioFdRead(rio *r, void *buf, size_t len) { return 0; } } - int retval = read(r->io.fd.fd, (char*)r->io.fd.buf + sdslen(r->io.fd.buf), toread); + int retval = read(r->io.fd.fd, + (char*)r->io.fd.buf + sdslen(r->io.fd.buf), + toread); if (retval <= 0) { if (errno == EWOULDBLOCK) errno = ETIMEDOUT; return 0; @@ -237,8 +244,8 @@ static const rio rioFdIO = { { { NULL, 0 } } /* union for io-specific vars */ }; -/* create an rio that implements a buffered read from an fd - * read_limit argument stops buffering when the reaching the limit */ +/* Create an RIO that implements a buffered read from an fd + * read_limit argument stops buffering when the reaching the limit. */ void rioInitWithFd(rio *r, int fd, size_t read_limit) { *r = rioFdIO; r->io.fd.fd = fd; @@ -249,24 +256,24 @@ void rioInitWithFd(rio *r, int fd, size_t read_limit) { sdsclear(r->io.fd.buf); } -/* release the rio stream. - * optionally returns the unread buffered data. */ -void rioFreeFd(rio *r, sds *out_remainingBufferedData) { - if (out_remainingBufferedData && - (size_t)r->io.fd.pos < sdslen(r->io.fd.buf)) - { - if (r->io.fd.pos > 0) - sdsrange(r->io.fd.buf, r->io.fd.pos, -1); - *out_remainingBufferedData = r->io.fd.buf; +/* Release the RIO tream. Optionally returns the unread buffered data + * when the SDS pointer 'remaining' is passed. 
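+ * When the unread data is handed back, ownership of the sds buffer moves
+ * to the caller, which then becomes responsible for freeing it.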
*/ +void rioFreeFd(rio *r, sds *remaining) { + if (remaining && (size_t)r->io.fd.pos < sdslen(r->io.fd.buf)) { + if (r->io.fd.pos > 0) sdsrange(r->io.fd.buf, r->io.fd.pos, -1); + *remaining = r->io.fd.buf; } else { sdsfree(r->io.fd.buf); - if (out_remainingBufferedData) - *out_remainingBufferedData = NULL; + if (out_remainingBufferedData) *remaining = NULL; } r->io.fd.buf = NULL; } -/* ------------------- File descriptors set implementation ------------------ */ +/* ------------------- File descriptors set implementation ------------------ + * This target is used to write the RDB file to N different replicas via + * sockets, when the master just streams the data to the replicas without + * creating an RDB on-disk image (diskless replication option). + * It only implements writes. */ /* Returns 1 or 0 for success/failure. * The function returns success as long as we are able to correctly write From 3300e98ff003709554adf54c4da86c39dd2160d9 Mon Sep 17 00:00:00 2001 From: antirez Date: Wed, 10 Jul 2019 09:34:21 +0200 Subject: [PATCH 121/304] Diskless replica: fix mispelled var name. --- src/rio.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/rio.c b/src/rio.c index 1a53992a..5359bc3d 100644 --- a/src/rio.c +++ b/src/rio.c @@ -264,7 +264,7 @@ void rioFreeFd(rio *r, sds *remaining) { *remaining = r->io.fd.buf; } else { sdsfree(r->io.fd.buf); - if (out_remainingBufferedData) *remaining = NULL; + if (remaining) *remaining = NULL; } r->io.fd.buf = NULL; } From 3bbb9a14133725e03012ad73b0b0c9f57f11a333 Mon Sep 17 00:00:00 2001 From: antirez Date: Wed, 10 Jul 2019 11:42:26 +0200 Subject: [PATCH 122/304] Diskless replica: refactoring of DBs backups. --- src/replication.c | 69 ++++++++++++++++++++++++++++++++--------------- 1 file changed, 47 insertions(+), 22 deletions(-) diff --git a/src/replication.c b/src/replication.c index a7c1c0d6..a89552a8 100644 --- a/src/replication.c +++ b/src/replication.c @@ -1119,6 +1119,49 @@ static int useDisklessLoad() { (server.repl_diskless_load == REPL_DISKLESS_LOAD_WHEN_DB_EMPTY && dbTotalServerKeyCount()==0); } +/* Helper function for readSyncBulkPayload() to make backups of the current + * DBs before socket-loading the new ones. The backups may be restored later + * or freed by disklessLoadRestoreBackups(). */ +redisDb *disklessLoadMakeBackups(void) { + redisDb *backups = zmalloc(sizeof(redisDb)*server.dbnum); + for (int i=0; i Date: Wed, 10 Jul 2019 12:36:14 +0200 Subject: [PATCH 123/304] Diskless replica: fix disklessLoadRestoreBackups() bug. --- src/replication.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/src/replication.c b/src/replication.c index a89552a8..26e7cf8f 100644 --- a/src/replication.c +++ b/src/replication.c @@ -1138,16 +1138,16 @@ redisDb *disklessLoadMakeBackups(void) { * * If the socket loading went wrong, we want to restore the old backups * into the server databases. This function does just that in the case - * the 'count' argument (the number of DBs to replace) is non-zero. + * the 'restore' argument (the number of DBs to replace) is non-zero. * * When instead the loading succeeded we want just to free our old backups, - * in that case the funciton will do just that when 'count' is 0. */ -void disklessLoadRestoreBackups(redisDb *backup, int count, int empty_db_flags) + * in that case the funciton will do just that when 'restore' is 0. */ +void disklessLoadRestoreBackups(redisDb *backup, int restore, int empty_db_flags) { - if (count) { + if (restore) { /* Restore. 
*/ emptyDbGeneric(server.db,-1,empty_db_flags,replicationEmptyDbCallback); - for (int i=0; i Date: Wed, 10 Jul 2019 18:08:31 +0200 Subject: [PATCH 124/304] Client side caching: add tracking clients in INFO. --- src/server.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/server.c b/src/server.c index 8ed5b591..4337b8f0 100644 --- a/src/server.c +++ b/src/server.c @@ -3895,10 +3895,12 @@ sds genRedisInfoString(char *section) { "connected_clients:%lu\r\n" "client_recent_max_input_buffer:%zu\r\n" "client_recent_max_output_buffer:%zu\r\n" - "blocked_clients:%d\r\n", + "blocked_clients:%d\r\n" + "tracking_clients:%d\r\n", listLength(server.clients)-listLength(server.slaves), maxin, maxout, - server.blocked_clients); + server.blocked_clients, + server.tracking_clients); } /* Memory */ From c7aaf8db4d532c40c832557f71e76466eb0c819f Mon Sep 17 00:00:00 2001 From: antirez Date: Wed, 10 Jul 2019 18:17:07 +0200 Subject: [PATCH 125/304] Client side caching: implement CLIENT GETREDIR. This subcommand may simplify the writing of Redis client libraries using the tracking feature and/or improve observability and debugging capabilities. --- src/networking.c | 36 ++++++++++++++++++++++-------------- 1 file changed, 22 insertions(+), 14 deletions(-) diff --git a/src/networking.c b/src/networking.c index 716b3585..1a8e3530 100644 --- a/src/networking.c +++ b/src/networking.c @@ -1954,20 +1954,21 @@ void clientCommand(client *c) { if (c->argc == 2 && !strcasecmp(c->argv[1]->ptr,"help")) { const char *help[] = { -"id -- Return the ID of the current connection.", -"getname -- Return the name of the current connection.", -"kill -- Kill connection made from .", -"kill