From 9ea54feef0f2e62f7618da9b0ce3acce9f474db6 Mon Sep 17 00:00:00 2001 From: Pieter Noordhuis Date: Mon, 2 Jan 2012 15:24:32 -0800 Subject: [PATCH 01/70] string2* functions take a const pointer --- src/util.c | 6 +++--- src/util.h | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/util.c b/src/util.c index f5a23af2..f7437e1c 100644 --- a/src/util.c +++ b/src/util.c @@ -209,8 +209,8 @@ int ll2string(char *s, size_t len, long long value) { /* Convert a string into a long long. Returns 1 if the string could be parsed * into a (non-overflowing) long long, 0 otherwise. The value will be set to * the parsed value when appropriate. */ -int string2ll(char *s, size_t slen, long long *value) { - char *p = s; +int string2ll(const char *s, size_t slen, long long *value) { + const char *p = s; size_t plen = 0; int negative = 0; unsigned long long v; @@ -275,7 +275,7 @@ int string2ll(char *s, size_t slen, long long *value) { /* Convert a string into a long. Returns 1 if the string could be parsed into a * (non-overflowing) long, 0 otherwise. The value will be set to the parsed * value when appropriate. */ -int string2l(char *s, size_t slen, long *lval) { +int string2l(const char *s, size_t slen, long *lval) { long long llval; if (!string2ll(s,slen,&llval)) diff --git a/src/util.h b/src/util.h index b897a89e..59dd10ac 100644 --- a/src/util.h +++ b/src/util.h @@ -5,8 +5,8 @@ int stringmatchlen(const char *p, int plen, const char *s, int slen, int nocase) int stringmatch(const char *p, const char *s, int nocase); long long memtoll(const char *p, int *err); int ll2string(char *s, size_t len, long long value); -int string2ll(char *s, size_t slen, long long *value); -int string2l(char *s, size_t slen, long *value); +int string2ll(const char *s, size_t slen, long long *value); +int string2l(const char *s, size_t slen, long *value); int d2string(char *buf, size_t len, double value); #endif From ebd85e9a455df689c9be02a93354f580df4cafd8 Mon Sep 17 00:00:00 2001 From: Pieter Noordhuis Date: Mon, 2 Jan 2012 22:14:10 -0800 Subject: [PATCH 02/70] Encode small hashes with a ziplist --- src/aof.c | 78 ++--- src/config.c | 30 +- src/object.c | 12 +- src/rdb.c | 139 ++++++--- src/rdb.h | 1 + src/redis.c | 4 +- src/redis.h | 24 +- src/t_hash.c | 619 +++++++++++++++++++++++++++------------ tests/unit/aofrw.tcl | 4 +- tests/unit/type/hash.tcl | 10 +- 10 files changed, 603 insertions(+), 318 deletions(-) diff --git a/src/aof.c b/src/aof.c index 25d6f454..742af905 100644 --- a/src/aof.c +++ b/src/aof.c @@ -607,53 +607,55 @@ int rewriteSortedSetObject(rio *r, robj *key, robj *o) { return 1; } +static int rioWriteHashIteratorCursor(rio *r, hashTypeIterator *hi, int what) { + if (hi->encoding == REDIS_ENCODING_ZIPLIST) { + unsigned char *vstr = NULL; + unsigned int vlen = UINT_MAX; + long long vll = LLONG_MAX; + + hashTypeCurrentFromZiplist(hi, what, &vstr, &vlen, &vll); + if (vstr) { + return rioWriteBulkString(r, (char*)vstr, vlen); + } else { + return rioWriteBulkLongLong(r, vll); + } + + } else if (hi->encoding == REDIS_ENCODING_HT) { + robj *value; + + hashTypeCurrentFromHashTable(hi, what, &value); + return rioWriteBulkObject(r, value); + } + + redisPanic("Unknown hash encoding"); + return 0; +} + /* Emit the commands needed to rebuild a hash object. * The function returns 0 on error, 1 on success. */ int rewriteHashObject(rio *r, robj *key, robj *o) { + hashTypeIterator *hi; long long count = 0, items = hashTypeLength(o); - if (o->encoding == REDIS_ENCODING_ZIPMAP) { - unsigned char *p = zipmapRewind(o->ptr); - unsigned char *field, *val; - unsigned int flen, vlen; + hi = hashTypeInitIterator(o); + while (hashTypeNext(hi) != REDIS_ERR) { + if (count == 0) { + int cmd_items = (items > REDIS_AOF_REWRITE_ITEMS_PER_CMD) ? + REDIS_AOF_REWRITE_ITEMS_PER_CMD : items; - while((p = zipmapNext(p,&field,&flen,&val,&vlen)) != NULL) { - if (count == 0) { - int cmd_items = (items > REDIS_AOF_REWRITE_ITEMS_PER_CMD) ? - REDIS_AOF_REWRITE_ITEMS_PER_CMD : items; - - if (rioWriteBulkCount(r,'*',2+cmd_items*2) == 0) return 0; - if (rioWriteBulkString(r,"HMSET",5) == 0) return 0; - if (rioWriteBulkObject(r,key) == 0) return 0; - } - if (rioWriteBulkString(r,(char*)field,flen) == 0) return 0; - if (rioWriteBulkString(r,(char*)val,vlen) == 0) return 0; - if (++count == REDIS_AOF_REWRITE_ITEMS_PER_CMD) count = 0; - items--; + if (rioWriteBulkCount(r,'*',2+cmd_items*2) == 0) return 0; + if (rioWriteBulkString(r,"HMSET",5) == 0) return 0; + if (rioWriteBulkObject(r,key) == 0) return 0; } - } else { - dictIterator *di = dictGetIterator(o->ptr); - dictEntry *de; - while((de = dictNext(di)) != NULL) { - robj *field = dictGetKey(de); - robj *val = dictGetVal(de); - - if (count == 0) { - int cmd_items = (items > REDIS_AOF_REWRITE_ITEMS_PER_CMD) ? - REDIS_AOF_REWRITE_ITEMS_PER_CMD : items; - - if (rioWriteBulkCount(r,'*',2+cmd_items*2) == 0) return 0; - if (rioWriteBulkString(r,"HMSET",5) == 0) return 0; - if (rioWriteBulkObject(r,key) == 0) return 0; - } - if (rioWriteBulkObject(r,field) == 0) return 0; - if (rioWriteBulkObject(r,val) == 0) return 0; - if (++count == REDIS_AOF_REWRITE_ITEMS_PER_CMD) count = 0; - items--; - } - dictReleaseIterator(di); + if (rioWriteHashIteratorCursor(r, hi, REDIS_HASH_KEY) == 0) return 0; + if (rioWriteHashIteratorCursor(r, hi, REDIS_HASH_VALUE) == 0) return 0; + if (++count == REDIS_AOF_REWRITE_ITEMS_PER_CMD) count = 0; + items--; } + + hashTypeReleaseIterator(hi); + return 1; } diff --git a/src/config.c b/src/config.c index 4a25489a..26bb2ff5 100644 --- a/src/config.c +++ b/src/config.c @@ -259,9 +259,15 @@ void loadServerConfigFromString(char *config) { zfree(server.rdb_filename); server.rdb_filename = zstrdup(argv[1]); } else if (!strcasecmp(argv[0],"hash-max-zipmap-entries") && argc == 2) { - server.hash_max_zipmap_entries = memtoll(argv[1], NULL); + redisLog(REDIS_WARNING, "Deprecated configuration directive: \"%s\"", argv[0]); + server.hash_max_ziplist_entries = memtoll(argv[1], NULL); } else if (!strcasecmp(argv[0],"hash-max-zipmap-value") && argc == 2) { - server.hash_max_zipmap_value = memtoll(argv[1], NULL); + redisLog(REDIS_WARNING, "Deprecated configuration directive: \"%s\"", argv[0]); + server.hash_max_ziplist_value = memtoll(argv[1], NULL); + } else if (!strcasecmp(argv[0],"hash-max-ziplist-entries") && argc == 2) { + server.hash_max_ziplist_entries = memtoll(argv[1], NULL); + } else if (!strcasecmp(argv[0],"hash-max-ziplist-value") && argc == 2) { + server.hash_max_ziplist_value = memtoll(argv[1], NULL); } else if (!strcasecmp(argv[0],"list-max-ziplist-entries") && argc == 2){ server.list_max_ziplist_entries = memtoll(argv[1], NULL); } else if (!strcasecmp(argv[0],"list-max-ziplist-value") && argc == 2) { @@ -491,12 +497,12 @@ void configSetCommand(redisClient *c) { addReplyErrorFormat(c,"Changing directory: %s", strerror(errno)); return; } - } else if (!strcasecmp(c->argv[2]->ptr,"hash-max-zipmap-entries")) { + } else if (!strcasecmp(c->argv[2]->ptr,"hash-max-ziplist-entries")) { if (getLongLongFromObject(o,&ll) == REDIS_ERR || ll < 0) goto badfmt; - server.hash_max_zipmap_entries = ll; - } else if (!strcasecmp(c->argv[2]->ptr,"hash-max-zipmap-value")) { + server.hash_max_ziplist_entries = ll; + } else if (!strcasecmp(c->argv[2]->ptr,"hash-max-ziplist-value")) { if (getLongLongFromObject(o,&ll) == REDIS_ERR || ll < 0) goto badfmt; - server.hash_max_zipmap_value = ll; + server.hash_max_ziplist_value = ll; } else if (!strcasecmp(c->argv[2]->ptr,"list-max-ziplist-entries")) { if (getLongLongFromObject(o,&ll) == REDIS_ERR || ll < 0) goto badfmt; server.list_max_ziplist_entries = ll; @@ -668,14 +674,14 @@ void configGetCommand(redisClient *c) { addReplyBulkCString(c,server.repl_serve_stale_data ? "yes" : "no"); matches++; } - if (stringmatch(pattern,"hash-max-zipmap-entries",0)) { - addReplyBulkCString(c,"hash-max-zipmap-entries"); - addReplyBulkLongLong(c,server.hash_max_zipmap_entries); + if (stringmatch(pattern,"hash-max-ziplist-entries",0)) { + addReplyBulkCString(c,"hash-max-ziplist-entries"); + addReplyBulkLongLong(c,server.hash_max_ziplist_entries); matches++; } - if (stringmatch(pattern,"hash-max-zipmap-value",0)) { - addReplyBulkCString(c,"hash-max-zipmap-value"); - addReplyBulkLongLong(c,server.hash_max_zipmap_value); + if (stringmatch(pattern,"hash-max-ziplist-value",0)) { + addReplyBulkCString(c,"hash-max-ziplist-value"); + addReplyBulkLongLong(c,server.hash_max_ziplist_value); matches++; } if (stringmatch(pattern,"list-max-ziplist-entries",0)) { diff --git a/src/object.c b/src/object.c index 0711afed..ccb07208 100644 --- a/src/object.c +++ b/src/object.c @@ -95,12 +95,9 @@ robj *createIntsetObject(void) { } robj *createHashObject(void) { - /* All the Hashes start as zipmaps. Will be automatically converted - * into hash tables if there are enough elements or big elements - * inside. */ - unsigned char *zm = zipmapNew(); - robj *o = createObject(REDIS_HASH,zm); - o->encoding = REDIS_ENCODING_ZIPMAP; + unsigned char *zl = ziplistNew(); + robj *o = createObject(REDIS_HASH, zl); + o->encoding = REDIS_ENCODING_ZIPLIST; return o; } @@ -176,7 +173,7 @@ void freeHashObject(robj *o) { case REDIS_ENCODING_HT: dictRelease((dict*) o->ptr); break; - case REDIS_ENCODING_ZIPMAP: + case REDIS_ENCODING_ZIPLIST: zfree(o->ptr); break; default: @@ -492,7 +489,6 @@ char *strEncoding(int encoding) { case REDIS_ENCODING_RAW: return "raw"; case REDIS_ENCODING_INT: return "int"; case REDIS_ENCODING_HT: return "hashtable"; - case REDIS_ENCODING_ZIPMAP: return "zipmap"; case REDIS_ENCODING_LINKEDLIST: return "linkedlist"; case REDIS_ENCODING_ZIPLIST: return "ziplist"; case REDIS_ENCODING_INTSET: return "intset"; diff --git a/src/rdb.c b/src/rdb.c index 77e2a048..c74ec41c 100644 --- a/src/rdb.c +++ b/src/rdb.c @@ -1,5 +1,6 @@ #include "redis.h" #include "lzf.h" /* LZF compression library */ +#include "zipmap.h" #include #include @@ -424,8 +425,8 @@ int rdbSaveObjectType(rio *rdb, robj *o) { else redisPanic("Unknown sorted set encoding"); case REDIS_HASH: - if (o->encoding == REDIS_ENCODING_ZIPMAP) - return rdbSaveType(rdb,REDIS_RDB_TYPE_HASH_ZIPMAP); + if (o->encoding == REDIS_ENCODING_ZIPLIST) + return rdbSaveType(rdb,REDIS_RDB_TYPE_HASH_ZIPLIST); else if (o->encoding == REDIS_ENCODING_HT) return rdbSaveType(rdb,REDIS_RDB_TYPE_HASH); else @@ -530,12 +531,13 @@ int rdbSaveObject(rio *rdb, robj *o) { } } else if (o->type == REDIS_HASH) { /* Save a hash value */ - if (o->encoding == REDIS_ENCODING_ZIPMAP) { - size_t l = zipmapBlobLen((unsigned char*)o->ptr); + if (o->encoding == REDIS_ENCODING_ZIPLIST) { + size_t l = ziplistBlobLen((unsigned char*)o->ptr); if ((n = rdbSaveRawString(rdb,o->ptr,l)) == -1) return -1; nwritten += n; - } else { + + } else if (o->encoding == REDIS_ENCODING_HT) { dictIterator *di = dictGetIterator(o->ptr); dictEntry *de; @@ -552,7 +554,11 @@ int rdbSaveObject(rio *rdb, robj *o) { nwritten += n; } dictReleaseIterator(di); + + } else { + redisPanic("Unknown hash encoding"); } + } else { redisPanic("Unknown object type"); } @@ -824,55 +830,69 @@ robj *rdbLoadObject(int rdbtype, rio *rdb) { maxelelen <= server.zset_max_ziplist_value) zsetConvert(o,REDIS_ENCODING_ZIPLIST); } else if (rdbtype == REDIS_RDB_TYPE_HASH) { - size_t hashlen; + size_t len; + int ret; + + len = rdbLoadLen(rdb, NULL); + if (len == REDIS_RDB_LENERR) return NULL; - if ((hashlen = rdbLoadLen(rdb,NULL)) == REDIS_RDB_LENERR) return NULL; o = createHashObject(); + /* Too many entries? Use an hash table. */ - if (hashlen > server.hash_max_zipmap_entries) - convertToRealHash(o); - /* Load every key/value, then set it into the zipmap or hash - * table, as needed. */ - while(hashlen--) { - robj *key, *val; + if (len > server.hash_max_ziplist_entries) + hashTypeConvert(o, REDIS_ENCODING_HT); - if ((key = rdbLoadEncodedStringObject(rdb)) == NULL) return NULL; - if ((val = rdbLoadEncodedStringObject(rdb)) == NULL) return NULL; - /* If we are using a zipmap and there are too big values - * the object is converted to real hash table encoding. */ - if (o->encoding != REDIS_ENCODING_HT && - ((key->encoding == REDIS_ENCODING_RAW && - sdslen(key->ptr) > server.hash_max_zipmap_value) || - (val->encoding == REDIS_ENCODING_RAW && - sdslen(val->ptr) > server.hash_max_zipmap_value))) + /* Load every field and value into the ziplist */ + while (o->encoding == REDIS_ENCODING_ZIPLIST && len-- > 0) { + robj *field, *value; + + /* Load raw strings */ + field = rdbLoadStringObject(rdb); + if (field == NULL) return NULL; + redisAssert(field->encoding == REDIS_ENCODING_RAW); + value = rdbLoadStringObject(rdb); + if (value == NULL) return NULL; + redisAssert(field->encoding == REDIS_ENCODING_RAW); + + /* Convert to hash table if size threshold is exceeded */ + if (sdslen(field->ptr) > server.hash_max_ziplist_value || + sdslen(value->ptr) > server.hash_max_ziplist_value) { - convertToRealHash(o); + hashTypeConvert(o, REDIS_ENCODING_HT); + break; } - if (o->encoding == REDIS_ENCODING_ZIPMAP) { - unsigned char *zm = o->ptr; - robj *deckey, *decval; - - /* We need raw string objects to add them to the zipmap */ - deckey = getDecodedObject(key); - decval = getDecodedObject(val); - zm = zipmapSet(zm,deckey->ptr,sdslen(deckey->ptr), - decval->ptr,sdslen(decval->ptr),NULL); - o->ptr = zm; - decrRefCount(deckey); - decrRefCount(decval); - decrRefCount(key); - decrRefCount(val); - } else { - key = tryObjectEncoding(key); - val = tryObjectEncoding(val); - dictAdd((dict*)o->ptr,key,val); - } + /* Add pair to ziplist */ + o->ptr = ziplistPush(o->ptr, field->ptr, sdslen(field->ptr), ZIPLIST_TAIL); + o->ptr = ziplistPush(o->ptr, value->ptr, sdslen(value->ptr), ZIPLIST_TAIL); } + + /* Load remaining fields and values into the hash table */ + while (o->encoding == REDIS_ENCODING_HT && len-- > 0) { + robj *field, *value; + + /* Load encoded strings */ + field = rdbLoadEncodedStringObject(rdb); + if (field == NULL) return NULL; + value = rdbLoadEncodedStringObject(rdb); + if (value == NULL) return NULL; + + field = tryObjectEncoding(field); + value = tryObjectEncoding(value); + + /* Add pair to hash table */ + ret = dictAdd((dict*)o->ptr, field, value); + redisAssert(ret == REDIS_OK); + } + + /* All pairs should be read by now */ + redisAssert(len == 0); + } else if (rdbtype == REDIS_RDB_TYPE_HASH_ZIPMAP || rdbtype == REDIS_RDB_TYPE_LIST_ZIPLIST || rdbtype == REDIS_RDB_TYPE_SET_INTSET || - rdbtype == REDIS_RDB_TYPE_ZSET_ZIPLIST) + rdbtype == REDIS_RDB_TYPE_ZSET_ZIPLIST || + rdbtype == REDIS_RDB_TYPE_HASH_ZIPLIST) { robj *aux = rdbLoadStringObject(rdb); @@ -890,10 +910,29 @@ robj *rdbLoadObject(int rdbtype, rio *rdb) { * converted. */ switch(rdbtype) { case REDIS_RDB_TYPE_HASH_ZIPMAP: - o->type = REDIS_HASH; - o->encoding = REDIS_ENCODING_ZIPMAP; - if (zipmapLen(o->ptr) > server.hash_max_zipmap_entries) - convertToRealHash(o); + /* Convert to ziplist encoded hash. This must be deprecated + * when loading dumps created by Redis 2.4 gets deprecated. */ + { + unsigned char *zl = ziplistNew(); + unsigned char *zi = zipmapRewind(o->ptr); + + while (zi != NULL) { + unsigned char *fstr, *vstr; + unsigned int flen, vlen; + + zi = zipmapNext(zi, &fstr, &flen, &vstr, &vlen); + zl = ziplistPush(zl, fstr, flen, ZIPLIST_TAIL); + zl = ziplistPush(zl, vstr, vlen, ZIPLIST_TAIL); + } + + zfree(o->ptr); + o->ptr = zl; + o->type = REDIS_HASH; + o->encoding = REDIS_ENCODING_ZIPLIST; + + if (hashTypeLength(o) > server.hash_max_ziplist_entries) + hashTypeConvert(o, REDIS_ENCODING_HT); + } break; case REDIS_RDB_TYPE_LIST_ZIPLIST: o->type = REDIS_LIST; @@ -913,6 +952,12 @@ robj *rdbLoadObject(int rdbtype, rio *rdb) { if (zsetLength(o) > server.zset_max_ziplist_entries) zsetConvert(o,REDIS_ENCODING_SKIPLIST); break; + case REDIS_RDB_TYPE_HASH_ZIPLIST: + o->type = REDIS_HASH; + o->encoding = REDIS_ENCODING_ZIPLIST; + if (hashTypeLength(o) > server.hash_max_ziplist_entries) + hashTypeConvert(o, REDIS_ENCODING_HT); + break; default: redisPanic("Unknown encoding"); break; diff --git a/src/rdb.h b/src/rdb.h index 827947b4..45beaa93 100644 --- a/src/rdb.h +++ b/src/rdb.h @@ -47,6 +47,7 @@ #define REDIS_RDB_TYPE_LIST_ZIPLIST 10 #define REDIS_RDB_TYPE_SET_INTSET 11 #define REDIS_RDB_TYPE_ZSET_ZIPLIST 12 +#define REDIS_RDB_TYPE_HASH_ZIPLIST 13 /* Test if a type is an object type. */ #define rdbIsObjectType(t) ((t >= 0 && t <= 4) || (t >= 9 && t <= 12)) diff --git a/src/redis.c b/src/redis.c index 09146115..7de52c87 100644 --- a/src/redis.c +++ b/src/redis.c @@ -895,8 +895,8 @@ void initServerConfig() { server.maxmemory = 0; server.maxmemory_policy = REDIS_MAXMEMORY_VOLATILE_LRU; server.maxmemory_samples = 3; - server.hash_max_zipmap_entries = REDIS_HASH_MAX_ZIPMAP_ENTRIES; - server.hash_max_zipmap_value = REDIS_HASH_MAX_ZIPMAP_VALUE; + server.hash_max_ziplist_entries = REDIS_HASH_MAX_ZIPLIST_ENTRIES; + server.hash_max_ziplist_value = REDIS_HASH_MAX_ZIPLIST_VALUE; server.list_max_ziplist_entries = REDIS_LIST_MAX_ZIPLIST_ENTRIES; server.list_max_ziplist_value = REDIS_LIST_MAX_ZIPLIST_VALUE; server.set_max_intset_entries = REDIS_SET_MAX_INTSET_ENTRIES; diff --git a/src/redis.h b/src/redis.h index aa79b4ad..7a27e56b 100644 --- a/src/redis.h +++ b/src/redis.h @@ -27,7 +27,6 @@ #include "adlist.h" /* Linked lists */ #include "zmalloc.h" /* total memory usage aware version of malloc/free */ #include "anet.h" /* Networking the easy way */ -#include "zipmap.h" /* Compact string -> string data structure */ #include "ziplist.h" /* Compact list data structure */ #include "intset.h" /* Compact integer set structure */ #include "version.h" /* Version macro */ @@ -194,8 +193,8 @@ #define AOF_FSYNC_EVERYSEC 2 /* Zip structure related defaults */ -#define REDIS_HASH_MAX_ZIPMAP_ENTRIES 512 -#define REDIS_HASH_MAX_ZIPMAP_VALUE 64 +#define REDIS_HASH_MAX_ZIPLIST_ENTRIES 512 +#define REDIS_HASH_MAX_ZIPLIST_VALUE 64 #define REDIS_LIST_MAX_ZIPLIST_ENTRIES 512 #define REDIS_LIST_MAX_ZIPLIST_VALUE 64 #define REDIS_SET_MAX_INTSET_ENTRIES 512 @@ -610,8 +609,8 @@ struct redisServer { int sort_alpha; int sort_bypattern; /* Zip structure config, see redis.conf for more information */ - size_t hash_max_zipmap_entries; - size_t hash_max_zipmap_value; + size_t hash_max_ziplist_entries; + size_t hash_max_ziplist_value; size_t list_max_ziplist_entries; size_t list_max_ziplist_value; size_t set_max_intset_entries; @@ -715,10 +714,10 @@ typedef struct { * not both are required, store pointers in the iterator to avoid * unnecessary memory allocation for fields/values. */ typedef struct { + robj *subject; int encoding; - unsigned char *zi; - unsigned char *zk, *zv; - unsigned int zklen, zvlen; + + unsigned char *fptr, *vptr; dictIterator *di; dictEntry *de; @@ -934,10 +933,9 @@ unsigned long setTypeSize(robj *subject); void setTypeConvert(robj *subject, int enc); /* Hash data type */ -void convertToRealHash(robj *o); +void hashTypeConvert(robj *o, int enc); void hashTypeTryConversion(robj *subject, robj **argv, int start, int end); void hashTypeTryObjectEncoding(robj *subject, robj **o1, robj **o2); -int hashTypeGet(robj *o, robj *key, robj **objval, unsigned char **v, unsigned int *vlen); robj *hashTypeGetObject(robj *o, robj *key); int hashTypeExists(robj *o, robj *key); int hashTypeSet(robj *o, robj *key, robj *value); @@ -946,7 +944,11 @@ unsigned long hashTypeLength(robj *o); hashTypeIterator *hashTypeInitIterator(robj *subject); void hashTypeReleaseIterator(hashTypeIterator *hi); int hashTypeNext(hashTypeIterator *hi); -int hashTypeCurrent(hashTypeIterator *hi, int what, robj **objval, unsigned char **v, unsigned int *vlen); +void hashTypeCurrentFromZiplist(hashTypeIterator *hi, int what, + unsigned char **vstr, + unsigned int *vlen, + long long *vll); +void hashTypeCurrentFromHashTable(hashTypeIterator *hi, int what, robj **dst); robj *hashTypeCurrentObject(hashTypeIterator *hi, int what); robj *hashTypeLookupWriteOrCreate(redisClient *c, robj *key); diff --git a/src/t_hash.c b/src/t_hash.c index 8ee5485c..9699223a 100644 --- a/src/t_hash.c +++ b/src/t_hash.c @@ -1,5 +1,4 @@ #include "redis.h" - #include /*----------------------------------------------------------------------------- @@ -7,18 +6,19 @@ *----------------------------------------------------------------------------*/ /* Check the length of a number of objects to see if we need to convert a - * zipmap to a real hash. Note that we only check string encoded objects + * ziplist to a real hash. Note that we only check string encoded objects * as their string length can be queried in constant time. */ -void hashTypeTryConversion(robj *subject, robj **argv, int start, int end) { +void hashTypeTryConversion(robj *o, robj **argv, int start, int end) { int i; - if (subject->encoding != REDIS_ENCODING_ZIPMAP) return; + + if (o->encoding != REDIS_ENCODING_ZIPLIST) return; for (i = start; i <= end; i++) { if (argv[i]->encoding == REDIS_ENCODING_RAW && - sdslen(argv[i]->ptr) > server.hash_max_zipmap_value) + sdslen(argv[i]->ptr) > server.hash_max_ziplist_value) { - convertToRealHash(subject); - return; + hashTypeConvert(o, REDIS_ENCODING_HT); + break; } } } @@ -31,137 +31,262 @@ void hashTypeTryObjectEncoding(robj *subject, robj **o1, robj **o2) { } } -/* Get the value from a hash identified by key. - * - * If the string is found either REDIS_ENCODING_HT or REDIS_ENCODING_ZIPMAP - * is returned, and either **objval or **v and *vlen are set accordingly, - * so that objects in hash tables are returend as objects and pointers - * inside a zipmap are returned as such. - * - * If the object was not found -1 is returned. - * - * This function is copy on write friendly as there is no incr/decr - * of refcount needed if objects are accessed just for reading operations. */ -int hashTypeGet(robj *o, robj *key, robj **objval, unsigned char **v, - unsigned int *vlen) +/* Get the value from a ziplist encoded hash, identified by field. + * Returns -1 when the field cannot be found. */ +int hashTypeGetFromZiplist(robj *o, robj *field, + unsigned char **vstr, + unsigned int *vlen, + long long *vll) { - if (o->encoding == REDIS_ENCODING_ZIPMAP) { - int found; + unsigned char *zl, *fptr = NULL, *vptr = NULL; + int ret; - key = getDecodedObject(key); - found = zipmapGet(o->ptr,key->ptr,sdslen(key->ptr),v,vlen); - decrRefCount(key); - if (!found) return -1; - } else { - dictEntry *de = dictFind(o->ptr,key); - if (de == NULL) return -1; - *objval = dictGetVal(de); + redisAssert(o->encoding == REDIS_ENCODING_ZIPLIST); + + field = getDecodedObject(field); + + zl = o->ptr; + fptr = ziplistIndex(zl, ZIPLIST_HEAD); + while (fptr != NULL) { + /* Grab pointer to the value (fptr points to the field) */ + vptr = ziplistNext(zl, fptr); + redisAssert(vptr != NULL); + + /* Compare field in ziplist with specified field */ + if (ziplistCompare(fptr, field->ptr, sdslen(field->ptr))) { + break; + } + + /* Skip over value */ + fptr = ziplistNext(zl, vptr); } - return o->encoding; + + decrRefCount(field); + + if (fptr != NULL) { + ret = ziplistGet(vptr, vstr, vlen, vll); + redisAssert(ret); + return 0; + } + + return -1; } -/* Higher level function of hashTypeGet() that always returns a Redis +/* Get the value from a hash table encoded hash, identified by field. + * Returns -1 when the field cannot be found. */ +int hashTypeGetFromHashTable(robj *o, robj *field, robj **value) { + dictEntry *de; + + redisAssert(o->encoding == REDIS_ENCODING_HT); + + de = dictFind(o->ptr, field); + if (de == NULL) { + return -1; + } + + *value = dictGetVal(de); + return 0; +} + +/* Higher level function of hashTypeGet*() that always returns a Redis * object (either new or with refcount incremented), so that the caller * can retain a reference or call decrRefCount after the usage. * * The lower level function can prevent copy on write so it is * the preferred way of doing read operations. */ -robj *hashTypeGetObject(robj *o, robj *key) { - robj *objval; - unsigned char *v; - unsigned int vlen; +robj *hashTypeGetObject(robj *o, robj *field) { + robj *value = NULL; - int encoding = hashTypeGet(o,key,&objval,&v,&vlen); - switch(encoding) { - case REDIS_ENCODING_HT: - incrRefCount(objval); - return objval; - case REDIS_ENCODING_ZIPMAP: - objval = createStringObject((char*)v,vlen); - return objval; - default: return NULL; + if (o->encoding == REDIS_ENCODING_ZIPLIST) { + unsigned char *vstr = NULL; + unsigned int vlen = UINT_MAX; + long long vll = LLONG_MAX; + + if (hashTypeGetFromZiplist(o, field, &vstr, &vlen, &vll) == 0) { + if (vstr) { + value = createStringObject((char*)vstr, vlen); + } else { + value = createStringObjectFromLongLong(vll); + } + } + + } else if (o->encoding == REDIS_ENCODING_HT) { + robj *aux; + + if (hashTypeGetFromHashTable(o, field, &aux) == 0) { + incrRefCount(aux); + value = aux; + } + + } else { + redisPanic("Unknown hash encoding"); } + + return value; } -/* Test if the key exists in the given hash. Returns 1 if the key - * exists and 0 when it doesn't. */ -int hashTypeExists(robj *o, robj *key) { - if (o->encoding == REDIS_ENCODING_ZIPMAP) { - key = getDecodedObject(key); - if (zipmapExists(o->ptr,key->ptr,sdslen(key->ptr))) { - decrRefCount(key); +/* Test if the specified field exists in the given hash. Returns 1 if the field + * exists, and 0 when it doesn't. */ +int hashTypeExists(robj *o, robj *field) { + if (o->encoding == REDIS_ENCODING_ZIPLIST) { + unsigned char *vstr = NULL; + unsigned int vlen = UINT_MAX; + long long vll = LLONG_MAX; + + if (hashTypeGetFromZiplist(o, field, &vstr, &vlen, &vll) == 0) { return 1; } - decrRefCount(key); + + } else if (o->encoding == REDIS_ENCODING_HT) { + robj *aux; + + if (hashTypeGetFromHashTable(o, field, &aux) == 0) { + return 1; + } + } else { - if (dictFind(o->ptr,key) != NULL) { - return 1; - } + redisPanic("Unknown hash encoding"); } + return 0; } /* Add an element, discard the old if the key already exists. * Return 0 on insert and 1 on update. */ -int hashTypeSet(robj *o, robj *key, robj *value) { +int hashTypeSet(robj *o, robj *field, robj *value) { int update = 0; - if (o->encoding == REDIS_ENCODING_ZIPMAP) { - key = getDecodedObject(key); + + if (o->encoding == REDIS_ENCODING_ZIPLIST) { + unsigned char *zl, *fptr, *vptr; + + field = getDecodedObject(field); value = getDecodedObject(value); - o->ptr = zipmapSet(o->ptr, - key->ptr,sdslen(key->ptr), - value->ptr,sdslen(value->ptr), &update); - decrRefCount(key); + + zl = o->ptr; + fptr = ziplistIndex(zl, ZIPLIST_HEAD); + while (fptr != NULL) { + /* Compare field in ziplist with specified field */ + if (ziplistCompare(fptr, field->ptr, sdslen(field->ptr))) { + zl = ziplistDelete(zl,&fptr); + zl = ziplistDelete(zl,&fptr); + o->ptr = zl; + update = 1; + break; + } + + /* Grab pointer to the value (fptr points to the field) */ + vptr = ziplistNext(zl, fptr); + redisAssert(vptr != NULL); + + /* Grab pointer (if any) to the next field */ + fptr = ziplistNext(zl, vptr); + } + + /* Push new field/value pair onto the tail of the ziplist */ + zl = ziplistPush(zl, field->ptr, sdslen(field->ptr), ZIPLIST_TAIL); + zl = ziplistPush(zl, value->ptr, sdslen(value->ptr), ZIPLIST_TAIL); + o->ptr = zl; + + decrRefCount(field); decrRefCount(value); - /* Check if the zipmap needs to be upgraded to a real hash table */ - if (zipmapLen(o->ptr) > server.hash_max_zipmap_entries) - convertToRealHash(o); - } else { - if (dictReplace(o->ptr,key,value)) { - /* Insert */ - incrRefCount(key); - } else { - /* Update */ + /* Check if the ziplist needs to be converted to a hash table */ + if (hashTypeLength(o) > server.hash_max_ziplist_entries) { + hashTypeConvert(o, REDIS_ENCODING_HT); + } + + } else if (o->encoding == REDIS_ENCODING_HT) { + if (dictReplace(o->ptr, field, value)) { /* Insert */ + incrRefCount(field); + } else { /* Update */ update = 1; } + incrRefCount(value); + + } else { + redisPanic("Unknown hash encoding"); } + return update; } /* Delete an element from a hash. * Return 1 on deleted and 0 on not found. */ -int hashTypeDelete(robj *o, robj *key) { +int hashTypeDelete(robj *o, robj *field) { int deleted = 0; - if (o->encoding == REDIS_ENCODING_ZIPMAP) { - key = getDecodedObject(key); - o->ptr = zipmapDel(o->ptr,key->ptr,sdslen(key->ptr), &deleted); - decrRefCount(key); + + if (o->encoding == REDIS_ENCODING_ZIPLIST) { + unsigned char *zl, *fptr, *vptr; + + field = getDecodedObject(field); + + zl = o->ptr; + fptr = ziplistIndex(zl, ZIPLIST_HEAD); + while (fptr != NULL) { + /* Compare field in ziplist with specified field */ + if (ziplistCompare(fptr, field->ptr, sdslen(field->ptr))) { + zl = ziplistDelete(zl,&fptr); + zl = ziplistDelete(zl,&fptr); + o->ptr = zl; + deleted = 1; + break; + } + + /* Grab pointer to the value (fptr points to the field) */ + vptr = ziplistNext(zl, fptr); + redisAssert(vptr != NULL); + + /* Grab pointer (if any) to the next field */ + fptr = ziplistNext(zl, vptr); + } + + decrRefCount(field); + + } else if (o->encoding == REDIS_ENCODING_HT) { + if (dictDelete((dict*)o->ptr, field) == REDIS_OK) { + deleted = 1; + + /* Always check if the dictionary needs a resize after a delete. */ + if (htNeedsResize(o->ptr)) dictResize(o->ptr); + } + } else { - deleted = dictDelete((dict*)o->ptr,key) == DICT_OK; - /* Always check if the dictionary needs a resize after a delete. */ - if (deleted && htNeedsResize(o->ptr)) dictResize(o->ptr); + redisPanic("Unknown hash encoding"); } + return deleted; } /* Return the number of elements in a hash. */ unsigned long hashTypeLength(robj *o) { - return (o->encoding == REDIS_ENCODING_ZIPMAP) ? - zipmapLen((unsigned char*)o->ptr) : dictSize((dict*)o->ptr); + unsigned long length = ULONG_MAX; + + if (o->encoding == REDIS_ENCODING_ZIPLIST) { + length = ziplistLen(o->ptr) / 2; + } else if (o->encoding == REDIS_ENCODING_HT) { + length = dictSize((dict*)o->ptr); + } else { + redisPanic("Unknown hash encoding"); + } + + return length; } hashTypeIterator *hashTypeInitIterator(robj *subject) { hashTypeIterator *hi = zmalloc(sizeof(hashTypeIterator)); + hi->subject = subject; hi->encoding = subject->encoding; - if (hi->encoding == REDIS_ENCODING_ZIPMAP) { - hi->zi = zipmapRewind(subject->ptr); + + if (hi->encoding == REDIS_ENCODING_ZIPLIST) { + hi->fptr = NULL; + hi->vptr = NULL; } else if (hi->encoding == REDIS_ENCODING_HT) { hi->di = dictGetIterator(subject->ptr); } else { - redisAssertWithInfo(NULL,subject,0); + redisPanic("Unknown hash encoding"); } + return hi; } @@ -169,68 +294,116 @@ void hashTypeReleaseIterator(hashTypeIterator *hi) { if (hi->encoding == REDIS_ENCODING_HT) { dictReleaseIterator(hi->di); } + zfree(hi); } /* Move to the next entry in the hash. Return REDIS_OK when the next entry * could be found and REDIS_ERR when the iterator reaches the end. */ int hashTypeNext(hashTypeIterator *hi) { - if (hi->encoding == REDIS_ENCODING_ZIPMAP) { - if ((hi->zi = zipmapNext(hi->zi, &hi->zk, &hi->zklen, - &hi->zv, &hi->zvlen)) == NULL) return REDIS_ERR; + if (hi->encoding == REDIS_ENCODING_ZIPLIST) { + unsigned char *zl; + unsigned char *fptr, *vptr; + + zl = hi->subject->ptr; + fptr = hi->fptr; + vptr = hi->vptr; + + if (fptr == NULL) { + /* Initialize cursor */ + redisAssert(vptr == NULL); + fptr = ziplistIndex(zl, 0); + } else { + /* Advance cursor */ + redisAssert(vptr != NULL); + fptr = ziplistNext(zl, vptr); + } + + if (fptr == NULL) { + return REDIS_ERR; + } + + /* Grab pointer to the value (fptr points to the field) */ + vptr = ziplistNext(zl, fptr); + redisAssert(vptr != NULL); + + /* fptr, vptr now point to the first or next pair */ + hi->fptr = fptr; + hi->vptr = vptr; + + } else if (hi->encoding == REDIS_ENCODING_HT) { + if ((hi->de = dictNext(hi->di)) == NULL) { + return REDIS_ERR; + } + } else { - if ((hi->de = dictNext(hi->di)) == NULL) return REDIS_ERR; + redisPanic("Unknown hash encoding"); } + return REDIS_OK; } -/* Get key or value object at current iteration position. - * The returned item differs with the hash object encoding: - * - When encoding is REDIS_ENCODING_HT, the objval pointer is populated - * with the original object. - * - When encoding is REDIS_ENCODING_ZIPMAP, a pointer to the string and - * its length is retunred populating the v and vlen pointers. - * This function is copy on write friendly as accessing objects in read only - * does not require writing to any memory page. - * - * The function returns the encoding of the object, so that the caller - * can underestand if the key or value was returned as object or C string. */ -int hashTypeCurrent(hashTypeIterator *hi, int what, robj **objval, unsigned char **v, unsigned int *vlen) { - if (hi->encoding == REDIS_ENCODING_ZIPMAP) { - if (what & REDIS_HASH_KEY) { - *v = hi->zk; - *vlen = hi->zklen; - } else { - *v = hi->zv; - *vlen = hi->zvlen; - } +/* Get the field or value at iterator cursor, for an iterator on a hash value + * encoded as a ziplist. Prototype is similar to `hashTypeGetFromZiplist`. */ +void hashTypeCurrentFromZiplist(hashTypeIterator *hi, int what, + unsigned char **vstr, + unsigned int *vlen, + long long *vll) +{ + int ret; + + redisAssert(hi->encoding == REDIS_ENCODING_ZIPLIST); + + if (what & REDIS_HASH_KEY) { + ret = ziplistGet(hi->fptr, vstr, vlen, vll); + redisAssert(ret); } else { - if (what & REDIS_HASH_KEY) - *objval = dictGetKey(hi->de); - else - *objval = dictGetVal(hi->de); + ret = ziplistGet(hi->vptr, vstr, vlen, vll); + redisAssert(ret); } - return hi->encoding; } -/* A non copy-on-write friendly but higher level version of hashTypeCurrent() - * that always returns an object with refcount incremented by one (or a new - * object), so it's up to the caller to decrRefCount() the object if no - * reference is retained. */ -robj *hashTypeCurrentObject(hashTypeIterator *hi, int what) { - robj *obj; - unsigned char *v = NULL; - unsigned int vlen = 0; - int encoding = hashTypeCurrent(hi,what,&obj,&v,&vlen); +/* Get the field or value at iterator cursor, for an iterator on a hash value + * encoded as a ziplist. Prototype is similar to `hashTypeGetFromHashTable`. */ +void hashTypeCurrentFromHashTable(hashTypeIterator *hi, int what, robj **dst) { + redisAssert(hi->encoding == REDIS_ENCODING_HT); - if (encoding == REDIS_ENCODING_HT) { - incrRefCount(obj); - return obj; + if (what & REDIS_HASH_KEY) { + *dst = dictGetKey(hi->de); } else { - return createStringObject((char*)v,vlen); + *dst = dictGetVal(hi->de); } } +/* A non copy-on-write friendly but higher level version of hashTypeCurrent*() + * that returns an object with incremented refcount (or a new object). It is up + * to the caller to decrRefCount() the object if no reference is retained. */ +robj *hashTypeCurrentObject(hashTypeIterator *hi, int what) { + robj *dst; + + if (hi->encoding == REDIS_ENCODING_ZIPLIST) { + unsigned char *vstr = NULL; + unsigned int vlen = UINT_MAX; + long long vll = LLONG_MAX; + + hashTypeCurrentFromZiplist(hi, what, &vstr, &vlen, &vll); + if (vstr) { + dst = createStringObject((char*)vstr, vlen); + } else { + dst = createStringObjectFromLongLong(vll); + } + + } else if (hi->encoding == REDIS_ENCODING_HT) { + hashTypeCurrentFromHashTable(hi, what, &dst); + incrRefCount(dst); + + } else { + redisPanic("Unknown hash encoding"); + } + + return dst; +} + robj *hashTypeLookupWriteOrCreate(redisClient *c, robj *key) { robj *o = lookupKeyWrite(c->db,key); if (o == NULL) { @@ -245,25 +418,50 @@ robj *hashTypeLookupWriteOrCreate(redisClient *c, robj *key) { return o; } -void convertToRealHash(robj *o) { - unsigned char *key, *val, *p, *zm = o->ptr; - unsigned int klen, vlen; - dict *dict = dictCreate(&hashDictType,NULL); +void hashTypeConvertZiplist(robj *o, int enc) { + redisAssert(o->encoding == REDIS_ENCODING_ZIPLIST); - redisAssertWithInfo(NULL,o,o->type == REDIS_HASH && o->encoding != REDIS_ENCODING_HT); - p = zipmapRewind(zm); - while((p = zipmapNext(p,&key,&klen,&val,&vlen)) != NULL) { - robj *keyobj, *valobj; + if (enc == REDIS_ENCODING_ZIPLIST) { + /* Nothing to do... */ - keyobj = createStringObject((char*)key,klen); - valobj = createStringObject((char*)val,vlen); - keyobj = tryObjectEncoding(keyobj); - valobj = tryObjectEncoding(valobj); - dictAdd(dict,keyobj,valobj); + } else if (enc == REDIS_ENCODING_HT) { + hashTypeIterator *hi; + dict *dict; + int ret; + + hi = hashTypeInitIterator(o); + dict = dictCreate(&hashDictType, NULL); + + while (hashTypeNext(hi) != REDIS_ERR) { + robj *field, *value; + + field = hashTypeCurrentObject(hi, REDIS_HASH_KEY); + field = tryObjectEncoding(field); + value = hashTypeCurrentObject(hi, REDIS_HASH_VALUE); + value = tryObjectEncoding(value); + ret = dictAdd(dict, field, value); + redisAssert(ret == DICT_OK); + } + + hashTypeReleaseIterator(hi); + zfree(o->ptr); + + o->encoding = REDIS_ENCODING_HT; + o->ptr = dict; + + } else { + redisPanic("Unknown hash encoding"); + } +} + +void hashTypeConvert(robj *o, int enc) { + if (o->encoding == REDIS_ENCODING_ZIPLIST) { + hashTypeConvertZiplist(o, enc); + } else if (o->encoding == REDIS_ENCODING_HT) { + redisPanic("Not implemented"); + } else { + redisPanic("Unknown hash encoding"); } - o->encoding = REDIS_ENCODING_HT; - o->ptr = dict; - zfree(zm); } /*----------------------------------------------------------------------------- @@ -373,51 +571,69 @@ void hincrbyfloatCommand(redisClient *c) { server.dirty++; } +static void addHashFieldToReply(redisClient *c, robj *o, robj *field) { + int ret; + + if (o == NULL) { + addReply(c, shared.nullbulk); + return; + } + + if (o->encoding == REDIS_ENCODING_ZIPLIST) { + unsigned char *vstr = NULL; + unsigned int vlen = UINT_MAX; + long long vll = LLONG_MAX; + + ret = hashTypeGetFromZiplist(o, field, &vstr, &vlen, &vll); + if (ret < 0) { + addReply(c, shared.nullbulk); + } else { + if (vstr) { + addReplyBulkCBuffer(c, vstr, vlen); + } else { + addReplyBulkLongLong(c, vll); + } + } + + } else if (o->encoding == REDIS_ENCODING_HT) { + robj *value; + + ret = hashTypeGetFromHashTable(o, field, &value); + if (ret < 0) { + addReply(c, shared.nullbulk); + } else { + addReplyBulk(c, value); + } + + } else { + redisPanic("Unknown hash encoding"); + } +} + void hgetCommand(redisClient *c) { - robj *o, *value; - unsigned char *v; - unsigned int vlen; - int encoding; + robj *o; if ((o = lookupKeyReadOrReply(c,c->argv[1],shared.nullbulk)) == NULL || checkType(c,o,REDIS_HASH)) return; - if ((encoding = hashTypeGet(o,c->argv[2],&value,&v,&vlen)) != -1) { - if (encoding == REDIS_ENCODING_HT) - addReplyBulk(c,value); - else - addReplyBulkCBuffer(c,v,vlen); - } else { - addReply(c,shared.nullbulk); - } + addHashFieldToReply(c, o, c->argv[2]); } void hmgetCommand(redisClient *c) { - int i, encoding; - robj *o, *value; - unsigned char *v; - unsigned int vlen; + robj *o; + int i; - o = lookupKeyRead(c->db,c->argv[1]); + /* Don't abort when the key cannot be found. Non-existing keys are empty + * hashes, where HMGET should respond with a series of null bulks. */ + o = lookupKeyRead(c->db, c->argv[1]); if (o != NULL && o->type != REDIS_HASH) { - addReply(c,shared.wrongtypeerr); + addReply(c, shared.wrongtypeerr); return; } - /* Note the check for o != NULL happens inside the loop. This is - * done because objects that cannot be found are considered to be - * an empty hash. The reply should then be a series of NULLs. */ - addReplyMultiBulkLen(c,c->argc-2); + addReplyMultiBulkLen(c, c->argc-2); for (i = 2; i < c->argc; i++) { - if (o != NULL && - (encoding = hashTypeGet(o,c->argv[i],&value,&v,&vlen)) != -1) { - if (encoding == REDIS_ENCODING_HT) - addReplyBulk(c,value); - else - addReplyBulkCBuffer(c,v,vlen); - } else { - addReply(c,shared.nullbulk); - } + addHashFieldToReply(c, o, c->argv[i]); } } @@ -452,42 +668,59 @@ void hlenCommand(redisClient *c) { addReplyLongLong(c,hashTypeLength(o)); } +static void addHashIteratorCursorToReply(redisClient *c, hashTypeIterator *hi, int what) { + if (hi->encoding == REDIS_ENCODING_ZIPLIST) { + unsigned char *vstr = NULL; + unsigned int vlen = UINT_MAX; + long long vll = LLONG_MAX; + + hashTypeCurrentFromZiplist(hi, what, &vstr, &vlen, &vll); + if (vstr) { + addReplyBulkCBuffer(c, vstr, vlen); + } else { + addReplyBulkLongLong(c, vll); + } + + } else if (hi->encoding == REDIS_ENCODING_HT) { + robj *value; + + hashTypeCurrentFromHashTable(hi, what, &value); + addReplyBulk(c, value); + + } else { + redisPanic("Unknown hash encoding"); + } +} + void genericHgetallCommand(redisClient *c, int flags) { robj *o; - unsigned long count = 0; hashTypeIterator *hi; - void *replylen = NULL; + int multiplier = 0; + int length, count = 0; if ((o = lookupKeyReadOrReply(c,c->argv[1],shared.emptymultibulk)) == NULL || checkType(c,o,REDIS_HASH)) return; - replylen = addDeferredMultiBulkLength(c); + if (flags & REDIS_HASH_KEY) multiplier++; + if (flags & REDIS_HASH_VALUE) multiplier++; + + length = hashTypeLength(o) * multiplier; + addReplyMultiBulkLen(c, length); + hi = hashTypeInitIterator(o); while (hashTypeNext(hi) != REDIS_ERR) { - robj *obj; - unsigned char *v = NULL; - unsigned int vlen = 0; - int encoding; - if (flags & REDIS_HASH_KEY) { - encoding = hashTypeCurrent(hi,REDIS_HASH_KEY,&obj,&v,&vlen); - if (encoding == REDIS_ENCODING_HT) - addReplyBulk(c,obj); - else - addReplyBulkCBuffer(c,v,vlen); + addHashIteratorCursorToReply(c, hi, REDIS_HASH_KEY); count++; } if (flags & REDIS_HASH_VALUE) { - encoding = hashTypeCurrent(hi,REDIS_HASH_VALUE,&obj,&v,&vlen); - if (encoding == REDIS_ENCODING_HT) - addReplyBulk(c,obj); - else - addReplyBulkCBuffer(c,v,vlen); + addHashIteratorCursorToReply(c, hi, REDIS_HASH_VALUE); count++; } } + hashTypeReleaseIterator(hi); - setDeferredMultiBulkLength(c,replylen,count); + redisAssert(count == length); } void hkeysCommand(redisClient *c) { diff --git a/tests/unit/aofrw.tcl b/tests/unit/aofrw.tcl index a558ed4c..358266ef 100644 --- a/tests/unit/aofrw.tcl +++ b/tests/unit/aofrw.tcl @@ -54,10 +54,10 @@ start_server {tags {"aofrw"}} { } foreach d {string int} { - foreach e {zipmap hashtable} { + foreach e {ziplist hashtable} { test "AOF rewrite of hash with $e encoding, $d data" { r flushall - if {$e eq {zipmap}} {set len 10} else {set len 1000} + if {$e eq {ziplist}} {set len 10} else {set len 1000} for {set j 0} {$j < $len} {incr j} { if {$d eq {string}} { set data [randstring 0 16 alpha] diff --git a/tests/unit/type/hash.tcl b/tests/unit/type/hash.tcl index 04a5f4c7..141971a8 100644 --- a/tests/unit/type/hash.tcl +++ b/tests/unit/type/hash.tcl @@ -14,8 +14,8 @@ start_server {tags {"hash"}} { list [r hlen smallhash] } {8} - test {Is the small hash encoded with a zipmap?} { - assert_encoding zipmap smallhash + test {Is the small hash encoded with a ziplist?} { + assert_encoding ziplist smallhash } test {HSET/HLEN - Big hash creation} { @@ -33,7 +33,7 @@ start_server {tags {"hash"}} { list [r hlen bighash] } {1024} - test {Is the big hash encoded with a zipmap?} { + test {Is the big hash encoded with a ziplist?} { assert_encoding hashtable bighash } @@ -252,7 +252,7 @@ start_server {tags {"hash"}} { lappend rv [r hexists bighash nokey] } {1 0 1 0} - test {Is a zipmap encoded Hash promoted on big payload?} { + test {Is a ziplist encoded Hash promoted on big payload?} { r hset smallhash foo [string repeat a 1024] r debug object smallhash } {*hashtable*} @@ -382,7 +382,7 @@ start_server {tags {"hash"}} { lappend rv [string match "ERR*not*float*" $bigerr] } {1 1} - test {Hash zipmap regression test for large keys} { + test {Hash ziplist regression test for large keys} { r hset hash kkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkk a r hset hash kkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkk b r hget hash kkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkk From fe458402014cdd98a10179c85899f1eca0307534 Mon Sep 17 00:00:00 2001 From: Pieter Noordhuis Date: Tue, 3 Jan 2012 15:48:55 -0800 Subject: [PATCH 03/70] Implements ziplistFind To improve the performance of the ziplist implementation, some functions have been converted to macros to avoid unnecessary stack movement and duplicate variable assignments. --- src/t_hash.c | 70 +++++++--------- src/ziplist.c | 215 ++++++++++++++++++++++++++++++++------------------ src/ziplist.h | 1 + 3 files changed, 170 insertions(+), 116 deletions(-) diff --git a/src/t_hash.c b/src/t_hash.c index 9699223a..a22de242 100644 --- a/src/t_hash.c +++ b/src/t_hash.c @@ -47,23 +47,18 @@ int hashTypeGetFromZiplist(robj *o, robj *field, zl = o->ptr; fptr = ziplistIndex(zl, ZIPLIST_HEAD); - while (fptr != NULL) { - /* Grab pointer to the value (fptr points to the field) */ - vptr = ziplistNext(zl, fptr); - redisAssert(vptr != NULL); - - /* Compare field in ziplist with specified field */ - if (ziplistCompare(fptr, field->ptr, sdslen(field->ptr))) { - break; + if (fptr != NULL) { + fptr = ziplistFind(fptr, field->ptr, sdslen(field->ptr), 1); + if (fptr != NULL) { + /* Grab pointer to the value (fptr points to the field) */ + vptr = ziplistNext(zl, fptr); + redisAssert(vptr != NULL); } - - /* Skip over value */ - fptr = ziplistNext(zl, vptr); } decrRefCount(field); - if (fptr != NULL) { + if (vptr != NULL) { ret = ziplistGet(vptr, vstr, vlen, vll); redisAssert(ret); return 0; @@ -164,27 +159,28 @@ int hashTypeSet(robj *o, robj *field, robj *value) { zl = o->ptr; fptr = ziplistIndex(zl, ZIPLIST_HEAD); - while (fptr != NULL) { - /* Compare field in ziplist with specified field */ - if (ziplistCompare(fptr, field->ptr, sdslen(field->ptr))) { - zl = ziplistDelete(zl,&fptr); - zl = ziplistDelete(zl,&fptr); - o->ptr = zl; + if (fptr != NULL) { + fptr = ziplistFind(fptr, field->ptr, sdslen(field->ptr), 1); + if (fptr != NULL) { + /* Grab pointer to the value (fptr points to the field) */ + vptr = ziplistNext(zl, fptr); + redisAssert(vptr != NULL); update = 1; - break; + + /* Delete value */ + zl = ziplistDelete(zl, &vptr); + + /* Insert new value */ + zl = ziplistInsert(zl, vptr, value->ptr, sdslen(value->ptr)); } - - /* Grab pointer to the value (fptr points to the field) */ - vptr = ziplistNext(zl, fptr); - redisAssert(vptr != NULL); - - /* Grab pointer (if any) to the next field */ - fptr = ziplistNext(zl, vptr); } - /* Push new field/value pair onto the tail of the ziplist */ - zl = ziplistPush(zl, field->ptr, sdslen(field->ptr), ZIPLIST_TAIL); - zl = ziplistPush(zl, value->ptr, sdslen(value->ptr), ZIPLIST_TAIL); + if (!update) { + /* Push new field/value pair onto the tail of the ziplist */ + zl = ziplistPush(zl, field->ptr, sdslen(field->ptr), ZIPLIST_TAIL); + zl = ziplistPush(zl, value->ptr, sdslen(value->ptr), ZIPLIST_TAIL); + } + o->ptr = zl; decrRefCount(field); @@ -217,28 +213,20 @@ int hashTypeDelete(robj *o, robj *field) { int deleted = 0; if (o->encoding == REDIS_ENCODING_ZIPLIST) { - unsigned char *zl, *fptr, *vptr; + unsigned char *zl, *fptr; field = getDecodedObject(field); zl = o->ptr; fptr = ziplistIndex(zl, ZIPLIST_HEAD); - while (fptr != NULL) { - /* Compare field in ziplist with specified field */ - if (ziplistCompare(fptr, field->ptr, sdslen(field->ptr))) { + if (fptr != NULL) { + fptr = ziplistFind(fptr, field->ptr, sdslen(field->ptr), 1); + if (fptr != NULL) { zl = ziplistDelete(zl,&fptr); zl = ziplistDelete(zl,&fptr); o->ptr = zl; deleted = 1; - break; } - - /* Grab pointer to the value (fptr points to the field) */ - vptr = ziplistNext(zl, fptr); - redisAssert(vptr != NULL); - - /* Grab pointer (if any) to the next field */ - fptr = ziplistNext(zl, vptr); } decrRefCount(field); diff --git a/src/ziplist.c b/src/ziplist.c index 639e4b61..4880013a 100644 --- a/src/ziplist.c +++ b/src/ziplist.c @@ -75,6 +75,8 @@ #define ZIP_BIGLEN 254 /* Different encoding/length possibilities */ +#define ZIP_STR_MASK (0xc0) +#define ZIP_INT_MASK (0x30) #define ZIP_STR_06B (0 << 6) #define ZIP_STR_14B (1 << 6) #define ZIP_STR_32B (2 << 6) @@ -82,9 +84,8 @@ #define ZIP_INT_32B (0xc0 | 1<<4) #define ZIP_INT_64B (0xc0 | 2<<4) -/* Macro's to determine type */ -#define ZIP_IS_STR(enc) (((enc) & 0xc0) < 0xc0) -#define ZIP_IS_INT(enc) (!ZIP_IS_STR(enc) && ((enc) & 0x30) < 0x30) +/* Macro to determine type */ +#define ZIP_IS_STR(enc) (((enc) & ZIP_STR_MASK) < ZIP_STR_MASK) /* Utility macros */ #define ZIPLIST_BYTES(zl) (*((uint32_t*)(zl))) @@ -108,19 +109,13 @@ typedef struct zlentry { unsigned char *p; } zlentry; -/* Return the encoding pointer to by 'p'. */ -static unsigned int zipEntryEncoding(unsigned char *p) { - /* String encoding: 2 MSBs */ - unsigned char b = p[0] & 0xc0; - if (b < 0xc0) { - return b; - } else { - /* Integer encoding: 4 MSBs */ - return p[0] & 0xf0; - } - assert(NULL); - return 0; -} +#define ZIP_ENTRY_ENCODING(ptr, encoding) do { \ + (encoding) = (ptr[0]) & (ZIP_STR_MASK | ZIP_INT_MASK); \ + if (((encoding) & ZIP_STR_MASK) < ZIP_STR_MASK) { \ + /* String encoding: 2 MSBs */ \ + (encoding) &= ZIP_STR_MASK; \ + } \ +} while(0) /* Return bytes needed to store integer encoded by 'encoding' */ static unsigned int zipIntSize(unsigned char encoding) { @@ -133,36 +128,6 @@ static unsigned int zipIntSize(unsigned char encoding) { return 0; } -/* Decode the encoded length pointed by 'p'. If a pointer to 'lensize' is - * provided, it is set to the number of bytes required to encode the length. */ -static unsigned int zipDecodeLength(unsigned char *p, unsigned int *lensize) { - unsigned char encoding = zipEntryEncoding(p); - unsigned int len = 0; - - if (ZIP_IS_STR(encoding)) { - switch(encoding) { - case ZIP_STR_06B: - len = p[0] & 0x3f; - if (lensize) *lensize = 1; - break; - case ZIP_STR_14B: - len = ((p[0] & 0x3f) << 8) | p[1]; - if (lensize) *lensize = 2; - break; - case ZIP_STR_32B: - len = (p[1] << 24) | (p[2] << 16) | (p[3] << 8) | p[4]; - if (lensize) *lensize = 5; - break; - default: - assert(NULL); - } - } else { - len = zipIntSize(encoding); - if (lensize) *lensize = 1; - } - return len; -} - /* Encode the length 'l' writing it in 'p'. If p is NULL it just returns * the amount of bytes required to encode such a length. */ static unsigned int zipEncodeLength(unsigned char *p, unsigned char encoding, unsigned int rawlen) { @@ -199,18 +164,33 @@ static unsigned int zipEncodeLength(unsigned char *p, unsigned char encoding, un return len; } -/* Decode the length of the previous element stored at "p". */ -static unsigned int zipPrevDecodeLength(unsigned char *p, unsigned int *lensize) { - unsigned int len = *p; - if (len < ZIP_BIGLEN) { - if (lensize) *lensize = 1; - } else { - if (lensize) *lensize = 1+sizeof(len); - memcpy(&len,p+1,sizeof(len)); - memrev32ifbe(&len); - } - return len; -} +/* Decode the length encoded in 'ptr'. The 'encoding' variable will hold the + * entries encoding, the 'lensize' variable will hold the number of bytes + * required to encode the entries length, and the 'len' variable will hold the + * entries length. */ +#define ZIP_DECODE_LENGTH(ptr, encoding, lensize, len) do { \ + ZIP_ENTRY_ENCODING((ptr), (encoding)); \ + if ((encoding) < ZIP_STR_MASK) { \ + if ((encoding) == ZIP_STR_06B) { \ + (lensize) = 1; \ + (len) = (ptr)[0] & 0x3f; \ + } else if ((encoding) == ZIP_STR_14B) { \ + (lensize) = 2; \ + (len) = (((ptr)[0] & 0x3f) << 8) | (ptr)[1]; \ + } else if (encoding == ZIP_STR_32B) { \ + (lensize) = 5; \ + (len) = ((ptr)[1] << 24) | \ + ((ptr)[2] << 16) | \ + ((ptr)[3] << 8) | \ + ((ptr)[4]); \ + } else { \ + assert(NULL); \ + } \ + } else { \ + (lensize) = 1; \ + (len) = zipIntSize(encoding); \ + } \ +} while(0); /* Encode the length of the previous entry and write it to "p". Return the * number of bytes needed to encode this length if "p" is NULL. */ @@ -239,12 +219,43 @@ static void zipPrevEncodeLengthForceLarge(unsigned char *p, unsigned int len) { memrev32ifbe(p+1); } -/* Return the difference in number of bytes needed to store the new length - * "len" on the entry pointed to by "p". */ +/* Decode the number of bytes required to store the length of the previous + * element, from the perspective of the entry pointed to by 'ptr'. */ +#define ZIP_DECODE_PREVLENSIZE(ptr, prevlensize) do { \ + if ((ptr)[0] < ZIP_BIGLEN) { \ + (prevlensize) = 1; \ + } else { \ + (prevlensize) = 5; \ + } \ +} while(0); + +/* Decode the length of the previous element, from the perspective of the entry + * pointed to by 'ptr'. */ +#define ZIP_DECODE_PREVLEN(ptr, prevlensize, prevlen) do { \ + ZIP_DECODE_PREVLENSIZE(ptr, prevlensize); \ + if ((prevlensize) == 1) { \ + (prevlen) = (ptr)[0]; \ + } else if ((prevlensize) == 5) { \ + assert(sizeof((prevlensize)) == 4); \ + memcpy(&(prevlen), ((char*)(ptr)) + 1, 4); \ + memrev32ifbe(&len); \ + } \ +} while(0); + +/* Return the difference in number of bytes needed to store the length of the + * previous element 'len', in the entry pointed to by 'p'. */ static int zipPrevLenByteDiff(unsigned char *p, unsigned int len) { unsigned int prevlensize; - zipPrevDecodeLength(p,&prevlensize); - return zipPrevEncodeLength(NULL,len)-prevlensize; + ZIP_DECODE_PREVLENSIZE(p, prevlensize); + return zipPrevEncodeLength(NULL, len) - prevlensize; +} + +/* Return the total number of bytes used by the entry pointed to by 'p'. */ +static unsigned int zipRawEntryLength(unsigned char *p) { + unsigned int prevlensize, encoding, lensize, len; + ZIP_DECODE_PREVLENSIZE(p, prevlensize); + ZIP_DECODE_LENGTH(p + prevlensize, encoding, lensize, len); + return prevlensize + lensize + len; } /* Check if string pointed to by 'entry' can be encoded as an integer. @@ -317,20 +328,14 @@ static int64_t zipLoadInteger(unsigned char *p, unsigned char encoding) { /* Return a struct with all information about an entry. */ static zlentry zipEntry(unsigned char *p) { zlentry e; - e.prevrawlen = zipPrevDecodeLength(p,&e.prevrawlensize); - e.len = zipDecodeLength(p+e.prevrawlensize,&e.lensize); - e.headersize = e.prevrawlensize+e.lensize; - e.encoding = zipEntryEncoding(p+e.prevrawlensize); + + ZIP_DECODE_PREVLEN(p, e.prevrawlensize, e.prevrawlen); + ZIP_DECODE_LENGTH(p + e.prevrawlensize, e.encoding, e.lensize, e.len); + e.headersize = e.prevrawlensize + e.lensize; e.p = p; return e; } -/* Return the total number of bytes used by the entry at "p". */ -static unsigned int zipRawEntryLength(unsigned char *p) { - zlentry e = zipEntry(p); - return e.headersize + e.len; -} - /* Create a new empty ziplist. */ unsigned char *ziplistNew(void) { unsigned int bytes = ZIPLIST_HEADER_SIZE+1; @@ -616,10 +621,14 @@ unsigned char *ziplistNext(unsigned char *zl, unsigned char *p) { * when the *next* element is ZIP_END (there is no next entry). */ if (p[0] == ZIP_END) { return NULL; - } else { - p = p+zipRawEntryLength(p); - return (p[0] == ZIP_END) ? NULL : p; } + + p += zipRawEntryLength(p); + if (p[0] == ZIP_END) { + return NULL; + } + + return p; } /* Return pointer to previous entry in ziplist. */ @@ -717,6 +726,62 @@ unsigned int ziplistCompare(unsigned char *p, unsigned char *sstr, unsigned int return 0; } +/* Find pointer to the entry equal to the specified entry. Skip 'skip' entries + * between every comparison. Returns NULL when the field could not be found. */ +unsigned char *ziplistFind(unsigned char *p, unsigned char *vstr, unsigned int vlen, unsigned int skip) { + int skipcnt = 0; + unsigned char vencoding = 0; + long long vll = 0; + + while (p[0] != ZIP_END) { + unsigned int prevlensize, encoding, lensize, len; + unsigned char *q; + + ZIP_DECODE_PREVLENSIZE(p, prevlensize); + ZIP_DECODE_LENGTH(p + prevlensize, encoding, lensize, len); + q = p + prevlensize + lensize; + + if (skipcnt == 0) { + /* Compare current entry with specified entry */ + if (ZIP_IS_STR(encoding)) { + if (len == vlen && memcmp(q, vstr, vlen) == 0) { + return p; + } + } else { + /* Find out if the specified entry can be encoded */ + if (vencoding == 0) { + /* UINT_MAX when the entry CANNOT be encoded */ + if (!zipTryEncoding(vstr, vlen, &vll, &vencoding)) { + vencoding = UCHAR_MAX; + } + + /* Must be non-zero by now */ + assert(vencoding); + } + + /* Compare current entry with specified entry */ + if (encoding == vencoding) { + long long ll = zipLoadInteger(q, encoding); + if (ll == vll) { + return p; + } + } + } + + /* Reset skip count */ + skipcnt = skip; + } else { + /* Skip entry */ + skipcnt--; + } + + /* Move to next entry */ + p = q + len; + } + + return NULL; +} + /* Return length of ziplist. */ unsigned int ziplistLen(unsigned char *zl) { unsigned int len = 0; diff --git a/src/ziplist.h b/src/ziplist.h index a07b8440..865b38b4 100644 --- a/src/ziplist.h +++ b/src/ziplist.h @@ -11,5 +11,6 @@ unsigned char *ziplistInsert(unsigned char *zl, unsigned char *p, unsigned char unsigned char *ziplistDelete(unsigned char *zl, unsigned char **p); unsigned char *ziplistDeleteRange(unsigned char *zl, unsigned int index, unsigned int num); unsigned int ziplistCompare(unsigned char *p, unsigned char *s, unsigned int slen); +unsigned char *ziplistFind(unsigned char *p, unsigned char *vstr, unsigned int vlen, unsigned int skip); unsigned int ziplistLen(unsigned char *zl); size_t ziplistBlobLen(unsigned char *zl); From 80586cb894da69a26f6ab52d6bad0e9e8fa8a6bd Mon Sep 17 00:00:00 2001 From: Pieter Noordhuis Date: Wed, 25 Jan 2012 13:26:25 -0800 Subject: [PATCH 04/70] Test that zipmap from RDB is correctly converted --- src/rdb.c | 16 +++++---- tests/assets/hash-zipmap.rdb | Bin 0 -> 35 bytes .../convert-zipmap-hash-on-load.tcl | 34 ++++++++++++++++++ 3 files changed, 44 insertions(+), 6 deletions(-) create mode 100644 tests/assets/hash-zipmap.rdb create mode 100644 tests/integration/convert-zipmap-hash-on-load.tcl diff --git a/src/rdb.c b/src/rdb.c index c74ec41c..15555fe4 100644 --- a/src/rdb.c +++ b/src/rdb.c @@ -915,12 +915,13 @@ robj *rdbLoadObject(int rdbtype, rio *rdb) { { unsigned char *zl = ziplistNew(); unsigned char *zi = zipmapRewind(o->ptr); + unsigned char *fstr, *vstr; + unsigned int flen, vlen; + unsigned int maxlen = 0; - while (zi != NULL) { - unsigned char *fstr, *vstr; - unsigned int flen, vlen; - - zi = zipmapNext(zi, &fstr, &flen, &vstr, &vlen); + while ((zi = zipmapNext(zi, &fstr, &flen, &vstr, &vlen)) != NULL) { + if (flen > maxlen) maxlen = flen; + if (vlen > maxlen) maxlen = vlen; zl = ziplistPush(zl, fstr, flen, ZIPLIST_TAIL); zl = ziplistPush(zl, vstr, vlen, ZIPLIST_TAIL); } @@ -930,8 +931,11 @@ robj *rdbLoadObject(int rdbtype, rio *rdb) { o->type = REDIS_HASH; o->encoding = REDIS_ENCODING_ZIPLIST; - if (hashTypeLength(o) > server.hash_max_ziplist_entries) + if (hashTypeLength(o) > server.hash_max_ziplist_entries || + maxlen > server.hash_max_ziplist_value) + { hashTypeConvert(o, REDIS_ENCODING_HT); + } } break; case REDIS_RDB_TYPE_LIST_ZIPLIST: diff --git a/tests/assets/hash-zipmap.rdb b/tests/assets/hash-zipmap.rdb new file mode 100644 index 0000000000000000000000000000000000000000..27a42ed4bb494cee9d67b7f0feccce7e4136d50a GIT binary patch literal 35 pcmWG?b@2=~FfcIw$H2*wkyxA|z{Heh$iz@)$dqOTq>TRm2LPS*34j0q literal 0 HcmV?d00001 diff --git a/tests/integration/convert-zipmap-hash-on-load.tcl b/tests/integration/convert-zipmap-hash-on-load.tcl new file mode 100644 index 00000000..75a65d3e --- /dev/null +++ b/tests/integration/convert-zipmap-hash-on-load.tcl @@ -0,0 +1,34 @@ +set server_path [tmpdir "server.convert-zipmap-hash-on-load"] + +# Copy RDB with zipmap encoded hash to server path +exec cp tests/assets/hash-zipmap.rdb $server_path + +start_server [list overrides [list "dir" $server_path "dbfilename" "hash-zipmap.rdb"]] { + test "RDB load zipmap hash: converts to ziplist" { + r select 0 + + assert_match "*ziplist*" [r debug object hash] + assert_equal 2 [r hlen hash] + assert_match {v1 v2} [r hmget hash f1 f2] + } +} + +start_server [list overrides [list "dir" $server_path "dbfilename" "hash-zipmap.rdb" "hash-max-ziplist-entries" 1]] { + test "RDB load zipmap hash: converts to hash table when hash-max-ziplist-entries is exceeded" { + r select 0 + + assert_match "*hashtable*" [r debug object hash] + assert_equal 2 [r hlen hash] + assert_match {v1 v2} [r hmget hash f1 f2] + } +} + +start_server [list overrides [list "dir" $server_path "dbfilename" "hash-zipmap.rdb" "hash-max-ziplist-value" 1]] { + test "RDB load zipmap hash: converts to hash table when hash-max-ziplist-value is exceeded" { + r select 0 + + assert_match "*hashtable*" [r debug object hash] + assert_equal 2 [r hlen hash] + assert_match {v1 v2} [r hmget hash f1 f2] + } +} From d3ea4c86a8c8efb2ab922db89af915833e437e7e Mon Sep 17 00:00:00 2001 From: Pieter Noordhuis Date: Wed, 25 Jan 2012 13:37:43 -0800 Subject: [PATCH 05/70] Update default configuration --- redis.conf | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/redis.conf b/redis.conf index 29e326d1..fc72b536 100644 --- a/redis.conf +++ b/redis.conf @@ -394,12 +394,11 @@ slowlog-max-len 1024 ############################### ADVANCED CONFIG ############################### -# Hashes are encoded in a special way (much more memory efficient) when they -# have at max a given numer of elements, and the biggest element does not -# exceed a given threshold. You can configure this limits with the following -# configuration directives. -hash-max-zipmap-entries 512 -hash-max-zipmap-value 64 +# Hashes are encoded using a memory efficient data structure when they have a +# small number of entries, and the biggest entry does not exceed a given +# threshold. These thresholds can be configured using the following directives. +hash-max-ziplist-entries 512 +hash-max-ziplist-value 64 # Similarly to hashes, small lists are also encoded in a special way in order # to save a lot of space. The special representation is only used when From ad08d059d01ecae5e8c0eae3f31a202896bec4dc Mon Sep 17 00:00:00 2001 From: antirez Date: Tue, 28 Feb 2012 16:17:00 +0100 Subject: [PATCH 06/70] Added command propagation API. --- src/redis.c | 42 +++++++++++++++++++++++++++++++++++------- src/redis.h | 5 +++++ 2 files changed, 40 insertions(+), 7 deletions(-) diff --git a/src/redis.c b/src/redis.c index b4acd6ea..647c5867 100644 --- a/src/redis.c +++ b/src/redis.c @@ -1175,10 +1175,33 @@ struct redisCommand *lookupCommandByCString(char *s) { return cmd; } +/* Propagate the specified command (in the context of the specified database id) + * to AOF, Slaves and Monitors. + * + * flags are an xor between: + * + REDIS_PROPAGATE_NONE (no propagation of command at all) + * + REDIS_PROPAGATE_AOF (propagate into the AOF file if is enabled) + * + REDIS_PROPAGATE_REPL (propagate into the replication link) + */ +void propagate(struct redisCommand *cmd, int dbid, robj **argv, int argc, + int flags) +{ + if (server.aof_state != REDIS_AOF_OFF && flags & REDIS_PROPAGATE_AOF) + feedAppendOnlyFile(cmd,dbid,argv,argc); + if (flags & REDIS_PROPAGATE_REPL && listLength(server.slaves)) + replicationFeedSlaves(server.slaves,dbid,argv,argc); +} + /* Call() is the core of Redis execution of a command */ void call(redisClient *c, int flags) { long long dirty, start = ustime(), duration; + /* Sent the command to clients in MONITOR mode, only if the commands are + * not geneated from reading an AOF. */ + if (listLength(server.monitors) && !server.loading) + replicationFeedMonitors(server.monitors,c->db->id,c->argv,c->argc); + + /* Call the command. */ dirty = server.dirty; c->cmd->proc(c); dirty = server.dirty-dirty; @@ -1189,20 +1212,25 @@ void call(redisClient *c, int flags) { if (server.loading && c->flags & REDIS_LUA_CLIENT) flags &= ~(REDIS_CALL_SLOWLOG | REDIS_CALL_STATS); + /* Log the command into the Slow log if needed, and populate the + * per-command statistics that we show in INFO commandstats. */ if (flags & REDIS_CALL_SLOWLOG) slowlogPushEntryIfNeeded(c->argv,c->argc,duration); if (flags & REDIS_CALL_STATS) { c->cmd->microseconds += duration; c->cmd->calls++; } + + /* Propagate the command into the AOF and replication link */ if (flags & REDIS_CALL_PROPAGATE) { - if (server.aof_state != REDIS_AOF_OFF && dirty > 0) - feedAppendOnlyFile(c->cmd,c->db->id,c->argv,c->argc); - if ((dirty > 0 || c->cmd->flags & REDIS_CMD_FORCE_REPLICATION) && - listLength(server.slaves)) - replicationFeedSlaves(server.slaves,c->db->id,c->argv,c->argc); - if (listLength(server.monitors)) - replicationFeedMonitors(server.monitors,c->db->id,c->argv,c->argc); + int flags = REDIS_PROPAGATE_NONE; + + if (c->cmd->flags & REDIS_CMD_FORCE_REPLICATION) + flags |= REDIS_PROPAGATE_REPL; + if (dirty) + flags |= (REDIS_PROPAGATE_REPL | REDIS_PROPAGATE_AOF); + if (flags != REDIS_PROPAGATE_NONE) + propagate(c->cmd,c->db->id,c->argv,c->argc,flags); } server.stat_numcommands++; } diff --git a/src/redis.h b/src/redis.h index 982f33fe..921e319e 100644 --- a/src/redis.h +++ b/src/redis.h @@ -245,6 +245,11 @@ #define REDIS_CALL_PROPAGATE 4 #define REDIS_CALL_FULL (REDIS_CALL_SLOWLOG | REDIS_CALL_STATS | REDIS_CALL_PROPAGATE) +/* Command propagation flags, see propagate() function */ +#define REDIS_PROPAGATE_NONE 0 +#define REDIS_PROPAGATE_AOF 1 +#define REDIS_PROPAGATE_REPL 2 + /* We can print the stacktrace, so our assert is defined this way: */ #define redisAssertWithInfo(_c,_o,_e) ((_e)?(void)0 : (_redisAssertWithInfo(_c,_o,#_e,__FILE__,__LINE__),_exit(1))) #define redisAssert(_e) ((_e)?(void)0 : (_redisAssert(#_e,__FILE__,__LINE__),_exit(1))) From edba65d09084dfaa6f44a6f182c6d519e249a0c4 Mon Sep 17 00:00:00 2001 From: antirez Date: Tue, 28 Feb 2012 16:17:55 +0100 Subject: [PATCH 07/70] Var renamed into pushGenericCommand() to better reflect what it means. --- src/t_list.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/t_list.c b/src/t_list.c index 3742ec49..f0c6790e 100644 --- a/src/t_list.c +++ b/src/t_list.c @@ -259,7 +259,7 @@ void listTypeConvert(robj *subject, int enc) { *----------------------------------------------------------------------------*/ void pushGenericCommand(redisClient *c, int where) { - int j, addlen = 0, pushed = 0; + int j, waiting = 0, pushed = 0; robj *lobj = lookupKeyWrite(c->db,c->argv[1]); int may_have_waiting_clients = (lobj == NULL); @@ -272,7 +272,7 @@ void pushGenericCommand(redisClient *c, int where) { c->argv[j] = tryObjectEncoding(c->argv[j]); if (may_have_waiting_clients) { if (handleClientsWaitingListPush(c,c->argv[1],c->argv[j])) { - addlen++; + waiting++; continue; } else { may_have_waiting_clients = 0; @@ -285,7 +285,7 @@ void pushGenericCommand(redisClient *c, int where) { listTypePush(lobj,c->argv[j],where); pushed++; } - addReplyLongLong(c,addlen + (lobj ? listTypeLength(lobj) : 0)); + addReplyLongLong(c, waiting + (lobj ? listTypeLength(lobj) : 0)); if (pushed) signalModifiedKey(c->db,c->argv[1]); server.dirty += pushed; } From d8b1228bf6cbe2969a93b5f48b8cffba2d9772ae Mon Sep 17 00:00:00 2001 From: antirez Date: Tue, 28 Feb 2012 16:20:41 +0100 Subject: [PATCH 08/70] propagate() prototype added to redis.h --- src/redis.h | 1 + 1 file changed, 1 insertion(+) diff --git a/src/redis.h b/src/redis.h index 921e319e..f1e407b1 100644 --- a/src/redis.h +++ b/src/redis.h @@ -955,6 +955,7 @@ void setupSignalHandlers(void); struct redisCommand *lookupCommand(sds name); struct redisCommand *lookupCommandByCString(char *s); void call(redisClient *c, int flags); +void propagate(struct redisCommand *cmd, int dbid, robj **argv, int argc, int flags); int prepareForShutdown(); void redisLog(int level, const char *fmt, ...); void redisLogRaw(int level, const char *msg); From eeb34eff52eb77ff387ea7b316b157aa4337bb7f Mon Sep 17 00:00:00 2001 From: antirez Date: Tue, 28 Feb 2012 18:03:08 +0100 Subject: [PATCH 09/70] Added a new API to replicate an additional command after the replication of the currently executed command, in order to propagte the LPUSH originating from RPOPLPUSH and indirectly by BRPOPLPUSH. --- src/redis.c | 26 ++++++++++++++++++++++++++ src/redis.h | 16 +++++++++++++++- src/t_list.c | 51 ++++++++++++++++++++++++++++++--------------------- 3 files changed, 71 insertions(+), 22 deletions(-) diff --git a/src/redis.c b/src/redis.c index 647c5867..2eed4703 100644 --- a/src/redis.c +++ b/src/redis.c @@ -962,6 +962,7 @@ void initServerConfig() { populateCommandTable(); server.delCommand = lookupCommandByCString("del"); server.multiCommand = lookupCommandByCString("multi"); + server.lpushCommand = lookupCommandByCString("lpush"); /* Slow log */ server.slowlog_log_slower_than = REDIS_SLOWLOG_LOG_SLOWER_THAN; @@ -1192,6 +1193,20 @@ void propagate(struct redisCommand *cmd, int dbid, robj **argv, int argc, replicationFeedSlaves(server.slaves,dbid,argv,argc); } +/* Used inside commands to propatate an additional command if needed. */ +void alsoPropagate(struct redisCommand *cmd, int dbid, robj **argv, int argc, + int target) +{ + propagatedItem *pi = &server.also_propagate; + + redisAssert(pi->target == REDIS_PROPAGATE_NONE); + pi->cmd = cmd; + pi->dbid = dbid; + pi->argv = argv; + pi->argc = argc; + pi->target = target; +} + /* Call() is the core of Redis execution of a command */ void call(redisClient *c, int flags) { long long dirty, start = ustime(), duration; @@ -1202,6 +1217,7 @@ void call(redisClient *c, int flags) { replicationFeedMonitors(server.monitors,c->db->id,c->argv,c->argc); /* Call the command. */ + server.also_propagate.target = REDIS_PROPAGATE_NONE; dirty = server.dirty; c->cmd->proc(c); dirty = server.dirty-dirty; @@ -1232,6 +1248,16 @@ void call(redisClient *c, int flags) { if (flags != REDIS_PROPAGATE_NONE) propagate(c->cmd,c->db->id,c->argv,c->argc,flags); } + /* Commands such as LPUSH or BRPOPLPUSH may propagate an additional + * PUSH command. */ + if (server.also_propagate.target != REDIS_PROPAGATE_NONE) { + int j; + propagatedItem *pi = &server.also_propagate; + + propagate(pi->cmd, pi->dbid, pi->argv, pi->argc, pi->target); + for (j = 0; j < pi->argc; j++) decrRefCount(pi->argv[j]); + zfree(pi->argv); + } server.stat_numcommands++; } diff --git a/src/redis.h b/src/redis.h index f1e407b1..09a746cc 100644 --- a/src/redis.h +++ b/src/redis.h @@ -399,6 +399,17 @@ typedef struct clientBufferLimitsConfig { time_t soft_limit_seconds; } clientBufferLimitsConfig; +/* Currently only used to additionally propagate more commands to AOF/Replication + * after the propagation of the executed command. + * The structure contains everything needed to propagate a command: + * argv and argc, the ID of the database, pointer to the command table entry, + * and finally the target, that is an xor between REDIS_PROPAGATE_* flags. */ +typedef struct propagatedItem { + robj **argv; + int argc, dbid, target; + struct redisCommand *cmd; +} propagatedItem; + /*----------------------------------------------------------------------------- * Redis cluster data structures *----------------------------------------------------------------------------*/ @@ -562,7 +573,7 @@ struct redisServer { off_t loading_loaded_bytes; time_t loading_start_time; /* Fast pointers to often looked up command */ - struct redisCommand *delCommand, *multiCommand; + struct redisCommand *delCommand, *multiCommand, *lpushCommand; int cronloops; /* Number of times the cron function run */ time_t lastsave; /* Unix time of last save succeeede */ /* Fields used only for stats */ @@ -612,6 +623,8 @@ struct redisServer { int saveparamslen; /* Number of saving points */ char *rdb_filename; /* Name of RDB file */ int rdb_compression; /* Use compression in RDB? */ + /* Propagation of commands in AOF / replication */ + propagatedItem also_propagate; /* Additional command to propagate. */ /* Logging */ char *logfile; /* Path of log file */ int syslog_enabled; /* Is syslog enabled? */ @@ -956,6 +969,7 @@ struct redisCommand *lookupCommand(sds name); struct redisCommand *lookupCommandByCString(char *s); void call(redisClient *c, int flags); void propagate(struct redisCommand *cmd, int dbid, robj **argv, int argc, int flags); +void alsoPropagate(struct redisCommand *cmd, int dbid, robj **argv, int argc, int target); int prepareForShutdown(); void redisLog(int level, const char *fmt, ...); void redisLogRaw(int level, const char *msg); diff --git a/src/t_list.c b/src/t_list.c index f0c6790e..636c556c 100644 --- a/src/t_list.c +++ b/src/t_list.c @@ -288,6 +288,18 @@ void pushGenericCommand(redisClient *c, int where) { addReplyLongLong(c, waiting + (lobj ? listTypeLength(lobj) : 0)); if (pushed) signalModifiedKey(c->db,c->argv[1]); server.dirty += pushed; + + /* Alter the replication of the command accordingly to the number of + * list elements delivered to clients waiting into a blocking operation. + * We do that only if there were waiting clients, and only if still some + * element was pushed into the list (othewise dirty is 0 and nothign will + * be propagated). */ + if (waiting && pushed) { + /* CMD KEY a b C D E */ + for (j = 2; j < pushed+2; j++) + rewriteClientCommandArgument(c,j,c->argv[j+waiting]); + c->argc -= waiting; + } } void lpushCommand(redisClient *c) { @@ -655,8 +667,6 @@ void lremCommand(redisClient *c) { */ void rpoplpushHandlePush(redisClient *origclient, redisClient *c, robj *dstkey, robj *dstobj, robj *value) { - robj *aux; - if (!handleClientsWaitingListPush(origclient,dstkey,value)) { /* Create the list if the key does not exist */ if (!dstobj) { @@ -666,27 +676,19 @@ void rpoplpushHandlePush(redisClient *origclient, redisClient *c, robj *dstkey, signalModifiedKey(c->db,dstkey); } listTypePush(dstobj,value,REDIS_HEAD); - /* If we are pushing as a result of LPUSH against a key - * watched by BRPOPLPUSH, we need to rewrite the command vector - * as an LPUSH. - * - * If this is called directly by RPOPLPUSH (either directly - * or via a BRPOPLPUSH where the popped list exists) - * we should replicate the RPOPLPUSH command itself. */ - if (c != origclient) { - aux = createStringObject("LPUSH",5); - rewriteClientCommandVector(origclient,3,aux,dstkey,value); - decrRefCount(aux); - } else { - /* Make sure to always use RPOPLPUSH in the replication / AOF, - * even if the original command was BRPOPLPUSH. */ - aux = createStringObject("RPOPLPUSH",9); - rewriteClientCommandVector(origclient,3,aux,c->argv[1],c->argv[2]); - decrRefCount(aux); + /* Additionally propagate this PUSH operation together with + * the operation performed by the command. */ + { + robj **argv = zmalloc(sizeof(robj*)*3); + argv[0] = createStringObject("LPUSH",5); + argv[1] = dstkey; + argv[2] = value; + incrRefCount(argv[1]); + incrRefCount(argv[2]); + alsoPropagate(server.lpushCommand,c->db->id,argv,3, + REDIS_PROPAGATE_AOF|REDIS_PROPAGATE_REPL); } - server.dirty++; } - /* Always send the pushed value to the client. */ addReplyBulk(c,value); } @@ -717,6 +719,13 @@ void rpoplpushCommand(redisClient *c) { signalModifiedKey(c->db,touchedkey); decrRefCount(touchedkey); server.dirty++; + + /* Replicate this as a simple RPOP since the LPUSH side is replicated + * by rpoplpushHandlePush() call if needed (it may not be needed + * if a client is blocking wait a push against the list). */ + rewriteClientCommandVector(c,2, + resetRefCount(createStringObject("RPOP",4)), + c->argv[1]); } } From 78d6a22dc3e570417a8b8774307059b8d5ad476b Mon Sep 17 00:00:00 2001 From: antirez Date: Wed, 29 Feb 2012 00:46:50 +0100 Subject: [PATCH 10/70] Better system for additional commands replication. The new code uses a more generic data structure to describe redis operations. The new design allows for multiple alsoPropagate() calls within the scope of a single command, that is useful in different contexts. For instance there when there are multiple clients doing BRPOPLPUSH against the same list, and a variadic LPUSH is performed against this list, the blocked clients will both be served, and we should correctly replicate multiple LPUSH commands after the replication of the current command. --- src/redis.c | 63 ++++++++++++++++++++++++++++++++++++++++------------- src/redis.h | 29 +++++++++++++++++------- 2 files changed, 69 insertions(+), 23 deletions(-) diff --git a/src/redis.c b/src/redis.c index 2eed4703..78067d31 100644 --- a/src/redis.c +++ b/src/redis.c @@ -1161,6 +1161,43 @@ void resetCommandTableStats(void) { } } +/* ========================== Redis OP Array API ============================ */ + +void redisOpArrayInit(redisOpArray *oa) { + oa->ops = NULL; + oa->numops = 0; +} + +int redisOpArrayAppend(redisOpArray *oa, struct redisCommand *cmd, int dbid, + robj **argv, int argc, int target) +{ + redisOp *op; + + oa->ops = zrealloc(oa->ops,sizeof(redisOp)*(oa->numops+1)); + op = oa->ops+oa->numops; + op->cmd = cmd; + op->dbid = dbid; + op->argv = argv; + op->argc = argc; + op->target = target; + oa->numops++; + return oa->numops; +} + +void redisOpArrayFree(redisOpArray *oa) { + while(oa->numops) { + int j; + redisOp *op; + + oa->numops--; + op = oa->ops+oa->numops; + for (j = 0; j < op->argc; j++) + decrRefCount(op->argv[j]); + zfree(op->argv); + } + zfree(oa->ops); +} + /* ====================== Commands lookup and execution ===================== */ struct redisCommand *lookupCommand(sds name) { @@ -1193,18 +1230,12 @@ void propagate(struct redisCommand *cmd, int dbid, robj **argv, int argc, replicationFeedSlaves(server.slaves,dbid,argv,argc); } -/* Used inside commands to propatate an additional command if needed. */ +/* Used inside commands to schedule the propagation of additional commands + * after the current command is propagated to AOF / Replication. */ void alsoPropagate(struct redisCommand *cmd, int dbid, robj **argv, int argc, int target) { - propagatedItem *pi = &server.also_propagate; - - redisAssert(pi->target == REDIS_PROPAGATE_NONE); - pi->cmd = cmd; - pi->dbid = dbid; - pi->argv = argv; - pi->argc = argc; - pi->target = target; + redisOpArrayAppend(&server.also_propagate,cmd,dbid,argv,argc,target); } /* Call() is the core of Redis execution of a command */ @@ -1217,7 +1248,7 @@ void call(redisClient *c, int flags) { replicationFeedMonitors(server.monitors,c->db->id,c->argv,c->argc); /* Call the command. */ - server.also_propagate.target = REDIS_PROPAGATE_NONE; + redisOpArrayInit(&server.also_propagate); dirty = server.dirty; c->cmd->proc(c); dirty = server.dirty-dirty; @@ -1250,13 +1281,15 @@ void call(redisClient *c, int flags) { } /* Commands such as LPUSH or BRPOPLPUSH may propagate an additional * PUSH command. */ - if (server.also_propagate.target != REDIS_PROPAGATE_NONE) { + if (server.also_propagate.numops) { int j; - propagatedItem *pi = &server.also_propagate; + redisOp *rop; - propagate(pi->cmd, pi->dbid, pi->argv, pi->argc, pi->target); - for (j = 0; j < pi->argc; j++) decrRefCount(pi->argv[j]); - zfree(pi->argv); + for (j = 0; j < server.also_propagate.numops; j++) { + rop = &server.also_propagate.ops[j]; + propagate(rop->cmd, rop->dbid, rop->argv, rop->argc, rop->target); + } + redisOpArrayFree(&server.also_propagate); } server.stat_numcommands++; } diff --git a/src/redis.h b/src/redis.h index 09a746cc..c2617870 100644 --- a/src/redis.h +++ b/src/redis.h @@ -399,16 +399,29 @@ typedef struct clientBufferLimitsConfig { time_t soft_limit_seconds; } clientBufferLimitsConfig; -/* Currently only used to additionally propagate more commands to AOF/Replication - * after the propagation of the executed command. - * The structure contains everything needed to propagate a command: - * argv and argc, the ID of the database, pointer to the command table entry, - * and finally the target, that is an xor between REDIS_PROPAGATE_* flags. */ -typedef struct propagatedItem { +/* The redisOp structure defines a Redis Operation, that is an instance of + * a command with an argument vector, database ID, propagation target + * (REDIS_PROPAGATE_*), and command pointer. + * + * Currently only used to additionally propagate more commands to AOF/Replication + * after the propagation of the executed command. */ +typedef struct redisOp { robj **argv; int argc, dbid, target; struct redisCommand *cmd; -} propagatedItem; +} redisOp; + +/* Defines an array of Redis operations. There is an API to add to this + * structure in a easy way. + * + * redisOpArrayInit(); + * redisOpArrayAppend(); + * redisOpArrayFree(); + */ +typedef struct redisOpArray { + redisOp *ops; + int numops; +} redisOpArray; /*----------------------------------------------------------------------------- * Redis cluster data structures @@ -624,7 +637,7 @@ struct redisServer { char *rdb_filename; /* Name of RDB file */ int rdb_compression; /* Use compression in RDB? */ /* Propagation of commands in AOF / replication */ - propagatedItem also_propagate; /* Additional command to propagate. */ + redisOpArray also_propagate; /* Additional command to propagate. */ /* Logging */ char *logfile; /* Path of log file */ int syslog_enabled; /* Is syslog enabled? */ From b67feecacd94165a76da6af3a8c4e735f01729b0 Mon Sep 17 00:00:00 2001 From: antirez Date: Wed, 29 Feb 2012 00:54:52 +0100 Subject: [PATCH 11/70] Version bumped to 2.9.5 --- src/version.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/version.h b/src/version.h index a7a2fdb2..9ef74b08 100644 --- a/src/version.h +++ b/src/version.h @@ -1 +1 @@ -#define REDIS_VERSION "2.9.4" +#define REDIS_VERSION "2.9.5" From cd8bdea31bdd682c811276a56515b65268f42958 Mon Sep 17 00:00:00 2001 From: antirez Date: Wed, 29 Feb 2012 13:38:30 +0100 Subject: [PATCH 12/70] lpush arguments vector rewrite modified for more speed and to memory leak removal. --- src/t_list.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/t_list.c b/src/t_list.c index 636c556c..d6e28b7e 100644 --- a/src/t_list.c +++ b/src/t_list.c @@ -296,8 +296,8 @@ void pushGenericCommand(redisClient *c, int where) { * be propagated). */ if (waiting && pushed) { /* CMD KEY a b C D E */ - for (j = 2; j < pushed+2; j++) - rewriteClientCommandArgument(c,j,c->argv[j+waiting]); + for (j = 0; j < waiting; j++) decrRefCount(c->argv[j+2]); + memmove(c->argv+2,c->argv+2+waiting,sizeof(robj*)*pushed); c->argc -= waiting; } } From c1db214eeb2b7385c32889e17748429fcbe5cbae Mon Sep 17 00:00:00 2001 From: antirez Date: Wed, 29 Feb 2012 14:41:57 +0100 Subject: [PATCH 13/70] Better implementation for BRPOP/BLPOP in the non blocking case. --- src/redis.c | 2 ++ src/redis.h | 2 +- src/t_list.c | 40 +++++++++++++--------------------------- 3 files changed, 16 insertions(+), 28 deletions(-) diff --git a/src/redis.c b/src/redis.c index 78067d31..64df9073 100644 --- a/src/redis.c +++ b/src/redis.c @@ -852,6 +852,8 @@ void createSharedObjects(void) { shared.psubscribebulk = createStringObject("$10\r\npsubscribe\r\n",17); shared.punsubscribebulk = createStringObject("$12\r\npunsubscribe\r\n",19); shared.del = createStringObject("DEL",3); + shared.rpop = createStringObject("RPOP",4); + shared.lpop = createStringObject("LPOP",4); for (j = 0; j < REDIS_SHARED_INTEGERS; j++) { shared.integers[j] = createObject(REDIS_STRING,(void*)(long)j); shared.integers[j]->encoding = REDIS_ENCODING_INT; diff --git a/src/redis.h b/src/redis.h index c2617870..cccdcc81 100644 --- a/src/redis.h +++ b/src/redis.h @@ -365,7 +365,7 @@ struct sharedObjectsStruct { *select0, *select1, *select2, *select3, *select4, *select5, *select6, *select7, *select8, *select9, *messagebulk, *pmessagebulk, *subscribebulk, *unsubscribebulk, - *psubscribebulk, *punsubscribebulk, *del, + *psubscribebulk, *punsubscribebulk, *del, *rpop, *lpop, *integers[REDIS_SHARED_INTEGERS], *mbulkhdr[REDIS_SHARED_BULKHDR_LEN], /* "*\r\n" */ *bulkhdr[REDIS_SHARED_BULKHDR_LEN]; /* "$\r\n" */ diff --git a/src/t_list.c b/src/t_list.c index d6e28b7e..2be8074a 100644 --- a/src/t_list.c +++ b/src/t_list.c @@ -933,36 +933,22 @@ void blockingPopGenericCommand(redisClient *c, int where) { return; } else { if (listTypeLength(o) != 0) { - /* If the list contains elements fall back to the usual - * non-blocking POP operation */ - struct redisCommand *orig_cmd; - robj *argv[2], **orig_argv; - int orig_argc; + /* Non empty list, this is like a non normal [LR]POP. */ + robj *value = listTypePop(o,where); + redisAssert(value != NULL); - /* We need to alter the command arguments before to call - * popGenericCommand() as the command takes a single key. */ - orig_argv = c->argv; - orig_argc = c->argc; - orig_cmd = c->cmd; - argv[1] = c->argv[j]; - c->argv = argv; - c->argc = 2; - - /* Also the return value is different, we need to output - * the multi bulk reply header and the key name. The - * "real" command will add the last element (the value) - * for us. If this souds like an hack to you it's just - * because it is... */ addReplyMultiBulkLen(c,2); - addReplyBulk(c,argv[1]); - - popGenericCommand(c,where); - - /* Fix the client structure with the original stuff */ - c->argv = orig_argv; - c->argc = orig_argc; - c->cmd = orig_cmd; + addReplyBulk(c,c->argv[j]); + addReplyBulk(c,value); + decrRefCount(value); + if (listTypeLength(o) == 0) dbDelete(c->db,c->argv[j]); + signalModifiedKey(c->db,c->argv[j]); + server.dirty++; + /* Replicate it as an [LR]POP instead of B[LR]POP. */ + rewriteClientCommandVector(c,2, + (where == REDIS_HEAD) ? shared.lpop : shared.rpop, + c->argv[j]); return; } } From a950a84303038d3365442a48cc2c06f4b5d3f65e Mon Sep 17 00:00:00 2001 From: antirez Date: Wed, 29 Feb 2012 16:33:54 +0100 Subject: [PATCH 14/70] Ping the slave using the standard protocol instead of the inline one. --- src/replication.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/replication.c b/src/replication.c index dd42fcf1..e8297839 100644 --- a/src/replication.c +++ b/src/replication.c @@ -596,7 +596,7 @@ void replicationCron(void) { if (slave->replstate == REDIS_REPL_SEND_BULK) continue; if (slave->replstate == REDIS_REPL_ONLINE) { /* If the slave is online send a normal ping */ - addReplySds(slave,sdsnew("PING\r\n")); + addReplySds(slave,sdsnew("*1\r\n$4\r\nPING\r\n")); } else { /* Otherwise we are in the pre-synchronization stage. * Just a newline will do the work of refreshing the From b8283ab2189cf1fcaed8c435f779fa9914210297 Mon Sep 17 00:00:00 2001 From: antirez Date: Wed, 29 Feb 2012 17:10:21 +0100 Subject: [PATCH 15/70] Initial implementation of redis-cli --slave support. --- src/redis-cli.c | 55 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 55 insertions(+) diff --git a/src/redis-cli.c b/src/redis-cli.c index 827e4061..3b3ef504 100644 --- a/src/redis-cli.c +++ b/src/redis-cli.c @@ -64,6 +64,7 @@ static struct config { int latency_mode; int cluster_mode; int cluster_reissue_command; + int slave_mode; int stdinarg; /* get last arg from stdin. (-x option) */ char *auth; int raw_output; /* output mode per command */ @@ -606,6 +607,8 @@ static int parseOptions(int argc, char **argv) { config.raw_output = 1; } else if (!strcmp(argv[i],"--latency")) { config.latency_mode = 1; + } else if (!strcmp(argv[i],"--slave")) { + config.slave_mode = 1; } else if (!strcmp(argv[i],"--eval") && !lastarg) { config.eval = argv[++i]; } else if (!strcmp(argv[i],"-c")) { @@ -661,6 +664,7 @@ static void usage() { " -c Enable cluster mode (follow -ASK and -MOVED redirections)\n" " --raw Use raw formatting for replies (default when STDOUT is not a tty)\n" " --latency Enter a special mode continuously sampling latency.\n" +" --slave Simulate a slave showing commands received from the master.\n" " --eval Send an EVAL command using the Lua script at .\n" " --help Output this help and exit\n" " --version Output version and exit\n" @@ -866,6 +870,51 @@ static void latencyMode(void) { } } +static void slaveMode(void) { + /* To start we need to send the SYNC command and return the payload. + * The hiredis client lib does not understand this part of the protocol + * and we don't want to mess with its buffers, so everything is performed + * using direct low-level I/O. */ + int fd = context->fd; + char buf[1024], *p; + ssize_t nread; + unsigned long long payload; + + /* Send the SYNC command. */ + write(fd,"SYNC\r\n",6); + + /* Read $\r\n, making sure to read just up to "\n" */ + p = buf; + while(1) { + nread = read(fd,p,1); + if (nread <= 0) { + fprintf(stderr,"Error reading bulk length while SYNCing\n"); + exit(1); + } + if (*p == '\n') break; + p++; + } + *p = '\0'; + payload = strtoull(buf+1,NULL,10); + if (!config.raw_output) + printf("SYNC with master, discarding %lld bytes of bulk tranfer...\n", + payload); + + /* Discard the payload. */ + while(payload) { + nread = read(fd,buf,(payload > sizeof(buf)) ? sizeof(buf) : payload); + if (nread <= 0) { + fprintf(stderr,"Error reading RDB payload while SYNCing\n"); + exit(1); + } + payload -= nread; + } + if (!config.raw_output) printf("SYNC done. Logging commands from master.\n"); + + /* Now we can use the hiredis to read the incoming protocol. */ + while (cliReadReply(config.raw_output) == REDIS_OK); +} + int main(int argc, char **argv) { int firstarg; @@ -898,6 +947,12 @@ int main(int argc, char **argv) { latencyMode(); } + /* Start in slave mode if appropriate */ + if (config.slave_mode) { + cliConnect(0); + slaveMode(); + } + /* Start interactive mode when no command is provided */ if (argc == 0 && !config.eval) { /* Note that in repl mode we don't abort on connection error. From 60893c6cc6b08504d06aaa2e4eee5df29a479da1 Mon Sep 17 00:00:00 2001 From: antirez Date: Wed, 29 Feb 2012 17:43:03 +0100 Subject: [PATCH 16/70] redis-cli: CSV output added, used for the --slave mode. --- src/redis-cli.c | 68 +++++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 58 insertions(+), 10 deletions(-) diff --git a/src/redis-cli.c b/src/redis-cli.c index 3b3ef504..c631aa79 100644 --- a/src/redis-cli.c +++ b/src/redis-cli.c @@ -49,6 +49,10 @@ #define REDIS_NOTUSED(V) ((void) V) +#define OUTPUT_STANDARD 0 +#define OUTPUT_RAW 1 +#define OUTPUT_CSV 2 + static redisContext *context; static struct config { char *hostip; @@ -67,7 +71,7 @@ static struct config { int slave_mode; int stdinarg; /* get last arg from stdin. (-x option) */ char *auth; - int raw_output; /* output mode per command */ + int output; /* output mode, see OUTPUT_* defines */ sds mb_delim; char prompt[128]; char *eval; @@ -435,6 +439,42 @@ static sds cliFormatReplyRaw(redisReply *r) { return out; } +static sds cliFormatReplyCSV(redisReply *r) { + unsigned int i; + + sds out = sdsempty(); + switch (r->type) { + case REDIS_REPLY_ERROR: + out = sdscat(out,"ERROR,"); + out = sdscatrepr(out,r->str,strlen(r->str)); + break; + case REDIS_REPLY_STATUS: + out = sdscatrepr(out,r->str,r->len); + break; + case REDIS_REPLY_INTEGER: + out = sdscatprintf(out,"%lld",r->integer); + break; + case REDIS_REPLY_STRING: + out = sdscatrepr(out,r->str,r->len); + break; + case REDIS_REPLY_NIL: + out = sdscat(out,"NIL\n"); + break; + case REDIS_REPLY_ARRAY: + for (i = 0; i < r->elements; i++) { + sds tmp = cliFormatReplyCSV(r->element[i]); + out = sdscatlen(out,tmp,sdslen(tmp)); + if (i != r->elements-1) out = sdscat(out,","); + sdsfree(tmp); + } + break; + default: + fprintf(stderr,"Unknown reply type: %d\n", r->type); + exit(1); + } + return out; +} + static int cliReadReply(int output_raw_strings) { void *_reply; redisReply *reply; @@ -490,11 +530,14 @@ static int cliReadReply(int output_raw_strings) { if (output_raw_strings) { out = cliFormatReplyRaw(reply); } else { - if (config.raw_output) { + if (config.output == OUTPUT_RAW) { out = cliFormatReplyRaw(reply); out = sdscat(out,"\n"); - } else { + } else if (config.output == OUTPUT_STANDARD) { out = cliFormatReplyTTY(reply,""); + } else if (config.output == OUTPUT_CSV) { + out = cliFormatReplyCSV(reply); + out = sdscat(out,"\n"); } } fwrite(out,sdslen(out),1,stdout); @@ -546,7 +589,7 @@ static int cliSendCommand(int argc, char **argv, int repeat) { } if (config.pubsub_mode) { - if (!config.raw_output) + if (config.output != OUTPUT_RAW) printf("Reading messages... (press Ctrl-C to quit)\n"); while (1) { if (cliReadReply(output_raw) != REDIS_OK) exit(1); @@ -604,7 +647,9 @@ static int parseOptions(int argc, char **argv) { } else if (!strcmp(argv[i],"-a") && !lastarg) { config.auth = argv[++i]; } else if (!strcmp(argv[i],"--raw")) { - config.raw_output = 1; + config.output = OUTPUT_RAW; + } else if (!strcmp(argv[i],"--csv")) { + config.output = OUTPUT_CSV; } else if (!strcmp(argv[i],"--latency")) { config.latency_mode = 1; } else if (!strcmp(argv[i],"--slave")) { @@ -896,8 +941,7 @@ static void slaveMode(void) { } *p = '\0'; payload = strtoull(buf+1,NULL,10); - if (!config.raw_output) - printf("SYNC with master, discarding %lld bytes of bulk tranfer...\n", + fprintf(stderr,"SYNC with master, discarding %lld bytes of bulk tranfer...\n", payload); /* Discard the payload. */ @@ -909,10 +953,11 @@ static void slaveMode(void) { } payload -= nread; } - if (!config.raw_output) printf("SYNC done. Logging commands from master.\n"); + fprintf(stderr,"SYNC done. Logging commands from master.\n"); /* Now we can use the hiredis to read the incoming protocol. */ - while (cliReadReply(config.raw_output) == REDIS_OK); + config.output = OUTPUT_CSV; + while (cliReadReply(0) == REDIS_OK); } int main(int argc, char **argv) { @@ -933,7 +978,10 @@ int main(int argc, char **argv) { config.stdinarg = 0; config.auth = NULL; config.eval = NULL; - config.raw_output = !isatty(fileno(stdout)) && (getenv("FAKETTY") == NULL); + if (!isatty(fileno(stdout)) && (getenv("FAKETTY") == NULL)) + config.output = OUTPUT_RAW; + else + config.output = OUTPUT_STANDARD; config.mb_delim = sdsnew("\n"); cliInitHelp(); From 9494f1f15b128ef8407d118c240d3793aff0ed82 Mon Sep 17 00:00:00 2001 From: antirez Date: Wed, 7 Mar 2012 10:38:01 +0100 Subject: [PATCH 17/70] TIME command. --- src/redis.c | 14 +++++++++++++- src/redis.h | 1 + 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/src/redis.c b/src/redis.c index 64df9073..452c3bb7 100644 --- a/src/redis.c +++ b/src/redis.c @@ -242,7 +242,8 @@ struct redisCommand redisCommandTable[] = { {"eval",evalCommand,-3,"wms",0,zunionInterGetKeys,0,0,0,0,0}, {"evalsha",evalShaCommand,-3,"wms",0,zunionInterGetKeys,0,0,0,0,0}, {"slowlog",slowlogCommand,-2,"r",0,NULL,0,0,0,0,0}, - {"script",scriptCommand,-2,"ras",0,NULL,0,0,0,0,0} + {"script",scriptCommand,-2,"ras",0,NULL,0,0,0,0,0}, + {"time",timeCommand,1,"rR",0,NULL,0,0,0,0,0} }; /*============================ Utility functions ============================ */ @@ -1505,6 +1506,17 @@ void echoCommand(redisClient *c) { addReplyBulk(c,c->argv[1]); } +void timeCommand(redisClient *c) { + struct timeval tv; + + /* gettimeofday() can only fail if &tv is a bad addresss so we + * don't check for errors. */ + gettimeofday(&tv,NULL); + addReplyMultiBulkLen(c,2); + addReplyBulkLongLong(c,tv.tv_sec); + addReplyBulkLongLong(c,tv.tv_usec); +} + /* Convert an amount of bytes into a human readable string in the form * of 100B, 2G, 100M, 4K, and so forth. */ void bytesToHuman(char *s, unsigned long long n) { diff --git a/src/redis.h b/src/redis.h index cccdcc81..870439d7 100644 --- a/src/redis.h +++ b/src/redis.h @@ -1218,6 +1218,7 @@ void clientCommand(redisClient *c); void evalCommand(redisClient *c); void evalShaCommand(redisClient *c); void scriptCommand(redisClient *c); +void timeCommand(redisClient *c); #if defined(__GNUC__) void *calloc(size_t count, size_t size) __attribute__ ((deprecated)); From 7b845b62285230d9015f80a6c2ea7ecb0b25df6e Mon Sep 17 00:00:00 2001 From: antirez Date: Wed, 7 Mar 2012 11:30:30 +0100 Subject: [PATCH 18/70] anetPeerToString() automatically populates ip/port with something that may be provided to the user as output in case of errors. --- src/anet.c | 7 ++++++- src/networking.c | 6 +----- 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/src/anet.c b/src/anet.c index 9aff4dfa..ba4e6cce 100644 --- a/src/anet.c +++ b/src/anet.c @@ -353,7 +353,12 @@ int anetPeerToString(int fd, char *ip, int *port) { struct sockaddr_in sa; socklen_t salen = sizeof(sa); - if (getpeername(fd,(struct sockaddr*)&sa,&salen) == -1) return -1; + if (getpeername(fd,(struct sockaddr*)&sa,&salen) == -1) { + *port = 0; + ip[0] = '?'; + ip[1] = '\0'; + return -1; + } if (ip) strcpy(ip,inet_ntoa(sa.sin_addr)); if (port) *port = ntohs(sa.sin_port); return 0; diff --git a/src/networking.c b/src/networking.c index d8fb8132..40aad836 100644 --- a/src/networking.c +++ b/src/networking.c @@ -1087,11 +1087,7 @@ sds getClientInfoString(redisClient *client) { time_t now = time(NULL); int emask; - if (anetPeerToString(client->fd,ip,&port) == -1) { - ip[0] = '?'; - ip[1] = '\0'; - port = 0; - } + anetPeerToString(client->fd,ip,&port); p = flags; if (client->flags & REDIS_SLAVE) { if (client->flags & REDIS_MONITOR) From e31b615e6201fccda3f2e036c449646e3cfbac25 Mon Sep 17 00:00:00 2001 From: antirez Date: Wed, 7 Mar 2012 12:12:15 +0100 Subject: [PATCH 19/70] Better MONITOR output, now includes client ip:port or the lua string if the command was executed by the scripting engine. --- src/redis.c | 2 +- src/redis.h | 2 +- src/replication.c | 12 +++++++++--- 3 files changed, 11 insertions(+), 5 deletions(-) diff --git a/src/redis.c b/src/redis.c index 452c3bb7..6d5522e8 100644 --- a/src/redis.c +++ b/src/redis.c @@ -1248,7 +1248,7 @@ void call(redisClient *c, int flags) { /* Sent the command to clients in MONITOR mode, only if the commands are * not geneated from reading an AOF. */ if (listLength(server.monitors) && !server.loading) - replicationFeedMonitors(server.monitors,c->db->id,c->argv,c->argc); + replicationFeedMonitors(c,server.monitors,c->db->id,c->argv,c->argc); /* Call the command. */ redisOpArrayInit(&server.also_propagate); diff --git a/src/redis.h b/src/redis.h index 870439d7..c18e9170 100644 --- a/src/redis.h +++ b/src/redis.h @@ -932,7 +932,7 @@ int syncReadLine(int fd, char *ptr, ssize_t size, int timeout); /* Replication */ void replicationFeedSlaves(list *slaves, int dictid, robj **argv, int argc); -void replicationFeedMonitors(list *monitors, int dictid, robj **argv, int argc); +void replicationFeedMonitors(redisClient *c, list *monitors, int dictid, robj **argv, int argc); void updateSlavesWaitingBgsave(int bgsaveerr); void replicationCron(void); diff --git a/src/replication.c b/src/replication.c index e8297839..6c0091e8 100644 --- a/src/replication.c +++ b/src/replication.c @@ -50,17 +50,23 @@ void replicationFeedSlaves(list *slaves, int dictid, robj **argv, int argc) { } } -void replicationFeedMonitors(list *monitors, int dictid, robj **argv, int argc) { +void replicationFeedMonitors(redisClient *c, list *monitors, int dictid, robj **argv, int argc) { listNode *ln; listIter li; - int j; + int j, port; sds cmdrepr = sdsnew("+"); robj *cmdobj; + char ip[32]; struct timeval tv; gettimeofday(&tv,NULL); cmdrepr = sdscatprintf(cmdrepr,"%ld.%06ld ",(long)tv.tv_sec,(long)tv.tv_usec); - if (dictid != 0) cmdrepr = sdscatprintf(cmdrepr,"(db %d) ", dictid); + if (c->flags & REDIS_LUA_CLIENT) { + cmdrepr = sdscatprintf(cmdrepr,"[%d lua] ", dictid); + } else { + anetPeerToString(c->fd,ip,&port); + cmdrepr = sdscatprintf(cmdrepr,"[%d %s:%d] ", dictid,ip,port); + } for (j = 0; j < argc; j++) { if (argv[j]->encoding == REDIS_ENCODING_INT) { From c25e7eafef350a985ae236987131c5d56b30cfef Mon Sep 17 00:00:00 2001 From: antirez Date: Wed, 7 Mar 2012 13:05:46 +0100 Subject: [PATCH 20/70] Refuse writes if can't persist on disk. Redis now refuses accepting write queries if RDB persistence is configured, but RDB snapshots can't be generated for some reason. The status of the latest background save operation is now exposed in the INFO output as well. This fixes issue #90. --- src/rdb.c | 4 ++++ src/redis.c | 13 +++++++++++++ src/redis.h | 9 +++++---- 3 files changed, 22 insertions(+), 4 deletions(-) diff --git a/src/rdb.c b/src/rdb.c index 3a10e0ce..840e9913 100644 --- a/src/rdb.c +++ b/src/rdb.c @@ -656,6 +656,7 @@ int rdbSave(char *filename) { redisLog(REDIS_NOTICE,"DB saved on disk"); server.dirty = 0; server.lastsave = time(NULL); + server.lastbgsave_status = REDIS_OK; return REDIS_OK; werr: @@ -1061,12 +1062,15 @@ void backgroundSaveDoneHandler(int exitcode, int bysignal) { "Background saving terminated with success"); server.dirty = server.dirty - server.dirty_before_bgsave; server.lastsave = time(NULL); + server.lastbgsave_status = REDIS_OK; } else if (!bysignal && exitcode != 0) { redisLog(REDIS_WARNING, "Background saving error"); + server.lastbgsave_status = REDIS_ERR; } else { redisLog(REDIS_WARNING, "Background saving terminated by signal %d", bysignal); rdbRemoveTempFile(server.rdb_child_pid); + server.lastbgsave_status = REDIS_ERR; } server.rdb_child_pid = -1; /* Possibly there are slaves waiting for a BGSAVE in order to be served diff --git a/src/redis.c b/src/redis.c index 6d5522e8..3dfef024 100644 --- a/src/redis.c +++ b/src/redis.c @@ -833,6 +833,8 @@ void createSharedObjects(void) { "-LOADING Redis is loading the dataset in memory\r\n")); shared.slowscripterr = createObject(REDIS_STRING,sdsnew( "-BUSY Redis is busy running a script. You can only call SCRIPT KILL or SHUTDOWN NOSAVE.\r\n")); + shared.bgsaveerr = createObject(REDIS_STRING,sdsnew( + "-MISCONF Redis is configured to save RDB snapshots, but is currently not able to persist on disk. Write commands are disabled. Please check Redis logs for details about the error.\r\n")); shared.space = createObject(REDIS_STRING,sdsnew(" ")); shared.colon = createObject(REDIS_STRING,sdsnew(":")); shared.plus = createObject(REDIS_STRING,sdsnew("+")); @@ -1088,6 +1090,7 @@ void initServer() { server.stat_fork_time = 0; server.stat_rejected_conn = 0; server.unixtime = time(NULL); + server.lastbgsave_status = REDIS_OK; aeCreateTimeEvent(server.el, 1, serverCron, NULL, NULL); if (server.ipfd > 0 && aeCreateFileEvent(server.el,server.ipfd,AE_READABLE, acceptTcpHandler,NULL) == AE_ERR) oom("creating file event"); @@ -1374,6 +1377,14 @@ int processCommand(redisClient *c) { } } + /* Don't accept write commands if there are problems persisting on disk. */ + if (server.saveparamslen > 0 && server.lastbgsave_status == REDIS_ERR && + c->cmd->flags & REDIS_CMD_WRITE) + { + addReply(c, shared.bgsaveerr); + return REDIS_OK; + } + /* Only allow SUBSCRIBE and UNSUBSCRIBE in the context of Pub/Sub */ if ((dictSize(c->pubsub_channels) > 0 || listLength(c->pubsub_patterns) > 0) && @@ -1645,12 +1656,14 @@ sds genRedisInfoString(char *section) { "changes_since_last_save:%lld\r\n" "bgsave_in_progress:%d\r\n" "last_save_time:%ld\r\n" + "last_bgsave_status:%s\r\n" "bgrewriteaof_in_progress:%d\r\n", server.loading, server.aof_state != REDIS_AOF_OFF, server.dirty, server.rdb_child_pid != -1, server.lastsave, + server.lastbgsave_status == REDIS_OK ? "ok" : "err", server.aof_child_pid != -1); if (server.aof_state != REDIS_AOF_OFF) { diff --git a/src/redis.h b/src/redis.h index c18e9170..daaf362f 100644 --- a/src/redis.h +++ b/src/redis.h @@ -361,8 +361,8 @@ struct sharedObjectsStruct { robj *crlf, *ok, *err, *emptybulk, *czero, *cone, *cnegone, *pong, *space, *colon, *nullbulk, *nullmultibulk, *queued, *emptymultibulk, *wrongtypeerr, *nokeyerr, *syntaxerr, *sameobjecterr, - *outofrangeerr, *noscripterr, *loadingerr, *slowscripterr, *plus, - *select0, *select1, *select2, *select3, *select4, + *outofrangeerr, *noscripterr, *loadingerr, *slowscripterr, *bgsaveerr, + *plus, *select0, *select1, *select2, *select3, *select4, *select5, *select6, *select7, *select8, *select9, *messagebulk, *pmessagebulk, *subscribebulk, *unsubscribebulk, *psubscribebulk, *punsubscribebulk, *del, *rpop, *lpop, @@ -567,6 +567,7 @@ struct redisServer { char *requirepass; /* Pass for AUTH command, or NULL */ char *pidfile; /* PID file path */ int arch_bits; /* 32 or 64 depending on sizeof(long) */ + int cronloops; /* Number of times the cron function run */ /* Networking */ int port; /* TCP listening port */ char *bindaddr; /* Bind address or NULL */ @@ -587,8 +588,6 @@ struct redisServer { time_t loading_start_time; /* Fast pointers to often looked up command */ struct redisCommand *delCommand, *multiCommand, *lpushCommand; - int cronloops; /* Number of times the cron function run */ - time_t lastsave; /* Unix time of last save succeeede */ /* Fields used only for stats */ time_t stat_starttime; /* Server start time */ long long stat_numcommands; /* Number of processed commands */ @@ -636,6 +635,8 @@ struct redisServer { int saveparamslen; /* Number of saving points */ char *rdb_filename; /* Name of RDB file */ int rdb_compression; /* Use compression in RDB? */ + time_t lastsave; /* Unix time of last save succeeede */ + int lastbgsave_status; /* REDIS_OK or REDIS_ERR */ /* Propagation of commands in AOF / replication */ redisOpArray also_propagate; /* Additional command to propagate. */ /* Logging */ From 4d3bbf3590af7b575dd90512aa12706cea7ed899 Mon Sep 17 00:00:00 2001 From: antirez Date: Wed, 7 Mar 2012 18:02:26 +0100 Subject: [PATCH 21/70] By default Redis refuses writes with an error if the latest BGSAVE failed (and at least one save point is configured). However people having good monitoring systems may prefer a server that continues to work, since they are notified that there are problems by their monitoring systems. This commit implements the ability to turn the feature on or off via redis.conf and CONFIG SET. --- redis.conf | 15 +++++++++++++++ src/config.c | 14 ++++++++++++++ src/redis.c | 5 ++++- src/redis.h | 1 + 4 files changed, 34 insertions(+), 1 deletion(-) diff --git a/redis.conf b/redis.conf index 4760f291..2b4b6479 100644 --- a/redis.conf +++ b/redis.conf @@ -93,6 +93,21 @@ save 900 1 save 300 10 save 60 10000 +# By default Redis will stop accepting writes if RDB snapshots are enabled +# (at least one save point) and the latest background save failed. +# This will make the user aware (in an hard way) that data is not persisting +# on disk properly, otherwise chances are that no one will notice and some +# distater will happen. +# +# If the background saving process will start working again Redis will +# automatically allow writes again. +# +# However if you have setup your proper monitoring of the Redis server +# and persistence, you may want to disable this feature so that Redis will +# continue to work as usually even if there are problems with disk, +# permissions, and so forth. +stop-writes-on-bgsave-error yes + # Compress string objects using LZF when dump .rdb databases? # For default that's set to 'yes' as it's almost always a win. # If you want to save some CPU in the saving child set it to 'no' but diff --git a/src/config.c b/src/config.c index 9d28d678..6befca8f 100644 --- a/src/config.c +++ b/src/config.c @@ -336,6 +336,11 @@ void loadServerConfigFromString(char *config) { server.client_obuf_limits[class].hard_limit_bytes = hard; server.client_obuf_limits[class].soft_limit_bytes = soft; server.client_obuf_limits[class].soft_limit_seconds = soft_seconds; + } else if (!strcasecmp(argv[0],"stop-writes-on-bgsave-error") && + argc == 2) { + if ((server.stop_writes_on_bgsave_err = yesnotoi(argv[1])) == -1) { + err = "argument must be 'yes' or 'no'"; goto loaderr; + } } else { err = "Bad directive or wrong number of arguments"; goto loaderr; } @@ -604,7 +609,11 @@ void configSetCommand(redisClient *c) { server.client_obuf_limits[class].soft_limit_seconds = soft_seconds; } sdsfreesplitres(v,vlen); + } else if (!strcasecmp(c->argv[2]->ptr,"stop-writes-on-bgsave-error")) { + int yn = yesnotoi(o->ptr); + if (yn == -1) goto badfmt; + server.stop_writes_on_bgsave_err = yn; } else { addReplyErrorFormat(c,"Unsupported CONFIG parameter: %s", (char*)c->argv[2]->ptr); @@ -822,6 +831,11 @@ void configGetCommand(redisClient *c) { sdsfree(buf); matches++; } + if (stringmatch(pattern,"stop-writes-on-bgsave-error",0)) { + addReplyBulkCString(c,"stop-writes-on-bgsave-error"); + addReplyBulkCString(c,server.stop_writes_on_bgsave_err ? "yes" : "no"); + matches++; + } setDeferredMultiBulkLength(c,replylen,matches*2); } diff --git a/src/redis.c b/src/redis.c index 3dfef024..01ec0531 100644 --- a/src/redis.c +++ b/src/redis.c @@ -1091,6 +1091,7 @@ void initServer() { server.stat_rejected_conn = 0; server.unixtime = time(NULL); server.lastbgsave_status = REDIS_OK; + server.stop_writes_on_bgsave_err = 1; aeCreateTimeEvent(server.el, 1, serverCron, NULL, NULL); if (server.ipfd > 0 && aeCreateFileEvent(server.el,server.ipfd,AE_READABLE, acceptTcpHandler,NULL) == AE_ERR) oom("creating file event"); @@ -1378,7 +1379,9 @@ int processCommand(redisClient *c) { } /* Don't accept write commands if there are problems persisting on disk. */ - if (server.saveparamslen > 0 && server.lastbgsave_status == REDIS_ERR && + if (server.stop_writes_on_bgsave_err && + server.saveparamslen > 0 + && server.lastbgsave_status == REDIS_ERR && c->cmd->flags & REDIS_CMD_WRITE) { addReply(c, shared.bgsaveerr); diff --git a/src/redis.h b/src/redis.h index daaf362f..fdf7f08d 100644 --- a/src/redis.h +++ b/src/redis.h @@ -637,6 +637,7 @@ struct redisServer { int rdb_compression; /* Use compression in RDB? */ time_t lastsave; /* Unix time of last save succeeede */ int lastbgsave_status; /* REDIS_OK or REDIS_ERR */ + int stop_writes_on_bgsave_err; /* Don't allow writes if can't BGSAVE */ /* Propagation of commands in AOF / replication */ redisOpArray also_propagate; /* Additional command to propagate. */ /* Logging */ From 44f508f1a81941f298bdb0f2f87c4469094beeb6 Mon Sep 17 00:00:00 2001 From: antirez Date: Thu, 8 Mar 2012 10:08:44 +0100 Subject: [PATCH 22/70] clusterGetRandomName() generalized into getRandomHexChars() so that we can use it for the run_id field as well. --- src/cluster.c | 16 +--------------- src/redis.h | 3 +++ src/util.c | 48 ++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 52 insertions(+), 15 deletions(-) diff --git a/src/cluster.c b/src/cluster.c index 85cb1198..f76e8ff5 100644 --- a/src/cluster.c +++ b/src/cluster.c @@ -19,20 +19,6 @@ int clusterAddSlot(clusterNode *n, int slot); * Initialization * -------------------------------------------------------------------------- */ -void clusterGetRandomName(char *p) { - FILE *fp = fopen("/dev/urandom","r"); - char *charset = "0123456789abcdef"; - int j; - - if (fp == NULL || fread(p,REDIS_CLUSTER_NAMELEN,1,fp) == 0) { - for (j = 0; j < REDIS_CLUSTER_NAMELEN; j++) - p[j] = rand(); - } - for (j = 0; j < REDIS_CLUSTER_NAMELEN; j++) - p[j] = charset[p[j] & 0x0F]; - fclose(fp); -} - int clusterLoadConfig(char *filename) { FILE *fp = fopen(filename,"r"); char *line; @@ -304,7 +290,7 @@ clusterNode *createClusterNode(char *nodename, int flags) { if (nodename) memcpy(node->name, nodename, REDIS_CLUSTER_NAMELEN); else - clusterGetRandomName(node->name); + getRandomHexChars(node->name, REDIS_CLUSTER_NAMELEN); node->flags = flags; memset(node->slots,0,sizeof(node->slots)); node->numslaves = 0; diff --git a/src/redis.h b/src/redis.h index fdf7f08d..3ecedd4c 100644 --- a/src/redis.h +++ b/src/redis.h @@ -58,6 +58,8 @@ #define REDIS_REPL_TIMEOUT 60 #define REDIS_REPL_PING_SLAVE_PERIOD 10 +#define REDIS_RUN_ID_SIZE 40 + /* Protocol and I/O related defines */ #define REDIS_MAX_QUERYBUF_LEN (1024*1024*1024) /* 1GB max query buffer. */ #define REDIS_IOBUF_LEN (1024*16) /* Generic I/O buffer size */ @@ -813,6 +815,7 @@ dictType hashDictType; /* Utils */ long long ustime(void); long long mstime(void); +void getRandomHexChars(char *p, unsigned int len); /* networking.c -- Networking and Client related operations */ redisClient *createClient(int fd); diff --git a/src/util.c b/src/util.c index f5a23af2..b6ec2150 100644 --- a/src/util.c +++ b/src/util.c @@ -5,6 +5,8 @@ #include #include #include +#include +#include #include "util.h" @@ -327,6 +329,52 @@ int d2string(char *buf, size_t len, double value) { return len; } +/* Generate the Redis "Run ID", a SHA1-sized random number that identifies a + * given execution of Redis, so that if you are talking with an instance + * having run_id == A, and you reconnect and it has run_id == B, you can be + * sure that it is either a different instance or it was restarted. */ +void getRandomHexChars(char *p, unsigned int len) { + FILE *fp = fopen("/dev/urandom","r"); + char *charset = "0123456789abcdef"; + unsigned int j; + + if (fp == NULL || fread(p,len,1,fp) == 0) { + /* If we can't read from /dev/urandom, do some reasonable effort + * in order to create some entropy, since this function is used to + * generate run_id and cluster instance IDs */ + char *x = p; + unsigned int l = len; + struct timeval tv; + pid_t pid = getpid(); + + /* Use time and PID to fill the initial array. */ + gettimeofday(&tv,NULL); + if (l >= sizeof(tv.tv_usec)) { + memcpy(x,&tv.tv_usec,sizeof(tv.tv_usec)); + l -= sizeof(tv.tv_usec); + x += sizeof(tv.tv_usec); + } + if (l >= sizeof(tv.tv_sec)) { + memcpy(x,&tv.tv_sec,sizeof(tv.tv_sec)); + l -= sizeof(tv.tv_sec); + x += sizeof(tv.tv_sec); + } + if (l >= sizeof(pid)) { + memcpy(x,&pid,sizeof(pid)); + l -= sizeof(pid); + x += sizeof(pid); + } + /* Finally xor it with rand() output, that was already seeded with + * time() at startup. */ + for (j = 0; j < len; j++) + p[j] ^= rand(); + } + /* Turn it into hex digits taking just 4 bits out of 8 for every byte. */ + for (j = 0; j < len; j++) + p[j] = charset[p[j] & 0x0F]; + fclose(fp); +} + #ifdef UTIL_TEST_MAIN #include From 91d664d6ce3d85a5b282a3945153d78e2474640f Mon Sep 17 00:00:00 2001 From: antirez Date: Thu, 8 Mar 2012 10:13:12 +0100 Subject: [PATCH 23/70] run_id added to INFO output. The Run ID is a field that identifies a single execution of the Redis server. It can be useful for many purposes as it makes easy to detect if the instance we are talking about is the same, or if it is a different one or was rebooted. An application of run_id will be in the partial synchronization of replication, where a slave may request a partial sync from a given offset only if it is talking with the same master. Another application is in failover and monitoring scripts. --- src/redis.c | 4 ++++ src/redis.h | 1 + 2 files changed, 5 insertions(+) diff --git a/src/redis.c b/src/redis.c index 01ec0531..acc01b4b 100644 --- a/src/redis.c +++ b/src/redis.c @@ -870,6 +870,8 @@ void createSharedObjects(void) { } void initServerConfig() { + getRandomHexChars(server.runid,REDIS_RUN_ID_SIZE); + server.runid[REDIS_RUN_ID_SIZE] = '\0'; server.arch_bits = (sizeof(long) == 8) ? 64 : 32; server.port = REDIS_SERVERPORT; server.bindaddr = NULL; @@ -1585,6 +1587,7 @@ sds genRedisInfoString(char *section) { "multiplexing_api:%s\r\n" "gcc_version:%d.%d.%d\r\n" "process_id:%ld\r\n" + "run_id:%s\r\n" "tcp_port:%d\r\n" "uptime_in_seconds:%ld\r\n" "uptime_in_days:%ld\r\n" @@ -1600,6 +1603,7 @@ sds genRedisInfoString(char *section) { 0,0,0, #endif (long) getpid(), + server.runid, server.port, uptime, uptime/(3600*24), diff --git a/src/redis.h b/src/redis.h index 3ecedd4c..ee1cef4c 100644 --- a/src/redis.h +++ b/src/redis.h @@ -570,6 +570,7 @@ struct redisServer { char *pidfile; /* PID file path */ int arch_bits; /* 32 or 64 depending on sizeof(long) */ int cronloops; /* Number of times the cron function run */ + char runid[REDIS_RUN_ID_SIZE+1]; /* ID always different at every exec. */ /* Networking */ int port; /* TCP listening port */ char *bindaddr; /* Bind address or NULL */ From 0823e48fb9a7a43af3d108223a9c8952ac9dfc7a Mon Sep 17 00:00:00 2001 From: antirez Date: Thu, 8 Mar 2012 12:14:23 +0100 Subject: [PATCH 24/70] Support for all the redis.conf fields in CONFIG GET. config.c refactored a bit. --- src/config.c | 219 +++++++++++++++++++++++++-------------------------- 1 file changed, 108 insertions(+), 111 deletions(-) diff --git a/src/config.c b/src/config.c index 6befca8f..8d8fcda7 100644 --- a/src/config.c +++ b/src/config.c @@ -614,6 +614,12 @@ void configSetCommand(redisClient *c) { if (yn == -1) goto badfmt; server.stop_writes_on_bgsave_err = yn; + } else if (!strcasecmp(c->argv[2]->ptr,"repl-ping-slave-period")) { + if (getLongLongFromObject(o,&ll) == REDIS_ERR || ll <= 0) goto badfmt; + server.repl_ping_slave_period = ll; + } else if (!strcasecmp(c->argv[2]->ptr,"repl-timeout")) { + if (getLongLongFromObject(o,&ll) == REDIS_ERR || ll <= 0) goto badfmt; + server.repl_timeout = ll; } else { addReplyErrorFormat(c,"Unsupported CONFIG parameter: %s", (char*)c->argv[2]->ptr); @@ -628,6 +634,31 @@ badfmt: /* Bad format errors */ (char*)c->argv[2]->ptr); } +#define config_get_string_field(_name,_var) do { \ + if (stringmatch(pattern,_name,0)) { \ + addReplyBulkCString(c,_name); \ + addReplyBulkCString(c,_var ? _var : ""); \ + matches++; \ + } \ +} while(0); + +#define config_get_bool_field(_name,_var) do { \ + if (stringmatch(pattern,_name,0)) { \ + addReplyBulkCString(c,_name); \ + addReplyBulkCString(c,_var ? "yes" : "no"); \ + matches++; \ + } \ +} while(0); + +#define config_get_numerical_field(_name,_var) do { \ + if (stringmatch(pattern,_name,0)) { \ + ll2string(buf,sizeof(buf),_var); \ + addReplyBulkCString(c,_name); \ + addReplyBulkCString(c,buf); \ + matches++; \ + } \ +} while(0); + void configGetCommand(redisClient *c) { robj *o = c->argv[2]; void *replylen = addDeferredMultiBulkLength(c); @@ -636,6 +667,66 @@ void configGetCommand(redisClient *c) { int matches = 0; redisAssertWithInfo(c,o,o->encoding == REDIS_ENCODING_RAW); + /* String values */ + config_get_string_field("dbfilename",server.rdb_filename); + config_get_string_field("requirepass",server.requirepass); + config_get_string_field("masterauth",server.requirepass); + config_get_string_field("bind",server.bindaddr); + config_get_string_field("unixsocket",server.unixsocket); + config_get_string_field("logfile",server.logfile); + config_get_string_field("pidfile",server.pidfile); + + /* Numerical values */ + config_get_numerical_field("maxmemory",server.maxmemory); + config_get_numerical_field("maxmemory-samples",server.maxmemory_samples); + config_get_numerical_field("timeout",server.maxidletime); + config_get_numerical_field("auto-aof-rewrite-percentage", + server.aof_rewrite_perc); + config_get_numerical_field("auto-aof-rewrite-min-size", + server.aof_rewrite_min_size); + config_get_numerical_field("hash-max-zipmap-entries", + server.hash_max_zipmap_entries); + config_get_numerical_field("hash-max-zipmap-value", + server.hash_max_zipmap_value); + config_get_numerical_field("list-max-ziplist-entries", + server.list_max_ziplist_entries); + config_get_numerical_field("list-max-ziplist-value", + server.list_max_ziplist_value); + config_get_numerical_field("set-max-intset-entries", + server.set_max_intset_entries); + config_get_numerical_field("zset-max-ziplist-entries", + server.zset_max_ziplist_entries); + config_get_numerical_field("zset-max-ziplist-value", + server.zset_max_ziplist_value); + config_get_numerical_field("lua-time-limit",server.lua_time_limit); + config_get_numerical_field("slowlog-log-slower-than", + server.slowlog_log_slower_than); + config_get_numerical_field("slowlog-max-len", + server.slowlog_max_len); + config_get_numerical_field("port",server.port); + config_get_numerical_field("databases",server.dbnum); + config_get_numerical_field("repl-ping-slave-period",server.repl_ping_slave_period); + config_get_numerical_field("repl-timeout",server.repl_timeout); + config_get_numerical_field("maxclients",server.maxclients); + + /* Bool (yes/no) values */ + config_get_bool_field("no-appendfsync-on-rewrite", + server.aof_no_fsync_on_rewrite); + config_get_bool_field("slave-serve-stale-data", + server.repl_serve_stale_data); + config_get_bool_field("stop-writes-on-bgsave-error", + server.stop_writes_on_bgsave_err); + config_get_bool_field("daemonize", server.daemonize); + config_get_bool_field("rdbcompression", server.rdb_compression); + config_get_bool_field("activerehashing", server.activerehashing); + + /* Everything we can't handle with macros follows. */ + + if (stringmatch(pattern,"appendonly",0)) { + addReplyBulkCString(c,"appendonly"); + addReplyBulkCString(c,server.aof_state == REDIS_AOF_OFF ? "no" : "yes"); + matches++; + } if (stringmatch(pattern,"dir",0)) { char buf[1024]; @@ -646,27 +737,6 @@ void configGetCommand(redisClient *c) { addReplyBulkCString(c,buf); matches++; } - if (stringmatch(pattern,"dbfilename",0)) { - addReplyBulkCString(c,"dbfilename"); - addReplyBulkCString(c,server.rdb_filename); - matches++; - } - if (stringmatch(pattern,"requirepass",0)) { - addReplyBulkCString(c,"requirepass"); - addReplyBulkCString(c,server.requirepass); - matches++; - } - if (stringmatch(pattern,"masterauth",0)) { - addReplyBulkCString(c,"masterauth"); - addReplyBulkCString(c,server.masterauth); - matches++; - } - if (stringmatch(pattern,"maxmemory",0)) { - ll2string(buf,sizeof(buf),server.maxmemory); - addReplyBulkCString(c,"maxmemory"); - addReplyBulkCString(c,buf); - matches++; - } if (stringmatch(pattern,"maxmemory-policy",0)) { char *s; @@ -683,28 +753,6 @@ void configGetCommand(redisClient *c) { addReplyBulkCString(c,s); matches++; } - if (stringmatch(pattern,"maxmemory-samples",0)) { - ll2string(buf,sizeof(buf),server.maxmemory_samples); - addReplyBulkCString(c,"maxmemory-samples"); - addReplyBulkCString(c,buf); - matches++; - } - if (stringmatch(pattern,"timeout",0)) { - ll2string(buf,sizeof(buf),server.maxidletime); - addReplyBulkCString(c,"timeout"); - addReplyBulkCString(c,buf); - matches++; - } - if (stringmatch(pattern,"appendonly",0)) { - addReplyBulkCString(c,"appendonly"); - addReplyBulkCString(c,server.aof_state == REDIS_AOF_OFF ? "no" : "yes"); - matches++; - } - if (stringmatch(pattern,"no-appendfsync-on-rewrite",0)) { - addReplyBulkCString(c,"no-appendfsync-on-rewrite"); - addReplyBulkCString(c,server.aof_no_fsync_on_rewrite ? "yes" : "no"); - matches++; - } if (stringmatch(pattern,"appendfsync",0)) { char *policy; @@ -734,71 +782,6 @@ void configGetCommand(redisClient *c) { sdsfree(buf); matches++; } - if (stringmatch(pattern,"auto-aof-rewrite-percentage",0)) { - addReplyBulkCString(c,"auto-aof-rewrite-percentage"); - addReplyBulkLongLong(c,server.aof_rewrite_perc); - matches++; - } - if (stringmatch(pattern,"auto-aof-rewrite-min-size",0)) { - addReplyBulkCString(c,"auto-aof-rewrite-min-size"); - addReplyBulkLongLong(c,server.aof_rewrite_min_size); - matches++; - } - if (stringmatch(pattern,"slave-serve-stale-data",0)) { - addReplyBulkCString(c,"slave-serve-stale-data"); - addReplyBulkCString(c,server.repl_serve_stale_data ? "yes" : "no"); - matches++; - } - if (stringmatch(pattern,"hash-max-zipmap-entries",0)) { - addReplyBulkCString(c,"hash-max-zipmap-entries"); - addReplyBulkLongLong(c,server.hash_max_zipmap_entries); - matches++; - } - if (stringmatch(pattern,"hash-max-zipmap-value",0)) { - addReplyBulkCString(c,"hash-max-zipmap-value"); - addReplyBulkLongLong(c,server.hash_max_zipmap_value); - matches++; - } - if (stringmatch(pattern,"list-max-ziplist-entries",0)) { - addReplyBulkCString(c,"list-max-ziplist-entries"); - addReplyBulkLongLong(c,server.list_max_ziplist_entries); - matches++; - } - if (stringmatch(pattern,"list-max-ziplist-value",0)) { - addReplyBulkCString(c,"list-max-ziplist-value"); - addReplyBulkLongLong(c,server.list_max_ziplist_value); - matches++; - } - if (stringmatch(pattern,"set-max-intset-entries",0)) { - addReplyBulkCString(c,"set-max-intset-entries"); - addReplyBulkLongLong(c,server.set_max_intset_entries); - matches++; - } - if (stringmatch(pattern,"zset-max-ziplist-entries",0)) { - addReplyBulkCString(c,"zset-max-ziplist-entries"); - addReplyBulkLongLong(c,server.zset_max_ziplist_entries); - matches++; - } - if (stringmatch(pattern,"zset-max-ziplist-value",0)) { - addReplyBulkCString(c,"zset-max-ziplist-value"); - addReplyBulkLongLong(c,server.zset_max_ziplist_value); - matches++; - } - if (stringmatch(pattern,"lua-time-limit",0)) { - addReplyBulkCString(c,"lua-time-limit"); - addReplyBulkLongLong(c,server.lua_time_limit); - matches++; - } - if (stringmatch(pattern,"slowlog-log-slower-than",0)) { - addReplyBulkCString(c,"slowlog-log-slower-than"); - addReplyBulkLongLong(c,server.slowlog_log_slower_than); - matches++; - } - if (stringmatch(pattern,"slowlog-max-len",0)) { - addReplyBulkCString(c,"slowlog-max-len"); - addReplyBulkLongLong(c,server.slowlog_max_len); - matches++; - } if (stringmatch(pattern,"loglevel",0)) { char *s; @@ -831,9 +814,23 @@ void configGetCommand(redisClient *c) { sdsfree(buf); matches++; } - if (stringmatch(pattern,"stop-writes-on-bgsave-error",0)) { - addReplyBulkCString(c,"stop-writes-on-bgsave-error"); - addReplyBulkCString(c,server.stop_writes_on_bgsave_err ? "yes" : "no"); + if (stringmatch(pattern,"unixsocketperm",0)) { + char buf[32]; + snprintf(buf,sizeof(buf),"%o",server.unixsocketperm); + addReplyBulkCString(c,"unixsocketperm"); + addReplyBulkCString(c,buf); + matches++; + } + if (stringmatch(pattern,"slaveof",0)) { + char buf[256]; + + addReplyBulkCString(c,"slaveof"); + if (server.masterhost) + snprintf(buf,sizeof(buf),"%s %d", + server.masterhost, server.masterport); + else + buf[0] = '\0'; + addReplyBulkCString(c,buf); matches++; } setDeferredMultiBulkLength(c,replylen,matches*2); From 250e7f690825f506306e3c091ba4465c206726bd Mon Sep 17 00:00:00 2001 From: antirez Date: Thu, 8 Mar 2012 16:15:37 +0100 Subject: [PATCH 25/70] Instantaneous ops/sec figure in INFO output. --- src/redis.c | 33 +++++++++++++++++++++++++++++++++ src/redis.h | 7 +++++++ 2 files changed, 40 insertions(+) diff --git a/src/redis.c b/src/redis.c index acc01b4b..03dc2ed1 100644 --- a/src/redis.c +++ b/src/redis.c @@ -616,6 +616,31 @@ void updateLRUClock(void) { REDIS_LRU_CLOCK_MAX; } + +/* Add a sample to the operations per second array of samples. */ +void trackOperationsPerSecond(void) { + long long t = mstime() - server.ops_sec_last_sample_time; + long long ops = server.stat_numcommands - server.ops_sec_last_sample_ops; + long long ops_sec; + + ops_sec = t > 0 ? (ops*1000/t) : 0; + + server.ops_sec_samples[server.ops_sec_idx] = ops_sec; + server.ops_sec_idx = (server.ops_sec_idx+1) % REDIS_OPS_SEC_SAMPLES; + server.ops_sec_last_sample_time = mstime(); + server.ops_sec_last_sample_ops = server.stat_numcommands; +} + +/* Return the mean of all the samples. */ +long long getOperationsPerSecond(void) { + int j; + long long sum = 0; + + for (j = 0; j < REDIS_OPS_SEC_SAMPLES; j++) + sum += server.ops_sec_samples[j]; + return sum / REDIS_OPS_SEC_SAMPLES; +} + int serverCron(struct aeEventLoop *eventLoop, long long id, void *clientData) { int j, loops = server.cronloops; REDIS_NOTUSED(eventLoop); @@ -628,6 +653,8 @@ int serverCron(struct aeEventLoop *eventLoop, long long id, void *clientData) { * To access a global var is faster than calling time(NULL) */ server.unixtime = time(NULL); + trackOperationsPerSecond(); + /* We have just 22 bits per object for LRU information. * So we use an (eventually wrapping) LRU clock with 10 seconds resolution. * 2^22 bits with 10 seconds resoluton is more or less 1.5 years. @@ -1091,6 +1118,10 @@ void initServer() { server.stat_peak_memory = 0; server.stat_fork_time = 0; server.stat_rejected_conn = 0; + memset(server.ops_sec_samples,0,sizeof(server.ops_sec_samples)); + server.ops_sec_idx = 0; + server.ops_sec_last_sample_time = mstime(); + server.ops_sec_last_sample_ops = 0; server.unixtime = time(NULL); server.lastbgsave_status = REDIS_OK; server.stop_writes_on_bgsave_err = 1; @@ -1726,6 +1757,7 @@ sds genRedisInfoString(char *section) { "# Stats\r\n" "total_connections_received:%lld\r\n" "total_commands_processed:%lld\r\n" + "instantaneous_ops_per_sec:%lld\r\n" "rejected_connections:%lld\r\n" "expired_keys:%lld\r\n" "evicted_keys:%lld\r\n" @@ -1736,6 +1768,7 @@ sds genRedisInfoString(char *section) { "latest_fork_usec:%lld\r\n", server.stat_numconnections, server.stat_numcommands, + getOperationsPerSecond(), server.stat_rejected_conn, server.stat_expiredkeys, server.stat_evictedkeys, diff --git a/src/redis.h b/src/redis.h index ee1cef4c..8d492596 100644 --- a/src/redis.h +++ b/src/redis.h @@ -59,6 +59,7 @@ #define REDIS_REPL_PING_SLAVE_PERIOD 10 #define REDIS_RUN_ID_SIZE 40 +#define REDIS_OPS_SEC_SAMPLES 16 /* Protocol and I/O related defines */ #define REDIS_MAX_QUERYBUF_LEN (1024*1024*1024) /* 1GB max query buffer. */ @@ -606,6 +607,12 @@ struct redisServer { long long slowlog_entry_id; /* SLOWLOG current entry ID */ long long slowlog_log_slower_than; /* SLOWLOG time limit (to get logged) */ unsigned long slowlog_max_len; /* SLOWLOG max number of items logged */ + /* The following two are used to track instantaneous "load" in terms + * of operations per second. */ + long long ops_sec_last_sample_time; /* Timestamp of last sample (in ms) */ + long long ops_sec_last_sample_ops; /* numcommands in last sample */ + long long ops_sec_samples[REDIS_OPS_SEC_SAMPLES]; + int ops_sec_idx; /* Configuration */ int verbosity; /* Loglevel in redis.conf */ int maxidletime; /* Client timeout in seconds */ From addc032756e338731c528d5868cf73b71ea2598e Mon Sep 17 00:00:00 2001 From: antirez Date: Sat, 10 Mar 2012 10:36:51 +0100 Subject: [PATCH 26/70] Added a top-function comment to rioWriteHashIteratorCursor() to better specify what the function does. Not immediately clear from the name. --- src/aof.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/aof.c b/src/aof.c index 03e32491..64cd76d3 100644 --- a/src/aof.c +++ b/src/aof.c @@ -609,6 +609,12 @@ int rewriteSortedSetObject(rio *r, robj *key, robj *o) { return 1; } +/* Write either the key or the value of the currently selected item of an hash. + * The 'hi' argument passes a valid Redis hash iterator. + * The 'what' filed specifies if to write a key or a value and can be + * either REDIS_HASH_KEY or REDIS_HASH_VALUE. + * + * The function returns 0 on error, non-zero on success. */ static int rioWriteHashIteratorCursor(rio *r, hashTypeIterator *hi, int what) { if (hi->encoding == REDIS_ENCODING_ZIPLIST) { unsigned char *vstr = NULL; From d22248ae99543e74a93c88b9311f23a56231c83c Mon Sep 17 00:00:00 2001 From: antirez Date: Sat, 10 Mar 2012 10:38:53 +0100 Subject: [PATCH 27/70] Removed handling of deprecated hash-max-zipmap-entries nad hash-map-zipmap-value. Pieter is too good with users ;). Better to have them switch to a saner configuration ASAP after the 2.6 upgrade. --- 00-RELEASENOTES | 6 ++++++ src/config.c | 8 -------- 2 files changed, 6 insertions(+), 8 deletions(-) diff --git a/00-RELEASENOTES b/00-RELEASENOTES index 49221f2c..8a225b71 100644 --- a/00-RELEASENOTES +++ b/00-RELEASENOTES @@ -22,6 +22,12 @@ but it is better to keep them in mind: starting with '#'. All the major clients should be already fixed to work with the new INFO format. +Also the following redis.conf and CONFIG GET / SET parameters changed name: + + * hash-max-zipmap-entries, now replaced by hash-max-ziplist-entries + * hash-max-zipmap-value, now replaced by hash-max-ziplist-value + * glueoutputbuf was no completely removed as it does not make sense + --------- CHANGELOG --------- diff --git a/src/config.c b/src/config.c index d84cd474..533a2a57 100644 --- a/src/config.c +++ b/src/config.c @@ -202,8 +202,6 @@ void loadServerConfigFromString(char *config) { if ((server.repl_serve_stale_data = yesnotoi(argv[1])) == -1) { err = "argument must be 'yes' or 'no'"; goto loaderr; } - } else if (!strcasecmp(argv[0],"glueoutputbuf")) { - redisLog(REDIS_WARNING, "Deprecated configuration directive: \"%s\"", argv[0]); } else if (!strcasecmp(argv[0],"rdbcompression") && argc == 2) { if ((server.rdb_compression = yesnotoi(argv[1])) == -1) { err = "argument must be 'yes' or 'no'"; goto loaderr; @@ -262,12 +260,6 @@ void loadServerConfigFromString(char *config) { } else if (!strcasecmp(argv[0],"dbfilename") && argc == 2) { zfree(server.rdb_filename); server.rdb_filename = zstrdup(argv[1]); - } else if (!strcasecmp(argv[0],"hash-max-zipmap-entries") && argc == 2) { - redisLog(REDIS_WARNING, "Deprecated configuration directive: \"%s\"", argv[0]); - server.hash_max_ziplist_entries = memtoll(argv[1], NULL); - } else if (!strcasecmp(argv[0],"hash-max-zipmap-value") && argc == 2) { - redisLog(REDIS_WARNING, "Deprecated configuration directive: \"%s\"", argv[0]); - server.hash_max_ziplist_value = memtoll(argv[1], NULL); } else if (!strcasecmp(argv[0],"hash-max-ziplist-entries") && argc == 2) { server.hash_max_ziplist_entries = memtoll(argv[1], NULL); } else if (!strcasecmp(argv[0],"hash-max-ziplist-value") && argc == 2) { From c0caa1cf542d452fdc1eb74a7e5511c683ec5d5d Mon Sep 17 00:00:00 2001 From: antirez Date: Sat, 10 Mar 2012 11:09:43 +0100 Subject: [PATCH 28/70] Minor code aesthetic change to use Redis code base style rule of saving vertical space when possible. --- src/t_hash.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/src/t_hash.c b/src/t_hash.c index f0ecefc3..b09ff41e 100644 --- a/src/t_hash.c +++ b/src/t_hash.c @@ -75,10 +75,7 @@ int hashTypeGetFromHashTable(robj *o, robj *field, robj **value) { redisAssert(o->encoding == REDIS_ENCODING_HT); de = dictFind(o->ptr, field); - if (de == NULL) { - return -1; - } - + if (de == NULL) return -1; *value = dictGetVal(de); return 0; } From 753bb3dcbda2fbb91539057e300b999bc750c9e0 Mon Sep 17 00:00:00 2001 From: antirez Date: Sat, 10 Mar 2012 11:11:28 +0100 Subject: [PATCH 29/70] More vertical space saved. --- src/t_hash.c | 25 ++++--------------------- 1 file changed, 4 insertions(+), 21 deletions(-) diff --git a/src/t_hash.c b/src/t_hash.c index b09ff41e..b3928450 100644 --- a/src/t_hash.c +++ b/src/t_hash.c @@ -109,11 +109,9 @@ robj *hashTypeGetObject(robj *o, robj *field) { incrRefCount(aux); value = aux; } - } else { redisPanic("Unknown hash encoding"); } - return value; } @@ -125,21 +123,14 @@ int hashTypeExists(robj *o, robj *field) { unsigned int vlen = UINT_MAX; long long vll = LLONG_MAX; - if (hashTypeGetFromZiplist(o, field, &vstr, &vlen, &vll) == 0) { - return 1; - } - + if (hashTypeGetFromZiplist(o, field, &vstr, &vlen, &vll) == 0) return 1; } else if (o->encoding == REDIS_ENCODING_HT) { robj *aux; - if (hashTypeGetFromHashTable(o, field, &aux) == 0) { - return 1; - } - + if (hashTypeGetFromHashTable(o, field, &aux) == 0) return 1; } else { redisPanic("Unknown hash encoding"); } - return 0; } @@ -303,10 +294,7 @@ int hashTypeNext(hashTypeIterator *hi) { redisAssert(vptr != NULL); fptr = ziplistNext(zl, vptr); } - - if (fptr == NULL) { - return REDIS_ERR; - } + if (fptr == NULL) return REDIS_ERR; /* Grab pointer to the value (fptr points to the field) */ vptr = ziplistNext(zl, fptr); @@ -315,16 +303,11 @@ int hashTypeNext(hashTypeIterator *hi) { /* fptr, vptr now point to the first or next pair */ hi->fptr = fptr; hi->vptr = vptr; - } else if (hi->encoding == REDIS_ENCODING_HT) { - if ((hi->de = dictNext(hi->di)) == NULL) { - return REDIS_ERR; - } - + if ((hi->de = dictNext(hi->di)) == NULL) return REDIS_ERR; } else { redisPanic("Unknown hash encoding"); } - return REDIS_OK; } From 87faf90696d549e69a8864026dfe6fc435eeec0d Mon Sep 17 00:00:00 2001 From: antirez Date: Sat, 10 Mar 2012 12:14:17 +0100 Subject: [PATCH 30/70] hash-max-zipmap-... renamed hash-max-ziplist-... in defalt conf for tests. --- tests/assets/default.conf | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/assets/default.conf b/tests/assets/default.conf index 75334426..976852e9 100644 --- a/tests/assets/default.conf +++ b/tests/assets/default.conf @@ -297,8 +297,8 @@ no-appendfsync-on-rewrite no # have at max a given numer of elements, and the biggest element does not # exceed a given threshold. You can configure this limits with the following # configuration directives. -hash-max-zipmap-entries 64 -hash-max-zipmap-value 512 +hash-max-ziplist-entries 64 +hash-max-ziplist-value 512 # Similarly to hashes, small lists are also encoded in a special way in order # to save a lot of space. The special representation is only used when From c7d7d0a80ff90f2daac71df648dbce09c34d4ef2 Mon Sep 17 00:00:00 2001 From: antirez Date: Sat, 10 Mar 2012 12:35:15 +0100 Subject: [PATCH 31/70] RDB version is no 4, because small hashes are now encoded as ziplists, so older versions of Redis will not understand this format. --- src/rdb.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/rdb.c b/src/rdb.c index 113856d4..518fef02 100644 --- a/src/rdb.c +++ b/src/rdb.c @@ -616,7 +616,7 @@ int rdbSave(char *filename) { } rioInitWithFile(&rdb,fp); - if (rdbWriteRaw(&rdb,"REDIS0003",9) == -1) goto werr; + if (rdbWriteRaw(&rdb,"REDIS0004",9) == -1) goto werr; for (j = 0; j < server.dbnum; j++) { redisDb *db = server.db+j; @@ -1023,7 +1023,7 @@ int rdbLoad(char *filename) { return REDIS_ERR; } rdbver = atoi(buf+5); - if (rdbver < 1 || rdbver > 3) { + if (rdbver < 1 || rdbver > 4) { fclose(fp); redisLog(REDIS_WARNING,"Can't handle RDB format version %d",rdbver); errno = EINVAL; From f12d0224f34e1aa680aeebf581dc4d90a4d7d47e Mon Sep 17 00:00:00 2001 From: antirez Date: Sat, 10 Mar 2012 12:38:42 +0100 Subject: [PATCH 32/70] RDB4 support in redis-check-dump. --- src/redis-check-dump.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/redis-check-dump.c b/src/redis-check-dump.c index 77be686c..42fe5181 100644 --- a/src/redis-check-dump.c +++ b/src/redis-check-dump.c @@ -20,6 +20,7 @@ #define REDIS_LIST_ZIPLIST 10 #define REDIS_SET_INTSET 11 #define REDIS_ZSET_ZIPLIST 12 +#define REDIS_HASH_ZIPLIST 13 /* Objects encoding. Some kind of objects like Strings and Hashes can be * internally represented in multiple ways. The 'encoding' field of the object @@ -136,7 +137,7 @@ int processHeader() { } dump_version = (int)strtol(buf + 5, NULL, 10); - if (dump_version < 1 || dump_version > 2) { + if (dump_version < 1 || dump_version > 4) { ERROR("Unknown RDB format version: %d\n", dump_version); } return 1; @@ -384,6 +385,7 @@ int loadPair(entry *e) { case REDIS_LIST_ZIPLIST: case REDIS_SET_INTSET: case REDIS_ZSET_ZIPLIST: + case REDIS_HASH_ZIPLIST: if (!processStringObject(NULL)) { SHIFT_ERROR(offset, "Error reading entry value"); return 0; From 64b4c33c0b144a8a3d3600dd056f4c6bfd379098 Mon Sep 17 00:00:00 2001 From: antirez Date: Sat, 10 Mar 2012 12:40:03 +0100 Subject: [PATCH 33/70] Build dependencies updated. --- src/Makefile | 61 +++++++++++++++++++++++++++++----------------------- 1 file changed, 34 insertions(+), 27 deletions(-) diff --git a/src/Makefile b/src/Makefile index cca785cb..f86ea859 100644 --- a/src/Makefile +++ b/src/Makefile @@ -98,35 +98,39 @@ ae_kqueue.o: ae_kqueue.c ae_select.o: ae_select.c anet.o: anet.c fmacros.h anet.h aof.o: aof.c redis.h fmacros.h config.h ae.h sds.h dict.h adlist.h \ - zmalloc.h anet.h zipmap.h ziplist.h intset.h version.h util.h + zmalloc.h anet.h ziplist.h intset.h version.h util.h rdb.h rio.h bio.h bio.o: bio.c redis.h fmacros.h config.h ae.h sds.h dict.h adlist.h \ - zmalloc.h anet.h zipmap.h ziplist.h intset.h version.h util.h bio.h + zmalloc.h anet.h ziplist.h intset.h version.h util.h rdb.h rio.h bio.h cluster.o: cluster.c redis.h fmacros.h config.h ae.h sds.h dict.h \ - adlist.h zmalloc.h anet.h zipmap.h ziplist.h intset.h version.h util.h + adlist.h zmalloc.h anet.h ziplist.h intset.h version.h util.h rdb.h \ + rio.h config.o: config.c redis.h fmacros.h config.h ae.h sds.h dict.h adlist.h \ - zmalloc.h anet.h zipmap.h ziplist.h intset.h version.h util.h + zmalloc.h anet.h ziplist.h intset.h version.h util.h rdb.h rio.h crc16.o: crc16.c redis.h fmacros.h config.h ae.h sds.h dict.h adlist.h \ - zmalloc.h anet.h zipmap.h ziplist.h intset.h version.h util.h + zmalloc.h anet.h ziplist.h intset.h version.h util.h rdb.h rio.h db.o: db.c redis.h fmacros.h config.h ae.h sds.h dict.h adlist.h \ - zmalloc.h anet.h zipmap.h ziplist.h intset.h version.h util.h + zmalloc.h anet.h ziplist.h intset.h version.h util.h rdb.h rio.h debug.o: debug.c redis.h fmacros.h config.h ae.h sds.h dict.h adlist.h \ - zmalloc.h anet.h zipmap.h ziplist.h intset.h version.h util.h sha1.h + zmalloc.h anet.h ziplist.h intset.h version.h util.h rdb.h rio.h sha1.h dict.o: dict.c fmacros.h dict.h zmalloc.h endianconv.o: endianconv.c intset.o: intset.c intset.h zmalloc.h endianconv.h lzf_c.o: lzf_c.c lzfP.h lzf_d.o: lzf_d.c lzfP.h multi.o: multi.c redis.h fmacros.h config.h ae.h sds.h dict.h adlist.h \ - zmalloc.h anet.h zipmap.h ziplist.h intset.h version.h util.h + zmalloc.h anet.h ziplist.h intset.h version.h util.h rdb.h rio.h networking.o: networking.c redis.h fmacros.h config.h ae.h sds.h dict.h \ - adlist.h zmalloc.h anet.h zipmap.h ziplist.h intset.h version.h util.h + adlist.h zmalloc.h anet.h ziplist.h intset.h version.h util.h rdb.h \ + rio.h object.o: object.c redis.h fmacros.h config.h ae.h sds.h dict.h adlist.h \ - zmalloc.h anet.h zipmap.h ziplist.h intset.h version.h util.h + zmalloc.h anet.h ziplist.h intset.h version.h util.h rdb.h rio.h pqsort.o: pqsort.c pubsub.o: pubsub.c redis.h fmacros.h config.h ae.h sds.h dict.h adlist.h \ - zmalloc.h anet.h zipmap.h ziplist.h intset.h version.h util.h + zmalloc.h anet.h ziplist.h intset.h version.h util.h rdb.h rio.h +rand.o: rand.c rdb.o: rdb.c redis.h fmacros.h config.h ae.h sds.h dict.h adlist.h \ - zmalloc.h anet.h zipmap.h ziplist.h intset.h version.h util.h lzf.h + zmalloc.h anet.h ziplist.h intset.h version.h util.h rdb.h rio.h lzf.h \ + zipmap.h redis-benchmark.o: redis-benchmark.c fmacros.h ae.h \ ../deps/hiredis/hiredis.h sds.h adlist.h zmalloc.h redis-check-aof.o: redis-check-aof.c fmacros.h config.h @@ -134,34 +138,37 @@ redis-check-dump.o: redis-check-dump.c lzf.h redis-cli.o: redis-cli.c fmacros.h version.h ../deps/hiredis/hiredis.h \ sds.h zmalloc.h ../deps/linenoise/linenoise.h help.h redis.o: redis.c redis.h fmacros.h config.h ae.h sds.h dict.h adlist.h \ - zmalloc.h anet.h zipmap.h ziplist.h intset.h version.h util.h slowlog.h \ - bio.h asciilogo.h + zmalloc.h anet.h ziplist.h intset.h version.h util.h rdb.h rio.h \ + slowlog.h bio.h asciilogo.h release.o: release.c release.h replication.o: replication.c redis.h fmacros.h config.h ae.h sds.h dict.h \ - adlist.h zmalloc.h anet.h zipmap.h ziplist.h intset.h version.h util.h + adlist.h zmalloc.h anet.h ziplist.h intset.h version.h util.h rdb.h \ + rio.h +rio.o: rio.c fmacros.h rio.h sds.h util.h scripting.o: scripting.c redis.h fmacros.h config.h ae.h sds.h dict.h \ - adlist.h zmalloc.h anet.h zipmap.h ziplist.h intset.h version.h util.h \ - sha1.h rand.h -rio.o: rio.c sds.h + adlist.h zmalloc.h anet.h ziplist.h intset.h version.h util.h rdb.h \ + rio.h sha1.h rand.h sds.o: sds.c sds.h zmalloc.h sha1.o: sha1.c sha1.h config.h slowlog.o: slowlog.c redis.h fmacros.h config.h ae.h sds.h dict.h \ - adlist.h zmalloc.h anet.h zipmap.h ziplist.h intset.h version.h util.h \ - slowlog.h + adlist.h zmalloc.h anet.h ziplist.h intset.h version.h util.h rdb.h \ + rio.h slowlog.h sort.o: sort.c redis.h fmacros.h config.h ae.h sds.h dict.h adlist.h \ - zmalloc.h anet.h zipmap.h ziplist.h intset.h version.h util.h pqsort.h + zmalloc.h anet.h ziplist.h intset.h version.h util.h rdb.h rio.h \ + pqsort.h syncio.o: syncio.c redis.h fmacros.h config.h ae.h sds.h dict.h adlist.h \ - zmalloc.h anet.h zipmap.h ziplist.h intset.h version.h util.h + zmalloc.h anet.h ziplist.h intset.h version.h util.h rdb.h rio.h t_hash.o: t_hash.c redis.h fmacros.h config.h ae.h sds.h dict.h adlist.h \ - zmalloc.h anet.h zipmap.h ziplist.h intset.h version.h util.h + zmalloc.h anet.h ziplist.h intset.h version.h util.h rdb.h rio.h t_list.o: t_list.c redis.h fmacros.h config.h ae.h sds.h dict.h adlist.h \ - zmalloc.h anet.h zipmap.h ziplist.h intset.h version.h util.h + zmalloc.h anet.h ziplist.h intset.h version.h util.h rdb.h rio.h t_set.o: t_set.c redis.h fmacros.h config.h ae.h sds.h dict.h adlist.h \ - zmalloc.h anet.h zipmap.h ziplist.h intset.h version.h util.h + zmalloc.h anet.h ziplist.h intset.h version.h util.h rdb.h rio.h t_string.o: t_string.c redis.h fmacros.h config.h ae.h sds.h dict.h \ - adlist.h zmalloc.h anet.h zipmap.h ziplist.h intset.h version.h util.h + adlist.h zmalloc.h anet.h ziplist.h intset.h version.h util.h rdb.h \ + rio.h t_zset.o: t_zset.c redis.h fmacros.h config.h ae.h sds.h dict.h adlist.h \ - zmalloc.h anet.h zipmap.h ziplist.h intset.h version.h util.h + zmalloc.h anet.h ziplist.h intset.h version.h util.h rdb.h rio.h util.o: util.c fmacros.h util.h ziplist.o: ziplist.c zmalloc.h util.h ziplist.h endianconv.h zipmap.o: zipmap.c zmalloc.h endianconv.h From 96e9f8d5e3dc862e4590375de5ca9770e151ea84 Mon Sep 17 00:00:00 2001 From: quiver Date: Sat, 10 Mar 2012 21:06:08 +0900 Subject: [PATCH 34/70] fix typo of redis.conf --- redis.conf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/redis.conf b/redis.conf index 80e14ad9..e0335996 100644 --- a/redis.conf +++ b/redis.conf @@ -246,7 +246,7 @@ slave-serve-stale-data yes # volatile-lru -> remove the key with an expire set using an LRU algorithm # allkeys-lru -> remove any key accordingly to the LRU algorithm # volatile-random -> remove a random key with an expire set -# allkeys->random -> remove a random key, any key +# allkeys-random -> remove a random key, any key # volatile-ttl -> remove the key with the nearest expire time (minor TTL) # noeviction -> don't expire at all, just return an error on write operations # From c3c856228da2f0d73c608f38395991eecf735606 Mon Sep 17 00:00:00 2001 From: antirez Date: Tue, 13 Mar 2012 09:49:11 +0100 Subject: [PATCH 35/70] RDB hashes loading fixed removing the assertion that failed every time an HT-encoded hash was loaded. --- src/rdb.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/rdb.c b/src/rdb.c index 518fef02..4a56659d 100644 --- a/src/rdb.c +++ b/src/rdb.c @@ -844,9 +844,10 @@ robj *rdbLoadObject(int rdbtype, rio *rdb) { hashTypeConvert(o, REDIS_ENCODING_HT); /* Load every field and value into the ziplist */ - while (o->encoding == REDIS_ENCODING_ZIPLIST && len-- > 0) { + while (o->encoding == REDIS_ENCODING_ZIPLIST && len > 0) { robj *field, *value; + len--; /* Load raw strings */ field = rdbLoadStringObject(rdb); if (field == NULL) return NULL; @@ -869,9 +870,10 @@ robj *rdbLoadObject(int rdbtype, rio *rdb) { } /* Load remaining fields and values into the hash table */ - while (o->encoding == REDIS_ENCODING_HT && len-- > 0) { + while (o->encoding == REDIS_ENCODING_HT && len > 0) { robj *field, *value; + len--; /* Load encoded strings */ field = rdbLoadEncodedStringObject(rdb); if (field == NULL) return NULL; From c3e7441dadf1d287c76c1c0a3cab68f15a875dbd Mon Sep 17 00:00:00 2001 From: antirez Date: Tue, 13 Mar 2012 10:59:29 +0100 Subject: [PATCH 36/70] RDB hashes loading, fixed another bug in the loading of HT-encoded hashes: when the hash entry is too big for ziplist, add the field, then convert. The code used to break before the new entry was inserted, resulting into missing fields in the loaded Hash object. --- src/rdb.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/rdb.c b/src/rdb.c index 4a56659d..519b645d 100644 --- a/src/rdb.c +++ b/src/rdb.c @@ -856,6 +856,10 @@ robj *rdbLoadObject(int rdbtype, rio *rdb) { if (value == NULL) return NULL; redisAssert(field->encoding == REDIS_ENCODING_RAW); + /* Add pair to ziplist */ + o->ptr = ziplistPush(o->ptr, field->ptr, sdslen(field->ptr), ZIPLIST_TAIL); + o->ptr = ziplistPush(o->ptr, value->ptr, sdslen(value->ptr), ZIPLIST_TAIL); + /* Convert to hash table if size threshold is exceeded */ if (sdslen(field->ptr) > server.hash_max_ziplist_value || sdslen(value->ptr) > server.hash_max_ziplist_value) @@ -863,10 +867,6 @@ robj *rdbLoadObject(int rdbtype, rio *rdb) { hashTypeConvert(o, REDIS_ENCODING_HT); break; } - - /* Add pair to ziplist */ - o->ptr = ziplistPush(o->ptr, field->ptr, sdslen(field->ptr), ZIPLIST_TAIL); - o->ptr = ziplistPush(o->ptr, value->ptr, sdslen(value->ptr), ZIPLIST_TAIL); } /* Load remaining fields and values into the hash table */ From 573373802ef8699503a90d5559c54d9e03ff2244 Mon Sep 17 00:00:00 2001 From: antirez Date: Tue, 13 Mar 2012 12:59:27 +0100 Subject: [PATCH 37/70] c->bufpos initialization moved for aesthetics. --- src/networking.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/networking.c b/src/networking.c index 40aad836..14c5dcce 100644 --- a/src/networking.c +++ b/src/networking.c @@ -22,7 +22,6 @@ int listMatchObjects(void *a, void *b) { redisClient *createClient(int fd) { redisClient *c = zmalloc(sizeof(redisClient)); - c->bufpos = 0; /* passing -1 as fd it is possible to create a non connected client. * This is useful since all the Redis commands needs to be executed @@ -42,6 +41,7 @@ redisClient *createClient(int fd) { selectDb(c,0); c->fd = fd; + c->bufpos = 0; c->querybuf = sdsempty(); c->reqtype = 0; c->argc = 0; From e74dca73d9818b42e7ee34c0a686133813e8d3a3 Mon Sep 17 00:00:00 2001 From: antirez Date: Tue, 13 Mar 2012 13:05:08 +0100 Subject: [PATCH 38/70] Client creation time in redisClient structure. New age field in CLIENT LIST output. --- src/networking.c | 5 +++-- src/redis.h | 1 + 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/src/networking.c b/src/networking.c index 14c5dcce..b0f45054 100644 --- a/src/networking.c +++ b/src/networking.c @@ -51,7 +51,7 @@ redisClient *createClient(int fd) { c->bulklen = -1; c->sentlen = 0; c->flags = 0; - c->lastinteraction = time(NULL); + c->ctime = c->lastinteraction = time(NULL); c->authenticated = 0; c->replstate = REDIS_REPL_NONE; c->reply = listCreate(); @@ -1111,8 +1111,9 @@ sds getClientInfoString(redisClient *client) { if (emask & AE_WRITABLE) *p++ = 'w'; *p = '\0'; return sdscatprintf(sdsempty(), - "addr=%s:%d fd=%d idle=%ld flags=%s db=%d sub=%d psub=%d qbuf=%lu obl=%lu oll=%lu omem=%lu events=%s cmd=%s", + "addr=%s:%d fd=%d age=%ld idle=%ld flags=%s db=%d sub=%d psub=%d qbuf=%lu obl=%lu oll=%lu omem=%lu events=%s cmd=%s", ip,port,client->fd, + (long)(now - client->ctime), (long)(now - client->lastinteraction), flags, client->db->id, diff --git a/src/redis.h b/src/redis.h index 6ead029d..1d1f572b 100644 --- a/src/redis.h +++ b/src/redis.h @@ -332,6 +332,7 @@ typedef struct redisClient { list *reply; unsigned long reply_bytes; /* Tot bytes of objects in reply list */ int sentlen; + time_t ctime; /* Client creation time */ time_t lastinteraction; /* time of the last interaction, used for timeout */ time_t obuf_soft_limit_reached_time; int flags; /* REDIS_SLAVE | REDIS_MONITOR | REDIS_MULTI ... */ From bbaeda402cac1b66b52a869bfc419001d79ffcc0 Mon Sep 17 00:00:00 2001 From: antirez Date: Tue, 13 Mar 2012 13:26:33 +0100 Subject: [PATCH 39/70] Added a qbuf-free field to CLIENT LIST output. --- src/networking.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/networking.c b/src/networking.c index b0f45054..06097b58 100644 --- a/src/networking.c +++ b/src/networking.c @@ -1111,7 +1111,7 @@ sds getClientInfoString(redisClient *client) { if (emask & AE_WRITABLE) *p++ = 'w'; *p = '\0'; return sdscatprintf(sdsempty(), - "addr=%s:%d fd=%d age=%ld idle=%ld flags=%s db=%d sub=%d psub=%d qbuf=%lu obl=%lu oll=%lu omem=%lu events=%s cmd=%s", + "addr=%s:%d fd=%d age=%ld idle=%ld flags=%s db=%d sub=%d psub=%d qbuf=%lu qbuf-free=%lu obl=%lu oll=%lu omem=%lu events=%s cmd=%s", ip,port,client->fd, (long)(now - client->ctime), (long)(now - client->lastinteraction), @@ -1120,6 +1120,7 @@ sds getClientInfoString(redisClient *client) { (int) dictSize(client->pubsub_channels), (int) listLength(client->pubsub_patterns), (unsigned long) sdslen(client->querybuf), + (unsigned long) sdsavail(client->querybuf), (unsigned long) client->bufpos, (unsigned long) listLength(client->reply), getClientOutputBufferMemoryUsage(client), From d19015be12c98f329cdaab039b843c3bf8931916 Mon Sep 17 00:00:00 2001 From: antirez Date: Tue, 13 Mar 2012 18:05:11 +0100 Subject: [PATCH 40/70] Process async client checks like client timeouts and BLPOP timeouts incrementally using a circular list. --- src/adlist.c | 16 ++++++++++++++++ src/adlist.h | 1 + src/networking.c | 28 --------------------------- src/redis.c | 49 +++++++++++++++++++++++++++++++++++++++++++++--- 4 files changed, 63 insertions(+), 31 deletions(-) diff --git a/src/adlist.c b/src/adlist.c index 51ba03bd..e48957e3 100644 --- a/src/adlist.c +++ b/src/adlist.c @@ -323,3 +323,19 @@ listNode *listIndex(list *list, long index) { } return n; } + +/* Rotate the list removing the tail node and inserting it to the head. */ +void listRotate(list *list) { + listNode *tail = list->tail; + + if (listLength(list) <= 1) return; + + /* Detatch current tail */ + list->tail = tail->prev; + list->tail->next = NULL; + /* Move it as head */ + list->head->prev = tail; + tail->prev = NULL; + tail->next = list->head; + list->head = tail; +} diff --git a/src/adlist.h b/src/adlist.h index 36dba1ff..259bd0f8 100644 --- a/src/adlist.h +++ b/src/adlist.h @@ -84,6 +84,7 @@ listNode *listSearchKey(list *list, void *key); listNode *listIndex(list *list, long index); void listRewind(list *list, listIter *li); void listRewindTail(list *list, listIter *li); +void listRotate(list *list); /* Directions for iterators */ #define AL_START_HEAD 0 diff --git a/src/networking.c b/src/networking.c index 06097b58..ae77d11b 100644 --- a/src/networking.c +++ b/src/networking.c @@ -751,34 +751,6 @@ void resetClient(redisClient *c) { if (!(c->flags & REDIS_MULTI)) c->flags &= (~REDIS_ASKING); } -void closeTimedoutClients(void) { - redisClient *c; - listNode *ln; - time_t now = time(NULL); - listIter li; - - listRewind(server.clients,&li); - while ((ln = listNext(&li)) != NULL) { - c = listNodeValue(ln); - if (server.maxidletime && - !(c->flags & REDIS_SLAVE) && /* no timeout for slaves */ - !(c->flags & REDIS_MASTER) && /* no timeout for masters */ - !(c->flags & REDIS_BLOCKED) && /* no timeout for BLPOP */ - dictSize(c->pubsub_channels) == 0 && /* no timeout for pubsub */ - listLength(c->pubsub_patterns) == 0 && - (now - c->lastinteraction > server.maxidletime)) - { - redisLog(REDIS_VERBOSE,"Closing idle client"); - freeClient(c); - } else if (c->flags & REDIS_BLOCKED) { - if (c->bpop.timeout != 0 && c->bpop.timeout < now) { - addReply(c,shared.nullmultibulk); - unblockClientWaitingData(c); - } - } - } -} - int processInlineBuffer(redisClient *c) { char *newline = strstr(c->querybuf,"\r\n"); int argc, j; diff --git a/src/redis.c b/src/redis.c index 3294eea4..5741bc90 100644 --- a/src/redis.c +++ b/src/redis.c @@ -641,6 +641,50 @@ long long getOperationsPerSecond(void) { return sum / REDIS_OPS_SEC_SAMPLES; } +void closeTimedoutClient(redisClient *c) { + time_t now = time(NULL); + + if (server.maxidletime && + !(c->flags & REDIS_SLAVE) && /* no timeout for slaves */ + !(c->flags & REDIS_MASTER) && /* no timeout for masters */ + !(c->flags & REDIS_BLOCKED) && /* no timeout for BLPOP */ + dictSize(c->pubsub_channels) == 0 && /* no timeout for pubsub */ + listLength(c->pubsub_patterns) == 0 && + (now - c->lastinteraction > server.maxidletime)) + { + redisLog(REDIS_VERBOSE,"Closing idle client"); + freeClient(c); + } else if (c->flags & REDIS_BLOCKED) { + if (c->bpop.timeout != 0 && c->bpop.timeout < now) { + addReply(c,shared.nullmultibulk); + unblockClientWaitingData(c); + } + } +} + +void clientsCron(void) { + /* Make sure to process at least 1/100 of clients per call. + * Since this function is called 10 times per second we are sure that + * in the worst case we process all the clients in 10 seconds. + * In normal conditions (a reasonable number of clients) we process + * all the clients in a shorter time. */ + int iterations = listLength(server.clients)/100; + if (iterations < 50) iterations = 50; + + while(listLength(server.clients) && iterations--) { + redisClient *c; + listNode *head; + + /* Rotate the list, take the current head, process. + * This way if the client must be removed from the list it's the + * first element and we don't incur into O(N) computation. */ + listRotate(server.clients); + head = listFirst(server.clients); + c = listNodeValue(head); + closeTimedoutClient(c); + } +} + int serverCron(struct aeEventLoop *eventLoop, long long id, void *clientData) { int j, loops = server.cronloops; REDIS_NOTUSED(eventLoop); @@ -712,9 +756,8 @@ int serverCron(struct aeEventLoop *eventLoop, long long id, void *clientData) { zmalloc_used_memory()); } - /* Close connections of timedout clients */ - if ((server.maxidletime && !(loops % 100)) || server.bpop_blocked_clients) - closeTimedoutClients(); + /* We need to do a few operations on clients asynchronously. */ + clientsCron(); /* Start a scheduled AOF rewrite if this was requested by the user while * a BGSAVE was in progress. */ From 6df450b77c34729aa7a1138051bbaf0acd48563c Mon Sep 17 00:00:00 2001 From: antirez Date: Tue, 13 Mar 2012 18:06:29 +0100 Subject: [PATCH 41/70] CLIENT LIST test modified to reflect the new output. --- tests/unit/introspection.tcl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/unit/introspection.tcl b/tests/unit/introspection.tcl index 3daa65e0..a768e2ab 100644 --- a/tests/unit/introspection.tcl +++ b/tests/unit/introspection.tcl @@ -1,5 +1,5 @@ start_server {tags {"introspection"}} { test {CLIENT LIST} { r client list - } {*addr=*:* fd=* idle=* flags=N db=9 sub=0 psub=0 qbuf=0 obl=0 oll=0 omem=0 events=r cmd=client*} + } {*addr=*:* fd=* age=* idle=* flags=N db=9 sub=0 psub=0 qbuf=0 qbuf-free=* obl=0 oll=0 omem=0 events=r cmd=client*} } From 529bde82ecf1e566f2db103e5c49402262d3638c Mon Sep 17 00:00:00 2001 From: antirez Date: Wed, 14 Mar 2012 09:56:22 +0100 Subject: [PATCH 42/70] Call all the helper functions needed by clientsCron() as clientsCronSomething() for clarity. --- src/redis.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/redis.c b/src/redis.c index 5741bc90..f46f2376 100644 --- a/src/redis.c +++ b/src/redis.c @@ -641,7 +641,7 @@ long long getOperationsPerSecond(void) { return sum / REDIS_OPS_SEC_SAMPLES; } -void closeTimedoutClient(redisClient *c) { +void clientsCronHandleTimeout(redisClient *c) { time_t now = time(NULL); if (server.maxidletime && @@ -681,7 +681,7 @@ void clientsCron(void) { listRotate(server.clients); head = listFirst(server.clients); c = listNodeValue(head); - closeTimedoutClient(c); + clientsCronHandleTimeout(c); } } From 9555f8f21b9f1780de307c19da268ef63f7c2ae9 Mon Sep 17 00:00:00 2001 From: antirez Date: Wed, 14 Mar 2012 10:13:23 +0100 Subject: [PATCH 43/70] sds.c new function sdsRemoveFreeSpace(). The new function is used in order to resize the string allocation so that only the minimal allocation possible is used, removing all the free space at the end of the string normally used to improve efficiency of concatenation operations. --- src/sds.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/src/sds.c b/src/sds.c index 092a431e..82d12e23 100644 --- a/src/sds.c +++ b/src/sds.c @@ -111,6 +111,18 @@ sds sdsMakeRoomFor(sds s, size_t addlen) { return newsh->buf; } +/* Reallocate the sds string so that it has no free space at the end. The + * contained string remains not altered, but next concatenation operations + * will require a reallocation. */ +sds sdsRemoveFreeSpace(sds s) { + struct sdshdr *sh; + + sh = (void*) (s-(sizeof(struct sdshdr))); + sh = zrealloc(sh, sizeof(struct sdshdr)+sh->len+1); + sh->free = 0; + return sh->buf; +} + /* Increment the sds length and decrements the left free space at the * end of the string accordingly to 'incr'. Also set the null term * in the new end of the string. From 739803c06403481f32534a4ab5f5735fa1b52e6f Mon Sep 17 00:00:00 2001 From: antirez Date: Wed, 14 Mar 2012 14:58:26 +0100 Subject: [PATCH 44/70] sds.c: sdsAllocSize() function added. --- src/sds.c | 6 ++++++ src/sds.h | 2 ++ 2 files changed, 8 insertions(+) diff --git a/src/sds.c b/src/sds.c index 82d12e23..bc6aa6b2 100644 --- a/src/sds.c +++ b/src/sds.c @@ -123,6 +123,12 @@ sds sdsRemoveFreeSpace(sds s) { return sh->buf; } +size_t sdsAllocSize(sds s) { + struct sdshdr *sh = (void*) (s-(sizeof(struct sdshdr))); + + return sizeof(*sh)+sh->len+sh->free+1; +} + /* Increment the sds length and decrements the left free space at the * end of the string accordingly to 'incr'. Also set the null term * in the new end of the string. diff --git a/src/sds.h b/src/sds.h index b00551b4..0648381b 100644 --- a/src/sds.h +++ b/src/sds.h @@ -94,5 +94,7 @@ sds sdsmapchars(sds s, char *from, char *to, size_t setlen); /* Low level functions exposed to the user API */ sds sdsMakeRoomFor(sds s, size_t addlen); void sdsIncrLen(sds s, int incr); +sds sdsRemoveFreeSpace(sds s); +size_t sdsAllocSize(sds s); #endif From ae22bf1ef6dbfa07a485f61c119eb2e275a7916f Mon Sep 17 00:00:00 2001 From: antirez Date: Wed, 14 Mar 2012 15:32:30 +0100 Subject: [PATCH 45/70] Reclaim space from the client querybuf if needed. --- src/aof.c | 1 + src/networking.c | 2 ++ src/redis.c | 32 +++++++++++++++++++++++++++++--- src/redis.h | 1 + 4 files changed, 33 insertions(+), 3 deletions(-) diff --git a/src/aof.c b/src/aof.c index 64cd76d3..83633217 100644 --- a/src/aof.c +++ b/src/aof.c @@ -287,6 +287,7 @@ struct redisClient *createFakeClient(void) { selectDb(c,0); c->fd = -1; c->querybuf = sdsempty(); + c->querybuf_peak = 0; c->argc = 0; c->argv = NULL; c->bufpos = 0; diff --git a/src/networking.c b/src/networking.c index ae77d11b..375186d1 100644 --- a/src/networking.c +++ b/src/networking.c @@ -43,6 +43,7 @@ redisClient *createClient(int fd) { c->fd = fd; c->bufpos = 0; c->querybuf = sdsempty(); + c->querybuf_peak = 0; c->reqtype = 0; c->argc = 0; c->argv = NULL; @@ -998,6 +999,7 @@ void readQueryFromClient(aeEventLoop *el, int fd, void *privdata, int mask) { } qblen = sdslen(c->querybuf); + if (c->querybuf_peak < qblen) c->querybuf_peak = qblen; c->querybuf = sdsMakeRoomFor(c->querybuf, readlen); nread = read(fd, c->querybuf+qblen, readlen); if (nread == -1) { diff --git a/src/redis.c b/src/redis.c index f46f2376..5388f5b7 100644 --- a/src/redis.c +++ b/src/redis.c @@ -642,7 +642,7 @@ long long getOperationsPerSecond(void) { } void clientsCronHandleTimeout(redisClient *c) { - time_t now = time(NULL); + time_t now = server.unixtime; if (server.maxidletime && !(c->flags & REDIS_SLAVE) && /* no timeout for slaves */ @@ -662,15 +662,40 @@ void clientsCronHandleTimeout(redisClient *c) { } } +/* The client query buffer is an sds.c string that can end with a lot of + * free space not used, this function reclaims space if needed. */ +void clientsCronResizeQueryBuffer(redisClient *c) { + size_t querybuf_size = sdsAllocSize(c->querybuf); + time_t idletime = server.unixtime - c->lastinteraction; + + /* There are two conditions to resize the query buffer: + * 1) Query buffer is > BIG_ARG and too big for latest peak. + * 2) Client is inactive and the buffer is bigger than 1k. */ + if (((querybuf_size > REDIS_MBULK_BIG_ARG) && + (querybuf_size/(c->querybuf_peak+1)) > 2) || + (querybuf_size > 1024 && idletime > 2)) + { + /* Only resize the query buffer if it is actually wasting space. */ + if (sdsavail(c->querybuf) > 1024) { + c->querybuf = sdsRemoveFreeSpace(c->querybuf); + } + } + /* Reset the peak again to capture the peak memory usage in the next + * cycle. */ + c->querybuf_peak = 0; +} + void clientsCron(void) { /* Make sure to process at least 1/100 of clients per call. * Since this function is called 10 times per second we are sure that * in the worst case we process all the clients in 10 seconds. * In normal conditions (a reasonable number of clients) we process * all the clients in a shorter time. */ - int iterations = listLength(server.clients)/100; - if (iterations < 50) iterations = 50; + int numclients = listLength(server.clients); + int iterations = numclients/100; + if (iterations < 50) + iterations = (numclients < 50) ? numclients : 50; while(listLength(server.clients) && iterations--) { redisClient *c; listNode *head; @@ -682,6 +707,7 @@ void clientsCron(void) { head = listFirst(server.clients); c = listNodeValue(head); clientsCronHandleTimeout(c); + clientsCronResizeQueryBuffer(c); } } diff --git a/src/redis.h b/src/redis.h index 1d1f572b..49b69523 100644 --- a/src/redis.h +++ b/src/redis.h @@ -323,6 +323,7 @@ typedef struct redisClient { redisDb *db; int dictid; sds querybuf; + size_t querybuf_peak; /* Recent (100ms or more) peak of querybuf size */ int argc; robj **argv; struct redisCommand *cmd, *lastcmd; From c9d3dda29c9db6fca34599d0cf2c1243e73abfbd Mon Sep 17 00:00:00 2001 From: antirez Date: Thu, 15 Mar 2012 20:51:35 +0100 Subject: [PATCH 46/70] Fix for issue #391. Use a simple protocol between clientsCron() and helper functions to understand if the client is still valind and clientsCron() should continue processing or if the client was freed and we should continue with the next one. --- src/redis.c | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/src/redis.c b/src/redis.c index 5388f5b7..07a38b21 100644 --- a/src/redis.c +++ b/src/redis.c @@ -641,7 +641,8 @@ long long getOperationsPerSecond(void) { return sum / REDIS_OPS_SEC_SAMPLES; } -void clientsCronHandleTimeout(redisClient *c) { +/* Check for timeouts. Returns non-zero if the client was terminated */ +int clientsCronHandleTimeout(redisClient *c) { time_t now = server.unixtime; if (server.maxidletime && @@ -654,17 +655,21 @@ void clientsCronHandleTimeout(redisClient *c) { { redisLog(REDIS_VERBOSE,"Closing idle client"); freeClient(c); + return 1; } else if (c->flags & REDIS_BLOCKED) { if (c->bpop.timeout != 0 && c->bpop.timeout < now) { addReply(c,shared.nullmultibulk); unblockClientWaitingData(c); } } + return 0; } /* The client query buffer is an sds.c string that can end with a lot of - * free space not used, this function reclaims space if needed. */ -void clientsCronResizeQueryBuffer(redisClient *c) { + * free space not used, this function reclaims space if needed. + * + * The funciton always returns 0 as it never terminates the client. */ +int clientsCronResizeQueryBuffer(redisClient *c) { size_t querybuf_size = sdsAllocSize(c->querybuf); time_t idletime = server.unixtime - c->lastinteraction; @@ -683,6 +688,7 @@ void clientsCronResizeQueryBuffer(redisClient *c) { /* Reset the peak again to capture the peak memory usage in the next * cycle. */ c->querybuf_peak = 0; + return 0; } void clientsCron(void) { @@ -706,8 +712,11 @@ void clientsCron(void) { listRotate(server.clients); head = listFirst(server.clients); c = listNodeValue(head); - clientsCronHandleTimeout(c); - clientsCronResizeQueryBuffer(c); + /* The following functions do different service checks on the client. + * The protocol is that they return non-zero if the client was + * terminated. */ + if (clientsCronHandleTimeout(c)) continue; + if (clientsCronResizeQueryBuffer(c)) continue; } } From c5166e3fc5f906b501528d2ae9a986f854c0e497 Mon Sep 17 00:00:00 2001 From: antirez Date: Fri, 16 Mar 2012 17:17:39 +0100 Subject: [PATCH 47/70] First implementation of --test-memory. Still a work in progress. --- src/Makefile | 2 +- src/redis.c | 16 +++++++++++++++- 2 files changed, 16 insertions(+), 2 deletions(-) diff --git a/src/Makefile b/src/Makefile index f86ea859..43922c2d 100644 --- a/src/Makefile +++ b/src/Makefile @@ -73,7 +73,7 @@ QUIET_CC = @printf ' %b %b\n' $(CCCOLOR)CC$(ENDCOLOR) $(SRCCOLOR)$@$(ENDCOLOR QUIET_LINK = @printf ' %b %b\n' $(LINKCOLOR)LINK$(ENDCOLOR) $(BINCOLOR)$@$(ENDCOLOR); endif -OBJ = adlist.o ae.o anet.o dict.o redis.o sds.o zmalloc.o lzf_c.o lzf_d.o pqsort.o zipmap.o sha1.o ziplist.o release.o networking.o util.o object.o db.o replication.o rdb.o t_string.o t_list.o t_set.o t_zset.o t_hash.o config.o aof.o pubsub.o multi.o debug.o sort.o intset.o syncio.o cluster.o crc16.o endianconv.o slowlog.o scripting.o bio.o rio.o rand.o +OBJ = adlist.o ae.o anet.o dict.o redis.o sds.o zmalloc.o lzf_c.o lzf_d.o pqsort.o zipmap.o sha1.o ziplist.o release.o networking.o util.o object.o db.o replication.o rdb.o t_string.o t_list.o t_set.o t_zset.o t_hash.o config.o aof.o pubsub.o multi.o debug.o sort.o intset.o syncio.o cluster.o crc16.o endianconv.o slowlog.o scripting.o bio.o rio.o rand.o memtest.o BENCHOBJ = ae.o anet.o redis-benchmark.o sds.o adlist.o zmalloc.o CLIOBJ = anet.o sds.o adlist.o redis-cli.o zmalloc.o release.o CHECKDUMPOBJ = redis-check-dump.o lzf_c.o lzf_d.o diff --git a/src/redis.c b/src/redis.c index 07a38b21..60d45b5e 100644 --- a/src/redis.c +++ b/src/redis.c @@ -2231,7 +2231,8 @@ void usage() { fprintf(stderr,"Usage: ./redis-server [/path/to/redis.conf] [options]\n"); fprintf(stderr," ./redis-server - (read config from stdin)\n"); fprintf(stderr," ./redis-server -v or --version\n"); - fprintf(stderr," ./redis-server -h or --help\n\n"); + fprintf(stderr," ./redis-server -h or --help\n"); + fprintf(stderr," ./redis-server --test-memory \n\n"); fprintf(stderr,"Examples:\n"); fprintf(stderr," ./redis-server (run the server with default conf)\n"); fprintf(stderr," ./redis-server /etc/redis/6379.conf\n"); @@ -2287,6 +2288,8 @@ void setupSignalHandlers(void) { return; } +void memtest(size_t megabytes, int passes); + int main(int argc, char **argv) { long long start; struct timeval tv; @@ -2308,6 +2311,17 @@ int main(int argc, char **argv) { strcmp(argv[1], "--version") == 0) version(); if (strcmp(argv[1], "--help") == 0 || strcmp(argv[1], "-h") == 0) usage(); + if (strcmp(argv[1], "--test-memory") == 0) { + if (argc == 3) { + memtest(atoi(argv[2]),10000); + exit(0); + } else { + fprintf(stderr,"Please specify the amount of memory to test in megabytes.\n"); + fprintf(stderr,"Example: ./redis-server --test-memory 4096\n\n"); + exit(1); + } + } + /* First argument is the config file name? */ if (argv[j][0] != '-' || argv[j][1] != '-') configfile = argv[j++]; From 54e0fa1c2703dda5147e80318adf6929af921eb7 Mon Sep 17 00:00:00 2001 From: antirez Date: Fri, 16 Mar 2012 17:21:49 +0100 Subject: [PATCH 48/70] Hem... actual memtest.c file added. --- src/memtest.c | 91 +++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 91 insertions(+) create mode 100644 src/memtest.c diff --git a/src/memtest.c b/src/memtest.c new file mode 100644 index 00000000..a0e76f19 --- /dev/null +++ b/src/memtest.c @@ -0,0 +1,91 @@ +#include +#include +#include +#include +#include +#include + +#if (ULONG_MAX == 4294967295UL) +#define MEMTEST_32BIT +#elif (ULONG_MAX == 18446744073709551615ULL) +#define MEMTEST_64BIT +#else +#error "ULONG_MAX value not supported." +#endif + +/* Fill words stepping a single page at every write, so we continue to + * touch all the pages in the smallest amount of time reducing the + * effectiveness of caches, and making it hard for the OS to transfer + * pages on the swap. */ +void memtest_fill(unsigned long *l, size_t bytes) { + unsigned long step = 4096/sizeof(unsigned long); + unsigned long words = bytes/sizeof(unsigned long)/2; + unsigned long iwords = words/step; /* words per iteration */ + unsigned long off, w, *l1, *l2; + + assert((bytes & 4095) == 0); + for (off = 0; off < step; off++) { + l1 = l+off; + l2 = l1+words; + for (w = 0; w < iwords; w++) { +#ifdef MEMTEST_32BIT + *l1 = *l2 = ((unsigned long) (rand()&0xffff)) | + (((unsigned long) (rand()&0xffff)) << 16); +#else + *l1 = *l2 = ((unsigned long) (rand()&0xffff)) | + (((unsigned long) (rand()&0xffff)) << 16) | + (((unsigned long) (rand()&0xffff)) << 32) | + (((unsigned long) (rand()&0xffff)) << 48); +#endif + l1 += step; + l2 += step; + } + } +} + +void memtest_compare(unsigned long *l, size_t bytes) { + unsigned long words = bytes/sizeof(unsigned long)/2; + unsigned long w, *l1, *l2; + + assert((bytes & 4095) == 0); + l1 = l; + l2 = l1+words; + for (w = 0; w < words; w++) { + if (*l1 != *l2) { + printf("\n*** MEMORY ERROR DETECTED: %p != %p (%lu vs %lu)\n", + (void*)l1, (void*)l2, *l1, *l2); + exit(1); + } + l1 ++; + l2 ++; + } +} + +void memtest_test(size_t megabytes, int passes) { + size_t bytes = megabytes*1024*1024; + unsigned long *m = malloc(bytes); + int pass = 0; + + if (m == NULL) { + fprintf(stderr,"Unable to allocate %zu megabytes: %s", + megabytes, strerror(errno)); + exit(1); + } + while (pass != passes) { + pass++; + printf("PASS %d... ", pass); + fflush(stdout); + memtest_fill(m,bytes); + memtest_compare(m,bytes); + printf("ok\n"); + } +} + +void memtest(size_t megabytes, int passes) { + memtest_test(megabytes,passes); + printf("\nYour memory passed this test.\n"); + printf("Please if you are stil in doubt use the following two tools:\n"); + printf("1) memtest86: http://www.memtest86.com/\n"); + printf("2) memtester: http://pyropus.ca/software/memtester/\n"); + exit(0); +} From fb068dc91de40e308d78859c03db69a73e881f0e Mon Sep 17 00:00:00 2001 From: antirez Date: Fri, 16 Mar 2012 21:19:53 +0100 Subject: [PATCH 49/70] Memory test function now less boring thanks to screen-wide progress bar. --- src/memtest.c | 53 ++++++++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 48 insertions(+), 5 deletions(-) diff --git a/src/memtest.c b/src/memtest.c index a0e76f19..56d566ab 100644 --- a/src/memtest.c +++ b/src/memtest.c @@ -4,6 +4,8 @@ #include #include #include +#include +#include #if (ULONG_MAX == 4294967295UL) #define MEMTEST_32BIT @@ -13,6 +15,37 @@ #error "ULONG_MAX value not supported." #endif +static struct winsize ws; +size_t progress_printed; /* Printed chars in screen-wide progress bar. */ +size_t progress_full; /* How many chars to write to fill the progress bar. */ + +void memtest_progress_start(char *title, int pass) { + int j; + + printf("\x1b[H\x1b[2J"); /* Cursor home, clear screen. */ + /* Fill with dots. */ + for (j = 0; j < ws.ws_col*ws.ws_row; j++) printf("."); + printf("\x1b[H\x1b[2K"); /* Cursor home, clear current line. */ + printf("%s [%d]\n", title, pass); /* Print title. */ + progress_printed = 0; + progress_full = ws.ws_col*(ws.ws_row-1); + fflush(stdout); +} + +void memtest_progress_end(void) { + printf("\x1b[H\x1b[2J"); /* Cursor home, clear screen. */ +} + +void memtest_progress_step(size_t curr, size_t size, char c) { + size_t chars = (curr*progress_full)/size, j; + + for (j = 0; j < chars-progress_printed; j++) { + printf("%c",c); + progress_printed++; + } + fflush(stdout); +} + /* Fill words stepping a single page at every write, so we continue to * touch all the pages in the smallest amount of time reducing the * effectiveness of caches, and making it hard for the OS to transfer @@ -39,6 +72,8 @@ void memtest_fill(unsigned long *l, size_t bytes) { #endif l1 += step; l2 += step; + if ((w & 0xffff) == 0) + memtest_progress_step(w+iwords*off,words,'+'); } } } @@ -58,13 +93,14 @@ void memtest_compare(unsigned long *l, size_t bytes) { } l1 ++; l2 ++; + if ((w & 0xffff) == 0) memtest_progress_step(w,words,'='); } } void memtest_test(size_t megabytes, int passes) { size_t bytes = megabytes*1024*1024; unsigned long *m = malloc(bytes); - int pass = 0; + int pass = 0, j; if (m == NULL) { fprintf(stderr,"Unable to allocate %zu megabytes: %s", @@ -73,15 +109,22 @@ void memtest_test(size_t megabytes, int passes) { } while (pass != passes) { pass++; - printf("PASS %d... ", pass); - fflush(stdout); + memtest_progress_start("Random fill",pass); memtest_fill(m,bytes); - memtest_compare(m,bytes); - printf("ok\n"); + memtest_progress_end(); + for (j = 0; j < 4; j++) { + memtest_progress_start("Compare",pass); + memtest_compare(m,bytes); + memtest_progress_end(); + } } } void memtest(size_t megabytes, int passes) { + if (ioctl(1, TIOCGWINSZ, &ws) == -1) { + ws.ws_col = 80; + ws.ws_row = 20; + } memtest_test(megabytes,passes); printf("\nYour memory passed this test.\n"); printf("Please if you are stil in doubt use the following two tools:\n"); From 525be599a82fcdcb4231c9438106b31a0b01a2f5 Mon Sep 17 00:00:00 2001 From: antirez Date: Sun, 18 Mar 2012 11:35:35 +0100 Subject: [PATCH 50/70] On crash suggest to give --test-memory a try. --- src/debug.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/debug.c b/src/debug.c index a355df05..8eea3bf8 100644 --- a/src/debug.c +++ b/src/debug.c @@ -636,8 +636,9 @@ void sigsegvHandler(int sig, siginfo_t *info, void *secret) { redisLog(REDIS_WARNING, "\n=== REDIS BUG REPORT END. Make sure to include from START to END. ===\n\n" -" Please report the crash opening an issue on github:\n\n" -" http://github.com/antirez/redis/issues\n\n" +" Please report the crash opening an issue on github:\n\n" +" http://github.com/antirez/redis/issues\n\n" +" Suspect RAM error? Use redis-server --test-memory to veryfy it.\n\n" ); /* free(messages); Don't call free() with possibly corrupted memory. */ if (server.daemonize) unlink(server.pidfile); From 1a197a3c1ae8c1014ca3f5ce0b0e436c550b52bb Mon Sep 17 00:00:00 2001 From: antirez Date: Sun, 18 Mar 2012 17:24:48 +0100 Subject: [PATCH 51/70] Number of iteration of --test-memory is now 300 (several minutes per gigabyte). Memtest86 and Memtester links are also displayed while running the test. --- src/memtest.c | 6 ++++-- src/redis.c | 2 +- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/src/memtest.c b/src/memtest.c index 56d566ab..18e2e6e8 100644 --- a/src/memtest.c +++ b/src/memtest.c @@ -24,11 +24,13 @@ void memtest_progress_start(char *title, int pass) { printf("\x1b[H\x1b[2J"); /* Cursor home, clear screen. */ /* Fill with dots. */ - for (j = 0; j < ws.ws_col*ws.ws_row; j++) printf("."); + for (j = 0; j < ws.ws_col*(ws.ws_row-2); j++) printf("."); + printf("Please keep the test running several minutes per GB of memory.\n"); + printf("Also check http://www.memtest86.com/ and http://pyropus.ca/software/memtester/"); printf("\x1b[H\x1b[2K"); /* Cursor home, clear current line. */ printf("%s [%d]\n", title, pass); /* Print title. */ progress_printed = 0; - progress_full = ws.ws_col*(ws.ws_row-1); + progress_full = ws.ws_col*(ws.ws_row-3); fflush(stdout); } diff --git a/src/redis.c b/src/redis.c index 60d45b5e..8e0b22eb 100644 --- a/src/redis.c +++ b/src/redis.c @@ -2313,7 +2313,7 @@ int main(int argc, char **argv) { strcmp(argv[1], "-h") == 0) usage(); if (strcmp(argv[1], "--test-memory") == 0) { if (argc == 3) { - memtest(atoi(argv[2]),10000); + memtest(atoi(argv[2]),300); exit(0); } else { fprintf(stderr,"Please specify the amount of memory to test in megabytes.\n"); From a5801142a410b8e61867413c132f772484740a7e Mon Sep 17 00:00:00 2001 From: antirez Date: Sun, 18 Mar 2012 17:27:56 +0100 Subject: [PATCH 52/70] Fixed typo. --- src/memtest.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/memtest.c b/src/memtest.c index 18e2e6e8..645d1da1 100644 --- a/src/memtest.c +++ b/src/memtest.c @@ -129,7 +129,7 @@ void memtest(size_t megabytes, int passes) { } memtest_test(megabytes,passes); printf("\nYour memory passed this test.\n"); - printf("Please if you are stil in doubt use the following two tools:\n"); + printf("Please if you are still in doubt use the following two tools:\n"); printf("1) memtest86: http://www.memtest86.com/\n"); printf("2) memtester: http://pyropus.ca/software/memtester/\n"); exit(0); From d033ccb0afa4d9b32c20bce2dd073650d3439479 Mon Sep 17 00:00:00 2001 From: antirez Date: Sun, 18 Mar 2012 18:03:27 +0100 Subject: [PATCH 53/70] More memory tests implemented. Default number of iterations lowered to a more acceptable value of 50. --- src/memtest.c | 75 ++++++++++++++++++++++++++++++++++++++++++++------- src/redis.c | 2 +- 2 files changed, 67 insertions(+), 10 deletions(-) diff --git a/src/memtest.c b/src/memtest.c index 645d1da1..01569d05 100644 --- a/src/memtest.c +++ b/src/memtest.c @@ -15,6 +15,14 @@ #error "ULONG_MAX value not supported." #endif +#ifdef MEMTEST_32BIT +#define ULONG_ONEZERO 0xaaaaaaaaaaaaaaaaUL +#define ULONG_ZEROONE 0x5555555555555555UL +#else +#define ULONG_ONEZERO 0xaaaaaaaaUL +#define ULONG_ZEROONE 0x55555555UL +#endif + static struct winsize ws; size_t progress_printed; /* Printed chars in screen-wide progress bar. */ size_t progress_full; /* How many chars to write to fill the progress bar. */ @@ -52,7 +60,7 @@ void memtest_progress_step(size_t curr, size_t size, char c) { * touch all the pages in the smallest amount of time reducing the * effectiveness of caches, and making it hard for the OS to transfer * pages on the swap. */ -void memtest_fill(unsigned long *l, size_t bytes) { +void memtest_fill_random(unsigned long *l, size_t bytes) { unsigned long step = 4096/sizeof(unsigned long); unsigned long words = bytes/sizeof(unsigned long)/2; unsigned long iwords = words/step; /* words per iteration */ @@ -75,7 +83,40 @@ void memtest_fill(unsigned long *l, size_t bytes) { l1 += step; l2 += step; if ((w & 0xffff) == 0) - memtest_progress_step(w+iwords*off,words,'+'); + memtest_progress_step(w+iwords*off,words,'R'); + } + } +} + +/* Like memtest_fill_random() but uses the two specified values to fill + * memory, in an alternated way (v1|v2|v1|v2|...) */ +void memtest_fill_value(unsigned long *l, size_t bytes, unsigned long v1, + unsigned long v2, char sym) +{ + unsigned long step = 4096/sizeof(unsigned long); + unsigned long words = bytes/sizeof(unsigned long)/2; + unsigned long iwords = words/step; /* words per iteration */ + unsigned long off, w, *l1, *l2, v; + + assert((bytes & 4095) == 0); + for (off = 0; off < step; off++) { + l1 = l+off; + l2 = l1+words; + v = (off & 1) ? v2 : v1; + for (w = 0; w < iwords; w++) { +#ifdef MEMTEST_32BIT + *l1 = *l2 = ((unsigned long) (rand()&0xffff)) | + (((unsigned long) (rand()&0xffff)) << 16); +#else + *l1 = *l2 = ((unsigned long) (rand()&0xffff)) | + (((unsigned long) (rand()&0xffff)) << 16) | + (((unsigned long) (rand()&0xffff)) << 32) | + (((unsigned long) (rand()&0xffff)) << 48); +#endif + l1 += step; + l2 += step; + if ((w & 0xffff) == 0) + memtest_progress_step(w+iwords*off,words,sym); } } } @@ -99,10 +140,20 @@ void memtest_compare(unsigned long *l, size_t bytes) { } } +void memtest_compare_times(unsigned long *m, size_t bytes, int pass, int times) { + int j; + + for (j = 0; j < times; j++) { + memtest_progress_start("Compare",pass); + memtest_compare(m,bytes); + memtest_progress_end(); + } +} + void memtest_test(size_t megabytes, int passes) { size_t bytes = megabytes*1024*1024; unsigned long *m = malloc(bytes); - int pass = 0, j; + int pass = 0; if (m == NULL) { fprintf(stderr,"Unable to allocate %zu megabytes: %s", @@ -112,13 +163,19 @@ void memtest_test(size_t megabytes, int passes) { while (pass != passes) { pass++; memtest_progress_start("Random fill",pass); - memtest_fill(m,bytes); + memtest_fill_random(m,bytes); memtest_progress_end(); - for (j = 0; j < 4; j++) { - memtest_progress_start("Compare",pass); - memtest_compare(m,bytes); - memtest_progress_end(); - } + memtest_compare_times(m,bytes,pass,4); + + memtest_progress_start("Solid fill",pass); + memtest_fill_value(m,bytes,0,(unsigned long)-1,'S'); + memtest_progress_end(); + memtest_compare_times(m,bytes,pass,4); + + memtest_progress_start("Checkerboard fill",pass); + memtest_fill_value(m,bytes,ULONG_ONEZERO,ULONG_ZEROONE,'C'); + memtest_progress_end(); + memtest_compare_times(m,bytes,pass,4); } } diff --git a/src/redis.c b/src/redis.c index 8e0b22eb..d84a4fc2 100644 --- a/src/redis.c +++ b/src/redis.c @@ -2313,7 +2313,7 @@ int main(int argc, char **argv) { strcmp(argv[1], "-h") == 0) usage(); if (strcmp(argv[1], "--test-memory") == 0) { if (argc == 3) { - memtest(atoi(argv[2]),300); + memtest(atoi(argv[2]),50); exit(0); } else { fprintf(stderr,"Please specify the amount of memory to test in megabytes.\n"); From d4a515c56d911517c7f44330723db9cc7b6f4c0f Mon Sep 17 00:00:00 2001 From: antirez Date: Mon, 19 Mar 2012 14:02:34 +0100 Subject: [PATCH 54/70] Memory addressing test implemented. --- src/memtest.c | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) diff --git a/src/memtest.c b/src/memtest.c index 01569d05..272ec502 100644 --- a/src/memtest.c +++ b/src/memtest.c @@ -56,6 +56,33 @@ void memtest_progress_step(size_t curr, size_t size, char c) { fflush(stdout); } +/* Test that addressing is fine. Every location is populated with its own + * address, and finally verified. This test is very fast but may detect + * ASAP big issues with the memory subsystem. */ +void memtest_addressing(unsigned long *l, size_t bytes) { + unsigned long words = bytes/sizeof(unsigned long); + unsigned long j, *p; + + /* Fill */ + p = l; + for (j = 0; j < words; j++) { + *p = (unsigned long)p; + p++; + if ((j & 0xffff) == 0) memtest_progress_step(j,words*2,'A'); + } + /* Test */ + p = l; + for (j = 0; j < words; j++) { + if (*p != (unsigned long)p) { + printf("\n*** MEMORY ADDRESSING ERROR: %p contains %lu\n", + (void*) p, *p); + exit(1); + } + p++; + if ((j & 0xffff) == 0) memtest_progress_step(j+words,words*2,'A'); + } +} + /* Fill words stepping a single page at every write, so we continue to * touch all the pages in the smallest amount of time reducing the * effectiveness of caches, and making it hard for the OS to transfer @@ -162,6 +189,11 @@ void memtest_test(size_t megabytes, int passes) { } while (pass != passes) { pass++; + + memtest_progress_start("Addressing test",pass); + memtest_addressing(m,bytes); + memtest_progress_end(); + memtest_progress_start("Random fill",pass); memtest_fill_random(m,bytes); memtest_progress_end(); From bb0aadbe215f878221693175492d1a0cae888abc Mon Sep 17 00:00:00 2001 From: antirez Date: Mon, 19 Mar 2012 19:16:41 +0100 Subject: [PATCH 55/70] Read-only flag removed from PUBLISH command. --- src/redis.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/redis.c b/src/redis.c index d84a4fc2..98c12be9 100644 --- a/src/redis.c +++ b/src/redis.c @@ -229,7 +229,7 @@ struct redisCommand redisCommandTable[] = { {"unsubscribe",unsubscribeCommand,-1,"rps",0,NULL,0,0,0,0,0}, {"psubscribe",psubscribeCommand,-2,"rps",0,NULL,0,0,0,0,0}, {"punsubscribe",punsubscribeCommand,-1,"rps",0,NULL,0,0,0,0,0}, - {"publish",publishCommand,3,"rpf",0,NULL,0,0,0,0,0}, + {"publish",publishCommand,3,"pf",0,NULL,0,0,0,0,0}, {"watch",watchCommand,-2,"rs",0,noPreloadGetKeys,1,-1,1,0,0}, {"unwatch",unwatchCommand,1,"rs",0,NULL,0,0,0,0,0}, {"cluster",clusterCommand,-2,"ar",0,NULL,0,0,0,0,0}, From 0d44d507925a97e55ca2352544b1bf6a1b78fa81 Mon Sep 17 00:00:00 2001 From: antirez Date: Mon, 19 Mar 2012 19:29:06 +0100 Subject: [PATCH 56/70] Suppress warnings compiling redis-cli with certain gcc versions. --- src/redis-cli.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/src/redis-cli.c b/src/redis-cli.c index c631aa79..bdaa3964 100644 --- a/src/redis-cli.c +++ b/src/redis-cli.c @@ -478,7 +478,7 @@ static sds cliFormatReplyCSV(redisReply *r) { static int cliReadReply(int output_raw_strings) { void *_reply; redisReply *reply; - sds out; + sds out = NULL; int output = 1; if (redisGetReply(context,&_reply) != REDIS_OK) { @@ -498,7 +498,8 @@ static int cliReadReply(int output_raw_strings) { reply = (redisReply*)_reply; - /* Check if we need to connect to a different node and reissue the request. */ + /* Check if we need to connect to a different node and reissue the + * request. */ if (config.cluster_mode && reply->type == REDIS_REPLY_ERROR && (!strncmp(reply->str,"MOVED",5) || !strcmp(reply->str,"ASK"))) { @@ -926,7 +927,10 @@ static void slaveMode(void) { unsigned long long payload; /* Send the SYNC command. */ - write(fd,"SYNC\r\n",6); + if (write(fd,"SYNC\r\n",6) != 6) { + fprintf(stderr,"Error writing to master\n"); + exit(1); + } /* Read $\r\n, making sure to read just up to "\n" */ p = buf; From 7a0c72f34550c3811324464661f1b463ccfd362b Mon Sep 17 00:00:00 2001 From: antirez Date: Tue, 20 Mar 2012 13:06:50 +0100 Subject: [PATCH 57/70] redis_init_script template updated. --- utils/redis.conf.tpl | 190 ++++++++++++++++++++++++++++++++++--------- 1 file changed, 151 insertions(+), 39 deletions(-) diff --git a/utils/redis.conf.tpl b/utils/redis.conf.tpl index 9e2e1355..e7febeda 100644 --- a/utils/redis.conf.tpl +++ b/utils/redis.conf.tpl @@ -1,6 +1,6 @@ # Redis configuration file example -# Note on units: when memory size is needed, it is possible to specifiy +# Note on units: when memory size is needed, it is possible to specify # it in the usual form of 1k 5GB 4M and so forth: # # 1k => 1000 bytes @@ -34,9 +34,10 @@ port $REDIS_PORT # on a unix socket when not specified. # # unixsocket /tmp/redis.sock +# unixsocketperm 755 # Close the connection after a client is idle for N seconds (0 to disable) -timeout 300 +timeout 0 # Set server verbosity to 'debug' # it can be one of: @@ -44,7 +45,7 @@ timeout 300 # verbose (many rarely useful info, but not a mess like the debug level) # notice (moderately verbose, what you want in production probably) # warning (only very important / critical messages are logged) -loglevel verbose +loglevel notice # Specify the log file name. Also 'stdout' can be used to force # Redis to log on the standard output. Note that if you use standard @@ -81,11 +82,32 @@ databases 16 # after 60 sec if at least 10000 keys changed # # Note: you can disable saving at all commenting all the "save" lines. +# +# It is also possible to remove all the previously configured save +# points by adding a save directive with a single empty string argument +# like in the following example: +# +# save "" save 900 1 save 300 10 save 60 10000 +# By default Redis will stop accepting writes if RDB snapshots are enabled +# (at least one save point) and the latest background save failed. +# This will make the user aware (in an hard way) that data is not persisting +# on disk properly, otherwise chances are that no one will notice and some +# distater will happen. +# +# If the background saving process will start working again Redis will +# automatically allow writes again. +# +# However if you have setup your proper monitoring of the Redis server +# and persistence, you may want to disable this feature so that Redis will +# continue to work as usually even if there are problems with disk, +# permissions, and so forth. +stop-writes-on-bgsave-error yes + # Compress string objects using LZF when dump .rdb databases? # For default that's set to 'yes' as it's almost always a win. # If you want to save some CPU in the saving child set it to 'no' but @@ -125,7 +147,7 @@ dir $REDIS_DATA_DIR # is still in progress, the slave can act in two different ways: # # 1) if slave-serve-stale-data is set to 'yes' (the default) the slave will -# still reply to client requests, possibly with out of data data, or the +# still reply to client requests, possibly with out of date data, or the # data set may just be empty if this is the first synchronization. # # 2) if slave-serve-stale data is set to 'no' the slave will reply with @@ -134,6 +156,21 @@ dir $REDIS_DATA_DIR # slave-serve-stale-data yes +# Slaves send PINGs to server in a predefined interval. It's possible to change +# this interval with the repl_ping_slave_period option. The default value is 10 +# seconds. +# +# repl-ping-slave-period 10 + +# The following option sets a timeout for both Bulk transfer I/O timeout and +# master data or ping response timeout. The default value is 60 seconds. +# +# It is important to make sure that this value is greater than the value +# specified for repl-ping-slave-period otherwise a timeout will be detected +# every time there is low traffic between the master and the slave. +# +# repl-timeout 60 + ################################## SECURITY ################################### # Require clients to issue AUTH before processing any other @@ -151,7 +188,7 @@ slave-serve-stale-data yes # Command renaming. # -# It is possilbe to change the name of dangerous commands in a shared +# It is possible to change the name of dangerous commands in a shared # environment. For instance the CONFIG command may be renamed into something # of hard to guess so that it will be still available for internal-use # tools but not available for general clients. @@ -160,37 +197,46 @@ slave-serve-stale-data yes # # rename-command CONFIG b840fc02d524045429941cc15f59e41cb7be6c52 # -# It is also possilbe to completely kill a command renaming it into +# It is also possible to completely kill a command renaming it into # an empty string: # # rename-command CONFIG "" ################################### LIMITS #################################### -# Set the max number of connected clients at the same time. By default there -# is no limit, and it's up to the number of file descriptors the Redis process -# is able to open. The special value '0' means no limits. +# Set the max number of connected clients at the same time. By default +# this limit is set to 10000 clients, however if the Redis server is not +# able ot configure the process file limit to allow for the specified limit +# the max number of allowed clients is set to the current file limit +# minus 32 (as Redis reserves a few file descriptors for internal uses). +# # Once the limit is reached Redis will close all the new connections sending # an error 'max number of clients reached'. # -# maxclients 128 +# maxclients 10000 # Don't use more memory than the specified amount of bytes. -# When the memory limit is reached Redis will try to remove keys with an -# EXPIRE set. It will try to start freeing keys that are going to expire -# in little time and preserve keys with a longer time to live. -# Redis will also try to remove objects from free lists if possible. +# When the memory limit is reached Redis will try to remove keys +# accordingly to the eviction policy selected (see maxmemmory-policy). # -# If all this fails, Redis will start to reply with errors to commands -# that will use more memory, like SET, LPUSH, and so on, and will continue -# to reply to most read-only commands like GET. +# If Redis can't remove keys according to the policy, or if the policy is +# set to 'noeviction', Redis will start to reply with errors to commands +# that would use more memory, like SET, LPUSH, and so on, and will continue +# to reply to read-only commands like GET. # -# WARNING: maxmemory can be a good idea mainly if you want to use Redis as a -# 'state' server or cache, not as a real DB. When Redis is used as a real -# database the memory usage will grow over the weeks, it will be obvious if -# it is going to use too much memory in the long run, and you'll have the time -# to upgrade. With maxmemory after the limit is reached you'll start to get -# errors for write operations, and this may even lead to DB inconsistency. +# This option is usually useful when using Redis as an LRU cache, or to set +# an hard memory limit for an instance (using the 'noeviction' policy). +# +# WARNING: If you have slaves attached to an instance with maxmemory on, +# the size of the output buffers needed to feed the slaves are subtracted +# from the used memory count, so that network problems / resyncs will +# not trigger a loop where keys are evicted, and in turn the output +# buffer of slaves is full with DELs of keys evicted triggering the deletion +# of more keys, and so forth until the database is completely emptied. +# +# In short... if you have slaves attached it is suggested that you set a lower +# limit for maxmemory so that there is some free RAM on the system for slave +# output buffers (but this is not needed if the policy is 'noeviction'). # # maxmemory @@ -200,7 +246,7 @@ slave-serve-stale-data yes # volatile-lru -> remove the key with an expire set using an LRU algorithm # allkeys-lru -> remove any key accordingly to the LRU algorithm # volatile-random -> remove a random key with an expire set -# allkeys->random -> remove a random key, any key +# allkeys-random -> remove a random key, any key # volatile-ttl -> remove the key with the nearest expire time (minor TTL) # noeviction -> don't expire at all, just return an error on write operations # @@ -260,7 +306,7 @@ appendonly no # # The default is "everysec" that's usually the right compromise between # speed and data safety. It's up to you to understand if you can relax this to -# "no" that will will let the operating system flush the output buffer when +# "no" that will let the operating system flush the output buffer when # it wants, for better performances (but if you can live with the idea of # some data loss consider the default persistence mode that's snapshotting), # or on the contrary, use "always" that's very slow but a bit safer than @@ -284,7 +330,7 @@ appendfsync everysec # BGSAVE or BGREWRITEAOF is in progress. # # This means that while another child is saving the durability of Redis is -# the same as "appendfsync none", that in pratical terms means that it is +# the same as "appendfsync none", that in practical terms means that it is # possible to lost up to 30 seconds of log in the worst scenario (with the # default Linux settings). # @@ -306,7 +352,7 @@ no-appendfsync-on-rewrite no # is useful to avoid rewriting the AOF file even if the percentage increase # is reached but it is still pretty small. # -# Specify a precentage of zero in order to disable the automatic AOF +# Specify a percentage of zero in order to disable the automatic AOF # rewrite feature. auto-aof-rewrite-percentage 100 @@ -315,9 +361,39 @@ auto-aof-rewrite-min-size 64mb ################################ LUA SCRIPTING ############################### # Max execution time of a Lua script in milliseconds. -# This prevents that a programming error generating an infinite loop will block -# your server forever. Set it to 0 or a negative value for unlimited execution. -#lua-time-limit 60000 +# +# If the maximum execution time is reached Redis will log that a script is +# still in execution after the maximum allowed time and will start to +# reply to queries with an error. +# +# When a long running script exceed the maximum execution time only the +# SCRIPT KILL and SHUTDOWN NOSAVE commands are available. The first can be +# used to stop a script that did not yet called write commands. The second +# is the only way to shut down the server in the case a write commands was +# already issue by the script but the user don't want to wait for the natural +# termination of the script. +# +# Set it to 0 or a negative value for unlimited execution without warnings. +lua-time-limit 5000 + +################################ REDIS CLUSTER ############################### +# +# Normal Redis instances can't be part of a Redis Cluster, only nodes that are +# started as cluster nodes can. In order to start a Redis instance as a +# cluster node enable the cluster support uncommenting the following: +# +# cluster-enabled yes + +# Every cluster node has a cluster configuration file. This file is not +# intended to be edited by hand. It is created and updated by Redis nodes. +# Every Redis Cluster node requires a different cluster configuration file. +# Make sure that instances running in the same system does not have +# overlapping cluster configuration file names. +# +# cluster-config-file nodes-6379.conf + +# In order to setup your cluster make sure to read the documentation +# available at http://redis.io web site. ################################## SLOW LOG ################################### @@ -345,12 +421,11 @@ slowlog-max-len 1024 ############################### ADVANCED CONFIG ############################### -# Hashes are encoded in a special way (much more memory efficient) when they -# have at max a given numer of elements, and the biggest element does not -# exceed a given threshold. You can configure this limits with the following -# configuration directives. -hash-max-zipmap-entries 512 -hash-max-zipmap-value 64 +# Hashes are encoded using a memory efficient data structure when they have a +# small number of entries, and the biggest entry does not exceed a given +# threshold. These thresholds can be configured using the following directives. +hash-max-ziplist-entries 512 +hash-max-ziplist-value 64 # Similarly to hashes, small lists are also encoded in a special way in order # to save a lot of space. The special representation is only used when @@ -373,9 +448,9 @@ zset-max-ziplist-value 64 # Active rehashing uses 1 millisecond every 100 milliseconds of CPU time in # order to help rehashing the main Redis hash table (the one mapping top-level -# keys to values). The hash table implementation redis uses (see dict.c) +# keys to values). The hash table implementation Redis uses (see dict.c) # performs a lazy rehashing: the more operation you run into an hash table -# that is rhashing, the more rehashing "steps" are performed, so if the +# that is rehashing, the more rehashing "steps" are performed, so if the # server is idle the rehashing is never complete and some more memory is used # by the hash table. # @@ -391,10 +466,47 @@ zset-max-ziplist-value 64 # want to free memory asap when possible. activerehashing yes +# The client output buffer limits can be used to force disconnection of clients +# that are not reading data from the server fast enough for some reason (a +# common reason is that a Pub/Sub client can't consume messages as fast as the +# publisher can produce them). +# +# The limit can be set differently for the three different classes of clients: +# +# normal -> normal clients +# slave -> slave clients and MONITOR clients +# pubsub -> clients subcribed to at least one pubsub channel or pattern +# +# The syntax of every client-output-buffer-limit directive is the following: +# +# client-output-buffer-limit +# +# A client is immediately disconnected once the hard limit is reached, or if +# the soft limit is reached and remains reached for the specified number of +# seconds (continuously). +# So for instance if the hard limit is 32 megabytes and the soft limit is +# 16 megabytes / 10 seconds, the client will get disconnected immediately +# if the size of the output buffers reach 32 megabytes, but will also get +# disconnected if the client reaches 16 megabytes and continuously overcomes +# the limit for 10 seconds. +# +# By default normal clients are not limited because they don't receive data +# without asking (in a push way), but just after a request, so only +# asynchronous clients may create a scenario where data is requested faster +# than it can read. +# +# Instead there is a default limit for pubsub and slave clients, since +# subscribers and slaves receive data in a push fashion. +# +# Both the hard or the soft limit can be disabled just setting it to zero. +client-output-buffer-limit normal 0 0 0 +client-output-buffer-limit slave 256mb 64mb 60 +client-output-buffer-limit pubsub 32mb 8mb 60 + ################################## INCLUDES ################################### # Include one or more other config files here. This is useful if you -# have a standard template that goes to all redis server but also need +# have a standard template that goes to all Redis server but also need # to customize a few per-server settings. Include files can include # other files, so use this wisely. # From f3fd419fc95e78818f9eeef15eb2d2e5a60bfbbb Mon Sep 17 00:00:00 2001 From: antirez Date: Tue, 20 Mar 2012 17:32:48 +0100 Subject: [PATCH 58/70] Support for read-only slaves. Semantical fixes. This commit introduces support for read only slaves via redis.conf and CONFIG GET/SET commands. Also various semantical fixes are implemented here: 1) MULTI/EXEC with only read commands now work where the server is into a state where writes (or commands increasing memory usage) are not allowed. Before this patch everything inside a transaction would fail in this conditions. 2) Scripts just calling read-only commands will work against read only slaves, when the server is out of memory, or when persistence is into an error condition. Before the patch EVAL always failed in this condition. --- redis.conf | 8 ++++++++ src/config.c | 11 +++++++++++ src/multi.c | 13 ++++++++----- src/redis.c | 26 ++++++++++++++++++++------ src/redis.h | 6 ++++-- src/scripting.c | 38 ++++++++++++++++++++++++++++++++++---- 6 files changed, 85 insertions(+), 17 deletions(-) diff --git a/redis.conf b/redis.conf index e0335996..8396a6a4 100644 --- a/redis.conf +++ b/redis.conf @@ -156,6 +156,14 @@ dir ./ # slave-serve-stale-data yes +# You can configure a slave instance to accept writes or not. Writing against +# a slave instance may be useful to store some ephemeral data (because data +# written on a slave will be easily deleted after resync with the master) but +# may also cause problems if clients are writing to it for an error. +# +# Since Redis 2.6 by default slaves are read-only. +slave-read-only yes + # Slaves send PINGs to server in a predefined interval. It's possible to change # this interval with the repl_ping_slave_period option. The default value is 10 # seconds. diff --git a/src/config.c b/src/config.c index 533a2a57..316f0a28 100644 --- a/src/config.c +++ b/src/config.c @@ -202,6 +202,10 @@ void loadServerConfigFromString(char *config) { if ((server.repl_serve_stale_data = yesnotoi(argv[1])) == -1) { err = "argument must be 'yes' or 'no'"; goto loaderr; } + } else if (!strcasecmp(argv[0],"slave-read-only") && argc == 2) { + if ((server.repl_slave_ro = yesnotoi(argv[1])) == -1) { + err = "argument must be 'yes' or 'no'"; goto loaderr; + } } else if (!strcasecmp(argv[0],"rdbcompression") && argc == 2) { if ((server.rdb_compression = yesnotoi(argv[1])) == -1) { err = "argument must be 'yes' or 'no'"; goto loaderr; @@ -514,6 +518,11 @@ void configSetCommand(redisClient *c) { if (yn == -1) goto badfmt; server.repl_serve_stale_data = yn; + } else if (!strcasecmp(c->argv[2]->ptr,"slave-read-only")) { + int yn = yesnotoi(o->ptr); + + if (yn == -1) goto badfmt; + server.repl_slave_ro = yn; } else if (!strcasecmp(c->argv[2]->ptr,"dir")) { if (chdir((char*)o->ptr) == -1) { addReplyErrorFormat(c,"Changing directory: %s", strerror(errno)); @@ -712,6 +721,8 @@ void configGetCommand(redisClient *c) { server.aof_no_fsync_on_rewrite); config_get_bool_field("slave-serve-stale-data", server.repl_serve_stale_data); + config_get_bool_field("slave-read-only", + server.repl_slave_ro); config_get_bool_field("stop-writes-on-bgsave-error", server.stop_writes_on_bgsave_err); config_get_bool_field("daemonize", server.daemonize); diff --git a/src/multi.c b/src/multi.c index 65ec38a8..eee9748c 100644 --- a/src/multi.c +++ b/src/multi.c @@ -40,6 +40,13 @@ void queueMultiCommand(redisClient *c) { c->mstate.count++; } +void discardTransaction(redisClient *c) { + freeClientMultiState(c); + initClientMultiState(c); + c->flags &= ~(REDIS_MULTI|REDIS_DIRTY_CAS);; + unwatchAllKeys(c); +} + void multiCommand(redisClient *c) { if (c->flags & REDIS_MULTI) { addReplyError(c,"MULTI calls can not be nested"); @@ -54,11 +61,7 @@ void discardCommand(redisClient *c) { addReplyError(c,"DISCARD without MULTI"); return; } - - freeClientMultiState(c); - initClientMultiState(c); - c->flags &= ~(REDIS_MULTI|REDIS_DIRTY_CAS);; - unwatchAllKeys(c); + discardTransaction(c); addReply(c,shared.ok); } diff --git a/src/redis.c b/src/redis.c index 98c12be9..737613d8 100644 --- a/src/redis.c +++ b/src/redis.c @@ -211,7 +211,7 @@ struct redisCommand redisCommandTable[] = { {"lastsave",lastsaveCommand,1,"r",0,NULL,0,0,0,0,0}, {"type",typeCommand,2,"r",0,NULL,1,1,1,0,0}, {"multi",multiCommand,1,"rs",0,NULL,0,0,0,0,0}, - {"exec",execCommand,1,"wms",0,NULL,0,0,0,0,0}, + {"exec",execCommand,1,"s",0,NULL,0,0,0,0,0}, {"discard",discardCommand,1,"rs",0,NULL,0,0,0,0,0}, {"sync",syncCommand,1,"ars",0,NULL,0,0,0,0,0}, {"flushdb",flushdbCommand,1,"w",0,NULL,0,0,0,0,0}, @@ -239,8 +239,8 @@ struct redisCommand redisCommandTable[] = { {"dump",dumpCommand,2,"ar",0,NULL,1,1,1,0,0}, {"object",objectCommand,-2,"r",0,NULL,2,2,2,0,0}, {"client",clientCommand,-2,"ar",0,NULL,0,0,0,0,0}, - {"eval",evalCommand,-3,"wms",0,zunionInterGetKeys,0,0,0,0,0}, - {"evalsha",evalShaCommand,-3,"wms",0,zunionInterGetKeys,0,0,0,0,0}, + {"eval",evalCommand,-3,"s",0,zunionInterGetKeys,0,0,0,0,0}, + {"evalsha",evalShaCommand,-3,"s",0,zunionInterGetKeys,0,0,0,0,0}, {"slowlog",slowlogCommand,-2,"r",0,NULL,0,0,0,0,0}, {"script",scriptCommand,-2,"ras",0,NULL,0,0,0,0,0}, {"time",timeCommand,1,"rR",0,NULL,0,0,0,0,0} @@ -939,7 +939,11 @@ void createSharedObjects(void) { shared.slowscripterr = createObject(REDIS_STRING,sdsnew( "-BUSY Redis is busy running a script. You can only call SCRIPT KILL or SHUTDOWN NOSAVE.\r\n")); shared.bgsaveerr = createObject(REDIS_STRING,sdsnew( - "-MISCONF Redis is configured to save RDB snapshots, but is currently not able to persist on disk. Write commands are disabled. Please check Redis logs for details about the error.\r\n")); + "-MISCONF Redis is configured to save RDB snapshots, but is currently not able to persist on disk. Commands that may modify the data set are disabled. Please check Redis logs for details about the error.\r\n")); + shared.roslaveerr = createObject(REDIS_STRING,sdsnew( + "-READONLY You can't write against a read only slave.\r\n")); + shared.oomerr = createObject(REDIS_STRING, + "-OOM command not allowed when used memory > 'maxmemory'.\r\n"); shared.space = createObject(REDIS_STRING,sdsnew(" ")); shared.colon = createObject(REDIS_STRING,sdsnew(":")); shared.plus = createObject(REDIS_STRING,sdsnew("+")); @@ -1048,6 +1052,7 @@ void initServerConfig() { server.repl_state = REDIS_REPL_NONE; server.repl_syncio_timeout = REDIS_REPL_SYNCIO_TIMEOUT; server.repl_serve_stale_data = 1; + server.repl_slave_ro = 1; server.repl_down_since = -1; /* Client output buffer limits */ @@ -1483,8 +1488,7 @@ int processCommand(redisClient *c) { if (server.maxmemory) { int retval = freeMemoryIfNeeded(); if ((c->cmd->flags & REDIS_CMD_DENYOOM) && retval == REDIS_ERR) { - addReplyError(c, - "command not allowed when used memory > 'maxmemory'"); + addReply(c, shared.oomerr); return REDIS_OK; } } @@ -1499,6 +1503,16 @@ int processCommand(redisClient *c) { return REDIS_OK; } + /* Don't accept wirte commands if this is a read only slave. But + * accept write commands if this is our master. */ + if (server.masterhost && server.repl_slave_ro && + !(c->flags & REDIS_MASTER) && + c->cmd->flags & REDIS_CMD_WRITE) + { + addReply(c, shared.roslaveerr); + return REDIS_OK; + } + /* Only allow SUBSCRIBE and UNSUBSCRIBE in the context of Pub/Sub */ if ((dictSize(c->pubsub_channels) > 0 || listLength(c->pubsub_patterns) > 0) && diff --git a/src/redis.h b/src/redis.h index 49b69523..1fc2ae39 100644 --- a/src/redis.h +++ b/src/redis.h @@ -366,8 +366,8 @@ struct sharedObjectsStruct { *colon, *nullbulk, *nullmultibulk, *queued, *emptymultibulk, *wrongtypeerr, *nokeyerr, *syntaxerr, *sameobjecterr, *outofrangeerr, *noscripterr, *loadingerr, *slowscripterr, *bgsaveerr, - *plus, *select0, *select1, *select2, *select3, *select4, - *select5, *select6, *select7, *select8, *select9, + *roslaveerr, *oomerr, *plus, *select0, *select1, *select2, *select3, + *select4, *select5, *select6, *select7, *select8, *select9, *messagebulk, *pmessagebulk, *subscribebulk, *unsubscribebulk, *psubscribebulk, *punsubscribebulk, *del, *rpop, *lpop, *integers[REDIS_SHARED_INTEGERS], @@ -671,6 +671,7 @@ struct redisServer { char *repl_transfer_tmpfile; /* Slave-> master SYNC temp file name */ time_t repl_transfer_lastio; /* Unix time of the latest read, for timeout */ int repl_serve_stale_data; /* Serve stale data when link is down? */ + int repl_slave_ro; /* Slave is read only? */ time_t repl_down_since; /* Unix time at which link with master went down */ /* Limits */ unsigned int maxclients; /* Max number of simultaneous clients */ @@ -901,6 +902,7 @@ void freeClientMultiState(redisClient *c); void queueMultiCommand(redisClient *c); void touchWatchedKey(redisDb *db, robj *key); void touchWatchedKeysOnFlush(int dbid); +void discardTransaction(redisClient *c); /* Redis object implementation */ void decrRefCount(void *o); diff --git a/src/scripting.c b/src/scripting.c index ce1f0877..e38d0807 100644 --- a/src/scripting.c +++ b/src/scripting.c @@ -206,15 +206,45 @@ int luaRedisGenericCommand(lua_State *lua, int raise_error) { goto cleanup; } + /* There are commands that are not allowed inside scripts. */ if (cmd->flags & REDIS_CMD_NOSCRIPT) { luaPushError(lua, "This Redis command is not allowed from scripts"); goto cleanup; } - if (cmd->flags & REDIS_CMD_WRITE && server.lua_random_dirty) { - luaPushError(lua, - "Write commands not allowed after non deterministic commands"); - goto cleanup; + /* Write commands are forbidden against read-only slaves, or if a + * command marked as non-deterministic was already called in the context + * of this script. */ + if (cmd->flags & REDIS_CMD_WRITE) { + if (server.lua_random_dirty) { + luaPushError(lua, + "Write commands not allowed after non deterministic commands"); + goto cleanup; + } else if (server.masterhost && server.repl_slave_ro && + !(server.lua_caller->flags & REDIS_MASTER)) + { + luaPushError(lua, shared.roslaveerr->ptr); + goto cleanup; + } else if (server.stop_writes_on_bgsave_err && + server.saveparamslen > 0 && + server.lastbgsave_status == REDIS_ERR) + { + luaPushError(lua, shared.bgsaveerr->ptr); + goto cleanup; + } + } + + /* If we reached the memory limit configured via maxmemory, commands that + * could enlarge the memory usage are not allowed, but only if this is the + * first write in the context of this script, otherwise we can't stop + * in the middle. */ + if (server.maxmemory && server.lua_write_dirty == 0 && + (cmd->flags & REDIS_CMD_DENYOOM)) + { + if (freeMemoryIfNeeded() == REDIS_ERR) { + luaPushError(lua, shared.oomerr->ptr); + goto cleanup; + } } if (cmd->flags & REDIS_CMD_RANDOM) server.lua_random_dirty = 1; From 7dcdd281f529dea2389509f90b101c0471f7f2bd Mon Sep 17 00:00:00 2001 From: antirez Date: Tue, 20 Mar 2012 17:53:47 +0100 Subject: [PATCH 59/70] DEBUG should not be flagged as w otherwise we can not call DEBUG DIGEST and other commands against read only slaves. --- src/redis.c | 2 +- tests/support/redis.tcl | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/redis.c b/src/redis.c index 737613d8..2558f66a 100644 --- a/src/redis.c +++ b/src/redis.c @@ -223,7 +223,7 @@ struct redisCommand redisCommandTable[] = { {"pttl",pttlCommand,2,"r",0,NULL,1,1,1,0,0}, {"persist",persistCommand,2,"w",0,NULL,1,1,1,0,0}, {"slaveof",slaveofCommand,3,"aws",0,NULL,0,0,0,0,0}, - {"debug",debugCommand,-2,"aws",0,NULL,0,0,0,0,0}, + {"debug",debugCommand,-2,"as",0,NULL,0,0,0,0,0}, {"config",configCommand,-2,"ar",0,NULL,0,0,0,0,0}, {"subscribe",subscribeCommand,-2,"rps",0,NULL,0,0,0,0,0}, {"unsubscribe",unsubscribeCommand,-1,"rps",0,NULL,0,0,0,0,0}, diff --git a/tests/support/redis.tcl b/tests/support/redis.tcl index 4f8ac485..ca6cf34b 100644 --- a/tests/support/redis.tcl +++ b/tests/support/redis.tcl @@ -160,7 +160,7 @@ proc ::redis::redis_read_reply fd { - {return -code error [redis_read_line $fd]} $ {redis_bulk_read $fd} * {redis_multi_bulk_read $fd} - default {return -code error "Bad protocol, $type as reply type byte"} + default {return -code error "Bad protocol, '$type' as reply type byte"} } } From b22eab8faff94b36ff1474c7e9567778f54e79d5 Mon Sep 17 00:00:00 2001 From: antirez Date: Wed, 21 Mar 2012 12:11:07 +0100 Subject: [PATCH 60/70] Correctly create shared.oomerr as an sds string. --- src/redis.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/redis.c b/src/redis.c index 2558f66a..bae852b8 100644 --- a/src/redis.c +++ b/src/redis.c @@ -942,8 +942,8 @@ void createSharedObjects(void) { "-MISCONF Redis is configured to save RDB snapshots, but is currently not able to persist on disk. Commands that may modify the data set are disabled. Please check Redis logs for details about the error.\r\n")); shared.roslaveerr = createObject(REDIS_STRING,sdsnew( "-READONLY You can't write against a read only slave.\r\n")); - shared.oomerr = createObject(REDIS_STRING, - "-OOM command not allowed when used memory > 'maxmemory'.\r\n"); + shared.oomerr = createObject(REDIS_STRING,sdsnew( + "-OOM command not allowed when used memory > 'maxmemory'.\r\n")); shared.space = createObject(REDIS_STRING,sdsnew(" ")); shared.colon = createObject(REDIS_STRING,sdsnew(":")); shared.plus = createObject(REDIS_STRING,sdsnew("+")); From ba864e09d4e24c405ab44ac273381bfdbafff3fd Mon Sep 17 00:00:00 2001 From: antirez Date: Wed, 21 Mar 2012 12:26:05 +0100 Subject: [PATCH 61/70] Comments about security of slave-read-only in redis.coinf. --- redis.conf | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/redis.conf b/redis.conf index 8396a6a4..1b79e09e 100644 --- a/redis.conf +++ b/redis.conf @@ -159,9 +159,17 @@ slave-serve-stale-data yes # You can configure a slave instance to accept writes or not. Writing against # a slave instance may be useful to store some ephemeral data (because data # written on a slave will be easily deleted after resync with the master) but -# may also cause problems if clients are writing to it for an error. +# may also cause problems if clients are writing to it because of a +# misconfiguration. # # Since Redis 2.6 by default slaves are read-only. +# +# Note: read only slaves are not designed to be exposed to untrusted clients +# on the internet. It's just a protection layer against misuse of the instance. +# Still a read only slave exports by default all the administrative commands +# such as CONFIG, DEBUG, and so forth. To a limited extend you can improve +# security of read only slaves using 'rename-command' to shadow all the +# administrative / dangerous commands. slave-read-only yes # Slaves send PINGs to server in a predefined interval. It's possible to change From 1f6146df0ce28667b90432ebf725d4a2b4fb0af7 Mon Sep 17 00:00:00 2001 From: antirez Date: Thu, 22 Mar 2012 18:14:32 +0100 Subject: [PATCH 62/70] Result of INCRBYFLOAT and HINCRBYFLOAT should never be in exponential form, and also should never contain trailing zeroes. This is not possible with vanilla printf() format specifiers, so we alter the output. --- src/object.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/src/object.c b/src/object.c index ccb07208..9699ea53 100644 --- a/src/object.c +++ b/src/object.c @@ -48,7 +48,7 @@ robj *createStringObjectFromLongLong(long long value) { /* Note: this function is defined into object.c since here it is where it * belongs but it is actually designed to be used just for INCRBYFLOAT */ robj *createStringObjectFromLongDouble(long double value) { - char buf[256]; + char buf[256], *p; int len; /* We use 17 digits precision since with 128 bit floats that precision @@ -56,7 +56,16 @@ robj *createStringObjectFromLongDouble(long double value) { * that is "non surprising" for the user (that is, most small decimal * numbers will be represented in a way that when converted back into * a string are exactly the same as what the user typed.) */ - len = snprintf(buf,sizeof(buf),"%.17Lg", value); + len = snprintf(buf,sizeof(buf),"%.17Lf", value); + /* Now remove trailing zeroes after the '.' */ + if ((p = strchr(buf,'.')) != NULL) { + p = buf+len-1; + while(*p == '0') { + p--; + len--; + } + if (*p == '.') len--; + } return createStringObject(buf,len); } From 7b558b1d64a29fbb9fea321da75367c0fa38b61d Mon Sep 17 00:00:00 2001 From: antirez Date: Thu, 22 Mar 2012 18:16:41 +0100 Subject: [PATCH 63/70] Code style hack. --- src/object.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/object.c b/src/object.c index 9699ea53..91e1933a 100644 --- a/src/object.c +++ b/src/object.c @@ -48,7 +48,7 @@ robj *createStringObjectFromLongLong(long long value) { /* Note: this function is defined into object.c since here it is where it * belongs but it is actually designed to be used just for INCRBYFLOAT */ robj *createStringObjectFromLongDouble(long double value) { - char buf[256], *p; + char buf[256]; int len; /* We use 17 digits precision since with 128 bit floats that precision @@ -58,8 +58,8 @@ robj *createStringObjectFromLongDouble(long double value) { * a string are exactly the same as what the user typed.) */ len = snprintf(buf,sizeof(buf),"%.17Lf", value); /* Now remove trailing zeroes after the '.' */ - if ((p = strchr(buf,'.')) != NULL) { - p = buf+len-1; + if (strchr(buf,'.') != NULL) { + char *p = buf+len-1; while(*p == '0') { p--; len--; From 6f0e77ca19278fd37d19c28156d8704754498680 Mon Sep 17 00:00:00 2001 From: antirez Date: Fri, 23 Mar 2012 10:22:58 +0100 Subject: [PATCH 64/70] Replicate HINCRBYFLOAT as HSET. --- src/t_hash.c | 25 ++++++++++++++----------- 1 file changed, 14 insertions(+), 11 deletions(-) diff --git a/src/t_hash.c b/src/t_hash.c index b3928450..5b7a347a 100644 --- a/src/t_hash.c +++ b/src/t_hash.c @@ -135,7 +135,9 @@ int hashTypeExists(robj *o, robj *field) { } /* Add an element, discard the old if the key already exists. - * Return 0 on insert and 1 on update. */ + * Return 0 on insert and 1 on update. + * This function will take care of incrementing the reference count of the + * retained fields and value objects. */ int hashTypeSet(robj *o, robj *field, robj *value) { int update = 0; @@ -168,30 +170,23 @@ int hashTypeSet(robj *o, robj *field, robj *value) { zl = ziplistPush(zl, field->ptr, sdslen(field->ptr), ZIPLIST_TAIL); zl = ziplistPush(zl, value->ptr, sdslen(value->ptr), ZIPLIST_TAIL); } - o->ptr = zl; - decrRefCount(field); decrRefCount(value); /* Check if the ziplist needs to be converted to a hash table */ - if (hashTypeLength(o) > server.hash_max_ziplist_entries) { + if (hashTypeLength(o) > server.hash_max_ziplist_entries) hashTypeConvert(o, REDIS_ENCODING_HT); - } - } else if (o->encoding == REDIS_ENCODING_HT) { if (dictReplace(o->ptr, field, value)) { /* Insert */ incrRefCount(field); } else { /* Update */ update = 1; } - incrRefCount(value); - } else { redisPanic("Unknown hash encoding"); } - return update; } @@ -520,7 +515,7 @@ void hincrbyCommand(redisClient *c) { void hincrbyfloatCommand(redisClient *c) { double long value, incr; - robj *o, *current, *new; + robj *o, *current, *new, *aux; if (getLongDoubleFromObjectOrReply(c,c->argv[3],&incr,NULL) != REDIS_OK) return; if ((o = hashTypeLookupWriteOrCreate(c,c->argv[1])) == NULL) return; @@ -540,9 +535,17 @@ void hincrbyfloatCommand(redisClient *c) { hashTypeTryObjectEncoding(o,&c->argv[2],NULL); hashTypeSet(o,c->argv[2],new); addReplyBulk(c,new); - decrRefCount(new); signalModifiedKey(c->db,c->argv[1]); server.dirty++; + + /* Always replicate HINCRBYFLOAT as an HSET command with the final value + * in order to make sure that differences in float pricision or formatting + * will not create differences in replicas or after an AOF restart. */ + aux = createStringObject("HSET",4); + rewriteClientCommandArgument(c,0,aux); + decrRefCount(aux); + rewriteClientCommandArgument(c,3,new); + decrRefCount(new); } static void addHashFieldToReply(redisClient *c, robj *o, robj *field) { From bd376d13f820b168954ac6683b5197a4ed72f03b Mon Sep 17 00:00:00 2001 From: antirez Date: Fri, 23 Mar 2012 12:42:20 +0100 Subject: [PATCH 65/70] Big endian fix. The bug was introduced because of a typo. --- src/ziplist.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/ziplist.c b/src/ziplist.c index 5962510d..b214b2da 100644 --- a/src/ziplist.c +++ b/src/ziplist.c @@ -240,7 +240,7 @@ static void zipPrevEncodeLengthForceLarge(unsigned char *p, unsigned int len) { } else if ((prevlensize) == 5) { \ assert(sizeof((prevlensize)) == 4); \ memcpy(&(prevlen), ((char*)(ptr)) + 1, 4); \ - memrev32ifbe(&len); \ + memrev32ifbe(&prevlen); \ } \ } while(0); From 03116904c399cf21db3fe4228af3a795ce20fc46 Mon Sep 17 00:00:00 2001 From: antirez Date: Fri, 23 Mar 2012 15:22:25 +0100 Subject: [PATCH 66/70] RDB load of different encodings test added. --- tests/assets/encodings.rdb | Bin 0 -> 667 bytes tests/integration/rdb.tcl | 25 +++++++++++++++++++++++++ tests/test_helper.tcl | 1 + 3 files changed, 26 insertions(+) create mode 100644 tests/assets/encodings.rdb create mode 100644 tests/integration/rdb.tcl diff --git a/tests/assets/encodings.rdb b/tests/assets/encodings.rdb new file mode 100644 index 0000000000000000000000000000000000000000..9fd9b705d16220065ee117a1c1c094f40fb122f2 GIT binary patch literal 667 zcmbVKu}UOC5UuKJ7oAzb-~v%NHW5S&dS+bpFfmX#Qw_|N?w&>$M|aur5EcU?;j)7> zGV&XY4SF>ZBR^rkz`zf1tzKQwOdP0r-Cfn)uU@~+^|g&HrPRU;knEK1xGIbhsS?(T zOp!5$Ql%uLiRxVU_K~%gGG1r2V@aAV)EAeQf1$=iXe|~B7q#x40{=g8Nhbr35aH0(af04| z31?FkfXdOIL*v>$`m`ZiW?H~$fQQSK0B})18Q{+2^#ErNo(A|lG8gy_c?x0;Mx(`{ zoa#1QiP|F?FVK2I8DyCB=mk$S8nlC&4|~3w8;|#Ox&QtGwHmXU=HND1)gUq}8+2xM zS?Yc@4yO2OwUpuPICQ~2@KI=m_~m`huJS+FRQ_l1RQDc;oztC1%JaPY56L Date: Fri, 23 Mar 2012 20:21:19 +0100 Subject: [PATCH 67/70] Fixed memory leak in hash loading. --- src/rdb.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/rdb.c b/src/rdb.c index 519b645d..1e23fa70 100644 --- a/src/rdb.c +++ b/src/rdb.c @@ -859,14 +859,17 @@ robj *rdbLoadObject(int rdbtype, rio *rdb) { /* Add pair to ziplist */ o->ptr = ziplistPush(o->ptr, field->ptr, sdslen(field->ptr), ZIPLIST_TAIL); o->ptr = ziplistPush(o->ptr, value->ptr, sdslen(value->ptr), ZIPLIST_TAIL); - /* Convert to hash table if size threshold is exceeded */ if (sdslen(field->ptr) > server.hash_max_ziplist_value || sdslen(value->ptr) > server.hash_max_ziplist_value) { + decrRefCount(field); + decrRefCount(value); hashTypeConvert(o, REDIS_ENCODING_HT); break; } + decrRefCount(field); + decrRefCount(value); } /* Load remaining fields and values into the hash table */ From 6c658d555487b024f7b9e3d873f0b1541e7197ee Mon Sep 17 00:00:00 2001 From: antirez Date: Fri, 23 Mar 2012 20:20:43 +0100 Subject: [PATCH 68/70] Contextualize comment. --- tests/integration/rdb.tcl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/integration/rdb.tcl b/tests/integration/rdb.tcl index fdc5495f..85c5db9f 100644 --- a/tests/integration/rdb.tcl +++ b/tests/integration/rdb.tcl @@ -1,6 +1,6 @@ set server_path [tmpdir "server.rdb-encoding-test"] -# Copy RDB with zipmap encoded hash to server path +# Copy RDB with different encodings in server path exec cp tests/assets/encodings.rdb $server_path start_server [list overrides [list "dir" $server_path "dbfilename" "encodings.rdb"]] { From c79373482fad7f6a3c558a133e56e5d614ab79f8 Mon Sep 17 00:00:00 2001 From: antirez Date: Sat, 24 Mar 2012 11:42:20 +0100 Subject: [PATCH 69/70] convert-zipmap-hash-on-load test enabled --- tests/test_helper.tcl | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/test_helper.tcl b/tests/test_helper.tcl index 4954b79e..bef8231a 100644 --- a/tests/test_helper.tcl +++ b/tests/test_helper.tcl @@ -31,6 +31,7 @@ set ::all_tests { integration/replication-3 integration/aof integration/rdb + integration/convert-zipmap-hash-on-load unit/pubsub unit/slowlog unit/scripting From 1b247d133351c747a3c6cce8cac64e6830ccab92 Mon Sep 17 00:00:00 2001 From: antirez Date: Sat, 24 Mar 2012 11:52:56 +0100 Subject: [PATCH 70/70] Add used allocator in redis-server -v output. --- src/redis.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/redis.c b/src/redis.c index bae852b8..2f27b4dd 100644 --- a/src/redis.c +++ b/src/redis.c @@ -2236,8 +2236,8 @@ void daemonize(void) { } void version() { - printf("Redis server version %s (%s:%d)\n", REDIS_VERSION, - redisGitSHA1(), atoi(redisGitDirty()) > 0); + printf("Redis server v=%s sha=%s:%d malloc=%s\n", REDIS_VERSION, + redisGitSHA1(), atoi(redisGitDirty()) > 0, ZMALLOC_LIB); exit(0); }