mirror of
https://github.com/fluencelabs/redis
synced 2025-03-17 16:10:50 +00:00
SPOP: reimplemented for speed and better distribution.
The old version of SPOP with "count" argument used an API call of dict.c which was actually designed for a different goal, and was not capable of good distribution. We follow a different three-cases approach optimized for different ratiion between sets and requested number of elements. The implementation is simpler and allowed the removal of a large amount of code.
This commit is contained in:
parent
55003f7a11
commit
9feee428f2
75
src/dict.c
75
src/dict.c
@ -664,81 +664,6 @@ dictEntry *dictGetRandomKey(dict *d)
|
||||
return he;
|
||||
}
|
||||
|
||||
/* XXX: This is going to be removed soon and SPOP internals
|
||||
* reimplemented.
|
||||
*
|
||||
* This is a version of dictGetRandomKey() that is modified in order to
|
||||
* return multiple entries by jumping at a random place of the hash table
|
||||
* and scanning linearly for entries.
|
||||
*
|
||||
* Returned pointers to hash table entries are stored into 'des' that
|
||||
* points to an array of dictEntry pointers. The array must have room for
|
||||
* at least 'count' elements, that is the argument we pass to the function
|
||||
* to tell how many random elements we need.
|
||||
*
|
||||
* The function returns the number of items stored into 'des', that may
|
||||
* be less than 'count' if the hash table has less than 'count' elements
|
||||
* inside.
|
||||
*
|
||||
* Note that this function is not suitable when you need a good distribution
|
||||
* of the returned items, but only when you need to "sample" a given number
|
||||
* of continuous elements to run some kind of algorithm or to produce
|
||||
* statistics. However the function is much faster than dictGetRandomKey()
|
||||
* at producing N elements, and the elements are guaranteed to be non
|
||||
* repeating. */
|
||||
unsigned int dictGetRandomKeys(dict *d, dictEntry **des, unsigned int count) {
|
||||
unsigned int j; /* internal hash table id, 0 or 1. */
|
||||
unsigned int tables; /* 1 or 2 tables? */
|
||||
unsigned int stored = 0, maxsizemask;
|
||||
|
||||
if (dictSize(d) < count) count = dictSize(d);
|
||||
|
||||
/* Try to do a rehashing work proportional to 'count'. */
|
||||
for (j = 0; j < count; j++) {
|
||||
if (dictIsRehashing(d))
|
||||
_dictRehashStep(d);
|
||||
else
|
||||
break;
|
||||
}
|
||||
|
||||
tables = dictIsRehashing(d) ? 2 : 1;
|
||||
maxsizemask = d->ht[0].sizemask;
|
||||
if (tables > 1 && maxsizemask < d->ht[1].sizemask)
|
||||
maxsizemask = d->ht[1].sizemask;
|
||||
|
||||
/* Pick a random point inside the larger table. */
|
||||
unsigned int i = random() & maxsizemask;
|
||||
while(stored < count) {
|
||||
for (j = 0; j < tables; j++) {
|
||||
/* Invariant of the dict.c rehashing: up to the indexes already
|
||||
* visited in ht[0] during the rehashing, there are no populated
|
||||
* buckets, so we can skip ht[0] for indexes between 0 and idx-1. */
|
||||
if (tables == 2 && j == 0 && i < d->rehashidx) {
|
||||
/* Moreover, if we are currently out of range in the second
|
||||
* table, there will be no elements in both tables up to
|
||||
* the current rehashing index, so we jump if possible.
|
||||
* (this happens when going from big to small table). */
|
||||
if (i >= d->ht[1].size) i = d->rehashidx;
|
||||
continue;
|
||||
}
|
||||
if (i >= d->ht[j].size) continue; /* Out of range for this table. */
|
||||
dictEntry *he = d->ht[j].table[i];
|
||||
while (he) {
|
||||
/* Collect all the elements of the buckets found non
|
||||
* empty while iterating. */
|
||||
*des = he;
|
||||
des++;
|
||||
he = he->next;
|
||||
stored++;
|
||||
if (stored == count) return stored;
|
||||
}
|
||||
}
|
||||
i = (i+1) & maxsizemask;
|
||||
}
|
||||
return stored; /* Never reached. */
|
||||
}
|
||||
|
||||
|
||||
/* This function samples the dictionary to return a few keys from random
|
||||
* locations.
|
||||
*
|
||||
|
@ -165,7 +165,6 @@ dictEntry *dictNext(dictIterator *iter);
|
||||
void dictReleaseIterator(dictIterator *iter);
|
||||
dictEntry *dictGetRandomKey(dict *d);
|
||||
unsigned int dictGetSomeKeys(dict *d, dictEntry **des, unsigned int count);
|
||||
unsigned int dictGetRandomKeys(dict *d, dictEntry **des, unsigned int count);
|
||||
void dictPrintStats(dict *d);
|
||||
unsigned int dictGenHashFunction(const void *key, int len);
|
||||
unsigned int dictGenCaseHashFunction(const unsigned char *buf, int len);
|
||||
|
84
src/intset.c
84
src/intset.c
@ -261,90 +261,6 @@ int64_t intsetRandom(intset *is) {
|
||||
return _intsetGet(is,rand()%intrev32ifbe(is->length));
|
||||
}
|
||||
|
||||
/* How many times bigger should the set length be compared to the requested
|
||||
* count of members for us to use the Floyd algorithm instead of
|
||||
* the Knuth algorithm */
|
||||
#define RANDOMMEMBERS_ALGORITHM_SELECTION_RATIO (2)
|
||||
|
||||
/* Copies 'count' random members from the set into the 'values' array.
|
||||
* 'values' must be an array of int64_t values, of length 'count'.
|
||||
* Returns the amount of items returned. If this amount is less than 'count',
|
||||
* then the remaining 'values' are left uninitialized. */
|
||||
int intsetRandomMembers(intset *is, int64_t* values, int count) {
|
||||
|
||||
/* We don't check that is and values are non-NULL - the caller must
|
||||
* play nice. */
|
||||
|
||||
int length = intsetLen(is);
|
||||
|
||||
if (count > length) {
|
||||
/* Return everything in the set */
|
||||
count = length;
|
||||
}
|
||||
|
||||
/* Choose between the Knuth shuffle algorithm, O(1) space, O(length) time,
|
||||
* and the Floyd algorithm, O(length) space, O(count) time. */
|
||||
if ((RANDOMMEMBERS_ALGORITHM_SELECTION_RATIO * count) > length) {
|
||||
|
||||
/* If the count of members requested is almost the length of the set,
|
||||
* use the Knuth shuffle algorithm, O(1) space, O(length) time. */
|
||||
|
||||
/* First, fill the values array with unique random indexes inside
|
||||
* the set. */
|
||||
int in, im, rn, rm;
|
||||
im = 0;
|
||||
for (in = 0; in < length && im < count; in++) {
|
||||
|
||||
rn = length - in;
|
||||
rm = count - im;
|
||||
if (rand() % rn < rm) {
|
||||
values[im++] = in;
|
||||
}
|
||||
}
|
||||
|
||||
} else {
|
||||
|
||||
/* If the length is considerably more than the count of members
|
||||
* requested, use Robert Floyd's algorithm, O(length) space,
|
||||
* O(count) time.
|
||||
* Based on Jon Bentley's Programming Pearls */
|
||||
|
||||
int64_t *is_used = zcalloc(sizeof(int64_t) * length);
|
||||
int in, im, r;
|
||||
|
||||
r = 0;
|
||||
im = 0;
|
||||
|
||||
for (in = length - count; in < length && im < count; in++) {
|
||||
|
||||
/* Generate a random number r */
|
||||
r = rand() % (in + 1);
|
||||
|
||||
/* Do we already have the value in r? */
|
||||
if (is_used[r]) {
|
||||
/* Use in instead of the generated number */
|
||||
r = in;
|
||||
}
|
||||
|
||||
values[im++] = r ;
|
||||
|
||||
/* Mark it as used */
|
||||
is_used[r] = 1;
|
||||
}
|
||||
|
||||
zfree(is_used);
|
||||
}
|
||||
|
||||
/* Replace each random index with the value stored there in the intset */
|
||||
uint8_t encoding = intrev32ifbe(is->encoding);
|
||||
for (int currentValue = 0; currentValue < count; currentValue++) {
|
||||
values[currentValue] =
|
||||
_intsetGetEncoded(is, values[currentValue], encoding);
|
||||
}
|
||||
|
||||
return count;
|
||||
}
|
||||
|
||||
/* Sets the value to the value at the given position. When this position is
|
||||
* out of range the function returns 0, when in range it returns 1. */
|
||||
uint8_t intsetGet(intset *is, uint32_t pos, int64_t *value) {
|
||||
|
@ -43,7 +43,6 @@ intset *intsetAdd(intset *is, int64_t value, uint8_t *success);
|
||||
intset *intsetRemove(intset *is, int64_t value, int *success);
|
||||
uint8_t intsetFind(intset *is, int64_t value);
|
||||
int64_t intsetRandom(intset *is);
|
||||
int intsetRandomMembers(intset *is, int64_t* value, int count);
|
||||
uint8_t intsetGet(intset *is, uint32_t pos, int64_t *value);
|
||||
uint32_t intsetLen(intset *is);
|
||||
size_t intsetBlobLen(intset *is);
|
||||
|
220
src/t_set.c
220
src/t_set.c
@ -212,106 +212,6 @@ int setTypeRandomElement(robj *setobj, robj **objele, int64_t *llele) {
|
||||
return setobj->encoding;
|
||||
}
|
||||
|
||||
/* Return a number of random elements from a non empty set.
|
||||
*
|
||||
* This is a version of setTypeRandomElement() that is modified in order to
|
||||
* return multiple entries, using dictGetRandomKeys() and intsetRandomMembers().
|
||||
*
|
||||
* The elements are stored into 'aux_set' which should be of a set type.
|
||||
*
|
||||
* The function returns the number of items stored into 'aux_set', that may
|
||||
* be less than 'count' if the hash table has less than 'count' elements
|
||||
* inside.
|
||||
*
|
||||
* Note that this function is not suitable when you need a good distribution
|
||||
* of the returned items, but only when you need to "sample" a given number
|
||||
* of continuous elements to run some kind of algorithm or to produce
|
||||
* statistics. However the function is much faster than setTypeRandomElement()
|
||||
* at producing N elements, and the elements are guaranteed to be non
|
||||
* repeating.
|
||||
*/
|
||||
unsigned long setTypeRandomElements(robj *set, unsigned long count,
|
||||
robj *aux_set) {
|
||||
unsigned long set_size;
|
||||
unsigned long elements_to_return = count;
|
||||
unsigned long elements_copied = 0;
|
||||
unsigned long current_element = 0;
|
||||
|
||||
/* Like all setType* functions, we assume good behavior on part of the
|
||||
* caller, so no extra parameter checks are made. */
|
||||
|
||||
/* If the number of elements in the the set is less than the count
|
||||
* requested, just return all of them. */
|
||||
set_size = setTypeSize(set);
|
||||
if (set_size < count) {
|
||||
elements_to_return = set_size;
|
||||
}
|
||||
|
||||
/* TODO: It is definitely faster adding items to the set by directly
|
||||
* handling the Dict or intset inside it, avoiding the constant encoding
|
||||
* checks inside setTypeAdd(). However, We don't want to touch the set
|
||||
* internals in non setType* functions. So, we just call setTypeAdd()
|
||||
* multiple times, but this isn't an optimal solution.
|
||||
* Another option would be to create a bulk-add function:
|
||||
* setTypeAddBulk(). */
|
||||
if (set->encoding == REDIS_ENCODING_HT) {
|
||||
/* Allocate result array */
|
||||
dictEntry **random_elements =
|
||||
zmalloc(sizeof(dictEntry*) * elements_to_return);
|
||||
|
||||
/* Get the random elements */
|
||||
elements_copied =
|
||||
dictGetRandomKeys(set->ptr, random_elements, elements_to_return);
|
||||
redisAssert(elements_copied == elements_to_return);
|
||||
|
||||
/* Put them into the set */
|
||||
for (current_element = 0; current_element < elements_copied;
|
||||
current_element++) {
|
||||
|
||||
/* We get the key and duplicate it, as we know it is a string */
|
||||
setTypeAdd(aux_set,
|
||||
dictGetKey(random_elements[current_element]));
|
||||
}
|
||||
|
||||
zfree(random_elements);
|
||||
|
||||
} else if (set->encoding == REDIS_ENCODING_INTSET) {
|
||||
/* Allocate result array */
|
||||
int64_t *random_elements =
|
||||
zmalloc(sizeof(int64_t) * elements_to_return);
|
||||
robj* element_as_str = NULL;
|
||||
|
||||
elements_copied =
|
||||
intsetRandomMembers((intset*) set->ptr,
|
||||
random_elements,
|
||||
elements_to_return);
|
||||
|
||||
redisAssert(elements_copied == elements_to_return);
|
||||
|
||||
/* Put them into the set */
|
||||
for (current_element = 0; current_element < elements_copied;
|
||||
current_element++) {
|
||||
|
||||
element_as_str = createStringObjectFromLongLong(
|
||||
random_elements[current_element]);
|
||||
|
||||
/* Put the values in the set */
|
||||
setTypeAdd(aux_set,
|
||||
element_as_str);
|
||||
|
||||
decrRefCount(element_as_str);
|
||||
}
|
||||
|
||||
zfree(random_elements);
|
||||
} else {
|
||||
redisPanic("Unknown set encoding");
|
||||
}
|
||||
|
||||
/* We have a set with random elements. Return the actual elements in
|
||||
the aux_set. */
|
||||
return elements_copied;
|
||||
}
|
||||
|
||||
unsigned long setTypeSize(robj *subject) {
|
||||
if (subject->encoding == REDIS_ENCODING_HT) {
|
||||
return dictSize((dict*)subject->ptr);
|
||||
@ -485,15 +385,18 @@ void scardCommand(redisClient *c) {
|
||||
addReplyLongLong(c,setTypeSize(o));
|
||||
}
|
||||
|
||||
/* handle the "SPOP key <count>" variant. The normal version of the
|
||||
/* Handle the "SPOP key <count>" variant. The normal version of the
|
||||
* command is handled by the spopCommand() function itself. */
|
||||
|
||||
/* How many times bigger should be the set compared to the remaining size
|
||||
* for us to use the "create new set" strategy? Read later in the
|
||||
* implementation for more info. */
|
||||
#define SPOP_MOVE_STRATEGY_MUL 5
|
||||
|
||||
void spopWithCountCommand(redisClient *c) {
|
||||
long l;
|
||||
unsigned long count, size;
|
||||
unsigned long elements_returned;
|
||||
robj *set, *aux_set;
|
||||
int64_t llele;
|
||||
robj *set;
|
||||
|
||||
/* Get the count argument */
|
||||
if (getLongFromObjectOrReply(c,c->argv[2],&l,NULL) != REDIS_OK) return;
|
||||
@ -516,12 +419,11 @@ void spopWithCountCommand(redisClient *c) {
|
||||
return;
|
||||
}
|
||||
|
||||
/* Get the size of the set. It is always > 0, as empty sets get
|
||||
* deleted. */
|
||||
size = setTypeSize(set);
|
||||
|
||||
/* Generate an SPOP keyspace notification */
|
||||
notifyKeyspaceEvent(REDIS_NOTIFY_SET,"spop",c->argv[1],c->db->id);
|
||||
server.dirty += count;
|
||||
|
||||
/* CASE 1:
|
||||
* The number of requested elements is greater than or equal to
|
||||
@ -534,64 +436,102 @@ void spopWithCountCommand(redisClient *c) {
|
||||
dbDelete(c->db,c->argv[1]);
|
||||
notifyKeyspaceEvent(REDIS_NOTIFY_GENERIC,"del",c->argv[1],c->db->id);
|
||||
|
||||
/* Replicate/AOF this command as an SREM operation */
|
||||
/* Propagate this command as an DEL operation */
|
||||
rewriteClientCommandVector(c,2,shared.del,c->argv[1]);
|
||||
signalModifiedKey(c->db,c->argv[1]);
|
||||
server.dirty++;
|
||||
return;
|
||||
}
|
||||
|
||||
/* CASE 2:
|
||||
* The number of requested elements is less than the number
|
||||
* of elements inside the set. */
|
||||
/* Case 2 and 3 require to replicate SPOP as a set of SERM commands.
|
||||
* Prepare our replication argument vector. Also send the array length
|
||||
* which is common to both the code paths. */
|
||||
robj *propargv[3];
|
||||
propargv[0] = createStringObject("SREM",4);
|
||||
propargv[1] = c->argv[1];
|
||||
addReplyMultiBulkLen(c,count);
|
||||
|
||||
/* We need an auxiliary set. Optimistically, we create a set using an
|
||||
* Intset internally. */
|
||||
aux_set = createIntsetObject();
|
||||
/* Common iteration vars. */
|
||||
robj *objele;
|
||||
int encoding;
|
||||
int64_t llele;
|
||||
unsigned long remaining = size-count; /* Elements left after SPOP. */
|
||||
|
||||
/* Get the count requested of random elements from the set into our
|
||||
* auxiliary set. */
|
||||
elements_returned = setTypeRandomElements(set, count, aux_set);
|
||||
redisAssert(elements_returned == count);
|
||||
|
||||
{
|
||||
setTypeIterator *si;
|
||||
robj *objele, *propargv[3];
|
||||
int element_encoding;
|
||||
|
||||
addReplyMultiBulkLen(c, elements_returned);
|
||||
propargv[0] = createStringObject("SREM",4);
|
||||
propargv[1] = c->argv[1];
|
||||
|
||||
si = setTypeInitIterator(aux_set);
|
||||
while ((element_encoding = setTypeNext(si, &objele, &llele)) != -1) {
|
||||
if (element_encoding == REDIS_ENCODING_HT) {
|
||||
incrRefCount(objele);
|
||||
} else if (element_encoding == REDIS_ENCODING_INTSET) {
|
||||
/* If we are here, the number of requested elements is less than the
|
||||
* number of elements inside the set. Also we are sure that count < size.
|
||||
* Use two different strategies.
|
||||
*
|
||||
* CASE 2: The number of elements to return is small compared to the
|
||||
* set size. We can just extract random elements and return them to
|
||||
* the set. */
|
||||
if (remaining*SPOP_MOVE_STRATEGY_MUL > count) {
|
||||
while(count--) {
|
||||
encoding = setTypeRandomElement(set,&objele,&llele);
|
||||
if (encoding == REDIS_ENCODING_INTSET) {
|
||||
objele = createStringObjectFromLongLong(llele);
|
||||
} else {
|
||||
redisPanic("Unknown set encoding");
|
||||
incrRefCount(objele);
|
||||
}
|
||||
setTypeRemove(set, objele);
|
||||
addReplyBulk(c, objele);
|
||||
|
||||
/* Return the element to the client and remove from the set. */
|
||||
addReplyBulk(c,objele);
|
||||
setTypeRemove(set,objele);
|
||||
|
||||
/* Replicate/AOF this command as an SREM operation */
|
||||
propargv[2] = objele;
|
||||
alsoPropagate(server.sremCommand,c->db->id,propargv,3,REDIS_PROPAGATE_AOF|REDIS_PROPAGATE_REPL);
|
||||
|
||||
alsoPropagate(server.sremCommand,c->db->id,propargv,3,
|
||||
REDIS_PROPAGATE_AOF|REDIS_PROPAGATE_REPL);
|
||||
decrRefCount(objele);
|
||||
server.dirty++;
|
||||
}
|
||||
decrRefCount(propargv[0]);
|
||||
} else {
|
||||
/* CASE 3: The number of elements to return is very big, approaching
|
||||
* the size of the set itself. After some time extracting random elements
|
||||
* from such a set becomes computationally expensive, so we use
|
||||
* a different strategy, we extract random elements that we don't
|
||||
* want to return (the elements that will remain part of the set),
|
||||
* creating a new set as we do this (that will be stored as the original
|
||||
* set). Then we return the elements left in the original set and
|
||||
* release it. */
|
||||
robj *newset = NULL;
|
||||
|
||||
/* Create a new set with just the remaining elements. */
|
||||
while(remaining--) {
|
||||
encoding = setTypeRandomElement(set,&objele,&llele);
|
||||
if (encoding == REDIS_ENCODING_INTSET) {
|
||||
objele = createStringObjectFromLongLong(llele);
|
||||
} else {
|
||||
incrRefCount(objele);
|
||||
}
|
||||
if (!newset) newset = setTypeCreate(objele);
|
||||
setTypeAdd(newset,objele);
|
||||
setTypeRemove(set,objele);
|
||||
decrRefCount(objele);
|
||||
}
|
||||
|
||||
/* Assign the new set as the key value. */
|
||||
incrRefCount(set); /* Protect the old set value. */
|
||||
dbOverwrite(c->db,c->argv[1],newset);
|
||||
|
||||
/* Tranfer the old set to the client and release it. */
|
||||
setTypeIterator *si;
|
||||
si = setTypeInitIterator(set);
|
||||
while((encoding = setTypeNext(si,&objele,&llele)) != -1) {
|
||||
if (encoding == REDIS_ENCODING_INTSET) {
|
||||
addReplyBulkLongLong(c,llele);
|
||||
} else {
|
||||
addReplyBulk(c,objele);
|
||||
}
|
||||
}
|
||||
setTypeReleaseIterator(si);
|
||||
decrRefCount(set);
|
||||
}
|
||||
|
||||
/* Don't propagate the command itself even if we incremented the
|
||||
* dirty counter. We don't want to propagate an SPOP command since
|
||||
* we propagated the command as a set of SREMs operations using
|
||||
* the alsoPropagate() API. */
|
||||
decrRefCount(propargv[0]);
|
||||
preventCommandPropagation(c);
|
||||
decrRefCount(aux_set);
|
||||
}
|
||||
|
||||
void spopCommand(redisClient *c) {
|
||||
|
Loading…
x
Reference in New Issue
Block a user