From 5792a217f85633225209557d952ad2fa6a3fa0c0 Mon Sep 17 00:00:00 2001 From: antirez Date: Fri, 6 Feb 2015 15:48:42 +0100 Subject: [PATCH] dict.c: add dictGetSomeKeys(), specialized for eviction. --- src/dict.c | 94 ++++++++++++++++++++++++++++++++++++++++++++++++++++- src/dict.h | 1 + src/redis.c | 2 +- 3 files changed, 95 insertions(+), 2 deletions(-) diff --git a/src/dict.c b/src/dict.c index c8aaf152..8f25c14b 100644 --- a/src/dict.c +++ b/src/dict.c @@ -664,7 +664,10 @@ dictEntry *dictGetRandomKey(dict *d) return he; } -/* This is a version of dictGetRandomKey() that is modified in order to +/* XXX: This is going to be removed soon and SPOP internals + * reimplemented. + * + * This is a version of dictGetRandomKey() that is modified in order to * return multiple entries by jumping at a random place of the hash table * and scanning linearly for entries. * @@ -735,6 +738,95 @@ unsigned int dictGetRandomKeys(dict *d, dictEntry **des, unsigned int count) { return stored; /* Never reached. */ } + +/* This function samples the dictionary to return a few keys from random + * locations. + * + * It does not guarantee to return all the keys specified in 'count', nor + * it does guarantee to return non-duplicated elements, however it will make + * some effort to do both things. + * + * Returned pointers to hash table entries are stored into 'des' that + * points to an array of dictEntry pointers. The array must have room for + * at least 'count' elements, that is the argument we pass to the function + * to tell how many random elements we need. + * + * The function returns the number of items stored into 'des', that may + * be less than 'count' if the hash table has less than 'count' elements + * inside, or if not enough elements were found in a reasonable amount of + * steps. + * + * Note that this function is not suitable when you need a good distribution + * of the returned items, but only when you need to "sample" a given number + * of continuous elements to run some kind of algorithm or to produce + * statistics. However the function is much faster than dictGetRandomKey() + * at producing N elements. */ +unsigned int dictGetSomeKeys(dict *d, dictEntry **des, unsigned int count) { + unsigned int j; /* internal hash table id, 0 or 1. */ + unsigned int tables; /* 1 or 2 tables? */ + unsigned int stored = 0, maxsizemask; + unsigned int maxsteps; + + if (dictSize(d) < count) count = dictSize(d); + maxsteps = count*10; + + /* Try to do a rehashing work proportional to 'count'. */ + for (j = 0; j < count; j++) { + if (dictIsRehashing(d)) + _dictRehashStep(d); + else + break; + } + + tables = dictIsRehashing(d) ? 2 : 1; + maxsizemask = d->ht[0].sizemask; + if (tables > 1 && maxsizemask < d->ht[1].sizemask) + maxsizemask = d->ht[1].sizemask; + + /* Pick a random point inside the larger table. */ + unsigned int i = random() & maxsizemask; + unsigned int emptylen = 0; /* Continuous empty entries so far. */ + while(stored < count && maxsteps--) { + for (j = 0; j < tables; j++) { + /* Invariant of the dict.c rehashing: up to the indexes already + * visited in ht[0] during the rehashing, there are no populated + * buckets, so we can skip ht[0] for indexes between 0 and idx-1. */ + if (tables == 2 && j == 0 && i < d->rehashidx) { + /* Moreover, if we are currently out of range in the second + * table, there will be no elements in both tables up to + * the current rehashing index, so we jump if possible. + * (this happens when going from big to small table). */ + if (i >= d->ht[1].size) i = d->rehashidx; + continue; + } + if (i >= d->ht[j].size) continue; /* Out of range for this table. */ + dictEntry *he = d->ht[j].table[i]; + + /* Count contiguous empty buckets, and jump to other + * locations if they reach 'count' (with a minimum of 5). */ + if (he == NULL) { + emptylen++; + if (emptylen >= 5 && emptylen > count) { + i = random() & maxsizemask; + emptylen = 0; + } + } else { + while (he) { + /* Collect all the elements of the buckets found non + * empty while iterating. */ + *des = he; + des++; + he = he->next; + stored++; + if (stored == count) return stored; + } + } + } + i = (i+1) & maxsizemask; + } + return stored; +} + /* Function to reverse bits. Algorithm from: * http://graphics.stanford.edu/~seander/bithacks.html#ReverseParallel */ static unsigned long rev(unsigned long v) { diff --git a/src/dict.h b/src/dict.h index 7421078f..5959a57b 100644 --- a/src/dict.h +++ b/src/dict.h @@ -164,6 +164,7 @@ dictIterator *dictGetSafeIterator(dict *d); dictEntry *dictNext(dictIterator *iter); void dictReleaseIterator(dictIterator *iter); dictEntry *dictGetRandomKey(dict *d); +unsigned int dictGetSomeKeys(dict *d, dictEntry **des, unsigned int count); unsigned int dictGetRandomKeys(dict *d, dictEntry **des, unsigned int count); void dictPrintStats(dict *d); unsigned int dictGenHashFunction(const void *key, int len); diff --git a/src/redis.c b/src/redis.c index b2f9ffc6..a1d8375e 100644 --- a/src/redis.c +++ b/src/redis.c @@ -3148,7 +3148,7 @@ void evictionPoolPopulate(dict *sampledict, dict *keydict, struct evictionPoolEn } #if 1 /* Use bulk get by default. */ - count = dictGetRandomKeys(sampledict,samples,server.maxmemory_samples); + count = dictGetSomeKeys(sampledict,samples,server.maxmemory_samples); #else count = server.maxmemory_samples; for (j = 0; j < count; j++) samples[j] = dictGetRandomKey(sampledict);