LRU: Make cross-database choices for eviction.

The LRU eviction code used to make local choices: for each DB visited, it
selected the best key to evict, and this was repeated for every DB. However,
this meant that DBs with very frequently accessed keys could be targeted
by the LRU algorithm while other DBs held many better candidates for
eviction.

This commit attempts to fix this problem for the LRU policy. However, the
TTL policy is not yet fixed by this commit; it will be fixed in a
subsequent commit.

This is an initial fix for issue #2647 (partial, because the TTL policy is not yet covered).
This commit is contained in:
antirez 2016-07-13 10:45:37 +02:00
parent e64bf05f43
commit e423f76e75
3 changed files with 158 additions and 107 deletions

View File

@ -50,8 +50,11 @@ struct evictionPoolEntry {
unsigned long long idle; /* Object idle time. */ unsigned long long idle; /* Object idle time. */
sds key; /* Key name. */ sds key; /* Key name. */
sds cached; /* Cached SDS object for key name. */ sds cached; /* Cached SDS object for key name. */
int dbid; /* Key DB number. */
}; };
static struct evictionPoolEntry *EvictionPoolLRU;
/* ---------------------------------------------------------------------------- /* ----------------------------------------------------------------------------
* Implementation of eviction, aging and LRU * Implementation of eviction, aging and LRU
* --------------------------------------------------------------------------*/ * --------------------------------------------------------------------------*/
@ -114,7 +117,7 @@ unsigned long long estimateObjectIdleTime(robj *o) {
* evicted in the whole database. */ * evicted in the whole database. */
/* Create a new eviction pool. */ /* Create a new eviction pool. */
struct evictionPoolEntry *evictionPoolAlloc(void) { void evictionPoolAlloc(void) {
struct evictionPoolEntry *ep; struct evictionPoolEntry *ep;
int j; int j;
@ -123,8 +126,9 @@ struct evictionPoolEntry *evictionPoolAlloc(void) {
ep[j].idle = 0; ep[j].idle = 0;
ep[j].key = NULL; ep[j].key = NULL;
ep[j].cached = sdsnewlen(NULL,EVPOOL_CACHED_SDS_SIZE); ep[j].cached = sdsnewlen(NULL,EVPOOL_CACHED_SDS_SIZE);
ep[j].dbid = 0;
} }
return ep; EvictionPoolLRU = ep;
} }
/* This is an helper function for freeMemoryIfNeeded(), it is used in order /* This is an helper function for freeMemoryIfNeeded(), it is used in order
@ -136,8 +140,7 @@ struct evictionPoolEntry *evictionPoolAlloc(void) {
* idle time are on the left, and keys with the higher idle time on the * idle time are on the left, and keys with the higher idle time on the
* right. */ * right. */
#define EVICTION_SAMPLES_ARRAY_SIZE 16 void evictionPoolPopulate(int dbid, dict *sampledict, dict *keydict, struct evictionPoolEntry *pool) {
void evictionPoolPopulate(dict *sampledict, dict *keydict, struct evictionPoolEntry *pool) {
int j, k, count; int j, k, count;
dictEntry *samples[server.maxmemory_samples]; dictEntry *samples[server.maxmemory_samples];
@ -176,15 +179,21 @@ void evictionPoolPopulate(dict *sampledict, dict *keydict, struct evictionPoolEn
if (pool[EVPOOL_SIZE-1].key == NULL) { if (pool[EVPOOL_SIZE-1].key == NULL) {
/* Free space on the right? Insert at k shifting /* Free space on the right? Insert at k shifting
* all the elements from k to end to the right. */ * all the elements from k to end to the right. */
/* Save SDS before overwriting. */
sds cached = pool[EVPOOL_SIZE-1].cached;
memmove(pool+k+1,pool+k, memmove(pool+k+1,pool+k,
sizeof(pool[0])*(EVPOOL_SIZE-k-1)); sizeof(pool[0])*(EVPOOL_SIZE-k-1));
pool[k].cached = cached;
} else { } else {
/* No free space on right? Insert at k-1 */ /* No free space on right? Insert at k-1 */
k--; k--;
/* Shift all elements on the left of k (included) to the /* Shift all elements on the left of k (included) to the
* left, so we discard the element with smaller idle time. */ * left, so we discard the element with smaller idle time. */
sds cached = pool[0].cached; /* Save SDS before overwriting. */
if (pool[0].key != pool[0].cached) sdsfree(pool[0].key); if (pool[0].key != pool[0].cached) sdsfree(pool[0].key);
memmove(pool,pool+1,sizeof(pool[0])*k); memmove(pool,pool+1,sizeof(pool[0])*k);
pool[k].cached = cached;
} }
} }
@ -201,6 +210,7 @@ void evictionPoolPopulate(dict *sampledict, dict *keydict, struct evictionPoolEn
pool[k].key = pool[k].cached; pool[k].key = pool[k].cached;
} }
pool[k].idle = idle; pool[k].idle = idle;
pool[k].dbid = dbid;
} }
} }
@ -249,44 +259,48 @@ int freeMemoryIfNeeded(void) {
latencyStartMonitor(latency); latencyStartMonitor(latency);
while (mem_freed < mem_tofree) { while (mem_freed < mem_tofree) {
int j, k, keys_freed = 0; int j, k, i, keys_freed = 0;
static int next_db = 0;
for (j = 0; j < server.dbnum; j++) {
long bestval = 0; /* just to prevent warning */
sds bestkey = NULL; sds bestkey = NULL;
dictEntry *de; int bestdbid;
redisDb *db = server.db+j; redisDb *db;
dict *dict; dict *dict;
dictEntry *de;
if (server.maxmemory_policy == MAXMEMORY_ALLKEYS_LRU || if (server.maxmemory_policy == MAXMEMORY_ALLKEYS_LRU ||
server.maxmemory_policy == MAXMEMORY_ALLKEYS_RANDOM)
{
dict = server.db[j].dict;
} else {
dict = server.db[j].expires;
}
if (dictSize(dict) == 0) continue;
/* volatile-random and allkeys-random policy */
if (server.maxmemory_policy == MAXMEMORY_ALLKEYS_RANDOM ||
server.maxmemory_policy == MAXMEMORY_VOLATILE_RANDOM)
{
de = dictGetRandomKey(dict);
bestkey = dictGetKey(de);
}
/* volatile-lru and allkeys-lru policy */
else if (server.maxmemory_policy == MAXMEMORY_ALLKEYS_LRU ||
server.maxmemory_policy == MAXMEMORY_VOLATILE_LRU) server.maxmemory_policy == MAXMEMORY_VOLATILE_LRU)
{ {
struct evictionPoolEntry *pool = db->eviction_pool; struct evictionPoolEntry *pool = EvictionPoolLRU;
while(bestkey == NULL) { while(bestkey == NULL) {
evictionPoolPopulate(dict, db->dict, db->eviction_pool); unsigned long total_keys = 0, keys;
/* We don't want to make local-db choices when evicting keys,
* so to start, populate the eviction pool by sampling keys from
* every DB. */
for (i = 0; i < server.dbnum; i++) {
db = server.db+i;
dict = (server.maxmemory_policy == MAXMEMORY_ALLKEYS_LRU) ?
db->dict : db->expires;
if ((keys = dictSize(dict)) != 0) {
evictionPoolPopulate(i, dict, db->dict, pool);
total_keys += keys;
}
}
if (!total_keys) break; /* No keys to evict. */
/* Go backward from best to worst element to evict. */ /* Go backward from best to worst element to evict. */
for (k = EVPOOL_SIZE-1; k >= 0; k--) { for (k = EVPOOL_SIZE-1; k >= 0; k--) {
if (pool[k].key == NULL) continue; if (pool[k].key == NULL) continue;
de = dictFind(dict,pool[k].key); bestdbid = pool[k].dbid;
if (server.maxmemory_policy == MAXMEMORY_ALLKEYS_LRU) {
de = dictFind(server.db[pool[k].dbid].dict,
pool[k].key);
} else {
de = dictFind(server.db[pool[k].dbid].expires,
pool[k].key);
}
/* Remove the entry from the pool. */ /* Remove the entry from the pool. */
if (pool[k].key != pool[k].cached) if (pool[k].key != pool[k].cached)
@ -300,34 +314,72 @@ int freeMemoryIfNeeded(void) {
bestkey = dictGetKey(de); bestkey = dictGetKey(de);
break; break;
} else { } else {
/* Ghost... */ /* Ghost... Iterate again. */
continue;
} }
} }
} }
} }
/* volatile-random and allkeys-random policy */
else if (server.maxmemory_policy == MAXMEMORY_ALLKEYS_RANDOM ||
server.maxmemory_policy == MAXMEMORY_VOLATILE_RANDOM)
{
/* When evicting a random key, we try to evict a key for
* each DB, so we use the static 'next_db' variable to
* incrementally visit all DBs. */
for (i = 0; i < server.dbnum; i++) {
j = (++next_db) % server.dbnum;
db = server.db+j;
dict = (server.maxmemory_policy == MAXMEMORY_ALLKEYS_RANDOM) ?
db->dict : db->expires;
if (dictSize(dict) != 0) {
de = dictGetRandomKey(dict);
bestkey = dictGetKey(de);
bestdbid = j;
break;
}
}
}
/* volatile-ttl */ /* volatile-ttl */
else if (server.maxmemory_policy == MAXMEMORY_VOLATILE_TTL) { else if (server.maxmemory_policy == MAXMEMORY_VOLATILE_TTL) {
long bestttl = 0; /* Initialized to avoid warning. */
/* In this policy we scan a single DB per iteration (visiting
* a different DB per call), expiring the key with the smallest
* TTL among the few sampled.
*
* Note that this algorithm makes local-DB choices, and should
* use a pool and code more similar to the one used in the
* LRU eviction policies in the future. */
for (i = 0; i < server.dbnum; i++) {
j = (++next_db) % server.dbnum;
db = server.db+j;
dict = db->expires;
if (dictSize(dict) != 0) {
for (k = 0; k < server.maxmemory_samples; k++) { for (k = 0; k < server.maxmemory_samples; k++) {
sds thiskey; sds thiskey;
long thisval; long thisttl;
de = dictGetRandomKey(dict); de = dictGetRandomKey(dict);
thiskey = dictGetKey(de); thiskey = dictGetKey(de);
thisval = (long) dictGetVal(de); thisttl = (long) dictGetVal(de);
/* Expire sooner (minor expire unix timestamp) is better /* Keys expiring sooner (smaller unix timestamp) are
* candidate for deletion */ * better candidates for deletion */
if (bestkey == NULL || thisval < bestval) { if (bestkey == NULL || thisttl < bestttl) {
bestkey = thiskey; bestkey = thiskey;
bestval = thisval; bestttl = thisttl;
bestdbid = j;
}
}
} }
} }
} }
/* Finally remove the selected key. */ /* Finally remove the selected key. */
if (bestkey) { if (bestkey) {
db = server.db+bestdbid;
robj *keyobj = createStringObject(bestkey,sdslen(bestkey)); robj *keyobj = createStringObject(bestkey,sdslen(bestkey));
propagateExpire(db,keyobj,server.lazyfree_lazy_eviction); propagateExpire(db,keyobj,server.lazyfree_lazy_eviction);
/* We compute the amount of memory freed by db*Delete() alone. /* We compute the amount of memory freed by db*Delete() alone.
@ -361,7 +413,7 @@ int freeMemoryIfNeeded(void) {
* transmission here inside the loop. */ * transmission here inside the loop. */
if (slaves) flushSlavesOutputBuffers(); if (slaves) flushSlavesOutputBuffers();
} }
}
if (!keys_freed) { if (!keys_freed) {
latencyEndMonitor(latency); latencyEndMonitor(latency);
latencyAddSampleIfNeeded("eviction-cycle",latency); latencyAddSampleIfNeeded("eviction-cycle",latency);

View File

@ -1748,10 +1748,10 @@ void initServer(void) {
server.db[j].blocking_keys = dictCreate(&keylistDictType,NULL); server.db[j].blocking_keys = dictCreate(&keylistDictType,NULL);
server.db[j].ready_keys = dictCreate(&objectKeyPointerValueDictType,NULL); server.db[j].ready_keys = dictCreate(&objectKeyPointerValueDictType,NULL);
server.db[j].watched_keys = dictCreate(&keylistDictType,NULL); server.db[j].watched_keys = dictCreate(&keylistDictType,NULL);
server.db[j].eviction_pool = evictionPoolAlloc();
server.db[j].id = j; server.db[j].id = j;
server.db[j].avg_ttl = 0; server.db[j].avg_ttl = 0;
} }
evictionPoolAlloc(); /* Initialize the LRU keys pool. */
server.pubsub_channels = dictCreate(&keylistDictType,NULL); server.pubsub_channels = dictCreate(&keylistDictType,NULL);
server.pubsub_patterns = listCreate(); server.pubsub_patterns = listCreate();
listSetFreeMethod(server.pubsub_patterns,freePubsubPattern); listSetFreeMethod(server.pubsub_patterns,freePubsubPattern);

View File

@ -558,10 +558,9 @@ struct evictionPoolEntry; /* Defined in evict.c */
typedef struct redisDb { typedef struct redisDb {
dict *dict; /* The keyspace for this DB */ dict *dict; /* The keyspace for this DB */
dict *expires; /* Timeout of keys with a timeout set */ dict *expires; /* Timeout of keys with a timeout set */
dict *blocking_keys; /* Keys with clients waiting for data (BLPOP) */ dict *blocking_keys; /* Keys with clients waiting for data (BLPOP)*/
dict *ready_keys; /* Blocked keys that received a PUSH */ dict *ready_keys; /* Blocked keys that received a PUSH */
dict *watched_keys; /* WATCHED keys for MULTI/EXEC CAS */ dict *watched_keys; /* WATCHED keys for MULTI/EXEC CAS */
struct evictionPoolEntry *eviction_pool; /* Eviction pool of keys */
int id; /* Database ID */ int id; /* Database ID */
long long avg_ttl; /* Average TTL, just for stats */ long long avg_ttl; /* Average TTL, just for stats */
} redisDb; } redisDb;
@ -1606,7 +1605,7 @@ void disconnectAllBlockedClients(void);
void activeExpireCycle(int type); void activeExpireCycle(int type);
/* evict.c -- maxmemory handling and LRU eviction. */ /* evict.c -- maxmemory handling and LRU eviction. */
struct evictionPoolEntry *evictionPoolAlloc(void); void evictionPoolAlloc(void);
/* Git SHA1 */ /* Git SHA1 */
char *redisGitSHA1(void); char *redisGitSHA1(void);