mirror of
https://github.com/fluencelabs/redis
synced 2025-03-19 17:10:50 +00:00
Defrag: fix comments & code to conform to the Redis code base.
Don't go over 80 cols. Start with a capital letter, use a capital letter after a period, end each comment with a period, and so forth. No actual code behavior touched at all.
This commit is contained in:
parent
173d692bc2
commit
e91f0ea1b3
138
src/defrag.c
138
src/defrag.c
@ -58,14 +58,16 @@ void* activeDefragAlloc(void *ptr) {
|
|||||||
server.stat_active_defrag_misses++;
|
server.stat_active_defrag_misses++;
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
/* if this run is more utilized than the average utilization in this bin (or it is full), skip it.
|
/* if this run is more utilized than the average utilization in this bin
|
||||||
* this will eventually move all the allocations from relatively empty runs into relatively full runs. */
|
* (or it is full), skip it. This will eventually move all the allocations
|
||||||
|
* from relatively empty runs into relatively full runs. */
|
||||||
if (run_util > bin_util || run_util == 1<<16) {
|
if (run_util > bin_util || run_util == 1<<16) {
|
||||||
server.stat_active_defrag_misses++;
|
server.stat_active_defrag_misses++;
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
/* move this allocation to a new allocation.
|
/* move this allocation to a new allocation.
|
||||||
* make sure not to use the thread cache. so that we don't get back the same pointers we try to free */
|
* make sure not to use the thread cache. so that we don't get back the same
|
||||||
|
* pointers we try to free */
|
||||||
size = zmalloc_size(ptr);
|
size = zmalloc_size(ptr);
|
||||||
newptr = zmalloc_no_tcache(size);
|
newptr = zmalloc_no_tcache(size);
|
||||||
memcpy(newptr, ptr, size);
|
memcpy(newptr, ptr, size);
|
||||||
@ -99,7 +101,7 @@ robj *activeDefragStringOb(robj* ob, int *defragged) {
|
|||||||
if (ob->refcount!=1)
|
if (ob->refcount!=1)
|
||||||
return NULL;
|
return NULL;
|
||||||
|
|
||||||
/* try to defrag robj (only if not an EMBSTR type (handled below) */
|
/* Try to defrag robj (only if not an EMBSTR type, handled below). */
|
||||||
if (ob->type!=OBJ_STRING || ob->encoding!=OBJ_ENCODING_EMBSTR) {
|
if (ob->type!=OBJ_STRING || ob->encoding!=OBJ_ENCODING_EMBSTR) {
|
||||||
if ((ret = activeDefragAlloc(ob))) {
|
if ((ret = activeDefragAlloc(ob))) {
|
||||||
ob = ret;
|
ob = ret;
|
||||||
@ -116,7 +118,8 @@ robj *activeDefragStringOb(robj* ob, int *defragged) {
|
|||||||
(*defragged)++;
|
(*defragged)++;
|
||||||
}
|
}
|
||||||
} else if (ob->encoding==OBJ_ENCODING_EMBSTR) {
|
} else if (ob->encoding==OBJ_ENCODING_EMBSTR) {
|
||||||
/* the sds is embedded in the object allocation, calculate the offset and update the pointer in the new allocation */
|
/* The sds is embedded in the object allocation, calculate the
|
||||||
|
* offset and update the pointer in the new allocation. */
|
||||||
long ofs = (intptr_t)ob->ptr - (intptr_t)ob;
|
long ofs = (intptr_t)ob->ptr - (intptr_t)ob;
|
||||||
if ((ret = activeDefragAlloc(ob))) {
|
if ((ret = activeDefragAlloc(ob))) {
|
||||||
ret->ptr = (void*)((intptr_t)ret + ofs);
|
ret->ptr = (void*)((intptr_t)ret + ofs);
|
||||||
@ -129,14 +132,16 @@ robj *activeDefragStringOb(robj* ob, int *defragged) {
|
|||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Defrag helper for dictEntries to be used during dict iteration (called on each step).
|
/* Defrag helper for dictEntries to be used during dict iteration (called on
|
||||||
* returns a stat of how many pointers were moved. */
|
* each step). Returns a stat of how many pointers were moved. */
|
||||||
int dictIterDefragEntry(dictIterator *iter) {
|
int dictIterDefragEntry(dictIterator *iter) {
|
||||||
/* This function is a little bit dirty since it messes with the internals of the dict and it's iterator,
|
/* This function is a little bit dirty since it messes with the internals
|
||||||
* but the benefit is that it is very easy to use, and require no other chagnes in the dict. */
|
* of the dict and its iterator, but the benefit is that it is very easy
|
||||||
|
* to use, and requires no other changes in the dict. */
|
||||||
int defragged = 0;
|
int defragged = 0;
|
||||||
dictht *ht;
|
dictht *ht;
|
||||||
/* handle the next entry (if there is one), and update the pointer in the current entry. */
|
/* Handle the next entry (if there is one), and update the pointer in the
|
||||||
|
* current entry. */
|
||||||
if (iter->nextEntry) {
|
if (iter->nextEntry) {
|
||||||
dictEntry *newde = activeDefragAlloc(iter->nextEntry);
|
dictEntry *newde = activeDefragAlloc(iter->nextEntry);
|
||||||
if (newde) {
|
if (newde) {
|
||||||
@ -159,8 +164,8 @@ int dictIterDefragEntry(dictIterator *iter) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/* Defrag helper for dict main allocations (dict struct, and hash tables).
|
/* Defrag helper for dict main allocations (dict struct, and hash tables).
|
||||||
* receives a pointer to the dict* and implicitly updates it when the dict struct itself was moved.
|
* Receives a pointer to the dict* and implicitly updates it when the dict
|
||||||
* returns a stat of how many pointers were moved. */
|
* struct itself was moved. Returns a stat of how many pointers were moved. */
|
||||||
int dictDefragTables(dict** dictRef) {
|
int dictDefragTables(dict** dictRef) {
|
||||||
dict *d = *dictRef;
|
dict *d = *dictRef;
|
||||||
dictEntry **newtable;
|
dictEntry **newtable;
|
||||||
@ -200,10 +205,12 @@ void zslUpdateNode(zskiplist *zsl, zskiplistNode *oldnode, zskiplistNode *newnod
|
|||||||
}
|
}
|
||||||
|
|
||||||
/* Defrag helper for sorted set.
|
/* Defrag helper for sorted set.
|
||||||
* Update the robj pointer, defrag the skiplist struct and return the new score reference.
|
* Update the robj pointer, defrag the skiplist struct and return the new score
|
||||||
* we may not access oldele pointer (not even the pointer stored in the skiplist), as it was already freed.
|
* reference. We may not access oldele pointer (not even the pointer stored in
|
||||||
* newele may be null, in which case we only need to defrag the skiplist, but not update the obj pointer.
|
* the skiplist), as it was already freed. Newele may be null, in which case we
|
||||||
* when return value is non-NULL, it is the score reference that must be updated in the dict record. */
|
* only need to defrag the skiplist, but not update the obj pointer.
|
||||||
|
* When return value is non-NULL, it is the score reference that must be updated
|
||||||
|
* in the dict record. */
|
||||||
double *zslDefrag(zskiplist *zsl, double score, sds oldele, sds newele) {
|
double *zslDefrag(zskiplist *zsl, double score, sds oldele, sds newele) {
|
||||||
zskiplistNode *update[ZSKIPLIST_MAXLEVEL], *x, *newx;
|
zskiplistNode *update[ZSKIPLIST_MAXLEVEL], *x, *newx;
|
||||||
int i;
|
int i;
|
||||||
@ -214,7 +221,9 @@ double *zslDefrag(zskiplist *zsl, double score, sds oldele, sds newele) {
|
|||||||
x = zsl->header;
|
x = zsl->header;
|
||||||
for (i = zsl->level-1; i >= 0; i--) {
|
for (i = zsl->level-1; i >= 0; i--) {
|
||||||
while (x->level[i].forward &&
|
while (x->level[i].forward &&
|
||||||
x->level[i].forward->ele != oldele && /* make sure not to access the ->obj pointer if it matches oldele */
|
x->level[i].forward->ele != oldele && /* make sure not to access the
|
||||||
|
->obj pointer if it matches
|
||||||
|
oldele */
|
||||||
(x->level[i].forward->score < score ||
|
(x->level[i].forward->score < score ||
|
||||||
(x->level[i].forward->score == score &&
|
(x->level[i].forward->score == score &&
|
||||||
sdscmp(x->level[i].forward->ele,ele) < 0)))
|
sdscmp(x->level[i].forward->ele,ele) < 0)))
|
||||||
@ -237,12 +246,13 @@ double *zslDefrag(zskiplist *zsl, double score, sds oldele, sds newele) {
|
|||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Utility function that replaces an old key pointer in the dictionary with a new pointer.
|
/* Utility function that replaces an old key pointer in the dictionary with a
|
||||||
* Additionally, we try to defrag the dictEntry in that dict.
|
* new pointer. Additionally, we try to defrag the dictEntry in that dict.
|
||||||
* oldkey mey be a dead pointer and should not be accessed (we get a pre-calculated hash value).
|
* Oldkey may be a dead pointer and should not be accessed (we get a
|
||||||
* newkey may be null if the key pointer wasn't moved.
|
* pre-calculated hash value). Newkey may be null if the key pointer wasn't
|
||||||
* return value is the the dictEntry if found, or NULL if not found.
|
* moved. Return value is the dictEntry if found, or NULL if not found.
|
||||||
* NOTE: this is very ugly code, but it let's us avoid the complication of doing a scan on another dict. */
|
* NOTE: this is very ugly code, but it lets us avoid the complication of
|
||||||
|
* doing a scan on another dict. */
|
||||||
dictEntry* replaceSateliteDictKeyPtrAndOrDefragDictEntry(dict *d, sds oldkey, sds newkey, unsigned int hash, int *defragged) {
|
dictEntry* replaceSateliteDictKeyPtrAndOrDefragDictEntry(dict *d, sds oldkey, sds newkey, unsigned int hash, int *defragged) {
|
||||||
dictEntry **deref = dictFindEntryRefByPtrAndHash(d, oldkey, hash);
|
dictEntry **deref = dictFindEntryRefByPtrAndHash(d, oldkey, hash);
|
||||||
if (deref) {
|
if (deref) {
|
||||||
@ -259,8 +269,9 @@ dictEntry* replaceSateliteDictKeyPtrAndOrDefragDictEntry(dict *d, sds oldkey, sd
|
|||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* for each key we scan in the main dict, this function will attempt to defrag all the various pointers it has.
|
/* For each key we scan in the main dict, this function will attempt to defrag
|
||||||
* returns a stat of how many pointers were moved. */
|
* all the various pointers it has. Returns a stat of how many pointers were
|
||||||
|
* moved. */
|
||||||
int defargKey(redisDb *db, dictEntry *de) {
|
int defargKey(redisDb *db, dictEntry *de) {
|
||||||
sds keysds = dictGetKey(de);
|
sds keysds = dictGetKey(de);
|
||||||
robj *newob, *ob;
|
robj *newob, *ob;
|
||||||
@ -270,19 +281,19 @@ int defargKey(redisDb *db, dictEntry *de) {
|
|||||||
int defragged = 0;
|
int defragged = 0;
|
||||||
sds newsds;
|
sds newsds;
|
||||||
|
|
||||||
/* try to defrag the key name */
|
/* Try to defrag the key name. */
|
||||||
newsds = activeDefragSds(keysds);
|
newsds = activeDefragSds(keysds);
|
||||||
if (newsds)
|
if (newsds)
|
||||||
defragged++, de->key = newsds;
|
defragged++, de->key = newsds;
|
||||||
if (dictSize(db->expires)) {
|
if (dictSize(db->expires)) {
|
||||||
/* Dirty code:
|
/* Dirty code:
|
||||||
* i can't search in db->expires for that key after i already released the pointer it holds
|
* I can't search in db->expires for that key after I already released
|
||||||
* it won't be able to do the string compare */
|
* the pointer it holds, since it won't be able to do the string compare. */
|
||||||
unsigned int hash = dictGetHash(db->dict, de->key);
|
unsigned int hash = dictGetHash(db->dict, de->key);
|
||||||
replaceSateliteDictKeyPtrAndOrDefragDictEntry(db->expires, keysds, newsds, hash, &defragged);
|
replaceSateliteDictKeyPtrAndOrDefragDictEntry(db->expires, keysds, newsds, hash, &defragged);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* try to defrag robj and / or string value */
|
/* Try to defrag robj and / or string value. */
|
||||||
ob = dictGetVal(de);
|
ob = dictGetVal(de);
|
||||||
if ((newob = activeDefragStringOb(ob, &defragged))) {
|
if ((newob = activeDefragStringOb(ob, &defragged))) {
|
||||||
de->v.val = newob;
|
de->v.val = newob;
|
||||||
@ -290,7 +301,7 @@ int defargKey(redisDb *db, dictEntry *de) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
if (ob->type == OBJ_STRING) {
|
if (ob->type == OBJ_STRING) {
|
||||||
/* already handled in activeDefragStringOb */
|
/* Already handled in activeDefragStringOb. */
|
||||||
} else if (ob->type == OBJ_LIST) {
|
} else if (ob->type == OBJ_LIST) {
|
||||||
if (ob->encoding == OBJ_ENCODING_QUICKLIST) {
|
if (ob->encoding == OBJ_ENCODING_QUICKLIST) {
|
||||||
quicklist *ql = ob->ptr, *newql;
|
quicklist *ql = ob->ptr, *newql;
|
||||||
@ -400,7 +411,7 @@ int defargKey(redisDb *db, dictEntry *de) {
|
|||||||
return defragged;
|
return defragged;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* defrag scan callback for the main db dictionary */
|
/* Defrag scan callback for the main db dictionary. */
|
||||||
void defragScanCallback(void *privdata, const dictEntry *de) {
|
void defragScanCallback(void *privdata, const dictEntry *de) {
|
||||||
int defragged = defargKey((redisDb*)privdata, (dictEntry*)de);
|
int defragged = defargKey((redisDb*)privdata, (dictEntry*)de);
|
||||||
server.stat_active_defrag_hits += defragged;
|
server.stat_active_defrag_hits += defragged;
|
||||||
@ -410,8 +421,8 @@ void defragScanCallback(void *privdata, const dictEntry *de) {
|
|||||||
server.stat_active_defrag_key_misses++;
|
server.stat_active_defrag_key_misses++;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* defrag scan callback for for each hash table bicket,
|
/* Defrag scan callback for each hash table bucket,
|
||||||
* used in order to defrag the dictEntry allocations */
|
* used in order to defrag the dictEntry allocations. */
|
||||||
void defragDictBucketCallback(void *privdata, dictEntry **bucketref) {
|
void defragDictBucketCallback(void *privdata, dictEntry **bucketref) {
|
||||||
UNUSED(privdata);
|
UNUSED(privdata);
|
||||||
while(*bucketref) {
|
while(*bucketref) {
|
||||||
@ -424,15 +435,24 @@ void defragDictBucketCallback(void *privdata, dictEntry **bucketref) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/* Utility function to get the fragmentation ratio from jemalloc.
|
/* Utility function to get the fragmentation ratio from jemalloc.
|
||||||
* it is critical to do that by comparing only heap maps that belown to jemalloc, and skip ones the jemalloc keeps as spare.
|
* It is critical to do that by comparing only heap maps that belong to
|
||||||
* since we use this fragmentation ratio in order to decide if a defrag action should be taken or not,
|
* jemalloc, and skip ones the jemalloc keeps as spare. Since we use this
|
||||||
* a false detection can cause the defragmenter to waste a lot of CPU without the possibility of getting any results. */
|
* fragmentation ratio in order to decide if a defrag action should be taken
|
||||||
|
* or not, a false detection can cause the defragmenter to waste a lot of CPU
|
||||||
|
* without the possibility of getting any results. */
|
||||||
float getAllocatorFragmentation(size_t *out_frag_bytes) {
|
float getAllocatorFragmentation(size_t *out_frag_bytes) {
|
||||||
size_t epoch = 1, allocated = 0, resident = 0, active = 0, sz = sizeof(size_t);
|
size_t epoch = 1, allocated = 0, resident = 0, active = 0, sz = sizeof(size_t);
|
||||||
je_mallctl("epoch", &epoch, &sz, &epoch, sz); /* Update the statistics cached by mallctl. */
|
/* Update the statistics cached by mallctl. */
|
||||||
je_mallctl("stats.resident", &resident, &sz, NULL, 0); /* unlike RSS, this does not include RSS from shared libraries and other non heap mappings */
|
je_mallctl("epoch", &epoch, &sz, &epoch, sz);
|
||||||
je_mallctl("stats.active", &active, &sz, NULL, 0); /* unlike resident, this doesn't not include the pages jemalloc reserves for re-use (purge will clean that) */
|
/* Unlike RSS, this does not include RSS from shared libraries and other non
|
||||||
je_mallctl("stats.allocated", &allocated, &sz, NULL, 0); /* unlike zmalloc_used_memory, this matches the stats.resident by taking into account all allocations done by this process (not only zmalloc) */
|
* heap mappings. */
|
||||||
|
je_mallctl("stats.resident", &resident, &sz, NULL, 0);
|
||||||
|
/* Unlike resident, this does not include the pages jemalloc reserves
|
||||||
|
* for re-use (purge will clean that). */
|
||||||
|
je_mallctl("stats.active", &active, &sz, NULL, 0);
|
||||||
|
/* Unlike zmalloc_used_memory, this matches the stats.resident by taking
|
||||||
|
* into account all allocations done by this process (not only zmalloc). */
|
||||||
|
je_mallctl("stats.allocated", &allocated, &sz, NULL, 0);
|
||||||
float frag_pct = ((float)active / allocated)*100 - 100;
|
float frag_pct = ((float)active / allocated)*100 - 100;
|
||||||
size_t frag_bytes = active - allocated;
|
size_t frag_bytes = active - allocated;
|
||||||
float rss_pct = ((float)resident / allocated)*100 - 100;
|
float rss_pct = ((float)resident / allocated)*100 - 100;
|
||||||
@ -461,24 +481,33 @@ void activeDefragCycle(void) {
|
|||||||
long long start, timelimit;
|
long long start, timelimit;
|
||||||
|
|
||||||
if (server.aof_child_pid!=-1 || server.rdb_child_pid!=-1)
|
if (server.aof_child_pid!=-1 || server.rdb_child_pid!=-1)
|
||||||
return; /* defragging memory while there's a fork will just do damage. */
|
return; /* Defragging memory while there's a fork will just do damage. */
|
||||||
|
|
||||||
/* once a second, check if we the fragmentation justfies starting a scan or making it more aggressive */
|
/* Once a second, check if the fragmentation justifies starting a scan
|
||||||
|
* or making it more aggressive. */
|
||||||
run_with_period(1000) {
|
run_with_period(1000) {
|
||||||
size_t frag_bytes;
|
size_t frag_bytes;
|
||||||
float frag_pct = getAllocatorFragmentation(&frag_bytes);
|
float frag_pct = getAllocatorFragmentation(&frag_bytes);
|
||||||
/* if we're not already running, and below the threshold, exit. */
|
/* If we're not already running, and below the threshold, exit. */
|
||||||
if (!server.active_defrag_running) {
|
if (!server.active_defrag_running) {
|
||||||
if(frag_pct < server.active_defrag_threshold_lower || frag_bytes < server.active_defrag_ignore_bytes)
|
if(frag_pct < server.active_defrag_threshold_lower || frag_bytes < server.active_defrag_ignore_bytes)
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* calculate the adaptive aggressiveness of the defrag */
|
/* Calculate the adaptive aggressiveness of the defrag. */
|
||||||
int cpu_pct = INTERPOLATE(frag_pct, server.active_defrag_threshold_lower, server.active_defrag_threshold_upper,
|
int cpu_pct = INTERPOLATE(frag_pct,
|
||||||
server.active_defrag_cycle_min, server.active_defrag_cycle_max);
|
server.active_defrag_threshold_lower,
|
||||||
cpu_pct = LIMIT(cpu_pct, server.active_defrag_cycle_min, server.active_defrag_cycle_max);
|
server.active_defrag_threshold_upper,
|
||||||
/* we allow increasing the aggressiveness during a scan, but don't reduce it */
|
server.active_defrag_cycle_min,
|
||||||
if (!server.active_defrag_running || cpu_pct > server.active_defrag_running) {
|
server.active_defrag_cycle_max);
|
||||||
|
cpu_pct = LIMIT(cpu_pct,
|
||||||
|
server.active_defrag_cycle_min,
|
||||||
|
server.active_defrag_cycle_max);
|
||||||
|
/* We allow increasing the aggressiveness during a scan, but don't
|
||||||
|
* reduce it. */
|
||||||
|
if (!server.active_defrag_running ||
|
||||||
|
cpu_pct > server.active_defrag_running)
|
||||||
|
{
|
||||||
server.active_defrag_running = cpu_pct;
|
server.active_defrag_running = cpu_pct;
|
||||||
serverLog(LL_VERBOSE,
|
serverLog(LL_VERBOSE,
|
||||||
"Starting active defrag, frag=%.0f%%, frag_bytes=%zu, cpu=%d%%",
|
"Starting active defrag, frag=%.0f%%, frag_bytes=%zu, cpu=%d%%",
|
||||||
@ -495,7 +524,7 @@ void activeDefragCycle(void) {
|
|||||||
|
|
||||||
do {
|
do {
|
||||||
if (!cursor) {
|
if (!cursor) {
|
||||||
/* Move on to next database, and stop if we reached the last one */
|
/* Move on to next database, and stop if we reached the last one. */
|
||||||
if (++current_db >= server.dbnum) {
|
if (++current_db >= server.dbnum) {
|
||||||
long long now = ustime();
|
long long now = ustime();
|
||||||
size_t frag_bytes;
|
size_t frag_bytes;
|
||||||
@ -512,7 +541,7 @@ void activeDefragCycle(void) {
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
else if (current_db==0) {
|
else if (current_db==0) {
|
||||||
/* start a scan from the first database */
|
/* Start a scan from the first database. */
|
||||||
start_scan = ustime();
|
start_scan = ustime();
|
||||||
start_stat = server.stat_active_defrag_hits;
|
start_stat = server.stat_active_defrag_hits;
|
||||||
}
|
}
|
||||||
@ -523,8 +552,9 @@ void activeDefragCycle(void) {
|
|||||||
|
|
||||||
do {
|
do {
|
||||||
cursor = dictScan(db->dict, cursor, defragScanCallback, defragDictBucketCallback, db);
|
cursor = dictScan(db->dict, cursor, defragScanCallback, defragDictBucketCallback, db);
|
||||||
/* once in 16 scan iterations, or 1000 pointer reallocations (if we have a lot of pointers in one hash bucket),
|
/* Once in 16 scan iterations, or 1000 pointer reallocations
|
||||||
* check if we reached the tiem limit */
|
* (if we have a lot of pointers in one hash bucket), check if we
|
||||||
|
* reached the time limit. */
|
||||||
if (cursor && (++iterations > 16 || server.stat_active_defrag_hits - defragged > 1000)) {
|
if (cursor && (++iterations > 16 || server.stat_active_defrag_hits - defragged > 1000)) {
|
||||||
if ((ustime() - start) > timelimit) {
|
if ((ustime() - start) > timelimit) {
|
||||||
return;
|
return;
|
||||||
@ -539,7 +569,7 @@ void activeDefragCycle(void) {
|
|||||||
#else /* HAVE_DEFRAG */
|
#else /* HAVE_DEFRAG */
|
||||||
|
|
||||||
void activeDefragCycle(void) {
|
void activeDefragCycle(void) {
|
||||||
/* not implemented yet*/
|
/* Not implemented yet. */
|
||||||
}
|
}
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
Loading…
x
Reference in New Issue
Block a user