From ef59a8bc9ef426f9d24e701e9b73a6a03ddd3d0f Mon Sep 17 00:00:00 2001
From: antirez <antirez@gmail.com>
Date: Thu, 14 Oct 2010 13:52:58 +0200
Subject: [PATCH] Object approximated LRU algorithm enhanced / fixed /
 refactored. This is used for the VM currently but will soon be used for
 maxmemory expiring.

---
 src/db.c     |  5 +++--
 src/debug.c  |  6 ++++--
 src/object.c | 31 +++++++++++++++++++++++--------
 src/redis.c  | 18 ++++++++----------
 src/redis.h  |  2 ++
 src/vm.c     |  2 +-
 6 files changed, 41 insertions(+), 23 deletions(-)

diff --git a/src/db.c b/src/db.c
index 44507847..c1ce79b5 100644
--- a/src/db.c
+++ b/src/db.c
@@ -11,6 +11,9 @@ robj *lookupKey(redisDb *db, robj *key) {
     if (de) {
         robj *val = dictGetEntryVal(de);
 
+        /* Update the access time for the aging algorithm. */
+        val->lru = server.lruclock;
+
         if (server.vm_enabled) {
             if (val->storage == REDIS_VM_MEMORY ||
                 val->storage == REDIS_VM_SWAPPING)
@@ -18,8 +21,6 @@ robj *lookupKey(redisDb *db, robj *key) {
                 /* If we were swapping the object out, cancel the operation */
                 if (val->storage == REDIS_VM_SWAPPING)
                     vmCancelThreadedIOJob(val);
-                /* Update the access time for the aging algorithm. */
-                val->lru = server.lruclock;
             } else {
                 int notify = (val->storage == REDIS_VM_LOADING);
 
diff --git a/src/debug.c b/src/debug.c
index 2f7ab58f..3b187da4 100644
--- a/src/debug.c
+++ b/src/debug.c
@@ -213,9 +213,11 @@ void debugCommand(redisClient *c) {
             strenc = strEncoding(val->encoding);
             addReplyStatusFormat(c,
                 "Value at:%p refcount:%d "
-                "encoding:%s serializedlength:%lld",
+                "encoding:%s serializedlength:%lld "
+                "lru     :%d lru_seconds_idle:%lu",
                 (void*)val, val->refcount,
-                strenc, (long long) rdbSavedObjectLen(val,NULL));
+                strenc, (long long) rdbSavedObjectLen(val,NULL),
+                val->lru, estimateObjectIdleTime(val));
         } else {
             vmpointer *vp = (vmpointer*) val;
             addReplyStatusFormat(c,
diff --git a/src/object.c b/src/object.c
index c1a08245..e7fa3742 100644
--- a/src/object.c
+++ b/src/object.c
@@ -19,14 +19,19 @@ robj *createObject(int type, void *ptr) {
     o->encoding = REDIS_ENCODING_RAW;
     o->ptr = ptr;
     o->refcount = 1;
-    if (server.vm_enabled) {
-        /* Note that this code may run in the context of an I/O thread
-         * and accessing server.lruclock in theory is an error
-         * (no locks). But in practice this is safe, and even if we read
-         * garbage Redis will not fail. */
-        o->lru = server.lruclock;
-        o->storage = REDIS_VM_MEMORY;
-    }
+    /* Set the LRU to the current lruclock (minutes resolution).
+     * We do this regardless of whether VM is active, as the LRU is also
+     * used for the maxmemory directive when Redis is used as a cache.
+     *
+     * Note that this code may run in the context of an I/O thread
+     * and accessing server.lruclock in theory is an error
+     * (no locks). But in practice this is safe, and even if we read
+     * garbage Redis will not fail. */
+    o->lru = server.lruclock;
+    /* The following is only needed if VM is active, but since the conditional
+     * is probably more costly than initializing the field it's better to
+     * have every field properly initialized anyway. */
+    o->storage = REDIS_VM_MEMORY;
     return o;
 }
 
@@ -433,3 +438,13 @@ char *strEncoding(int encoding) {
     default: return "unknown";
     }
 }
+
+/* Estimate the object's idle time in seconds: the minutes elapsed between
+ * o->lru and server.lruclock (assuming at most one clock wrap), times 60. */
+unsigned long estimateObjectIdleTime(robj *o) {
+    if (server.lruclock >= o->lru) {
+        return (server.lruclock - o->lru) * 60;
+    } else { /* lruclock < o->lru: treated as a single wrap past the max. */
+        return ((REDIS_LRU_CLOCK_MAX - o->lru) + server.lruclock) * 60;
+    }
+}
diff --git a/src/redis.c b/src/redis.c
index 27a855d9..774b3a81 100644
--- a/src/redis.c
+++ b/src/redis.c
@@ -490,19 +490,15 @@ int serverCron(struct aeEventLoop *eventLoop, long long id, void *clientData) {
      * in objects at every object access, and accuracy is not needed.
      * To access a global var is faster than calling time(NULL) */
     server.unixtime = time(NULL);
-    /* We have just 21 bits per object for LRU information.
+    /* We have just 22 bits per object for LRU information.
      * So we use an (eventually wrapping) LRU clock with minutes resolution.
+     * 2^22 minutes are more than 7 years.
      *
-     * When we need to select what object to swap, we compute the minimum
-     * time distance between the current lruclock and the object last access
-     * lruclock info. Even if clocks will wrap on overflow, there is
-     * the interesting property that we are sure that at least
-     * ABS(A-B) minutes passed between current time and timestamp B.
-     *
-     * This is not precise but we don't need at all precision, but just
-     * something statistically reasonable.
+     * Note that the clock is masked with REDIS_LRU_CLOCK_MAX ((1<<21)-1), so
+     * it actually wraps every 2^21 minutes (almost 4 years). That's not a
+     * problem: things still work, just some objects appear younger to Redis :)
      */
-    server.lruclock = (time(NULL)/60)&((1<<21)-1);
+    server.lruclock = (time(NULL)/60) & REDIS_LRU_CLOCK_MAX;
 
     /* We received a SIGTERM, shutting down here in a safe way, as it is
      * not ok doing so inside the signal handler. */
@@ -1165,6 +1161,7 @@ sds genRedisInfoString(void) {
         "process_id:%ld\r\n"
         "uptime_in_seconds:%ld\r\n"
         "uptime_in_days:%ld\r\n"
+        "lru_clock:%ld\r\n"
         "used_cpu_sys:%.2f\r\n"
         "used_cpu_user:%.2f\r\n"
         "used_cpu_sys_childrens:%.2f\r\n"
@@ -1196,6 +1193,7 @@ sds genRedisInfoString(void) {
         (long) getpid(),
         uptime,
         uptime/(3600*24),
+        (unsigned long) server.lruclock,
         (float)self_ru.ru_utime.tv_sec+(float)self_ru.ru_utime.tv_usec/1000000,
         (float)self_ru.ru_stime.tv_sec+(float)self_ru.ru_stime.tv_usec/1000000,
         (float)c_ru.ru_utime.tv_sec+(float)c_ru.ru_utime.tv_usec/1000000,
diff --git a/src/redis.h b/src/redis.h
index 3e9fc236..d768b184 100644
--- a/src/redis.h
+++ b/src/redis.h
@@ -211,6 +211,7 @@ void _redisPanic(char *msg, char *file, int line);
 /* A redis object, that is a type able to hold a string / list / set */
 
 /* The actual Redis Object */
+#define REDIS_LRU_CLOCK_MAX ((1<<21)-1) /* Max value of obj->lru */
 typedef struct redisObject {
     unsigned type:4;
     unsigned storage:2;     /* REDIS_VM_MEMORY or REDIS_VM_SWAPPING */
@@ -678,6 +679,7 @@ int getLongLongFromObject(robj *o, long long *target);
 char *strEncoding(int encoding);
 int compareStringObjects(robj *a, robj *b);
 int equalStringObjects(robj *a, robj *b);
+unsigned long estimateObjectIdleTime(robj *o);
 
 /* Replication */
 void replicationFeedSlaves(list *slaves, int dictid, robj **argv, int argc);
diff --git a/src/vm.c b/src/vm.c
index ee831fb9..1aad95d7 100644
--- a/src/vm.c
+++ b/src/vm.c
@@ -362,7 +362,7 @@ robj *vmPreviewObject(robj *o) {
 double computeObjectSwappability(robj *o) {
     /* actual age can be >= minage, but not < minage. As we use wrapping
      * 21 bit clocks with minutes resolution for the LRU. */
-    time_t minage = abs(server.lruclock - o->lru);
+    time_t minage = estimateObjectIdleTime(o);
     long asize = 0, elesize;
     robj *ele;
     list *l;