From 156929ee974ce968585144b0d0226e56622e6add Mon Sep 17 00:00:00 2001
From: antirez <antirez@gmail.com>
Date: Fri, 28 Mar 2014 16:24:35 +0100
Subject: [PATCH] HLLADD implemented.

---
 src/hyperloglog.c | 94 +++++++++++++++++++++++++++++++++++++++++++++--
 src/redis.c       |  1 +
 src/redis.h       |  1 +
 3 files changed, 93 insertions(+), 3 deletions(-)

diff --git a/src/hyperloglog.c b/src/hyperloglog.c
index 8874cde7..b426efc5 100644
--- a/src/hyperloglog.c
+++ b/src/hyperloglog.c
@@ -29,8 +29,8 @@
  * POSSIBILITY OF SUCH DAMAGE.
  */
 
+#include <stdint.h>
 #include "redis.h"
-#include "crc64.h"
 
 /* The Redis HyperLogLog implementation is based on the following ideas:
  *
@@ -182,6 +182,42 @@
 
 /* ========================= HyperLogLog algorithm  ========================= */
 
+/* Our hash function is MurmurHash2, 64 bit version. The key is read byte by
+ * byte as little endian words: no unaligned access, same hash on any host. */
+uint64_t MurmurHash64A (const void * key, int len, unsigned int seed) {
+    const uint64_t m = 0xc6a4a7935bd1e995ULL;
+    const int r = 47;
+    uint64_t h = seed ^ (len * m); /* Initial state from seed and length. */
+    const unsigned char *data = (const unsigned char *)key;
+    const unsigned char *end = data + (len - (len & 7)); /* Full words end. */
+
+    while(data != end) { /* Mix the input 8 bytes at a time. */
+        uint64_t k = 0;
+        int j;
+        for (j = 0; j < 64; j += 8) k |= (uint64_t)*data++ << j;
+        k *= m;
+        k ^= k >> r;
+        k *= m;
+        h ^= k;
+        h *= m;
+    }
+    switch(len & 7) { /* Mix the 1-7 trailing bytes: cases fall through. */
+    case 7: h ^= (uint64_t)data[6] << 48;
+    case 6: h ^= (uint64_t)data[5] << 40;
+    case 5: h ^= (uint64_t)data[4] << 32;
+    case 4: h ^= (uint64_t)data[3] << 24;
+    case 3: h ^= (uint64_t)data[2] << 16;
+    case 2: h ^= (uint64_t)data[1] << 8;
+    case 1: h ^= (uint64_t)data[0];
+            h *= m;
+    };
+
+    h ^= h >> r; /* Final avalanche of the state. */
+    h *= m;
+    h ^= h >> r;
+    return h;
+}
+
 /* "Add" the element in the hyperloglog data structure.
  * Actually nothing is added, but the max 0 pattern counter of the subset
  * the element belongs to is incremented if needed.
@@ -193,7 +229,7 @@
  * The function always succeed, however if as a result of the operation
  * the approximated cardinality changed, 1 is returned. Otherwise 0
  * is returned. */
-int hllAdd(uint8_t *registers, uint8_t *ele, size_t elesize) {
+int hllAdd(uint8_t *registers, unsigned char *ele, size_t elesize) {
     uint64_t hash, bit, index;
     uint8_t oldcount, count;
 
@@ -203,7 +239,7 @@ int hllAdd(uint8_t *registers, uint8_t *ele, size_t elesize) {
      *
      * This may sound like inefficient, but actually in the average case
      * there are high probabilities to find a 1 after a few iterations. */
-    hash = crc64(0,ele,elesize);
+    hash = MurmurHash64A(ele,elesize,0);
     bit = REDIS_HLL_REGISTERS;
     count = 0;
     while((hash & bit) == 0) {
@@ -215,6 +251,7 @@ int hllAdd(uint8_t *registers, uint8_t *ele, size_t elesize) {
         bit <<= 1;
     }
 
+    /* Update the register if this element produced a longer run of zeroes. */
     index = hash & REDIS_HLL_P_MASK; /* Index a register inside registers. */
     HLL_GET_REGISTER(oldcount,registers,index);
     if (count > oldcount) {
@@ -227,6 +264,57 @@ int hllAdd(uint8_t *registers, uint8_t *ele, size_t elesize) {
 
 /* ========================== HyperLogLog commands ========================== */
 
+/* HLLADD var ele ele ele ... ele => :0 or :1
+ * Replies :1 if the key was created or a register changed, :0 otherwise.
+ * Returns early, key untouched, if it is not a REDIS_HLL_SIZE bytes string. */
+void hllAddCommand(redisClient *c) {
+    robj *o = lookupKeyWrite(c->db,c->argv[1]);
+    uint8_t *registers;
+    int updated = 0, j;
+
+    if (o == NULL) {
+        /* Create the key as a string of exactly REDIS_HLL_SIZE bytes:
+         * sdsnewlen() with a NULL init pointer returns zeroed bytes. */
+        o = createObject(REDIS_STRING,sdsnewlen(NULL,REDIS_HLL_SIZE));
+        dbAdd(c->db,c->argv[1],o);
+        updated++; /* Creating the key is a change that must propagate. */
+    } else {
+        /* Key exists, check type */
+        if (checkType(c,o,REDIS_STRING)) return;
+
+        /* A string representing an HLL must match the size exactly. */
+        if (stringObjectLen(o) != REDIS_HLL_SIZE) {
+            addReplyErrorFormat(c,
+                "HLLADD target key must contain a %d bytes string.",
+                REDIS_HLL_SIZE);
+            return;
+        }
+
+        /* If the object is shared or encoded, we have to make a copy. */
+        if (o->refcount != 1 || o->encoding != REDIS_ENCODING_RAW) {
+            robj *decoded = getDecodedObject(o);
+            o = createRawStringObject(decoded->ptr, sdslen(decoded->ptr));
+            decrRefCount(decoded);
+            dbOverwrite(c->db,c->argv[1],o);
+        }
+    }
+    /* Perform the low level ADD operation for every element. */
+    registers = (uint8_t*) o->ptr;
+    for (j = 2; j < c->argc; j++) {
+        if (hllAdd(registers, (unsigned char*)c->argv[j]->ptr,
+                sdslen(c->argv[j]->ptr)))
+        {
+            updated++;
+        }
+    }
+    if (updated) {
+        signalModifiedKey(c->db,c->argv[1]);
+        notifyKeyspaceEvent(REDIS_NOTIFY_STRING,"hlladd",c->argv[1],c->db->id);
+        server.dirty++;
+    }
+    addReply(c, updated ? shared.cone : shared.czero);
+}
+
 /* This command performs a self-test of the HLL registers implementation.
  * Something that is not easy to test from within the outside.
  *
diff --git a/src/redis.c b/src/redis.c
index 07ff7c20..b2ddd8b2 100644
--- a/src/redis.c
+++ b/src/redis.c
@@ -269,6 +269,7 @@ struct redisCommand redisCommandTable[] = {
     {"bitpos",bitposCommand,-3,"r",0,NULL,1,1,1,0,0},
     {"wait",waitCommand,3,"rs",0,NULL,0,0,0,0,0},
     {"hllselftest",hllSelftestCommand,1,"r",0,NULL,0,0,0,0,0},
+    {"hlladd",hllAddCommand,-2,"wm",0,NULL,1,1,1,0,0}
 };
 
 struct evictionPoolEntry *evictionPoolAlloc(void);
diff --git a/src/redis.h b/src/redis.h
index 106d22ba..19779356 100644
--- a/src/redis.h
+++ b/src/redis.h
@@ -1448,6 +1448,7 @@ void bitposCommand(redisClient *c);
 void replconfCommand(redisClient *c);
 void waitCommand(redisClient *c);
 void hllSelftestCommand(redisClient *c);
+void hllAddCommand(redisClient *c);
 
 #if defined(__GNUC__)
 void *calloc(size_t count, size_t size) __attribute__ ((deprecated));