From 3ed947fb30727aab198e164853cf3dcfc960429e Mon Sep 17 00:00:00 2001 From: antirez Date: Sat, 29 Mar 2014 12:12:44 +0100 Subject: [PATCH] HLLCOUNT 3x faster taking fast path for default params. --- src/hyperloglog.c | 56 ++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 46 insertions(+), 10 deletions(-) diff --git a/src/hyperloglog.c b/src/hyperloglog.c index 50983c33..951f8bc5 100644 --- a/src/hyperloglog.c +++ b/src/hyperloglog.c @@ -290,16 +290,52 @@ uint64_t hllCount(uint8_t *registers) { initialized = 1; } - /* Compute SUM(2^-register[0..i]). */ - for (j = 0; j < REDIS_HLL_REGISTERS; j++) { - unsigned long reg; + /* Compute SUM(2^-register[0..i]). + * Redis default is to use 16384 registers 6 bits each. The code works + * with other values by modifying the defines, but for our target value + * we take a faster path with unrolled loops. */ + if (REDIS_HLL_REGISTERS == 16384 && REDIS_HLL_BITS == 6 && 1) { + uint8_t *r = registers; + unsigned long r0, r1, r2, r3, r4, r5, r6, r7, r8, r9, + r10, r11, r12, r13, r14, r15; + for (j = 0; j < 1024; j++) { + /* Handle 16 registers per iteration. 
*/ + r0 = r[0] & 63; if (r0 == 0) ez++; + r1 = (r[0] << 6 | r[1] >> 2) & 63; if (r1 == 0) ez++; + r2 = (r[1] << 4 | r[2] >> 4) & 63; if (r2 == 0) ez++; + r3 = ((r[2] << 2) | (r[3] >> 6)) & 63; if (r3 == 0) ez++; + r4 = (r[3] | r[4] >> 8) & 63; if (r4 == 0) ez++; + r5 = (r[3] << 6 | r[4] >> 2) & 63; if (r5 == 0) ez++; + r6 = (r[4] << 4 | r[5] >> 4) & 63; if (r6 == 0) ez++; + r7 = (r[5] << 2 | r[6] >> 6) & 63; if (r7 == 0) ez++; + r8 = (r[6] | r[7] >> 8) & 63; if (r8 == 0) ez++; + r9 = (r[6] << 6 | r[7] >> 2) & 63; if (r9 == 0) ez++; + r10 = (r[7] << 4 | r[8] >> 4) & 63; if (r10 == 0) ez++; + r11 = (r[8] << 2 | r[9] >> 6) & 63; if (r11 == 0) ez++; + r12 = (r[9] | r[10] >> 8) & 63; if (r12 == 0) ez++; + r13 = (r[9] << 6 | r[10] >> 2) & 63; if (r13 == 0) ez++; + r14 = (r[10] << 4 | r[11] >> 4) & 63; if (r14 == 0) ez++; + r15 = (r[11] << 2 | r[12] >> 6) & 63; if (r15 == 0) ez++; - HLL_GET_REGISTER(reg,registers,j); - if (reg == 0) { - ez++; - E += 1; /* 2^(-reg[j]) is 1 when m is 0. */ - } else { - E += PE[reg]; /* Precomputed 2^(-reg[j]). */ + /* Additional parens will allow the compiler to optimize the + * code more with a loss of precision that is not very relevant + * here (floating point math is not associative!). */ + E += (PE[r0] + PE[r1]) + (PE[r2] + PE[r3]) + (PE[r4] + PE[r5]) + + (PE[r6] + PE[r7]) + (PE[r8] + PE[r9]) + (PE[r10] + PE[r11]) + + (PE[r12] + PE[r13]) + (PE[r14] + PE[r15]); + r += 12; + } + } else { + for (j = 0; j < REDIS_HLL_REGISTERS; j++) { + unsigned long reg; + + HLL_GET_REGISTER(reg,registers,j); + if (reg == 0) { + ez++; + E += 1; /* 2^(-reg[j]) is 1 when m is 0. */ + } else { + E += PE[reg]; /* Precomputed 2^(-reg[j]). */ + } } } /* Muliply the inverse of E for alpha_m * m^2 to have the raw estimate. */ @@ -376,7 +412,7 @@ void hllCountCommand(redisClient *c) { if (o == NULL) { /* No key? Cardinality is zero since no element was added, otherwise - * we would have a key as HHLADD creates it as a side effect. 
*/ + * we would have a key as HLLADD creates it as a side effect. */ addReply(c,shared.czero); } else { /* Key exists, check type */