LogLog-Beta Algorithm support within HLL

Config option to use LogLog-Beta Algorithm for Cardinality
This commit is contained in:
Harish Murthy 2016-12-09 14:28:19 +05:30 committed by antirez
parent 5ad2a94a16
commit c55e3fbae5
4 changed files with 55 additions and 25 deletions

View File

@ -688,6 +688,10 @@ void loadServerConfigFromString(char *config) {
err = sentinelHandleConfiguration(argv+1,argc-1); err = sentinelHandleConfiguration(argv+1,argc-1);
if (err) goto loaderr; if (err) goto loaderr;
} }
} else if (!strcasecmp(argv[0],"hll-use-loglogbeta") && argc == 2) {
if ((server.hll_use_loglogbeta = yesnotoi(argv[1])) == -1) {
err = "argument must be 'yes' or 'no'"; goto loaderr;
}
} else { } else {
err = "Bad directive or wrong number of arguments"; goto loaderr; err = "Bad directive or wrong number of arguments"; goto loaderr;
} }
@ -981,6 +985,8 @@ void configSetCommand(client *c) {
"slave-lazy-flush",server.repl_slave_lazy_flush) { "slave-lazy-flush",server.repl_slave_lazy_flush) {
} config_set_bool_field( } config_set_bool_field(
"no-appendfsync-on-rewrite",server.aof_no_fsync_on_rewrite) { "no-appendfsync-on-rewrite",server.aof_no_fsync_on_rewrite) {
} config_set_bool_field(
"hll-use-loglogbeta",server.hll_use_loglogbeta) {
/* Numerical fields. /* Numerical fields.
* config_set_numerical_field(name,var,min,max) */ * config_set_numerical_field(name,var,min,max) */
@ -1245,6 +1251,8 @@ void configGetCommand(client *c) {
server.lazyfree_lazy_server_del); server.lazyfree_lazy_server_del);
config_get_bool_field("slave-lazy-flush", config_get_bool_field("slave-lazy-flush",
server.repl_slave_lazy_flush); server.repl_slave_lazy_flush);
config_get_bool_field("hll-use-loglogbeta",
server.hll_use_loglogbeta);
/* Enum values */ /* Enum values */
config_get_enum_field("maxmemory-policy", config_get_enum_field("maxmemory-policy",
@ -1963,6 +1971,7 @@ int rewriteConfig(char *path) {
rewriteConfigYesNoOption(state,"lazyfree-lazy-expire",server.lazyfree_lazy_expire,CONFIG_DEFAULT_LAZYFREE_LAZY_EXPIRE); rewriteConfigYesNoOption(state,"lazyfree-lazy-expire",server.lazyfree_lazy_expire,CONFIG_DEFAULT_LAZYFREE_LAZY_EXPIRE);
rewriteConfigYesNoOption(state,"lazyfree-lazy-server-del",server.lazyfree_lazy_server_del,CONFIG_DEFAULT_LAZYFREE_LAZY_SERVER_DEL); rewriteConfigYesNoOption(state,"lazyfree-lazy-server-del",server.lazyfree_lazy_server_del,CONFIG_DEFAULT_LAZYFREE_LAZY_SERVER_DEL);
rewriteConfigYesNoOption(state,"slave-lazy-flush",server.repl_slave_lazy_flush,CONFIG_DEFAULT_SLAVE_LAZY_FLUSH); rewriteConfigYesNoOption(state,"slave-lazy-flush",server.repl_slave_lazy_flush,CONFIG_DEFAULT_SLAVE_LAZY_FLUSH);
rewriteConfigYesNoOption(state,"hll-use-loglogbeta",server.hll_use_loglogbeta,CONFIG_DEFAULT_HLL_USE_LOGLOGBETA);
/* Rewrite Sentinel config if in Sentinel mode. */ /* Rewrite Sentinel config if in Sentinel mode. */
if (server.sentinel_mode) rewriteConfigSentinelOption(state); if (server.sentinel_mode) rewriteConfigSentinelOption(state);

View File

@ -994,32 +994,50 @@ uint64_t hllCount(struct hllhdr *hdr, int *invalid) {
serverPanic("Unknown HyperLogLog encoding in hllCount()"); serverPanic("Unknown HyperLogLog encoding in hllCount()");
} }
/* Multiply the inverse of E for alpha_m * m^2 to have the raw estimate. */ if(server.hll_use_loglogbeta) {
E = (1/E)*alpha*m*m; /* For loglog-beta there is a single formula to compute
* cardinality for the entire range
*/
/* Use the LINEARCOUNTING algorithm for small cardinalities. double zl = log(ez + 1);
* For larger values but up to 72000 HyperLogLog raw approximation is double beta = -0.370393911*ez +
* used since linear counting error starts to increase. However HyperLogLog 0.070471823*zl +
* shows a strong bias in the range 2.5*16384 - 72000, so we try to 0.17393686*pow(zl,2) +
* compensate for it. */ 0.16339839*pow(zl,3) +
if (E < m*2.5 && ez != 0) { -0.09237745*pow(zl,4) +
E = m*log(m/ez); /* LINEARCOUNTING() */ 0.03738027*pow(zl,5) +
} else if (m == 16384 && E < 72000) { -0.005384159*pow(zl,6) +
/* We did polynomial regression of the bias for this range, this 0.00042419*pow(zl,7);
* way we can compute the bias for a given cardinality and correct
* according to it. Only apply the correction for P=14 that's what E = alpha*m*(m-ez)*(1/(E+beta));
* we use and the value the correction was verified with. */ } else {
double bias = 5.9119*1.0e-18*(E*E*E*E) /* Multiply the inverse of E for alpha_m * m^2 to have the raw estimate. */
-1.4253*1.0e-12*(E*E*E)+ E = (1/E)*alpha*m*m;
1.2940*1.0e-7*(E*E)
-5.2921*1.0e-3*E+ /* Use the LINEARCOUNTING algorithm for small cardinalities.
83.3216; * For larger values but up to 72000 HyperLogLog raw approximation is
E -= E*(bias/100); * used since linear counting error starts to increase. However HyperLogLog
* shows a strong bias in the range 2.5*16384 - 72000, so we try to
* compensate for it. */
if (E < m*2.5 && ez != 0) {
E = m*log(m/ez); /* LINEARCOUNTING() */
} else if (m == 16384 && E < 72000) {
/* We did polynomial regression of the bias for this range, this
* way we can compute the bias for a given cardinality and correct
* according to it. Only apply the correction for P=14 that's what
* we use and the value the correction was verified with. */
double bias = 5.9119*1.0e-18*(E*E*E*E)
-1.4253*1.0e-12*(E*E*E)+
1.2940*1.0e-7*(E*E)
-5.2921*1.0e-3*E+
83.3216;
E -= E*(bias/100);
}
/* We don't apply the correction for E > 1/30 of 2^32 since we use
* a 64 bit function and 6 bit counters. To apply the correction for
* 1/30 of 2^64 is not needed since it would require a huge set
* to approach such a value. */
} }
/* We don't apply the correction for E > 1/30 of 2^32 since we use
* a 64 bit function and 6 bit counters. To apply the correction for
* 1/30 of 2^64 is not needed since it would require a huge set
* to approach such a value. */
return (uint64_t) E; return (uint64_t) E;
} }

View File

@ -1400,6 +1400,7 @@ void initServerConfig(void) {
server.lazyfree_lazy_eviction = CONFIG_DEFAULT_LAZYFREE_LAZY_EVICTION; server.lazyfree_lazy_eviction = CONFIG_DEFAULT_LAZYFREE_LAZY_EVICTION;
server.lazyfree_lazy_expire = CONFIG_DEFAULT_LAZYFREE_LAZY_EXPIRE; server.lazyfree_lazy_expire = CONFIG_DEFAULT_LAZYFREE_LAZY_EXPIRE;
server.lazyfree_lazy_server_del = CONFIG_DEFAULT_LAZYFREE_LAZY_SERVER_DEL; server.lazyfree_lazy_server_del = CONFIG_DEFAULT_LAZYFREE_LAZY_SERVER_DEL;
server.hll_use_loglogbeta = CONFIG_DEFAULT_HLL_USE_LOGLOGBETA;
server.lruclock = getLRUClock(); server.lruclock = getLRUClock();
resetServerSaveParams(); resetServerSaveParams();

View File

@ -151,6 +151,7 @@ typedef long long mstime_t; /* millisecond time type. */
#define CONFIG_DEFAULT_LAZYFREE_LAZY_EVICTION 0 #define CONFIG_DEFAULT_LAZYFREE_LAZY_EVICTION 0
#define CONFIG_DEFAULT_LAZYFREE_LAZY_EXPIRE 0 #define CONFIG_DEFAULT_LAZYFREE_LAZY_EXPIRE 0
#define CONFIG_DEFAULT_LAZYFREE_LAZY_SERVER_DEL 0 #define CONFIG_DEFAULT_LAZYFREE_LAZY_SERVER_DEL 0
#define CONFIG_DEFAULT_HLL_USE_LOGLOGBETA 0
#define ACTIVE_EXPIRE_CYCLE_LOOKUPS_PER_LOOP 20 /* Lookups per loop. */ #define ACTIVE_EXPIRE_CYCLE_LOOKUPS_PER_LOOP 20 /* Lookups per loop. */
#define ACTIVE_EXPIRE_CYCLE_FAST_DURATION 1000 /* Microseconds */ #define ACTIVE_EXPIRE_CYCLE_FAST_DURATION 1000 /* Microseconds */
@ -1149,6 +1150,7 @@ struct redisServer {
int watchdog_period; /* Software watchdog period in ms. 0 = off */ int watchdog_period; /* Software watchdog period in ms. 0 = off */
/* System hardware info */ /* System hardware info */
size_t system_memory_size; /* Total memory in system as reported by OS */ size_t system_memory_size; /* Total memory in system as reported by OS */
int hll_use_loglogbeta; /* Use loglog-beta algorithm for HLL */
}; };
typedef struct pubsubPattern { typedef struct pubsubPattern {