Detect corrupted sparse HLLs in hllSparseSum().

This commit is contained in:
antirez 2014-04-14 15:20:26 +02:00
parent db40da0a47
commit 681bf7468b

View File

@ -869,7 +869,7 @@ promote: /* Promote to dense representation. */
* PE is an array with a pre-computer table of values 2^-reg indexed by reg. * PE is an array with a pre-computer table of values 2^-reg indexed by reg.
* As a side effect the integer pointed by 'ezp' is set to the number * As a side effect the integer pointed by 'ezp' is set to the number
* of zero registers. */ * of zero registers. */
double hllSparseSum(uint8_t *sparse, int sparselen, double *PE, int *ezp) { double hllSparseSum(uint8_t *sparse, int sparselen, double *PE, int *ezp, int *invalid) {
double E = 0; double E = 0;
int ez = 0, idx = 0, runlen, regval; int ez = 0, idx = 0, runlen, regval;
uint8_t *end = sparse+sparselen, *p = sparse; uint8_t *end = sparse+sparselen, *p = sparse;
@ -895,7 +895,7 @@ double hllSparseSum(uint8_t *sparse, int sparselen, double *PE, int *ezp) {
p++; p++;
} }
} }
redisAssert(idx == HLL_REGISTERS); if (idx != HLL_REGISTERS && invalid) *invalid = 1;
*ezp = ez; *ezp = ez;
return E; return E;
} }
@ -908,13 +908,14 @@ double hllSparseSum(uint8_t *sparse, int sparselen, double *PE, int *ezp) {
/* Return the approximated cardinality of the set based on the armonic /* Return the approximated cardinality of the set based on the armonic
* mean of the registers values. 'hdr' points to the start of the SDS * mean of the registers values. 'hdr' points to the start of the SDS
* representing the String object holding the HLL representation. */ * representing the String object holding the HLL representation.
uint64_t hllCount(struct hllhdr *hdr) { *
* If the sparse representation of the HLL object is not valid, the integer
* pointed by 'invalid' is set to non-zero, otherwise it is left untouched. */
uint64_t hllCount(struct hllhdr *hdr, int *invalid) {
double m = HLL_REGISTERS; double m = HLL_REGISTERS;
double alpha = 0.7213/(1+1.079/m); double E, alpha = 0.7213/(1+1.079/m);
double E; int j, ez; /* Number of registers equal to 0. */
int ez; /* Number of registers equal to 0. */
int j;
/* We precompute 2^(-reg[j]) in a small table in order to /* We precompute 2^(-reg[j]) in a small table in order to
* speedup the computation of SUM(2^-register[0..i]). */ * speedup the computation of SUM(2^-register[0..i]). */
@ -933,7 +934,8 @@ uint64_t hllCount(struct hllhdr *hdr) {
if (hdr->encoding == HLL_DENSE) { if (hdr->encoding == HLL_DENSE) {
E = hllDenseSum(hdr->registers,PE,&ez); E = hllDenseSum(hdr->registers,PE,&ez);
} else { } else {
E = hllSparseSum(hdr->registers,sdslen((sds)hdr)-HLL_HDR_SIZE,PE,&ez); E = hllSparseSum(hdr->registers,
sdslen((sds)hdr)-HLL_HDR_SIZE,PE,&ez,invalid);
} }
/* Muliply the inverse of E for alpha_m * m^2 to have the raw estimate. */ /* Muliply the inverse of E for alpha_m * m^2 to have the raw estimate. */
@ -1111,8 +1113,13 @@ void pfcountCommand(redisClient *c) {
card |= (uint64_t)hdr->card[6] << 48; card |= (uint64_t)hdr->card[6] << 48;
card |= (uint64_t)hdr->card[7] << 56; card |= (uint64_t)hdr->card[7] << 56;
} else { } else {
int invalid = 0;
/* Recompute it and update the cached value. */ /* Recompute it and update the cached value. */
card = hllCount(hdr); card = hllCount(hdr,&invalid);
if (invalid) {
addReplyError(c,"Invalid HLL object");
return;
}
hdr->card[0] = card & 0xff; hdr->card[0] = card & 0xff;
hdr->card[1] = (card >> 8) & 0xff; hdr->card[1] = (card >> 8) & 0xff;
hdr->card[2] = (card >> 16) & 0xff; hdr->card[2] = (card >> 16) & 0xff;
@ -1245,7 +1252,7 @@ void pfselftestCommand(redisClient *c) {
ele = j ^ seed; ele = j ^ seed;
hllDenseAdd(hdr->registers,(unsigned char*)&ele,sizeof(ele)); hllDenseAdd(hdr->registers,(unsigned char*)&ele,sizeof(ele));
if (j == checkpoint) { if (j == checkpoint) {
int64_t abserr = checkpoint- (int64_t)hllCount(hdr); int64_t abserr = checkpoint - (int64_t)hllCount(hdr,NULL);
if (abserr < 0) abserr = -abserr; if (abserr < 0) abserr = -abserr;
if (abserr > (uint64_t)(relerr*4*checkpoint)) { if (abserr > (uint64_t)(relerr*4*checkpoint)) {
addReplyErrorFormat(c, addReplyErrorFormat(c,