initial work for integer encoding in ziplists

This commit is contained in:
Pieter Noordhuis 2010-05-22 15:07:12 +02:00
parent f6eb17477a
commit 29b14d5fac
2 changed files with 208 additions and 44 deletions

154
zip.c
View File

@ -1,41 +1,153 @@
#define ZIP_BIGLEN 254
#define ZIP_END 255
/* The following macro returns the number of bytes needed to encode the length
* for the integer value _l, that is, 1 byte for lengths < ZIP_BIGLEN and
* 5 bytes for all the other lengths. */
#define ZIP_LEN_BYTES(_l) (((_l) < ZIP_BIGLEN) ? 1 : sizeof(unsigned int)+1)
/* Entry encoding */
#define ZIP_ENC_RAW 0
#define ZIP_ENC_SHORT 1
#define ZIP_ENC_INT 2
#define ZIP_ENC_LLONG 3
#define ZIP_ENCODING(p) ((p)[0] >> 6)
/* Decode the encoded length pointed by 'p' */
static unsigned int zipDecodeLength(unsigned char *p) {
unsigned int len = *p;
/* Length encoding for raw entries */
#define ZIP_LEN_INLINE 0
#define ZIP_LEN_UINT16 1
#define ZIP_LEN_UINT32 2
if (len < ZIP_BIGLEN) return len;
memcpy(&len,p+1,sizeof(unsigned int));
static unsigned int zipEncodingSize(char encoding) {
if (encoding == ZIP_ENC_SHORT) {
return sizeof(short int);
} else if (encoding == ZIP_ENC_INT) {
return sizeof(int);
} else if (encoding == ZIP_ENC_LLONG) {
return sizeof(long long);
}
assert(NULL);
}
/* Decode the encoded length pointed by 'p'. If a pointer to 'lensize' is
* provided, it is set to the number of bytes required to encode the length. */
static unsigned int zipDecodeLength(unsigned char *p, unsigned int *lensize) {
unsigned char encoding = ZIP_ENCODING(p), lenenc;
unsigned int len;
if (encoding == ZIP_ENC_RAW) {
lenenc = (p[0] >> 4) & 0x3;
if (lenenc == ZIP_LEN_INLINE) {
len = p[0] & 0xf;
if (lensize) *lensize = 1;
} else if (lenenc == ZIP_LEN_UINT16) {
len = p[1] | (p[2] << 8);
if (lensize) *lensize = 3;
} else {
len = p[1] | (p[2] << 8) | (p[3] << 16) | (p[4] << 24);
if (lensize) *lensize = 5;
}
} else {
len = zipEncodingSize(encoding);
if (lensize) *lensize = 1;
}
return len;
}
/* Encode the length 'l' writing it in 'p'. If p is NULL it just returns
* the amount of bytes required to encode such a length. */
static unsigned int zipEncodeLength(unsigned char *p, unsigned int len) {
if (p == NULL) {
return ZIP_LEN_BYTES(len);
} else {
if (len < ZIP_BIGLEN) {
p[0] = len;
return 1;
static unsigned int zipEncodeLength(unsigned char *p, char encoding, unsigned int rawlen) {
unsigned char len = 1, lenenc, buf[5];
if (encoding == ZIP_ENC_RAW) {
if (rawlen <= 0xf) {
if (!p) return len;
lenenc = ZIP_LEN_INLINE;
buf[0] = rawlen;
} else if (rawlen <= 0xffff) {
len += 2;
if (!p) return len;
lenenc = ZIP_LEN_UINT16;
buf[1] = (rawlen ) & 0xff;
buf[2] = (rawlen >> 8) & 0xff;
} else {
p[0] = ZIP_BIGLEN;
memcpy(p+1,&len,sizeof(len));
return 1+sizeof(len);
len += 4;
if (!p) return len;
lenenc = ZIP_LEN_UINT32;
buf[1] = (rawlen ) & 0xff;
buf[2] = (rawlen >> 8) & 0xff;
buf[3] = (rawlen >> 16) & 0xff;
buf[4] = (rawlen >> 24) & 0xff;
}
buf[0] = (lenenc << 4) | (buf[0] & 0xf);
}
if (!p) return len;
/* Apparently we need to store the length in 'p' */
buf[0] = (encoding << 6) | (buf[0] & 0x3f);
memcpy(p,buf,len);
return len;
}
/* Check if string pointed to by 'entry' can be encoded as an integer.
* Stores the integer value in 'v' and its encoding in 'encoding'.
* Warning: this function requires a NULL-terminated string! */
static int zipTryEncoding(unsigned char *entry, long long *v, char *encoding) {
long long value;
char *eptr;
if (entry[0] == '-' || (entry[0] >= '0' && entry[0] <= '9')) {
value = strtoll(entry,&eptr,10);
if (eptr[0] != '\0') return 0;
if (value >= SHRT_MIN && value <= SHRT_MAX) {
*encoding = ZIP_ENC_SHORT;
} else if (value >= INT_MIN && value <= INT_MAX) {
*encoding = ZIP_ENC_INT;
} else {
*encoding = ZIP_ENC_LLONG;
}
*v = value;
return 1;
}
return 0;
}
static void zipSaveInteger(unsigned char *p, long long value, char encoding) {
short int s;
int i;
long long l;
if (encoding == ZIP_ENC_SHORT) {
s = value;
memcpy(p,&s,sizeof(s));
} else if (encoding == ZIP_ENC_INT) {
i = value;
memcpy(p,&i,sizeof(i));
} else if (encoding == ZIP_ENC_LLONG) {
l = value;
memcpy(p,&l,sizeof(l));
} else {
assert(NULL);
}
}
static long long zipLoadInteger(unsigned char *p, char encoding) {
short int s;
int i;
long long l, ret;
if (encoding == ZIP_ENC_SHORT) {
memcpy(&s,p,sizeof(s));
ret = s;
} else if (encoding == ZIP_ENC_INT) {
memcpy(&i,p,sizeof(i));
ret = i;
} else if (encoding == ZIP_ENC_LLONG) {
memcpy(&l,p,sizeof(l));
ret = l;
} else {
assert(NULL);
}
return ret;
}
/* Return the total amount used by an entry (encoded length + payload). */
static unsigned int zipRawEntryLength(unsigned char *p) {
unsigned int l = zipDecodeLength(p);
return zipEncodeLength(NULL,l) + l;
unsigned int lensize, len;
len = zipDecodeLength(p, &lensize);
return lensize + len;
}
/* Resize the zip* structure. */

View File

@ -15,8 +15,10 @@
*/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <assert.h>
#include <limits.h>
#include "zmalloc.h"
#include "sds.h"
#include "ziplist.h"
@ -60,13 +62,21 @@ static unsigned char *ziplistTail(unsigned char *zl) {
}
unsigned char *ziplistPush(unsigned char *zl, unsigned char *entry, unsigned int elen, int where) {
unsigned int curlen = ZIPLIST_BYTES(zl);
unsigned int reqlen = zipEncodeLength(NULL,elen)+elen;
unsigned int curlen = ZIPLIST_BYTES(zl), reqlen;
unsigned char *p;
char encoding = ZIP_ENC_RAW;
long long value;
/* Resize the ziplist */
/* See if the entry can be encoded */
if (zipTryEncoding(entry,&value,&encoding)) {
reqlen = zipEncodingSize(encoding);
} else {
reqlen = elen;
}
reqlen += zipEncodeLength(NULL,encoding,elen);
/* Resize the ziplist and move if needed */
zl = ziplistResize(zl,curlen+reqlen);
if (where == ZIPLIST_HEAD) {
p = zl+ZIPLIST_HEADER_SIZE;
if (*p != ZIP_END) {
@ -78,31 +88,44 @@ unsigned char *ziplistPush(unsigned char *zl, unsigned char *entry, unsigned int
}
/* Write the entry */
p += zipEncodeLength(p,elen);
memcpy(p,entry,elen);
p += zipEncodeLength(p,encoding,elen);
if (encoding != ZIP_ENC_RAW) {
zipSaveInteger(p,value,encoding);
} else {
memcpy(p,entry,elen);
}
ZIPLIST_INCR_LENGTH(zl,1);
return zl;
}
unsigned char *ziplistPop(unsigned char *zl, sds *value, int where) {
unsigned int curlen = ZIPLIST_BYTES(zl), len, rlen;
unsigned char *ziplistPop(unsigned char *zl, sds *target, int where) {
unsigned int curlen = ZIPLIST_BYTES(zl), rawlen;
unsigned int len, lensize;
unsigned char *p;
if (value) *value = NULL;
long long value;
if (target) *target = NULL;
/* Get pointer to element to remove */
p = (where == ZIPLIST_HEAD) ? ziplistHead(zl) : ziplistTail(zl);
if (*p == ZIP_END) return zl;
len = zipDecodeLength(p);
if (value) *value = sdsnewlen(p+zipEncodeLength(NULL,len),len);
len = zipDecodeLength(p,&lensize);
if (target) {
if (ZIP_ENCODING(p) == ZIP_ENC_RAW) {
*target = sdsnewlen(p+lensize,len);
} else {
value = zipLoadInteger(p+lensize,ZIP_ENCODING(p));
*target = sdscatprintf(sdsempty(), "%lld", value);
}
}
/* Move list to front when popping from the head */
rlen = zipRawEntryLength(p);
rawlen = lensize+len;
if (where == ZIPLIST_HEAD) {
memmove(p,p+rlen,curlen-ZIPLIST_HEADER_SIZE-len);
memmove(p,p+rawlen,curlen-ZIPLIST_HEADER_SIZE-len);
}
/* Resize and update length */
zl = ziplistResize(zl,curlen-rlen);
zl = ziplistResize(zl,curlen-rawlen);
ZIPLIST_INCR_LENGTH(zl,-1);
return zl;
}
@ -121,10 +144,11 @@ unsigned char *ziplistIndex(unsigned char *zl, unsigned int index) {
/* Store entry at current position in sds *value and return pointer
* to the next entry. */
unsigned char *ziplistNext(unsigned char *p, unsigned char **q, unsigned char **entry, unsigned int *elen) {
unsigned int lensize;
if (*p == ZIP_END) return NULL;
if (entry) {
*elen = zipDecodeLength(p);
*entry = p+ZIP_LEN_BYTES(*elen);
*elen = zipDecodeLength(p,&lensize);
*entry = p+lensize;
}
if (q != NULL) *q = p;
p += zipRawEntryLength(p);
@ -174,16 +198,22 @@ unsigned char *ziplistDelete(unsigned char *zl, unsigned char **p) {
}
void ziplistRepr(unsigned char *zl) {
unsigned char *p;
unsigned int l;
unsigned char *p, encoding;
unsigned int l, lsize;
long long value;
printf("{bytes %d} {length %u}\n",ZIPLIST_BYTES(zl), ZIPLIST_LENGTH(zl));
printf("{total bytes %d} {length %u}\n",ZIPLIST_BYTES(zl), ZIPLIST_LENGTH(zl));
p = ziplistHead(zl);
while(*p != ZIP_END) {
l = zipDecodeLength(p);
printf("{key %u}",l);
p += zipEncodeLength(NULL,l);
fwrite(p,l,1,stdout);
l = zipDecodeLength(p,&lsize);
printf("{header %u, payload %u} ",lsize,l);
encoding = ZIP_ENCODING(p);
p += lsize;
if (encoding == ZIP_ENC_RAW) {
fwrite(p,l,1,stdout);
} else {
printf("%lld", zipLoadInteger(p,encoding));
}
printf("\n");
p += l;
}
@ -200,11 +230,33 @@ unsigned char *createList() {
return zl;
}
unsigned char *createIntList() {
unsigned char *zl = ziplistNew();
char buf[32];
sprintf(buf, "100");
zl = ziplistPush(zl, buf, strlen(buf), ZIPLIST_TAIL);
sprintf(buf, "128000");
zl = ziplistPush(zl, buf, strlen(buf), ZIPLIST_TAIL);
sprintf(buf, "-100");
zl = ziplistPush(zl, buf, strlen(buf), ZIPLIST_HEAD);
sprintf(buf, "4294967296");
zl = ziplistPush(zl, buf, strlen(buf), ZIPLIST_HEAD);
sprintf(buf, "non integer");
zl = ziplistPush(zl, buf, strlen(buf), ZIPLIST_TAIL);
sprintf(buf, "much much longer non integer");
zl = ziplistPush(zl, buf, strlen(buf), ZIPLIST_TAIL);
return zl;
}
int main(int argc, char **argv) {
unsigned char *zl, *p, *q, *entry;
unsigned int elen;
sds s;
zl = createIntList();
ziplistRepr(zl);
zl = createList();
ziplistRepr(zl);