From 5870e22423e069452e9f858b80ac40bfc455bfe6 Mon Sep 17 00:00:00 2001
From: Matt Stancliff
Date: Tue, 23 Dec 2014 10:10:42 -0500
Subject: [PATCH] Upgrade LZF to 3.6 (2011) from 3.5 (2009)

This is lzf_c and lzf_d from
http://dist.schmorp.de/liblzf/liblzf-3.6.tar.gz
---
 src/lzfP.h  | 56 ++++++++++++++++++++++++++++++++++++++--------------
 src/lzf_c.c | 42 ++++++++++++++++++---------------------
 src/lzf_d.c | 57 ++++++++++++++++++++++++++++++++++++++++++-----------
 3 files changed, 106 insertions(+), 49 deletions(-)

diff --git a/src/lzfP.h b/src/lzfP.h
index c9eae3f6..c6d2e096 100644
--- a/src/lzfP.h
+++ b/src/lzfP.h
@@ -49,7 +49,7 @@
  * the difference between 15 and 14 is very small
  * for small blocks (and 14 is usually a bit faster).
  * For a low-memory/faster configuration, use HLOG == 13;
- * For best compression, use 15 or 16 (or more, up to 23).
+ * For best compression, use 15 or 16 (or more, up to 22).
  */
 #ifndef HLOG
 # define HLOG 16
@@ -94,7 +94,7 @@
 /*
  * Avoid assigning values to errno variable? for some embedding purposes
  * (linux kernel for example), this is necessary. NOTE: this breaks
- * the documentation in lzf.h.
+ * the documentation in lzf.h. Avoiding errno has no speed impact.
  */
 #ifndef AVOID_ERRNO
 # define AVOID_ERRNO 0
@@ -121,16 +121,52 @@
 # define CHECK_INPUT 1
 #endif
 
+/*
+ * Whether to store pointers or offsets inside the hash table. On
+ * 64 bit architectures, pointers take up twice as much space,
+ * and might also be slower. Default is to autodetect.
+ */
+/*#define LZF_USE_OFFSETS autodetect */
+
 /*****************************************************************************/
 /* nothing should be changed below */
 
+#ifdef __cplusplus
+# include <cstring>
+# include <climits>
+using namespace std;
+#else
+# include <string.h>
+# include <limits.h>
+#endif
+
+#ifndef LZF_USE_OFFSETS
+# if defined (WIN32)
+#  define LZF_USE_OFFSETS defined(_M_X64)
+# else
+#  if __cplusplus > 199711L
+#   include <cstdint>
+#  else
+#   include <stdint.h>
+#  endif
+#  define LZF_USE_OFFSETS (UINTPTR_MAX > 0xffffffffU)
+# endif
+#endif
+
 typedef unsigned char u8;
 
-typedef const u8 *LZF_STATE[1 << (HLOG)];
+#if LZF_USE_OFFSETS
+# define LZF_HSLOT_BIAS ((const u8 *)in_data)
+  typedef unsigned int LZF_HSLOT;
+#else
+# define LZF_HSLOT_BIAS 0
+  typedef const u8 *LZF_HSLOT;
+#endif
+
+typedef LZF_HSLOT LZF_STATE[1 << (HLOG)];
 
 #if !STRICT_ALIGN
 /* for unaligned accesses we need a 16 bit datatype. */
-# include <limits.h>
 # if USHRT_MAX == 65535
     typedef unsigned short u16;
 # elif UINT_MAX == 65535
@@ -142,17 +178,7 @@ typedef const u8 *LZF_STATE[1 << (HLOG)];
 #endif
 
 #if ULTRA_FAST
-# if defined(VERY_FAST)
-#  undef VERY_FAST
-# endif
-#endif
-
-#if INIT_HTAB
-# ifdef __cplusplus
-#  include <cstring>
-# else
-#  include <string.h>
-# endif
+# undef VERY_FAST
 #endif
 
 #endif
diff --git a/src/lzf_c.c b/src/lzf_c.c
index 9e031ad0..e9c69a0b 100644
--- a/src/lzf_c.c
+++ b/src/lzf_c.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2008 Marc Alexander Lehmann <schmorp@schmorp.de>
+ * Copyright (c) 2000-2010 Marc Alexander Lehmann <schmorp@schmorp.de>
  *
  * Redistribution and use in source and binary forms, with or without modifica-
  * tion, are permitted provided that the following conditions are met:
@@ -40,8 +40,8 @@
 
 /*
  * don't play with this unless you benchmark!
- * decompression is not dependent on the hash function
- * the hashing function might seem strange, just believe me
+ * the data format is not dependent on the hash function.
+ * the hash function might seem strange, just believe me,
  * it works ;)
  */
 #ifndef FRST
@@ -89,9 +89,9 @@
 /*
  * compressed format
  *
- * 000LLLLL <L+1>    ; literal
- * LLLooooo oooooooo ; backref L
- * 111ooooo LLLLLLLL oooooooo ; backref L+7
+ * 000LLLLL <L+1>    ; literal, L+1=1..33 octets
+ * LLLooooo oooooooo ; backref L+1=1..7 octets, o+1=1..4096 offset
+ * 111ooooo LLLLLLLL oooooooo ; backref L+8 octets, o+1=1..4096 offset
  *
  */
 
@@ -106,7 +106,6 @@ lzf_compress (const void *const in_data, unsigned int in_len,
 #if !LZF_STATE_ARG
   LZF_STATE htab;
 #endif
-  const u8 **hslot;
   const u8 *ip = (const u8 *)in_data;
         u8 *op = (u8 *)out_data;
   const u8 *in_end  = ip + in_len;
@@ -133,10 +132,6 @@ lzf_compress (const void *const in_data, unsigned int in_len,
 
 #if INIT_HTAB
   memset (htab, 0, sizeof (htab));
-# if 0
-  for (hslot = htab; hslot < htab + HSIZE; hslot++)
-    *hslot++ = ip;
-# endif
 #endif
 
   lit = 0; op++; /* start run */
@@ -144,24 +139,23 @@ lzf_compress (const void *const in_data, unsigned int in_len,
   hval = FRST (ip);
   while (ip < in_end - 2)
     {
+      LZF_HSLOT *hslot;
+
       hval = NEXT (hval, ip);
       hslot = htab + IDX (hval);
-      ref = *hslot; *hslot = ip;
+      ref = *hslot + LZF_HSLOT_BIAS; *hslot = ip - LZF_HSLOT_BIAS;
 
       if (1
 #if INIT_HTAB
           && ref < ip /* the next test will actually take care of this, but this is faster */
 #endif
           && (off = ip - ref - 1) < MAX_OFF
-          && ip + 4 < in_end
           && ref > (u8 *)in_data
-#if STRICT_ALIGN
-          && ref[0] == ip[0]
-          && ref[1] == ip[1]
           && ref[2] == ip[2]
+#if STRICT_ALIGN
+          && ((ref[1] << 8) | ref[0]) == ((ip[1] << 8) | ip[0])
 #else
           && *(u16 *)ref == *(u16 *)ip
-          && ref[2] == ip[2]
 #endif
         )
@@ -170,12 +164,13 @@ lzf_compress (const void *const in_data, unsigned int in_len,
           unsigned int maxlen = in_end - ip - len;
           maxlen = maxlen > MAX_REF ? MAX_REF : maxlen;
 
+          if (expect_false (op + 3 + 1 >= out_end)) /* first a faster conservative test */
+            if (op - !lit + 3 + 1 >= out_end) /* second the exact but rare test */
+              return 0;
+
           op [- lit - 1] = lit - 1; /* stop run */
           op -= !lit; /* undo run if length is zero */
 
-          if (expect_false (op + 3 + 1 >= out_end))
-            return 0;
-
           for (;;)
             {
               if (expect_true (maxlen > 16))
@@ -222,6 +217,7 @@ lzf_compress (const void *const in_data, unsigned int in_len,
             }
 
           *op++ = off;
+
           lit = 0; op++; /* start run */
 
           ip += len + 1;
@@ -237,12 +233,12 @@ lzf_compress (const void *const in_data, unsigned int in_len,
               hval = FRST (ip);
 
               hval = NEXT (hval, ip);
-              htab[IDX (hval)] = ip;
+              htab[IDX (hval)] = ip - LZF_HSLOT_BIAS;
               ip++;
 
 # if VERY_FAST && !ULTRA_FAST
               hval = NEXT (hval, ip);
-              htab[IDX (hval)] = ip;
+              htab[IDX (hval)] = ip - LZF_HSLOT_BIAS;
               ip++;
 # endif
 #else
@@ -251,7 +247,7 @@ lzf_compress (const void *const in_data, unsigned int in_len,
           do
             {
               hval = NEXT (hval, ip);
-              htab[IDX (hval)] = ip;
+              htab[IDX (hval)] = ip - LZF_HSLOT_BIAS;
               ip++;
             }
           while (len--);
diff --git a/src/lzf_d.c b/src/lzf_d.c
index 6c723f5e..c32be8e8 100644
--- a/src/lzf_d.c
+++ b/src/lzf_d.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2007 Marc Alexander Lehmann <schmorp@schmorp.de>
+ * Copyright (c) 2000-2010 Marc Alexander Lehmann <schmorp@schmorp.de>
  *
  * Redistribution and use in source and binary forms, with or without modifica-
  * tion, are permitted provided that the following conditions are met:
@@ -43,14 +43,14 @@
 # define SET_ERRNO(n) errno = (n)
 #endif
 
-/*
+#if USE_REP_MOVSB /* small win on amd, big loss on intel */
 #if (__i386 || __amd64) && __GNUC__ >= 3
 # define lzf_movsb(dst, src, len)                \
    asm ("rep movsb"                              \
         : "=D" (dst), "=S" (src), "=c" (len)     \
         :  "0" (dst),  "1" (src),  "2" (len));
 #endif
-*/
+#endif
 
 unsigned int
 lzf_decompress (const void *const in_data, unsigned int in_len,
@@ -86,9 +86,17 @@ lzf_decompress (const void *const in_data, unsigned int in_len,
 #ifdef lzf_movsb
           lzf_movsb (op, ip, ctrl);
 #else
-          do
-            *op++ = *ip++;
-          while (--ctrl);
+          switch (ctrl)
+            {
+              case 32: *op++ = *ip++; case 31: *op++ = *ip++; case 30: *op++ = *ip++; case 29: *op++ = *ip++;
+              case 28: *op++ = *ip++; case 27: *op++ = *ip++; case 26: *op++ = *ip++; case 25: *op++ = *ip++;
+              case 24: *op++ = *ip++; case 23: *op++ = *ip++; case 22: *op++ = *ip++; case 21: *op++ = *ip++;
+              case 20: *op++ = *ip++; case 19: *op++ = *ip++; case 18: *op++ = *ip++; case 17: *op++ = *ip++;
+              case 16: *op++ = *ip++; case 15: *op++ = *ip++; case 14: *op++ = *ip++; case 13: *op++ = *ip++;
+              case 12: *op++ = *ip++; case 11: *op++ = *ip++; case 10: *op++ = *ip++; case 9: *op++ = *ip++;
+              case 8: *op++ = *ip++; case 7: *op++ = *ip++; case 6: *op++ = *ip++; case 5: *op++ = *ip++;
+              case 4: *op++ = *ip++; case 3: *op++ = *ip++; case 2: *op++ = *ip++; case 1: *op++ = *ip++;
+            }
 #endif
         }
       else /* back reference */
@@ -134,12 +142,39 @@ lzf_decompress (const void *const in_data, unsigned int in_len,
           len += 2;
           lzf_movsb (op, ref, len);
 #else
-          *op++ = *ref++;
-          *op++ = *ref++;
+          switch (len)
+            {
+              default:
+                len += 2;
 
-          do
-            *op++ = *ref++;
-          while (--len);
+                if (op >= ref + len)
+                  {
+                    /* disjunct areas */
+                    memcpy (op, ref, len);
+                    op += len;
+                  }
+                else
+                  {
+                    /* overlapping, use octet by octet copying */
+                    do
+                      *op++ = *ref++;
+                    while (--len);
+                  }
+
+                break;
+
+              case 9: *op++ = *ref++;
+              case 8: *op++ = *ref++;
+              case 7: *op++ = *ref++;
+              case 6: *op++ = *ref++;
+              case 5: *op++ = *ref++;
+              case 4: *op++ = *ref++;
+              case 3: *op++ = *ref++;
+              case 2: *op++ = *ref++;
+              case 1: *op++ = *ref++;
+              case 0: *op++ = *ref++; /* two octets more */
+                      *op++ = *ref++;
+            }
 #endif
         }
     }
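
Reviewer note (not part of the patch): the stand-alone sketch below round-trips
a buffer through the upgraded lzf_c.c/lzf_d.c, which is a quick way to smoke-test
the new files. The two prototypes are assumed to match liblzf's public lzf.h
(which this commit does not touch) in the default LZF_STATE_ARG == 0 build; the
buffer sizes and the all-'x' test pattern are arbitrary illustration choices.

  #include <stdio.h>
  #include <string.h>

  /* prototypes as declared in liblzf's lzf.h (LZF_STATE_ARG == 0) */
  unsigned int lzf_compress   (const void *const in_data, unsigned int in_len,
                               void *out_data, unsigned int out_len);
  unsigned int lzf_decompress (const void *const in_data, unsigned int in_len,
                               void *out_data, unsigned int out_len);

  int main (void)
  {
    char in[256], packed[255], out[256];
    unsigned int plen, dlen;

    memset (in, 'x', sizeof (in)); /* highly compressible test pattern */

    /* an out_len of in_len - 1 means success implies real space savings */
    plen = lzf_compress (in, sizeof (in), packed, sizeof (packed));
    if (!plen)
      return 1; /* would not shrink: a caller stores the input raw instead */

    /* returns the decompressed length, or 0 on corrupt input/short buffer */
    dlen = lzf_decompress (packed, plen, out, sizeof (out));

    printf ("%u -> %u -> %u octets: %s\n",
            (unsigned int) sizeof (in), plen, dlen,
            dlen == sizeof (in) && !memcmp (in, out, dlen) ? "ok" : "FAIL");
    return 0;
  }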