From 1e272a6b52d663e0b4db8f42162c4461405b7f84 Mon Sep 17 00:00:00 2001
From: Salvatore Sanfilippo
Date: Sun, 19 Feb 2017 14:01:58 +0000
Subject: [PATCH 1/4] ARM: Fix 64 bit unaligned access in MurmurHash64A().

---
 src/config.h      | 6 ++++++
 src/hyperloglog.c | 4 ++++
 2 files changed, 10 insertions(+)

diff --git a/src/config.h b/src/config.h
index 9fd53626..354f8f5e 100644
--- a/src/config.h
+++ b/src/config.h
@@ -206,4 +206,10 @@ void setproctitle(const char *fmt, ...);
 #endif
 #endif
 
+/* Make sure we can test for ARM just checking for __arm__, since sometimes
+ * __arm is defined but __arm__ is not. */
+#if defined(__arm) && !defined(__arm__)
+#define __arm__
+#endif
+
 #endif
diff --git a/src/hyperloglog.c b/src/hyperloglog.c
index 0800bf59..7de5786f 100644
--- a/src/hyperloglog.c
+++ b/src/hyperloglog.c
@@ -401,7 +401,11 @@ uint64_t MurmurHash64A (const void * key, int len, unsigned int seed) {
         uint64_t k;
 
 #if (BYTE_ORDER == LITTLE_ENDIAN)
+    #ifdef __arm__
+        memcpy(&k,data,sizeof(uint64_t));
+    #else
         k = *((uint64_t*)data);
+    #endif
 #else
         k = (uint64_t) data[0];
         k |= (uint64_t) data[1] << 8;

From 72d6d64771ce52519c240fb122dbb35f989b4669 Mon Sep 17 00:00:00 2001
From: Salvatore Sanfilippo
Date: Sun, 19 Feb 2017 14:59:39 +0000
Subject: [PATCH 2/4] ARM: Avoid memcpy() in MurmurHash64A() if we are using
 64 bit ARM.

However, note that on architectures supporting 64 bit unaligned
accesses, memcpy(...,...,8) is likely compiled into a simple word
memory move anyway.

---
 src/Makefile      | 1 +
 src/config.h      | 3 +++
 src/hyperloglog.c | 2 +-
 3 files changed, 5 insertions(+), 1 deletion(-)

diff --git a/src/Makefile b/src/Makefile
index 3f445f40..f211eb84 100644
--- a/src/Makefile
+++ b/src/Makefile
@@ -14,6 +14,7 @@
 release_hdr := $(shell sh -c './mkreleasehdr.sh')
 uname_S := $(shell sh -c 'uname -s 2>/dev/null || echo not')
+uname_M := $(shell sh -c 'uname -m 2>/dev/null || echo not')
 OPTIMIZATION?=-O2
 DEPENDENCY_TARGETS=hiredis linenoise lua
 NODEPS:=clean distclean
diff --git a/src/config.h b/src/config.h
index 354f8f5e..1005dcc2 100644
--- a/src/config.h
+++ b/src/config.h
@@ -211,5 +211,8 @@ void setproctitle(const char *fmt, ...);
 #if defined(__arm) && !defined(__arm__)
 #define __arm__
 #endif
+#if defined (__aarch64__) && !defined(__arm64__)
+#define __arm64__
+#endif
 
 #endif
diff --git a/src/hyperloglog.c b/src/hyperloglog.c
index 7de5786f..b8a63a73 100644
--- a/src/hyperloglog.c
+++ b/src/hyperloglog.c
@@ -401,7 +401,7 @@ uint64_t MurmurHash64A (const void * key, int len, unsigned int seed) {
         uint64_t k;
 
 #if (BYTE_ORDER == LITTLE_ENDIAN)
-    #ifdef __arm__
+    #if defined(__arm__) && !defined(__arm64__)
         memcpy(&k,data,sizeof(uint64_t));
     #else
         k = *((uint64_t*)data);
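
Taken together, patches 1 and 2 reduce to a single portable-load idiom:
read unaligned 64 bit words through memcpy(), which is always well
defined, and let the compiler lower it to one plain load on targets
that allow unaligned access. Below is a minimal standalone sketch of
that idiom; it is not code from the Redis tree, and the load64_*
helper names are made up for illustration:

    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>

    /* Always well defined in C: the compiler may lower this to a
     * single 8-byte load where unaligned accesses are supported
     * (x86, 64 bit ARM) and to a safe byte-wise sequence elsewhere. */
    static uint64_t load64_memcpy(const unsigned char *p) {
        uint64_t k;
        memcpy(&k, p, sizeof(k));
        return k;
    }

    /* Undefined behavior when p is not 8-byte aligned; this is the
     * form patch 1 removes from the ARM build of MurmurHash64A(). */
    static uint64_t load64_direct(const unsigned char *p) {
        return *(const uint64_t *)p;
    }

    int main(void) {
        unsigned char buf[17] = {0};
        buf[1] = 0x2a;
        /* buf+1 is deliberately misaligned for a uint64_t. */
        printf("%llx\n", (unsigned long long)load64_memcpy(buf + 1));
        /* load64_direct(buf + 1) may trap on strict-alignment CPUs. */
        (void)load64_direct;
        return 0;
    }

Compiled with -O2 for x86-64 or AArch64, load64_memcpy() collapses to
one plain load, which is what the commit message above means by "a
simple word memory move".
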
From 4e9cf4cc7ed4b732fc4bb592f19ceb41d132954e Mon Sep 17 00:00:00 2001
From: Salvatore Sanfilippo
Date: Sun, 19 Feb 2017 15:02:37 +0000
Subject: [PATCH 3/4] ARM: Use libc malloc by default.

I'm not sure how much testing Jemalloc gets on ARM; moreover, compiling
Redis with Jemalloc support on not very powerful devices, like most of
the ARM boards people will build Redis on, is extremely slow. It is
still possible to enable the Jemalloc build if needed by using
"make MALLOC=jemalloc".

---
 src/Makefile | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/src/Makefile b/src/Makefile
index f211eb84..83f5c6d7 100644
--- a/src/Makefile
+++ b/src/Makefile
@@ -28,11 +28,14 @@ PREFIX?=/usr/local
 INSTALL_BIN=$(PREFIX)/bin
 INSTALL=install
 
-# Default allocator
+# Default allocator defaults to Jemalloc if it's not an ARM
+MALLOC=libc
+ifneq ($(uname_M),armv6l)
+ifneq ($(uname_M),armv7l)
 ifeq ($(uname_S),Linux)
-  MALLOC=jemalloc
-else
-  MALLOC=libc
+  MALLOC=jemalloc
+endif
+endif
 endif
 
 # Backwards compatibility for selecting an allocator

From 7329cc39818a05c168e7d1e791afb03c089f1933 Mon Sep 17 00:00:00 2001
From: Salvatore Sanfilippo
Date: Sun, 19 Feb 2017 15:07:08 +0000
Subject: [PATCH 4/4] ARM: Avoid fast path for BITOP.

GCC will produce certain multi-word load-store instructions that the
Linux kernel traps, since ARM v6 cannot handle them on unaligned
addresses. Better to use the slower but safer implementation than to
generate the exception, which would be very slow anyway.

---
 src/bitops.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/src/bitops.c b/src/bitops.c
index 46eee22c..7ab72633 100644
--- a/src/bitops.c
+++ b/src/bitops.c
@@ -654,8 +654,11 @@ void bitopCommand(client *c) {
 
         /* Fast path: as far as we have data for all the input bitmaps we
          * can take a fast path that performs much better than the
-         * vanilla algorithm. */
+         * vanilla algorithm. On ARM we skip the fast path since it will
+         * result in GCC compiling the code using multiple-words load/store
+         * operations that are not supported even in ARM >= v6. */
         j = 0;
+        #ifndef __arm__
        if (minlen >= sizeof(unsigned long)*4 && numkeys <= 16) {
             unsigned long *lp[16];
             unsigned long *lres = (unsigned long*) res;
@@ -716,6 +719,7 @@
                 }
             }
         }
+        #endif
 
         /* j is set to the next byte to process by the previous loop. */
         for (; j < maxlen; j++) {
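
For reference, the fast path that the last patch fences off has roughly
this shape: a word-at-a-time loop followed by the byte-at-a-time
"vanilla" loop that handles the tail (and, on ARM, all of the data).
The sketch below is a simplified two-input AND with a made-up
bitop_and() helper, not the real bitopCommand() code; the word loop is
the kind of code GCC may fuse into multi-word load/store instructions:

    #include <stddef.h>
    #include <stdio.h>
    #include <string.h>

    /* Simplified two-input AND over equal-length buffers: a sketch of
     * the shape of the code in bitopCommand(), not the real thing.
     * Assumes a, b and res are suitably aligned for unsigned long. */
    static void bitop_and(unsigned char *res, const unsigned char *a,
                          const unsigned char *b, size_t len) {
        size_t j = 0;
    #ifndef __arm__
        /* Fast path: one unsigned long per iteration. GCC may combine
         * consecutive accesses in loops like this into multi-word
         * load/store instructions, which trap on unaligned addresses
         * on ARM; hence the guard the patch adds. */
        const unsigned long *la = (const unsigned long *)a;
        const unsigned long *lb = (const unsigned long *)b;
        unsigned long *lres = (unsigned long *)res;
        while (len - j >= sizeof(unsigned long)) {
            *lres++ = *la++ & *lb++;
            j += sizeof(unsigned long);
        }
    #endif
        /* Vanilla loop: finishes the tail, or does everything on ARM. */
        for (; j < len; j++) res[j] = a[j] & b[j];
    }

    int main(void) {
        _Alignas(unsigned long) unsigned char a[12] = "hello world";
        _Alignas(unsigned long) unsigned char b[12];
        _Alignas(unsigned long) unsigned char out[12];
        memset(b, 0xff, sizeof(b));      /* AND with all-ones: identity */
        bitop_and(out, a, b, sizeof(a));
        printf("%s\n", (char *)out);     /* prints "hello world" */
        return 0;
    }

In the real function the fast path is additionally gated on
minlen >= sizeof(unsigned long)*4 and numkeys <= 16, as the first hunk
above shows.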