From f9bca13a1a216c27509a6fb910f71c039dd60c7a Mon Sep 17 00:00:00 2001 From: Matt Stancliff Date: Thu, 26 Jun 2014 08:51:13 -0400 Subject: [PATCH 1/3] Use predefined macro for used_memory() update --- src/zmalloc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/zmalloc.c b/src/zmalloc.c index d0cf726c..bd36654f 100644 --- a/src/zmalloc.c +++ b/src/zmalloc.c @@ -220,7 +220,7 @@ size_t zmalloc_used_memory(void) { if (zmalloc_thread_safe) { #ifdef HAVE_ATOMIC - um = __sync_add_and_fetch(&used_memory, 0); + um = update_zmalloc_stat_add(0); #else pthread_mutex_lock(&used_memory_mutex); um = used_memory; From a953c883815bada9365504940af52306d9a413ea Mon Sep 17 00:00:00 2001 From: Matt Stancliff Date: Thu, 26 Jun 2014 08:52:53 -0400 Subject: [PATCH 2/3] Allow atomic memory count update with C11 builtins MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit From mailing list post https://groups.google.com/forum/#!topic/redis-db/QLjiQe4D7LA In zmalloc.c the following primitives are currently used to synchronize access to single global variable: __sync_add_and_fetch __sync_sub_and_fetch In some architectures such as powerpc these primitives are overhead intensive. More efficient C11 __atomic builtins are available with newer GCC versions, see http://gcc.gnu.org/onlinedocs/gcc-4.8.2/gcc/_005f_005fatomic-Builtins.html#_005f_005fatomic-Builtins By substituting the following __atomic… builtins: __atomic_add_fetch __atomic_sub_fetch the performance improvement on certain architectures such as powerpc can be significant, around 10% to 15%, over the implementation using __sync builtins while there is only slight uptick on Intel architectures because it was already enforcing Intel Strongly ordered memory semantics. The selection of __atomic built-ins can be predicated on the definition of ATOMIC_RELAXED which Is available on in gcc 4.8.2 and later versions. --- src/zmalloc.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/zmalloc.c b/src/zmalloc.c index bd36654f..11616e5a 100644 --- a/src/zmalloc.c +++ b/src/zmalloc.c @@ -67,7 +67,10 @@ void zlibc_free(void *ptr) { #define free(ptr) je_free(ptr) #endif -#ifdef HAVE_ATOMIC +#if defined(__ATOMIC_RELAXED) +#define update_zmalloc_stat_add(__n) __atomic_add_fetch(&used_memory, (__n), __ATOMIC_RELAXED) +#define update_zmalloc_stat_sub(__n) __atomic_sub_fetch(&used_memory, (__n), __ATOMIC_RELAXED) +#elif defined(HAVE_ATOMIC) #define update_zmalloc_stat_add(__n) __sync_add_and_fetch(&used_memory, (__n)) #define update_zmalloc_stat_sub(__n) __sync_sub_and_fetch(&used_memory, (__n)) #else @@ -219,7 +222,7 @@ size_t zmalloc_used_memory(void) { size_t um; if (zmalloc_thread_safe) { -#ifdef HAVE_ATOMIC +#if defined(__ATOMIC_RELAXED) || defined(HAVE_ATOMIC) um = update_zmalloc_stat_add(0); #else pthread_mutex_lock(&used_memory_mutex); From a3e7a665ad62ab276b098c3c10460f111ea81200 Mon Sep 17 00:00:00 2001 From: Matt Stancliff Date: Thu, 26 Jun 2014 08:55:47 -0400 Subject: [PATCH 3/3] Allow __powerpc__ to define HAVE_ATOMIC too MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit From mailing list post https://groups.google.com/forum/#!topic/redis-db/D3k7KmJmYgM In the file “config.h”, the definition HAVE_ATOMIC is used to indicate if an architecture on which redis is implemented supports atomic synchronization primitives. Powerpc supports atomic synchronization primitives, however, it is not listed as one of the architectures supported in config.h. This patch adds the __powerpc__ to the list of architectures supporting these primitives. The improvement of redis due to the atomic synchronization on powerpc is significant, around 30% to 40%, over the default implementation using pthreads. This proposal adds __powerpc__ to the list of architectures designated to support atomic builtins. --- src/config.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/config.h b/src/config.h index 8041f7eb..1bc70a13 100644 --- a/src/config.h +++ b/src/config.h @@ -185,7 +185,7 @@ void setproctitle(const char *fmt, ...); #error "Undefined or invalid BYTE_ORDER" #endif -#if (__i386 || __amd64) && __GNUC__ +#if (__i386 || __amd64 || __powerpc__) && __GNUC__ #define GNUC_VERSION (__GNUC__ * 10000 + __GNUC_MINOR__ * 100 + __GNUC_PATCHLEVEL__) #if (GNUC_VERSION >= 40100) || defined(__clang__) #define HAVE_ATOMIC