diff --git a/src/bio.c b/src/bio.c index eaac8e40..aa2cdf9f 100644 --- a/src/bio.c +++ b/src/bio.c @@ -108,9 +108,18 @@ void bioCreateBackgroundJob(int type, void *arg1, void *arg2, void *arg3) { void *bioProcessBackgroundJobs(void *arg) { struct bio_job *job; unsigned long type = (unsigned long) arg; + sigset_t sigset; pthread_detach(pthread_self()); pthread_mutex_lock(&bio_mutex[type]); + /* Block SIGALRM so we are sure that only the main thread will + * receive the watchdog signal. */ + sigemptyset(&sigset); + sigaddset(&sigset, SIGALRM); + if (pthread_sigmask(SIG_BLOCK, &sigset, NULL)) + redisLog(REDIS_WARNING, + "Warning: can't mask SIGALRM in bio.c thread: %s", strerror(errno)); + while(1) { listNode *ln; diff --git a/src/config.c b/src/config.c index ab49178a..8dffe288 100644 --- a/src/config.c +++ b/src/config.c @@ -627,6 +627,12 @@ void configSetCommand(redisClient *c) { } else if (!strcasecmp(c->argv[2]->ptr,"repl-timeout")) { if (getLongLongFromObject(o,&ll) == REDIS_ERR || ll <= 0) goto badfmt; server.repl_timeout = ll; + } else if (!strcasecmp(c->argv[2]->ptr,"watchdog-period")) { + if (getLongLongFromObject(o,&ll) == REDIS_ERR || ll < 0) goto badfmt; + if (ll) + enableWatchdog(ll); + else + disableWatchdog(); } else { addReplyErrorFormat(c,"Unsupported CONFIG parameter: %s", (char*)c->argv[2]->ptr); @@ -715,6 +721,7 @@ void configGetCommand(redisClient *c) { config_get_numerical_field("repl-ping-slave-period",server.repl_ping_slave_period); config_get_numerical_field("repl-timeout",server.repl_timeout); config_get_numerical_field("maxclients",server.maxclients); + config_get_numerical_field("watchdog-period",server.watchdog_period); /* Bool (yes/no) values */ config_get_bool_field("no-appendfsync-on-rewrite", diff --git a/src/debug.c b/src/debug.c index 9c8b6ef9..49c76824 100644 --- a/src/debug.c +++ b/src/debug.c @@ -559,10 +559,11 @@ void logRegisters(ucontext_t *uc) { } /* Logs the stack trace using the backtrace() call. */ -void logStackTrace(ucontext_t *uc) { +sds getStackTrace(ucontext_t *uc) { void *trace[100]; int i, trace_size = 0; char **messages = NULL; + sds st = sdsempty(); /* Generate the stack trace */ trace_size = backtrace(trace, 100); @@ -572,9 +573,12 @@ void logStackTrace(ucontext_t *uc) { trace[1] = getMcontextEip(uc); } messages = backtrace_symbols(trace, trace_size); - redisLog(REDIS_WARNING, "--- STACK TRACE"); - for (i=1; i + +void watchdogSignalHandler(int sig, siginfo_t *info, void *secret) { + ucontext_t *uc = (ucontext_t*) secret; + REDIS_NOTUSED(info); + REDIS_NOTUSED(sig); + sds st, log; + time_t now = time(NULL); + char date[128]; + FILE *fp; + + fp = (server.logfile == NULL) ? stdout : fopen(server.logfile,"a"); + if (fp == NULL) return; + + strftime(date,sizeof(date),"%d %b %H:%M:%S",localtime(&now)); + log = sdscatprintf(sdsempty(), + "\n--- WATCHDOG TIMER EXPIRED (%s) ---\n",date); +#ifdef HAVE_BACKTRACE + st = getStackTrace(uc); +#else + st = sdsnew("Sorry: no support for backtrace().\n"); +#endif + log = sdscatsds(log,st); + log = sdscat(log,"------\n\n"); + fprintf(fp,"%s",log); + if (server.logfile) fclose(fp); + sdsfree(st); + sdsfree(log); +} + +/* Schedule a SIGALRM delivery after the specified period in milliseconds. + * If a timer is already scheduled, this function will re-schedule it to the + * specified time. If period is 0 the current timer is disabled. */ +void watchdogScheduleSignal(int period) { + struct itimerval it; + + /* Will stop the timer if period is 0. */ + it.it_value.tv_sec = period/1000; + it.it_value.tv_usec = (period%1000)*1000; + /* Don't automatically restart. */ + it.it_interval.tv_sec = 0; + it.it_interval.tv_usec = 0; + setitimer(ITIMER_REAL, &it, NULL); +} + +/* Enable the software watchdong with the specified period in milliseconds. */ +void enableWatchdog(int period) { + if (server.watchdog_period == 0) { + struct sigaction act; + + /* Watchdog was actually disabled, so we have to setup the signal + * handler. */ + sigemptyset(&act.sa_mask); + act.sa_flags = SA_NODEFER | SA_ONSTACK | SA_SIGINFO; + act.sa_sigaction = watchdogSignalHandler; + sigaction(SIGALRM, &act, NULL); + } + if (period < 200) period = 200; /* We don't accept periods < 200 ms. */ + watchdogScheduleSignal(period); /* Adjust the current timer. */ + server.watchdog_period = period; +} + +/* Disable the software watchdog. */ +void disableWatchdog(void) { + struct sigaction act; + if (server.watchdog_period == 0) return; /* Already disabled. */ + watchdogScheduleSignal(0); /* Stop the current timer. */ + + /* Set the signal handler to SIG_IGN, this will also remove pending + * signals from the queue. */ + sigemptyset(&act.sa_mask); + act.sa_flags = 0; + act.sa_handler = SIG_IGN; + sigaction(SIGALRM, &act, NULL); + server.watchdog_period = 0; +} diff --git a/src/redis.c b/src/redis.c index 79cf2b83..5ee44a31 100644 --- a/src/redis.c +++ b/src/redis.c @@ -726,6 +726,10 @@ int serverCron(struct aeEventLoop *eventLoop, long long id, void *clientData) { REDIS_NOTUSED(id); REDIS_NOTUSED(clientData); + /* Software watchdog: deliver the SIGALRM that will reach the signal + * handler if we don't return here fast enough. */ + if (server.watchdog_period) watchdogScheduleSignal(server.watchdog_period); + /* We take a cached value of the unix time in the global state because * with virtual memory and aging there is to store the current time * in objects at every object access, and accuracy is not needed. @@ -1084,11 +1088,12 @@ void initServerConfig() { server.slowlog_log_slower_than = REDIS_SLOWLOG_LOG_SLOWER_THAN; server.slowlog_max_len = REDIS_SLOWLOG_MAX_LEN; - /* Assert */ + /* Debugging */ server.assert_failed = ""; server.assert_file = ""; server.assert_line = 0; server.bug_report_start = 0; + server.watchdog_period = 0; } /* This function will try to raise the max number of open files accordingly to diff --git a/src/redis.h b/src/redis.h index a1468a3b..c5812f0a 100644 --- a/src/redis.h +++ b/src/redis.h @@ -722,6 +722,7 @@ struct redisServer { char *assert_file; int assert_line; int bug_report_start; /* True if bug report header was already logged. */ + int watchdog_period; /* Software watchdog period in ms. 0 = off */ }; typedef struct pubsubPattern { @@ -1255,4 +1256,7 @@ void bugReportStart(void); void redisLogObjectDebugInfo(robj *o); void sigsegvHandler(int sig, siginfo_t *info, void *secret); sds genRedisInfoString(char *section); +void enableWatchdog(int period); +void disableWatchdog(void); +void watchdogScheduleSignal(int period); #endif diff --git a/src/zmalloc.c b/src/zmalloc.c index 97067735..79b56158 100644 --- a/src/zmalloc.c +++ b/src/zmalloc.c @@ -30,6 +30,15 @@ #include #include + +/* This function provide us access to the original libc free(). This is useful + * for instance to free results obtained by backtrace_symbols(). We need + * to define this function before including zmalloc.h that may shadow the + * free implementation if we use jemalloc or another non standard allocator. */ +void zlibc_free(void *ptr) { + free(ptr); +} + #include #include #include "config.h" diff --git a/src/zmalloc.h b/src/zmalloc.h index 995814c8..8079fb53 100644 --- a/src/zmalloc.h +++ b/src/zmalloc.h @@ -75,6 +75,7 @@ size_t zmalloc_used_memory(void); void zmalloc_enable_thread_safeness(void); float zmalloc_get_fragmentation_ratio(void); size_t zmalloc_get_rss(void); +void zlibc_free(void *ptr); #ifndef HAVE_MALLOC_SIZE size_t zmalloc_size(void *ptr);