Merge branch 'watchdog' into unstable

This commit is contained in:
antirez 2012-03-28 13:16:19 +02:00
commit 1043c8064b
7 changed files with 126 additions and 9 deletions

View File

@ -108,9 +108,18 @@ void bioCreateBackgroundJob(int type, void *arg1, void *arg2, void *arg3) {
void *bioProcessBackgroundJobs(void *arg) {
struct bio_job *job;
unsigned long type = (unsigned long) arg;
sigset_t sigset;
pthread_detach(pthread_self());
pthread_mutex_lock(&bio_mutex[type]);
/* Block SIGALRM so we are sure that only the main thread will
* receive the watchdog signal. */
sigemptyset(&sigset);
sigaddset(&sigset, SIGALRM);
if (pthread_sigmask(SIG_BLOCK, &sigset, NULL))
redisLog(REDIS_WARNING,
"Warning: can't mask SIGALRM in bio.c thread: %s", strerror(errno));
while(1) {
listNode *ln;

View File

@ -627,6 +627,12 @@ void configSetCommand(redisClient *c) {
} else if (!strcasecmp(c->argv[2]->ptr,"repl-timeout")) {
if (getLongLongFromObject(o,&ll) == REDIS_ERR || ll <= 0) goto badfmt;
server.repl_timeout = ll;
} else if (!strcasecmp(c->argv[2]->ptr,"watchdog-period")) {
if (getLongLongFromObject(o,&ll) == REDIS_ERR || ll < 0) goto badfmt;
if (ll)
enableWatchdog(ll);
else
disableWatchdog();
} else {
addReplyErrorFormat(c,"Unsupported CONFIG parameter: %s",
(char*)c->argv[2]->ptr);
@ -715,6 +721,7 @@ void configGetCommand(redisClient *c) {
config_get_numerical_field("repl-ping-slave-period",server.repl_ping_slave_period);
config_get_numerical_field("repl-timeout",server.repl_timeout);
config_get_numerical_field("maxclients",server.maxclients);
config_get_numerical_field("watchdog-period",server.watchdog_period);
/* Bool (yes/no) values */
config_get_bool_field("no-appendfsync-on-rewrite",

View File

@ -559,10 +559,11 @@ void logRegisters(ucontext_t *uc) {
}
/* Logs the stack trace using the backtrace() call. */
void logStackTrace(ucontext_t *uc) {
sds getStackTrace(ucontext_t *uc) {
void *trace[100];
int i, trace_size = 0;
char **messages = NULL;
sds st = sdsempty();
/* Generate the stack trace */
trace_size = backtrace(trace, 100);
@ -572,9 +573,12 @@ void logStackTrace(ucontext_t *uc) {
trace[1] = getMcontextEip(uc);
}
messages = backtrace_symbols(trace, trace_size);
redisLog(REDIS_WARNING, "--- STACK TRACE");
for (i=1; i<trace_size; ++i)
redisLog(REDIS_WARNING,"%s", messages[i]);
for (i=1; i<trace_size; ++i) {
st = sdscat(st,messages[i]);
st = sdscatlen(st,"\n",1);
}
zlibc_free(messages);
return st;
}
/* Log information about the "current" client, that is, the client that is
@ -617,7 +621,7 @@ void logCurrentClient(void) {
void sigsegvHandler(int sig, siginfo_t *info, void *secret) {
ucontext_t *uc = (ucontext_t*) secret;
sds infostring, clients;
sds infostring, clients, st;
struct sigaction act;
REDIS_NOTUSED(info);
@ -629,7 +633,9 @@ void sigsegvHandler(int sig, siginfo_t *info, void *secret) {
server.assert_file, server.assert_line);
/* Log the stack trace */
logStackTrace(uc);
st = getStackTrace(uc);
redisLog(REDIS_WARNING, "--- STACK TRACE\n%s", st);
sdsfree(st);
/* Log INFO and CLIENT LIST */
redisLog(REDIS_WARNING, "--- INFO OUTPUT");
@ -661,11 +667,87 @@ void sigsegvHandler(int sig, siginfo_t *info, void *secret) {
/* Make sure we exit with the right signal at the end. So for instance
* the core will be dumped if enabled. */
sigemptyset (&act.sa_mask);
/* When the SA_SIGINFO flag is set in sa_flags then sa_sigaction
* is used. Otherwise, sa_handler is used */
act.sa_flags = SA_NODEFER | SA_ONSTACK | SA_RESETHAND;
act.sa_handler = SIG_DFL;
sigaction (sig, &act, NULL);
kill(getpid(),sig);
}
#endif /* HAVE_BACKTRACE */
/* =========================== Software Watchdog ============================ */
#include <sys/time.h>
/* SIGALRM handler of the software watchdog.
 *
 * Appends a "WATCHDOG TIMER EXPIRED" report to the server log: a timestamp
 * followed by the stack trace of the interrupted code (or a short notice
 * when the build has no backtrace() support). The report goes to stdout
 * when server.logfile is NULL, otherwise it is appended to that file.
 * If the log file cannot be opened the handler returns without logging.
 *
 * 'sig' and 'info' are ignored; 'secret' is the ucontext pointer supplied
 * by the kernel for SA_SIGINFO handlers and is only consulted when
 * HAVE_BACKTRACE is defined.
 *
 * NOTE(review): fopen(), localtime(), fprintf() and the allocating sds
 * calls are not on the POSIX async-signal-safe list, so this handler is
 * best-effort diagnostics and may misbehave if the signal interrupts the
 * allocator or stdio. */
void watchdogSignalHandler(int sig, siginfo_t *info, void *secret) {
#ifdef HAVE_BACKTRACE
    ucontext_t *uc = (ucontext_t*) secret;
#else
    /* Without backtrace support the context is never inspected. */
    REDIS_NOTUSED(secret);
#endif
    REDIS_NOTUSED(info);
    REDIS_NOTUSED(sig);
    sds st, log;
    time_t now = time(NULL);
    char date[128];
    FILE *fp;

    fp = (server.logfile == NULL) ? stdout : fopen(server.logfile,"a");
    if (fp == NULL) return;

    strftime(date,sizeof(date),"%d %b %H:%M:%S",localtime(&now));
    log = sdscatprintf(sdsempty(),
        "\n--- WATCHDOG TIMER EXPIRED (%s) ---\n",date);
#ifdef HAVE_BACKTRACE
    st = getStackTrace(uc);
#else
    st = sdsnew("Sorry: no support for backtrace().\n");
#endif
    log = sdscatsds(log,st);
    log = sdscat(log,"------\n\n");
    fprintf(fp,"%s",log);

    /* fclose() flushes the log file; stdout is left open, so it has to be
     * flushed explicitly or the report may sit in the stdio buffer. */
    if (server.logfile)
        fclose(fp);
    else
        fflush(fp);

    sdsfree(st);
    sdsfree(log);
}
/* Arm the one-shot ITIMER_REAL timer so that SIGALRM is delivered 'period'
 * milliseconds from now. Calling this while a timer is still pending
 * replaces the old deadline with the new one. A period of 0 produces an
 * all-zero expiry value, which stops the current timer. */
void watchdogScheduleSignal(int period) {
    struct itimerval timer;

    /* A zero interval means the timer is not reloaded after it fires. */
    timer.it_interval.tv_sec = 0;
    timer.it_interval.tv_usec = 0;

    /* Split the milliseconds into whole seconds plus the remaining
     * microseconds. */
    timer.it_value.tv_sec = period / 1000;
    timer.it_value.tv_usec = (period % 1000) * 1000;

    setitimer(ITIMER_REAL, &timer, NULL);
}
/* Enable the software watchdong with the specified period in milliseconds. */
void enableWatchdog(int period) {
if (server.watchdog_period == 0) {
struct sigaction act;
/* Watchdog was actually disabled, so we have to setup the signal
* handler. */
sigemptyset(&act.sa_mask);
act.sa_flags = SA_NODEFER | SA_ONSTACK | SA_SIGINFO;
act.sa_sigaction = watchdogSignalHandler;
sigaction(SIGALRM, &act, NULL);
}
if (period < 200) period = 200; /* We don't accept periods < 200 ms. */
watchdogScheduleSignal(period); /* Adjust the current timer. */
server.watchdog_period = period;
}
/* Disable the software watchdog. */
void disableWatchdog(void) {
struct sigaction act;
if (server.watchdog_period == 0) return; /* Already disabled. */
watchdogScheduleSignal(0); /* Stop the current timer. */
/* Set the signal handler to SIG_IGN, this will also remove pending
* signals from the queue. */
sigemptyset(&act.sa_mask);
act.sa_flags = 0;
act.sa_handler = SIG_IGN;
sigaction(SIGALRM, &act, NULL);
server.watchdog_period = 0;
}

View File

@ -726,6 +726,10 @@ int serverCron(struct aeEventLoop *eventLoop, long long id, void *clientData) {
REDIS_NOTUSED(id);
REDIS_NOTUSED(clientData);
/* Software watchdog: deliver the SIGALRM that will reach the signal
* handler if we don't return here fast enough. */
if (server.watchdog_period) watchdogScheduleSignal(server.watchdog_period);
/* We keep a cached copy of the unix time in the global state because,
* with virtual memory and aging, the current time has to be stored
* in objects at every object access, and accuracy is not needed.
@ -1084,11 +1088,12 @@ void initServerConfig() {
server.slowlog_log_slower_than = REDIS_SLOWLOG_LOG_SLOWER_THAN;
server.slowlog_max_len = REDIS_SLOWLOG_MAX_LEN;
/* Assert */
/* Debugging */
server.assert_failed = "<no assertion failed>";
server.assert_file = "<no file>";
server.assert_line = 0;
server.bug_report_start = 0;
server.watchdog_period = 0;
}
/* This function will try to raise the max number of open files accordingly to

View File

@ -722,6 +722,7 @@ struct redisServer {
char *assert_file;
int assert_line;
int bug_report_start; /* True if bug report header was already logged. */
int watchdog_period; /* Software watchdog period in ms. 0 = off */
};
typedef struct pubsubPattern {
@ -1255,4 +1256,7 @@ void bugReportStart(void);
void redisLogObjectDebugInfo(robj *o);
void sigsegvHandler(int sig, siginfo_t *info, void *secret);
sds genRedisInfoString(char *section);
void enableWatchdog(int period);
void disableWatchdog(void);
void watchdogScheduleSignal(int period);
#endif

View File

@ -30,6 +30,15 @@
#include <stdio.h>
#include <stdlib.h>
/* Thin wrapper giving access to the C library's original free(). It is
 * useful, for instance, to release results obtained from
 * backtrace_symbols(). The definition has to appear before zmalloc.h is
 * included, because that header may shadow free() when jemalloc or another
 * non standard allocator is in use. */
void zlibc_free(void *ptr) {
    free(ptr);
}
#include <string.h>
#include <pthread.h>
#include "config.h"

View File

@ -75,6 +75,7 @@ size_t zmalloc_used_memory(void);
void zmalloc_enable_thread_safeness(void);
float zmalloc_get_fragmentation_ratio(void);
size_t zmalloc_get_rss(void);
void zlibc_free(void *ptr);
#ifndef HAVE_MALLOC_SIZE
size_t zmalloc_size(void *ptr);