loading side of the threaded VM

This commit is contained in:
antirez 2010-01-28 10:12:04 -05:00
parent a544018d04
commit d5d55fc319
5 changed files with 290 additions and 64 deletions

1
TODO
View File

@ -17,6 +17,7 @@ Virtual Memory sub-TODO:
* Possibly decrRefCount() against swapped objects can be moved into I/O threads, as it's a slow operation against million elements list, and in general consumes CPU time that can be consumed by other threads (and cores).
* EXISTS should avoid loading the object if possible without too make the code too specialized.
* vm-min-age <seconds> option
* Make sure objects loaded from the VM are specially encoded when possible.
* Hashes (GET/SET/DEL/INCRBY/EXISTS/FIELDS/LEN/MSET/MGET). Special encoding for hashes with < N keys.

10
ae.c
View File

@ -62,6 +62,7 @@ aeEventLoop *aeCreateEventLoop(void) {
eventLoop->timeEventNextId = 0;
eventLoop->stop = 0;
eventLoop->maxfd = -1;
eventLoop->beforesleep = NULL;
if (aeApiCreate(eventLoop) == -1) {
zfree(eventLoop);
return NULL;
@ -373,10 +374,17 @@ int aeWait(int fd, int mask, long long milliseconds) {
void aeMain(aeEventLoop *eventLoop) {
eventLoop->stop = 0;
while (!eventLoop->stop)
while (!eventLoop->stop) {
if (eventLoop->beforesleep != NULL)
eventLoop->beforesleep(eventLoop);
aeProcessEvents(eventLoop, AE_ALL_EVENTS);
}
}
char *aeGetApiName(void) {
return aeApiName();
}
void aeSetBeforeSleepProc(aeEventLoop *eventLoop, aeBeforeSleepProc *beforesleep) {
eventLoop->beforesleep = beforesleep;
}

3
ae.h
View File

@ -58,6 +58,7 @@ struct aeEventLoop;
typedef void aeFileProc(struct aeEventLoop *eventLoop, int fd, void *clientData, int mask);
typedef int aeTimeProc(struct aeEventLoop *eventLoop, long long id, void *clientData);
typedef void aeEventFinalizerProc(struct aeEventLoop *eventLoop, void *clientData);
typedef void aeBeforeSleepProc(struct aeEventLoop *eventLoop);
/* File event structure */
typedef struct aeFileEvent {
@ -93,6 +94,7 @@ typedef struct aeEventLoop {
aeTimeEvent *timeEventHead;
int stop;
void *apidata; /* This is used for polling API specific data */
aeBeforeSleepProc *beforesleep;
} aeEventLoop;
/* Prototypes */
@ -110,5 +112,6 @@ int aeProcessEvents(aeEventLoop *eventLoop, int flags);
int aeWait(int fd, int mask, long long milliseconds);
void aeMain(aeEventLoop *eventLoop);
char *aeGetApiName(void);
void aeSetBeforeSleepProc(aeEventLoop *eventLoop, aeBeforeSleepProc *beforesleep);
#endif

335
redis.c
View File

@ -172,13 +172,12 @@
#define REDIS_MAX_COMPLETED_JOBS_PROCESSED 1
/* Client flags */
#define REDIS_CLOSE 1 /* This client connection should be closed ASAP */
#define REDIS_SLAVE 2 /* This client is a slave server */
#define REDIS_MASTER 4 /* This client is a master server */
#define REDIS_MONITOR 8 /* This client is a slave monitor, see MONITOR */
#define REDIS_MULTI 16 /* This client is in a MULTI context */
#define REDIS_BLOCKED 32 /* The client is waiting in a blocking operation */
#define REDIS_IO_WAIT 64 /* The client is waiting for Virtual Memory I/O */
#define REDIS_SLAVE 1 /* This client is a slave server */
#define REDIS_MASTER 2 /* This client is a master server */
#define REDIS_MONITOR 4 /* This client is a slave monitor, see MONITOR */
#define REDIS_MULTI 8 /* This client is in a MULTI context */
#define REDIS_BLOCKED 16 /* The client is waiting in a blocking operation */
#define REDIS_IO_WAIT 32 /* The client is waiting for Virtual Memory I/O */
/* Slave replication state - slave side */
#define REDIS_REPL_NONE 0 /* No active replication */
@ -269,6 +268,7 @@ typedef struct redisDb {
dict *dict; /* The keyspace for this DB */
dict *expires; /* Timeout of keys with a timeout set */
dict *blockingkeys; /* Keys with clients waiting for data (BLPOP) */
dict *io_keys; /* Keys with clients waiting for VM I/O */
int id;
} redisDb;
@ -298,8 +298,7 @@ typedef struct redisClient {
list *reply;
int sentlen;
time_t lastinteraction; /* time of the last interaction, used for timeout */
int flags; /* REDIS_CLOSE | REDIS_SLAVE | REDIS_MONITOR */
/* REDIS_MULTI */
int flags; /* REDIS_SLAVE | REDIS_MONITOR | REDIS_MULTI ... */
int slaveseldb; /* slave selected db, if this client is a slave */
int authenticated; /* when requirepass is non-NULL */
int replstate; /* replication state if this is a slave */
@ -307,7 +306,7 @@ typedef struct redisClient {
long repldboff; /* replication DB file offset */
off_t repldbsize; /* replication DB file size */
multiState mstate; /* MULTI/EXEC state */
robj **blockingkeys; /* The key we waiting to terminate a blocking
robj **blockingkeys; /* The key we are waiting to terminate a blocking
* operation such as BLPOP. Otherwise NULL. */
int blockingkeysnum; /* Number of blocking keys */
time_t blockingto; /* Blocking operation timeout. If UNIX current time
@ -373,7 +372,8 @@ struct redisServer {
int replstate;
unsigned int maxclients;
unsigned long long maxmemory;
unsigned int blockedclients;
unsigned int blpop_blocked_clients;
unsigned int vm_blocked_clients;
/* Sort parameters - qsort_r() is only available under BSD so we
* have to take this state global, in order to pass it to sortCompare() */
int sort_desc;
@ -399,7 +399,7 @@ struct redisServer {
list *io_newjobs; /* List of VM I/O jobs yet to be processed */
list *io_processing; /* List of VM I/O jobs being processed */
list *io_processed; /* List of VM I/O jobs already processed */
list *io_clients; /* All the clients waiting for SWAP I/O operations */
list *io_ready_clients; /* Clients ready to be unblocked. All keys loaded */
pthread_mutex_t io_mutex; /* lock to access io_jobs/io_done/io_thread_job */
pthread_mutex_t obj_freelist_mutex; /* safe redis objects creation/free */
pthread_mutex_t io_swapfile_mutex; /* So we can lseek + write */
@ -487,7 +487,7 @@ static double R_Zero, R_PosInf, R_NegInf, R_Nan;
#define REDIS_IOJOB_LOAD 0 /* Load from disk to memory */
#define REDIS_IOJOB_PREPARE_SWAP 1 /* Compute needed pages */
#define REDIS_IOJOB_DO_SWAP 2 /* Swap from memory to disk */
typedef struct iojon {
typedef struct iojob {
int type; /* Request type, REDIS_IOJOB_* */
redisDb *db;/* Redis database */
robj *key; /* This I/O request is about swapping this key */
@ -565,6 +565,13 @@ static robj *vmReadObjectFromSwap(off_t page, int type);
static void waitEmptyIOJobsQueue(void);
static void vmReopenSwapFile(void);
static int vmFreePage(off_t page);
static int blockClientOnSwappedKeys(struct redisCommand *cmd, redisClient *c);
static int dontWaitForSwappedKey(redisClient *c, robj *key);
static void handleClientsBlockedOnSwappedKey(redisDb *db, robj *key);
static void readQueryFromClient(aeEventLoop *el, int fd, void *privdata, int mask);
static struct redisCommand *lookupCommand(char *name);
static void call(redisClient *c, struct redisCommand *cmd);
static void resetClient(redisClient *c);
static void authCommand(redisClient *c);
static void pingCommand(redisClient *c);
@ -994,7 +1001,8 @@ static dictType keyptrDictType = {
};
/* Keylist hash table type has unencoded redis objects as keys and
* lists as values. It's used for blocking operations (BLPOP) */
* lists as values. It's used for blocking operations (BLPOP) and to
* map swapped keys to a list of clients waiting for this keys to be loaded. */
static dictType keylistDictType = {
dictObjHash, /* hash function */
NULL, /* key dup */
@ -1195,7 +1203,7 @@ static int serverCron(struct aeEventLoop *eventLoop, long long id, void *clientD
}
/* Close connections of timedout clients */
if ((server.maxidletime && !(loops % 10)) || server.blockedclients)
if ((server.maxidletime && !(loops % 10)) || server.blpop_blocked_clients)
closeTimedoutClients();
/* Check if a background saving or AOF rewrite in progress terminated */
@ -1294,6 +1302,38 @@ static int serverCron(struct aeEventLoop *eventLoop, long long id, void *clientD
return 1000;
}
/* This function gets called every time Redis is entering the
* main loop of the event driven library, that is, before to sleep
* for ready file descriptors. */
static void beforeSleep(struct aeEventLoop *eventLoop) {
REDIS_NOTUSED(eventLoop);
if (server.vm_enabled && listLength(server.io_ready_clients)) {
listIter li;
listNode *ln;
listRewind(server.io_ready_clients,&li);
while((ln = listNext(&li))) {
redisClient *c = ln->value;
struct redisCommand *cmd;
/* Resume the client. */
listDelNode(server.io_ready_clients,ln);
c->flags &= (~REDIS_IO_WAIT);
server.vm_blocked_clients--;
aeCreateFileEvent(server.el, c->fd, AE_READABLE,
readQueryFromClient, c);
cmd = lookupCommand(c->argv[0]->ptr);
assert(cmd != NULL);
call(c,cmd);
resetClient(c);
/* There may be more data to process in the input buffer. */
if (c->querybuf && sdslen(c->querybuf) > 0)
processInputBuffer(c);
}
}
}
static void createSharedObjects(void) {
shared.crlf = createObject(REDIS_STRING,sdsnew("\r\n"));
shared.ok = createObject(REDIS_STRING,sdsnew("+OK\r\n"));
@ -1367,7 +1407,7 @@ static void initServerConfig() {
server.rdbcompression = 1;
server.sharingpoolsize = 1024;
server.maxclients = 0;
server.blockedclients = 0;
server.blpop_blocked_clients = 0;
server.maxmemory = 0;
server.vm_enabled = 0;
server.vm_swap_file = zstrdup("/tmp/redis-%p.vm");
@ -1375,6 +1415,7 @@ static void initServerConfig() {
server.vm_pages = 1024*1024*100; /* 104 millions of pages */
server.vm_max_memory = 1024LL*1024*1024*1; /* 1 GB of RAM */
server.vm_max_threads = 4;
server.vm_blocked_clients = 0;
resetServerSaveParams();
@ -1425,6 +1466,8 @@ static void initServer() {
server.db[j].dict = dictCreate(&hashDictType,NULL);
server.db[j].expires = dictCreate(&keyptrDictType,NULL);
server.db[j].blockingkeys = dictCreate(&keylistDictType,NULL);
if (server.vm_enabled)
server.db[j].io_keys = dictCreate(&keylistDictType,NULL);
server.db[j].id = j;
}
server.cronloops = 0;
@ -1685,11 +1728,17 @@ static void freeClient(redisClient *c) {
ln = listSearchKey(server.clients,c);
redisAssert(ln != NULL);
listDelNode(server.clients,ln);
/* Remove from the list of clients waiting for VM operations */
if (server.vm_enabled && listLength(c->io_keys)) {
ln = listSearchKey(server.io_clients,c);
if (ln) listDelNode(server.io_clients,ln);
listRelease(c->io_keys);
/* Remove from the list of clients waiting for swapped keys */
if (c->flags & REDIS_IO_WAIT && listLength(c->io_keys) == 0) {
ln = listSearchKey(server.io_ready_clients,c);
if (ln) {
listDelNode(server.io_ready_clients,ln);
server.vm_blocked_clients--;
}
}
while (server.vm_enabled && listLength(c->io_keys)) {
ln = listFirst(c->io_keys);
dontWaitForSwappedKey(c,ln->value);
}
listRelease(c->io_keys);
/* Other cleanup */
@ -2002,6 +2051,9 @@ static int processCommand(redisClient *c) {
freeClient(c);
return 0;
}
/* Now lookup the command and check ASAP about trivial error conditions
* such wrong arity, bad command name and so forth. */
cmd = lookupCommand(c->argv[0]->ptr);
if (!cmd) {
addReplySds(c,
@ -2022,6 +2074,7 @@ static int processCommand(redisClient *c) {
resetClient(c);
return 1;
} else if (cmd->flags & REDIS_CMD_BULK && c->bulklen == -1) {
/* This is a bulk command, we have to read the last argument yet. */
int bulklen = atoi(c->argv[c->argc-1]->ptr);
decrRefCount(c->argv[c->argc-1]);
@ -2043,6 +2096,8 @@ static int processCommand(redisClient *c) {
c->argc++;
c->querybuf = sdsrange(c->querybuf,c->bulklen,-1);
} else {
/* Otherwise return... there is to read the last argument
* from the socket. */
return 1;
}
}
@ -2068,14 +2123,12 @@ static int processCommand(redisClient *c) {
queueMultiCommand(c,cmd);
addReply(c,shared.queued);
} else {
if (server.vm_enabled && server.vm_max_threads > 0 &&
blockClientOnSwappedKeys(cmd,c)) return 1;
call(c,cmd);
}
/* Prepare the client for the next command */
if (c->flags & REDIS_CLOSE) {
freeClient(c);
return 0;
}
resetClient(c);
return 1;
}
@ -2550,10 +2603,16 @@ static robj *lookupKey(redisDb *db, robj *key) {
/* Update the access time of the key for the aging algorithm. */
key->vm.atime = server.unixtime;
} else {
int notify = (key->storage == REDIS_VM_LOADING);
/* Our value was swapped on disk. Bring it at home. */
redisAssert(val == NULL);
val = vmLoadObject(key);
dictGetEntryVal(de) = val;
/* Clients blocked by the VM subsystem may be waiting for
* this key... */
if (notify) handleClientsBlockedOnSwappedKey(db,key);
}
}
return val;
@ -5618,7 +5677,7 @@ static sds genRedisInfoString(void) {
uptime/(3600*24),
listLength(server.clients)-listLength(server.slaves),
listLength(server.slaves),
server.blockedclients,
server.blpop_blocked_clients,
zmalloc_used_memory(),
hmem,
server.dirty,
@ -5656,8 +5715,8 @@ static sds genRedisInfoString(void) {
"vm_stats_io_newjobs_len:%lu\r\n"
"vm_stats_io_processing_len:%lu\r\n"
"vm_stats_io_processed_len:%lu\r\n"
"vm_stats_io_waiting_clients:%lu\r\n"
"vm_stats_io_active_threads:%lu\r\n"
"vm_stats_blocked_clients:%lu\r\n"
,(unsigned long long) server.vm_max_memory,
(unsigned long long) server.vm_page_size,
(unsigned long long) server.vm_pages,
@ -5668,8 +5727,8 @@ static sds genRedisInfoString(void) {
(unsigned long) listLength(server.io_newjobs),
(unsigned long) listLength(server.io_processing),
(unsigned long) listLength(server.io_processed),
(unsigned long) listLength(server.io_clients),
(unsigned long) server.io_active_threads
(unsigned long) server.io_active_threads,
(unsigned long) server.vm_blocked_clients
);
unlockThreadedIO();
}
@ -5942,7 +6001,7 @@ static void blockForKeys(redisClient *c, robj **keys, int numkeys, time_t timeou
/* Mark the client as a blocked client */
c->flags |= REDIS_BLOCKED;
aeDeleteFileEvent(server.el,c->fd,AE_READABLE);
server.blockedclients++;
server.blpop_blocked_clients++;
}
/* Unblock a client that's waiting in a blocking operation such as BLPOP */
@ -5968,7 +6027,7 @@ static void unblockClientWaitingData(redisClient *c) {
zfree(c->blockingkeys);
c->blockingkeys = NULL;
c->flags &= (~REDIS_BLOCKED);
server.blockedclients--;
server.blpop_blocked_clients--;
/* Ok now we are ready to get read events from socket, note that we
* can't trap errors here as it's possible that unblockClientWaitingDatas() is
* called from freeClient() itself, and the only thing we can do
@ -7061,7 +7120,7 @@ static void vmInit(void) {
server.io_newjobs = listCreate();
server.io_processing = listCreate();
server.io_processed = listCreate();
server.io_clients = listCreate();
server.io_ready_clients = listCreate();
pthread_mutex_init(&server.io_mutex,NULL);
pthread_mutex_init(&server.obj_freelist_mutex,NULL);
pthread_mutex_init(&server.io_swapfile_mutex,NULL);
@ -7254,13 +7313,13 @@ static robj *vmReadObjectFromSwap(off_t page, int type) {
if (server.vm_enabled) pthread_mutex_lock(&server.io_swapfile_mutex);
if (fseeko(server.vm_fp,page*server.vm_page_size,SEEK_SET) == -1) {
redisLog(REDIS_WARNING,
"Unrecoverable VM problem in vmLoadObject(): can't seek: %s",
"Unrecoverable VM problem in vmReadObjectFromSwap(): can't seek: %s",
strerror(errno));
exit(1);
}
o = rdbLoadObject(type,server.vm_fp);
if (o == NULL) {
redisLog(REDIS_WARNING, "Unrecoverable VM problem in vmLoadObject(): can't load object from swap file: %s", strerror(errno));
redisLog(REDIS_WARNING, "Unrecoverable VM problem in vmReadObjectFromSwap(): can't load object from swap file: %s", strerror(errno));
exit(1);
}
if (server.vm_enabled) pthread_mutex_unlock(&server.io_swapfile_mutex);
@ -7275,7 +7334,7 @@ static robj *vmReadObjectFromSwap(off_t page, int type) {
static robj *vmGenericLoadObject(robj *key, int preview) {
robj *val;
redisAssert(key->storage == REDIS_VM_SWAPPED);
redisAssert(key->storage == REDIS_VM_SWAPPED || key->storage == REDIS_VM_LOADING);
val = vmReadObjectFromSwap(key->vm.page,key->vtype);
if (!preview) {
key->storage = REDIS_VM_MEMORY;
@ -7485,8 +7544,9 @@ static int deleteIfSwapped(redisDb *db, robj *key) {
/* =================== Virtual Memory - Threaded I/O ======================= */
static void freeIOJob(iojob *j) {
if (j->type == REDIS_IOJOB_PREPARE_SWAP ||
j->type == REDIS_IOJOB_DO_SWAP)
if ((j->type == REDIS_IOJOB_PREPARE_SWAP ||
j->type == REDIS_IOJOB_DO_SWAP ||
j->type == REDIS_IOJOB_LOAD) && j->val != NULL)
decrRefCount(j->val);
decrRefCount(j->key);
zfree(j);
@ -7537,6 +7597,8 @@ static void vmThreadedIOCompletedJob(aeEventLoop *el, int fd, void *privdata,
assert(de != NULL);
key = dictGetEntryKey(de);
if (j->type == REDIS_IOJOB_LOAD) {
redisDb *db;
/* Key loaded, bring it at home */
key->storage = REDIS_VM_MEMORY;
key->vm.atime = server.unixtime;
@ -7545,7 +7607,12 @@ static void vmThreadedIOCompletedJob(aeEventLoop *el, int fd, void *privdata,
(unsigned char*) key->ptr);
server.vm_stats_swapped_objects--;
server.vm_stats_swapins++;
dictGetEntryVal(de) = j->val;
incrRefCount(j->val);
db = j->db;
freeIOJob(j);
/* Handle clients waiting for this key to be loaded. */
handleClientsBlockedOnSwappedKey(db,key);
} else if (j->type == REDIS_IOJOB_PREPARE_SWAP) {
/* Now we know the amount of pages required to swap this object.
* Let's find some space for it, and queue this task again
@ -7671,23 +7738,25 @@ again:
listDelNode(lists[i],ln);
break;
case 1: /* io_processing */
/* Oh Shi- the thread is messing with the Job, and
* probably with the object if this is a
* PREPARE_SWAP or DO_SWAP job. Better to wait for the
* job to move into the next queue... */
if (job->type != REDIS_IOJOB_LOAD) {
/* Yes, we try again and again until the job
* is completed. */
unlockThreadedIO();
/* But let's wait some time for the I/O thread
* to finish with this job. After all this condition
* should be very rare. */
usleep(1);
goto again;
} else {
job->canceled = 1;
break;
}
/* Oh Shi- the thread is messing with the Job:
*
* Probably it's accessing the object if this is a
* PREPARE_SWAP or DO_SWAP job.
* If it's a LOAD job it may be reading from disk and
* if we don't wait for the job to terminate before to
* cancel it, maybe in a few microseconds data can be
* corrupted in this pages. So the short story is:
*
* Better to wait for the job to move into the
* next queue (processed)... */
/* We try again and again until the job is completed. */
unlockThreadedIO();
/* But let's wait some time for the I/O thread
* to finish with this job. After all this condition
* should be very rare. */
usleep(1);
goto again;
case 2: /* io_processed */
/* The job was already processed, that's easy...
* just mark it as canceled so that we'll ignore it
@ -7740,6 +7809,7 @@ static void *IOThreadEntryPoint(void *arg) {
/* Process the Job */
if (j->type == REDIS_IOJOB_LOAD) {
j->val = vmReadObjectFromSwap(j->page,j->key->vtype);
} else if (j->type == REDIS_IOJOB_PREPARE_SWAP) {
FILE *fp = fopen("/dev/null","w+");
j->pages = rdbSavedObjectPages(j->val,fp);
@ -7843,16 +7913,154 @@ static int vmSwapObjectThreaded(robj *key, robj *val, redisDb *db) {
/* ============ Virtual Memory - Blocking clients on missing keys =========== */
/* Is this client attempting to run a command against swapped keys?
* If so, block it ASAP, load the keys in background, then resume it.4
*
* The improtat thing about this function is that it can fail! If keys will
* still be swapped when the client is resumed, a few of key lookups will
* just block loading keys from disk. */
#if 0
static void blockClientOnSwappedKeys(redisClient *c) {
/* This function makes the clinet 'c' waiting for the key 'key' to be loaded.
* If there is not already a job loading the key, it is craeted.
* The key is added to the io_keys list in the client structure, and also
* in the hash table mapping swapped keys to waiting clients, that is,
* server.io_waited_keys. */
static int waitForSwappedKey(redisClient *c, robj *key) {
struct dictEntry *de;
robj *o;
list *l;
/* If the key does not exist or is already in RAM we don't need to
* block the client at all. */
de = dictFind(c->db->dict,key);
if (de == NULL) return 0;
o = dictGetEntryKey(de);
if (o->storage == REDIS_VM_MEMORY) {
return 0;
} else if (o->storage == REDIS_VM_SWAPPING) {
/* We were swapping the key, undo it! */
vmCancelThreadedIOJob(o);
return 0;
}
/* OK: the key is either swapped, or being loaded just now. */
/* Add the key to the list of keys this client is waiting for.
* This maps clients to keys they are waiting for. */
listAddNodeTail(c->io_keys,key);
incrRefCount(key);
/* Add the client to the swapped keys => clients waiting map. */
de = dictFind(c->db->io_keys,key);
if (de == NULL) {
int retval;
/* For every key we take a list of clients blocked for it */
l = listCreate();
retval = dictAdd(c->db->io_keys,key,l);
incrRefCount(key);
assert(retval == DICT_OK);
} else {
l = dictGetEntryVal(de);
}
listAddNodeTail(l,c);
/* Are we already loading the key from disk? If not create a job */
if (o->storage == REDIS_VM_SWAPPED) {
iojob *j;
o->storage = REDIS_VM_LOADING;
j = zmalloc(sizeof(*j));
j->type = REDIS_IOJOB_LOAD;
j->db = c->db;
j->key = dupStringObject(key);
j->key->vtype = o->vtype;
j->page = o->vm.page;
j->val = NULL;
j->canceled = 0;
j->thread = (pthread_t) -1;
lockThreadedIO();
queueIOJob(j);
unlockThreadedIO();
}
return 1;
}
/* Is this client attempting to run a command against swapped keys?
* If so, block it ASAP, load the keys in background, then resume it.
*
* The important idea about this function is that it can fail! If keys will
* still be swapped when the client is resumed, this key lookups will
* just block loading keys from disk. In practical terms this should only
* happen with SORT BY command or if there is a bug in this function.
*
* Return 1 if the client is marked as blocked, 0 if the client can
* continue as the keys it is going to access appear to be in memory. */
static int blockClientOnSwappedKeys(struct redisCommand *cmd, redisClient *c) {
if (cmd->proc == getCommand) {
waitForSwappedKey(c,c->argv[1]);
}
/* If the client was blocked for at least one key, mark it as blocked. */
if (listLength(c->io_keys)) {
c->flags |= REDIS_IO_WAIT;
aeDeleteFileEvent(server.el,c->fd,AE_READABLE);
server.vm_blocked_clients++;
return 1;
} else {
return 0;
}
}
/* Remove the 'key' from the list of blocked keys for a given client.
*
* The function returns 1 when there are no longer blocking keys after
* the current one was removed (and the client can be unblocked). */
static int dontWaitForSwappedKey(redisClient *c, robj *key) {
list *l;
listNode *ln;
listIter li;
struct dictEntry *de;
/* Remove the key from the list of keys this client is waiting for. */
listRewind(c->io_keys,&li);
while ((ln = listNext(&li)) != NULL) {
if (compareStringObjects(ln->value,key) == 0) {
listDelNode(c->io_keys,ln);
break;
}
}
assert(ln != NULL);
/* Remove the client form the key => waiting clients map. */
de = dictFind(c->db->io_keys,key);
assert(de != NULL);
l = dictGetEntryVal(de);
ln = listSearchKey(l,c);
assert(ln != NULL);
listDelNode(l,ln);
if (listLength(l) == 0)
dictDelete(c->db->io_keys,key);
return listLength(c->io_keys) == 0;
}
static void handleClientsBlockedOnSwappedKey(redisDb *db, robj *key) {
struct dictEntry *de;
list *l;
listNode *ln;
int len;
de = dictFind(db->io_keys,key);
if (!de) return;
l = dictGetEntryVal(de);
len = listLength(l);
/* Note: we can't use something like while(listLength(l)) as the list
* can be freed by the calling function when we remove the last element. */
while (len--) {
ln = listFirst(l);
redisClient *c = ln->value;
if (dontWaitForSwappedKey(c,key)) {
/* Put the client in the list of clients ready to go as we
* loaded all the keys about it. */
listAddNodeTail(server.io_ready_clients,c);
}
}
}
#endif
/* ================================= Debugging ============================== */
@ -8020,6 +8228,7 @@ int main(int argc, char **argv) {
redisLog(REDIS_NOTICE,"DB loaded from disk");
}
redisLog(REDIS_NOTICE,"The server is now ready to accept connections on port %d", server.port);
aeSetBeforeSleepProc(server.el,beforeSleep);
aeMain(server.el);
aeDeleteEventLoop(server.el);
return 0;

View File

@ -9,8 +9,10 @@ static struct redisFunctionSym symsTable[] = {
{"aofRemoveTempFile",(unsigned long)aofRemoveTempFile},
{"appendServerSaveParams",(unsigned long)appendServerSaveParams},
{"authCommand",(unsigned long)authCommand},
{"beforeSleep",(unsigned long)beforeSleep},
{"bgrewriteaofCommand",(unsigned long)bgrewriteaofCommand},
{"bgsaveCommand",(unsigned long)bgsaveCommand},
{"blockClientOnSwappedKeys",(unsigned long)blockClientOnSwappedKeys},
{"blockForKeys",(unsigned long)blockForKeys},
{"blockingPopGenericCommand",(unsigned long)blockingPopGenericCommand},
{"blpopCommand",(unsigned long)blpopCommand},
@ -43,6 +45,7 @@ static struct redisFunctionSym symsTable[] = {
{"dictObjKeyCompare",(unsigned long)dictObjKeyCompare},
{"dictRedisObjectDestructor",(unsigned long)dictRedisObjectDestructor},
{"dictVanillaFree",(unsigned long)dictVanillaFree},
{"dontWaitForSwappedKey",(unsigned long)dontWaitForSwappedKey},
{"dupClientReplyValue",(unsigned long)dupClientReplyValue},
{"dupStringObject",(unsigned long)dupStringObject},
{"echoCommand",(unsigned long)echoCommand},
@ -79,6 +82,7 @@ static struct redisFunctionSym symsTable[] = {
{"getMcontextEip",(unsigned long)getMcontextEip},
{"getsetCommand",(unsigned long)getsetCommand},
{"glueReplyBuffersIfNeeded",(unsigned long)glueReplyBuffersIfNeeded},
{"handleClientsBlockedOnSwappedKey",(unsigned long)handleClientsBlockedOnSwappedKey},
{"handleClientsWaitingListPush",(unsigned long)handleClientsWaitingListPush},
{"htNeedsResize",(unsigned long)htNeedsResize},
{"incrCommand",(unsigned long)incrCommand},
@ -231,6 +235,7 @@ static struct redisFunctionSym symsTable[] = {
{"vmThreadedIOCompletedJob",(unsigned long)vmThreadedIOCompletedJob},
{"vmWriteObjectOnSwap",(unsigned long)vmWriteObjectOnSwap},
{"waitEmptyIOJobsQueue",(unsigned long)waitEmptyIOJobsQueue},
{"waitForSwappedKey",(unsigned long)waitForSwappedKey},
{"yesnotoi",(unsigned long)yesnotoi},
{"zaddCommand",(unsigned long)zaddCommand},
{"zaddGenericCommand",(unsigned long)zaddGenericCommand},