Cluster: cluster stuff moved from redis.h to cluster.h.

This commit is contained in:
antirez 2013-10-09 15:37:20 +02:00
parent 6fa9b1a420
commit 929b6a4480
7 changed files with 258 additions and 244 deletions

View File

@ -6,112 +6,114 @@ ae_kqueue.o: ae_kqueue.c
ae_select.o: ae_select.c
anet.o: anet.c fmacros.h anet.h
aof.o: aof.c redis.h fmacros.h config.h ../deps/lua/src/lua.h \
../deps/lua/src/luaconf.h ae.h sds.h dict.h adlist.h zmalloc.h anet.h \
ziplist.h intset.h version.h util.h rdb.h rio.h bio.h
../deps/lua/src/luaconf.h ae.h sds.h dict.h adlist.h zmalloc.h anet.h \
ziplist.h intset.h version.h util.h rdb.h rio.h bio.h
bio.o: bio.c redis.h fmacros.h config.h ../deps/lua/src/lua.h \
../deps/lua/src/luaconf.h ae.h sds.h dict.h adlist.h zmalloc.h anet.h \
ziplist.h intset.h version.h util.h rdb.h rio.h bio.h
../deps/lua/src/luaconf.h ae.h sds.h dict.h adlist.h zmalloc.h anet.h \
ziplist.h intset.h version.h util.h rdb.h rio.h bio.h
bitops.o: bitops.c redis.h fmacros.h config.h ../deps/lua/src/lua.h \
../deps/lua/src/luaconf.h ae.h sds.h dict.h adlist.h zmalloc.h anet.h \
ziplist.h intset.h version.h util.h rdb.h rio.h
../deps/lua/src/luaconf.h ae.h sds.h dict.h adlist.h zmalloc.h anet.h \
ziplist.h intset.h version.h util.h rdb.h rio.h
cluster.o: cluster.c redis.h fmacros.h config.h ../deps/lua/src/lua.h \
../deps/lua/src/luaconf.h ae.h sds.h dict.h adlist.h zmalloc.h anet.h \
ziplist.h intset.h version.h util.h rdb.h rio.h endianconv.h
../deps/lua/src/luaconf.h ae.h sds.h dict.h adlist.h zmalloc.h anet.h \
ziplist.h intset.h version.h util.h rdb.h rio.h cluster.h endianconv.h
config.o: config.c redis.h fmacros.h config.h ../deps/lua/src/lua.h \
../deps/lua/src/luaconf.h ae.h sds.h dict.h adlist.h zmalloc.h anet.h \
ziplist.h intset.h version.h util.h rdb.h rio.h
../deps/lua/src/luaconf.h ae.h sds.h dict.h adlist.h zmalloc.h anet.h \
ziplist.h intset.h version.h util.h rdb.h rio.h cluster.h
crc16.o: crc16.c redis.h fmacros.h config.h ../deps/lua/src/lua.h \
../deps/lua/src/luaconf.h ae.h sds.h dict.h adlist.h zmalloc.h anet.h \
ziplist.h intset.h version.h util.h rdb.h rio.h
../deps/lua/src/luaconf.h ae.h sds.h dict.h adlist.h zmalloc.h anet.h \
ziplist.h intset.h version.h util.h rdb.h rio.h
crc64.o: crc64.c
db.o: db.c redis.h fmacros.h config.h ../deps/lua/src/lua.h \
../deps/lua/src/luaconf.h ae.h sds.h dict.h adlist.h zmalloc.h anet.h \
ziplist.h intset.h version.h util.h rdb.h rio.h
../deps/lua/src/luaconf.h ae.h sds.h dict.h adlist.h zmalloc.h anet.h \
ziplist.h intset.h version.h util.h rdb.h rio.h cluster.h
debug.o: debug.c redis.h fmacros.h config.h ../deps/lua/src/lua.h \
../deps/lua/src/luaconf.h ae.h sds.h dict.h adlist.h zmalloc.h anet.h \
ziplist.h intset.h version.h util.h rdb.h rio.h sha1.h crc64.h bio.h
dict.o: dict.c fmacros.h dict.h zmalloc.h
../deps/lua/src/luaconf.h ae.h sds.h dict.h adlist.h zmalloc.h anet.h \
ziplist.h intset.h version.h util.h rdb.h rio.h sha1.h crc64.h bio.h
dict.o: dict.c fmacros.h dict.h zmalloc.h redisassert.h
endianconv.o: endianconv.c
intset.o: intset.c intset.h zmalloc.h endianconv.h config.h
lzf_c.o: lzf_c.c lzfP.h
lzf_d.o: lzf_d.c lzfP.h
memtest.o: memtest.c config.h
multi.o: multi.c redis.h fmacros.h config.h ../deps/lua/src/lua.h \
../deps/lua/src/luaconf.h ae.h sds.h dict.h adlist.h zmalloc.h anet.h \
ziplist.h intset.h version.h util.h rdb.h rio.h
../deps/lua/src/luaconf.h ae.h sds.h dict.h adlist.h zmalloc.h anet.h \
ziplist.h intset.h version.h util.h rdb.h rio.h
networking.o: networking.c redis.h fmacros.h config.h \
../deps/lua/src/lua.h ../deps/lua/src/luaconf.h ae.h sds.h dict.h \
adlist.h zmalloc.h anet.h ziplist.h intset.h version.h util.h rdb.h \
rio.h
../deps/lua/src/lua.h ../deps/lua/src/luaconf.h ae.h sds.h dict.h \
adlist.h zmalloc.h anet.h ziplist.h intset.h version.h util.h rdb.h \
rio.h
notify.o: notify.c redis.h fmacros.h config.h ../deps/lua/src/lua.h \
../deps/lua/src/luaconf.h ae.h sds.h dict.h adlist.h zmalloc.h anet.h \
ziplist.h intset.h version.h util.h rdb.h rio.h
../deps/lua/src/luaconf.h ae.h sds.h dict.h adlist.h zmalloc.h anet.h \
ziplist.h intset.h version.h util.h rdb.h rio.h
object.o: object.c redis.h fmacros.h config.h ../deps/lua/src/lua.h \
../deps/lua/src/luaconf.h ae.h sds.h dict.h adlist.h zmalloc.h anet.h \
ziplist.h intset.h version.h util.h rdb.h rio.h
../deps/lua/src/luaconf.h ae.h sds.h dict.h adlist.h zmalloc.h anet.h \
ziplist.h intset.h version.h util.h rdb.h rio.h
pqsort.o: pqsort.c
pubsub.o: pubsub.c redis.h fmacros.h config.h ../deps/lua/src/lua.h \
../deps/lua/src/luaconf.h ae.h sds.h dict.h adlist.h zmalloc.h anet.h \
ziplist.h intset.h version.h util.h rdb.h rio.h
../deps/lua/src/luaconf.h ae.h sds.h dict.h adlist.h zmalloc.h anet.h \
ziplist.h intset.h version.h util.h rdb.h rio.h
rand.o: rand.c
rdb.o: rdb.c redis.h fmacros.h config.h ../deps/lua/src/lua.h \
../deps/lua/src/luaconf.h ae.h sds.h dict.h adlist.h zmalloc.h anet.h \
ziplist.h intset.h version.h util.h rdb.h rio.h lzf.h zipmap.h \
endianconv.h
../deps/lua/src/luaconf.h ae.h sds.h dict.h adlist.h zmalloc.h anet.h \
ziplist.h intset.h version.h util.h rdb.h rio.h lzf.h zipmap.h \
endianconv.h
redis-benchmark.o: redis-benchmark.c fmacros.h ae.h \
../deps/hiredis/hiredis.h sds.h adlist.h zmalloc.h
../deps/hiredis/hiredis.h sds.h adlist.h zmalloc.h
redis-check-aof.o: redis-check-aof.c fmacros.h config.h
redis-check-dump.o: redis-check-dump.c lzf.h crc64.h
redis-cli.o: redis-cli.c fmacros.h version.h ../deps/hiredis/hiredis.h \
sds.h zmalloc.h ../deps/linenoise/linenoise.h help.h anet.h ae.h
sds.h zmalloc.h ../deps/linenoise/linenoise.h help.h anet.h ae.h
redis.o: redis.c redis.h fmacros.h config.h ../deps/lua/src/lua.h \
../deps/lua/src/luaconf.h ae.h sds.h dict.h adlist.h zmalloc.h anet.h \
ziplist.h intset.h version.h util.h rdb.h rio.h slowlog.h bio.h \
asciilogo.h
../deps/lua/src/luaconf.h ae.h sds.h dict.h adlist.h zmalloc.h anet.h \
ziplist.h intset.h version.h util.h rdb.h rio.h cluster.h slowlog.h \
bio.h asciilogo.h
release.o: release.c release.h version.h crc64.h
replication.o: replication.c redis.h fmacros.h config.h \
../deps/lua/src/lua.h ../deps/lua/src/luaconf.h ae.h sds.h dict.h \
adlist.h zmalloc.h anet.h ziplist.h intset.h version.h util.h rdb.h \
rio.h
rio.o: rio.c fmacros.h rio.h sds.h util.h crc64.h
../deps/lua/src/lua.h ../deps/lua/src/luaconf.h ae.h sds.h dict.h \
adlist.h zmalloc.h anet.h ziplist.h intset.h version.h util.h rdb.h \
rio.h
rio.o: rio.c fmacros.h rio.h sds.h util.h crc64.h config.h redis.h \
../deps/lua/src/lua.h ../deps/lua/src/luaconf.h ae.h dict.h adlist.h \
zmalloc.h anet.h ziplist.h intset.h version.h rdb.h
scripting.o: scripting.c redis.h fmacros.h config.h ../deps/lua/src/lua.h \
../deps/lua/src/luaconf.h ae.h sds.h dict.h adlist.h zmalloc.h anet.h \
ziplist.h intset.h version.h util.h rdb.h rio.h sha1.h rand.h \
../deps/lua/src/lauxlib.h ../deps/lua/src/lua.h \
../deps/lua/src/lualib.h
../deps/lua/src/luaconf.h ae.h sds.h dict.h adlist.h zmalloc.h anet.h \
ziplist.h intset.h version.h util.h rdb.h rio.h sha1.h rand.h \
../deps/lua/src/lauxlib.h ../deps/lua/src/lua.h ../deps/lua/src/lualib.h
sds.o: sds.c sds.h zmalloc.h
sentinel.o: sentinel.c redis.h fmacros.h config.h ../deps/lua/src/lua.h \
../deps/lua/src/luaconf.h ae.h sds.h dict.h adlist.h zmalloc.h anet.h \
ziplist.h intset.h version.h util.h rdb.h rio.h \
../deps/hiredis/hiredis.h ../deps/hiredis/async.h \
../deps/hiredis/hiredis.h
../deps/lua/src/luaconf.h ae.h sds.h dict.h adlist.h zmalloc.h anet.h \
ziplist.h intset.h version.h util.h rdb.h rio.h \
../deps/hiredis/hiredis.h ../deps/hiredis/async.h \
../deps/hiredis/hiredis.h
setproctitle.o: setproctitle.c
sha1.o: sha1.c sha1.h config.h
slowlog.o: slowlog.c redis.h fmacros.h config.h ../deps/lua/src/lua.h \
../deps/lua/src/luaconf.h ae.h sds.h dict.h adlist.h zmalloc.h anet.h \
ziplist.h intset.h version.h util.h rdb.h rio.h slowlog.h
../deps/lua/src/luaconf.h ae.h sds.h dict.h adlist.h zmalloc.h anet.h \
ziplist.h intset.h version.h util.h rdb.h rio.h slowlog.h
sort.o: sort.c redis.h fmacros.h config.h ../deps/lua/src/lua.h \
../deps/lua/src/luaconf.h ae.h sds.h dict.h adlist.h zmalloc.h anet.h \
ziplist.h intset.h version.h util.h rdb.h rio.h pqsort.h
../deps/lua/src/luaconf.h ae.h sds.h dict.h adlist.h zmalloc.h anet.h \
ziplist.h intset.h version.h util.h rdb.h rio.h pqsort.h
syncio.o: syncio.c redis.h fmacros.h config.h ../deps/lua/src/lua.h \
../deps/lua/src/luaconf.h ae.h sds.h dict.h adlist.h zmalloc.h anet.h \
ziplist.h intset.h version.h util.h rdb.h rio.h
../deps/lua/src/luaconf.h ae.h sds.h dict.h adlist.h zmalloc.h anet.h \
ziplist.h intset.h version.h util.h rdb.h rio.h
t_hash.o: t_hash.c redis.h fmacros.h config.h ../deps/lua/src/lua.h \
../deps/lua/src/luaconf.h ae.h sds.h dict.h adlist.h zmalloc.h anet.h \
ziplist.h intset.h version.h util.h rdb.h rio.h
../deps/lua/src/luaconf.h ae.h sds.h dict.h adlist.h zmalloc.h anet.h \
ziplist.h intset.h version.h util.h rdb.h rio.h
t_list.o: t_list.c redis.h fmacros.h config.h ../deps/lua/src/lua.h \
../deps/lua/src/luaconf.h ae.h sds.h dict.h adlist.h zmalloc.h anet.h \
ziplist.h intset.h version.h util.h rdb.h rio.h
../deps/lua/src/luaconf.h ae.h sds.h dict.h adlist.h zmalloc.h anet.h \
ziplist.h intset.h version.h util.h rdb.h rio.h
t_set.o: t_set.c redis.h fmacros.h config.h ../deps/lua/src/lua.h \
../deps/lua/src/luaconf.h ae.h sds.h dict.h adlist.h zmalloc.h anet.h \
ziplist.h intset.h version.h util.h rdb.h rio.h
../deps/lua/src/luaconf.h ae.h sds.h dict.h adlist.h zmalloc.h anet.h \
ziplist.h intset.h version.h util.h rdb.h rio.h
t_string.o: t_string.c redis.h fmacros.h config.h ../deps/lua/src/lua.h \
../deps/lua/src/luaconf.h ae.h sds.h dict.h adlist.h zmalloc.h anet.h \
ziplist.h intset.h version.h util.h rdb.h rio.h
../deps/lua/src/luaconf.h ae.h sds.h dict.h adlist.h zmalloc.h anet.h \
ziplist.h intset.h version.h util.h rdb.h rio.h
t_zset.o: t_zset.c redis.h fmacros.h config.h ../deps/lua/src/lua.h \
../deps/lua/src/luaconf.h ae.h sds.h dict.h adlist.h zmalloc.h anet.h \
ziplist.h intset.h version.h util.h rdb.h rio.h
util.o: util.c fmacros.h util.h
ziplist.o: ziplist.c zmalloc.h util.h ziplist.h endianconv.h config.h
../deps/lua/src/luaconf.h ae.h sds.h dict.h adlist.h zmalloc.h anet.h \
ziplist.h intset.h version.h util.h rdb.h rio.h
util.o: util.c fmacros.h util.h sds.h
ziplist.o: ziplist.c zmalloc.h util.h sds.h ziplist.h endianconv.h \
config.h redisassert.h
zipmap.o: zipmap.c zmalloc.h endianconv.h config.h
zmalloc.o: zmalloc.c config.h zmalloc.h

View File

@ -29,6 +29,7 @@
*/
#include "redis.h"
#include "cluster.h"
#include "endianconv.h"
#include <sys/types.h>
@ -38,6 +39,8 @@
#include <unistd.h>
#include <sys/socket.h>
clusterNode *createClusterNode(char *nodename, int flags);
int clusterAddNode(clusterNode *node);
void clusterAcceptHandler(aeEventLoop *el, int fd, void *privdata, int mask);
void clusterReadHandler(aeEventLoop *el, int fd, void *privdata, int mask);
void clusterSendPing(clusterLink *link, int type);

181
src/cluster.h Normal file
View File

@ -0,0 +1,181 @@
#ifndef __REDIS_CLUSTER_H
#define __REDIS_CLUSTER_H
/*-----------------------------------------------------------------------------
* Redis cluster data structures, defines, exported API.
*----------------------------------------------------------------------------*/
#define REDIS_CLUSTER_SLOTS 16384
#define REDIS_CLUSTER_OK 0 /* Everything looks ok */
#define REDIS_CLUSTER_FAIL 1 /* The cluster can't work */
#define REDIS_CLUSTER_NAMELEN 40 /* sha1 hex length */
#define REDIS_CLUSTER_PORT_INCR 10000 /* Cluster port = baseport + PORT_INCR */
#define REDIS_CLUSTER_IPLEN INET6_ADDRSTRLEN /* IPv6 address string length */
/* The following defines are amunt of time, sometimes expressed as
* multiplicators of the node timeout value (when ending with MULT). */
#define REDIS_CLUSTER_DEFAULT_NODE_TIMEOUT 15
#define REDIS_CLUSTER_FAIL_REPORT_VALIDITY_MULT 2 /* Fail report validity. */
#define REDIS_CLUSTER_FAIL_UNDO_TIME_MULT 2 /* Undo fail if master is back. */
#define REDIS_CLUSTER_FAIL_UNDO_TIME_ADD 10 /* Some additional time. */
#define REDIS_CLUSTER_SLAVE_VALIDITY_MULT 10 /* Slave data validity. */
#define REDIS_CLUSTER_FAILOVER_AUTH_RETRY_MULT 4 /* Auth request retry time. */
#define REDIS_CLUSTER_FAILOVER_DELAY 5 /* Seconds */
struct clusterNode;
/* clusterLink encapsulates everything needed to talk with a remote node. */
typedef struct clusterLink {
time_t ctime; /* Link creation time */
int fd; /* TCP socket file descriptor */
sds sndbuf; /* Packet send buffer */
sds rcvbuf; /* Packet reception buffer */
struct clusterNode *node; /* Node related to this link if any, or NULL */
} clusterLink;
/* Node flags */
#define REDIS_NODE_MASTER 1 /* The node is a master */
#define REDIS_NODE_SLAVE 2 /* The node is a slave */
#define REDIS_NODE_PFAIL 4 /* Failure? Need acknowledge */
#define REDIS_NODE_FAIL 8 /* The node is believed to be malfunctioning */
#define REDIS_NODE_MYSELF 16 /* This node is myself */
#define REDIS_NODE_HANDSHAKE 32 /* We have still to exchange the first ping */
#define REDIS_NODE_NOADDR 64 /* We don't know the address of this node */
#define REDIS_NODE_MEET 128 /* Send a MEET message to this node */
#define REDIS_NODE_PROMOTED 256 /* Master was a slave propoted by failover */
#define REDIS_NODE_NULL_NAME "\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000"
/* This structure represent elements of node->fail_reports. */
struct clusterNodeFailReport {
struct clusterNode *node; /* Node reporting the failure condition. */
time_t time; /* Time of the last report from this node. */
} typedef clusterNodeFailReport;
struct clusterNode {
time_t ctime; /* Node object creation time. */
char name[REDIS_CLUSTER_NAMELEN]; /* Node name, hex string, sha1-size */
int flags; /* REDIS_NODE_... */
uint64_t configEpoch; /* Last configEpoch observed for this node */
unsigned char slots[REDIS_CLUSTER_SLOTS/8]; /* slots handled by this node */
int numslots; /* Number of slots handled by this node */
int numslaves; /* Number of slave nodes, if this is a master */
struct clusterNode **slaves; /* pointers to slave nodes */
struct clusterNode *slaveof; /* pointer to the master node */
time_t ping_sent; /* Unix time we sent latest ping */
time_t pong_received; /* Unix time we received the pong */
time_t fail_time; /* Unix time when FAIL flag was set */
time_t voted_time; /* Last time we voted for a slave of this master */
char ip[REDIS_IP_STR_LEN]; /* Latest known IP address of this node */
int port; /* Latest known port of this node */
clusterLink *link; /* TCP/IP link with this node */
list *fail_reports; /* List of nodes signaling this as failing */
};
typedef struct clusterNode clusterNode;
typedef struct clusterState {
clusterNode *myself; /* This node */
uint64_t currentEpoch;
int state; /* REDIS_CLUSTER_OK, REDIS_CLUSTER_FAIL, ... */
int size; /* Num of master nodes with at least one slot */
dict *nodes; /* Hash table of name -> clusterNode structures */
clusterNode *migrating_slots_to[REDIS_CLUSTER_SLOTS];
clusterNode *importing_slots_from[REDIS_CLUSTER_SLOTS];
clusterNode *slots[REDIS_CLUSTER_SLOTS];
zskiplist *slots_to_keys;
/* The following fields are used to take the slave state on elections. */
mstime_t failover_auth_time;/* Time at which we'll try to get elected in ms*/
int failover_auth_count; /* Number of votes received so far. */
int failover_auth_sent; /* True if we already asked for votes. */
uint64_t failover_auth_epoch; /* Epoch of the current election. */
/* The followign fields are uesd by masters to take state on elections. */
uint64_t last_vote_epoch; /* Epoch of the last vote granted. */
int todo_before_sleep; /* Things to do in clusterBeforeSleep(). */
long long stats_bus_messages_sent; /* Num of msg sent via cluster bus. */
long long stats_bus_messages_received; /* Num of msg received via cluster bus. */
} clusterState;
/* clusterState todo_before_sleep flags. */
#define CLUSTER_TODO_HANDLE_FAILOVER (1<<0)
#define CLUSTER_TODO_UPDATE_STATE (1<<1)
#define CLUSTER_TODO_SAVE_CONFIG (1<<2)
#define CLUSTER_TODO_FSYNC_CONFIG (1<<3)
/* Redis cluster messages header */
/* Note that the PING, PONG and MEET messages are actually the same exact
* kind of packet. PONG is the reply to ping, in the exact format as a PING,
* while MEET is a special PING that forces the receiver to add the sender
* as a node (if it is not already in the list). */
#define CLUSTERMSG_TYPE_PING 0 /* Ping */
#define CLUSTERMSG_TYPE_PONG 1 /* Pong (reply to Ping) */
#define CLUSTERMSG_TYPE_MEET 2 /* Meet "let's join" message */
#define CLUSTERMSG_TYPE_FAIL 3 /* Mark node xxx as failing */
#define CLUSTERMSG_TYPE_PUBLISH 4 /* Pub/Sub Publish propagation */
#define CLUSTERMSG_TYPE_FAILOVER_AUTH_REQUEST 5 /* May I failover? */
#define CLUSTERMSG_TYPE_FAILOVER_AUTH_ACK 6 /* Yes, you can failover. */
/* Initially we don't know our "name", but we'll find it once we connect
* to the first node, using the getsockname() function. Then we'll use this
* address for all the next messages. */
typedef struct {
char nodename[REDIS_CLUSTER_NAMELEN];
uint32_t ping_sent;
uint32_t pong_received;
char ip[16]; /* IP address last time it was seen */
uint16_t port; /* port last time it was seen */
uint16_t flags;
uint32_t notused; /* for 64 bit alignment */
} clusterMsgDataGossip;
typedef struct {
char nodename[REDIS_CLUSTER_NAMELEN];
} clusterMsgDataFail;
typedef struct {
uint32_t channel_len;
uint32_t message_len;
unsigned char bulk_data[8]; /* defined as 8 just for alignment concerns. */
} clusterMsgDataPublish;
union clusterMsgData {
/* PING, MEET and PONG */
struct {
/* Array of N clusterMsgDataGossip structures */
clusterMsgDataGossip gossip[1];
} ping;
/* FAIL */
struct {
clusterMsgDataFail about;
} fail;
/* PUBLISH */
struct {
clusterMsgDataPublish msg;
} publish;
};
typedef struct {
uint32_t totlen; /* Total length of this message */
uint16_t type; /* Message type */
uint16_t count; /* Only used for some kind of messages. */
uint64_t currentEpoch; /* The epoch accordingly to the sending node. */
uint64_t configEpoch; /* The config epoch if it's a master, or the last epoch
advertised by its master if it is a slave. */
char sender[REDIS_CLUSTER_NAMELEN]; /* Name of the sender node */
unsigned char myslots[REDIS_CLUSTER_SLOTS/8];
char slaveof[REDIS_CLUSTER_NAMELEN];
char notused1[32]; /* 32 bytes reserved for future usage. */
uint16_t port; /* Sender TCP base port */
uint16_t flags; /* Sender node flags */
unsigned char state; /* Cluster state from the POV of the sender */
unsigned char notused2[3]; /* Reserved for future use. For alignment. */
union clusterMsgData data;
} clusterMsg;
#define CLUSTERMSG_MIN_LEN (sizeof(clusterMsg)-sizeof(union clusterMsgData))
/* ----------------------- API exported outside cluster.c ------------------------- */
clusterNode *getNodeByQuery(redisClient *c, struct redisCommand *cmd, robj **argv, int argc, int *hashslot, int *ask);
#endif /* __REDIS_CLUSTER_H */

View File

@ -29,6 +29,7 @@
*/
#include "redis.h"
#include "cluster.h"
#include <fcntl.h>
#include <sys/stat.h>

View File

@ -28,6 +28,7 @@
*/
#include "redis.h"
#include "cluster.h"
#include <signal.h>
#include <ctype.h>

View File

@ -28,6 +28,7 @@
*/
#include "redis.h"
#include "cluster.h"
#include "slowlog.h"
#include "bio.h"

View File

@ -565,184 +565,12 @@ typedef struct redisOpArray {
int numops;
} redisOpArray;
/*-----------------------------------------------------------------------------
* Redis cluster data structures
*----------------------------------------------------------------------------*/
#define REDIS_CLUSTER_SLOTS 16384
#define REDIS_CLUSTER_OK 0 /* Everything looks ok */
#define REDIS_CLUSTER_FAIL 1 /* The cluster can't work */
#define REDIS_CLUSTER_NAMELEN 40 /* sha1 hex length */
#define REDIS_CLUSTER_PORT_INCR 10000 /* Cluster port = baseport + PORT_INCR */
#define REDIS_CLUSTER_IPLEN INET6_ADDRSTRLEN /* IPv6 address string length */
/* The following defines are amunt of time, sometimes expressed as
* multiplicators of the node timeout value (when ending with MULT). */
#define REDIS_CLUSTER_DEFAULT_NODE_TIMEOUT 15
#define REDIS_CLUSTER_FAIL_REPORT_VALIDITY_MULT 2 /* Fail report validity. */
#define REDIS_CLUSTER_FAIL_UNDO_TIME_MULT 2 /* Undo fail if master is back. */
#define REDIS_CLUSTER_FAIL_UNDO_TIME_ADD 10 /* Some additional time. */
#define REDIS_CLUSTER_SLAVE_VALIDITY_MULT 10 /* Slave data validity. */
#define REDIS_CLUSTER_FAILOVER_AUTH_RETRY_MULT 4 /* Auth request retry time. */
#define REDIS_CLUSTER_FAILOVER_DELAY 5 /* Seconds */
struct clusterNode;
/* clusterLink encapsulates everything needed to talk with a remote node. */
typedef struct clusterLink {
time_t ctime; /* Link creation time */
int fd; /* TCP socket file descriptor */
sds sndbuf; /* Packet send buffer */
sds rcvbuf; /* Packet reception buffer */
struct clusterNode *node; /* Node related to this link if any, or NULL */
} clusterLink;
/* Node flags */
#define REDIS_NODE_MASTER 1 /* The node is a master */
#define REDIS_NODE_SLAVE 2 /* The node is a slave */
#define REDIS_NODE_PFAIL 4 /* Failure? Need acknowledge */
#define REDIS_NODE_FAIL 8 /* The node is believed to be malfunctioning */
#define REDIS_NODE_MYSELF 16 /* This node is myself */
#define REDIS_NODE_HANDSHAKE 32 /* We have still to exchange the first ping */
#define REDIS_NODE_NOADDR 64 /* We don't know the address of this node */
#define REDIS_NODE_MEET 128 /* Send a MEET message to this node */
#define REDIS_NODE_PROMOTED 256 /* Master was a slave propoted by failover */
#define REDIS_NODE_NULL_NAME "\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000"
/* This structure represent elements of node->fail_reports. */
struct clusterNodeFailReport {
struct clusterNode *node; /* Node reporting the failure condition. */
time_t time; /* Time of the last report from this node. */
} typedef clusterNodeFailReport;
struct clusterNode {
time_t ctime; /* Node object creation time. */
char name[REDIS_CLUSTER_NAMELEN]; /* Node name, hex string, sha1-size */
int flags; /* REDIS_NODE_... */
uint64_t configEpoch; /* Last configEpoch observed for this node */
unsigned char slots[REDIS_CLUSTER_SLOTS/8]; /* slots handled by this node */
int numslots; /* Number of slots handled by this node */
int numslaves; /* Number of slave nodes, if this is a master */
struct clusterNode **slaves; /* pointers to slave nodes */
struct clusterNode *slaveof; /* pointer to the master node */
time_t ping_sent; /* Unix time we sent latest ping */
time_t pong_received; /* Unix time we received the pong */
time_t fail_time; /* Unix time when FAIL flag was set */
time_t voted_time; /* Last time we voted for a slave of this master */
char ip[REDIS_IP_STR_LEN]; /* Latest known IP address of this node */
int port; /* Latest known port of this node */
clusterLink *link; /* TCP/IP link with this node */
list *fail_reports; /* List of nodes signaling this as failing */
};
typedef struct clusterNode clusterNode;
typedef struct {
clusterNode *myself; /* This node */
uint64_t currentEpoch;
int state; /* REDIS_CLUSTER_OK, REDIS_CLUSTER_FAIL, ... */
int size; /* Num of master nodes with at least one slot */
dict *nodes; /* Hash table of name -> clusterNode structures */
clusterNode *migrating_slots_to[REDIS_CLUSTER_SLOTS];
clusterNode *importing_slots_from[REDIS_CLUSTER_SLOTS];
clusterNode *slots[REDIS_CLUSTER_SLOTS];
zskiplist *slots_to_keys;
/* The following fields are used to take the slave state on elections. */
mstime_t failover_auth_time;/* Time at which we'll try to get elected in ms*/
int failover_auth_count; /* Number of votes received so far. */
int failover_auth_sent; /* True if we already asked for votes. */
uint64_t failover_auth_epoch; /* Epoch of the current election. */
/* The followign fields are uesd by masters to take state on elections. */
uint64_t last_vote_epoch; /* Epoch of the last vote granted. */
int todo_before_sleep; /* Things to do in clusterBeforeSleep(). */
long long stats_bus_messages_sent; /* Num of msg sent via cluster bus. */
long long stats_bus_messages_received; /* Num of msg received via cluster bus. */
} clusterState;
/* clusterState todo_before_sleep flags. */
#define CLUSTER_TODO_HANDLE_FAILOVER (1<<0)
#define CLUSTER_TODO_UPDATE_STATE (1<<1)
#define CLUSTER_TODO_SAVE_CONFIG (1<<2)
#define CLUSTER_TODO_FSYNC_CONFIG (1<<3)
/* Redis cluster messages header */
/* Note that the PING, PONG and MEET messages are actually the same exact
* kind of packet. PONG is the reply to ping, in the exact format as a PING,
* while MEET is a special PING that forces the receiver to add the sender
* as a node (if it is not already in the list). */
#define CLUSTERMSG_TYPE_PING 0 /* Ping */
#define CLUSTERMSG_TYPE_PONG 1 /* Pong (reply to Ping) */
#define CLUSTERMSG_TYPE_MEET 2 /* Meet "let's join" message */
#define CLUSTERMSG_TYPE_FAIL 3 /* Mark node xxx as failing */
#define CLUSTERMSG_TYPE_PUBLISH 4 /* Pub/Sub Publish propagation */
#define CLUSTERMSG_TYPE_FAILOVER_AUTH_REQUEST 5 /* May I failover? */
#define CLUSTERMSG_TYPE_FAILOVER_AUTH_ACK 6 /* Yes, you can failover. */
/* Initially we don't know our "name", but we'll find it once we connect
* to the first node, using the getsockname() function. Then we'll use this
* address for all the next messages. */
typedef struct {
char nodename[REDIS_CLUSTER_NAMELEN];
uint32_t ping_sent;
uint32_t pong_received;
char ip[16]; /* IP address last time it was seen */
uint16_t port; /* port last time it was seen */
uint16_t flags;
uint32_t notused; /* for 64 bit alignment */
} clusterMsgDataGossip;
typedef struct {
char nodename[REDIS_CLUSTER_NAMELEN];
} clusterMsgDataFail;
typedef struct {
uint32_t channel_len;
uint32_t message_len;
unsigned char bulk_data[8]; /* defined as 8 just for alignment concerns. */
} clusterMsgDataPublish;
union clusterMsgData {
/* PING, MEET and PONG */
struct {
/* Array of N clusterMsgDataGossip structures */
clusterMsgDataGossip gossip[1];
} ping;
/* FAIL */
struct {
clusterMsgDataFail about;
} fail;
/* PUBLISH */
struct {
clusterMsgDataPublish msg;
} publish;
};
typedef struct {
uint32_t totlen; /* Total length of this message */
uint16_t type; /* Message type */
uint16_t count; /* Only used for some kind of messages. */
uint64_t currentEpoch; /* The epoch accordingly to the sending node. */
uint64_t configEpoch; /* The config epoch if it's a master, or the last epoch
advertised by its master if it is a slave. */
char sender[REDIS_CLUSTER_NAMELEN]; /* Name of the sender node */
unsigned char myslots[REDIS_CLUSTER_SLOTS/8];
char slaveof[REDIS_CLUSTER_NAMELEN];
char notused1[32]; /* 32 bytes reserved for future usage. */
uint16_t port; /* Sender TCP base port */
uint16_t flags; /* Sender node flags */
unsigned char state; /* Cluster state from the POV of the sender */
unsigned char notused2[3]; /* Reserved for future use. For alignment. */
union clusterMsgData data;
} clusterMsg;
#define CLUSTERMSG_MIN_LEN (sizeof(clusterMsg)-sizeof(union clusterMsgData))
/*-----------------------------------------------------------------------------
* Global server state
*----------------------------------------------------------------------------*/
struct clusterState;
struct redisServer {
/* General */
char *configfile; /* Absolute config file path, or NULL */
@ -942,7 +770,7 @@ struct redisServer {
int cluster_enabled; /* Is cluster enabled? */
int cluster_node_timeout; /* Cluster node timeout. */
char *cluster_configfile; /* Cluster auto-generated config file name. */
clusterState *cluster; /* State of the cluster */
struct clusterState *cluster; /* State of the cluster */
/* Scripting */
lua_State *lua; /* The Lua interpreter. We use just one for all clients */
redisClient *lua_client; /* The "fake client" to query Redis from Lua */
@ -1380,10 +1208,7 @@ int *zunionInterGetKeys(struct redisCommand *cmd,robj **argv, int argc, int *num
void clusterInit(void);
unsigned short crc16(const char *buf, int len);
unsigned int keyHashSlot(char *key, int keylen);
clusterNode *createClusterNode(char *nodename, int flags);
int clusterAddNode(clusterNode *node);
void clusterCron(void);
clusterNode *getNodeByQuery(redisClient *c, struct redisCommand *cmd, robj **argv, int argc, int *hashslot, int *ask);
void clusterPropagatePublish(robj *channel, robj *message);
void migrateCloseTimedoutSockets(void);
void clusterBeforeSleep(void);