mirror of
https://github.com/fluencelabs/redis
synced 2025-03-16 15:40:49 +00:00
Lock nodes.conf to avoid multiple processes using the same file.
This was a common source of problems among users. The solution adopted is not bullet-proof as if the user deletes the nodes.conf file manually, and starts a new instance with the same nodes.conf file path, two instances will use the same file. However following this reasoning the user may drop a nuclear bomb into the datacenter as well.
This commit is contained in:
parent
897adc1c8c
commit
db06108bc1
@ -39,6 +39,7 @@
|
||||
#include <unistd.h>
|
||||
#include <sys/socket.h>
|
||||
#include <sys/stat.h>
|
||||
#include <sys/file.h>
|
||||
|
||||
/* A global reference to myself is handy to make code more clear.
|
||||
* Myself always points to server.cluster->myself, that is, the clusterNode
|
||||
@ -90,13 +91,27 @@ uint64_t clusterGetMaxEpoch(void) {
|
||||
return max;
|
||||
}
|
||||
|
||||
/* Load the cluster config from 'filename'.
|
||||
*
|
||||
* If the file does not exist or is zero-length (this may happen because
|
||||
* when we lock the nodes.conf file, we create a zero-length one for the
|
||||
* sake of locking if it does not already exist), REDIS_ERR is returned.
|
||||
* If the configuration was loaded from the file, REDIS_OK is returned. */
|
||||
int clusterLoadConfig(char *filename) {
|
||||
FILE *fp = fopen(filename,"r");
|
||||
struct stat sb;
|
||||
char *line;
|
||||
int maxline, j;
|
||||
|
||||
if (fp == NULL) return REDIS_ERR;
|
||||
|
||||
/* Check if the file is zero-length: if so return REDIS_ERR to signal
|
||||
* we have to write the config. */
|
||||
if (fstat(fileno(fp),&sb) != -1 && sb.st_size == 0) {
|
||||
fclose(fp);
|
||||
return REDIS_ERR;
|
||||
}
|
||||
|
||||
/* Parse the file. Note that single liens of the cluster config file can
|
||||
* be really long as they include all the hash slots of the node.
|
||||
* This means in the worst possible case, half of the Redis slots will be
|
||||
@ -323,6 +338,46 @@ void clusterSaveConfigOrDie(int do_fsync) {
|
||||
}
|
||||
}
|
||||
|
||||
/* Lock the cluster config using flock(), and leaks the file descritor used to
|
||||
* acquire the lock so that the file will be locked forever.
|
||||
*
|
||||
* This works because we always update nodes.conf with a new version
|
||||
* in-place, reopening the file, and writing to it in place (later adjusting
|
||||
* the length with ftruncate()).
|
||||
*
|
||||
* On success REDIS_OK is returned, otherwise an error is logged and
|
||||
* the function returns REDIS_ERR to signal a lock was not acquired. */
|
||||
int clusterLockConfig(char *filename) {
|
||||
/* To lock it, we need to open the file in a way it is created if
|
||||
* it does not exist, otherwise there is a race condition with other
|
||||
* processes. */
|
||||
int fd = open(filename,O_WRONLY|O_CREAT,0644);
|
||||
if (fd == -1) {
|
||||
redisLog(REDIS_WARNING,
|
||||
"Can't open %s in order to acquire a lock: %s",
|
||||
filename, strerror(errno));
|
||||
return REDIS_ERR;
|
||||
}
|
||||
|
||||
if (flock(fd,LOCK_EX|LOCK_NB) == -1) {
|
||||
if (errno == EWOULDBLOCK) {
|
||||
redisLog(REDIS_WARNING,
|
||||
"Sorry, the cluster configuration file %s is already used "
|
||||
"by a different Redis Cluster node. Please make sure that "
|
||||
"different nodes use different cluster configuration "
|
||||
"files.", filename);
|
||||
} else {
|
||||
redisLog(REDIS_WARNING,
|
||||
"Impossible to lock %s: %s", filename, strerror(errno));
|
||||
}
|
||||
close(fd);
|
||||
return REDIS_ERR;
|
||||
}
|
||||
/* Lock acquired: leak the 'fd' by not closing it, so that we'll retain the
|
||||
* lock to the file as long as the process exists. */
|
||||
return REDIS_OK;
|
||||
}
|
||||
|
||||
void clusterInit(void) {
|
||||
int saveconf = 0;
|
||||
|
||||
@ -344,6 +399,13 @@ void clusterInit(void) {
|
||||
server.cluster->stats_bus_messages_received = 0;
|
||||
memset(server.cluster->slots,0, sizeof(server.cluster->slots));
|
||||
clusterCloseAllSlots();
|
||||
|
||||
/* Lock the cluster config file to make sure every node uses
|
||||
* its own nodes.conf. */
|
||||
if (clusterLockConfig(server.cluster_configfile) == REDIS_ERR)
|
||||
exit(1);
|
||||
|
||||
/* Load or create a new nodes configuration. */
|
||||
if (clusterLoadConfig(server.cluster_configfile) == REDIS_ERR) {
|
||||
/* No configuration found. We will just use the random name provided
|
||||
* by the createClusterNode() function. */
|
||||
|
Loading…
x
Reference in New Issue
Block a user