cluster.tcl: saner error handling.

Better handling of connection errors, so that the nodes table is updated
and the client can recover. Also populate the startup nodes table after
fetching the list of nodes.

There is more work to do here: it is still not as reliable as the
redis-rb-cluster implementation, which is the minimal reference
implementation for Redis Cluster clients.
This commit is contained in:
antirez 2014-05-14 00:15:52 +02:00
parent bae30479fb
commit cdf2271c5b

View File

@ -56,14 +56,21 @@ proc redis_cluster {nodes} {
proc ::redis_cluster::__method__refresh_nodes_map {id} { proc ::redis_cluster::__method__refresh_nodes_map {id} {
# Contact the first responding startup node. # Contact the first responding startup node.
set idx 0; # Index of the node that will respond. set idx 0; # Index of the node that will respond.
set errmsg {}
foreach start_node $::redis_cluster::startup_nodes($id) { foreach start_node $::redis_cluster::startup_nodes($id) {
lassign [split $start_node :] start_host start_port lassign [split $start_node :] start_host start_port
if {[catch { if {[catch {
set r {}
set r [redis $start_host $start_port] set r [redis $start_host $start_port]
set nodes_descr [$r cluster nodes] set nodes_descr [$r cluster nodes]
$r close $r close
}]} { puts $e
} e]} {
if {$r ne {}} {catch {$r close}}
incr idx incr idx
if {[string length $errmsg] < 200} {
append errmsg " $start_node: $e"
}
continue ; # Try next. continue ; # Try next.
} else { } else {
break; # Good node found. break; # Good node found.
@ -71,7 +78,7 @@ proc ::redis_cluster::__method__refresh_nodes_map {id} {
} }
if {$idx == [llength $::redis_cluster::startup_nodes($id)]} { if {$idx == [llength $::redis_cluster::startup_nodes($id)]} {
error "No good startup node found." error "No good startup node found. $errmsg"
} }
# Put the node that responded as first in the list if it is not # Put the node that responded as first in the list if it is not
@ -113,6 +120,7 @@ proc ::redis_cluster::__method__refresh_nodes_map {id} {
link $link \ link $link \
] ]
dict set nodes $addr $node dict set nodes $addr $node
lappend ::redis_cluster::startup_nodes($id) $addr
} }
set ::redis_cluster::nodes($id) $nodes set ::redis_cluster::nodes($id) $nodes
@ -127,6 +135,9 @@ proc ::redis_cluster::__method__refresh_nodes_map {id} {
} }
} }
} }
# Only retain unique entries in the startup nodes list
set ::redis_cluster::startup_nodes($id) [lsort -unique $::redis_cluster::startup_nodes($id)]
} }
# Free a redis_cluster handle. # Free a redis_cluster handle.
@ -166,13 +177,16 @@ proc ::redis_cluster::__dispatch__ {id method args} {
} }
# Execute the command in the node we think is the slot owner. # Execute the command in the node we think is the slot owner.
set retry 10 set retry 100
while {[incr retry -1]} { while {[incr retry -1]} {
if {$retry < 5} {after 100} if {$retry < 5} {after 100}
set node [dict get $::redis_cluster::nodes($id) $node_addr] set node [dict get $::redis_cluster::nodes($id) $node_addr]
set link [dict get $node link] set link [dict get $node link]
if {[catch {$link $method {*}$args} e]} { if {[catch {$link $method {*}$args} e]} {
if {[string range $e 0 4] eq {MOVED}} { if {$link eq {} || \
[string range $e 0 4] eq {MOVED} || \
[string range $e 0 2] eq {I/O} \
} {
# MOVED redirection. # MOVED redirection.
::redis_cluster::__method__refresh_nodes_map $id ::redis_cluster::__method__refresh_nodes_map $id
set node_addr [dict get $::redis_cluster::slots($id) $slot] set node_addr [dict get $::redis_cluster::slots($id) $slot]