From cdf2271c5b8c8574f8441cd1214ed28d9cb583db Mon Sep 17 00:00:00 2001
From: antirez
Date: Wed, 14 May 2014 00:15:52 +0200
Subject: [PATCH] cluster.tcl: saner error handling.

Better handling of connection errors in order to update the table and
recovery, populate the startup nodes table after fetching the list of
nodes.

More work to do about it, it is still not as reliable as
redis-rb-cluster implementation which is the minimal reference
implementation for Redis Cluster clients.
---
Review notes (this section is ignored by git-am):

* Dropped the stray debugging line `puts $e` that the original change
  added inside the catch body of refresh_nodes_map. `e` is the result
  variable of that very catch, so it is still unset the first time the
  body runs: the read raised `can't read "e": no such variable`, which
  made a perfectly healthy first startup node count as failed (and, with
  a single startup node, made the refresh always end in "No good startup
  node found."). Hunk line counts, the new-side offsets of the following
  hunks and the diffstat were adjusted accordingly (18 -> 17 insertions).
* The post-image blob id on the `index` line is the one from the original
  commit and no longer matches the result of this patch exactly; it is
  informational only. TODO: regenerate with git format-patch.
* The line structure of this patch was restored from a copy whose
  whitespace had been collapsed. Indentation of the hunks is assumed to
  follow the 4-space style of the target file -- confirm against the
  tree, or apply with `git apply --ignore-whitespace`.

 tests/support/cluster.tcl | 21 +++++++++++++++++----
 1 file changed, 17 insertions(+), 4 deletions(-)

diff --git a/tests/support/cluster.tcl b/tests/support/cluster.tcl
index 90e8352d..79d9244d 100644
--- a/tests/support/cluster.tcl
+++ b/tests/support/cluster.tcl
@@ -56,14 +56,20 @@ proc redis_cluster {nodes} {
 proc ::redis_cluster::__method__refresh_nodes_map {id} {
     # Contact the first responding startup node.
     set idx 0; # Index of the node that will respond.
+    set errmsg {}
     foreach start_node $::redis_cluster::startup_nodes($id) {
         lassign [split $start_node :] start_host start_port
         if {[catch {
+            set r {}
             set r [redis $start_host $start_port]
             set nodes_descr [$r cluster nodes]
             $r close
-        }]} {
+        } e]} {
+            if {$r ne {}} {catch {$r close}}
             incr idx
+            if {[string length $errmsg] < 200} {
+                append errmsg " $start_node: $e"
+            }
             continue ; # Try next.
         } else {
             break; # Good node found.
@@ -71,7 +77,7 @@ proc ::redis_cluster::__method__refresh_nodes_map {id} {
     }
 
     if {$idx == [llength $::redis_cluster::startup_nodes($id)]} {
-        error "No good startup node found."
+        error "No good startup node found. $errmsg"
     }
 
     # Put the node that responded as first in the list if it is not
@@ -113,6 +119,7 @@ proc ::redis_cluster::__method__refresh_nodes_map {id} {
             link $link \
         ]
         dict set nodes $addr $node
+        lappend ::redis_cluster::startup_nodes($id) $addr
     }
 
     set ::redis_cluster::nodes($id) $nodes
@@ -127,6 +134,9 @@ proc ::redis_cluster::__method__refresh_nodes_map {id} {
             }
         }
     }
+
+    # Only retain unique entries in the startup nodes list
+    set ::redis_cluster::startup_nodes($id) [lsort -unique $::redis_cluster::startup_nodes($id)]
 }
 
 # Free a redis_cluster handle.
@@ -166,13 +176,16 @@ proc ::redis_cluster::__dispatch__ {id method args} {
     }
 
     # Execute the command in the node we think is the slot owner.
-    set retry 10
+    set retry 100
     while {[incr retry -1]} {
         if {$retry < 5} {after 100}
         set node [dict get $::redis_cluster::nodes($id) $node_addr]
         set link [dict get $node link]
         if {[catch {$link $method {*}$args} e]} {
-            if {[string range $e 0 4] eq {MOVED}} {
+            if {$link eq {} || \
+                [string range $e 0 4] eq {MOVED} || \
+                [string range $e 0 2] eq {I/O} \
+            } {
                 # MOVED redirection.
                 ::redis_cluster::__method__refresh_nodes_map $id
                 set node_addr [dict get $::redis_cluster::slots($id) $slot]