grpclb: consider IDLE SubConns as connecting (#4031)
Otherwise, when the first response is received from the grpclb server, the
parent ClientConn enters TransientFailure, and the first several
non-wait-for-ready RPCs will fail.
diff --git a/balancer/grpclb/grpclb.go b/balancer/grpclb/grpclb.go
index a7424cf..a43d896 100644
--- a/balancer/grpclb/grpclb.go
+++ b/balancer/grpclb/grpclb.go
@@ -288,7 +288,11 @@
//
// The aggregated state is:
// - If at least one SubConn in Ready, the aggregated state is Ready;
-// - Else if at least one SubConn in Connecting, the aggregated state is Connecting;
+// - Else if at least one SubConn in Connecting or IDLE, the aggregated state is Connecting;
+// - It's OK to consider IDLE as Connecting. SubConns never stay in IDLE,
+// they start to connect immediately. But there's a race between when the
+// overall state is reported and when the new SubConn state arrives. And
+// SubConns never go back to IDLE.
// - Else the aggregated state is TransientFailure.
func (lb *lbBalancer) aggregateSubConnStates() connectivity.State {
var numConnecting uint64
@@ -298,7 +302,7 @@
switch state {
case connectivity.Ready:
return connectivity.Ready
- case connectivity.Connecting:
+ case connectivity.Connecting, connectivity.Idle:
numConnecting++
}
}
diff --git a/balancer/grpclb/grpclb_test.go b/balancer/grpclb/grpclb_test.go
index dcc5235..dc94ca8 100644
--- a/balancer/grpclb/grpclb_test.go
+++ b/balancer/grpclb/grpclb_test.go
@@ -452,7 +452,7 @@
ctx, cancel = context.WithTimeout(context.Background(), 5*time.Second)
defer cancel()
- if _, err := testC.EmptyCall(ctx, &testpb.Empty{}, grpc.WaitForReady(true)); err != nil {
+ if _, err := testC.EmptyCall(ctx, &testpb.Empty{}); err != nil {
t.Fatalf("%v.EmptyCall(_, _) = _, %v, want _, <nil>", testC, err)
}
}