| // Copyright 2018 The gVisor Authors. |
| // |
| // Licensed under the Apache License, Version 2.0 (the "License"); |
| // you may not use this file except in compliance with the License. |
| // You may obtain a copy of the License at |
| // |
| // http://www.apache.org/licenses/LICENSE-2.0 |
| // |
| // Unless required by applicable law or agreed to in writing, software |
| // distributed under the License is distributed on an "AS IS" BASIS, |
| // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| // See the License for the specific language governing permissions and |
| // limitations under the License. |
| // |
| // These tests are flaky when run under the Go race detector: some |
| // iterations take long enough that the retransmit timer kicks in, and the |
| // extra retransmitted packets break the congestion window measurements. |
| // |
| // +build !race |
| |
| package tcp_test |
| |
| import ( |
| "bytes" |
| "fmt" |
| "math" |
| "testing" |
| "time" |
| |
| "gvisor.dev/gvisor/pkg/tcpip" |
| "gvisor.dev/gvisor/pkg/tcpip/header" |
| "gvisor.dev/gvisor/pkg/tcpip/transport/tcp" |
| "gvisor.dev/gvisor/pkg/tcpip/transport/tcp/testing/context" |
| "gvisor.dev/gvisor/pkg/test/testutil" |
| ) |
| |
| func TestFastRecovery(t *testing.T) { |
| maxPayload := 32 |
| c := context.New(t, uint32(header.TCPMinimumSize+header.IPv4MinimumSize+maxPayload)) |
| defer c.Cleanup() |
| |
| c.CreateConnected(789, 30000, -1 /* epRcvBuf */) |
| |
| const iterations = 3 |
| data := make([]byte, 2*maxPayload*(tcp.InitialCwnd<<(iterations+1))) |
| for i := range data { |
| data[i] = byte(i) |
| } |
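| // The buffer is deliberately oversized so the sender always has queued data |
| // and is limited only by the congestion window. As a rough worked example |
| // (assuming the stack's InitialCwnd is 10), 2*32*(10<<4) = 10240 bytes, |
| // i.e. 320 packets of 32 bytes each. |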
| |
| // Write all the data in one shot. Packets will only be written at the |
| // MTU size though. |
| var r bytes.Reader |
| r.Reset(data) |
| if _, err := c.EP.Write(&r, tcpip.WriteOptions{}); err != nil { |
| t.Fatalf("Write failed: %s", err) |
| } |
| |
| // Do slow start for a few iterations. |
| expected := tcp.InitialCwnd |
| bytesRead := 0 |
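| // As a worked example, assuming the stack's InitialCwnd is 10, the three |
| // iterations below should observe trains of 10, 20 and 40 packets, leaving |
| // bytesRead at (10+20+40)*32 = 2240 once the loop exits. |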
| for i := 0; i < iterations; i++ { |
| expected = tcp.InitialCwnd << uint(i) |
| if i > 0 { |
| // Acknowledge all the data received so far if not on |
| // first iteration. |
| c.SendAck(790, bytesRead) |
| } |
| |
| // Read all packets expected on this iteration. Don't |
| // acknowledge any of them just yet, so that we can measure the |
| // congestion window. |
| for j := 0; j < expected; j++ { |
| c.ReceiveAndCheckPacket(data, bytesRead, maxPayload) |
| bytesRead += maxPayload |
| } |
| |
| // Check we don't receive any more packets on this iteration. |
| // The timeout can't be too high or we'll trigger the retransmit timer. |
| c.CheckNoPacketTimeout("More packets received than expected for this cwnd.", 50*time.Millisecond) |
| } |
| |
| // Send 3 duplicate acks. This should force an immediate retransmit of |
| // the pending packet and put the sender into fast recovery. |
| rtxOffset := bytesRead - maxPayload*expected |
| for i := 0; i < 3; i++ { |
| c.SendAck(790, rtxOffset) |
| } |
| |
| // Receive the retransmitted packet. |
| c.ReceiveAndCheckPacket(data, rtxOffset, maxPayload) |
| |
| // Wait before checking metrics. |
| metricPollFn := func() error { |
| if got, want := c.Stack().Stats().TCP.FastRetransmit.Value(), uint64(1); got != want { |
| return fmt.Errorf("got stats.TCP.FastRetransmit.Value = %d, want = %d", got, want) |
| } |
| if got, want := c.Stack().Stats().TCP.Retransmits.Value(), uint64(1); got != want { |
| return fmt.Errorf("got stats.TCP.Retransmit.Value = %d, want = %d", got, want) |
| } |
| |
| if got, want := c.Stack().Stats().TCP.FastRecovery.Value(), uint64(1); got != want { |
| return fmt.Errorf("got stats.TCP.FastRecovery.Value = %d, want = %d", got, want) |
| } |
| return nil |
| } |
| |
| if err := testutil.Poll(metricPollFn, 1*time.Second); err != nil { |
| t.Error(err) |
| } |
| |
| // Now send 7 more duplicate acks. Each of these should inflate the |
| // congestion window by one segment; the extra send window this opens up |
| // is drained later, after the partial ACK below. |
| for i := 0; i < 7; i++ { |
| c.SendAck(790, rtxOffset) |
| } |
| |
| recover := bytesRead |
| |
| // Ensure no new packets arrive. |
| c.CheckNoPacketTimeout("More packets received than expected during recovery after dupacks for this cwnd.", |
| 50*time.Millisecond) |
| |
| // Acknowledge half of the pending data. |
| rtxOffset = bytesRead - expected*maxPayload/2 |
| c.SendAck(790, rtxOffset) |
| |
| // Receive the retransmit due to partial ack. |
| c.ReceiveAndCheckPacket(data, rtxOffset, maxPayload) |
| |
| // Wait before checking metrics. |
| metricPollFn = func() error { |
| if got, want := c.Stack().Stats().TCP.FastRetransmit.Value(), uint64(2); got != want { |
| return fmt.Errorf("got stats.TCP.FastRetransmit.Value = %d, want = %d", got, want) |
| } |
| if got, want := c.Stack().Stats().TCP.Retransmits.Value(), uint64(2); got != want { |
| return fmt.Errorf("got stats.TCP.Retransmit.Value = %d, want = %d", got, want) |
| } |
| return nil |
| } |
| if err := testutil.Poll(metricPollFn, 1*time.Second); err != nil { |
| t.Error(err) |
| } |
| |
| // Receive the 10 extra packets that should have been released due to |
| // the congestion window inflation in recovery. |
| for i := 0; i < 10; i++ { |
| c.ReceiveAndCheckPacket(data, bytesRead, maxPayload) |
| bytesRead += maxPayload |
| } |
| |
| // A partial ACK during recovery should reduce the congestion window by |
| // the amount acked. Since we had "expected" packets outstanding before |
| // sending the partial ack and we acked expected/2 of them, the cwnd and |
| // the outstanding count should both be expected/2 + 10 (7 dupAcks plus |
| // the original 3 dupAcks that triggered fast recovery), which means the |
| // sender should not send any more packets until we acknowledge this one. |
| c.CheckNoPacketTimeout("More packets received than expected during recovery after partial ack for this cwnd.", |
| 50*time.Millisecond) |
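| // Plugging in the numbers from the slow-start trace above (assuming |
| // InitialCwnd is 10): expected is 40 here, so cwnd and the outstanding |
| // count are both 40/2 + 10 = 30 and the sender stays blocked until the |
| // next ACK. |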
| |
| // Acknowledge all pending data to recover point. |
| c.SendAck(790, recover) |
| |
| // At this point, the cwnd should reset to expected/2 and there are 10 |
| // packets outstanding. |
| // |
| // NOTE: Technically netstack is incorrect in that we adjust the cwnd on |
| // the same segment that takes us out of recovery. Because of that, the |
| // actual cwnd at exit of recovery will be expected/2 + 1: we acked a |
| // cwnd's worth of packets, which increases the cwnd by a further 1 in |
| // congestion avoidance. |
| // |
| // In the first iteration, since there are 10 packets outstanding, we |
| // expect to receive expected/2 + 1 - 10 packets. Subsequent iterations |
| // will send us the full window, growing by one packet per iteration. |
| expected = expected/2 + 1 - 10 |
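| // With the same assumed InitialCwnd of 10 this works out to |
| // 40/2 + 1 - 10 = 11 packets on the first iteration, then 22 and 23 on |
| // the following ones once the 10 outstanding packets have been acked. |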
| for i := 0; i < iterations; i++ { |
| // Read all packets expected on this iteration. Don't |
| // acknowledge any of them just yet, so that we can measure the |
| // congestion window. |
| for j := 0; j < expected; j++ { |
| c.ReceiveAndCheckPacket(data, bytesRead, maxPayload) |
| bytesRead += maxPayload |
| } |
| |
| // Check we don't receive any more packets on this iteration. |
| // The timeout can't be too high or we'll trigger the retransmit timer. |
| c.CheckNoPacketTimeout(fmt.Sprintf("More packets received (after deflation) than expected %d for this cwnd.", expected), 50*time.Millisecond) |
| |
| // Acknowledge all the data received so far. |
| c.SendAck(790, bytesRead) |
| |
| // In congestion avoidance, the packet trains increase by 1 in |
| // each iteration. |
| if i == 0 { |
| // After the first iteration we expect to get the full |
| // congestion window worth of packets in every |
| // iteration. |
| expected += 10 |
| } |
| expected++ |
| } |
| } |
| |
| func TestExponentialIncreaseDuringSlowStart(t *testing.T) { |
| maxPayload := 32 |
| c := context.New(t, uint32(header.TCPMinimumSize+header.IPv4MinimumSize+maxPayload)) |
| defer c.Cleanup() |
| |
| c.CreateConnected(789, 30000, -1 /* epRcvBuf */) |
| |
| const iterations = 3 |
| data := make([]byte, maxPayload*(tcp.InitialCwnd<<(iterations+1))) |
| for i := range data { |
| data[i] = byte(i) |
| } |
| |
| // Write all the data in one shot. Packets will only be written at the |
| // MTU size though. |
| var r bytes.Reader |
| r.Reset(data) |
| if _, err := c.EP.Write(&r, tcpip.WriteOptions{}); err != nil { |
| t.Fatalf("Write failed: %s", err) |
| } |
| |
| expected := tcp.InitialCwnd |
| bytesRead := 0 |
| for i := 0; i < iterations; i++ { |
| // Read all packets expected on this iteration. Don't |
| // acknowledge any of them just yet, so that we can measure the |
| // congestion window. |
| for j := 0; j < expected; j++ { |
| c.ReceiveAndCheckPacket(data, bytesRead, maxPayload) |
| bytesRead += maxPayload |
| } |
| |
| // Check we don't receive any more packets on this iteration. |
| // The timeout can't be too high or we'll trigger the retransmit timer. |
| c.CheckNoPacketTimeout("More packets received than expected for this cwnd.", 50*time.Millisecond) |
| |
| // Acknowledge all the data received so far. |
| c.SendAck(790, bytesRead) |
| |
| // Double the number of expected packets for the next iteration. |
| expected *= 2 |
| } |
| } |
| |
| func TestCongestionAvoidance(t *testing.T) { |
| maxPayload := 32 |
| c := context.New(t, uint32(header.TCPMinimumSize+header.IPv4MinimumSize+maxPayload)) |
| defer c.Cleanup() |
| |
| c.CreateConnected(789, 30000, -1 /* epRcvBuf */) |
| |
| const iterations = 3 |
| data := make([]byte, 2*maxPayload*(tcp.InitialCwnd<<(iterations+1))) |
| for i := range data { |
| data[i] = byte(i) |
| } |
| |
| // Write all the data in one shot. Packets will only be written at the |
| // MTU size though. |
| var r bytes.Reader |
| r.Reset(data) |
| if _, err := c.EP.Write(&r, tcpip.WriteOptions{}); err != nil { |
| t.Fatalf("Write failed: %s", err) |
| } |
| |
| // Do slow start for a few iterations. |
| expected := tcp.InitialCwnd |
| bytesRead := 0 |
| for i := 0; i < iterations; i++ { |
| expected = tcp.InitialCwnd << uint(i) |
| if i > 0 { |
| // Acknowledge all the data received so far if not on |
| // first iteration. |
| c.SendAck(790, bytesRead) |
| } |
| |
| // Read all packets expected on this iteration. Don't |
| // acknowledge any of them just yet, so that we can measure the |
| // congestion window. |
| for j := 0; j < expected; j++ { |
| c.ReceiveAndCheckPacket(data, bytesRead, maxPayload) |
| bytesRead += maxPayload |
| } |
| |
| // Check we don't receive any more packets on this iteration. |
| // The timeout can't be too high or we'll trigger the retransmit timer. |
| c.CheckNoPacketTimeout("More packets received than expected for this cwnd (slow start phase).", 50*time.Millisecond) |
| } |
| |
| // Don't acknowledge the first packet of the last packet train. Let it |
| // time out, which will trigger a restart of slow start and |
| // initialization of ssthresh to cwnd/2. |
| rtxOffset := bytesRead - maxPayload*expected |
| c.ReceiveAndCheckPacket(data, rtxOffset, maxPayload) |
| |
| // Acknowledge all the data received so far. |
| c.SendAck(790, bytesRead) |
| |
| // This part is tricky: when the timeout happened, we had "expected" |
| // packets pending, cwnd reset to 1, and ssthresh set to expected/2. |
| // By acknowledging "expected" packets, the slow-start part will |
| // increase cwnd to expected/2 (which "consumes" expected/2-1 of the |
| // acknowledgements), then the congestion avoidance part will consume |
| // an extra expected/2 acks to take cwnd to expected/2 + 1. One ack |
| // remains in the "ack count" (which will cause cwnd to be incremented |
| // once it reaches cwnd acks). |
| // |
| // So we're straight into congestion avoidance with cwnd set to |
| // expected/2 + 1. |
| // |
| // Check that packet trains of cwnd packets are sent, and that cwnd is |
| // incremented by 1 after we acknowledge each train. |
| expected = expected/2 + 1 |
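| // As a worked example (again assuming InitialCwnd is 10): the timed-out |
| // train had 40 packets, so ssthresh is 20 and the loop below should see |
| // trains of 21, 22 and 23 packets. |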
| for i := 0; i < iterations; i++ { |
| // Read all packets expected on this iteration. Don't |
| // acknowledge any of them just yet, so that we can measure the |
| // congestion window. |
| for j := 0; j < expected; j++ { |
| c.ReceiveAndCheckPacket(data, bytesRead, maxPayload) |
| bytesRead += maxPayload |
| } |
| |
| // Check we don't receive any more packets on this iteration. |
| // The timeout can't be too high or we'll trigger the retransmit timer. |
| c.CheckNoPacketTimeout("More packets received than expected for this cwnd (congestion avoidance phase).", 50*time.Millisecond) |
| |
| // Acknowledge all the data received so far. |
| c.SendAck(790, bytesRead) |
| |
| // In congestion avoidance, the packet trains increase by 1 in |
| // each iteration. |
| expected++ |
| } |
| } |
| |
| // cubicCwnd returns an estimate of a cubic window given the original |
| // cwnd (origCwnd), wMax, the last congestion event time and the sRTT. |
| func cubicCwnd(origCwnd int, wMax int, congEventTime time.Time, sRTT time.Duration) int { |
| cwnd := float64(origCwnd) |
| // We wait 50ms between each iteration so sRTT as computed by cubic |
| // should be close to 50ms. |
| elapsed := (time.Since(congEventTime) + sRTT).Seconds() |
| k := math.Cbrt(float64(wMax) * 0.3 / 0.7) |
| wtRTT := 0.4*math.Pow(elapsed-k, 3) + float64(wMax) |
| cwnd += (wtRTT - cwnd) / cwnd |
| return int(cwnd) |
| } |
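| // For reference, the estimate above loosely follows the CUBIC window |
| // function W_cubic(t) = C*(t-K)^3 + W_max from RFC 8312: wtRTT plays the |
| // role of W_cubic(t+RTT), and the last line applies the per-ACK increase |
| // (W_cubic(t+RTT) - cwnd) / cwnd once per call rather than per ACK. The |
| // constants are intended to mirror netstack's CUBIC implementation, so |
| // treat the result as a rough estimate, not an exact prediction. |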
| |
| func TestCubicCongestionAvoidance(t *testing.T) { |
| maxPayload := 32 |
| c := context.New(t, uint32(header.TCPMinimumSize+header.IPv4MinimumSize+maxPayload)) |
| defer c.Cleanup() |
| |
| enableCUBIC(t, c) |
| |
| c.CreateConnected(789, 30000, -1 /* epRcvBuf */) |
| |
| const iterations = 3 |
| data := make([]byte, 2*maxPayload*(tcp.InitialCwnd<<(iterations+1))) |
| for i := range data { |
| data[i] = byte(i) |
| } |
| |
| // Write all the data in one shot. Packets will only be written at the |
| // MTU size though. |
| var r bytes.Reader |
| r.Reset(data) |
| if _, err := c.EP.Write(&r, tcpip.WriteOptions{}); err != nil { |
| t.Fatalf("Write failed: %s", err) |
| } |
| |
| // Do slow start for a few iterations. |
| expected := tcp.InitialCwnd |
| bytesRead := 0 |
| for i := 0; i < iterations; i++ { |
| expected = tcp.InitialCwnd << uint(i) |
| if i > 0 { |
| // Acknowledge all the data received so far if not on |
| // first iteration. |
| c.SendAck(790, bytesRead) |
| } |
| |
| // Read all packets expected on this iteration. Don't |
| // acknowledge any of them just yet, so that we can measure the |
| // congestion window. |
| for j := 0; j < expected; j++ { |
| c.ReceiveAndCheckPacket(data, bytesRead, maxPayload) |
| bytesRead += maxPayload |
| } |
| |
| // Check we don't receive any more packets on this iteration. |
| // The timeout can't be too high or we'll trigger the retransmit timer. |
| c.CheckNoPacketTimeout("More packets received than expected for this cwnd (during slow-start phase).", 50*time.Millisecond) |
| } |
| |
| // Don't acknowledge the first packet of the last packet train. Let it |
| // time out, which will trigger a restart of slow start and |
| // initialization of ssthresh to cwnd * 0.7. |
| rtxOffset := bytesRead - maxPayload*expected |
| c.ReceiveAndCheckPacket(data, rtxOffset, maxPayload) |
| |
| // Acknowledge all pending data. |
| c.SendAck(790, bytesRead) |
| |
| // Store away the time we sent the ACK. Assuming a 200ms RTO, we |
| // estimate that the sender's retransmission timer will fire 200ms from |
| // now, sending it back into slow start. |
| packetDropTime := time.Now().Add(200 * time.Millisecond) |
| |
| // This part is tricky: when the timeout happened, we had "expected" |
| // packets pending, cwnd reset to 1, and ssthresh set to expected * 0.7. |
| // By acknowledging "expected" packets, the slow-start part will |
| // increase cwnd to expected/2 essentially putting the connection |
| // straight into congestion avoidance. |
| wMax := expected |
| // Lower expected as per cubic spec after a congestion event. |
| expected = int(float64(expected) * 0.7) |
| cwnd := expected |
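| // As a rough worked example (assuming InitialCwnd is 10): the last |
| // slow-start train had 40 packets, so wMax is 40 and the post-loss |
| // window starts near int(40 * 0.7) = 28 before cubic growth takes over. |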
| for i := 0; i < iterations; i++ { |
| // Cubic grows the window independently of ACKs: window growth is a |
| // function of the time elapsed since the last congestion event. As a |
| // result the congestion window does not grow deterministically in |
| // response to ACKs. |
| // |
| // We need to roughly estimate what the cwnd of the sender is |
| // based on when we sent the dupacks. |
| cwnd := cubicCwnd(cwnd, wMax, packetDropTime, 50*time.Millisecond) |
| |
| packetsExpected := cwnd |
| for j := 0; j < packetsExpected; j++ { |
| c.ReceiveAndCheckPacket(data, bytesRead, maxPayload) |
| bytesRead += maxPayload |
| } |
| t.Logf("expected packets received, next trying to receive any extra packets that may come") |
| |
| // If our estimate was correct there should be no more pending packets. |
| // We attempt to read a packet a few times with a short sleep in between |
| // to ensure that we don't see the sender send any unexpected packets. |
| unexpectedPackets := 0 |
| for { |
| gotPacket := c.ReceiveNonBlockingAndCheckPacket(data, bytesRead, maxPayload) |
| if !gotPacket { |
| break |
| } |
| bytesRead += maxPayload |
| unexpectedPackets++ |
| time.Sleep(1 * time.Millisecond) |
| } |
| if unexpectedPackets != 0 { |
| t.Fatalf("received %d unexpected packets for iteration %d", unexpectedPackets, i) |
| } |
| // Check we don't receive any more packets on this iteration. |
| // The timeout can't be too high or we'll trigger the retransmit timer. |
| c.CheckNoPacketTimeout("More packets received than expected for this cwnd (congestion avoidance)", 5*time.Millisecond) |
| |
| // Acknowledge all the data received so far. |
| c.SendAck(790, bytesRead) |
| } |
| } |
| |
| func TestRetransmit(t *testing.T) { |
| maxPayload := 32 |
| c := context.New(t, uint32(header.TCPMinimumSize+header.IPv4MinimumSize+maxPayload)) |
| defer c.Cleanup() |
| |
| c.CreateConnected(789, 30000, -1 /* epRcvBuf */) |
| |
| const iterations = 3 |
| data := make([]byte, maxPayload*(tcp.InitialCwnd<<(iterations+1))) |
| for i := range data { |
| data[i] = byte(i) |
| } |
| |
| // Write all the data in two shots. Packets will only be written at the |
| // MTU size though. |
| var r bytes.Reader |
| r.Reset(data[:len(data)/2]) |
| if _, err := c.EP.Write(&r, tcpip.WriteOptions{}); err != nil { |
| t.Fatalf("Write failed: %s", err) |
| } |
| r.Reset(data[len(data)/2:]) |
| if _, err := c.EP.Write(&r, tcpip.WriteOptions{}); err != nil { |
| t.Fatalf("Write failed: %s", err) |
| } |
| |
| // Do slow start for a few iterations. |
| expected := tcp.InitialCwnd |
| bytesRead := 0 |
| for i := 0; i < iterations; i++ { |
| expected = tcp.InitialCwnd << uint(i) |
| if i > 0 { |
| // Acknowledge all the data received so far if not on |
| // first iteration. |
| c.SendAck(790, bytesRead) |
| } |
| |
| // Read all packets expected on this iteration. Don't |
| // acknowledge any of them just yet, so that we can measure the |
| // congestion window. |
| for j := 0; j < expected; j++ { |
| c.ReceiveAndCheckPacket(data, bytesRead, maxPayload) |
| bytesRead += maxPayload |
| } |
| |
| // Check we don't receive any more packets on this iteration. |
| // The timeout can't be too high or we'll trigger the retransmit timer. |
| c.CheckNoPacketTimeout("More packets received than expected for this cwnd.", 50*time.Millisecond) |
| } |
| |
| // Wait for a timeout and retransmit. |
| rtxOffset := bytesRead - maxPayload*expected |
| c.ReceiveAndCheckPacket(data, rtxOffset, maxPayload) |
| |
| metricPollFn := func() error { |
| if got, want := c.Stack().Stats().TCP.Timeouts.Value(), uint64(1); got != want { |
| return fmt.Errorf("got stats.TCP.Timeouts.Value = %d, want = %d", got, want) |
| } |
| |
| if got, want := c.Stack().Stats().TCP.Retransmits.Value(), uint64(1); got != want { |
| return fmt.Errorf("got stats.TCP.Retransmits.Value = %d, want = %d", got, want) |
| } |
| |
| if got, want := c.EP.Stats().(*tcp.Stats).SendErrors.Timeouts.Value(), uint64(1); got != want { |
| return fmt.Errorf("got EP SendErrors.Timeouts.Value = %d, want = %d", got, want) |
| } |
| |
| if got, want := c.EP.Stats().(*tcp.Stats).SendErrors.Retransmits.Value(), uint64(1); got != want { |
| return fmt.Errorf("got EP stats SendErrors.Retransmits.Value = %d, want = %d", got, want) |
| } |
| |
| if got, want := c.Stack().Stats().TCP.SlowStartRetransmits.Value(), uint64(1); got != want { |
| return fmt.Errorf("got stats.TCP.SlowStartRetransmits.Value = %d, want = %d", got, want) |
| } |
| |
| return nil |
| } |
| |
| // Poll when checking metrics. |
| if err := testutil.Poll(metricPollFn, 1*time.Second); err != nil { |
| t.Error(err) |
| } |
| |
| // Acknowledge half of the pending data. |
| rtxOffset = bytesRead - expected*maxPayload/2 |
| c.SendAck(790, rtxOffset) |
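| // For concreteness (assuming InitialCwnd is 10): bytesRead is 2240 and |
| // expected is 40 here, so this ACK covers everything up to offset 1600 |
| // and the loop below drains the rest of the 5120-byte buffer. |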
| |
| // Receive the remaining data, making sure that acknowledged data is not |
| // retransmitted. |
| for offset := rtxOffset; offset < len(data); offset += maxPayload { |
| c.ReceiveAndCheckPacket(data, offset, maxPayload) |
| c.SendAck(790, offset+maxPayload) |
| } |
| |
| c.CheckNoPacketTimeout("More packets received than expected for this cwnd.", 50*time.Millisecond) |
| } |