// Copyright 2018 The gVisor Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// These tests are flaky when run under the go race detector because some
// iterations take long enough for the retransmit timer to kick in, causing
// the congestion window measurements to fail due to extra packets, etc.
//
// +build !race

package tcp_test

import (
"bytes"
"fmt"
"math"
"testing"
"time"
"gvisor.dev/gvisor/pkg/tcpip"
"gvisor.dev/gvisor/pkg/tcpip/header"
"gvisor.dev/gvisor/pkg/tcpip/transport/tcp"
"gvisor.dev/gvisor/pkg/tcpip/transport/tcp/testing/context"
"gvisor.dev/gvisor/pkg/test/testutil"
)

func TestFastRecovery(t *testing.T) {
maxPayload := 32
c := context.New(t, uint32(header.TCPMinimumSize+header.IPv4MinimumSize+maxPayload))
defer c.Cleanup()
c.CreateConnected(789, 30000, -1 /* epRcvBuf */)
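// 789 is the peer's initial sequence number (ISS) and 30000 its
// advertised receive window; the SendAck calls below use 790, i.e.
// ISS+1, since the SYN consumes one sequence number.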
const iterations = 3
data := make([]byte, 2*maxPayload*(tcp.InitialCwnd<<(iterations+1)))
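// The 2x factor leaves headroom so the sender never runs out of data
// during the slow-start and recovery phases exercised below.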
for i := range data {
data[i] = byte(i)
}
// Write all the data in one shot. Packets will only be written at the
// MTU size though.
var r bytes.Reader
r.Reset(data)
if _, err := c.EP.Write(&r, tcpip.WriteOptions{}); err != nil {
t.Fatalf("Write failed: %s", err)
}
// Do slow start for a few iterations.
expected := tcp.InitialCwnd
bytesRead := 0
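// Assuming tcp.InitialCwnd is 10, the iterations below should see
// trains of 10, 20, and 40 packets.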
for i := 0; i < iterations; i++ {
expected = tcp.InitialCwnd << uint(i)
if i > 0 {
// Acknowledge all the data received so far if not on
// first iteration.
c.SendAck(790, bytesRead)
}
// Read all packets expected on this iteration. Don't
// acknowledge any of them just yet, so that we can measure the
// congestion window.
for j := 0; j < expected; j++ {
c.ReceiveAndCheckPacket(data, bytesRead, maxPayload)
bytesRead += maxPayload
}
// Check we don't receive any more packets on this iteration.
// The wait can't be too long or the sender's retransmit timer will
// fire and we'll see unexpected packets.
c.CheckNoPacketTimeout("More packets received than expected for this cwnd.", 50*time.Millisecond)
}
// Send 3 duplicate acks. This should force an immediate retransmit of
// the pending packet and put the sender into fast recovery.
rtxOffset := bytesRead - maxPayload*expected
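// rtxOffset is the offset of the first packet in the last,
// still-unacknowledged train; the dupacks below ask for it again.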
for i := 0; i < 3; i++ {
c.SendAck(790, rtxOffset)
}
// Receive the retransmitted packet.
c.ReceiveAndCheckPacket(data, rtxOffset, maxPayload)
// Wait before checking metrics.
metricPollFn := func() error {
if got, want := c.Stack().Stats().TCP.FastRetransmit.Value(), uint64(1); got != want {
return fmt.Errorf("got stats.TCP.FastRetransmit.Value = %d, want = %d", got, want)
}
if got, want := c.Stack().Stats().TCP.Retransmits.Value(), uint64(1); got != want {
return fmt.Errorf("got stats.TCP.Retransmit.Value = %d, want = %d", got, want)
}
if got, want := c.Stack().Stats().TCP.FastRecovery.Value(), uint64(1); got != want {
return fmt.Errorf("got stats.TCP.FastRecovery.Value = %d, want = %d", got, want)
}
return nil
}
if err := testutil.Poll(metricPollFn, 1*time.Second); err != nil {
t.Error(err)
}
// Now send 7 more duplicate acks. Each of these should cause a window
// inflation by 1 and cause the sender to send an extra packet.
for i := 0; i < 7; i++ {
c.SendAck(790, rtxOffset)
}
recover := bytesRead
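// recover marks the highest offset sent when loss was signaled;
// acking it below takes the sender out of fast recovery (cf. the
// NewReno "recover" variable, RFC 6582).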
// Ensure no new packets arrive.
c.CheckNoPacketTimeout("More packets received than expected during recovery after dupacks for this cwnd.",
50*time.Millisecond)
// Acknowledge half of the pending data.
rtxOffset = bytesRead - expected*maxPayload/2
c.SendAck(790, rtxOffset)
// Receive the retransmit due to partial ack.
c.ReceiveAndCheckPacket(data, rtxOffset, maxPayload)
// Wait before checking metrics.
metricPollFn = func() error {
if got, want := c.Stack().Stats().TCP.FastRetransmit.Value(), uint64(2); got != want {
return fmt.Errorf("got stats.TCP.FastRetransmit.Value = %d, want = %d", got, want)
}
if got, want := c.Stack().Stats().TCP.Retransmits.Value(), uint64(2); got != want {
return fmt.Errorf("got stats.TCP.Retransmit.Value = %d, want = %d", got, want)
}
return nil
}
if err := testutil.Poll(metricPollFn, 1*time.Second); err != nil {
t.Error(err)
}
// Receive the 10 extra packets that should have been released due to
// the congestion window inflation in recovery.
for i := 0; i < 10; i++ {
c.ReceiveAndCheckPacket(data, bytesRead, maxPayload)
bytesRead += maxPayload
}
// A partial ACK during recovery should reduce the congestion window by
// the number of packets acked. Since we had "expected" packets
// outstanding before sending the partial ack and we acked expected/2 of
// them, the cwnd and outstanding count should be expected/2 + 10 (7
// dupAcks + 3 for the original dupacks that triggered fast recovery).
// This means the sender should not send any more packets until we ack
// this one.
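// For example, assuming tcp.InitialCwnd is 10, expected == 40 here, so
// after acking 20 of the 40-packet train the window and the
// outstanding count are both 40/2 + 10 == 30.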
c.CheckNoPacketTimeout("More packets received than expected during recovery after partial ack for this cwnd.",
50*time.Millisecond)
// Acknowledge all pending data to recover point.
c.SendAck(790, recover)
// At this point, the cwnd should reset to expected/2 and there are 10
// packets outstanding.
//
// NOTE: Technically netstack is incorrect in that we adjust the cwnd on
// the same segment that takes us out of recovery. But because of that
// the actual cwnd at exit of recovery will be expected/2 + 1 as we
// acked a cwnd worth of packets which will increase the cwnd further by
// 1 in congestion avoidance.
//
// Now, in the first iteration, since there are 10 packets outstanding,
// we would expect to get expected/2 + 1 - 10 packets. But subsequent
// iterations will send us expected/2 + 1 + 1 (per iteration).
expected = expected/2 + 1 - 10
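// E.g., with tcp.InitialCwnd assumed to be 10, expected == 40 after
// slow start, so the first train below is 40/2 + 1 - 10 == 11 packets.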
for i := 0; i < iterations; i++ {
// Read all packets expected on this iteration. Don't
// acknowledge any of them just yet, so that we can measure the
// congestion window.
for j := 0; j < expected; j++ {
c.ReceiveAndCheckPacket(data, bytesRead, maxPayload)
bytesRead += maxPayload
}
// Check we don't receive any more packets on this iteration.
// The wait can't be too long or the sender's retransmit timer will
// fire and we'll see unexpected packets.
c.CheckNoPacketTimeout(fmt.Sprintf("More packets received(after deflation) than expected %d for this cwnd.", expected), 50*time.Millisecond)
// Acknowledge all the data received so far.
c.SendAck(790, bytesRead)
// In congestion avoidance, the packet trains increase by 1 in
// each iteration.
if i == 0 {
// After the first iteration we expect to get the full
// congestion window worth of packets in every
// iteration.
expected += 10
}
expected++
}
}

func TestExponentialIncreaseDuringSlowStart(t *testing.T) {
maxPayload := 32
c := context.New(t, uint32(header.TCPMinimumSize+header.IPv4MinimumSize+maxPayload))
defer c.Cleanup()
c.CreateConnected(789, 30000, -1 /* epRcvBuf */)
const iterations = 3
data := make([]byte, maxPayload*(tcp.InitialCwnd<<(iterations+1)))
for i := range data {
data[i] = byte(i)
}
// Write all the data in one shot. Packets will only be written at the
// MTU size though.
var r bytes.Reader
r.Reset(data)
if _, err := c.EP.Write(&r, tcpip.WriteOptions{}); err != nil {
t.Fatalf("Write failed: %s", err)
}
expected := tcp.InitialCwnd
bytesRead := 0
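// Assuming tcp.InitialCwnd is 10, the window should double on each
// full-window ack: trains of 10, 20, and then 40 packets.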
for i := 0; i < iterations; i++ {
// Read all packets expected on this iteration. Don't
// acknowledge any of them just yet, so that we can measure the
// congestion window.
for j := 0; j < expected; j++ {
c.ReceiveAndCheckPacket(data, bytesRead, maxPayload)
bytesRead += maxPayload
}
// Check we don't receive any more packets on this iteration.
// The wait can't be too long or the sender's retransmit timer will
// fire and we'll see unexpected packets.
c.CheckNoPacketTimeout("More packets received than expected for this cwnd.", 50*time.Millisecond)
// Acknowledge all the data received so far.
c.SendAck(790, bytesRead)
// Double the number of expected packets for the next iteration.
expected *= 2
}
}

func TestCongestionAvoidance(t *testing.T) {
maxPayload := 32
c := context.New(t, uint32(header.TCPMinimumSize+header.IPv4MinimumSize+maxPayload))
defer c.Cleanup()
c.CreateConnected(789, 30000, -1 /* epRcvBuf */)
const iterations = 3
data := make([]byte, 2*maxPayload*(tcp.InitialCwnd<<(iterations+1)))
for i := range data {
data[i] = byte(i)
}
// Write all the data in one shot. Packets will only be written at the
// MTU size though.
var r bytes.Reader
r.Reset(data)
if _, err := c.EP.Write(&r, tcpip.WriteOptions{}); err != nil {
t.Fatalf("Write failed: %s", err)
}
// Do slow start for a few iterations.
expected := tcp.InitialCwnd
bytesRead := 0
for i := 0; i < iterations; i++ {
expected = tcp.InitialCwnd << uint(i)
if i > 0 {
// Acknowledge all the data received so far if not on
// first iteration.
c.SendAck(790, bytesRead)
}
// Read all packets expected on this iteration. Don't
// acknowledge any of them just yet, so that we can measure the
// congestion window.
for j := 0; j < expected; j++ {
c.ReceiveAndCheckPacket(data, bytesRead, maxPayload)
bytesRead += maxPayload
}
// Check we don't receive any more packets on this iteration.
// The wait can't be too long or the sender's retransmit timer will
// fire and we'll see unexpected packets.
c.CheckNoPacketTimeout("More packets received than expected for this cwnd (slow start phase).", 50*time.Millisecond)
}
// Don't acknowledge the first packet of the last packet train. Let's
// wait for it to time out, which will trigger a restart of slow
// start, and initialization of ssthresh to cwnd/2.
rtxOffset := bytesRead - maxPayload*expected
c.ReceiveAndCheckPacket(data, rtxOffset, maxPayload)
// Acknowledge all the data received so far.
c.SendAck(790, bytesRead)
// This part is tricky: when the timeout happened, we had "expected"
// packets pending, cwnd reset to 1, and ssthresh set to expected/2.
// By acknowledging "expected" packets, the slow-start part will
// increase cwnd to expected/2 (which "consumes" expected/2-1 of the
// acknowledgements), then the congestion avoidance part will consume
// an extra expected/2 acks to take cwnd to expected/2 + 1. One ack
// remains in the "ack count" (which will cause cwnd to be incremented
// once it reaches cwnd acks).
//
// So we're straight into congestion avoidance with cwnd set to
// expected/2 + 1.
//
// Check that packets trains of cwnd packets are sent, and that cwnd is
// incremented by 1 after we acknowledge each packet.
expected = expected/2 + 1
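// E.g., assuming tcp.InitialCwnd is 10, slow start above ends with
// expected == 40, so the first congestion-avoidance train is
// 40/2 + 1 == 21 packets.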
for i := 0; i < iterations; i++ {
// Read all packets expected on this iteration. Don't
// acknowledge any of them just yet, so that we can measure the
// congestion window.
for j := 0; j < expected; j++ {
c.ReceiveAndCheckPacket(data, bytesRead, maxPayload)
bytesRead += maxPayload
}
// Check we don't receive any more packets on this iteration.
// The wait can't be too long or the sender's retransmit timer will
// fire and we'll see unexpected packets.
c.CheckNoPacketTimeout("More packets received than expected for this cwnd (congestion avoidance phase).", 50*time.Millisecond)
// Acknowledge all the data received so far.
c.SendAck(790, bytesRead)
// In congestion avoidance, the packet trains increase by 1 in
// each iteration.
expected++
}
}

// cubicCwnd returns an estimate of a cubic window given the origCwnd,
// wMax, last congestion event time, and sRTT.
func cubicCwnd(origCwnd int, wMax int, congEventTime time.Time, sRTT time.Duration) int {
cwnd := float64(origCwnd)
// We wait 50ms between each iteration so sRTT as computed by cubic
// should be close to 50ms.
elapsed := (time.Since(congEventTime) + sRTT).Seconds()
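// k estimates CUBIC's K: the time period (in seconds) the window
// takes to grow back to wMax after the reduction (cf. RFC 8312).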
k := math.Cbrt(float64(wMax) * 0.3 / 0.7)
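// wtRTT is the cubic function W(t) = C*(t-K)^3 + wMax with C = 0.4,
// evaluated at t = elapsed (which already includes one sRTT).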
wtRTT := 0.4*math.Pow(elapsed-k, 3) + float64(wMax)
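// Apply CUBIC's per-ACK growth: cwnd increases by
// (W(t+RTT) - cwnd) / cwnd per acknowledgment.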
cwnd += (wtRTT - cwnd) / cwnd
return int(cwnd)
}

func TestCubicCongestionAvoidance(t *testing.T) {
maxPayload := 32
c := context.New(t, uint32(header.TCPMinimumSize+header.IPv4MinimumSize+maxPayload))
defer c.Cleanup()
enableCUBIC(t, c)
c.CreateConnected(789, 30000, -1 /* epRcvBuf */)
const iterations = 3
data := make([]byte, 2*maxPayload*(tcp.InitialCwnd<<(iterations+1)))
for i := range data {
data[i] = byte(i)
}
// Write all the data in one shot. Packets will only be written at the
// MTU size though.
var r bytes.Reader
r.Reset(data)
if _, err := c.EP.Write(&r, tcpip.WriteOptions{}); err != nil {
t.Fatalf("Write failed: %s", err)
}
// Do slow start for a few iterations.
expected := tcp.InitialCwnd
bytesRead := 0
for i := 0; i < iterations; i++ {
expected = tcp.InitialCwnd << uint(i)
if i > 0 {
// Acknowledge all the data received so far if not on
// first iteration.
c.SendAck(790, bytesRead)
}
// Read all packets expected on this iteration. Don't
// acknowledge any of them just yet, so that we can measure the
// congestion window.
for j := 0; j < expected; j++ {
c.ReceiveAndCheckPacket(data, bytesRead, maxPayload)
bytesRead += maxPayload
}
// Check we don't receive any more packets on this iteration.
// The wait can't be too long or the sender's retransmit timer will
// fire and we'll see unexpected packets.
c.CheckNoPacketTimeout("More packets received than expected for this cwnd (during slow-start phase).", 50*time.Millisecond)
}
// Don't acknowledge the first packet of the last packet train. Let's
// wait for it to time out, which will trigger a restart of slow
// start, and initialization of ssthresh to cwnd * 0.7.
rtxOffset := bytesRead - maxPayload*expected
c.ReceiveAndCheckPacket(data, rtxOffset, maxPayload)
// Acknowledge all pending data.
c.SendAck(790, bytesRead)
// Store away the time we sent the ACK; assuming a 200ms RTO, we
// estimate that the sender will hit the RTO 200ms from now and go
// back into slow start.
packetDropTime := time.Now().Add(200 * time.Millisecond)
// This part is tricky: when the timeout happened, we had "expected"
// packets pending, cwnd reset to 1, and ssthresh set to expected * 0.7.
// By acknowledging "expected" packets, the slow-start part will
// increase cwnd to ssthresh (expected * 0.7), essentially putting the
// connection straight into congestion avoidance.
wMax := expected
// Lower expected as per cubic spec after a congestion event.
expected = int(float64(expected) * 0.7)
cwnd := expected
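// E.g., assuming tcp.InitialCwnd is 10, slow start ends with
// expected == 40, so wMax == 40 and the post-drop window is
// int(40 * 0.7) == 28.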
for i := 0; i < iterations; i++ {
// Cubic grows window independent of ACKs. Cubic Window growth
// is a function of time elapsed since last congestion event.
// As a result the congestion window does not grow
// deterministically in response to ACKs.
//
// We need to roughly estimate what the cwnd of the sender is
// based on when we sent the dupacks.
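// Note that this cwnd shadows the outer cwnd: each iteration
// re-estimates the window from the post-drop value, with the growth
// coming from the increasing time elapsed since packetDropTime.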
cwnd := cubicCwnd(cwnd, wMax, packetDropTime, 50*time.Millisecond)
packetsExpected := cwnd
for j := 0; j < packetsExpected; j++ {
c.ReceiveAndCheckPacket(data, bytesRead, maxPayload)
bytesRead += maxPayload
}
t.Logf("expected packets received, next trying to receive any extra packets that may come")
// If our estimate was correct there should be no more pending packets.
// We attempt to read a packet a few times with a short sleep in between
// to ensure that we don't see the sender send any unexpected packets.
unexpectedPackets := 0
for {
gotPacket := c.ReceiveNonBlockingAndCheckPacket(data, bytesRead, maxPayload)
if !gotPacket {
break
}
bytesRead += maxPayload
unexpectedPackets++
time.Sleep(1 * time.Millisecond)
}
if unexpectedPackets != 0 {
t.Fatalf("received %d unexpected packets for iteration %d", unexpectedPackets, i)
}
// Check we don't receive any more packets on this iteration.
// The wait can't be too long or the sender's retransmit timer will
// fire and we'll see unexpected packets.
c.CheckNoPacketTimeout("More packets received than expected for this cwnd (congestion avoidance).", 5*time.Millisecond)
// Acknowledge all the data received so far.
c.SendAck(790, bytesRead)
}
}

func TestRetransmit(t *testing.T) {
maxPayload := 32
c := context.New(t, uint32(header.TCPMinimumSize+header.IPv4MinimumSize+maxPayload))
defer c.Cleanup()
c.CreateConnected(789, 30000, -1 /* epRcvBuf */)
const iterations = 3
data := make([]byte, maxPayload*(tcp.InitialCwnd<<(iterations+1)))
for i := range data {
data[i] = byte(i)
}
// Write all the data in two shots. Packets will only be written at the
// MTU size though.
var r bytes.Reader
r.Reset(data[:len(data)/2])
if _, err := c.EP.Write(&r, tcpip.WriteOptions{}); err != nil {
t.Fatalf("Write failed: %s", err)
}
r.Reset(data[len(data)/2:])
if _, err := c.EP.Write(&r, tcpip.WriteOptions{}); err != nil {
t.Fatalf("Write failed: %s", err)
}
// Do slow start for a few iterations.
expected := tcp.InitialCwnd
bytesRead := 0
for i := 0; i < iterations; i++ {
expected = tcp.InitialCwnd << uint(i)
if i > 0 {
// Acknowledge all the data received so far if not on
// first iteration.
c.SendAck(790, bytesRead)
}
// Read all packets expected on this iteration. Don't
// acknowledge any of them just yet, so that we can measure the
// congestion window.
for j := 0; j < expected; j++ {
c.ReceiveAndCheckPacket(data, bytesRead, maxPayload)
bytesRead += maxPayload
}
// Check we don't receive any more packets on this iteration.
// The wait can't be too long or the sender's retransmit timer will
// fire and we'll see unexpected packets.
c.CheckNoPacketTimeout("More packets received than expected for this cwnd.", 50*time.Millisecond)
}
// Wait for a timeout and retransmit.
rtxOffset := bytesRead - maxPayload*expected
c.ReceiveAndCheckPacket(data, rtxOffset, maxPayload)
metricPollFn := func() error {
if got, want := c.Stack().Stats().TCP.Timeouts.Value(), uint64(1); got != want {
return fmt.Errorf("got stats.TCP.Timeouts.Value = %d, want = %d", got, want)
}
if got, want := c.Stack().Stats().TCP.Retransmits.Value(), uint64(1); got != want {
return fmt.Errorf("got stats.TCP.Retransmits.Value = %d, want = %d", got, want)
}
if got, want := c.EP.Stats().(*tcp.Stats).SendErrors.Timeouts.Value(), uint64(1); got != want {
return fmt.Errorf("got EP SendErrors.Timeouts.Value = %d, want = %d", got, want)
}
if got, want := c.EP.Stats().(*tcp.Stats).SendErrors.Retransmits.Value(), uint64(1); got != want {
return fmt.Errorf("got EP stats SendErrors.Retransmits.Value = %d, want = %d", got, want)
}
if got, want := c.Stack().Stats().TCP.SlowStartRetransmits.Value(), uint64(1); got != want {
return fmt.Errorf("got stats.TCP.SlowStartRetransmits.Value = %d, want = %d", got, want)
}
return nil
}
// Poll when checking metrics.
if err := testutil.Poll(metricPollFn, 1*time.Second); err != nil {
t.Error(err)
}
// Acknowledge half of the pending data.
rtxOffset = bytesRead - expected*maxPayload/2
c.SendAck(790, rtxOffset)
// Receive the remaining data, making sure that acknowledged data is not
// retransmitted.
for offset := rtxOffset; offset < len(data); offset += maxPayload {
c.ReceiveAndCheckPacket(data, offset, maxPayload)
c.SendAck(790, offset+maxPayload)
}
c.CheckNoPacketTimeout("More packets received than expected for this cwnd.", 50*time.Millisecond)
}