tcpip/transport/tcp/endpoint.go - third_party/netstack - Git at Google

 // Copyright 2018 The gVisor Authors.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
 //
 //     http://www.apache.org/licenses/LICENSE-2.0
 //
 // Unless required by applicable law or agreed to in writing, software
 // distributed under the License is distributed on an "AS IS" BASIS,
 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 // See the License for the specific language governing permissions and
 // limitations under the License.

 package tcp

 import (
 	"encoding/binary"
 	"fmt"
 	"math"
 	"strings"
 	"sync"
 	"sync/atomic"
 	"time"

 	"github.com/google/netstack/rand"
 	"github.com/google/netstack/sleep"
 	"github.com/google/netstack/tcpip"
 	"github.com/google/netstack/tcpip/buffer"
 	"github.com/google/netstack/tcpip/hash/jenkins"
 	"github.com/google/netstack/tcpip/header"
 	"github.com/google/netstack/tcpip/iptables"
 	"github.com/google/netstack/tcpip/seqnum"
 	"github.com/google/netstack/tcpip/stack"
 	"github.com/google/netstack/tmutex"
 	"github.com/google/netstack/waiter"
 )

 // EndpointState represents the state of a TCP endpoint.
 type EndpointState uint32

 // Endpoint states. Note that they are represented in a netstack-specific
 // manner and may not be meaningful externally. Specifically, they need to be
 // translated to Linux's representation for these states if presented to
 // userspace.
 const (
 	// Endpoint states internal to netstack. These map to the TCP state CLOSED.
 	StateInitial EndpointState = iota
 	StateBound
 	StateConnecting // Connect() called, but the initial SYN hasn't been sent.
 	StateError

 	// TCP protocol states.
 	StateEstablished
 	StateSynSent
 	StateSynRecv
 	StateFinWait1
 	StateFinWait2
 	StateTimeWait
 	StateClose
 	StateCloseWait
 	StateLastAck
 	StateListen
 	StateClosing
 )

 // connected reports whether s is one of the states in which an endpoint is
 // connected to a peer.
 func (s EndpointState) connected() bool {
 	switch s {
 	case StateEstablished, StateFinWait1, StateFinWait2, StateTimeWait, StateCloseWait, StateLastAck, StateClosing:
 		return true
 	default:
 		return false
 	}
 }

 // String implements fmt.Stringer.String.
 //
 // Every declared state maps to a fixed upper-case name. A value outside the
 // declared set is rendered as "UNKNOWN(n)" rather than panicking, so that
 // formatting a corrupted or unexpected state (e.g. while logging) cannot
 // bring the process down.
 func (s EndpointState) String() string {
 	switch s {
 	case StateInitial:
 		return "INITIAL"
 	case StateBound:
 		return "BOUND"
 	case StateConnecting:
 		return "CONNECTING"
 	case StateError:
 		return "ERROR"
 	case StateEstablished:
 		return "ESTABLISHED"
 	case StateSynSent:
 		return "SYN-SENT"
 	case StateSynRecv:
 		return "SYN-RCVD"
 	case StateFinWait1:
 		return "FIN-WAIT1"
 	case StateFinWait2:
 		return "FIN-WAIT2"
 	case StateTimeWait:
 		return "TIME-WAIT"
 	case StateClose:
 		return "CLOSED"
 	case StateCloseWait:
 		return "CLOSE-WAIT"
 	case StateLastAck:
 		return "LAST-ACK"
 	case StateListen:
 		return "LISTEN"
 	case StateClosing:
 		return "CLOSING"
 	default:
 		// Convert to the underlying integer type so that fmt can never
 		// re-enter this method while formatting.
 		return fmt.Sprintf("UNKNOWN(%d)", uint32(s))
 	}
 }

 // Reasons for notifying the protocol goroutine. Each reason is a distinct bit
 // so that several of them can be combined in endpoint.notifyFlags.
 const (
 	// notifyNonZeroReceiveWindow is raised by readLocked when a read
 	// reopens a receive window that was previously zero.
 	notifyNonZeroReceiveWindow = 1 << iota
 	// notifyReceiveWindowChanged is raised when the receive buffer size is
 	// changed (e.g. by ModerateRecvBuf).
 	notifyReceiveWindowChanged
 	// notifyClose is raised by Close when a running worker goroutine has
 	// to perform the endpoint cleanup.
 	notifyClose
 	notifyMTUChanged
 	notifyDrain
 	notifyReset
 	notifyKeepaliveChanged
 	notifyMSSChanged
 )

 // SACKInfo holds TCP SACK related information for a given endpoint.
 //
 // +stateify savable
 type SACKInfo struct {
 	// Blocks holds the SACK blocks tracked for the endpoint. Its capacity,
 	// MaxSACKBlocks, is the maximum number of blocks tracked per endpoint.
 	Blocks [MaxSACKBlocks]header.SACKBlock

 	// NumBlocks is the number of valid SACK blocks stored in the
 	// Blocks array above.
 	NumBlocks int
 }

 // rcvBufAutoTuneParams holds the state variables used to compute the auto
 // tuned receive buffer size.
 //
 // +stateify savable
 type rcvBufAutoTuneParams struct {
 	// measureTime is the time at which the current measurement
 	// was started.
 	measureTime time.Time

 	// copied is the number of bytes copied out of the receive
 	// buffers since this measure began.
 	copied int

 	// prevCopied is the number of bytes copied out of the receive
 	// buffers in the previous RTT period. ModerateRecvBuf only updates
 	// it when the latest period copied more than this value.
 	prevCopied int

 	// rtt is the non-smoothed minimum RTT as measured by observing the time
 	// between when a byte is first acknowledged and the receipt of data
 	// that is at least one window beyond the sequence number that was
 	// acknowledged. A zero value means no RTT has been measured yet.
 	rtt time.Duration

 	// rttMeasureSeqNumber is the highest acceptable sequence number at the
 	// time this RTT measurement period began.
 	rttMeasureSeqNumber seqnum.Value

 	// rttMeasureTime is the absolute time at which the current rtt
 	// measurement period began.
 	rttMeasureTime time.Time

 	// disabled is true if an explicit receive buffer is set for the
 	// endpoint. When true, ModerateRecvBuf does nothing.
 	disabled bool
 }

 // ReceiveErrors collects segment receive errors within the transport layer.
 // It extends the generic tcpip.ReceiveErrors counters with TCP specific ones.
 type ReceiveErrors struct {
 	tcpip.ReceiveErrors

 	// SegmentQueueDropped is the number of segments dropped due to
 	// a full segment queue.
 	SegmentQueueDropped tcpip.StatCounter

 	// ChecksumErrors is the number of segments dropped due to bad checksums.
 	ChecksumErrors tcpip.StatCounter

 	// ListenOverflowSynDrop is the number of times the listen queue overflowed
 	// and a SYN was dropped.
 	ListenOverflowSynDrop tcpip.StatCounter

 	// ListenOverflowAckDrop is the number of times the final ACK
 	// in the handshake was dropped due to overflow.
 	ListenOverflowAckDrop tcpip.StatCounter

 	// ZeroRcvWindowState is the number of times we advertised
 	// a zero receive window when rcvList is full.
 	ZeroRcvWindowState tcpip.StatCounter
 }

 // SendErrors collects segment send errors within the transport layer. It
 // extends the generic tcpip.SendErrors counters with TCP specific ones.
 type SendErrors struct {
 	tcpip.SendErrors

 	// SegmentSendToNetworkFailed is the number of TCP segments that failed
 	// to be sent to the network endpoint.
 	SegmentSendToNetworkFailed tcpip.StatCounter

 	// SynSendToNetworkFailed is the number of TCP SYNs that failed to be
 	// sent to the network endpoint.
 	SynSendToNetworkFailed tcpip.StatCounter

 	// Retransmits is the number of TCP segments retransmitted.
 	Retransmits tcpip.StatCounter

 	// FastRetransmit is the number of segments retransmitted in fast
 	// recovery.
 	FastRetransmit tcpip.StatCounter

 	// Timeouts is the number of times the RTO expired.
 	Timeouts tcpip.StatCounter
 }

 // Stats holds statistics about the endpoint. It satisfies the
 // tcpip.EndpointStats marker interface through IsEndpointStats.
 type Stats struct {
 	// SegmentsReceived is the number of TCP segments received that
 	// the transport layer successfully parsed.
 	SegmentsReceived tcpip.StatCounter

 	// SegmentsSent is the number of TCP segments sent.
 	SegmentsSent tcpip.StatCounter

 	// FailedConnectionAttempts is the number of times we saw Connect and
 	// Accept errors.
 	FailedConnectionAttempts tcpip.StatCounter

 	// ReceiveErrors collects segment receive errors within the
 	// transport layer.
 	ReceiveErrors ReceiveErrors

 	// ReadErrors collects segment read errors from an endpoint read call.
 	ReadErrors tcpip.ReadErrors

 	// SendErrors collects segment send errors within the transport layer.
 	SendErrors SendErrors

 	// WriteErrors collects segment write errors from an endpoint write call.
 	WriteErrors tcpip.WriteErrors
 }

 // IsEndpointStats is an empty method to implement the tcpip.EndpointStats
 // marker interface. It intentionally does nothing.
 func (*Stats) IsEndpointStats() {}

 // EndpointInfo holds useful information about a transport endpoint which
 // can be queried by monitoring tools.
 //
 // +stateify savable
 type EndpointInfo struct {
 	stack.TransportEndpointInfo

 	// HardError is meaningful only when state is StateError. It stores the
 	// error to be returned when read/write syscalls are called and the
 	// endpoint is in this state. HardError is protected by endpoint mu.
 	HardError *tcpip.Error
 }

 // IsEndpointInfo is an empty method to implement the tcpip.EndpointInfo
 // marker interface. It intentionally does nothing.
 func (*EndpointInfo) IsEndpointInfo() {}

 // endpoint represents a TCP endpoint. This struct serves as the interface
 // between users of the endpoint and the protocol implementation; it is legal to
 // have concurrent goroutines make calls into the endpoint, they are properly
 // synchronized. The protocol implementation, however, runs in a single
 // goroutine.
 //
 // +stateify savable
 type endpoint struct {
 	EndpointInfo

 	// workMu is used to arbitrate which goroutine may perform protocol
 	// work. Only the main protocol goroutine is expected to call Lock() on
 	// it, but other goroutines (e.g., send) may call TryLock() to eagerly
 	// perform work without having to wait for the main one to wake up.
 	workMu tmutex.Mutex

 	// The following fields are initialized at creation time and do not
 	// change throughout the lifetime of the endpoint.
 	stack       *stack.Stack
 	waiterQueue *waiter.Queue

 	// lastError represents the last error that the endpoint reported;
 	// access to it is protected by the following mutex.
 	lastErrorMu sync.Mutex
 	lastError   *tcpip.Error

 	// The following fields are used to manage the receive queue. The
 	// protocol goroutine adds ready-for-delivery segments to rcvList,
 	// which are returned by Read() calls to users.
 	//
 	// Once the peer has closed its send side, rcvClosed is set to true
 	// to indicate to users that no more data is coming.
 	//
 	// rcvListMu can be taken after the endpoint mu below.
 	rcvListMu     sync.Mutex
 	rcvList       segmentList
 	rcvClosed     bool
 	rcvBufSize    int
 	rcvBufUsed    int
 	rcvAutoParams rcvBufAutoTuneParams
 	// zeroWindow indicates that the window was closed due to receive buffer
 	// space being filled up. This is set by the worker goroutine before
 	// moving a segment to the rcvList. This setting is cleared by the
 	// endpoint when a Read() call reads enough data for the new window to
 	// be non-zero.
 	zeroWindow bool

 	// The following fields are protected by mu.
 	mu sync.RWMutex

 	// state is the current state of the endpoint.
 	state EndpointState

 	isPortReserved    bool
 	isRegistered      bool
 	boundNICID        tcpip.NICID
 	route             stack.Route
 	ttl               uint8
 	v6only            bool
 	isConnectNotified bool
 	// TCP should never broadcast but Linux nevertheless supports enabling/
 	// disabling SO_BROADCAST, albeit as a NOOP.
 	broadcast bool

 	// effectiveNetProtos contains the network protocols actually in use. In
 	// most cases it will only contain "netProto", but in cases like IPv6
 	// endpoints with v6only set to false, this could include multiple
 	// protocols (e.g., IPv6 and IPv4) or a single different protocol (e.g.,
 	// IPv4 when IPv6 endpoint is bound or connected to an IPv4 mapped
 	// address).
 	effectiveNetProtos []tcpip.NetworkProtocolNumber

 	// workerRunning specifies if a worker goroutine is running.
 	workerRunning bool

 	// workerCleanup specifies if the worker goroutine must perform cleanup
 	// before exiting. This can only be set to true when workerRunning is
 	// also true, and they're both protected by the mutex.
 	workerCleanup bool

 	// sendTSOk is used to indicate when the TS Option has been negotiated.
 	// When sendTSOk is true every non-RST segment should carry a TS as per
 	// RFC7323#section-1.1
 	sendTSOk bool

 	// recentTS is the timestamp that should be sent in the TSEcr field of
 	// the timestamp for future segments sent by the endpoint. This field is
 	// updated if required when a new segment is received by this endpoint.
 	recentTS uint32

 	// tsOffset is a randomized offset added to the value of the
 	// TSVal field in the timestamp option.
 	tsOffset uint32

 	// shutdownFlags represent the current shutdown state of the endpoint.
 	shutdownFlags tcpip.ShutdownFlags

 	// sackPermitted is set to true if the peer sends the TCPSACKPermitted
 	// option in the SYN/SYN-ACK.
 	sackPermitted bool

 	// sack holds TCP SACK related information for this endpoint.
 	sack SACKInfo

 	// reusePort is set to true if SO_REUSEPORT is enabled.
 	reusePort bool

 	// bindToDevice is set to the NIC on which to bind or disabled if 0.
 	bindToDevice tcpip.NICID

 	// delay enables Nagle's algorithm.
 	//
 	// delay is a boolean (0 is false) and must be accessed atomically.
 	delay uint32

 	// cork holds back segments until full.
 	//
 	// cork is a boolean (0 is false) and must be accessed atomically.
 	cork uint32

 	// scoreboard holds TCP SACK Scoreboard information for this endpoint.
 	scoreboard *SACKScoreboard

 	// The options below aren't implemented, but we remember the user
 	// settings because applications expect to be able to set/query these
 	// options.
 	reuseAddr bool

 	// slowAck holds the negated state of quick ack. It is stubbed out and
 	// does nothing.
 	//
 	// slowAck is a boolean (0 is false) and must be accessed atomically.
 	slowAck uint32

 	// segmentQueue is used to hand received segments to the protocol
 	// goroutine. Segments are queued as long as the queue is not full,
 	// and dropped when it is.
 	segmentQueue segmentQueue

 	// synRcvdCount is the number of connections for this endpoint that are
 	// in SYN-RCVD state.
 	synRcvdCount int

 	// userMSS if non-zero is the MSS value explicitly set by the user
 	// for this endpoint using the TCP_MAXSEG setsockopt.
 	userMSS int

 	// The following fields are used to manage the send buffer. When
 	// segments are ready to be sent, they are added to sndQueue and the
 	// protocol goroutine is signaled via sndWaker.
 	//
 	// When the send side is closed, the protocol goroutine is notified via
 	// sndCloseWaker, and sndClosed is set to true.
 	sndBufMu      sync.Mutex
 	sndBufSize    int
 	sndBufUsed    int
 	sndClosed     bool
 	sndBufInQueue seqnum.Size
 	sndQueue      segmentList
 	sndWaker      sleep.Waker
 	sndCloseWaker sleep.Waker

 	// cc stores the name of the Congestion Control algorithm to use for
 	// this endpoint.
 	cc tcpip.CongestionControlOption

 	// The following are used when a "packet too big" control packet is
 	// received. They are protected by sndBufMu. They are used to
 	// communicate to the main protocol goroutine how many such control
 	// messages have been received since the last notification was processed
 	// and what was the smallest MTU seen.
 	packetTooBigCount int
 	sndMTU            int

 	// newSegmentWaker is used to indicate to the protocol goroutine that
 	// it needs to wake up and handle new segments queued to it.
 	newSegmentWaker sleep.Waker

 	// notificationWaker is used to indicate to the protocol goroutine that
 	// it needs to wake up and check for notifications.
 	notificationWaker sleep.Waker

 	// notifyFlags is a bitmask of flags used to indicate to the protocol
 	// goroutine why it was notified; this is only accessed atomically.
 	notifyFlags uint32

 	// keepalive manages TCP keepalive state. When the connection is idle
 	// (no data sent or received) for keepalive.idle, we start sending
 	// keepalives every keepalive.interval. If we send keepalive.count
 	// without hearing a response, the connection is closed.
 	keepalive keepalive

 	// pendingAccepted is a synchronization primitive used to track number
 	// of connections that are queued up to be delivered to the accepted
 	// channel. We use this to ensure that all goroutines blocked on writing
 	// to the acceptedChan below terminate before we close acceptedChan.
 	pendingAccepted sync.WaitGroup

 	// acceptedChan is used by a listening endpoint protocol goroutine to
 	// send newly accepted connections to the endpoint so that they can be
 	// read by Accept() calls.
 	acceptedChan chan *endpoint

 	// The following are only used from the protocol goroutine, and
 	// therefore don't need locks to protect them.
 	rcv *receiver
 	snd *sender

 	// The goroutine drain completion notification channel.
 	drainDone chan struct{}

 	// The goroutine undrain notification channel. This is currently used as
 	// a way to block the worker goroutines. Today nothing closes/writes
 	// this channel and this causes any goroutines waiting on this to just
 	// block. This is used during save/restore to prevent worker goroutines
 	// from mutating state as it's being saved.
 	undrain chan struct{}

 	// probe if not nil is invoked on every received segment. It is passed
 	// a copy of the current state of the endpoint.
 	probe stack.TCPProbeFunc

 	// The following are only used to assist the restore run to re-connect.
 	connectingAddress tcpip.Address

 	// amss is the advertised MSS to the peer by this endpoint.
 	amss uint16

 	// sendTOS represents IPv4 TOS or IPv6 TrafficClass,
 	// applied while sending packets. Defaults to 0 as on Linux.
 	sendTOS uint8

 	// NOTE(review): gso presumably carries the segmentation offload
 	// parameters used when sending; it is not set in this part of the
 	// file, confirm against its users.
 	gso *stack.GSO

 	// TODO(b/142022063): Add ability to save and restore per endpoint stats.
 	stats Stats
 }

 // StopWork halts packet processing by taking workMu, which arbitrates who may
 // perform protocol work. It must be paired with ResumeWork. Only to be used
 // in tests.
 func (e *endpoint) StopWork() {
 	e.workMu.Lock()
 }

 // ResumeWork resumes packet processing by releasing workMu, undoing a prior
 // StopWork. Only to be used in tests.
 func (e *endpoint) ResumeWork() {
 	e.workMu.Unlock()
 }

 // keepalive is a synchronization wrapper used to appease stateify. See the
 // comment in endpoint, where it is used.
 //
 // The embedded mutex presumably guards the fields below; no locking of it is
 // visible in this part of the file, so confirm against the users.
 //
 // +stateify savable
 type keepalive struct {
 	sync.Mutex
 	enabled bool
 	// idle is how long the connection must be idle before keepalives
 	// start being sent. newEndpoint sets it to the Linux default of 2h.
 	idle time.Duration
 	// interval is the time between successive keepalives. newEndpoint
 	// sets it to the Linux default of 75s.
 	interval time.Duration
 	// count is the number of keepalives sent without a response after
 	// which the connection is closed. newEndpoint sets it to the Linux
 	// default of 9.
 	count int
 	// NOTE(review): unacked presumably counts keepalives sent but not yet
 	// answered; confirm against the protocol goroutine.
 	unacked int
 	timer   timer
 	waker   sleep.Waker
 }

 // newEndpoint returns a new TCP endpoint in StateInitial that belongs to stack
 // s, uses netProto as its network protocol and signals readiness changes
 // through waiterQueue.
 //
 // Buffer sizes, the congestion control algorithm and receive buffer
 // moderation start from the package defaults and are overridden by the
 // stack-wide TCP protocol options when those can be fetched. The endpoint is
 // returned with workMu already locked.
 func newEndpoint(s *stack.Stack, netProto tcpip.NetworkProtocolNumber, waiterQueue *waiter.Queue) *endpoint {
 	e := new(endpoint)

 	// Immutable wiring.
 	e.stack = s
 	e.waiterQueue = waiterQueue
 	e.EndpointInfo.TransportEndpointInfo.NetProto = netProto
 	e.EndpointInfo.TransportEndpointInfo.TransProto = header.TCPProtocolNumber

 	// Built-in defaults.
 	e.state = StateInitial
 	e.rcvBufSize = DefaultReceiveBufferSize
 	e.sndBufSize = DefaultSendBufferSize
 	e.sndMTU = int(math.MaxInt32)
 	e.reuseAddr = true

 	// Keepalive parameters follow the Linux defaults.
 	e.keepalive.idle = 2 * time.Hour
 	e.keepalive.interval = 75 * time.Second
 	e.keepalive.count = 9

 	// Stack-wide protocol options, when available, take precedence over the
 	// built-in defaults above.
 	var sendBuf SendBufferSizeOption
 	if err := s.TransportProtocolOption(ProtocolNumber, &sendBuf); err == nil {
 		e.sndBufSize = sendBuf.Default
 	}

 	var recvBuf ReceiveBufferSizeOption
 	if err := s.TransportProtocolOption(ProtocolNumber, &recvBuf); err == nil {
 		e.rcvBufSize = recvBuf.Default
 	}

 	var congestion tcpip.CongestionControlOption
 	if err := s.TransportProtocolOption(ProtocolNumber, &congestion); err == nil {
 		e.cc = congestion
 	}

 	var moderate tcpip.ModerateReceiveBufferOption
 	if err := s.TransportProtocolOption(ProtocolNumber, &moderate); err == nil {
 		e.rcvAutoParams.disabled = !bool(moderate)
 	}

 	// A nil probe simply leaves e.probe nil.
 	e.probe = s.GetTCPProbe()

 	e.segmentQueue.setLimit(MaxUnprocessedSegments)
 	e.workMu.Init()
 	e.workMu.Lock()
 	e.tsOffset = timeStampOffset()

 	return e
 }

 // Readiness returns the subset of mask for which the endpoint is currently
 // ready. For example, if waiter.EventIn is set in the result, the endpoint is
 // immediately readable.
 //
 // Closed and errored endpoints are ready for every requested event, a
 // listening endpoint is readable when a connection is waiting in the accept
 // channel, a connected endpoint is judged by its send and receive buffers, and
 // every other state is ready for nothing.
 func (e *endpoint) Readiness(mask waiter.EventMask) waiter.EventMask {
 	e.mu.RLock()
 	defer e.mu.RUnlock()

 	var ready waiter.EventMask
 	st := e.state

 	switch {
 	case st == StateClose || st == StateError:
 		// Ready for anything.
 		ready = mask

 	case st == StateListen:
 		// Readable only if an accepted connection is pending.
 		if mask&waiter.EventIn != 0 && len(e.acceptedChan) > 0 {
 			ready |= waiter.EventIn
 		}

 	case st.connected():
 		// Writable when the send side is closed or has free buffer space.
 		if mask&waiter.EventOut != 0 {
 			e.sndBufMu.Lock()
 			writable := e.sndClosed || e.sndBufUsed < e.sndBufSize
 			e.sndBufMu.Unlock()
 			if writable {
 				ready |= waiter.EventOut
 			}
 		}

 		// Readable when data is buffered or the receive side is closed.
 		if mask&waiter.EventIn != 0 {
 			e.rcvListMu.Lock()
 			readable := e.rcvBufUsed > 0 || e.rcvClosed
 			e.rcvListMu.Unlock()
 			if readable {
 				ready |= waiter.EventIn
 			}
 		}
 	}

 	return ready
 }

 // fetchNotifications atomically reads and clears notifyFlags, returning the
 // bitmask of notification flags that were set.
 func (e *endpoint) fetchNotifications() uint32 {
 	return atomic.SwapUint32(&e.notifyFlags, 0)
 }

 // notifyProtocolGoroutine atomically sets the flags in n on notifyFlags. The
 // notification waker is asserted only by the caller that moves notifyFlags
 // from zero to non-zero; if every bit of n is already set nothing happens.
 func (e *endpoint) notifyProtocolGoroutine(n uint32) {
 	for {
 		current := atomic.LoadUint32(&e.notifyFlags)
 		if current&n == n {
 			// Nothing new to report.
 			return
 		}

 		if !atomic.CompareAndSwapUint32(&e.notifyFlags, current, current|n) {
 			// Lost a race with another update; retry with a fresh value.
 			continue
 		}

 		// Only the transition from "no flags" to "some flags" needs to wake
 		// the protocol goroutine.
 		if current == 0 {
 			e.notificationWaker.Assert()
 		}
 		return
 	}
 }

 // Close puts the endpoint in a closed state and frees all resources associated
 // with it. It must be called only once and with no other concurrent calls to
 // the endpoint.
 //
 // If a worker goroutine is running, the cleanup is delegated to it through a
 // notifyClose notification; otherwise it is performed inline.
 func (e *endpoint) Close() {
 	// Shut down both directions first: a connected peer learns that no more
 	// data will be sent, and a listening endpoint stops accepting.
 	e.Shutdown(tcpip.ShutdownWrite | tcpip.ShutdownRead)

 	e.mu.Lock()

 	// Listening sockets give their port back inline so that it can be
 	// reused as soon as Close() returns. A registered endpoint is
 	// unregistered too, otherwise the next user of the port would fail in
 	// Listen() when trying to register.
 	if listening := e.state == StateListen; listening && e.isPortReserved {
 		if e.isRegistered {
 			e.stack.UnregisterTransportEndpoint(e.boundNICID, e.effectiveNetProtos, ProtocolNumber, e.ID, e, e.bindToDevice)
 			e.isRegistered = false
 		}

 		e.stack.ReleasePort(e.effectiveNetProtos, ProtocolNumber, e.ID.LocalAddress, e.ID.LocalPort, e.bindToDevice)
 		e.isPortReserved = false
 	}

 	// Hand the cleanup to the worker if there is one, otherwise do it here.
 	tcpip.AddDanglingEndpoint(e)
 	if e.workerRunning {
 		e.workerCleanup = true
 		e.notifyProtocolGoroutine(notifyClose)
 	} else {
 		e.cleanupLocked()
 	}

 	e.mu.Unlock()
 }

 // closePendingAcceptableConnectionsLocked closes all connections that have
 // completed handshake but not yet been delivered to the application. On
 // return e.acceptedChan has been closed, drained and set to nil.
 //
 // The caller must ensure e.acceptedChan is non-nil.
 func (e *endpoint) closePendingAcceptableConnectionsLocked() {
 	done := make(chan struct{})
 	// Spin a goroutine up as ranging on e.acceptedChan will just block when
 	// there are no more connections in the channel. Using a non-blocking
 	// select does not work as it can potentially select the default case
 	// even when there are pending writes but that are not yet written to
 	// the channel.
 	go func() {
 		defer close(done)
 		for n := range e.acceptedChan {
 			// Reset each undelivered connection, then close it.
 			n.mu.Lock()
 			n.resetConnectionLocked(tcpip.ErrConnectionAborted)
 			n.mu.Unlock()
 			n.Close()
 		}
 	}()
 	// pendingAccepted(see endpoint.deliverAccepted) tracks the number of
 	// endpoints which have completed handshake but are not yet written to
 	// the e.acceptedChan. We wait here till the goroutine above can drain
 	// all such connections from e.acceptedChan.
 	e.pendingAccepted.Wait()
 	// No writers remain, so the channel can be closed; this ends the range
 	// loop in the goroutine above once it has drained what is left.
 	close(e.acceptedChan)
 	<-done
 	e.acceptedChan = nil
 }

 // cleanupLocked frees all resources associated with the endpoint. It is called
 // after Close() is called and the worker goroutine (if any) is done with its
 // work.
 //
 // It closes connections still waiting to be accepted, clears workerCleanup,
 // unregisters the endpoint, releases its reserved port and route, and removes
 // it from the set of dangling endpoints.
 func (e *endpoint) cleanupLocked() {
 	// Close all endpoints that might have been accepted by TCP but not by
 	// the client.
 	if e.acceptedChan != nil {
 		e.closePendingAcceptableConnectionsLocked()
 	}
 	e.workerCleanup = false

 	if e.isRegistered {
 		e.stack.UnregisterTransportEndpoint(e.boundNICID, e.effectiveNetProtos, ProtocolNumber, e.ID, e, e.bindToDevice)
 		e.isRegistered = false
 	}

 	if e.isPortReserved {
 		e.stack.ReleasePort(e.effectiveNetProtos, ProtocolNumber, e.ID.LocalAddress, e.ID.LocalPort, e.bindToDevice)
 		e.isPortReserved = false
 	}

 	e.route.Release()
 	// Undoes the tcpip.AddDanglingEndpoint performed by Close().
 	tcpip.DeleteDanglingEndpoint(e)
 }

 // initialReceiveWindow returns the initial receive window to advertise in the
 // SYN/SYN-ACK. It is the available receive buffer space, capped both at
 // math.MaxUint16 and at twice InitialCwnd segments of the route's MSS.
 func (e *endpoint) initialReceiveWindow() int {
 	wnd := e.receiveBufferAvailable()
 	if wnd > math.MaxUint16 {
 		wnd = math.MaxUint16
 	}

 	if limit := InitialCwnd * int(mssForRoute(&e.route)) * 2; limit < wnd {
 		return limit
 	}
 	return wnd
 }

 // ModerateRecvBuf adjusts the receive buffer and the advertised window
 // based on the number of bytes copied to user space.
 //
 // copied is the number of bytes just handed to the application. Until an RTT
 // has been measured, or while the current measurement period is shorter than
 // one RTT, the bytes are only accumulated. Once a full RTT has elapsed the
 // buffer is grown (never shrunk) if the application consumed more than in the
 // previous period, and a new measurement period is started.
 //
 // It does nothing when auto tuning is disabled for the endpoint.
 func (e *endpoint) ModerateRecvBuf(copied int) {
 	e.rcvListMu.Lock()
 	defer e.rcvListMu.Unlock()

 	if e.rcvAutoParams.disabled {
 		return
 	}
 	now := time.Now()
 	if rtt := e.rcvAutoParams.rtt; rtt == 0 || now.Sub(e.rcvAutoParams.measureTime) < rtt {
 		e.rcvAutoParams.copied += copied
 		return
 	}
 	prevRTTCopied := e.rcvAutoParams.copied + copied
 	prevCopied := e.rcvAutoParams.prevCopied
 	if prevRTTCopied > prevCopied {
 		// The minimal receive window based on what was copied by the app
 		// in the immediate preceding RTT and some extra buffer for 16
 		// segments to account for variations.
 		// We multiply by 2 to account for packet losses.
 		rcvWnd := prevRTTCopied*2 + 16*int(e.amss)

 		// Scale for slow start based on bytes copied in this RTT vs previous.
 		//
 		// prevCopied is zero until a baseline has been recorded; without a
 		// baseline there is no growth ratio to compute, and dividing by it
 		// would panic, so the scaling step is skipped in that case.
 		if prevCopied != 0 {
 			grow := (rcvWnd * (prevRTTCopied - prevCopied)) / prevCopied

 			// Multiply growth factor by 2 again to account for sender being
 			// in slow-start where the sender grows its congestion window
 			// by 100% per RTT.
 			rcvWnd += grow * 2
 		}

 		// Make sure auto tuned buffer size can always receive up to 2x
 		// the initial window of 10 segments.
 		if minRcvWnd := int(e.amss) * InitialCwnd * 2; rcvWnd < minRcvWnd {
 			rcvWnd = minRcvWnd
 		}

 		// Cap the auto tuned buffer size by the maximum permissible
 		// receive buffer size.
 		if maxRcvWnd := e.maxReceiveBufferSize(); rcvWnd > maxRcvWnd {
 			rcvWnd = maxRcvWnd
 		}

 		// We do not adjust downwards as that can cause the receiver to
 		// reject valid data that might already be in flight as the
 		// acceptable window will shrink.
 		if rcvWnd > e.rcvBufSize {
 			e.rcvBufSize = rcvWnd
 			e.notifyProtocolGoroutine(notifyReceiveWindowChanged)
 		}

 		// We only update prevCopied when we grow the buffer because in cases
 		// where prevCopied > prevRTTCopied the existing buffer is already big
 		// enough to handle the current rate and we don't need to do any
 		// adjustments.
 		e.rcvAutoParams.prevCopied = prevRTTCopied
 	}
 	// Start a new measurement period.
 	e.rcvAutoParams.measureTime = now
 	e.rcvAutoParams.copied = 0
 }

 // IPTables implements tcpip.Endpoint.IPTables. It returns the IPTables of the
 // stack the endpoint belongs to; the returned error is always nil.
 func (e *endpoint) IPTables() (iptables.IPTables, error) {
 	return e.stack.IPTables(), nil
 }

 // Read reads data from the endpoint and returns the next buffered view.
 //
 // When the endpoint is neither connected nor closed and holds no buffered
 // data, it returns HardError if the endpoint is in StateError and
 // tcpip.ErrInvalidEndpointState otherwise. In every other case the result is
 // whatever readLocked produces. The returned control messages are always
 // empty.
 func (e *endpoint) Read(*tcpip.FullAddress) (buffer.View, tcpip.ControlMessages, *tcpip.Error) {
 	e.mu.RLock()
 	e.rcvListMu.Lock()

 	// The endpoint can be read if it's connected, or if it's already closed
 	// but has some pending unread data. A received RST moves the endpoint to
 	// StateError, so buffered data must still be readable before ECONNRESET
 	// is reported.
 	st := e.state
 	if readable := st.connected() || st == StateClose || e.rcvBufUsed != 0; !readable {
 		e.rcvListMu.Unlock()
 		hardErr := e.HardError
 		e.mu.RUnlock()

 		if st != StateError {
 			e.stats.ReadErrors.InvalidEndpointState.Increment()
 			return buffer.View{}, tcpip.ControlMessages{}, tcpip.ErrInvalidEndpointState
 		}
 		return buffer.View{}, tcpip.ControlMessages{}, hardErr
 	}

 	view, err := e.readLocked()
 	e.rcvListMu.Unlock()
 	e.mu.RUnlock()

 	if err == tcpip.ErrClosedForReceive {
 		e.stats.ReadErrors.ReadClosed.Increment()
 	}
 	return view, tcpip.ControlMessages{}, err
 }

 // readLocked removes and returns the next undelivered view from the front of
 // the receive list. Read calls it with both e.mu and e.rcvListMu held.
 //
 // With no buffered data it returns tcpip.ErrClosedForReceive if the receive
 // side is closed or the endpoint is not connected, and tcpip.ErrWouldBlock
 // otherwise.
 func (e *endpoint) readLocked() (buffer.View, *tcpip.Error) {
 	if e.rcvBufUsed == 0 {
 		if !e.rcvClosed && e.state.connected() {
 			return buffer.View{}, tcpip.ErrWouldBlock
 		}
 		return buffer.View{}, tcpip.ErrClosedForReceive
 	}

 	seg := e.rcvList.Front()
 	segViews := seg.data.Views()
 	out := segViews[seg.viewToDeliver]
 	seg.viewToDeliver++

 	// Drop the segment once its last view has been handed out.
 	if len(segViews) <= seg.viewToDeliver {
 		e.rcvList.Remove(seg)
 		seg.decRef()
 	}

 	e.rcvBufUsed -= len(out)

 	// A read that frees enough space for the scaled window to become
 	// non-zero again must make the protocol goroutine send a window update.
 	if e.zeroWindow {
 		if stillZero := e.zeroReceiveWindow(e.rcv.rcvWndScale); !stillZero {
 			e.zeroWindow = false
 			e.notifyProtocolGoroutine(notifyNonZeroReceiveWindow)
 		}
 	}

 	return out, nil
 }

 // isEndpointWritableLocked reports whether the endpoint can be written to and,
 // if so, how many bytes of send buffer space are free right now. When the
 // endpoint is not writable the returned error says why: HardError in
 // StateError, tcpip.ErrClosedForSend when not connected or closed for sends,
 // and tcpip.ErrWouldBlock when the send buffer is full.
 //
 // Caller must hold e.mu and e.sndBufMu.
 func (e *endpoint) isEndpointWritableLocked() (int, *tcpip.Error) {
 	switch {
 	case e.state == StateError:
 		// An errored endpoint reports the error that put it there.
 		return 0, e.HardError
 	case !e.state.connected(), e.sndClosed:
 		// Either not connected at all or already closed for sends.
 		return 0, tcpip.ErrClosedForSend
 	}

 	if free := e.sndBufSize - e.sndBufUsed; free > 0 {
 		return free, nil
 	}
 	return 0, tcpip.ErrWouldBlock
 }

 // Write writes data to the endpoint's peer.
 //
 // It queues as much of p's payload as currently fits in the send buffer and
 // returns the number of bytes queued. The returned channel is always nil. If
 // the endpoint is not writable the error from isEndpointWritableLocked is
 // returned; if fetching the payload fails, or yields no data, the payload
 // error (possibly nil) is returned with a zero count.
 func (e *endpoint) Write(p tcpip.Payloader, opts tcpip.WriteOptions) (int64, <-chan struct{}, *tcpip.Error) {
 	// Linux completely ignores any address passed to sendto(2) for TCP sockets
 	// (without the MSG_FASTOPEN flag). Corking is unimplemented, so opts.More
 	// and opts.EndOfRecord are also ignored.

 	e.mu.RLock()
 	e.sndBufMu.Lock()

 	avail, err := e.isEndpointWritableLocked()
 	if err != nil {
 		e.sndBufMu.Unlock()
 		e.mu.RUnlock()
 		e.stats.WriteErrors.WriteClosed.Increment()
 		return 0, nil, err
 	}

 	// We can release locks while copying data.
 	//
 	// This is not possible if atomic is set, because we can't allow the
 	// available buffer space to be consumed by some other caller while we
 	// are copying data in.
 	if !opts.Atomic {
 		e.sndBufMu.Unlock()
 		e.mu.RUnlock()
 	}

 	// Fetch data.
 	v, perr := p.Payload(avail)
 	if perr != nil || len(v) == 0 {
 		if opts.Atomic { // See above.
 			e.sndBufMu.Unlock()
 			e.mu.RUnlock()
 		}
 		// Note that perr may be nil if len(v) == 0.
 		return 0, nil, perr
 	}

 	if !opts.Atomic { // See above.
 		e.mu.RLock()
 		e.sndBufMu.Lock()

 		// Because we released the lock before copying, check state again
 		// to make sure the endpoint is still in a valid state for a write.
 		avail, err = e.isEndpointWritableLocked()
 		if err != nil {
 			e.sndBufMu.Unlock()
 			e.mu.RUnlock()
 			e.stats.WriteErrors.WriteClosed.Increment()
 			return 0, nil, err
 		}

 		// Discard any excess data copied in due to avail being reduced due
 		// to a simultaneous write call to the socket.
 		if avail < len(v) {
 			v = v[:avail]
 		}
 	}

 	// From here on both e.mu (read) and e.sndBufMu are held, whichever
 	// path was taken above.

 	// Add data to the send queue.
 	s := newSegmentFromView(&e.route, e.ID, v)
 	e.sndBufUsed += len(v)
 	e.sndBufInQueue += seqnum.Size(len(v))
 	e.sndQueue.PushBack(s)
 	e.sndBufMu.Unlock()
 	// Release the endpoint lock to prevent deadlocks due to lock
 	// order inversion when acquiring workMu.
 	e.mu.RUnlock()

 	if e.workMu.TryLock() {
 		// Do the work inline.
 		e.handleWrite()
 		e.workMu.Unlock()
 	} else {
 		// Let the protocol goroutine do the work.
 		e.sndWaker.Assert()
 	}

 	return int64(len(v)), nil, nil
 }

 // Peek reads data without consuming it from the endpoint.
 //
 // Buffered data is copied, in order, into the byte slices of vec until either
 // the data or the space in vec runs out; the number of bytes copied is
 // returned. The caller's vec slice headers are left untouched. The returned
 // control messages are always empty.
 //
 // This method does not block if there is no data pending.
 func (e *endpoint) Peek(vec [][]byte) (int64, tcpip.ControlMessages, *tcpip.Error) {
 	e.mu.RLock()
 	defer e.mu.RUnlock()

 	// The endpoint can be read if it's connected, or if it's already closed
 	// but has some pending unread data.
 	st := e.state
 	if !st.connected() && st != StateClose {
 		if st != StateError {
 			e.stats.ReadErrors.InvalidEndpointState.Increment()
 			return 0, tcpip.ControlMessages{}, tcpip.ErrInvalidEndpointState
 		}
 		return 0, tcpip.ControlMessages{}, e.HardError
 	}

 	e.rcvListMu.Lock()
 	defer e.rcvListMu.Unlock()

 	if e.rcvBufUsed == 0 {
 		if !e.rcvClosed && st.connected() {
 			return 0, tcpip.ControlMessages{}, tcpip.ErrWouldBlock
 		}
 		e.stats.ReadErrors.ReadClosed.Increment()
 		return 0, tcpip.ControlMessages{}, tcpip.ErrClosedForReceive
 	}

 	// Work on a private copy of the slice headers so they can be advanced
 	// without affecting the caller.
 	dst := append([][]byte(nil), vec...)

 	var total int64
 	for seg := e.rcvList.Front(); seg != nil; seg = seg.Next() {
 		segViews := seg.data.Views()

 		for idx := seg.viewToDeliver; idx < len(segViews); idx++ {
 			src := segViews[idx]

 			for len(src) > 0 {
 				// Skip destination slices that are already full.
 				for len(dst) > 0 && len(dst[0]) == 0 {
 					dst = dst[1:]
 				}
 				if len(dst) == 0 {
 					// Out of destination space.
 					return total, tcpip.ControlMessages{}, nil
 				}

 				copied := copy(dst[0], src)
 				src = src[copied:]
 				dst[0] = dst[0][copied:]
 				total += int64(copied)
 			}
 		}
 	}

 	return total, tcpip.ControlMessages{}, nil
 }

 // zeroReceiveWindow reports whether the receive window that would be
 // advertised right now is zero, given the free receive buffer space and the
 // window scale.
 //
 // It must be called with rcvListMu held.
 func (e *endpoint) zeroReceiveWindow(scale uint8) bool {
 	if e.rcvBufSize > e.rcvBufUsed {
 		// Free space exists, but it may still scale down to zero.
 		avail := e.rcvBufSize - e.rcvBufUsed
 		return avail>>scale == 0
 	}

 	// The buffer is full (or over-full).
 	return true
 }

 // SetSockOptInt sets an integer-valued socket option.
 //
 // Only the receive and send buffer size options are handled; any other
 // option is ignored. The function always returns nil.
 func (e *endpoint) SetSockOptInt(opt tcpip.SockOpt, v int) *tcpip.Error {
 	switch opt {
 	case tcpip.ReceiveBufferSizeOption:
 		// Clamp the request to the stack-wide limits when they can be
 		// queried.
 		newSize := v
 		var limits ReceiveBufferSizeOption
 		if e.stack.TransportProtocolOption(ProtocolNumber, &limits) == nil {
 			if newSize < limits.Min {
 				newSize = limits.Min
 			}
 			if newSize > limits.Max {
 				newSize = limits.Max
 			}
 		}

 		notifications := uint32(notifyReceiveWindowChanged)

 		e.rcvListMu.Lock()

 		// The buffer must be large enough to advertise a non-zero
 		// window at the current window scale.
 		var wndScale uint8
 		if e.rcv != nil {
 			wndScale = e.rcv.rcvWndScale
 		}
 		if newSize>>wndScale == 0 {
 			newSize = 1 << wndScale
 		}

 		// Keep 2*newSize from overflowing.
 		if newSize > math.MaxInt32/2 {
 			newSize = math.MaxInt32 / 2
 		}

 		e.rcvBufSize = newSize
 		// An explicitly set size turns off receive buffer auto-tuning.
 		e.rcvAutoParams.disabled = true
 		if e.zeroWindow && !e.zeroReceiveWindow(wndScale) {
 			e.zeroWindow = false
 			notifications |= notifyNonZeroReceiveWindow
 		}
 		e.rcvListMu.Unlock()

 		e.notifyProtocolGoroutine(notifications)

 	case tcpip.SendBufferSizeOption:
 		// Clamp the request to the stack-wide limits when they can be
 		// queried.
 		newSize := v
 		var limits SendBufferSizeOption
 		if e.stack.TransportProtocolOption(ProtocolNumber, &limits) == nil {
 			if newSize < limits.Min {
 				newSize = limits.Min
 			}
 			if newSize > limits.Max {
 				newSize = limits.Max
 			}
 		}

 		e.sndBufMu.Lock()
 		e.sndBufSize = newSize
 		e.sndBufMu.Unlock()
 	}

 	return nil
 }

 // SetSockOpt sets a socket option.
 //
 // The concrete type of opt selects which option is set. Option types that are
 // not handled below are silently ignored and nil is returned.
 //
 // Errors returned by the handled cases:
 //   - tcpip.ErrUnknownDevice: BindToDeviceOption names a NIC that is not in
 //     the stack's NICInfo.
 //   - tcpip.ErrInvalidOptionValue: MaxSegOption is outside
 //     [header.TCPMinimumMSS, header.TCPMaximumMSS].
 //   - tcpip.ErrInvalidEndpointState: V6OnlyOption on a non-IPv6 endpoint or
 //     on an endpoint that has left StateInitial.
 //   - tcpip.ErrNoSuchFile: CongestionControlOption names an algorithm that
 //     is not in the stack's available list.
 func (e *endpoint) SetSockOpt(opt interface{}) *tcpip.Error {
 	// Lower 2 bits represents ECN bits. RFC 3168, section 23.1
 	const inetECNMask = 3
 	switch v := opt.(type) {
 	case tcpip.DelayOption:
 		// e.delay is accessed atomically, so e.mu is not taken here.
 		if v == 0 {
 			atomic.StoreUint32(&e.delay, 0)

 			// Handle delayed data.
 			e.sndWaker.Assert()
 		} else {
 			atomic.StoreUint32(&e.delay, 1)
 		}
 		return nil

 	case tcpip.CorkOption:
 		// e.cork is accessed atomically, so e.mu is not taken here.
 		if v == 0 {
 			atomic.StoreUint32(&e.cork, 0)

 			// Handle the corked data.
 			e.sndWaker.Assert()
 		} else {
 			atomic.StoreUint32(&e.cork, 1)
 		}
 		return nil

 	case tcpip.ReuseAddressOption:
 		e.mu.Lock()
 		e.reuseAddr = v != 0
 		e.mu.Unlock()
 		return nil

 	case tcpip.ReusePortOption:
 		e.mu.Lock()
 		e.reusePort = v != 0
 		e.mu.Unlock()
 		return nil

 	case tcpip.BindToDeviceOption:
 		e.mu.Lock()
 		defer e.mu.Unlock()
 		// An empty name clears the device binding.
 		if v == "" {
 			e.bindToDevice = 0
 			return nil
 		}
 		// Otherwise look the NIC up by name.
 		for nicid, nic := range e.stack.NICInfo() {
 			if nic.Name == string(v) {
 				e.bindToDevice = nicid
 				return nil
 			}
 		}
 		return tcpip.ErrUnknownDevice

 	case tcpip.QuickAckOption:
 		// The endpoint stores the inverse of the option (slowAck), so a
 		// zero QuickAckOption sets slowAck to 1 and vice versa.
 		if v == 0 {
 			atomic.StoreUint32(&e.slowAck, 1)
 		} else {
 			atomic.StoreUint32(&e.slowAck, 0)
 		}
 		return nil

 	case tcpip.MaxSegOption:
 		userMSS := v
 		if userMSS < header.TCPMinimumMSS || userMSS > header.TCPMaximumMSS {
 			return tcpip.ErrInvalidOptionValue
 		}
 		e.mu.Lock()
 		e.userMSS = int(userMSS)
 		e.mu.Unlock()
 		// The notification is sent after e.mu has been released.
 		e.notifyProtocolGoroutine(notifyMSSChanged)
 		return nil

 	case tcpip.V6OnlyOption:
 		// We only recognize this option on v6 endpoints.
 		if e.NetProto != header.IPv6ProtocolNumber {
 			return tcpip.ErrInvalidEndpointState
 		}

 		e.mu.Lock()
 		defer e.mu.Unlock()

 		// We only allow this to be set when we're in the initial state.
 		if e.state != StateInitial {
 			return tcpip.ErrInvalidEndpointState
 		}

 		e.v6only = v != 0
 		return nil

 	case tcpip.TTLOption:
 		e.mu.Lock()
 		e.ttl = uint8(v)
 		e.mu.Unlock()
 		return nil

 	// The keepalive settings below are guarded by the lock embedded in
 	// e.keepalive rather than by e.mu. Each change is followed by a
 	// notifyKeepaliveChanged notification to the protocol goroutine.
 	case tcpip.KeepaliveEnabledOption:
 		e.keepalive.Lock()
 		e.keepalive.enabled = v != 0
 		e.keepalive.Unlock()
 		e.notifyProtocolGoroutine(notifyKeepaliveChanged)
 		return nil

 	case tcpip.KeepaliveIdleOption:
 		e.keepalive.Lock()
 		// v is converted to a time.Duration as-is, without scaling.
 		e.keepalive.idle = time.Duration(v)
 		e.keepalive.Unlock()
 		e.notifyProtocolGoroutine(notifyKeepaliveChanged)
 		return nil

 	case tcpip.KeepaliveIntervalOption:
 		e.keepalive.Lock()
 		// v is converted to a time.Duration as-is, without scaling.
 		e.keepalive.interval = time.Duration(v)
 		e.keepalive.Unlock()
 		e.notifyProtocolGoroutine(notifyKeepaliveChanged)
 		return nil

 	case tcpip.KeepaliveCountOption:
 		e.keepalive.Lock()
 		e.keepalive.count = int(v)
 		e.keepalive.Unlock()
 		e.notifyProtocolGoroutine(notifyKeepaliveChanged)
 		return nil

 	case tcpip.BroadcastOption:
 		e.mu.Lock()
 		e.broadcast = v != 0
 		e.mu.Unlock()
 		return nil

 	case tcpip.CongestionControlOption:
 		// Query the available cc algorithms in the stack and
 		// validate that the specified algorithm is actually
 		// supported in the stack.
 		var avail tcpip.AvailableCongestionControlOption
 		if err := e.stack.TransportProtocolOption(ProtocolNumber, &avail); err != nil {
 			return err
 		}
 		// The available algorithms are a space-separated list.
 		availCC := strings.Split(string(avail), " ")
 		for _, cc := range availCC {
 			if v == tcpip.CongestionControlOption(cc) {
 				// Record the new algorithm and snapshot the state
 				// under e.mu, then release e.mu before touching
 				// workMu below.
 				e.mu.Lock()
 				state := e.state
 				e.cc = v
 				e.mu.Unlock()
 				switch state {
 				case StateEstablished:
 					// Acquire the work mutex as we need to
 					// reinitialize the congestion control state.
 					// workMu is taken before e.mu here, and the
 					// state is re-checked because it may have
 					// changed while no lock was held.
 					e.workMu.Lock()
 					e.mu.Lock()
 					if e.state == state {
 						e.snd.cc = e.snd.initCongestionControl(e.cc)
 					}
 					e.mu.Unlock()
 					e.workMu.Unlock()
 				}
 				return nil
 			}
 		}

 		// Linux returns ENOENT when an invalid congestion
 		// control algorithm is specified.
 		return tcpip.ErrNoSuchFile

 	case tcpip.IPv4TOSOption:
 		e.mu.Lock()
 		// TODO(gvisor.dev/issue/995): ECN is not currently supported,
 		// ignore the bits for now.
 		e.sendTOS = uint8(v) & ^uint8(inetECNMask)
 		e.mu.Unlock()
 		return nil

 	case tcpip.IPv6TrafficClassOption:
 		e.mu.Lock()
 		// TODO(gvisor.dev/issue/995): ECN is not currently supported,
 		// ignore the bits for now.
 		// Note that this writes the same sendTOS field as IPv4TOSOption.
 		e.sendTOS = uint8(v) & ^uint8(inetECNMask)
 		e.mu.Unlock()
 		return nil

 	default:
 		// Unhandled option types are accepted and ignored.
 		return nil
 	}
 }

 // readyReceiveSize reports how many bytes are queued and ready to be read.
 // It fails with tcpip.ErrInvalidEndpointState on a listening endpoint.
 func (e *endpoint) readyReceiveSize() (int, *tcpip.Error) {
 	e.mu.RLock()
 	defer e.mu.RUnlock()

 	if e.state != StateListen {
 		e.rcvListMu.Lock()
 		pending := e.rcvBufUsed
 		e.rcvListMu.Unlock()
 		return pending, nil
 	}

 	// A listening endpoint has no receive queue to report on.
 	return 0, tcpip.ErrInvalidEndpointState
 }

 // GetSockOptInt implements tcpip.Endpoint.GetSockOptInt.
 //
 // It reports the receive queue size and the send/receive buffer sizes. Any
 // other option yields -1 and tcpip.ErrUnknownProtocolOption.
 func (e *endpoint) GetSockOptInt(opt tcpip.SockOpt) (int, *tcpip.Error) {
 	switch opt {
 	case tcpip.ReceiveQueueSizeOption:
 		return e.readyReceiveSize()

 	case tcpip.SendBufferSizeOption:
 		e.sndBufMu.Lock()
 		defer e.sndBufMu.Unlock()
 		return e.sndBufSize, nil

 	case tcpip.ReceiveBufferSizeOption:
 		e.rcvListMu.Lock()
 		defer e.rcvListMu.Unlock()
 		return e.rcvBufSize, nil

 	default:
 		return -1, tcpip.ErrUnknownProtocolOption
 	}
 }

 // GetSockOpt implements tcpip.Endpoint.GetSockOpt.
 //
 // The concrete type of opt selects the option. Except for tcpip.ErrorOption,
 // which is passed by value, opt is a pointer through which the current value
 // is written. Boolean settings are reported as 0 or 1. Option types that are
 // not handled below yield tcpip.ErrUnknownProtocolOption.
 //
 // Fields guarded by e.mu are only read here, so every such case takes the
 // read lock; this keeps concurrent getters from serializing on the exclusive
 // lock.
 func (e *endpoint) GetSockOpt(opt interface{}) *tcpip.Error {
 	switch o := opt.(type) {
 	case tcpip.ErrorOption:
 		// Reading the last error also clears it.
 		e.lastErrorMu.Lock()
 		err := e.lastError
 		e.lastError = nil
 		e.lastErrorMu.Unlock()
 		return err

 	case *tcpip.MaxSegOption:
 		// This is just stubbed out. Linux never returns the user_mss
 		// value as it either returns the defaultMSS or returns the
 		// actual current MSS. Netstack just returns the defaultMSS
 		// always for now.
 		*o = header.TCPDefaultMSS
 		return nil

 	case *tcpip.DelayOption:
 		*o = 0
 		if v := atomic.LoadUint32(&e.delay); v != 0 {
 			*o = 1
 		}
 		return nil

 	case *tcpip.CorkOption:
 		*o = 0
 		if v := atomic.LoadUint32(&e.cork); v != 0 {
 			*o = 1
 		}
 		return nil

 	case *tcpip.ReuseAddressOption:
 		e.mu.RLock()
 		v := e.reuseAddr
 		e.mu.RUnlock()

 		*o = 0
 		if v {
 			*o = 1
 		}
 		return nil

 	case *tcpip.ReusePortOption:
 		e.mu.RLock()
 		v := e.reusePort
 		e.mu.RUnlock()

 		*o = 0
 		if v {
 			*o = 1
 		}
 		return nil

 	case *tcpip.BindToDeviceOption:
 		e.mu.RLock()
 		defer e.mu.RUnlock()
 		// Report the name of the bound NIC, or "" when the bound NIC ID
 		// is not present in the stack's NICInfo.
 		if nic, ok := e.stack.NICInfo()[e.bindToDevice]; ok {
 			*o = tcpip.BindToDeviceOption(nic.Name)
 			return nil
 		}
 		*o = ""
 		return nil

 	case *tcpip.QuickAckOption:
 		// The endpoint stores the inverse (slowAck), so flip it back.
 		*o = 1
 		if v := atomic.LoadUint32(&e.slowAck); v != 0 {
 			*o = 0
 		}
 		return nil

 	case *tcpip.V6OnlyOption:
 		// We only recognize this option on v6 endpoints.
 		if e.NetProto != header.IPv6ProtocolNumber {
 			return tcpip.ErrUnknownProtocolOption
 		}

 		e.mu.RLock()
 		v := e.v6only
 		e.mu.RUnlock()

 		*o = 0
 		if v {
 			*o = 1
 		}
 		return nil

 	case *tcpip.TTLOption:
 		e.mu.RLock()
 		*o = tcpip.TTLOption(e.ttl)
 		e.mu.RUnlock()
 		return nil

 	case *tcpip.TCPInfoOption:
 		// Start from a zero value; the RTT fields are only filled in
 		// when the endpoint has a sender.
 		*o = tcpip.TCPInfoOption{}
 		e.mu.RLock()
 		snd := e.snd
 		e.mu.RUnlock()
 		if snd != nil {
 			snd.rtt.Lock()
 			o.RTT = snd.rtt.srtt
 			o.RTTVar = snd.rtt.rttvar
 			snd.rtt.Unlock()
 		}
 		return nil

 	case *tcpip.KeepaliveEnabledOption:
 		e.keepalive.Lock()
 		v := e.keepalive.enabled
 		e.keepalive.Unlock()

 		*o = 0
 		if v {
 			*o = 1
 		}
 		return nil

 	case *tcpip.KeepaliveIdleOption:
 		e.keepalive.Lock()
 		*o = tcpip.KeepaliveIdleOption(e.keepalive.idle)
 		e.keepalive.Unlock()
 		return nil

 	case *tcpip.KeepaliveIntervalOption:
 		e.keepalive.Lock()
 		*o = tcpip.KeepaliveIntervalOption(e.keepalive.interval)
 		e.keepalive.Unlock()
 		return nil

 	case *tcpip.KeepaliveCountOption:
 		e.keepalive.Lock()
 		*o = tcpip.KeepaliveCountOption(e.keepalive.count)
 		e.keepalive.Unlock()
 		return nil

 	case *tcpip.OutOfBandInlineOption:
 		// We don't currently support disabling this option.
 		*o = 1
 		return nil

 	case *tcpip.BroadcastOption:
 		e.mu.RLock()
 		v := e.broadcast
 		e.mu.RUnlock()

 		*o = 0
 		if v {
 			*o = 1
 		}
 		return nil

 	case *tcpip.CongestionControlOption:
 		e.mu.RLock()
 		*o = e.cc
 		e.mu.RUnlock()
 		return nil

 	case *tcpip.IPv4TOSOption:
 		e.mu.RLock()
 		*o = tcpip.IPv4TOSOption(e.sendTOS)
 		e.mu.RUnlock()
 		return nil

 	case *tcpip.IPv6TrafficClassOption:
 		// Reads the same sendTOS field as IPv4TOSOption.
 		e.mu.RLock()
 		*o = tcpip.IPv6TrafficClassOption(e.sendTOS)
 		e.mu.RUnlock()
 		return nil

 	default:
 		return tcpip.ErrUnknownProtocolOption
 	}
 }

 // checkV4Mapped determines the network protocol to use for addr.
 //
 // If addr holds a v4-mapped IPv6 address it is rewritten in place to the
 // embedded IPv4 address (or to "" when that is the IPv4 any address) and the
 // IPv4 protocol number is returned. Otherwise the endpoint's own NetProto is
 // returned. It fails with tcpip.ErrNoRoute for a v4-mapped address on a
 // v6only endpoint, and with tcpip.ErrInvalidEndpointState when the endpoint is
 // already bound to a local address of a different length.
 func (e *endpoint) checkV4Mapped(addr *tcpip.FullAddress) (tcpip.NetworkProtocolNumber, *tcpip.Error) {
 	proto := e.NetProto
 	if header.IsV4MappedAddress(addr.Addr) {
 		// A v6only endpoint must not be used with v4-mapped addresses.
 		if e.v6only {
 			return 0, tcpip.ErrNoRoute
 		}

 		proto = header.IPv4ProtocolNumber

 		// Keep only the trailing IPv4 portion of the mapped address.
 		v4 := addr.Addr[header.IPv6AddressSize-header.IPv4AddressSize:]
 		if v4 == header.IPv4Any {
 			v4 = ""
 		}
 		addr.Addr = v4
 	}

 	// When both addresses are set they must be of the same family, which is
 	// checked here by comparing their lengths.
 	boundLen, wantLen := len(e.ID.LocalAddress), len(addr.Addr)
 	if boundLen != 0 && wantLen != 0 && boundLen != wantLen {
 		return 0, tcpip.ErrInvalidEndpointState
 	}

 	return proto, nil
 }

 // Disconnect implements tcpip.Endpoint.Disconnect.
 //
 // It performs no work and always returns tcpip.ErrNotSupported.
 func (*endpoint) Disconnect() *tcpip.Error {
 	return tcpip.ErrNotSupported
 }

 // Connect connects the endpoint to its peer at addr, requesting both a
 // handshake and a running protocol goroutine.
 //
 // Failures are counted in the stack-wide and per-endpoint
 // FailedConnectionAttempts statistics unless the error asks for statistics to
 // be ignored.
 func (e *endpoint) Connect(addr tcpip.FullAddress) *tcpip.Error {
 	err := e.connect(addr, true /* handshake */, true /* run */)
 	if err == nil || err.IgnoreStats() {
 		return err
 	}

 	e.stack.Stats().TCP.FailedConnectionAttempts.Increment()
 	e.stats.FailedConnectionAttempts.Increment()
 	return err
 }

 // connect connects the endpoint to its peer. In the normal non-S/R case, the
 // new connection is expected to run the main goroutine and perform handshake.
 // In restore of previously connected endpoints, both ends will be passively
 // created (so no new handshaking is done); for stack-accepted connections not
 // yet accepted by the app, they are restored without running the main goroutine
 // here.
 //
 // addr is the peer address. When handshake is false the endpoint is moved
 // straight to StateEstablished and its queued segments are re-pointed at the
 // new ID and route. When run is true the protocol main loop goroutine is
 // started.
 //
 // Once the connection has been set up, tcpip.ErrConnectStarted is returned.
 // nil is returned only when the endpoint is already connected and the caller
 // had not been notified of that yet; later calls on a connected endpoint
 // return tcpip.ErrAlreadyConnected.
 func (e *endpoint) connect(addr tcpip.FullAddress, handshake bool, run bool) *tcpip.Error {
 	e.mu.Lock()
 	defer e.mu.Unlock()

 	// Remember the address as supplied, before checkV4Mapped possibly
 	// rewrites addr.Addr.
 	connectingAddr := addr.Addr

 	netProto, err := e.checkV4Mapped(&addr)
 	if err != nil {
 		return err
 	}

 	if e.state.connected() {
 		// The endpoint is already connected. If caller hasn't been
 		// notified yet, return success.
 		if !e.isConnectNotified {
 			e.isConnectNotified = true
 			return nil
 		}
 		// Otherwise return that it's already connected.
 		return tcpip.ErrAlreadyConnected
 	}

 	nicid := addr.NIC
 	switch e.state {
 	case StateBound:
 		// If we're already bound to a NIC but the caller is requesting
 		// that we use a different one now, we cannot proceed.
 		if e.boundNICID == 0 {
 			break
 		}

 		if nicid != 0 && nicid != e.boundNICID {
 			return tcpip.ErrNoRoute
 		}

 		nicid = e.boundNICID

 	case StateInitial:
 		// Nothing to do. We'll eventually fill-in the gaps in the ID (if any)
 		// when we find a route.

 	case StateConnecting, StateSynSent, StateSynRecv:
 		// A connection request has already been issued but hasn't completed
 		// yet.
 		return tcpip.ErrAlreadyConnecting

 	case StateError:
 		return e.HardError

 	default:
 		return tcpip.ErrInvalidEndpointState
 	}

 	// Find a route to the desired destination.
 	r, err := e.stack.FindRoute(nicid, e.ID.LocalAddress, addr.Addr, netProto, false /* multicastLoop */)
 	if err != nil {
 		return err
 	}
 	// The endpoint keeps its own clone (below); this reference is dropped
 	// on return.
 	defer r.Release()

 	// Saved so that a port reservation made by Bind can be released using
 	// the address/port it was made with.
 	origID := e.ID

 	netProtos := []tcpip.NetworkProtocolNumber{netProto}
 	e.ID.LocalAddress = r.LocalAddress
 	e.ID.RemoteAddress = r.RemoteAddress
 	e.ID.RemotePort = addr.Port

 	if e.ID.LocalPort != 0 {
 		// The endpoint is bound to a port, attempt to register it.
 		err := e.stack.RegisterTransportEndpoint(nicid, netProtos, ProtocolNumber, e.ID, e, e.reusePort, e.bindToDevice)
 		if err != nil {
 			return err
 		}
 	} else {
 		// The endpoint doesn't have a local port yet, so try to get
 		// one. Make sure that it isn't one that will result in the same
 		// address/port for both local and remote (otherwise this
 		// endpoint would be trying to connect to itself).
 		sameAddr := e.ID.LocalAddress == e.ID.RemoteAddress

 		// Calculate a port offset based on the destination IP/port and
 		// src IP to ensure that for a given tuple (srcIP, destIP,
 		// destPort) the offset used as a starting point is the same to
 		// ensure that we can cycle through the port space effectively.
 		h := jenkins.Sum32(e.stack.PortSeed())
 		h.Write([]byte(e.ID.LocalAddress))
 		h.Write([]byte(e.ID.RemoteAddress))
 		portBuf := make([]byte, 2)
 		binary.LittleEndian.PutUint16(portBuf, e.ID.RemotePort)
 		h.Write(portBuf)
 		portOffset := h.Sum32()

 		if _, err := e.stack.PickEphemeralPortStable(portOffset, func(p uint16) (bool, *tcpip.Error) {
 			if sameAddr && p == e.ID.RemotePort {
 				return false, nil
 			}
 			// reusePort is false below because connect cannot reuse a port even if
 			// reusePort was set.
 			if !e.stack.IsPortAvailable(netProtos, ProtocolNumber, e.ID.LocalAddress, p, false /* reusePort */, e.bindToDevice) {
 				return false, nil
 			}

 			id := e.ID
 			id.LocalPort = p
 			// Bind the registration result to its own variable. Inside
 			// this closure a bare `err` refers to the enclosing
 			// function's err, which is nil at this point, so returning
 			// it would hide the registration failure.
 			switch regErr := e.stack.RegisterTransportEndpoint(nicid, netProtos, ProtocolNumber, id, e, e.reusePort, e.bindToDevice); regErr {
 			case nil:
 				e.ID = id
 				return true, nil
 			case tcpip.ErrPortInUse:
 				// Not usable; let the picker try another port.
 				return false, nil
 			default:
 				return false, regErr
 			}
 		}); err != nil {
 			return err
 		}
 	}

 	// Remove the port reservation. This can happen when Bind is called
 	// before Connect: in such a case we don't want to hold on to
 	// reservations anymore.
 	if e.isPortReserved {
 		e.stack.ReleasePort(e.effectiveNetProtos, ProtocolNumber, origID.LocalAddress, origID.LocalPort, e.bindToDevice)
 		e.isPortReserved = false
 	}

 	e.isRegistered = true
 	e.state = StateConnecting
 	e.route = r.Clone()
 	e.boundNICID = nicid
 	e.effectiveNetProtos = netProtos
 	e.connectingAddress = connectingAddr

 	e.initGSO()

 	// Connect in the restore phase does not perform handshake. Restore its
 	// connection setting here.
 	if !handshake {
 		e.segmentQueue.mu.Lock()
 		for _, l := range []segmentList{e.segmentQueue.list, e.sndQueue, e.snd.writeList} {
 			for s := l.Front(); s != nil; s = s.Next() {
 				// Re-point every queued segment at the new ID and
 				// give it its own clone of the route.
 				s.id = e.ID
 				s.route = r.Clone()
 				e.sndWaker.Assert()
 			}
 		}
 		e.segmentQueue.mu.Unlock()
 		e.snd.updateMaxPayloadSize(int(e.route.MTU()), 0)
 		e.state = StateEstablished
 		e.stack.Stats().TCP.CurrentEstablished.Increment()
 	}

 	if run {
 		e.workerRunning = true
 		e.stack.Stats().TCP.ActiveConnectionOpenings.Increment()
 		go e.protocolMainLoop(handshake)
 	}

 	return tcpip.ErrConnectStarted
 }

 // ConnectEndpoint is not supported.
 //
 // The endpoint argument is ignored and tcpip.ErrInvalidEndpointState is always
 // returned.
 func (*endpoint) ConnectEndpoint(tcpip.Endpoint) *tcpip.Error {
 	return tcpip.ErrInvalidEndpointState
 }

 // Shutdown closes the read and/or write end of the endpoint connection to its
 // peer.
 //
 // flags is OR-ed into e.shutdownFlags before the endpoint state is examined,
 // so the accumulated flags are updated even when the call fails.
 //
 // It returns tcpip.ErrNotConnected when the endpoint is neither connected nor
 // listening, and nil otherwise.
 func (e *endpoint) Shutdown(flags tcpip.ShutdownFlags) *tcpip.Error {
 	e.mu.Lock()
 	defer e.mu.Unlock()
 	e.shutdownFlags |= flags

 	switch {
 	case e.state.connected():
 		// Close for read.
 		// Note that the accumulated flags are tested here, not just the
 		// flags passed to this call.
 		if (e.shutdownFlags & tcpip.ShutdownRead) != 0 {
 			// Mark read side as closed.
 			e.rcvListMu.Lock()
 			e.rcvClosed = true
 			rcvBufUsed := e.rcvBufUsed
 			e.rcvListMu.Unlock()

 			// If we're fully closed and we have unread data we need to abort
 			// the connection with a RST.
 			if (e.shutdownFlags&tcpip.ShutdownWrite) != 0 && rcvBufUsed > 0 {
 				e.notifyProtocolGoroutine(notifyReset)
 				// No FIN is queued in this case.
 				return nil
 			}
 		}

 		// Close for write.
 		if (e.shutdownFlags & tcpip.ShutdownWrite) != 0 {
 			e.sndBufMu.Lock()

 			if e.sndClosed {
 				// Already closed.
 				e.sndBufMu.Unlock()
 				// Leaves the switch; the function then returns nil.
 				break
 			}

 			// Queue fin segment.
 			// The segment carries no payload (nil view) and accounts
 			// for one unit in sndBufInQueue.
 			s := newSegmentFromView(&e.route, e.ID, nil)
 			e.sndQueue.PushBack(s)
 			e.sndBufInQueue++

 			// Mark endpoint as closed.
 			e.sndClosed = true

 			e.sndBufMu.Unlock()

 			// Tell protocol goroutine to close.
 			e.sndCloseWaker.Assert()
 		}

 	case e.state == StateListen:
 		// Tell protocolListenLoop to stop.
 		// Only the flags passed to this call are considered here, not
 		// the accumulated e.shutdownFlags.
 		if flags&tcpip.ShutdownRead != 0 {
 			e.notifyProtocolGoroutine(notifyClose)
 		}

 	default:
 		return tcpip.ErrNotConnected
 	}

 	return nil
 }

 // Listen switches the endpoint to "listen" mode so that it can accept new
 // connections. backlog is the capacity of the queue of accepted connections.
 //
 // Failures are counted in the stack-wide and per-endpoint
 // FailedConnectionAttempts statistics unless the error asks for statistics to
 // be ignored.
 func (e *endpoint) Listen(backlog int) *tcpip.Error {
 	err := e.listen(backlog)
 	if err == nil || err.IgnoreStats() {
 		return err
 	}

 	e.stack.Stats().TCP.FailedConnectionAttempts.Increment()
 	e.stats.FailedConnectionAttempts.Increment()
 	return err
 }

 // listen does the work of Listen with e.mu held for writing.
 //
 // On an endpoint that is already listening (and not shutting down) it only
 // resizes the accepted-connections channel to backlog. Otherwise the endpoint
 // must be in StateBound; it is then registered with the stack, moved to
 // StateListen, and the listen loop goroutine is started.
 //
 // It returns tcpip.ErrInvalidEndpointState when the endpoint is not bound, or
 // when more connections are already pending than the new backlog can hold.
 // A registration failure is returned as-is.
 func (e *endpoint) listen(backlog int) *tcpip.Error {
 	e.mu.Lock()
 	defer e.mu.Unlock()

 	// Allow the backlog to be adjusted if the endpoint is not shutting down.
 	// When the endpoint shuts down, it sets workerCleanup to true, and from
 	// that point onward, acceptedChan is the responsibility of the cleanup()
 	// method (and should not be touched anywhere else, including here).
 	if e.state == StateListen && !e.workerCleanup {
 		// Adjust the size of the channel iff we can fit existing
 		// pending connections into the new one.
 		if len(e.acceptedChan) > backlog {
 			return tcpip.ErrInvalidEndpointState
 		}
 		// Nothing to do when the capacity is unchanged.
 		if cap(e.acceptedChan) == backlog {
 			return nil
 		}
 		origChan := e.acceptedChan
 		e.acceptedChan = make(chan *endpoint, backlog)
 		// Closing the old channel lets the range loop below end once
 		// the pending endpoints have been moved to the new channel.
 		close(origChan)
 		for ep := range origChan {
 			e.acceptedChan <- ep
 		}
 		return nil
 	}

 	// Endpoint must be bound before it can transition to listen mode.
 	if e.state != StateBound {
 		e.stats.ReadErrors.InvalidEndpointState.Increment()
 		return tcpip.ErrInvalidEndpointState
 	}

 	// Register the endpoint.
 	if err := e.stack.RegisterTransportEndpoint(e.boundNICID, e.effectiveNetProtos, ProtocolNumber, e.ID, e, e.reusePort, e.bindToDevice); err != nil {
 		return err
 	}

 	e.isRegistered = true
 	e.state = StateListen
 	// An existing channel is kept as-is.
 	if e.acceptedChan == nil {
 		e.acceptedChan = make(chan *endpoint, backlog)
 	}
 	e.workerRunning = true

 	// The listen loop is handed the receive buffer space available now.
 	go e.protocolListenLoop(
 		seqnum.Size(e.receiveBufferAvailable()))

 	return nil
 }

 // startAcceptedLoop prepares an accepted endpoint for use: it marks the worker
 // as running, attaches waiterQueue, and launches the protocol main loop in its
 // own goroutine without performing a handshake.
 func (e *endpoint) startAcceptedLoop(waiterQueue *waiter.Queue) {
 	e.workerRunning = true
 	e.waiterQueue = waiterQueue

 	go e.protocolMainLoop(false /* handshake */)
 }

 // Accept hands out one connection that a peer has established with this
 // listening endpoint, together with a fresh waiter queue for it.
 //
 // It does not block: tcpip.ErrWouldBlock is returned when no connection is
 // pending, and tcpip.ErrInvalidEndpointState when the endpoint is not
 // listening.
 func (e *endpoint) Accept() (tcpip.Endpoint, *waiter.Queue, *tcpip.Error) {
 	e.mu.RLock()
 	defer e.mu.RUnlock()

 	// Only a listening endpoint can accept connections.
 	if e.state != StateListen {
 		return nil, nil, tcpip.ErrInvalidEndpointState
 	}

 	select {
 	case accepted := <-e.acceptedChan:
 		// Start the protocol goroutine of the new endpoint.
 		queue := new(waiter.Queue)
 		accepted.startAcceptedLoop(queue)
 		e.stack.Stats().TCP.PassiveConnectionOpenings.Increment()
 		return accepted, queue, nil

 	default:
 		// Nothing pending.
 		return nil, nil, tcpip.ErrWouldBlock
 	}
 }

 // Bind binds the endpoint to a local port and, optionally, a local address.
 //
 // It returns tcpip.ErrAlreadyBound unless the endpoint is in its initial
 // state, and tcpip.ErrBadLocalAddress when an address is given that is not one
 // of the stack's local addresses. Errors from the v4-mapped address check and
 // from port reservation are returned as-is.
 func (e *endpoint) Bind(addr tcpip.FullAddress) (err *tcpip.Error) {
 	e.mu.Lock()
 	defer e.mu.Unlock()

 	// Binding is only possible from the initial state; an endpoint that has
 	// gone into a connected or listen state is already bound.
 	if e.state != StateInitial {
 		return tcpip.ErrAlreadyBound
 	}

 	e.BindAddr = addr.Addr
 	proto, err := e.checkV4Mapped(&addr)
 	if err != nil {
 		return err
 	}

 	// An IPv6 endpoint without v6only that binds to the wildcard (empty)
 	// address covers IPv4 as well.
 	protos := []tcpip.NetworkProtocolNumber{proto}
 	if proto == header.IPv6ProtocolNumber && !e.v6only && addr.Addr == "" {
 		protos = append(protos, header.IPv4ProtocolNumber)
 	}

 	port, err := e.stack.ReservePort(protos, ProtocolNumber, addr.Addr, addr.Port, e.reusePort, e.bindToDevice)
 	if err != nil {
 		return err
 	}

 	e.isPortReserved = true
 	e.effectiveNetProtos = protos
 	e.ID.LocalPort = port

 	// From here on, any failure must undo the port reservation. The device
 	// the reservation was made with is captured now for use in the release.
 	reservedDev := e.bindToDevice
 	defer func() {
 		if err == nil {
 			return
 		}
 		e.stack.ReleasePort(protos, ProtocolNumber, addr.Addr, port, reservedDev)
 		e.isPortReserved = false
 		e.effectiveNetProtos = nil
 		e.ID.LocalPort = 0
 		e.ID.LocalAddress = ""
 		e.boundNICID = 0
 	}()

 	// A specific address must be one of our local addresses.
 	if len(addr.Addr) != 0 {
 		nicID := e.stack.CheckLocalAddress(addr.NIC, proto, addr.Addr)
 		if nicID == 0 {
 			return tcpip.ErrBadLocalAddress
 		}

 		e.boundNICID = nicID
 		e.ID.LocalAddress = addr.Addr
 	}

 	// The endpoint is now bound.
 	e.state = StateBound

 	return nil
 }

 // GetLocalAddress reports the local address, port and NIC the endpoint is
 // bound to. The returned error is always nil.
 func (e *endpoint) GetLocalAddress() (tcpip.FullAddress, *tcpip.Error) {
 	e.mu.RLock()
 	local := tcpip.FullAddress{
 		NIC:  e.boundNICID,
 		Addr: e.ID.LocalAddress,
 		Port: e.ID.LocalPort,
 	}
 	e.mu.RUnlock()

 	return local, nil
 }

 // GetRemoteAddress reports the address, port and NIC of the peer the endpoint
 // is connected to. It returns tcpip.ErrNotConnected when the endpoint is not
 // in a connected state.
 func (e *endpoint) GetRemoteAddress() (tcpip.FullAddress, *tcpip.Error) {
 	e.mu.RLock()
 	defer e.mu.RUnlock()

 	var remote tcpip.FullAddress
 	if !e.state.connected() {
 		return remote, tcpip.ErrNotConnected
 	}

 	remote.NIC = e.boundNICID
 	remote.Addr = e.ID.RemoteAddress
 	remote.Port = e.ID.RemotePort
 	return remote, nil
 }

 // HandlePacket is invoked by the stack for every packet that arrives for this
 // transport endpoint.
 //
 // Segments that fail to parse or carry an invalid checksum are counted and
 // dropped. Valid segments are queued for the worker goroutine, or counted and
 // dropped when the queue is full.
 func (e *endpoint) HandlePacket(r *stack.Route, id stack.TransportEndpointID, vv buffer.VectorisedView) {
 	seg := newSegment(r, id, vv)

 	switch {
 	case !seg.parse():
 		e.stack.Stats().MalformedRcvdPackets.Increment()
 		e.stack.Stats().TCP.InvalidSegmentsReceived.Increment()
 		e.stats.ReceiveErrors.MalformedPacketsReceived.Increment()
 		seg.decRef()
 		return

 	case !seg.csumValid:
 		e.stack.Stats().MalformedRcvdPackets.Increment()
 		e.stack.Stats().TCP.ChecksumErrors.Increment()
 		e.stats.ReceiveErrors.ChecksumErrors.Increment()
 		seg.decRef()
 		return
 	}

 	e.stack.Stats().TCP.ValidSegmentsReceived.Increment()
 	e.stats.SegmentsReceived.Increment()
 	if seg.flags&header.TCPFlagRst != 0 {
 		e.stack.Stats().TCP.ResetsReceived.Increment()
 	}

 	// Hand the segment to the worker goroutine.
 	if !e.segmentQueue.enqueue(seg) {
 		// The queue is full, so the segment is dropped.
 		e.stack.Stats().DroppedPackets.Increment()
 		e.stats.ReceiveErrors.SegmentQueueDropped.Increment()
 		seg.decRef()
 		return
 	}
 	e.newSegmentWaker.Assert()
 }

 // HandleControlPacket implements stack.TransportEndpoint.HandleControlPacket.
 //
 // Only stack.ControlPacketTooBig is acted upon: the event is counted, the
 // send MTU is lowered to extra when extra is smaller, and the protocol
 // goroutine is told that the MTU changed. Other control types are ignored.
 func (e *endpoint) HandleControlPacket(id stack.TransportEndpointID, typ stack.ControlType, extra uint32, vv buffer.VectorisedView) {
 	if typ != stack.ControlPacketTooBig {
 		return
 	}

 	e.sndBufMu.Lock()
 	e.packetTooBigCount++
 	// The send MTU is only ever lowered here, never raised.
 	if mtu := int(extra); mtu < e.sndMTU {
 		e.sndMTU = mtu
 	}
 	e.sndBufMu.Unlock()

 	e.notifyProtocolGoroutine(notifyMTUChanged)
 }

 // updateSndBufferUsage is called by the protocol goroutine when v bytes of
 // send buffer space have been freed.
 //
 // Writers are woken only when usage drops from at least half of the send
 // buffer size to below half, so that they are not woken just to queue one or
 // two segments before going back to sleep.
 func (e *endpoint) updateSndBufferUsage(v int) {
 	e.sndBufMu.Lock()
 	half := e.sndBufSize >> 1
 	wasAtOrAbove := e.sndBufUsed >= half
 	e.sndBufUsed -= v
 	crossed := wasAtOrAbove && e.sndBufUsed < half
 	e.sndBufMu.Unlock()

 	if !crossed {
 		return
 	}
 	e.waiterQueue.Notify(waiter.EventOut)
 }

 // readyToRead is called by the protocol goroutine either to deliver a new
 // segment to readers, or, with a nil s, to signal that the connection is
 // closed for receiving. Readers are notified in both cases.
 func (e *endpoint) readyToRead(s *segment) {
 	e.rcvListMu.Lock()
 	if s == nil {
 		e.rcvClosed = true
 	} else {
 		s.incRef()
 		e.rcvBufUsed += s.data.Size()

 		// If this segment closes the receive window, record the zero
 		// window before the segment is delivered, so that a later read
 		// of it correctly triggers a non-zero window notification.
 		if e.receiveBufferAvailableLocked()>>e.rcv.rcvWndScale == 0 {
 			e.stats.ReceiveErrors.ZeroRcvWindowState.Increment()
 			e.zeroWindow = true
 		}
 		e.rcvList.PushBack(s)
 	}
 	e.rcvListMu.Unlock()

 	e.waiterQueue.Notify(waiter.EventIn)
 }

 // receiveBufferAvailableLocked returns the number of bytes still free in the
 // receive buffer, never less than zero.
 // rcvListMu must be held when this function is called.
 func (e *endpoint) receiveBufferAvailableLocked() int {
 	if e.rcvBufSize > e.rcvBufUsed {
 		return e.rcvBufSize - e.rcvBufUsed
 	}

 	// Usage can exceed the buffer size after the receive buffer has been
 	// shrunk; report no space in that case.
 	return 0
 }

 // receiveBufferAvailable returns the number of bytes still free in the receive
 // buffer. It takes rcvListMu itself.
 func (e *endpoint) receiveBufferAvailable() int {
 	e.rcvListMu.Lock()
 	defer e.rcvListMu.Unlock()

 	return e.receiveBufferAvailableLocked()
 }

 // receiveBufferSize returns the current receive buffer size, read under
 // rcvListMu.
 func (e *endpoint) receiveBufferSize() int {
 	e.rcvListMu.Lock()
 	defer e.rcvListMu.Unlock()

 	return e.rcvBufSize
 }

 // maxReceiveBufferSize returns the maximum receive buffer size configured in
 // the stack, or MaxBufferSize when the stack option cannot be queried.
 func (e *endpoint) maxReceiveBufferSize() int {
 	var limits ReceiveBufferSizeOption
 	if e.stack.TransportProtocolOption(ProtocolNumber, &limits) == nil {
 		return limits.Max
 	}

 	// Fall back to the hardcoded maximum buffer size.
 	return MaxBufferSize
 }

 // rcvWndScaleForHandshake computes the receive window scale to offer to the
 // peer when window scaling is enabled (true by default).
 //
 // With auto-tuning disabled the scale is derived from the current receive
 // buffer size; otherwise it is derived from the maximum permissible receive
 // buffer size.
 func (e *endpoint) rcvWndScaleForHandshake() int {
 	scaleBasis := e.receiveBufferSize()

 	e.rcvListMu.Lock()
 	autoTuning := !e.rcvAutoParams.disabled
 	e.rcvListMu.Unlock()

 	if autoTuning {
 		scaleBasis = e.maxReceiveBufferSize()
 	}
 	return FindWndScale(seqnum.Size(scaleBasis))
 }

 // updateRecentTimestamp updates the recent timestamp using the algorithm
 // described in https://tools.ietf.org/html/rfc7323#section-4.3
 //
 // recentTS is replaced by tsVal only when timestamps are enabled, tsVal is
 // newer than the stored value, and segSeq does not lie beyond maxSentAck.
 func (e *endpoint) updateRecentTimestamp(tsVal uint32, maxSentAck seqnum.Value, segSeq seqnum.Value) {
 	if !e.sendTSOk {
 		return
 	}
 	if !seqnum.Value(e.recentTS).LessThan(seqnum.Value(tsVal)) {
 		return
 	}
 	if !segSeq.LessThanEq(maxSentAck) {
 		return
 	}
 	e.recentTS = tsVal
 }

 // maybeEnableTimestamp enables the timestamp option on this endpoint when the
 // SYN options show that it was negotiated, and seeds recentTS from
 // synOpts.TSVal. It does nothing otherwise.
 func (e *endpoint) maybeEnableTimestamp(synOpts *header.TCPSynOptions) {
 	if !synOpts.TS {
 		return
 	}
 	e.recentTS = synOpts.TSVal
 	e.sendTSOk = true
 }

 // timestamp returns the timestamp value to be used in the TSVal field of the
 // timestamp option for outgoing TCP segments for a given endpoint.
 //
 // The value is tcpTimeStamp applied to the endpoint's tsOffset.
 func (e *endpoint) timestamp() uint32 {
 	return tcpTimeStamp(e.tsOffset)
 }

 // tcpTimeStamp returns the current wall-clock time in milliseconds, truncated
 // to 32 bits, plus offset (with wrap-around). It is a free function rather
 // than inlined into the endpoint method because it is also needed when SYN
 // cookies are in use and no endpoint exists yet at the time the SYN cookie is
 // sent.
 func tcpTimeStamp(offset uint32) uint32 {
 	now := time.Now()
 	wholeSeconds := now.Unix() * 1000
 	subSecond := int64(now.Nanosecond() / 1e6)
 	millis := wholeSeconds + subSecond
 	return uint32(millis) + offset
 }

 // timeStampOffset returns a random 32-bit offset to be added to the timestamp
 // values sent in the timestamp option of TCP segments. It panics if random
 // bytes cannot be read.
 //
 // See https://tools.ietf.org/html/rfc7323#section-5.4 for details on why a
 // randomized offset is required.
 //
 // NOTE: This is not completely to spec as normally this should be
 // initialized in a manner analogous to how sequence numbers are randomized
 // per connection basis. But for now this is sufficient.
 func timeStampOffset() uint32 {
 	var raw [4]byte
 	if _, err := rand.Read(raw[:]); err != nil {
 		panic(err)
 	}

 	// Assemble the four random bytes with the first byte least significant.
 	return binary.LittleEndian.Uint32(raw[:])
 }

 // maybeEnableSACKPermitted enables the SACKPermitted option on this endpoint
 // when the SYN options show that SACK was negotiated and the TCP stack is
 // configured with SACK enabled.
 func (e *endpoint) maybeEnableSACKPermitted(synOpts *header.TCPSynOptions) {
 	var stackSACK SACKEnabled
 	if e.stack.TransportProtocolOption(ProtocolNumber, &stackSACK) != nil {
 		// The stack doesn't support SACK, so there is nothing to enable.
 		return
 	}

 	if synOpts.SACKPermitted && bool(stackSACK) {
 		e.sackPermitted = true
 	}
 }

 // maxOptionSize returns the maximum size of TCP options for this endpoint,
 // measured as the length of what makeOptions builds when it is handed
 // header.TCPMaxSACKBlocks (zero-valued) SACK blocks.
 func (e *endpoint) maxOptionSize() (size int) {
 	var blocks [header.TCPMaxSACKBlocks]header.SACKBlock
 	opts := e.makeOptions(blocks[:])
 	// The options buffer is only needed for its length; release it through
 	// putOptions once the length has been taken.
 	defer putOptions(opts)

 	return len(opts)
 }

 // completeState captures a snapshot of the endpoint as a
 // stack.TCPEndpointState and returns it. This is used before invoking the
 // probe. Individual groups of fields are copied under their own locks (or
 // with no lock at all), so the snapshot may not be fully consistent if there
 // are intervening syscalls while the state is being copied.
 func (e *endpoint) completeState() stack.TCPEndpointState {
 	var s stack.TCPEndpointState
 	s.SegTime = time.Now()

 	// Endpoint ID, read under e.mu.
 	e.mu.Lock()
 	s.ID = stack.TCPEndpointID(e.ID)
 	e.mu.Unlock()

 	// Receive buffer accounting and receive auto-tuning parameters, read
 	// under e.rcvListMu.
 	e.rcvListMu.Lock()
 	s.RcvBufSize = e.rcvBufSize
 	s.RcvBufUsed = e.rcvBufUsed
 	s.RcvClosed = e.rcvClosed
 	autoTune := &s.RcvAutoParams
 	autoTune.MeasureTime = e.rcvAutoParams.measureTime
 	autoTune.CopiedBytes = e.rcvAutoParams.copied
 	autoTune.PrevCopiedBytes = e.rcvAutoParams.prevCopied
 	autoTune.RTT = e.rcvAutoParams.rtt
 	autoTune.RTTMeasureSeqNumber = e.rcvAutoParams.rttMeasureSeqNumber
 	autoTune.RTTMeasureTime = e.rcvAutoParams.rttMeasureTime
 	autoTune.Disabled = e.rcvAutoParams.disabled
 	e.rcvListMu.Unlock()

 	// TCP option state (timestamps and SACK). The SACK blocks are copied
 	// into a freshly allocated slice so the snapshot does not alias the
 	// endpoint's own block array.
 	s.SendTSOk = e.sendTSOk
 	s.RecentTS = e.recentTS
 	s.TSOffset = e.tsOffset
 	s.SACKPermitted = e.sackPermitted
 	s.SACK.Blocks = make([]header.SACKBlock, e.sack.NumBlocks)
 	copy(s.SACK.Blocks, e.sack.Blocks[:e.sack.NumBlocks])
 	s.SACK.ReceivedBlocks, s.SACK.MaxSACKED = e.scoreboard.Copy()

 	// Send buffer accounting, read under e.sndBufMu.
 	e.sndBufMu.Lock()
 	s.SndBufSize = e.sndBufSize
 	s.SndBufUsed = e.sndBufUsed
 	s.SndClosed = e.sndClosed
 	s.SndBufInQueue = e.sndBufInQueue
 	s.PacketTooBigCount = e.packetTooBigCount
 	s.SndMTU = e.sndMTU
 	e.sndBufMu.Unlock()

 	// Receiver state. s.Receiver is still zero-valued here, so filling it
 	// field by field yields the same result as assigning a literal.
 	receiver := &s.Receiver
 	receiver.RcvNxt = e.rcv.rcvNxt
 	receiver.RcvAcc = e.rcv.rcvAcc
 	receiver.RcvWndScale = e.rcv.rcvWndScale
 	receiver.PendingBufUsed = e.rcv.pendingBufUsed
 	receiver.PendingBufSize = e.rcv.pendingBufSize

 	// Sender state, including the nested fast recovery state. As above,
 	// s.Sender starts out zero-valued.
 	sender := &s.Sender
 	sender.LastSendTime = e.snd.lastSendTime
 	sender.DupAckCount = e.snd.dupAckCount

 	recovery := &sender.FastRecovery
 	recovery.Active = e.snd.fr.active
 	recovery.First = e.snd.fr.first
 	recovery.Last = e.snd.fr.last
 	recovery.MaxCwnd = e.snd.fr.maxCwnd
 	recovery.HighRxt = e.snd.fr.highRxt
 	recovery.RescueRxt = e.snd.fr.rescueRxt

 	sender.SndCwnd = e.snd.sndCwnd
 	sender.Ssthresh = e.snd.sndSsthresh
 	sender.SndCAAckCount = e.snd.sndCAAckCount
 	sender.Outstanding = e.snd.outstanding
 	sender.SndWnd = e.snd.sndWnd
 	sender.SndUna = e.snd.sndUna
 	sender.SndNxt = e.snd.sndNxt
 	sender.RTTMeasureSeqNum = e.snd.rttMeasureSeqNum
 	sender.RTTMeasureTime = e.snd.rttMeasureTime
 	sender.Closed = e.snd.closed
 	sender.RTO = e.snd.rto
 	sender.MaxPayloadSize = e.snd.maxPayloadSize
 	sender.SndWndScale = e.snd.sndWndScale
 	sender.MaxSentAck = e.snd.maxSentAck

 	// Smoothed RTT, read under the lock embedded in e.snd.rtt.
 	e.snd.rtt.Lock()
 	sender.SRTT = e.snd.rtt.srtt
 	sender.SRTTInited = e.snd.rtt.srttInited
 	e.snd.rtt.Unlock()

 	// CUBIC state is only exported when the congestion control
 	// implementation in use is *cubicState; otherwise it stays zero.
 	switch cc := e.snd.cc.(type) {
 	case *cubicState:
 		sender.Cubic = stack.TCPCubicState{
 			WMax:                    cc.wMax,
 			WLastMax:                cc.wLastMax,
 			T:                       cc.t,
 			TimeSinceLastCongestion: time.Since(cc.t),
 			C:                       cc.c,
 			K:                       cc.k,
 			Beta:                    cc.beta,
 			WC:                      cc.wC,
 			WEst:                    cc.wEst,
 		}
 	}
 	return s
 }

 // initHardwareGSO builds the stack.GSO descriptor used when the route
 // advertises hardware GSO and stores it in e.gso. The GSO type and L3 header
 // length are chosen from the route's network protocol; checksum offload is
 // requested at the TCP checksum offset, and the maximum size comes from the
 // route.
 //
 // It panics if the route's network protocol is neither IPv4 nor IPv6.
 func (e *endpoint) initHardwareGSO() {
 	gso := &stack.GSO{}
 	switch e.route.NetProto {
 	case header.IPv4ProtocolNumber:
 		gso.Type = stack.GSOTCPv4
 		gso.L3HdrLen = header.IPv4MinimumSize
 	case header.IPv6ProtocolNumber:
 		gso.Type = stack.GSOTCPv6
 		gso.L3HdrLen = header.IPv6MinimumSize
 	default:
 		// Report the value that was actually switched on. e.NetProto is a
 		// separate field and is not guaranteed to hold the same protocol as
 		// the route, so printing it could hide the offending value.
 		panic(fmt.Sprintf("Unknown netProto: %v", e.route.NetProto))
 	}
 	gso.NeedsCsum = true
 	gso.CsumOffset = header.TCPChecksumOffset
 	gso.MaxSize = e.route.GSOMaxSize()
 	e.gso = gso
 }

 // initGSO sets up e.gso according to the route's capabilities. Hardware GSO
 // takes precedence and is configured by initHardwareGSO; otherwise, if the
 // route supports software GSO, a software descriptor that needs no checksum
 // is installed. When neither capability is present e.gso is left untouched.
 func (e *endpoint) initGSO() {
 	switch {
 	case e.route.Capabilities()&stack.CapabilityHardwareGSO != 0:
 		e.initHardwareGSO()
 	case e.route.Capabilities()&stack.CapabilitySoftwareGSO != 0:
 		e.gso = &stack.GSO{
 			MaxSize:   e.route.GSOMaxSize(),
 			Type:      stack.GSOSW,
 			NeedsCsum: false,
 		}
 	}
 }

 // State implements tcpip.Endpoint.State. It exports the endpoint's protocol
 // state for diagnostics, reading it while holding e.mu and returning it as a
 // plain uint32.
 func (e *endpoint) State() uint32 {
 	e.mu.Lock()
 	current := e.state
 	e.mu.Unlock()
 	return uint32(current)
 }

 // Info returns a copy of the endpoint info. The copy is taken while holding
 // e.mu for reading, and a pointer to that copy (not to the endpoint's own
 // field) is returned.
 func (e *endpoint) Info() tcpip.EndpointInfo {
 	e.mu.RLock()
 	defer e.mu.RUnlock()

 	snapshot := e.EndpointInfo
 	return &snapshot
 }

 // Stats returns a pointer to the endpoint's own stats field. No copy is
 // made, so the returned value refers to the endpoint's live stats.
 func (e *endpoint) Stats() tcpip.EndpointStats {
 	stats := &e.stats
 	return stats
 }

 // mssForRoute returns the maximum segment size usable on route r: the
 // route's MTU minus the minimum TCP header size.
 //
 // The result is saturated at math.MaxUint16, the largest value the uint16
 // return type can hold. A plain conversion would silently discard the high
 // bits and could yield an arbitrarily small MSS on a route with a very large
 // MTU.
 //
 // NOTE(review): an MTU smaller than header.TCPMinimumSize is not handled
 // specially; there is no meaningful MSS for such a route. Confirm callers
 // never pass one.
 func mssForRoute(r *stack.Route) uint16 {
 	mss := r.MTU() - header.TCPMinimumSize
 	if mss > math.MaxUint16 {
 		return math.MaxUint16
 	}
 	return uint16(mss)
 }