blob: dc37e61a40bbd5edd18e07b44f90dc7ecee58261 [file] [log] [blame]
// Copyright 2020 The gVisor Authors.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// See the License for the specific language governing permissions and
// limitations under the License.
package tcpip
import (
// PacketOverheadFactor is used to multiply the value provided by the user on a
// SetSockOpt for setting the send/receive buffer sizes sockets.
const PacketOverheadFactor = 2
// SocketOptionsHandler holds methods that help define endpoint specific
// behavior for socket level socket options. These must be implemented by
// endpoints to get notified when socket level options are set.
type SocketOptionsHandler interface {
// OnReuseAddressSet is invoked when SO_REUSEADDR is set for an endpoint.
OnReuseAddressSet(v bool)
// OnReusePortSet is invoked when SO_REUSEPORT is set for an endpoint.
OnReusePortSet(v bool)
// OnKeepAliveSet is invoked when SO_KEEPALIVE is set for an endpoint.
OnKeepAliveSet(v bool)
// OnDelayOptionSet is invoked when TCP_NODELAY is set for an endpoint.
// Note that v will be the inverse of TCP_NODELAY option.
OnDelayOptionSet(v bool)
// OnCorkOptionSet is invoked when TCP_CORK is set for an endpoint.
OnCorkOptionSet(v bool)
// LastError is invoked when SO_ERROR is read for an endpoint.
LastError() Error
// UpdateLastError updates the endpoint specific last error field.
UpdateLastError(err Error)
// HasNIC is invoked to check if the NIC is valid for SO_BINDTODEVICE.
HasNIC(v int32) bool
// OnSetSendBufferSize is invoked when the send buffer size for an endpoint is
// changed. The handler is invoked with the new value for the socket send
// buffer size. It also returns the newly set value.
OnSetSendBufferSize(v int64) (newSz int64)
// DefaultSocketOptionsHandler is an embeddable type that implements no-op
// implementations for SocketOptionsHandler methods.
type DefaultSocketOptionsHandler struct{}
var _ SocketOptionsHandler = (*DefaultSocketOptionsHandler)(nil)
// OnReuseAddressSet implements SocketOptionsHandler.OnReuseAddressSet.
func (*DefaultSocketOptionsHandler) OnReuseAddressSet(bool) {}
// OnReusePortSet implements SocketOptionsHandler.OnReusePortSet.
func (*DefaultSocketOptionsHandler) OnReusePortSet(bool) {}
// OnKeepAliveSet implements SocketOptionsHandler.OnKeepAliveSet.
func (*DefaultSocketOptionsHandler) OnKeepAliveSet(bool) {}
// OnDelayOptionSet implements SocketOptionsHandler.OnDelayOptionSet.
func (*DefaultSocketOptionsHandler) OnDelayOptionSet(bool) {}
// OnCorkOptionSet implements SocketOptionsHandler.OnCorkOptionSet.
func (*DefaultSocketOptionsHandler) OnCorkOptionSet(bool) {}
// LastError implements SocketOptionsHandler.LastError.
func (*DefaultSocketOptionsHandler) LastError() Error {
return nil
// UpdateLastError implements SocketOptionsHandler.UpdateLastError.
func (*DefaultSocketOptionsHandler) UpdateLastError(Error) {}
// HasNIC implements SocketOptionsHandler.HasNIC.
func (*DefaultSocketOptionsHandler) HasNIC(int32) bool {
return false
// OnSetSendBufferSize implements SocketOptionsHandler.OnSetSendBufferSize.
func (*DefaultSocketOptionsHandler) OnSetSendBufferSize(v int64) (newSz int64) {
return v
// StackHandler holds methods to access the stack options. These must be
// implemented by the stack.
type StackHandler interface {
// Option allows retrieving stack wide options.
Option(option interface{}) Error
// TransportProtocolOption allows retrieving individual protocol level
// option values.
TransportProtocolOption(proto TransportProtocolNumber, option GettableTransportProtocolOption) Error
// SocketOptions contains all the variables which store values for SOL_SOCKET,
// SOL_IP, SOL_IPV6 and SOL_TCP level options.
// +stateify savable
type SocketOptions struct {
handler SocketOptionsHandler
// StackHandler is initialized at the creation time and will not change.
stackHandler StackHandler `state:"manual"`
// These fields are accessed and modified using atomic operations.
// broadcastEnabled determines whether datagram sockets are allowed to
// send packets to a broadcast address.
broadcastEnabled uint32
// passCredEnabled determines whether SCM_CREDENTIALS socket control
// messages are enabled.
passCredEnabled uint32
// noChecksumEnabled determines whether UDP checksum is disabled while
// transmitting for this socket.
noChecksumEnabled uint32
// reuseAddressEnabled determines whether Bind() should allow reuse of
// local address.
reuseAddressEnabled uint32
// reusePortEnabled determines whether to permit multiple sockets to be
// bound to an identical socket address.
reusePortEnabled uint32
// keepAliveEnabled determines whether TCP keepalive is enabled for this
// socket.
keepAliveEnabled uint32
// multicastLoopEnabled determines whether multicast packets sent over a
// non-loopback interface will be looped back.
multicastLoopEnabled uint32
// receiveTOSEnabled is used to specify if the TOS ancillary message is
// passed with incoming packets.
receiveTOSEnabled uint32
// receiveTClassEnabled is used to specify if the IPV6_TCLASS ancillary
// message is passed with incoming packets.
receiveTClassEnabled uint32
// receivePacketInfoEnabled is used to specify if more inforamtion is
// provided with incoming packets such as interface index and address.
receivePacketInfoEnabled uint32
// hdrIncludeEnabled is used to indicate for a raw endpoint that all packets
// being written have an IP header and the endpoint should not attach an IP
// header.
hdrIncludedEnabled uint32
// v6OnlyEnabled is used to determine whether an IPv6 socket is to be
// restricted to sending and receiving IPv6 packets only.
v6OnlyEnabled uint32
// quickAckEnabled is used to represent the value of TCP_QUICKACK option.
// It currently does not have any effect on the TCP endpoint.
quickAckEnabled uint32
// delayOptionEnabled is used to specify if data should be sent out immediately
// by the transport protocol. For TCP, it determines if the Nagle algorithm
// is on or off.
delayOptionEnabled uint32
// corkOptionEnabled is used to specify if data should be held until segments
// are full by the TCP transport protocol.
corkOptionEnabled uint32
// receiveOriginalDstAddress is used to specify if the original destination of
// the incoming packet should be returned as an ancillary message.
receiveOriginalDstAddress uint32
// recvErrEnabled determines whether extended reliable error message passing
// is enabled.
recvErrEnabled uint32
// errQueue is the per-socket error queue. It is protected by errQueueMu.
errQueueMu sync.Mutex `state:"nosave"`
errQueue sockErrorList
// bindToDevice determines the device to which the socket is bound.
bindToDevice int32
// getSendBufferLimits provides the handler to get the min, default and
// max size for send buffer. It is initialized at the creation time and
// will not change.
getSendBufferLimits GetSendBufferLimits `state:"manual"`
// sendBufferSize determines the send buffer size for this socket.
sendBufferSize int64
// mu protects the access to the below fields.
mu sync.Mutex `state:"nosave"`
// linger determines the amount of time the socket should linger before
// close. We currently implement this option for TCP socket only.
linger LingerOption
// InitHandler initializes the handler. This must be called before using the
// socket options utility.
func (so *SocketOptions) InitHandler(handler SocketOptionsHandler, stack StackHandler, getSendBufferLimits GetSendBufferLimits) {
so.handler = handler
so.stackHandler = stack
so.getSendBufferLimits = getSendBufferLimits
func storeAtomicBool(addr *uint32, v bool) {
var val uint32
if v {
val = 1
atomic.StoreUint32(addr, val)
// SetLastError sets the last error for a socket.
func (so *SocketOptions) SetLastError(err Error) {
// GetBroadcast gets value for SO_BROADCAST option.
func (so *SocketOptions) GetBroadcast() bool {
return atomic.LoadUint32(&so.broadcastEnabled) != 0
// SetBroadcast sets value for SO_BROADCAST option.
func (so *SocketOptions) SetBroadcast(v bool) {
storeAtomicBool(&so.broadcastEnabled, v)
// GetPassCred gets value for SO_PASSCRED option.
func (so *SocketOptions) GetPassCred() bool {
return atomic.LoadUint32(&so.passCredEnabled) != 0
// SetPassCred sets value for SO_PASSCRED option.
func (so *SocketOptions) SetPassCred(v bool) {
storeAtomicBool(&so.passCredEnabled, v)
// GetNoChecksum gets value for SO_NO_CHECK option.
func (so *SocketOptions) GetNoChecksum() bool {
return atomic.LoadUint32(&so.noChecksumEnabled) != 0
// SetNoChecksum sets value for SO_NO_CHECK option.
func (so *SocketOptions) SetNoChecksum(v bool) {
storeAtomicBool(&so.noChecksumEnabled, v)
// GetReuseAddress gets value for SO_REUSEADDR option.
func (so *SocketOptions) GetReuseAddress() bool {
return atomic.LoadUint32(&so.reuseAddressEnabled) != 0
// SetReuseAddress sets value for SO_REUSEADDR option.
func (so *SocketOptions) SetReuseAddress(v bool) {
storeAtomicBool(&so.reuseAddressEnabled, v)
// GetReusePort gets value for SO_REUSEPORT option.
func (so *SocketOptions) GetReusePort() bool {
return atomic.LoadUint32(&so.reusePortEnabled) != 0
// SetReusePort sets value for SO_REUSEPORT option.
func (so *SocketOptions) SetReusePort(v bool) {
storeAtomicBool(&so.reusePortEnabled, v)
// GetKeepAlive gets value for SO_KEEPALIVE option.
func (so *SocketOptions) GetKeepAlive() bool {
return atomic.LoadUint32(&so.keepAliveEnabled) != 0
// SetKeepAlive sets value for SO_KEEPALIVE option.
func (so *SocketOptions) SetKeepAlive(v bool) {
storeAtomicBool(&so.keepAliveEnabled, v)
// GetMulticastLoop gets value for IP_MULTICAST_LOOP option.
func (so *SocketOptions) GetMulticastLoop() bool {
return atomic.LoadUint32(&so.multicastLoopEnabled) != 0
// SetMulticastLoop sets value for IP_MULTICAST_LOOP option.
func (so *SocketOptions) SetMulticastLoop(v bool) {
storeAtomicBool(&so.multicastLoopEnabled, v)
// GetReceiveTOS gets value for IP_RECVTOS option.
func (so *SocketOptions) GetReceiveTOS() bool {
return atomic.LoadUint32(&so.receiveTOSEnabled) != 0
// SetReceiveTOS sets value for IP_RECVTOS option.
func (so *SocketOptions) SetReceiveTOS(v bool) {
storeAtomicBool(&so.receiveTOSEnabled, v)
// GetReceiveTClass gets value for IPV6_RECVTCLASS option.
func (so *SocketOptions) GetReceiveTClass() bool {
return atomic.LoadUint32(&so.receiveTClassEnabled) != 0
// SetReceiveTClass sets value for IPV6_RECVTCLASS option.
func (so *SocketOptions) SetReceiveTClass(v bool) {
storeAtomicBool(&so.receiveTClassEnabled, v)
// GetReceivePacketInfo gets value for IP_PKTINFO option.
func (so *SocketOptions) GetReceivePacketInfo() bool {
return atomic.LoadUint32(&so.receivePacketInfoEnabled) != 0
// SetReceivePacketInfo sets value for IP_PKTINFO option.
func (so *SocketOptions) SetReceivePacketInfo(v bool) {
storeAtomicBool(&so.receivePacketInfoEnabled, v)
// GetHeaderIncluded gets value for IP_HDRINCL option.
func (so *SocketOptions) GetHeaderIncluded() bool {
return atomic.LoadUint32(&so.hdrIncludedEnabled) != 0
// SetHeaderIncluded sets value for IP_HDRINCL option.
func (so *SocketOptions) SetHeaderIncluded(v bool) {
storeAtomicBool(&so.hdrIncludedEnabled, v)
// GetV6Only gets value for IPV6_V6ONLY option.
func (so *SocketOptions) GetV6Only() bool {
return atomic.LoadUint32(&so.v6OnlyEnabled) != 0
// SetV6Only sets value for IPV6_V6ONLY option.
// Preconditions: the backing TCP or UDP endpoint must be in initial state.
func (so *SocketOptions) SetV6Only(v bool) {
storeAtomicBool(&so.v6OnlyEnabled, v)
// GetQuickAck gets value for TCP_QUICKACK option.
func (so *SocketOptions) GetQuickAck() bool {
return atomic.LoadUint32(&so.quickAckEnabled) != 0
// SetQuickAck sets value for TCP_QUICKACK option.
func (so *SocketOptions) SetQuickAck(v bool) {
storeAtomicBool(&so.quickAckEnabled, v)
// GetDelayOption gets inverted value for TCP_NODELAY option.
func (so *SocketOptions) GetDelayOption() bool {
return atomic.LoadUint32(&so.delayOptionEnabled) != 0
// SetDelayOption sets inverted value for TCP_NODELAY option.
func (so *SocketOptions) SetDelayOption(v bool) {
storeAtomicBool(&so.delayOptionEnabled, v)
// GetCorkOption gets value for TCP_CORK option.
func (so *SocketOptions) GetCorkOption() bool {
return atomic.LoadUint32(&so.corkOptionEnabled) != 0
// SetCorkOption sets value for TCP_CORK option.
func (so *SocketOptions) SetCorkOption(v bool) {
storeAtomicBool(&so.corkOptionEnabled, v)
// GetReceiveOriginalDstAddress gets value for IP(V6)_RECVORIGDSTADDR option.
func (so *SocketOptions) GetReceiveOriginalDstAddress() bool {
return atomic.LoadUint32(&so.receiveOriginalDstAddress) != 0
// SetReceiveOriginalDstAddress sets value for IP(V6)_RECVORIGDSTADDR option.
func (so *SocketOptions) SetReceiveOriginalDstAddress(v bool) {
storeAtomicBool(&so.receiveOriginalDstAddress, v)
// GetRecvError gets value for IP*_RECVERR option.
func (so *SocketOptions) GetRecvError() bool {
return atomic.LoadUint32(&so.recvErrEnabled) != 0
// SetRecvError sets value for IP*_RECVERR option.
func (so *SocketOptions) SetRecvError(v bool) {
storeAtomicBool(&so.recvErrEnabled, v)
if !v {
// GetLastError gets value for SO_ERROR option.
func (so *SocketOptions) GetLastError() Error {
return so.handler.LastError()
// GetOutOfBandInline gets value for SO_OOBINLINE option.
func (*SocketOptions) GetOutOfBandInline() bool {
return true
// SetOutOfBandInline sets value for SO_OOBINLINE option. We currently do not
// support disabling this option.
func (*SocketOptions) SetOutOfBandInline(bool) {}
// GetLinger gets value for SO_LINGER option.
func (so *SocketOptions) GetLinger() LingerOption {
linger := so.linger
return linger
// SetLinger sets value for SO_LINGER option.
func (so *SocketOptions) SetLinger(linger LingerOption) {
so.linger = linger
// SockErrOrigin represents the constants for error origin.
type SockErrOrigin uint8
const (
// SockExtErrorOriginNone represents an unknown error origin.
SockExtErrorOriginNone SockErrOrigin = iota
// SockExtErrorOriginLocal indicates a local error.
// SockExtErrorOriginICMP indicates an IPv4 ICMP error.
// SockExtErrorOriginICMP6 indicates an IPv6 ICMP error.
// IsICMPErr indicates if the error originated from an ICMP error.
func (origin SockErrOrigin) IsICMPErr() bool {
return origin == SockExtErrorOriginICMP || origin == SockExtErrorOriginICMP6
// SockErrorCause is the cause of a socket error.
type SockErrorCause interface {
// Origin is the source of the error.
Origin() SockErrOrigin
// Type is the origin specific type of error.
Type() uint8
// Code is the origin and type specific error code.
Code() uint8
// Info is any extra information about the error.
Info() uint32
// LocalSockError is a socket error that originated from the local host.
// +stateify savable
type LocalSockError struct {
info uint32
// Origin implements SockErrorCause.
func (*LocalSockError) Origin() SockErrOrigin {
return SockExtErrorOriginLocal
// Type implements SockErrorCause.
func (*LocalSockError) Type() uint8 {
return 0
// Code implements SockErrorCause.
func (*LocalSockError) Code() uint8 {
return 0
// Info implements SockErrorCause.
func (l *LocalSockError) Info() uint32 {
// SockError represents a queue entry in the per-socket error queue.
// +stateify savable
type SockError struct {
// Err is the error caused by the errant packet.
Err Error
// Cause is the detailed cause of the error.
Cause SockErrorCause
// Payload is the errant packet's payload.
Payload []byte
// Dst is the original destination address of the errant packet.
Dst FullAddress
// Offender is the original sender address of the errant packet.
Offender FullAddress
// NetProto is the network protocol being used to transmit the packet.
NetProto NetworkProtocolNumber
// pruneErrQueue resets the queue.
func (so *SocketOptions) pruneErrQueue() {
// DequeueErr dequeues a socket extended error from the error queue and returns
// it. Returns nil if queue is empty.
func (so *SocketOptions) DequeueErr() *SockError {
defer so.errQueueMu.Unlock()
err := so.errQueue.Front()
if err != nil {
return err
// PeekErr returns the error in the front of the error queue. Returns nil if
// the error queue is empty.
func (so *SocketOptions) PeekErr() *SockError {
defer so.errQueueMu.Unlock()
return so.errQueue.Front()
// QueueErr inserts the error at the back of the error queue.
// Preconditions: so.GetRecvError() == true.
func (so *SocketOptions) QueueErr(err *SockError) {
defer so.errQueueMu.Unlock()
// QueueLocalErr queues a local error onto the local queue.
func (so *SocketOptions) QueueLocalErr(err Error, net NetworkProtocolNumber, info uint32, dst FullAddress, payload []byte) {
Err: err,
Cause: &LocalSockError{info: info},
Payload: payload,
Dst: dst,
NetProto: net,
// GetBindToDevice gets value for SO_BINDTODEVICE option.
func (so *SocketOptions) GetBindToDevice() int32 {
return atomic.LoadInt32(&so.bindToDevice)
// SetBindToDevice sets value for SO_BINDTODEVICE option.
func (so *SocketOptions) SetBindToDevice(bindToDevice int32) Error {
if !so.handler.HasNIC(bindToDevice) {
return &ErrUnknownDevice{}
atomic.StoreInt32(&so.bindToDevice, bindToDevice)
return nil
// GetSendBufferSize gets value for SO_SNDBUF option.
func (so *SocketOptions) GetSendBufferSize() int64 {
return atomic.LoadInt64(&so.sendBufferSize)
// SetSendBufferSize sets value for SO_SNDBUF option. notify indicates if the
// stack handler should be invoked to set the send buffer size.
func (so *SocketOptions) SetSendBufferSize(sendBufferSize int64, notify bool) {
v := sendBufferSize
if !notify {
atomic.StoreInt64(&so.sendBufferSize, v)
// Make sure the send buffer size is within the min and max
// allowed.
ss := so.getSendBufferLimits(so.stackHandler)
min := int64(ss.Min)
max := int64(ss.Max)
// Validate the send buffer size with min and max values.
// Multiply it by factor of 2.
if v > max {
v = max
if v < math.MaxInt32/PacketOverheadFactor {
v *= PacketOverheadFactor
if v < min {
v = min
} else {
v = math.MaxInt32
// Notify endpoint about change in buffer size.
newSz := so.handler.OnSetSendBufferSize(v)
atomic.StoreInt64(&so.sendBufferSize, newSz)