blob: 23ca9ee03639dda8fc14571102f79ae05926090a [file] [log] [blame]
// Copyright 2018 The gVisor Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package stack
import (
"gvisor.dev/gvisor/pkg/sleep"
"gvisor.dev/gvisor/pkg/tcpip"
"gvisor.dev/gvisor/pkg/tcpip/buffer"
"gvisor.dev/gvisor/pkg/waiter"
)
// NetworkEndpointID is the identifier of a network layer protocol endpoint.
// Currently the local address is sufficient because all supported protocols
// (i.e., IPv4 and IPv6) have different sizes for their addresses.
type NetworkEndpointID struct {
LocalAddress tcpip.Address
}
// TransportEndpointID is the identifier of a transport layer protocol endpoint.
//
// +stateify savable
type TransportEndpointID struct {
// LocalPort is the local port associated with the endpoint.
LocalPort uint16
// LocalAddress is the local [network layer] address associated with
// the endpoint.
LocalAddress tcpip.Address
// RemotePort is the remote port associated with the endpoint.
RemotePort uint16
// RemoteAddress it the remote [network layer] address associated with
// the endpoint.
RemoteAddress tcpip.Address
}
// ControlType is the type of network control message.
type ControlType int
// The following are the allowed values for ControlType values.
const (
ControlPacketTooBig ControlType = iota
ControlPortUnreachable
ControlUnknown
)
// TransportEndpoint is the interface that needs to be implemented by transport
// protocol (e.g., tcp, udp) endpoints that can handle packets.
type TransportEndpoint interface {
// UniqueID returns an unique ID for this transport endpoint.
UniqueID() uint64
// HandlePacket is called by the stack when new packets arrive to
// this transport endpoint. It sets pkt.TransportHeader.
//
// HandlePacket takes ownership of pkt.
HandlePacket(r *Route, id TransportEndpointID, pkt PacketBuffer)
// HandleControlPacket is called by the stack when new control (e.g.
// ICMP) packets arrive to this transport endpoint.
// HandleControlPacket takes ownership of pkt.
HandleControlPacket(id TransportEndpointID, typ ControlType, extra uint32, pkt PacketBuffer)
// Abort initiates an expedited endpoint teardown. It puts the endpoint
// in a closed state and frees all resources associated with it. This
// cleanup may happen asynchronously. Wait can be used to block on this
// asynchronous cleanup.
Abort()
// Wait waits for any worker goroutines owned by the endpoint to stop.
//
// An endpoint can be requested to stop its worker goroutines by calling
// its Close method.
//
// Wait will not block if the endpoint hasn't started any goroutines
// yet, even if it might later.
Wait()
}
// RawTransportEndpoint is the interface that needs to be implemented by raw
// transport protocol endpoints. RawTransportEndpoints receive the entire
// packet - including the network and transport headers - as delivered to
// netstack.
type RawTransportEndpoint interface {
// HandlePacket is called by the stack when new packets arrive to
// this transport endpoint. The packet contains all data from the link
// layer up.
//
// HandlePacket takes ownership of pkt.
HandlePacket(r *Route, pkt PacketBuffer)
}
// PacketEndpoint is the interface that needs to be implemented by packet
// transport protocol endpoints. These endpoints receive link layer headers in
// addition to whatever they contain (usually network and transport layer
// headers and a payload).
type PacketEndpoint interface {
// HandlePacket is called by the stack when new packets arrive that
// match the endpoint.
//
// Implementers should treat packet as immutable and should copy it
// before before modification.
//
// linkHeader may have a length of 0, in which case the PacketEndpoint
// should construct its own ethernet header for applications.
//
// HandlePacket takes ownership of pkt.
HandlePacket(nicID tcpip.NICID, addr tcpip.LinkAddress, netProto tcpip.NetworkProtocolNumber, pkt PacketBuffer)
}
// TransportProtocol is the interface that needs to be implemented by transport
// protocols (e.g., tcp, udp) that want to be part of the networking stack.
type TransportProtocol interface {
// Number returns the transport protocol number.
Number() tcpip.TransportProtocolNumber
// NewEndpoint creates a new endpoint of the transport protocol.
NewEndpoint(stack *Stack, netProto tcpip.NetworkProtocolNumber, waitQueue *waiter.Queue) (tcpip.Endpoint, *tcpip.Error)
// NewRawEndpoint creates a new raw endpoint of the transport protocol.
NewRawEndpoint(stack *Stack, netProto tcpip.NetworkProtocolNumber, waitQueue *waiter.Queue) (tcpip.Endpoint, *tcpip.Error)
// MinimumPacketSize returns the minimum valid packet size of this
// transport protocol. The stack automatically drops any packets smaller
// than this targeted at this protocol.
MinimumPacketSize() int
// ParsePorts returns the source and destination ports stored in a
// packet of this protocol.
ParsePorts(v buffer.View) (src, dst uint16, err *tcpip.Error)
// HandleUnknownDestinationPacket handles packets targeted at this
// protocol but that don't match any existing endpoint. For example,
// it is targeted at a port that have no listeners.
//
// The return value indicates whether the packet was well-formed (for
// stats purposes only).
//
// HandleUnknownDestinationPacket takes ownership of pkt.
HandleUnknownDestinationPacket(r *Route, id TransportEndpointID, pkt PacketBuffer) bool
// SetOption allows enabling/disabling protocol specific features.
// SetOption returns an error if the option is not supported or the
// provided option value is invalid.
SetOption(option interface{}) *tcpip.Error
// Option allows retrieving protocol specific option values.
// Option returns an error if the option is not supported or the
// provided option value is invalid.
Option(option interface{}) *tcpip.Error
// Close requests that any worker goroutines owned by the protocol
// stop.
Close()
// Wait waits for any worker goroutines owned by the protocol to stop.
Wait()
}
// TransportDispatcher contains the methods used by the network stack to deliver
// packets to the appropriate transport endpoint after it has been handled by
// the network layer.
type TransportDispatcher interface {
// DeliverTransportPacket delivers packets to the appropriate
// transport protocol endpoint.
//
// pkt.NetworkHeader must be set before calling DeliverTransportPacket.
//
// DeliverTransportPacket takes ownership of pkt.
DeliverTransportPacket(r *Route, protocol tcpip.TransportProtocolNumber, pkt PacketBuffer)
// DeliverTransportControlPacket delivers control packets to the
// appropriate transport protocol endpoint.
//
// pkt.NetworkHeader must be set before calling
// DeliverTransportControlPacket.
//
// DeliverTransportControlPacket takes ownership of pkt.
DeliverTransportControlPacket(local, remote tcpip.Address, net tcpip.NetworkProtocolNumber, trans tcpip.TransportProtocolNumber, typ ControlType, extra uint32, pkt PacketBuffer)
}
// PacketLooping specifies where an outbound packet should be sent.
type PacketLooping byte
const (
// PacketOut indicates that the packet should be passed to the link
// endpoint.
PacketOut PacketLooping = 1 << iota
// PacketLoop indicates that the packet should be handled locally.
PacketLoop
)
// NetworkHeaderParams are the header parameters given as input by the
// transport endpoint to the network.
type NetworkHeaderParams struct {
// Protocol refers to the transport protocol number.
Protocol tcpip.TransportProtocolNumber
// TTL refers to Time To Live field of the IP-header.
TTL uint8
// TOS refers to TypeOfService or TrafficClass field of the IP-header.
TOS uint8
}
// NetworkEndpoint is the interface that needs to be implemented by endpoints
// of network layer protocols (e.g., ipv4, ipv6).
type NetworkEndpoint interface {
// DefaultTTL is the default time-to-live value (or hop limit, in ipv6)
// for this endpoint.
DefaultTTL() uint8
// MTU is the maximum transmission unit for this endpoint. This is
// generally calculated as the MTU of the underlying data link endpoint
// minus the network endpoint max header length.
MTU() uint32
// Capabilities returns the set of capabilities supported by the
// underlying link-layer endpoint.
Capabilities() LinkEndpointCapabilities
// MaxHeaderLength returns the maximum size the network (and lower
// level layers combined) headers can have. Higher levels use this
// information to reserve space in the front of the packets they're
// building.
MaxHeaderLength() uint16
// WritePacket writes a packet to the given destination address and
// protocol. It sets pkt.NetworkHeader. pkt.TransportHeader must have
// already been set.
WritePacket(r *Route, gso *GSO, params NetworkHeaderParams, pkt PacketBuffer) *tcpip.Error
// WritePackets writes packets to the given destination address and
// protocol. pkts must not be zero length.
WritePackets(r *Route, gso *GSO, pkts PacketBufferList, params NetworkHeaderParams) (int, *tcpip.Error)
// WriteHeaderIncludedPacket writes a packet that includes a network
// header to the given destination address.
WriteHeaderIncludedPacket(r *Route, pkt PacketBuffer) *tcpip.Error
// ID returns the network protocol endpoint ID.
ID() *NetworkEndpointID
// PrefixLen returns the network endpoint's subnet prefix length in bits.
PrefixLen() int
// NICID returns the id of the NIC this endpoint belongs to.
NICID() tcpip.NICID
// HandlePacket is called by the link layer when new packets arrive to
// this network endpoint. It sets pkt.NetworkHeader.
//
// HandlePacket takes ownership of pkt.
HandlePacket(r *Route, pkt PacketBuffer)
// Close is called when the endpoint is reomved from a stack.
Close()
}
// NetworkProtocol is the interface that needs to be implemented by network
// protocols (e.g., ipv4, ipv6) that want to be part of the networking stack.
type NetworkProtocol interface {
// Number returns the network protocol number.
Number() tcpip.NetworkProtocolNumber
// MinimumPacketSize returns the minimum valid packet size of this
// network protocol. The stack automatically drops any packets smaller
// than this targeted at this protocol.
MinimumPacketSize() int
// DefaultPrefixLen returns the protocol's default prefix length.
DefaultPrefixLen() int
// ParseAddresses returns the source and destination addresses stored in a
// packet of this protocol.
ParseAddresses(v buffer.View) (src, dst tcpip.Address)
// NewEndpoint creates a new endpoint of this protocol.
NewEndpoint(nicID tcpip.NICID, addrWithPrefix tcpip.AddressWithPrefix, linkAddrCache LinkAddressCache, dispatcher TransportDispatcher, sender LinkEndpoint, st *Stack) (NetworkEndpoint, *tcpip.Error)
// SetOption allows enabling/disabling protocol specific features.
// SetOption returns an error if the option is not supported or the
// provided option value is invalid.
SetOption(option interface{}) *tcpip.Error
// Option allows retrieving protocol specific option values.
// Option returns an error if the option is not supported or the
// provided option value is invalid.
Option(option interface{}) *tcpip.Error
// Close requests that any worker goroutines owned by the protocol
// stop.
Close()
// Wait waits for any worker goroutines owned by the protocol to stop.
Wait()
}
// NetworkDispatcher contains the methods used by the network stack to deliver
// packets to the appropriate network endpoint after it has been handled by
// the data link layer.
type NetworkDispatcher interface {
// DeliverNetworkPacket finds the appropriate network protocol endpoint
// and hands the packet over for further processing.
//
// pkt.LinkHeader may or may not be set before calling
// DeliverNetworkPacket. Some packets do not have link headers (e.g.
// packets sent via loopback), and won't have the field set.
//
// DeliverNetworkPacket takes ownership of pkt.
DeliverNetworkPacket(linkEP LinkEndpoint, remote, local tcpip.LinkAddress, protocol tcpip.NetworkProtocolNumber, pkt PacketBuffer)
}
// LinkEndpointCapabilities is the type associated with the capabilities
// supported by a link-layer endpoint. It is a set of bitfields.
type LinkEndpointCapabilities uint
// The following are the supported link endpoint capabilities.
const (
CapabilityNone LinkEndpointCapabilities = 0
// CapabilityTXChecksumOffload indicates that the link endpoint supports
// checksum computation for outgoing packets and the stack can skip
// computing checksums when sending packets.
CapabilityTXChecksumOffload LinkEndpointCapabilities = 1 << iota
// CapabilityRXChecksumOffload indicates that the link endpoint supports
// checksum verification on received packets and that it's safe for the
// stack to skip checksum verification.
CapabilityRXChecksumOffload
CapabilityResolutionRequired
CapabilitySaveRestore
CapabilityDisconnectOk
CapabilityLoopback
CapabilityHardwareGSO
// CapabilitySoftwareGSO indicates the link endpoint supports of sending
// multiple packets using a single call (LinkEndpoint.WritePackets).
CapabilitySoftwareGSO
)
// LinkEndpoint is the interface implemented by data link layer protocols (e.g.,
// ethernet, loopback, raw) and used by network layer protocols to send packets
// out through the implementer's data link endpoint. When a link header exists,
// it sets each PacketBuffer's LinkHeader field before passing it up the
// stack.
type LinkEndpoint interface {
// MTU is the maximum transmission unit for this endpoint. This is
// usually dictated by the backing physical network; when such a
// physical network doesn't exist, the limit is generally 64k, which
// includes the maximum size of an IP packet.
MTU() uint32
// Capabilities returns the set of capabilities supported by the
// endpoint.
Capabilities() LinkEndpointCapabilities
// MaxHeaderLength returns the maximum size the data link (and
// lower level layers combined) headers can have. Higher levels use this
// information to reserve space in the front of the packets they're
// building.
MaxHeaderLength() uint16
// LinkAddress returns the link address (typically a MAC) of the
// link endpoint.
LinkAddress() tcpip.LinkAddress
// WritePacket writes a packet with the given protocol through the
// given route. It sets pkt.LinkHeader if a link layer header exists.
// pkt.NetworkHeader and pkt.TransportHeader must have already been
// set.
//
// To participate in transparent bridging, a LinkEndpoint implementation
// should call eth.Encode with header.EthernetFields.SrcAddr set to
// r.LocalLinkAddress if it is provided.
WritePacket(r *Route, gso *GSO, protocol tcpip.NetworkProtocolNumber, pkt PacketBuffer) *tcpip.Error
// WritePackets writes packets with the given protocol through the
// given route. pkts must not be zero length.
//
// Right now, WritePackets is used only when the software segmentation
// offload is enabled. If it will be used for something else, it may
// require to change syscall filters.
WritePackets(r *Route, gso *GSO, pkts PacketBufferList, protocol tcpip.NetworkProtocolNumber) (int, *tcpip.Error)
// WriteRawPacket writes a packet directly to the link. The packet
// should already have an ethernet header.
WriteRawPacket(vv buffer.VectorisedView) *tcpip.Error
// Attach attaches the data link layer endpoint to the network-layer
// dispatcher of the stack.
//
// Attach will be called with a nil dispatcher if the receiver's associated
// NIC is being removed.
Attach(dispatcher NetworkDispatcher)
// IsAttached returns whether a NetworkDispatcher is attached to the
// endpoint.
IsAttached() bool
// Wait waits for any worker goroutines owned by the endpoint to stop.
//
// For now, requesting that an endpoint's worker goroutine(s) stop is
// implementation specific.
//
// Wait will not block if the endpoint hasn't started any goroutines
// yet, even if it might later.
Wait()
}
// InjectableLinkEndpoint is a LinkEndpoint where inbound packets are
// delivered via the Inject method.
type InjectableLinkEndpoint interface {
LinkEndpoint
// InjectInbound injects an inbound packet.
InjectInbound(protocol tcpip.NetworkProtocolNumber, pkt PacketBuffer)
// InjectOutbound writes a fully formed outbound packet directly to the
// link.
//
// dest is used by endpoints with multiple raw destinations.
InjectOutbound(dest tcpip.Address, packet []byte) *tcpip.Error
}
// A LinkAddressResolver is an extension to a NetworkProtocol that
// can resolve link addresses.
type LinkAddressResolver interface {
// LinkAddressRequest sends a request for the LinkAddress of addr.
// The request is sent on linkEP with localAddr as the source.
//
// A valid response will cause the discovery protocol's network
// endpoint to call AddLinkAddress.
LinkAddressRequest(addr, localAddr tcpip.Address, linkEP LinkEndpoint) *tcpip.Error
// ResolveStaticAddress attempts to resolve address without sending
// requests. It either resolves the name immediately or returns the
// empty LinkAddress.
//
// It can be used to resolve broadcast addresses for example.
ResolveStaticAddress(addr tcpip.Address) (tcpip.LinkAddress, bool)
// LinkAddressProtocol returns the network protocol of the
// addresses this this resolver can resolve.
LinkAddressProtocol() tcpip.NetworkProtocolNumber
}
// A LinkAddressCache caches link addresses.
type LinkAddressCache interface {
// CheckLocalAddress determines if the given local address exists, and if it
// does not exist.
CheckLocalAddress(nicID tcpip.NICID, protocol tcpip.NetworkProtocolNumber, addr tcpip.Address) tcpip.NICID
// AddLinkAddress adds a link address to the cache.
AddLinkAddress(nicID tcpip.NICID, addr tcpip.Address, linkAddr tcpip.LinkAddress)
// GetLinkAddress looks up the cache to translate address to link address (e.g. IP -> MAC).
// If the LinkEndpoint requests address resolution and there is a LinkAddressResolver
// registered with the network protocol, the cache attempts to resolve the address
// and returns ErrWouldBlock. Waker is notified when address resolution is
// complete (success or not).
//
// If address resolution is required, ErrNoLinkAddress and a notification channel is
// returned for the top level caller to block. Channel is closed once address resolution
// is complete (success or not).
GetLinkAddress(nicID tcpip.NICID, addr, localAddr tcpip.Address, protocol tcpip.NetworkProtocolNumber, w *sleep.Waker) (tcpip.LinkAddress, <-chan struct{}, *tcpip.Error)
// RemoveWaker removes a waker that has been added in GetLinkAddress().
RemoveWaker(nicID tcpip.NICID, addr tcpip.Address, waker *sleep.Waker)
}
// RawFactory produces endpoints for writing various types of raw packets.
type RawFactory interface {
// NewUnassociatedEndpoint produces endpoints for writing packets not
// associated with a particular transport protocol. Such endpoints can
// be used to write arbitrary packets that include the network header.
NewUnassociatedEndpoint(stack *Stack, netProto tcpip.NetworkProtocolNumber, transProto tcpip.TransportProtocolNumber, waiterQueue *waiter.Queue) (tcpip.Endpoint, *tcpip.Error)
// NewPacketEndpoint produces endpoints for reading and writing packets
// that include network and (when cooked is false) link layer headers.
NewPacketEndpoint(stack *Stack, cooked bool, netProto tcpip.NetworkProtocolNumber, waiterQueue *waiter.Queue) (tcpip.Endpoint, *tcpip.Error)
}
// GSOType is the type of GSO segments.
//
// +stateify savable
type GSOType int
// Types of gso segments.
const (
GSONone GSOType = iota
// Hardware GSO types:
GSOTCPv4
GSOTCPv6
// GSOSW is used for software GSO segments which have to be sent by
// endpoint.WritePackets.
GSOSW
)
// GSO contains generic segmentation offload properties.
//
// +stateify savable
type GSO struct {
// Type is one of GSONone, GSOTCPv4, etc.
Type GSOType
// NeedsCsum is set if the checksum offload is enabled.
NeedsCsum bool
// CsumOffset is offset after that to place checksum.
CsumOffset uint16
// Mss is maximum segment size.
MSS uint16
// L3Len is L3 (IP) header length.
L3HdrLen uint16
// MaxSize is maximum GSO packet size.
MaxSize uint32
}
// GSOEndpoint provides access to GSO properties.
type GSOEndpoint interface {
// GSOMaxSize returns the maximum GSO packet size.
GSOMaxSize() uint32
}
// SoftwareGSOMaxSize is a maximum allowed size of a software GSO segment.
// This isn't a hard limit, because it is never set into packet headers.
const SoftwareGSOMaxSize = (1 << 16)