| // Copyright 2018 The gVisor Authors. |
| // |
| // Licensed under the Apache License, Version 2.0 (the "License"); |
| // you may not use this file except in compliance with the License. |
| // You may obtain a copy of the License at |
| // |
| // http://www.apache.org/licenses/LICENSE-2.0 |
| // |
| // Unless required by applicable law or agreed to in writing, software |
| // distributed under the License is distributed on an "AS IS" BASIS, |
| // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| // See the License for the specific language governing permissions and |
| // limitations under the License. |
| |
| package stack |
| |
| import ( |
| "gvisor.dev/gvisor/pkg/sleep" |
| "gvisor.dev/gvisor/pkg/tcpip" |
| "gvisor.dev/gvisor/pkg/tcpip/buffer" |
| "gvisor.dev/gvisor/pkg/waiter" |
| ) |
| |
| // NetworkEndpointID is the identifier of a network layer protocol endpoint. |
| // The local address alone is currently sufficient because all supported |
| // protocols (i.e., IPv4 and IPv6) use addresses of different sizes. |
| type NetworkEndpointID struct { |
| LocalAddress tcpip.Address |
| } |
| |
| // TransportEndpointID is the identifier of a transport layer protocol endpoint. |
| // |
| // +stateify savable |
| type TransportEndpointID struct { |
| // LocalPort is the local port associated with the endpoint. |
| LocalPort uint16 |
| |
| // LocalAddress is the local [network layer] address associated with |
| // the endpoint. |
| LocalAddress tcpip.Address |
| |
| // RemotePort is the remote port associated with the endpoint. |
| RemotePort uint16 |
| |
| // RemoteAddress is the remote [network layer] address associated with |
| // the endpoint. |
| RemoteAddress tcpip.Address |
| } |
| |
| // ControlType is the type of a network control message. |
| type ControlType int |
| |
| // The following are the allowed values for ControlType. |
| const ( |
| ControlPacketTooBig ControlType = iota |
| ControlPortUnreachable |
| ControlUnknown |
| ) |
| |
| // TransportEndpoint is the interface that needs to be implemented by transport |
| // protocol (e.g., tcp, udp) endpoints that can handle packets. |
| type TransportEndpoint interface { |
| // UniqueID returns a unique ID for this transport endpoint. |
| UniqueID() uint64 |
| |
| // HandlePacket is called by the stack when new packets arrive at |
| // this transport endpoint. It sets pkt.TransportHeader. |
| // |
| // HandlePacket takes ownership of pkt. |
| HandlePacket(r *Route, id TransportEndpointID, pkt PacketBuffer) |
| |
| // HandleControlPacket is called by the stack when new control (e.g. |
| // ICMP) packets arrive at this transport endpoint. |
| // HandleControlPacket takes ownership of pkt. |
| HandleControlPacket(id TransportEndpointID, typ ControlType, extra uint32, pkt PacketBuffer) |
| |
| // Abort initiates an expedited endpoint teardown. It puts the endpoint |
| // in a closed state and frees all resources associated with it. This |
| // cleanup may happen asynchronously. Wait can be used to block on this |
| // asynchronous cleanup. |
| Abort() |
| |
| // Wait waits for any worker goroutines owned by the endpoint to stop. |
| // |
| // An endpoint can be requested to stop its worker goroutines by calling |
| // its Close method (NOTE(review): Close is not declared on this interface — confirm where it comes from). |
| // |
| // Wait will not block if the endpoint hasn't started any goroutines |
| // yet, even if it might later. |
| Wait() |
| } |
| |
| // RawTransportEndpoint is the interface that needs to be implemented by raw |
| // transport protocol endpoints. RawTransportEndpoints receive the entire |
| // packet - including the network and transport headers - as delivered to |
| // netstack. |
| type RawTransportEndpoint interface { |
| // HandlePacket is called by the stack when new packets arrive at |
| // this transport endpoint. The packet contains all data from the link |
| // layer up. |
| // |
| // HandlePacket takes ownership of pkt. |
| HandlePacket(r *Route, pkt PacketBuffer) |
| } |
| |
| // PacketEndpoint is the interface that needs to be implemented by packet |
| // transport protocol endpoints. These endpoints receive link layer headers in |
| // addition to whatever they contain (usually network and transport layer |
| // headers and a payload). |
| type PacketEndpoint interface { |
| // HandlePacket is called by the stack when new packets arrive that |
| // match the endpoint. |
| // |
| // Implementers should treat pkt as immutable and should copy it |
| // before modification. |
| // |
| // The link header (pkt.LinkHeader) may have a length of 0, in which case the |
| // PacketEndpoint should construct its own ethernet header for applications. |
| // |
| // HandlePacket takes ownership of pkt. |
| HandlePacket(nicID tcpip.NICID, addr tcpip.LinkAddress, netProto tcpip.NetworkProtocolNumber, pkt PacketBuffer) |
| } |
| |
| // TransportProtocol is the interface that needs to be implemented by transport |
| // protocols (e.g., tcp, udp) that want to be part of the networking stack. |
| type TransportProtocol interface { |
| // Number returns the transport protocol number. |
| Number() tcpip.TransportProtocolNumber |
| |
| // NewEndpoint creates a new endpoint of the transport protocol. |
| NewEndpoint(stack *Stack, netProto tcpip.NetworkProtocolNumber, waitQueue *waiter.Queue) (tcpip.Endpoint, *tcpip.Error) |
| |
| // NewRawEndpoint creates a new raw endpoint of the transport protocol. |
| NewRawEndpoint(stack *Stack, netProto tcpip.NetworkProtocolNumber, waitQueue *waiter.Queue) (tcpip.Endpoint, *tcpip.Error) |
| |
| // MinimumPacketSize returns the minimum valid packet size of this |
| // transport protocol. The stack automatically drops any packets smaller |
| // than this targeted at this protocol. |
| MinimumPacketSize() int |
| |
| // ParsePorts returns the source and destination ports stored in a |
| // packet of this protocol. |
| ParsePorts(v buffer.View) (src, dst uint16, err *tcpip.Error) |
| |
| // HandleUnknownDestinationPacket handles packets targeted at this |
| // protocol but that don't match any existing endpoint. For example, |
| // a packet targeted at a port that has no listeners. |
| // |
| // The return value indicates whether the packet was well-formed (for |
| // stats purposes only). |
| // |
| // HandleUnknownDestinationPacket takes ownership of pkt. |
| HandleUnknownDestinationPacket(r *Route, id TransportEndpointID, pkt PacketBuffer) bool |
| |
| // SetOption allows enabling/disabling protocol-specific features. |
| // SetOption returns an error if the option is not supported or the |
| // provided option value is invalid. |
| SetOption(option interface{}) *tcpip.Error |
| |
| // Option allows retrieving protocol-specific option values. |
| // Option returns an error if the option is not supported or the |
| // provided option value is invalid. |
| Option(option interface{}) *tcpip.Error |
| |
| // Close requests that any worker goroutines owned by the protocol |
| // stop. |
| Close() |
| |
| // Wait waits for any worker goroutines owned by the protocol to stop. |
| Wait() |
| } |
| |
| // TransportDispatcher contains the methods used by the network stack to deliver |
| // packets to the appropriate transport endpoint after they have been handled by |
| // the network layer. |
| type TransportDispatcher interface { |
| // DeliverTransportPacket delivers packets to the appropriate |
| // transport protocol endpoint. |
| // |
| // pkt.NetworkHeader must be set before calling DeliverTransportPacket. |
| // |
| // DeliverTransportPacket takes ownership of pkt. |
| DeliverTransportPacket(r *Route, protocol tcpip.TransportProtocolNumber, pkt PacketBuffer) |
| |
| // DeliverTransportControlPacket delivers control packets to the |
| // appropriate transport protocol endpoint. |
| // |
| // pkt.NetworkHeader must be set before calling |
| // DeliverTransportControlPacket. |
| // |
| // DeliverTransportControlPacket takes ownership of pkt. |
| DeliverTransportControlPacket(local, remote tcpip.Address, net tcpip.NetworkProtocolNumber, trans tcpip.TransportProtocolNumber, typ ControlType, extra uint32, pkt PacketBuffer) |
| } |
| |
| // PacketLooping specifies where an outbound packet should be sent; each value is a distinct bit. |
| type PacketLooping byte |
| |
| const ( |
| // PacketOut indicates that the packet should be passed to the link |
| // endpoint. |
| PacketOut PacketLooping = 1 << iota |
| |
| // PacketLoop indicates that the packet should be handled locally. |
| PacketLoop |
| ) |
| |
| // NetworkHeaderParams are the header parameters given as input by the |
| // transport endpoint to the network. |
| type NetworkHeaderParams struct { |
| // Protocol is the transport protocol number. |
| Protocol tcpip.TransportProtocolNumber |
| |
| // TTL is the Time To Live field of the IP header. |
| TTL uint8 |
| |
| // TOS is the TypeOfService or TrafficClass field of the IP header. |
| TOS uint8 |
| } |
| |
| // NetworkEndpoint is the interface that needs to be implemented by endpoints |
| // of network layer protocols (e.g., ipv4, ipv6). |
| type NetworkEndpoint interface { |
| // DefaultTTL is the default time-to-live value (or hop limit, in ipv6) |
| // for this endpoint. |
| DefaultTTL() uint8 |
| |
| // MTU is the maximum transmission unit for this endpoint. This is |
| // generally calculated as the MTU of the underlying data link endpoint |
| // minus the network endpoint max header length. |
| MTU() uint32 |
| |
| // Capabilities returns the set of capabilities supported by the |
| // underlying link-layer endpoint. |
| Capabilities() LinkEndpointCapabilities |
| |
| // MaxHeaderLength returns the maximum size the network (and lower |
| // level layers combined) headers can have. Higher levels use this |
| // information to reserve space in the front of the packets they're |
| // building. |
| MaxHeaderLength() uint16 |
| |
| // WritePacket writes a packet to the given destination address and |
| // protocol. It sets pkt.NetworkHeader. pkt.TransportHeader must have |
| // already been set. |
| WritePacket(r *Route, gso *GSO, params NetworkHeaderParams, pkt PacketBuffer) *tcpip.Error |
| |
| // WritePackets writes packets to the given destination address and |
| // protocol. pkts must not be zero length. |
| WritePackets(r *Route, gso *GSO, pkts PacketBufferList, params NetworkHeaderParams) (int, *tcpip.Error) |
| |
| // WriteHeaderIncludedPacket writes a packet that includes a network |
| // header to the given destination address. |
| WriteHeaderIncludedPacket(r *Route, pkt PacketBuffer) *tcpip.Error |
| |
| // ID returns the network protocol endpoint ID. |
| ID() *NetworkEndpointID |
| |
| // PrefixLen returns the network endpoint's subnet prefix length in bits. |
| PrefixLen() int |
| |
| // NICID returns the id of the NIC this endpoint belongs to. |
| NICID() tcpip.NICID |
| |
| // HandlePacket is called by the link layer when new packets arrive at |
| // this network endpoint. It sets pkt.NetworkHeader. |
| // |
| // HandlePacket takes ownership of pkt. |
| HandlePacket(r *Route, pkt PacketBuffer) |
| |
| // Close is called when the endpoint is removed from a stack. |
| Close() |
| } |
| |
| // NetworkProtocol is the interface that needs to be implemented by network |
| // protocols (e.g., ipv4, ipv6) that want to be part of the networking stack. |
| type NetworkProtocol interface { |
| // Number returns the network protocol number. |
| Number() tcpip.NetworkProtocolNumber |
| |
| // MinimumPacketSize returns the minimum valid packet size of this |
| // network protocol. The stack automatically drops any packets smaller |
| // than this targeted at this protocol. |
| MinimumPacketSize() int |
| |
| // DefaultPrefixLen returns the protocol's default prefix length. |
| DefaultPrefixLen() int |
| |
| // ParseAddresses returns the source and destination addresses stored in a |
| // packet of this protocol. |
| ParseAddresses(v buffer.View) (src, dst tcpip.Address) |
| |
| // NewEndpoint creates a new endpoint of this protocol. |
| NewEndpoint(nicID tcpip.NICID, addrWithPrefix tcpip.AddressWithPrefix, linkAddrCache LinkAddressCache, dispatcher TransportDispatcher, sender LinkEndpoint, st *Stack) (NetworkEndpoint, *tcpip.Error) |
| |
| // SetOption allows enabling/disabling protocol-specific features. |
| // SetOption returns an error if the option is not supported or the |
| // provided option value is invalid. |
| SetOption(option interface{}) *tcpip.Error |
| |
| // Option allows retrieving protocol-specific option values. |
| // Option returns an error if the option is not supported or the |
| // provided option value is invalid. |
| Option(option interface{}) *tcpip.Error |
| |
| // Close requests that any worker goroutines owned by the protocol |
| // stop. |
| Close() |
| |
| // Wait waits for any worker goroutines owned by the protocol to stop. |
| Wait() |
| } |
| |
| // NetworkDispatcher contains the methods used by the network stack to deliver |
| // packets to the appropriate network endpoint after they have been handled by |
| // the data link layer. |
| type NetworkDispatcher interface { |
| // DeliverNetworkPacket finds the appropriate network protocol endpoint |
| // and hands the packet over for further processing. |
| // |
| // pkt.LinkHeader may or may not be set before calling |
| // DeliverNetworkPacket. Some packets do not have link headers (e.g. |
| // packets sent via loopback), and won't have the field set. |
| // |
| // DeliverNetworkPacket takes ownership of pkt. |
| DeliverNetworkPacket(linkEP LinkEndpoint, remote, local tcpip.LinkAddress, protocol tcpip.NetworkProtocolNumber, pkt PacketBuffer) |
| } |
| |
| // LinkEndpointCapabilities is the type associated with the capabilities |
| // supported by a link-layer endpoint. It is a set of bitfields. |
| type LinkEndpointCapabilities uint |
| |
| // The following are the supported link endpoint capabilities. |
| const ( |
| CapabilityNone LinkEndpointCapabilities = 0 |
| // CapabilityTXChecksumOffload indicates that the link endpoint supports |
| // checksum computation for outgoing packets, so the stack can skip computing |
| // checksums when sending. iota is 1 here, so its value is 1<<1 (bit 0 is unused). |
| CapabilityTXChecksumOffload LinkEndpointCapabilities = 1 << iota |
| // CapabilityRXChecksumOffload indicates that the link endpoint supports |
| // checksum verification on received packets and that it's safe for the |
| // stack to skip checksum verification. |
| CapabilityRXChecksumOffload |
| CapabilityResolutionRequired |
| CapabilitySaveRestore |
| CapabilityDisconnectOk |
| CapabilityLoopback |
| CapabilityHardwareGSO |
| |
| // CapabilitySoftwareGSO indicates that the link endpoint supports sending |
| // multiple packets using a single call (LinkEndpoint.WritePackets). |
| CapabilitySoftwareGSO |
| ) |
| |
| // LinkEndpoint is the interface implemented by data link layer protocols (e.g., |
| // ethernet, loopback, raw) and used by network layer protocols to send packets |
| // out through the implementer's data link endpoint. When a link header exists, |
| // it sets each PacketBuffer's LinkHeader field before passing it up the |
| // stack. |
| type LinkEndpoint interface { |
| // MTU is the maximum transmission unit for this endpoint. This is |
| // usually dictated by the backing physical network; when such a |
| // physical network doesn't exist, the limit is generally 64k, which |
| // includes the maximum size of an IP packet. |
| MTU() uint32 |
| |
| // Capabilities returns the set of capabilities supported by the |
| // endpoint. |
| Capabilities() LinkEndpointCapabilities |
| |
| // MaxHeaderLength returns the maximum size the data link (and |
| // lower level layers combined) headers can have. Higher levels use this |
| // information to reserve space in the front of the packets they're |
| // building. |
| MaxHeaderLength() uint16 |
| |
| // LinkAddress returns the link address (typically a MAC) of the |
| // link endpoint. |
| LinkAddress() tcpip.LinkAddress |
| |
| // WritePacket writes a packet with the given protocol through the |
| // given route. It sets pkt.LinkHeader if a link layer header exists. |
| // pkt.NetworkHeader and pkt.TransportHeader must have already been |
| // set. |
| // |
| // To participate in transparent bridging, a LinkEndpoint implementation |
| // should call eth.Encode with header.EthernetFields.SrcAddr set to |
| // r.LocalLinkAddress if it is provided. |
| WritePacket(r *Route, gso *GSO, protocol tcpip.NetworkProtocolNumber, pkt PacketBuffer) *tcpip.Error |
| |
| // WritePackets writes packets with the given protocol through the |
| // given route. pkts must not be zero length. |
| // |
| // Right now, WritePackets is used only when the software segmentation |
| // offload is enabled. If it is ever used for something else, the |
| // syscall filters may need to be changed. |
| WritePackets(r *Route, gso *GSO, pkts PacketBufferList, protocol tcpip.NetworkProtocolNumber) (int, *tcpip.Error) |
| |
| // WriteRawPacket writes a packet directly to the link. The packet |
| // should already have an ethernet header. |
| WriteRawPacket(vv buffer.VectorisedView) *tcpip.Error |
| |
| // Attach attaches the data link layer endpoint to the network-layer |
| // dispatcher of the stack. |
| // |
| // Attach will be called with a nil dispatcher if the receiver's associated |
| // NIC is being removed. |
| Attach(dispatcher NetworkDispatcher) |
| |
| // IsAttached returns whether a NetworkDispatcher is attached to the |
| // endpoint. |
| IsAttached() bool |
| |
| // Wait waits for any worker goroutines owned by the endpoint to stop. |
| // |
| // For now, requesting that an endpoint's worker goroutine(s) stop is |
| // implementation specific. |
| // |
| // Wait will not block if the endpoint hasn't started any goroutines |
| // yet, even if it might later. |
| Wait() |
| } |
| |
| // InjectableLinkEndpoint is a LinkEndpoint where inbound packets are |
| // delivered via the InjectInbound method. |
| type InjectableLinkEndpoint interface { |
| LinkEndpoint |
| |
| // InjectInbound injects an inbound packet. |
| InjectInbound(protocol tcpip.NetworkProtocolNumber, pkt PacketBuffer) |
| |
| // InjectOutbound writes a fully formed outbound packet directly to the |
| // link. |
| // |
| // dest is used by endpoints with multiple raw destinations. |
| InjectOutbound(dest tcpip.Address, packet []byte) *tcpip.Error |
| } |
| |
| // A LinkAddressResolver is an extension to a NetworkProtocol that |
| // can resolve link addresses. |
| type LinkAddressResolver interface { |
| // LinkAddressRequest sends a request for the LinkAddress of addr. |
| // The request is sent on linkEP with localAddr as the source. |
| // |
| // A valid response will cause the discovery protocol's network |
| // endpoint to call AddLinkAddress. |
| LinkAddressRequest(addr, localAddr tcpip.Address, linkEP LinkEndpoint) *tcpip.Error |
| |
| // ResolveStaticAddress attempts to resolve addr without sending |
| // requests. It either resolves the address immediately or returns the |
| // empty LinkAddress. |
| // |
| // It can be used to resolve broadcast addresses for example. |
| ResolveStaticAddress(addr tcpip.Address) (tcpip.LinkAddress, bool) |
| |
| // LinkAddressProtocol returns the network protocol of the |
| // addresses this resolver can resolve. |
| LinkAddressProtocol() tcpip.NetworkProtocolNumber |
| } |
| |
| // A LinkAddressCache caches link addresses. |
| type LinkAddressCache interface { |
| // CheckLocalAddress determines if the given local address exists and returns a NIC ID. |
| // NOTE(review): presumably the owning NIC's ID, or 0 if the address does not exist — confirm. |
| CheckLocalAddress(nicID tcpip.NICID, protocol tcpip.NetworkProtocolNumber, addr tcpip.Address) tcpip.NICID |
| |
| // AddLinkAddress adds a link address to the cache. |
| AddLinkAddress(nicID tcpip.NICID, addr tcpip.Address, linkAddr tcpip.LinkAddress) |
| |
| // GetLinkAddress looks up the cache to translate address to link address (e.g. IP -> MAC). |
| // If the LinkEndpoint requests address resolution and there is a LinkAddressResolver |
| // registered with the network protocol, the cache attempts to resolve the address |
| // and returns ErrWouldBlock. The waker is notified when address resolution is |
| // complete (success or not). |
| // |
| // If address resolution is required, ErrNoLinkAddress and a notification channel are |
| // returned for the top level caller to block on. The channel is closed once address |
| // resolution is complete (success or not). NOTE(review): the two paragraphs name different errors — confirm. |
| GetLinkAddress(nicID tcpip.NICID, addr, localAddr tcpip.Address, protocol tcpip.NetworkProtocolNumber, w *sleep.Waker) (tcpip.LinkAddress, <-chan struct{}, *tcpip.Error) |
| |
| // RemoveWaker removes a waker that has been added in GetLinkAddress(). |
| RemoveWaker(nicID tcpip.NICID, addr tcpip.Address, waker *sleep.Waker) |
| } |
| |
| // RawFactory produces endpoints for writing various types of raw packets. |
| type RawFactory interface { |
| // NewUnassociatedEndpoint produces endpoints for writing packets that are |
| // not associated with a particular transport protocol. Such endpoints can |
| // be used to write arbitrary packets that include the network header. |
| NewUnassociatedEndpoint(stack *Stack, netProto tcpip.NetworkProtocolNumber, transProto tcpip.TransportProtocolNumber, waiterQueue *waiter.Queue) (tcpip.Endpoint, *tcpip.Error) |
| |
| // NewPacketEndpoint produces endpoints for reading and writing packets |
| // that include network and (when cooked is false) link layer headers. |
| NewPacketEndpoint(stack *Stack, cooked bool, netProto tcpip.NetworkProtocolNumber, waiterQueue *waiter.Queue) (tcpip.Endpoint, *tcpip.Error) |
| } |
| |
| // GSOType is the type of GSO segments. |
| // |
| // +stateify savable |
| type GSOType int |
| |
| // Types of GSO segments. |
| const ( |
| GSONone GSOType = iota |
| |
| // Hardware GSO types: |
| GSOTCPv4 |
| GSOTCPv6 |
| |
| // GSOSW is used for software GSO segments, which have to be sent by |
| // endpoint.WritePackets. |
| GSOSW |
| ) |
| |
| // GSO contains generic segmentation offload properties. |
| // |
| // +stateify savable |
| type GSO struct { |
| // Type is one of GSONone, GSOTCPv4, etc. |
| Type GSOType |
| // NeedsCsum is set if the checksum offload is enabled. |
| NeedsCsum bool |
| // CsumOffset is the offset after which to place the checksum. |
| CsumOffset uint16 |
| |
| // MSS is the maximum segment size. |
| MSS uint16 |
| // L3HdrLen is the L3 (IP) header length. |
| L3HdrLen uint16 |
| |
| // MaxSize is the maximum GSO packet size. |
| MaxSize uint32 |
| } |
| |
| // GSOEndpoint provides access to GSO properties; currently only the maximum packet size. |
| type GSOEndpoint interface { |
| // GSOMaxSize returns the maximum GSO packet size. |
| GSOMaxSize() uint32 |
| } |
| |
| // SoftwareGSOMaxSize is the maximum allowed size of a software GSO segment (1<<16 = 65536). |
| // This isn't a hard limit, because it is never set into packet headers. |
| const SoftwareGSOMaxSize = (1 << 16) |