| // Copyright 2018 The gVisor Authors. |
| // |
| // Licensed under the Apache License, Version 2.0 (the "License"); |
| // you may not use this file except in compliance with the License. |
| // You may obtain a copy of the License at |
| // |
| // http://www.apache.org/licenses/LICENSE-2.0 |
| // |
| // Unless required by applicable law or agreed to in writing, software |
| // distributed under the License is distributed on an "AS IS" BASIS, |
| // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| // See the License for the specific language governing permissions and |
| // limitations under the License. |
| |
| // Package stack provides the glue between networking protocols and the |
| // consumers of the networking stack. |
| // |
| // For consumers, the only function of interest is New(), everything else is |
| // provided by the tcpip/public package. |
| // |
| // For protocol implementers, RegisterTransportProtocolFactory() and |
| // RegisterNetworkProtocolFactory() are used to register protocol factories with |
| // the stack, which will then be used to instantiate protocol objects when |
| // consumers interact with the stack. |
| package stack |
| |
| import ( |
| "sync" |
| "time" |
| |
| "github.com/google/netstack/sleep" |
| "github.com/google/netstack/tcpip" |
| "github.com/google/netstack/tcpip/buffer" |
| "github.com/google/netstack/tcpip/header" |
| "github.com/google/netstack/tcpip/ports" |
| "github.com/google/netstack/tcpip/seqnum" |
| "github.com/google/netstack/waiter" |
| ) |
| |
// Tunables handed to the link address cache when a Stack is created.
const (
	// ageLimit matches the cache stale time used in Linux.
	ageLimit = time.Minute

	// resolutionTimeout matches the ARP timeout used in Linux.
	resolutionTimeout = time.Second

	// resolutionAttempts matches the number of ARP retries used in Linux.
	resolutionAttempts = 3
)
| |
| type transportProtocolState struct { |
| proto TransportProtocol |
| defaultHandler func(r *Route, id TransportEndpointID, netHeader buffer.View, vv buffer.VectorisedView) bool |
| } |
| |
| // TCPProbeFunc is the expected function type for a TCP probe function to be |
| // passed to stack.AddTCPProbe. |
| type TCPProbeFunc func(s TCPEndpointState) |
| |
// TCPCubicState holds a copy of the internal cubic state of a TCP endpoint,
// taken at the time the TCPProbeFunc is invoked.
//
// NOTE(review): the individual fields are not documented in this file; they
// presumably mirror the variables of the CUBIC congestion control algorithm
// (RFC 8312) — confirm against the TCP implementation before relying on
// their exact meaning or units.
type TCPCubicState struct {
	WLastMax                float64
	WMax                    float64
	T                       time.Time
	TimeSinceLastCongestion time.Duration
	C                       float64
	K                       float64
	Beta                    float64
	WC                      float64
	WEst                    float64
}
| |
| // TCPEndpointID is the unique 4 tuple that identifies a given endpoint. |
| type TCPEndpointID struct { |
| // LocalPort is the local port associated with the endpoint. |
| LocalPort uint16 |
| |
| // LocalAddress is the local [network layer] address associated with |
| // the endpoint. |
| LocalAddress tcpip.Address |
| |
| // RemotePort is the remote port associated with the endpoint. |
| RemotePort uint16 |
| |
| // RemoteAddress it the remote [network layer] address associated with |
| // the endpoint. |
| RemoteAddress tcpip.Address |
| } |
| |
| // TCPFastRecoveryState holds a copy of the internal fast recovery state of a |
| // TCP endpoint. |
| type TCPFastRecoveryState struct { |
| // Active if true indicates the endpoint is in fast recovery. |
| Active bool |
| |
| // First is the first unacknowledged sequence number being recovered. |
| First seqnum.Value |
| |
| // Last is the 'recover' sequence number that indicates the point at |
| // which we should exit recovery barring any timeouts etc. |
| Last seqnum.Value |
| |
| // MaxCwnd is the maximum value we are permitted to grow the congestion |
| // window during recovery. This is set at the time we enter recovery. |
| MaxCwnd int |
| |
| // HighRxt is the highest sequence number which has been retransmitted |
| // during the current loss recovery phase. |
| // See: RFC 6675 Section 2 for details. |
| HighRxt seqnum.Value |
| |
| // RescueRxt is the highest sequence number which has been |
| // optimistically retransmitted to prevent stalling of the ACK clock |
| // when there is loss at the end of the window and no new data is |
| // available for transmission. |
| // See: RFC 6675 Section 2 for details. |
| RescueRxt seqnum.Value |
| } |
| |
| // TCPReceiverState holds a copy of the internal state of the receiver for |
| // a given TCP endpoint. |
| type TCPReceiverState struct { |
| // RcvNxt is the TCP variable RCV.NXT. |
| RcvNxt seqnum.Value |
| |
| // RcvAcc is the TCP variable RCV.ACC. |
| RcvAcc seqnum.Value |
| |
| // RcvWndScale is the window scaling to use for inbound segments. |
| RcvWndScale uint8 |
| |
| // PendingBufUsed is the number of bytes pending in the receive |
| // queue. |
| PendingBufUsed seqnum.Size |
| |
| // PendingBufSize is the size of the socket receive buffer. |
| PendingBufSize seqnum.Size |
| } |
| |
| // TCPSenderState holds a copy of the internal state of the sender for |
| // a given TCP Endpoint. |
| type TCPSenderState struct { |
| // LastSendTime is the time at which we sent the last segment. |
| LastSendTime time.Time |
| |
| // DupAckCount is the number of Duplicate ACK's received. |
| DupAckCount int |
| |
| // SndCwnd is the size of the sending congestion window in packets. |
| SndCwnd int |
| |
| // Ssthresh is the slow start threshold in packets. |
| Ssthresh int |
| |
| // SndCAAckCount is the number of packets consumed in congestion |
| // avoidance mode. |
| SndCAAckCount int |
| |
| // Outstanding is the number of packets in flight. |
| Outstanding int |
| |
| // SndWnd is the send window size in bytes. |
| SndWnd seqnum.Size |
| |
| // SndUna is the next unacknowledged sequence number. |
| SndUna seqnum.Value |
| |
| // SndNxt is the sequence number of the next segment to be sent. |
| SndNxt seqnum.Value |
| |
| // RTTMeasureSeqNum is the sequence number being used for the latest RTT |
| // measurement. |
| RTTMeasureSeqNum seqnum.Value |
| |
| // RTTMeasureTime is the time when the RTTMeasureSeqNum was sent. |
| RTTMeasureTime time.Time |
| |
| // Closed indicates that the caller has closed the endpoint for sending. |
| Closed bool |
| |
| // SRTT is the smoothed round-trip time as defined in section 2 of |
| // RFC 6298. |
| SRTT time.Duration |
| |
| // RTO is the retransmit timeout as defined in section of 2 of RFC 6298. |
| RTO time.Duration |
| |
| // RTTVar is the round-trip time variation as defined in section 2 of |
| // RFC 6298. |
| RTTVar time.Duration |
| |
| // SRTTInited if true indicates take a valid RTT measurement has been |
| // completed. |
| SRTTInited bool |
| |
| // MaxPayloadSize is the maximum size of the payload of a given segment. |
| // It is initialized on demand. |
| MaxPayloadSize int |
| |
| // SndWndScale is the number of bits to shift left when reading the send |
| // window size from a segment. |
| SndWndScale uint8 |
| |
| // MaxSentAck is the highest acknowledgement number sent till now. |
| MaxSentAck seqnum.Value |
| |
| // FastRecovery holds the fast recovery state for the endpoint. |
| FastRecovery TCPFastRecoveryState |
| |
| // Cubic holds the state related to CUBIC congestion control. |
| Cubic TCPCubicState |
| } |
| |
| // TCPSACKInfo holds TCP SACK related information for a given TCP endpoint. |
| type TCPSACKInfo struct { |
| // Blocks is the list of SACK Blocks that identify the out of order segments |
| // held by a given TCP endpoint. |
| Blocks []header.SACKBlock |
| |
| // ReceivedBlocks are the SACK blocks received by this endpoint |
| // from the peer endpoint. |
| ReceivedBlocks []header.SACKBlock |
| |
| // MaxSACKED is the highest sequence number that has been SACKED |
| // by the peer. |
| MaxSACKED seqnum.Value |
| } |
| |
| // TCPEndpointState is a copy of the internal state of a TCP endpoint. |
| type TCPEndpointState struct { |
| // ID is a copy of the TransportEndpointID for the endpoint. |
| ID TCPEndpointID |
| |
| // SegTime denotes the absolute time when this segment was received. |
| SegTime time.Time |
| |
| // RcvBufSize is the size of the receive socket buffer for the endpoint. |
| RcvBufSize int |
| |
| // RcvBufUsed is the amount of bytes actually held in the receive socket |
| // buffer for the endpoint. |
| RcvBufUsed int |
| |
| // RcvClosed if true, indicates the endpoint has been closed for reading. |
| RcvClosed bool |
| |
| // SendTSOk is used to indicate when the TS Option has been negotiated. |
| // When sendTSOk is true every non-RST segment should carry a TS as per |
| // RFC7323#section-1.1. |
| SendTSOk bool |
| |
| // RecentTS is the timestamp that should be sent in the TSEcr field of |
| // the timestamp for future segments sent by the endpoint. This field is |
| // updated if required when a new segment is received by this endpoint. |
| RecentTS uint32 |
| |
| // TSOffset is a randomized offset added to the value of the TSVal field |
| // in the timestamp option. |
| TSOffset uint32 |
| |
| // SACKPermitted is set to true if the peer sends the TCPSACKPermitted |
| // option in the SYN/SYN-ACK. |
| SACKPermitted bool |
| |
| // SACK holds TCP SACK related information for this endpoint. |
| SACK TCPSACKInfo |
| |
| // SndBufSize is the size of the socket send buffer. |
| SndBufSize int |
| |
| // SndBufUsed is the number of bytes held in the socket send buffer. |
| SndBufUsed int |
| |
| // SndClosed indicates that the endpoint has been closed for sends. |
| SndClosed bool |
| |
| // SndBufInQueue is the number of bytes in the send queue. |
| SndBufInQueue seqnum.Size |
| |
| // PacketTooBigCount is used to notify the main protocol routine how |
| // many times a "packet too big" control packet is received. |
| PacketTooBigCount int |
| |
| // SndMTU is the smallest MTU seen in the control packets received. |
| SndMTU int |
| |
| // Receiver holds variables related to the TCP receiver for the endpoint. |
| Receiver TCPReceiverState |
| |
| // Sender holds state related to the TCP Sender for the endpoint. |
| Sender TCPSenderState |
| } |
| |
| // Stack is a networking stack, with all supported protocols, NICs, and route |
| // table. |
| type Stack struct { |
| transportProtocols map[tcpip.TransportProtocolNumber]*transportProtocolState |
| networkProtocols map[tcpip.NetworkProtocolNumber]NetworkProtocol |
| linkAddrResolvers map[tcpip.NetworkProtocolNumber]LinkAddressResolver |
| |
| demux *transportDemuxer |
| |
| stats tcpip.Stats |
| |
| linkAddrCache *linkAddrCache |
| |
| // raw indicates whether raw sockets may be created. It is set during |
| // Stack creation and is immutable. |
| raw bool |
| |
| mu sync.RWMutex |
| nics map[tcpip.NICID]*NIC |
| forwarding bool |
| |
| // route is the route table passed in by the user via SetRouteTable(), |
| // it is used by FindRoute() to build a route for a specific |
| // destination. |
| routeTable []tcpip.Route |
| |
| *ports.PortManager |
| |
| // If not nil, then any new endpoints will have this probe function |
| // invoked everytime they receive a TCP segment. |
| tcpProbeFunc TCPProbeFunc |
| |
| // clock is used to generate user-visible times. |
| clock tcpip.Clock |
| |
| // handleLocal allows non-loopback interfaces to loop packets. |
| handleLocal bool |
| } |
| |
| // Options contains optional Stack configuration. |
| type Options struct { |
| // Clock is an optional clock source used for timestampping packets. |
| // |
| // If no Clock is specified, the clock source will be time.Now. |
| Clock tcpip.Clock |
| |
| // Stats are optional statistic counters. |
| Stats tcpip.Stats |
| |
| // HandleLocal indicates whether packets destined to their source |
| // should be handled by the stack internally (true) or outside the |
| // stack (false). |
| HandleLocal bool |
| |
| // Raw indicates whether raw sockets may be created. |
| Raw bool |
| } |
| |
| // New allocates a new networking stack with only the requested networking and |
| // transport protocols configured with default options. |
| // |
| // Protocol options can be changed by calling the |
| // SetNetworkProtocolOption/SetTransportProtocolOption methods provided by the |
| // stack. Please refer to individual protocol implementations as to what options |
| // are supported. |
| func New(network []string, transport []string, opts Options) *Stack { |
| clock := opts.Clock |
| if clock == nil { |
| clock = &tcpip.StdClock{} |
| } |
| |
| s := &Stack{ |
| transportProtocols: make(map[tcpip.TransportProtocolNumber]*transportProtocolState), |
| networkProtocols: make(map[tcpip.NetworkProtocolNumber]NetworkProtocol), |
| linkAddrResolvers: make(map[tcpip.NetworkProtocolNumber]LinkAddressResolver), |
| nics: make(map[tcpip.NICID]*NIC), |
| linkAddrCache: newLinkAddrCache(ageLimit, resolutionTimeout, resolutionAttempts), |
| PortManager: ports.NewPortManager(), |
| clock: clock, |
| stats: opts.Stats.FillIn(), |
| handleLocal: opts.HandleLocal, |
| raw: opts.Raw, |
| } |
| |
| // Add specified network protocols. |
| for _, name := range network { |
| netProtoFactory, ok := networkProtocols[name] |
| if !ok { |
| continue |
| } |
| netProto := netProtoFactory() |
| s.networkProtocols[netProto.Number()] = netProto |
| if r, ok := netProto.(LinkAddressResolver); ok { |
| s.linkAddrResolvers[r.LinkAddressProtocol()] = r |
| } |
| } |
| |
| // Add specified transport protocols. |
| for _, name := range transport { |
| transProtoFactory, ok := transportProtocols[name] |
| if !ok { |
| continue |
| } |
| transProto := transProtoFactory() |
| s.transportProtocols[transProto.Number()] = &transportProtocolState{ |
| proto: transProto, |
| } |
| } |
| |
| // Create the global transport demuxer. |
| s.demux = newTransportDemuxer(s) |
| |
| return s |
| } |
| |
| // SetNetworkProtocolOption allows configuring individual protocol level |
| // options. This method returns an error if the protocol is not supported or |
| // option is not supported by the protocol implementation or the provided value |
| // is incorrect. |
| func (s *Stack) SetNetworkProtocolOption(network tcpip.NetworkProtocolNumber, option interface{}) *tcpip.Error { |
| netProto, ok := s.networkProtocols[network] |
| if !ok { |
| return tcpip.ErrUnknownProtocol |
| } |
| return netProto.SetOption(option) |
| } |
| |
| // NetworkProtocolOption allows retrieving individual protocol level option |
| // values. This method returns an error if the protocol is not supported or |
| // option is not supported by the protocol implementation. |
| // e.g. |
| // var v ipv4.MyOption |
| // err := s.NetworkProtocolOption(tcpip.IPv4ProtocolNumber, &v) |
| // if err != nil { |
| // ... |
| // } |
| func (s *Stack) NetworkProtocolOption(network tcpip.NetworkProtocolNumber, option interface{}) *tcpip.Error { |
| netProto, ok := s.networkProtocols[network] |
| if !ok { |
| return tcpip.ErrUnknownProtocol |
| } |
| return netProto.Option(option) |
| } |
| |
| // SetTransportProtocolOption allows configuring individual protocol level |
| // options. This method returns an error if the protocol is not supported or |
| // option is not supported by the protocol implementation or the provided value |
| // is incorrect. |
| func (s *Stack) SetTransportProtocolOption(transport tcpip.TransportProtocolNumber, option interface{}) *tcpip.Error { |
| transProtoState, ok := s.transportProtocols[transport] |
| if !ok { |
| return tcpip.ErrUnknownProtocol |
| } |
| return transProtoState.proto.SetOption(option) |
| } |
| |
| // TransportProtocolOption allows retrieving individual protocol level option |
| // values. This method returns an error if the protocol is not supported or |
| // option is not supported by the protocol implementation. |
| // var v tcp.SACKEnabled |
| // if err := s.TransportProtocolOption(tcpip.TCPProtocolNumber, &v); err != nil { |
| // ... |
| // } |
| func (s *Stack) TransportProtocolOption(transport tcpip.TransportProtocolNumber, option interface{}) *tcpip.Error { |
| transProtoState, ok := s.transportProtocols[transport] |
| if !ok { |
| return tcpip.ErrUnknownProtocol |
| } |
| return transProtoState.proto.Option(option) |
| } |
| |
| // SetTransportProtocolHandler sets the per-stack default handler for the given |
| // protocol. |
| // |
| // It must be called only during initialization of the stack. Changing it as the |
| // stack is operating is not supported. |
| func (s *Stack) SetTransportProtocolHandler(p tcpip.TransportProtocolNumber, h func(*Route, TransportEndpointID, buffer.View, buffer.VectorisedView) bool) { |
| state := s.transportProtocols[p] |
| if state != nil { |
| state.defaultHandler = h |
| } |
| } |
| |
| // NowNanoseconds implements tcpip.Clock.NowNanoseconds. |
| func (s *Stack) NowNanoseconds() int64 { |
| return s.clock.NowNanoseconds() |
| } |
| |
| // Stats returns a mutable copy of the current stats. |
| // |
| // This is not generally exported via the public interface, but is available |
| // internally. |
| func (s *Stack) Stats() tcpip.Stats { |
| return s.stats |
| } |
| |
| // SetForwarding enables or disables the packet forwarding between NICs. |
| func (s *Stack) SetForwarding(enable bool) { |
| // TODO(igudger, bgeffon): Expose via /proc/sys/net/ipv4/ip_forward. |
| s.mu.Lock() |
| s.forwarding = enable |
| s.mu.Unlock() |
| } |
| |
| // Forwarding returns if the packet forwarding between NICs is enabled. |
| func (s *Stack) Forwarding() bool { |
| // TODO(igudger, bgeffon): Expose via /proc/sys/net/ipv4/ip_forward. |
| s.mu.RLock() |
| defer s.mu.RUnlock() |
| return s.forwarding |
| } |
| |
| // SetRouteTable assigns the route table to be used by this stack. It |
| // specifies which NIC to use for given destination address ranges. |
| func (s *Stack) SetRouteTable(table []tcpip.Route) { |
| s.mu.Lock() |
| defer s.mu.Unlock() |
| |
| s.routeTable = table |
| } |
| |
| // GetRouteTable returns the route table which is currently in use. |
| func (s *Stack) GetRouteTable() []tcpip.Route { |
| s.mu.Lock() |
| defer s.mu.Unlock() |
| return append([]tcpip.Route(nil), s.routeTable...) |
| } |
| |
| // NewEndpoint creates a new transport layer endpoint of the given protocol. |
| func (s *Stack) NewEndpoint(transport tcpip.TransportProtocolNumber, network tcpip.NetworkProtocolNumber, waiterQueue *waiter.Queue) (tcpip.Endpoint, *tcpip.Error) { |
| t, ok := s.transportProtocols[transport] |
| if !ok { |
| return nil, tcpip.ErrUnknownProtocol |
| } |
| |
| return t.proto.NewEndpoint(s, network, waiterQueue) |
| } |
| |
| // NewRawEndpoint creates a new raw transport layer endpoint of the given |
| // protocol. Raw endpoints receive all traffic for a given protocol regardless |
| // of address. |
| func (s *Stack) NewRawEndpoint(transport tcpip.TransportProtocolNumber, network tcpip.NetworkProtocolNumber, waiterQueue *waiter.Queue) (tcpip.Endpoint, *tcpip.Error) { |
| if !s.raw { |
| return nil, tcpip.ErrNotPermitted |
| } |
| |
| t, ok := s.transportProtocols[transport] |
| if !ok { |
| return nil, tcpip.ErrUnknownProtocol |
| } |
| |
| return t.proto.NewRawEndpoint(s, network, waiterQueue) |
| } |
| |
| // createNIC creates a NIC with the provided id and link-layer endpoint, and |
| // optionally enable it. |
| func (s *Stack) createNIC(id tcpip.NICID, name string, linkEP tcpip.LinkEndpointID, enabled, loopback bool) *tcpip.Error { |
| ep := FindLinkEndpoint(linkEP) |
| if ep == nil { |
| return tcpip.ErrBadLinkEndpoint |
| } |
| |
| s.mu.Lock() |
| defer s.mu.Unlock() |
| |
| // Make sure id is unique. |
| if _, ok := s.nics[id]; ok { |
| return tcpip.ErrDuplicateNICID |
| } |
| |
| n := newNIC(s, id, name, ep, loopback) |
| |
| s.nics[id] = n |
| if enabled { |
| n.attachLinkEndpoint() |
| } |
| |
| return nil |
| } |
| |
| // CreateNIC creates a NIC with the provided id and link-layer endpoint. |
| func (s *Stack) CreateNIC(id tcpip.NICID, linkEP tcpip.LinkEndpointID) *tcpip.Error { |
| return s.createNIC(id, "", linkEP, true, false) |
| } |
| |
| // CreateNamedNIC creates a NIC with the provided id and link-layer endpoint, |
| // and a human-readable name. |
| func (s *Stack) CreateNamedNIC(id tcpip.NICID, name string, linkEP tcpip.LinkEndpointID) *tcpip.Error { |
| return s.createNIC(id, name, linkEP, true, false) |
| } |
| |
| // CreateNamedLoopbackNIC creates a NIC with the provided id and link-layer |
| // endpoint, and a human-readable name. |
| func (s *Stack) CreateNamedLoopbackNIC(id tcpip.NICID, name string, linkEP tcpip.LinkEndpointID) *tcpip.Error { |
| return s.createNIC(id, name, linkEP, true, true) |
| } |
| |
| // CreateDisabledNIC creates a NIC with the provided id and link-layer endpoint, |
| // but leave it disable. Stack.EnableNIC must be called before the link-layer |
| // endpoint starts delivering packets to it. |
| func (s *Stack) CreateDisabledNIC(id tcpip.NICID, linkEP tcpip.LinkEndpointID) *tcpip.Error { |
| return s.createNIC(id, "", linkEP, false, false) |
| } |
| |
| // CreateDisabledNamedNIC is a combination of CreateNamedNIC and |
| // CreateDisabledNIC. |
| func (s *Stack) CreateDisabledNamedNIC(id tcpip.NICID, name string, linkEP tcpip.LinkEndpointID) *tcpip.Error { |
| return s.createNIC(id, name, linkEP, false, false) |
| } |
| |
| // EnableNIC enables the given NIC so that the link-layer endpoint can start |
| // delivering packets to it. |
| func (s *Stack) EnableNIC(id tcpip.NICID) *tcpip.Error { |
| s.mu.RLock() |
| defer s.mu.RUnlock() |
| |
| nic := s.nics[id] |
| if nic == nil { |
| return tcpip.ErrUnknownNICID |
| } |
| |
| nic.attachLinkEndpoint() |
| |
| return nil |
| } |
| |
| // CheckNIC checks if a NIC is usable. |
| func (s *Stack) CheckNIC(id tcpip.NICID) bool { |
| s.mu.RLock() |
| nic, ok := s.nics[id] |
| s.mu.RUnlock() |
| if ok { |
| return nic.linkEP.IsAttached() |
| } |
| return false |
| } |
| |
| // NICSubnets returns a map of NICIDs to their associated subnets. |
| func (s *Stack) NICSubnets() map[tcpip.NICID][]tcpip.Subnet { |
| s.mu.RLock() |
| defer s.mu.RUnlock() |
| |
| nics := map[tcpip.NICID][]tcpip.Subnet{} |
| |
| for id, nic := range s.nics { |
| nics[id] = append(nics[id], nic.Subnets()...) |
| } |
| return nics |
| } |
| |
| // NICInfo captures the name and addresses assigned to a NIC. |
| type NICInfo struct { |
| Name string |
| LinkAddress tcpip.LinkAddress |
| ProtocolAddresses []tcpip.ProtocolAddress |
| |
| // Flags indicate the state of the NIC. |
| Flags NICStateFlags |
| |
| // MTU is the maximum transmission unit. |
| MTU uint32 |
| |
| Stats NICStats |
| } |
| |
| // NICInfo returns a map of NICIDs to their associated information. |
| func (s *Stack) NICInfo() map[tcpip.NICID]NICInfo { |
| s.mu.RLock() |
| defer s.mu.RUnlock() |
| |
| nics := make(map[tcpip.NICID]NICInfo) |
| for id, nic := range s.nics { |
| flags := NICStateFlags{ |
| Up: true, // Netstack interfaces are always up. |
| Running: nic.linkEP.IsAttached(), |
| Promiscuous: nic.isPromiscuousMode(), |
| Loopback: nic.linkEP.Capabilities()&CapabilityLoopback != 0, |
| } |
| nics[id] = NICInfo{ |
| Name: nic.name, |
| LinkAddress: nic.linkEP.LinkAddress(), |
| ProtocolAddresses: nic.Addresses(), |
| Flags: flags, |
| MTU: nic.linkEP.MTU(), |
| Stats: nic.stats, |
| } |
| } |
| return nics |
| } |
| |
// NICStateFlags describes the state of a NIC.
type NICStateFlags struct {
	// Up indicates whether the interface is running.
	Up bool

	// Running indicates whether resources are allocated.
	Running bool

	// Promiscuous indicates whether the interface is in promiscuous mode.
	Promiscuous bool

	// Loopback indicates whether the interface is a loopback.
	Loopback bool
}
| |
| // AddAddress adds a new network-layer address to the specified NIC. |
| func (s *Stack) AddAddress(id tcpip.NICID, protocol tcpip.NetworkProtocolNumber, addr tcpip.Address) *tcpip.Error { |
| return s.AddAddressWithOptions(id, protocol, addr, CanBePrimaryEndpoint) |
| } |
| |
| // AddAddressWithOptions is the same as AddAddress, but allows you to specify |
| // whether the new endpoint can be primary or not. |
| func (s *Stack) AddAddressWithOptions(id tcpip.NICID, protocol tcpip.NetworkProtocolNumber, addr tcpip.Address, peb PrimaryEndpointBehavior) *tcpip.Error { |
| s.mu.RLock() |
| defer s.mu.RUnlock() |
| |
| nic := s.nics[id] |
| if nic == nil { |
| return tcpip.ErrUnknownNICID |
| } |
| |
| return nic.AddAddressWithOptions(protocol, addr, peb) |
| } |
| |
| // AddSubnet adds a subnet range to the specified NIC. |
| func (s *Stack) AddSubnet(id tcpip.NICID, protocol tcpip.NetworkProtocolNumber, subnet tcpip.Subnet) *tcpip.Error { |
| s.mu.RLock() |
| defer s.mu.RUnlock() |
| |
| if nic, ok := s.nics[id]; ok { |
| nic.AddSubnet(protocol, subnet) |
| return nil |
| } |
| |
| return tcpip.ErrUnknownNICID |
| } |
| |
| // RemoveSubnet removes the subnet range from the specified NIC. |
| func (s *Stack) RemoveSubnet(id tcpip.NICID, subnet tcpip.Subnet) *tcpip.Error { |
| s.mu.RLock() |
| defer s.mu.RUnlock() |
| |
| if nic, ok := s.nics[id]; ok { |
| nic.RemoveSubnet(subnet) |
| return nil |
| } |
| |
| return tcpip.ErrUnknownNICID |
| } |
| |
| // ContainsSubnet reports whether the specified NIC contains the specified |
| // subnet. |
| func (s *Stack) ContainsSubnet(id tcpip.NICID, subnet tcpip.Subnet) (bool, *tcpip.Error) { |
| s.mu.RLock() |
| defer s.mu.RUnlock() |
| |
| if nic, ok := s.nics[id]; ok { |
| return nic.ContainsSubnet(subnet), nil |
| } |
| |
| return false, tcpip.ErrUnknownNICID |
| } |
| |
| // RemoveAddress removes an existing network-layer address from the specified |
| // NIC. |
| func (s *Stack) RemoveAddress(id tcpip.NICID, addr tcpip.Address) *tcpip.Error { |
| s.mu.RLock() |
| defer s.mu.RUnlock() |
| |
| if nic, ok := s.nics[id]; ok { |
| return nic.RemoveAddress(addr) |
| } |
| |
| return tcpip.ErrUnknownNICID |
| } |
| |
| // GetMainNICAddress returns the first primary address (and the subnet that |
| // contains it) for the given NIC and protocol. Returns an arbitrary endpoint's |
| // address if no primary addresses exist. Returns an error if the NIC doesn't |
| // exist or has no endpoints. |
| func (s *Stack) GetMainNICAddress(id tcpip.NICID, protocol tcpip.NetworkProtocolNumber) (tcpip.Address, tcpip.Subnet, *tcpip.Error) { |
| s.mu.RLock() |
| defer s.mu.RUnlock() |
| |
| if nic, ok := s.nics[id]; ok { |
| return nic.getMainNICAddress(protocol) |
| } |
| |
| return "", tcpip.Subnet{}, tcpip.ErrUnknownNICID |
| } |
| |
| func (s *Stack) getRefEP(nic *NIC, localAddr tcpip.Address, netProto tcpip.NetworkProtocolNumber) (ref *referencedNetworkEndpoint) { |
| if len(localAddr) == 0 { |
| return nic.primaryEndpoint(netProto) |
| } |
| return nic.findEndpoint(netProto, localAddr, CanBePrimaryEndpoint) |
| } |
| |
| // FindRoute creates a route to the given destination address, leaving through |
| // the given nic and local address (if provided). |
| func (s *Stack) FindRoute(id tcpip.NICID, localAddr, remoteAddr tcpip.Address, netProto tcpip.NetworkProtocolNumber, multicastLoop bool) (Route, *tcpip.Error) { |
| s.mu.RLock() |
| defer s.mu.RUnlock() |
| |
| isBroadcast := remoteAddr == header.IPv4Broadcast |
| isMulticast := header.IsV4MulticastAddress(remoteAddr) || header.IsV6MulticastAddress(remoteAddr) |
| needRoute := !(isBroadcast || isMulticast || header.IsV6LinkLocalAddress(remoteAddr)) |
| if id != 0 && !needRoute { |
| if nic, ok := s.nics[id]; ok { |
| if ref := s.getRefEP(nic, localAddr, netProto); ref != nil { |
| return makeRoute(netProto, ref.ep.ID().LocalAddress, remoteAddr, nic.linkEP.LinkAddress(), ref, s.handleLocal && !nic.loopback, multicastLoop && !nic.loopback), nil |
| } |
| } |
| } else { |
| for _, route := range s.routeTable { |
| if (id != 0 && id != route.NIC) || (len(remoteAddr) != 0 && !route.Match(remoteAddr)) { |
| continue |
| } |
| if nic, ok := s.nics[route.NIC]; ok { |
| if ref := s.getRefEP(nic, localAddr, netProto); ref != nil { |
| if len(remoteAddr) == 0 { |
| // If no remote address was provided, then the route |
| // provided will refer to the link local address. |
| remoteAddr = ref.ep.ID().LocalAddress |
| } |
| |
| r := makeRoute(netProto, ref.ep.ID().LocalAddress, remoteAddr, nic.linkEP.LinkAddress(), ref, s.handleLocal && !nic.loopback, multicastLoop && !nic.loopback) |
| if needRoute { |
| r.NextHop = route.Gateway |
| } |
| return r, nil |
| } |
| } |
| } |
| } |
| |
| if !needRoute { |
| return Route{}, tcpip.ErrNetworkUnreachable |
| } |
| |
| return Route{}, tcpip.ErrNoRoute |
| } |
| |
| // CheckNetworkProtocol checks if a given network protocol is enabled in the |
| // stack. |
| func (s *Stack) CheckNetworkProtocol(protocol tcpip.NetworkProtocolNumber) bool { |
| _, ok := s.networkProtocols[protocol] |
| return ok |
| } |
| |
| // CheckLocalAddress determines if the given local address exists, and if it |
| // does, returns the id of the NIC it's bound to. Returns 0 if the address |
| // does not exist. |
| func (s *Stack) CheckLocalAddress(nicid tcpip.NICID, protocol tcpip.NetworkProtocolNumber, addr tcpip.Address) tcpip.NICID { |
| s.mu.RLock() |
| defer s.mu.RUnlock() |
| |
| // If a NIC is specified, we try to find the address there only. |
| if nicid != 0 { |
| nic := s.nics[nicid] |
| if nic == nil { |
| return 0 |
| } |
| |
| ref := nic.findEndpoint(protocol, addr, CanBePrimaryEndpoint) |
| if ref == nil { |
| return 0 |
| } |
| |
| ref.decRef() |
| |
| return nic.id |
| } |
| |
| // Go through all the NICs. |
| for _, nic := range s.nics { |
| ref := nic.findEndpoint(protocol, addr, CanBePrimaryEndpoint) |
| if ref != nil { |
| ref.decRef() |
| return nic.id |
| } |
| } |
| |
| return 0 |
| } |
| |
| // SetPromiscuousMode enables or disables promiscuous mode in the given NIC. |
| func (s *Stack) SetPromiscuousMode(nicID tcpip.NICID, enable bool) *tcpip.Error { |
| s.mu.RLock() |
| defer s.mu.RUnlock() |
| |
| nic := s.nics[nicID] |
| if nic == nil { |
| return tcpip.ErrUnknownNICID |
| } |
| |
| nic.setPromiscuousMode(enable) |
| |
| return nil |
| } |
| |
| // SetSpoofing enables or disables address spoofing in the given NIC, allowing |
| // endpoints to bind to any address in the NIC. |
| func (s *Stack) SetSpoofing(nicID tcpip.NICID, enable bool) *tcpip.Error { |
| s.mu.RLock() |
| defer s.mu.RUnlock() |
| |
| nic := s.nics[nicID] |
| if nic == nil { |
| return tcpip.ErrUnknownNICID |
| } |
| |
| nic.setSpoofing(enable) |
| |
| return nil |
| } |
| |
| // AddLinkAddress adds a link address to the stack link cache. |
| func (s *Stack) AddLinkAddress(nicid tcpip.NICID, addr tcpip.Address, linkAddr tcpip.LinkAddress) { |
| fullAddr := tcpip.FullAddress{NIC: nicid, Addr: addr} |
| s.linkAddrCache.add(fullAddr, linkAddr) |
| // TODO: provide a way for a transport endpoint to receive a signal |
| // that AddLinkAddress for a particular address has been called. |
| } |
| |
| // GetLinkAddress implements LinkAddressCache.GetLinkAddress. |
| func (s *Stack) GetLinkAddress(nicid tcpip.NICID, addr, localAddr tcpip.Address, protocol tcpip.NetworkProtocolNumber, waker *sleep.Waker) (tcpip.LinkAddress, <-chan struct{}, *tcpip.Error) { |
| s.mu.RLock() |
| nic := s.nics[nicid] |
| if nic == nil { |
| s.mu.RUnlock() |
| return "", nil, tcpip.ErrUnknownNICID |
| } |
| s.mu.RUnlock() |
| |
| fullAddr := tcpip.FullAddress{NIC: nicid, Addr: addr} |
| linkRes := s.linkAddrResolvers[protocol] |
| return s.linkAddrCache.get(fullAddr, linkRes, localAddr, nic.linkEP, waker) |
| } |
| |
| // RemoveWaker implements LinkAddressCache.RemoveWaker. |
| func (s *Stack) RemoveWaker(nicid tcpip.NICID, addr tcpip.Address, waker *sleep.Waker) { |
| s.mu.RLock() |
| defer s.mu.RUnlock() |
| |
| if nic := s.nics[nicid]; nic == nil { |
| fullAddr := tcpip.FullAddress{NIC: nicid, Addr: addr} |
| s.linkAddrCache.removeWaker(fullAddr, waker) |
| } |
| } |
| |
| // RegisterTransportEndpoint registers the given endpoint with the stack |
| // transport dispatcher. Received packets that match the provided id will be |
| // delivered to the given endpoint; specifying a nic is optional, but |
| // nic-specific IDs have precedence over global ones. |
| func (s *Stack) RegisterTransportEndpoint(nicID tcpip.NICID, netProtos []tcpip.NetworkProtocolNumber, protocol tcpip.TransportProtocolNumber, id TransportEndpointID, ep TransportEndpoint, reusePort bool) *tcpip.Error { |
| if nicID == 0 { |
| return s.demux.registerEndpoint(netProtos, protocol, id, ep, reusePort) |
| } |
| |
| s.mu.RLock() |
| defer s.mu.RUnlock() |
| |
| nic := s.nics[nicID] |
| if nic == nil { |
| return tcpip.ErrUnknownNICID |
| } |
| |
| return nic.demux.registerEndpoint(netProtos, protocol, id, ep, reusePort) |
| } |
| |
| // UnregisterTransportEndpoint removes the endpoint with the given id from the |
| // stack transport dispatcher. |
| func (s *Stack) UnregisterTransportEndpoint(nicID tcpip.NICID, netProtos []tcpip.NetworkProtocolNumber, protocol tcpip.TransportProtocolNumber, id TransportEndpointID, ep TransportEndpoint) { |
| if nicID == 0 { |
| s.demux.unregisterEndpoint(netProtos, protocol, id, ep) |
| return |
| } |
| |
| s.mu.RLock() |
| defer s.mu.RUnlock() |
| |
| nic := s.nics[nicID] |
| if nic != nil { |
| nic.demux.unregisterEndpoint(netProtos, protocol, id, ep) |
| } |
| } |
| |
| // RegisterRawTransportEndpoint registers the given endpoint with the stack |
| // transport dispatcher. Received packets that match the provided transport |
| // protocol will be delivered to the given endpoint. |
| func (s *Stack) RegisterRawTransportEndpoint(nicID tcpip.NICID, netProto tcpip.NetworkProtocolNumber, transProto tcpip.TransportProtocolNumber, ep RawTransportEndpoint) *tcpip.Error { |
| if nicID == 0 { |
| return s.demux.registerRawEndpoint(netProto, transProto, ep) |
| } |
| |
| s.mu.RLock() |
| defer s.mu.RUnlock() |
| |
| nic := s.nics[nicID] |
| if nic == nil { |
| return tcpip.ErrUnknownNICID |
| } |
| |
| return nic.demux.registerRawEndpoint(netProto, transProto, ep) |
| } |
| |
| // UnregisterRawTransportEndpoint removes the endpoint for the transport |
| // protocol from the stack transport dispatcher. |
| func (s *Stack) UnregisterRawTransportEndpoint(nicID tcpip.NICID, netProto tcpip.NetworkProtocolNumber, transProto tcpip.TransportProtocolNumber, ep RawTransportEndpoint) { |
| if nicID == 0 { |
| s.demux.unregisterRawEndpoint(netProto, transProto, ep) |
| return |
| } |
| |
| s.mu.RLock() |
| defer s.mu.RUnlock() |
| |
| nic := s.nics[nicID] |
| if nic != nil { |
| nic.demux.unregisterRawEndpoint(netProto, transProto, ep) |
| } |
| } |
| |
| // NetworkProtocolInstance returns the protocol instance in the stack for the |
| // specified network protocol. This method is public for protocol implementers |
| // and tests to use. |
| func (s *Stack) NetworkProtocolInstance(num tcpip.NetworkProtocolNumber) NetworkProtocol { |
| if p, ok := s.networkProtocols[num]; ok { |
| return p |
| } |
| return nil |
| } |
| |
| // TransportProtocolInstance returns the protocol instance in the stack for the |
| // specified transport protocol. This method is public for protocol implementers |
| // and tests to use. |
| func (s *Stack) TransportProtocolInstance(num tcpip.TransportProtocolNumber) TransportProtocol { |
| if pState, ok := s.transportProtocols[num]; ok { |
| return pState.proto |
| } |
| return nil |
| } |
| |
| // AddTCPProbe installs a probe function that will be invoked on every segment |
| // received by a given TCP endpoint. The probe function is passed a copy of the |
| // TCP endpoint state before and after processing of the segment. |
| // |
| // NOTE: TCPProbe is added only to endpoints created after this call. Endpoints |
| // created prior to this call will not call the probe function. |
| // |
| // Further, installing two different probes back to back can result in some |
| // endpoints calling the first one and some the second one. There is no |
| // guarantee provided on which probe will be invoked. Ideally this should only |
| // be called once per stack. |
| func (s *Stack) AddTCPProbe(probe TCPProbeFunc) { |
| s.mu.Lock() |
| s.tcpProbeFunc = probe |
| s.mu.Unlock() |
| } |
| |
| // GetTCPProbe returns the TCPProbeFunc if installed with AddTCPProbe, nil |
| // otherwise. |
| func (s *Stack) GetTCPProbe() TCPProbeFunc { |
| s.mu.Lock() |
| p := s.tcpProbeFunc |
| s.mu.Unlock() |
| return p |
| } |
| |
| // RemoveTCPProbe removes an installed TCP probe. |
| // |
| // NOTE: This only ensures that endpoints created after this call do not |
| // have a probe attached. Endpoints already created will continue to invoke |
| // TCP probe. |
| func (s *Stack) RemoveTCPProbe() { |
| s.mu.Lock() |
| s.tcpProbeFunc = nil |
| s.mu.Unlock() |
| } |
| |
| // JoinGroup joins the given multicast group on the given NIC. |
| func (s *Stack) JoinGroup(protocol tcpip.NetworkProtocolNumber, nicID tcpip.NICID, multicastAddr tcpip.Address) *tcpip.Error { |
| // TODO: notify network of subscription via igmp protocol. |
| s.mu.RLock() |
| defer s.mu.RUnlock() |
| |
| if nic, ok := s.nics[nicID]; ok { |
| return nic.joinGroup(protocol, multicastAddr) |
| } |
| return tcpip.ErrUnknownNICID |
| } |
| |
| // LeaveGroup leaves the given multicast group on the given NIC. |
| func (s *Stack) LeaveGroup(protocol tcpip.NetworkProtocolNumber, nicID tcpip.NICID, multicastAddr tcpip.Address) *tcpip.Error { |
| s.mu.RLock() |
| defer s.mu.RUnlock() |
| |
| if nic, ok := s.nics[nicID]; ok { |
| return nic.leaveGroup(multicastAddr) |
| } |
| return tcpip.ErrUnknownNICID |
| } |