| // Copyright 2016 The Netstack Authors. All rights reserved. |
| // Use of this source code is governed by a BSD-style |
| // license that can be found in the LICENSE file. |
| |
| package stack |
| |
| import ( |
| "strings" |
| "sync" |
| "sync/atomic" |
| |
| "github.com/google/netstack/ilist" |
| "github.com/google/netstack/tcpip" |
| "github.com/google/netstack/tcpip/buffer" |
| "github.com/google/netstack/tcpip/header" |
| ) |
| |
| // NIC represents a "network interface card" to which the networking stack is |
| // attached. |
| type NIC struct { |
| stack *Stack |
| id tcpip.NICID |
| linkEP LinkEndpoint |
| |
| demux *transportDemuxer |
| |
| mu sync.RWMutex |
| promiscuous bool |
| primary map[tcpip.NetworkProtocolNumber]*ilist.List |
| endpoints map[NetworkEndpointID]*referencedNetworkEndpoint |
| subnets []tcpip.Subnet |
| } |
| |
| // PrimaryEndpointBehavior specifies how a new address should behave as a primary endpoint. |
| type PrimaryEndpointBehavior int |
| |
| const ( |
| // CanBePrimaryEndpoint indicates the endpoint can be used as a primary |
| // endpoint for new connections with no local address. This is the |
| // default when calling NIC.AddAddress. |
| CanBePrimaryEndpoint PrimaryEndpointBehavior = iota |
| // FirstPrimaryEndpoint indicates the endpoint should be the first |
| // primary endpoint considered. If there are multiple endpoints with |
| // this behavior, the most recently-added one will be first. |
| FirstPrimaryEndpoint |
| // NeverPrimaryEndpoint indicates the endpoint should never be a |
| // primary endpoint. |
| NeverPrimaryEndpoint |
| ) |
| |
| func newNIC(stack *Stack, id tcpip.NICID, ep LinkEndpoint) *NIC { |
| return &NIC{ |
| stack: stack, |
| id: id, |
| linkEP: ep, |
| demux: newTransportDemuxer(stack), |
| primary: make(map[tcpip.NetworkProtocolNumber]*ilist.List), |
| endpoints: make(map[NetworkEndpointID]*referencedNetworkEndpoint), |
| } |
| } |
| |
| // attachLinkEndpoint attaches the NIC to the endpoint, which will enable it |
| // to start delivering packets. |
| func (n *NIC) attachLinkEndpoint() { |
| n.linkEP.Attach(n) |
| } |
| |
| // setPromiscuousMode enables or disables promiscuous mode. |
| func (n *NIC) setPromiscuousMode(enable bool) { |
| n.mu.Lock() |
| n.promiscuous = enable |
| n.mu.Unlock() |
| } |
| |
| // Get the primary network endpoint, if there is one; otherwise pick an arbitrary endpoint from the NIC's endpoints. |
| func (n *NIC) getMainNICAddress(protocol tcpip.NetworkProtocolNumber) (tcpip.Address, tcpip.Subnet) { |
| n.mu.RLock() |
| defer n.mu.RUnlock() |
| |
| var address tcpip.Address |
| var subnet tcpip.Subnet |
| |
| // Check for a primary endpoint. |
| var r *referencedNetworkEndpoint |
| list := n.primary[protocol] |
| if list != nil { |
| for e := list.Front(); e != nil; e = e.Next() { |
| ref := e.(*referencedNetworkEndpoint) |
| if ref.holdsInsertRef && ref.tryIncRef() { |
| r = ref |
| break |
| } |
| } |
| |
| } |
| |
| // If no primary endpoints then check for other endpoints. |
| if r == nil { |
| for _, ref := range n.endpoints { |
| if ref != nil && ref.holdsInsertRef && ref.tryIncRef() { |
| r = ref |
| break |
| } |
| } |
| } |
| |
| if r != nil { |
| address = r.ep.ID().LocalAddress |
| r.decRef() |
| } |
| |
| // Find the least-constrained matching subnet for the address, if one exists, and return it |
| if address != "" { |
| for _, s := range n.subnets { |
| if s.Contains(address) && !subnet.Contains(s.ID()) { |
| subnet = s |
| } |
| } |
| } |
| |
| return address, subnet |
| } |
| |
| // primaryEndpoint returns the primary endpoint of n for the given network |
| // protocol. |
| func (n *NIC) primaryEndpoint(protocol tcpip.NetworkProtocolNumber) *referencedNetworkEndpoint { |
| n.mu.RLock() |
| defer n.mu.RUnlock() |
| |
| list := n.primary[protocol] |
| if list == nil { |
| return nil |
| } |
| |
| for e := list.Front(); e != nil; e = e.Next() { |
| r := e.(*referencedNetworkEndpoint) |
| if r.tryIncRef() { |
| return r |
| } |
| } |
| |
| return nil |
| } |
| |
| // findEndpoint finds the endpoint, if any, with the given address. |
| func (n *NIC) findEndpoint(address tcpip.Address) *referencedNetworkEndpoint { |
| n.mu.RLock() |
| defer n.mu.RUnlock() |
| |
| ref := n.endpoints[NetworkEndpointID{address}] |
| if ref == nil || !ref.tryIncRef() { |
| return nil |
| } |
| |
| return ref |
| } |
| |
| func (n *NIC) addAddressLocked(protocol tcpip.NetworkProtocolNumber, addr tcpip.Address, peb PrimaryEndpointBehavior, replace bool) (*referencedNetworkEndpoint, *tcpip.Error) { |
| netProto, ok := n.stack.networkProtocols[protocol] |
| if !ok { |
| return nil, tcpip.ErrUnknownProtocol |
| } |
| |
| // Create the new network endpoint. |
| ep, err := netProto.NewEndpoint(n.id, addr, n.stack, n, n.linkEP) |
| if err != nil { |
| return nil, err |
| } |
| |
| id := *ep.ID() |
| if ref, ok := n.endpoints[id]; ok { |
| if !replace { |
| return nil, tcpip.ErrDuplicateAddress |
| } |
| |
| n.removeEndpointLocked(ref) |
| } |
| |
| ref := &referencedNetworkEndpoint{ |
| refs: 1, |
| ep: ep, |
| nic: n, |
| protocol: protocol, |
| holdsInsertRef: true, |
| } |
| if linkRes := n.stack.linkAddrResolvers[protocol]; linkRes != nil { |
| ref.linkRes = linkRes |
| ref.linkCache = n.stack |
| ref.linkEP = n.linkEP |
| } |
| |
| n.endpoints[id] = ref |
| |
| l, ok := n.primary[protocol] |
| if !ok { |
| l = &ilist.List{} |
| n.primary[protocol] = l |
| } |
| |
| switch peb { |
| case CanBePrimaryEndpoint: |
| l.PushBack(ref) |
| case FirstPrimaryEndpoint: |
| l.PushFront(ref) |
| } |
| |
| return ref, nil |
| } |
| |
| // AddAddress adds a new address to n, so that it starts accepting packets |
| // targeted at the given address (and network protocol). |
| func (n *NIC) AddAddress(protocol tcpip.NetworkProtocolNumber, addr tcpip.Address) *tcpip.Error { |
| return n.AddAddressWithOptions(protocol, addr, CanBePrimaryEndpoint) |
| } |
| |
| // AddAddressWithOptions is the same as AddAddress, but allows you to specify whether they new endpoint can be primary or not. |
| func (n *NIC) AddAddressWithOptions(protocol tcpip.NetworkProtocolNumber, addr tcpip.Address, peb PrimaryEndpointBehavior) *tcpip.Error { |
| // Add the endpoint. |
| n.mu.Lock() |
| _, err := n.addAddressLocked(protocol, addr, peb, false) |
| n.mu.Unlock() |
| |
| return err |
| } |
| |
| // AddSubnet adds a new subnet to n, so that it starts accepting packets |
| // targeted at the given address and network protocol. |
| func (n *NIC) AddSubnet(protocol tcpip.NetworkProtocolNumber, subnet tcpip.Subnet) { |
| n.mu.Lock() |
| n.subnets = append(n.subnets, subnet) |
| n.mu.Unlock() |
| } |
| |
| // RemoveSubnet removes the given subnet from n. |
| func (n *NIC) RemoveSubnet(subnet tcpip.Subnet) { |
| n.mu.Lock() |
| |
| var filtered []tcpip.Subnet |
| for _, sub := range n.subnets { |
| if sub != subnet { |
| filtered = append(filtered, sub) |
| } |
| } |
| |
| n.subnets = filtered |
| n.mu.Unlock() |
| return |
| } |
| |
| func (n *NIC) ContainsSubnet(subnet tcpip.Subnet) bool { |
| subnets := n.Subnets() |
| |
| for _, s := range subnets { |
| if s == subnet { |
| return true |
| } |
| } |
| return false |
| } |
| |
| // Subnets returns the Subnets associated with this NIC. |
| func (n *NIC) Subnets() []tcpip.Subnet { |
| n.mu.RLock() |
| defer n.mu.RUnlock() |
| sns := make([]tcpip.Subnet, 0, len(n.subnets)+len(n.endpoints)) |
| for nid := range n.endpoints { |
| sn, err := tcpip.NewSubnet(nid.LocalAddress, tcpip.AddressMask(strings.Repeat("\xff", len(nid.LocalAddress)))) |
| if err != nil { |
| // This should never happen as the mask has been carefully crafted to |
| // match the address. |
| panic("Invalid endpoint subnet: " + err.Error()) |
| } |
| sns = append(sns, sn) |
| } |
| return append(sns, n.subnets...) |
| } |
| |
| func (n *NIC) removeEndpointLocked(r *referencedNetworkEndpoint) { |
| id := *r.ep.ID() |
| |
| // Nothing to do if the reference has already been replaced with a |
| // different one. |
| if n.endpoints[id] != r { |
| return |
| } |
| |
| if r.holdsInsertRef { |
| panic("Reference count dropped to zero before being removed") |
| } |
| |
| delete(n.endpoints, id) |
| wasInList := r.Next() != nil || r.Prev() != nil || r == n.primary[r.protocol].Front() |
| if wasInList { |
| n.primary[r.protocol].Remove(r) |
| } |
| |
| r.ep.Close() |
| } |
| |
| func (n *NIC) removeEndpoint(r *referencedNetworkEndpoint) { |
| n.mu.Lock() |
| n.removeEndpointLocked(r) |
| n.mu.Unlock() |
| } |
| |
| // RemoveAddress removes an address from n. |
| func (n *NIC) RemoveAddress(addr tcpip.Address) *tcpip.Error { |
| n.mu.Lock() |
| r := n.endpoints[NetworkEndpointID{addr}] |
| if r == nil || !r.holdsInsertRef { |
| n.mu.Unlock() |
| return tcpip.ErrBadLocalAddress |
| } |
| |
| r.holdsInsertRef = false |
| n.mu.Unlock() |
| |
| r.decRef() |
| |
| return nil |
| } |
| |
| // DeliverNetworkPacket finds the appropriate network protocol endpoint and |
| // hands the packet over for further processing. This function is called when |
| // the NIC receives a packet from the physical interface. |
| // Note that the ownership of the slice backing vv is retained by the caller. |
| // This rule applies only to the slice itself, not to the items of the slice; |
| // the ownership of the items is not retained by the caller. |
| func (n *NIC) DeliverNetworkPacket(linkEP LinkEndpoint, _, remoteLinkAddr tcpip.LinkAddress, protocol tcpip.NetworkProtocolNumber, vv *buffer.VectorisedView) { |
| netProto, ok := n.stack.networkProtocols[protocol] |
| if !ok { |
| atomic.AddUint64(&n.stack.stats.IP.PacketsDiscarded, 1) |
| atomic.AddUint64(&n.stack.stats.UnknownProtocolRcvdPackets, 1) |
| return |
| } |
| |
| if netProto.Number() == header.IPv4ProtocolNumber || netProto.Number() == header.IPv6ProtocolNumber { |
| atomic.AddUint64(&n.stack.stats.IP.PacketsReceived, 1) |
| } |
| |
| if len(vv.First()) < netProto.MinimumPacketSize() { |
| atomic.AddUint64(&n.stack.stats.IP.PacketsDiscarded, 1) |
| atomic.AddUint64(&n.stack.stats.MalformedRcvdPackets, 1) |
| return |
| } |
| |
| src, dst := netProto.ParseAddresses(vv.First()) |
| id := NetworkEndpointID{dst} |
| |
| n.mu.RLock() |
| ref := n.endpoints[id] |
| if ref != nil && !ref.tryIncRef() { |
| ref = nil |
| } |
| promiscuous := n.promiscuous |
| subnets := n.subnets |
| n.mu.RUnlock() |
| |
| if ref == nil { |
| // Check if the packet is for a subnet this NIC cares about. |
| if !promiscuous { |
| for _, sn := range subnets { |
| if sn.Contains(dst) { |
| promiscuous = true |
| break |
| } |
| } |
| } |
| if promiscuous { |
| // Try again with the lock in exclusive mode. If we still can't |
| // get the endpoint, create a new "temporary" one. It will only |
| // exist while there's a route through it. |
| n.mu.Lock() |
| ref = n.endpoints[id] |
| if ref == nil || !ref.tryIncRef() { |
| ref, _ = n.addAddressLocked(protocol, dst, CanBePrimaryEndpoint, true) |
| if ref != nil { |
| ref.holdsInsertRef = false |
| } |
| } |
| n.mu.Unlock() |
| } |
| } |
| |
| if ref == nil { |
| // This NIC doesn't care the packet. Find a NIC that cares about the packet and |
| // forward it to the NIC. |
| // TODO: Should forward the packet even if 'promiscuous' is enabled? |
| if n.stack.Forwarding() { |
| r, err := n.stack.FindRoute(0, "", dst, protocol) |
| if err != nil { |
| // Can't find a NIC. |
| atomic.AddUint64(&n.stack.stats.IP.InvalidAddressesReceived, 1) |
| return |
| } |
| defer r.Release() |
| |
| // Found a NIC. |
| n2 := r.ref.nic |
| n2.mu.RLock() |
| ref := n2.endpoints[id] |
| if ref != nil && !ref.tryIncRef() { |
| ref = nil |
| } |
| n2.mu.RUnlock() |
| |
| r.LocalLinkAddress = n2.linkEP.LinkAddress() |
| r.RemoteLinkAddress = remoteLinkAddr |
| |
| if ref == nil { |
| // n2 doesn't have a destination endpoint. |
| // Send the packet out of n2. |
| if ep, ok := n2.linkEP.(BufferWritingLinkEndpoint); ok { |
| ep.WriteBuffer(&r, vv, protocol) |
| } |
| } else { |
| ref.ep.HandlePacket(&r, vv) |
| ref.decRef() |
| } |
| return |
| } else { |
| atomic.AddUint64(&n.stack.stats.IP.InvalidAddressesReceived, 1) |
| return |
| } |
| } |
| |
| r := makeRoute(protocol, dst, src, ref) |
| r.LocalLinkAddress = linkEP.LinkAddress() |
| r.RemoteLinkAddress = remoteLinkAddr |
| ref.ep.HandlePacket(&r, vv) |
| ref.decRef() |
| } |
| |
| // DeliverTransportPacket delivers the packets to the appropriate transport |
| // protocol endpoint. |
| func (n *NIC) DeliverTransportPacket(r *Route, protocol tcpip.TransportProtocolNumber, vv *buffer.VectorisedView) { |
| state, ok := n.stack.transportProtocols[protocol] |
| if !ok { |
| atomic.AddUint64(&n.stack.stats.UnknownProtocolRcvdPackets, 1) |
| return |
| } |
| |
| transProto := state.proto |
| if len(vv.First()) < transProto.MinimumPacketSize() { |
| atomic.AddUint64(&n.stack.stats.MalformedRcvdPackets, 1) |
| return |
| } |
| |
| srcPort, dstPort, err := transProto.ParsePorts(vv.First()) |
| if err != nil { |
| atomic.AddUint64(&n.stack.stats.MalformedRcvdPackets, 1) |
| return |
| } |
| |
| id := TransportEndpointID{dstPort, r.LocalAddress, srcPort, r.RemoteAddress} |
| if n.demux.deliverPacket(r, protocol, vv, id) { |
| return |
| } |
| if n.stack.demux.deliverPacket(r, protocol, vv, id) { |
| return |
| } |
| |
| // Try to deliver to per-stack default handler. |
| if state.defaultHandler != nil { |
| if state.defaultHandler(r, id, vv) { |
| return |
| } |
| } |
| |
| // We could not find an appropriate destination for this packet, so |
| // deliver it to the global handler. |
| if !transProto.HandleUnknownDestinationPacket(r, id, vv) { |
| atomic.AddUint64(&n.stack.stats.MalformedRcvdPackets, 1) |
| } |
| } |
| |
| // ID returns the identifier of n. |
| func (n *NIC) ID() tcpip.NICID { |
| return n.id |
| } |
| |
| type referencedNetworkEndpoint struct { |
| ilist.Entry |
| refs int32 |
| ep NetworkEndpoint |
| nic *NIC |
| protocol tcpip.NetworkProtocolNumber |
| linkRes LinkAddressResolver |
| linkCache LinkAddressCache |
| linkEP LinkEndpoint |
| |
| // holdsInsertRef is protected by the NIC's mutex. It indicates whether |
| // the reference count is biased by 1 due to the insertion of the |
| // endpoint. It is reset to false when RemoveAddress is called on the |
| // NIC. |
| holdsInsertRef bool |
| } |
| |
| // decRef decrements the ref count and cleans up the endpoint once it reaches |
| // zero. |
| func (r *referencedNetworkEndpoint) decRef() { |
| if atomic.AddInt32(&r.refs, -1) == 0 { |
| r.nic.removeEndpoint(r) |
| } |
| } |
| |
| // incRef increments the ref count. It must only be called when the caller is |
| // known to be holding a reference to the endpoint, otherwise tryIncRef should |
| // be used. |
| func (r *referencedNetworkEndpoint) incRef() { |
| atomic.AddInt32(&r.refs, 1) |
| } |
| |
| // tryIncRef attempts to increment the ref count from n to n+1, but only if n is |
| // not zero. That is, it will increment the count if the endpoint is still |
| // alive, and do nothing if it has already been clean up. |
| func (r *referencedNetworkEndpoint) tryIncRef() bool { |
| for { |
| v := atomic.LoadInt32(&r.refs) |
| if v == 0 { |
| return false |
| } |
| |
| if atomic.CompareAndSwapInt32(&r.refs, v, v+1) { |
| return true |
| } |
| } |
| } |