// blob: 9be5cc629a01a748f89939bf0e18ce1e6c1b4bcf [file] [log] [blame]
// Copyright 2016 The Netstack Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package stack
import (
"strings"
"sync"
"sync/atomic"
"github.com/google/netstack/ilist"
"github.com/google/netstack/tcpip"
"github.com/google/netstack/tcpip/buffer"
"github.com/google/netstack/tcpip/header"
)
// NIC represents a "network interface card" to which the networking stack is
// attached.
type NIC struct {
// stack is the Stack passed to newNIC; it supplies the protocol tables and
// the statistics counters used by the delivery methods.
stack *Stack
// id is the identifier reported by ID.
id tcpip.NICID
// linkEP is the link-layer endpoint this NIC attaches itself to in
// attachLinkEndpoint.
linkEP LinkEndpoint
// demux is the NIC-local transport demuxer. DeliverTransportPacket tries
// it before the stack-wide demuxer.
demux *transportDemuxer
// mu protects promiscuous, primary, endpoints and subnets.
mu sync.RWMutex
// promiscuous is toggled by setPromiscuousMode. When set,
// DeliverNetworkPacket creates a temporary endpoint for packets whose
// destination has no registered endpoint.
promiscuous bool
// primary holds, per network protocol, the ordered list of endpoints that
// may be chosen by primaryEndpoint.
primary map[tcpip.NetworkProtocolNumber]*ilist.List
// endpoints maps every address known to the NIC to its reference-counted
// endpoint.
endpoints map[NetworkEndpointID]*referencedNetworkEndpoint
// subnets lists the subnets added through AddSubnet.
subnets []tcpip.Subnet
}
// PrimaryEndpointBehavior specifies how a newly added address should behave
// as a primary endpoint, i.e. where (if at all) it is placed in the NIC's
// per-protocol primary list.
type PrimaryEndpointBehavior int
const (
// CanBePrimaryEndpoint indicates the endpoint can be used as a primary
// endpoint for new connections with no local address. This is the
// default when calling NIC.AddAddress. Such endpoints are appended to the
// back of the primary list.
CanBePrimaryEndpoint PrimaryEndpointBehavior = iota
// FirstPrimaryEndpoint indicates the endpoint should be the first
// primary endpoint considered. If there are multiple endpoints with
// this behavior, the most recently-added one will be first.
FirstPrimaryEndpoint
// NeverPrimaryEndpoint indicates the endpoint should never be a
// primary endpoint. Such endpoints are not inserted into the primary
// list at all.
NeverPrimaryEndpoint
)
// newNIC builds a NIC with identifier id on top of the link endpoint ep. The
// returned NIC has no addresses or subnets yet, and it is not attached to ep
// until attachLinkEndpoint is called.
func newNIC(stack *Stack, id tcpip.NICID, ep LinkEndpoint) *NIC {
	nic := new(NIC)
	nic.stack = stack
	nic.id = id
	nic.linkEP = ep
	nic.demux = newTransportDemuxer(stack)
	nic.primary = make(map[tcpip.NetworkProtocolNumber]*ilist.List)
	nic.endpoints = make(map[NetworkEndpointID]*referencedNetworkEndpoint)
	return nic
}
// attachLinkEndpoint attaches the NIC to the endpoint, which will enable it
// to start delivering packets.
func (n *NIC) attachLinkEndpoint() {
// n itself is handed to the link endpoint. NOTE(review): presumably the
// endpoint calls back into n.DeliverNetworkPacket for inbound packets;
// confirm against the LinkEndpoint contract.
n.linkEP.Attach(n)
}
// setPromiscuousMode switches promiscuous mode on (enable == true) or off
// for n. The flag is written while holding n.mu exclusively.
func (n *NIC) setPromiscuousMode(enable bool) {
	n.mu.Lock()
	defer n.mu.Unlock()

	n.promiscuous = enable
}
// getMainNICAddress returns an address of n together with the subnet selected
// for it.
//
// The address comes from the first usable endpoint in the primary list of
// protocol; if that list yields nothing, an arbitrary usable endpoint from
// n.endpoints is taken instead (regardless of its protocol). If no endpoint
// is usable, the empty address and the zero Subnet are returned.
//
// The subnet is picked from n.subnets: among those that contain the address,
// a candidate replaces the current pick unless the current pick already
// contains the candidate's ID. If none contains the address, the zero Subnet
// is returned.
func (n *NIC) getMainNICAddress(protocol tcpip.NetworkProtocolNumber) (tcpip.Address, tcpip.Subnet) {
	n.mu.RLock()
	defer n.mu.RUnlock()

	// Only endpoints that still hold their insertion reference are
	// considered. That flag can only be cleared under the write lock, so the
	// decRef below cannot drop the count to zero and re-enter n.mu while the
	// read lock is held.
	var chosen *referencedNetworkEndpoint

	// First choice: an endpoint from the protocol's primary list.
	if primaries := n.primary[protocol]; primaries != nil {
		for entry := primaries.Front(); entry != nil; entry = entry.Next() {
			if candidate := entry.(*referencedNetworkEndpoint); candidate.holdsInsertRef && candidate.tryIncRef() {
				chosen = candidate
				break
			}
		}
	}

	// Fallback: any endpoint registered on the NIC.
	if chosen == nil {
		for _, candidate := range n.endpoints {
			if candidate == nil || !candidate.holdsInsertRef {
				continue
			}
			if candidate.tryIncRef() {
				chosen = candidate
				break
			}
		}
	}

	var (
		addr tcpip.Address
		best tcpip.Subnet
	)
	if chosen == nil {
		return addr, best
	}
	addr = chosen.ep.ID().LocalAddress
	chosen.decRef()
	if addr == "" {
		return addr, best
	}

	// Select the subnet for the address, if any contains it.
	for _, candidate := range n.subnets {
		if candidate.Contains(addr) && !best.Contains(candidate.ID()) {
			best = candidate
		}
	}
	return addr, best
}
// primaryEndpoint returns the first endpoint in the primary list of protocol
// on which a reference could be taken, or nil if there is no such endpoint.
// The returned endpoint carries an extra reference that the caller must
// release with decRef.
func (n *NIC) primaryEndpoint(protocol tcpip.NetworkProtocolNumber) *referencedNetworkEndpoint {
	n.mu.RLock()
	defer n.mu.RUnlock()

	if candidates := n.primary[protocol]; candidates != nil {
		for entry := candidates.Front(); entry != nil; entry = entry.Next() {
			if ref := entry.(*referencedNetworkEndpoint); ref.tryIncRef() {
				return ref
			}
		}
	}
	return nil
}
// findEndpoint looks up the endpoint registered for address. It returns nil
// when no endpoint exists for the address or when the endpoint's reference
// count has already reached zero; otherwise the endpoint is returned with an
// extra reference that the caller must release with decRef.
func (n *NIC) findEndpoint(address tcpip.Address) *referencedNetworkEndpoint {
	n.mu.RLock()
	defer n.mu.RUnlock()

	if ref := n.endpoints[NetworkEndpointID{address}]; ref != nil && ref.tryIncRef() {
		return ref
	}
	return nil
}
// addAddressLocked creates a network endpoint for addr and registers it on n.
// The caller must hold n.mu for writing.
//
// protocol selects the network protocol; tcpip.ErrUnknownProtocol is returned
// if the stack has no such protocol. peb decides whether and where the new
// endpoint is linked into the protocol's primary list: appended for
// CanBePrimaryEndpoint, prepended for FirstPrimaryEndpoint, and not linked
// for any other value (e.g. NeverPrimaryEndpoint).
//
// If an endpoint with the same ID already exists, the call fails with
// tcpip.ErrDuplicateAddress unless replace is true, in which case the existing
// endpoint is removed through removeEndpointLocked first. Note that
// removeEndpointLocked panics if the existing endpoint still holds its
// insertion reference.
//
// On success the new endpoint is returned with a reference count of one and
// holdsInsertRef set.
func (n *NIC) addAddressLocked(protocol tcpip.NetworkProtocolNumber, addr tcpip.Address, peb PrimaryEndpointBehavior, replace bool) (*referencedNetworkEndpoint, *tcpip.Error) {
	netProto, ok := n.stack.networkProtocols[protocol]
	if !ok {
		return nil, tcpip.ErrUnknownProtocol
	}

	// Create the new network endpoint.
	ep, err := netProto.NewEndpoint(n.id, addr, n.stack, n, n.linkEP)
	if err != nil {
		return nil, err
	}

	id := *ep.ID()
	if ref, ok := n.endpoints[id]; ok {
		if !replace {
			// The endpoint created above will never be registered, so
			// release it here instead of leaking whatever it acquired.
			ep.Close()
			return nil, tcpip.ErrDuplicateAddress
		}
		n.removeEndpointLocked(ref)
	}

	ref := &referencedNetworkEndpoint{
		refs:           1,
		ep:             ep,
		nic:            n,
		protocol:       protocol,
		holdsInsertRef: true,
	}

	// Link address resolution is wired up only for protocols that have a
	// resolver registered on the stack.
	if linkRes := n.stack.linkAddrResolvers[protocol]; linkRes != nil {
		ref.linkRes = linkRes
		ref.linkCache = n.stack
		ref.linkEP = n.linkEP
	}

	n.endpoints[id] = ref

	// The per-protocol primary list is created lazily on first use.
	l, ok := n.primary[protocol]
	if !ok {
		l = &ilist.List{}
		n.primary[protocol] = l
	}

	switch peb {
	case CanBePrimaryEndpoint:
		l.PushBack(ref)
	case FirstPrimaryEndpoint:
		l.PushFront(ref)
	}

	return ref, nil
}
// AddAddress adds a new address to n, so that it starts accepting packets
// targeted at the given address (and network protocol). The address is added
// with CanBePrimaryEndpoint behavior; any error from AddAddressWithOptions
// (for example tcpip.ErrDuplicateAddress when the address is already
// present) is returned unchanged.
func (n *NIC) AddAddress(protocol tcpip.NetworkProtocolNumber, addr tcpip.Address) *tcpip.Error {
return n.AddAddressWithOptions(protocol, addr, CanBePrimaryEndpoint)
}
// AddAddressWithOptions is the same as AddAddress, but allows the caller to
// specify through peb whether the new endpoint can be primary or not. An
// existing endpoint for the same address is never replaced; in that case
// tcpip.ErrDuplicateAddress is returned.
func (n *NIC) AddAddressWithOptions(protocol tcpip.NetworkProtocolNumber, addr tcpip.Address, peb PrimaryEndpointBehavior) *tcpip.Error {
	n.mu.Lock()
	defer n.mu.Unlock()

	// Only the error matters here; the endpoint stays registered on n.
	_, err := n.addAddressLocked(protocol, addr, peb, false)
	return err
}
// AddSubnet adds a new subnet to n, so that it starts accepting packets whose
// destination address falls inside subnet. The protocol argument is currently
// unused. No duplicate check is performed.
func (n *NIC) AddSubnet(protocol tcpip.NetworkProtocolNumber, subnet tcpip.Subnet) {
	n.mu.Lock()
	defer n.mu.Unlock()

	n.subnets = append(n.subnets, subnet)
}
// RemoveSubnet removes every occurrence of the given subnet from n. It is a
// no-op if the subnet was never added.
func (n *NIC) RemoveSubnet(subnet tcpip.Subnet) {
	n.mu.Lock()
	defer n.mu.Unlock()

	// Build a fresh slice rather than filtering in place:
	// DeliverNetworkPacket iterates a snapshot of n.subnets after it has
	// released the lock, so the old backing array must not be modified.
	var kept []tcpip.Subnet
	for i := range n.subnets {
		if candidate := n.subnets[i]; candidate != subnet {
			kept = append(kept, candidate)
		}
	}
	n.subnets = kept
}
// ContainsSubnet reports whether subnet is equal to one of the subnets
// returned by Subnets. That set covers the subnets added with AddSubnet as
// well as the full-mask subnet of every address on the NIC.
func (n *NIC) ContainsSubnet(subnet tcpip.Subnet) bool {
	for _, candidate := range n.Subnets() {
		if candidate == subnet {
			return true
		}
	}
	return false
}
// Subnets returns the subnets associated with this NIC: one full-mask subnet
// per endpoint address (in map iteration order), followed by the subnets
// added through AddSubnet. The returned slice is newly allocated.
func (n *NIC) Subnets() []tcpip.Subnet {
	n.mu.RLock()
	defer n.mu.RUnlock()

	result := make([]tcpip.Subnet, 0, len(n.subnets)+len(n.endpoints))
	for id := range n.endpoints {
		addr := id.LocalAddress
		// An all-ones mask of the same length as the address.
		fullMask := tcpip.AddressMask(strings.Repeat("\xff", len(addr)))
		hostSubnet, err := tcpip.NewSubnet(addr, fullMask)
		if err != nil {
			// This should never happen as the mask has been carefully
			// crafted to match the address.
			panic("Invalid endpoint subnet: " + err.Error())
		}
		result = append(result, hostSubnet)
	}
	result = append(result, n.subnets...)
	return result
}
// removeEndpointLocked unregisters r from n and closes its network endpoint.
// The caller must hold n.mu for writing.
//
// It does nothing if the endpoint currently registered under r's ID is not r
// itself. It panics if r still holds its insertion reference, since that
// means the reference count reached zero without RemoveAddress having
// released the insertion reference.
func (n *NIC) removeEndpointLocked(r *referencedNetworkEndpoint) {
	id := *r.ep.ID()

	// Nothing to do if the reference has already been replaced with a
	// different one.
	if current := n.endpoints[id]; current != r {
		return
	}

	if r.holdsInsertRef {
		panic("Reference count dropped to zero before being removed")
	}

	delete(n.endpoints, id)

	// r is linked into the primary list only if it has a neighbour or is the
	// list's sole element; endpoints added as NeverPrimaryEndpoint are not.
	primaries := n.primary[r.protocol]
	if linked := r.Next() != nil || r.Prev() != nil || r == primaries.Front(); linked {
		primaries.Remove(r)
	}

	r.ep.Close()
}
// removeEndpoint acquires n.mu exclusively and removes r via
// removeEndpointLocked. It is invoked by decRef when the reference count of
// r reaches zero, so it must not be called while n.mu is already held.
func (n *NIC) removeEndpoint(r *referencedNetworkEndpoint) {
	n.mu.Lock()
	defer n.mu.Unlock()

	n.removeEndpointLocked(r)
}
// RemoveAddress removes an address from n by releasing the insertion
// reference of its endpoint. The endpoint itself is torn down once the last
// remaining reference is dropped. tcpip.ErrBadLocalAddress is returned if the
// address is unknown or its insertion reference was already released.
func (n *NIC) RemoveAddress(addr tcpip.Address) *tcpip.Error {
	n.mu.Lock()
	ref := n.endpoints[NetworkEndpointID{addr}]
	removable := ref != nil && ref.holdsInsertRef
	if removable {
		ref.holdsInsertRef = false
	}
	n.mu.Unlock()

	if !removable {
		return tcpip.ErrBadLocalAddress
	}

	// decRef may need to take n.mu itself (through removeEndpoint), so it has
	// to run after the lock has been released.
	ref.decRef()
	return nil
}
// DeliverNetworkPacket finds the appropriate network protocol endpoint and
// hands the packet over for further processing. This function is called when
// the NIC receives a packet from the physical interface.
//
// The destination address parsed from the packet is resolved in this order:
// an endpoint already registered on n; a temporary endpoint created on the
// fly when n is promiscuous or the address lies in one of n's subnets; and
// finally, when the stack has forwarding enabled, the NIC selected by
// FindRoute. Packets matching none of these are counted as
// IP.InvalidAddressesReceived and dropped.
//
// Note that the ownership of the slice backing vv is retained by the caller.
// This rule applies only to the slice itself, not to the items of the slice;
// the ownership of the items is not retained by the caller.
func (n *NIC) DeliverNetworkPacket(linkEP LinkEndpoint, _, remoteLinkAddr tcpip.LinkAddress, protocol tcpip.NetworkProtocolNumber, vv *buffer.VectorisedView) {
netProto, ok := n.stack.networkProtocols[protocol]
if !ok {
atomic.AddUint64(&n.stack.stats.IP.PacketsDiscarded, 1)
atomic.AddUint64(&n.stack.stats.UnknownProtocolRcvdPackets, 1)
return
}
// Only IPv4 and IPv6 packets are counted as received IP packets.
if netProto.Number() == header.IPv4ProtocolNumber || netProto.Number() == header.IPv6ProtocolNumber {
atomic.AddUint64(&n.stack.stats.IP.PacketsReceived, 1)
}
// The first view must be large enough for the protocol to parse the
// addresses from it.
if len(vv.First()) < netProto.MinimumPacketSize() {
atomic.AddUint64(&n.stack.stats.IP.PacketsDiscarded, 1)
atomic.AddUint64(&n.stack.stats.MalformedRcvdPackets, 1)
return
}
src, dst := netProto.ParseAddresses(vv.First())
id := NetworkEndpointID{dst}
// Fast path: look up the destination under the read lock and snapshot the
// state needed after the lock is released.
n.mu.RLock()
ref := n.endpoints[id]
if ref != nil && !ref.tryIncRef() {
ref = nil
}
promiscuous := n.promiscuous
subnets := n.subnets
n.mu.RUnlock()
if ref == nil {
// Check if the packet is for a subnet this NIC cares about. A match is
// treated exactly like promiscuous mode from here on.
if !promiscuous {
for _, sn := range subnets {
if sn.Contains(dst) {
promiscuous = true
break
}
}
}
if promiscuous {
// Try again with the lock in exclusive mode. If we still can't
// get the endpoint, create a new "temporary" one. It will only
// exist while there's a route through it.
n.mu.Lock()
ref = n.endpoints[id]
if ref == nil || !ref.tryIncRef() {
// The error from addAddressLocked is discarded; on failure ref is
// nil and the packet falls through to the forwarding/drop path.
ref, _ = n.addAddressLocked(protocol, dst, CanBePrimaryEndpoint, true)
if ref != nil {
// Clearing holdsInsertRef turns the initial reference into the
// one owned by this call, so the decRef at the end of this
// function removes the endpoint unless another reference was
// taken in the meantime.
ref.holdsInsertRef = false
}
}
n.mu.Unlock()
}
}
if ref == nil {
// This NIC doesn't care about the packet. Find a NIC that cares about
// the packet and forward it to that NIC.
// TODO: Should forward the packet even if 'promiscuous' is enabled?
if n.stack.Forwarding() {
r, err := n.stack.FindRoute(0, "", dst, protocol)
if err != nil {
// Can't find a NIC.
atomic.AddUint64(&n.stack.stats.IP.InvalidAddressesReceived, 1)
return
}
defer r.Release()
// Found a NIC.
n2 := r.ref.nic
n2.mu.RLock()
// This ref shadows the outer (nil) ref and belongs to n2.
ref := n2.endpoints[id]
if ref != nil && !ref.tryIncRef() {
ref = nil
}
n2.mu.RUnlock()
r.LocalLinkAddress = n2.linkEP.LinkAddress()
r.RemoteLinkAddress = remoteLinkAddr
if ref == nil {
// n2 doesn't have a destination endpoint.
// Send the packet out of n2. If n2's link endpoint does not
// implement BufferWritingLinkEndpoint the packet is dropped
// without updating any counter.
if ep, ok := n2.linkEP.(BufferWritingLinkEndpoint); ok {
ep.WriteBuffer(&r, vv, protocol)
}
} else {
// n2 owns the destination address: deliver locally on n2.
ref.ep.HandlePacket(&r, vv)
ref.decRef()
}
return
} else {
atomic.AddUint64(&n.stack.stats.IP.InvalidAddressesReceived, 1)
return
}
}
// Local delivery: build a route for the parsed addresses, deliver the
// packet, then release the reference taken on ref above.
r := makeRoute(protocol, dst, src, ref)
r.LocalLinkAddress = linkEP.LinkAddress()
r.RemoteLinkAddress = remoteLinkAddr
ref.ep.HandlePacket(&r, vv)
ref.decRef()
}
// DeliverTransportPacket delivers the packet in vv to the appropriate
// transport protocol endpoint.
//
// Packets for an unregistered transport protocol are counted in
// UnknownProtocolRcvdPackets. Packets that are too short, whose ports cannot
// be parsed, or that no handler accepts are counted in MalformedRcvdPackets.
func (n *NIC) DeliverTransportPacket(r *Route, protocol tcpip.TransportProtocolNumber, vv *buffer.VectorisedView) {
	state, known := n.stack.transportProtocols[protocol]
	if !known {
		atomic.AddUint64(&n.stack.stats.UnknownProtocolRcvdPackets, 1)
		return
	}

	transProto := state.proto
	if minSize := transProto.MinimumPacketSize(); len(vv.First()) < minSize {
		atomic.AddUint64(&n.stack.stats.MalformedRcvdPackets, 1)
		return
	}

	srcPort, dstPort, err := transProto.ParsePorts(vv.First())
	if err != nil {
		atomic.AddUint64(&n.stack.stats.MalformedRcvdPackets, 1)
		return
	}

	id := TransportEndpointID{dstPort, r.LocalAddress, srcPort, r.RemoteAddress}

	// The cases are evaluated top to bottom and evaluation stops at the
	// first handler that accepts the packet.
	switch {
	case n.demux.deliverPacket(r, protocol, vv, id):
		// Delivered by the NIC-local demuxer.
	case n.stack.demux.deliverPacket(r, protocol, vv, id):
		// Delivered by the stack-wide demuxer.
	case state.defaultHandler != nil && state.defaultHandler(r, id, vv):
		// Delivered by the per-stack default handler.
	case transProto.HandleUnknownDestinationPacket(r, id, vv):
		// We could not find an appropriate destination for this packet, so
		// the protocol's global handler took it.
	default:
		atomic.AddUint64(&n.stack.stats.MalformedRcvdPackets, 1)
	}
}
// ID returns the identifier of n, as given to newNIC. The identifier is not
// modified anywhere in this file after construction, so no locking is needed.
func (n *NIC) ID() tcpip.NICID {
return n.id
}
// referencedNetworkEndpoint is a reference-counted wrapper around a
// NetworkEndpoint registered on a NIC. When the count drops to zero the
// endpoint is removed from its NIC and closed.
type referencedNetworkEndpoint struct {
// Entry lets the endpoint be linked into the NIC's per-protocol primary
// list.
ilist.Entry
// refs is the reference count. It is only accessed through sync/atomic
// (see incRef, tryIncRef and decRef).
refs int32
// ep is the wrapped network endpoint.
ep NetworkEndpoint
// nic is the NIC the endpoint is registered on.
nic *NIC
// protocol is the network protocol the endpoint was created for.
protocol tcpip.NetworkProtocolNumber
// linkRes, linkCache and linkEP are populated only when the stack has a
// link address resolver registered for protocol; otherwise they stay at
// their zero values.
linkRes LinkAddressResolver
linkCache LinkAddressCache
linkEP LinkEndpoint
// holdsInsertRef is protected by the NIC's mutex. It indicates whether
// the reference count is biased by 1 due to the insertion of the
// endpoint. It is reset to false when RemoveAddress is called on the
// NIC.
holdsInsertRef bool
}
// decRef releases one reference. When the count reaches zero the endpoint is
// removed from its NIC, which acquires the NIC's mutex exclusively; callers
// that may drop the last reference must therefore not hold that mutex.
func (r *referencedNetworkEndpoint) decRef() {
	if remaining := atomic.AddInt32(&r.refs, -1); remaining != 0 {
		return
	}
	r.nic.removeEndpoint(r)
}
// incRef increments the ref count. It must only be called when the caller is
// known to be holding a reference to the endpoint, otherwise tryIncRef should
// be used. Unlike tryIncRef, it increments unconditionally and does not check
// whether the count has already reached zero.
func (r *referencedNetworkEndpoint) incRef() {
atomic.AddInt32(&r.refs, 1)
}
// tryIncRef attempts to increment the ref count from n to n+1, but only if n
// is not zero. That is, it increments the count and returns true if the
// endpoint is still alive, and leaves the count untouched and returns false
// if the endpoint has already been cleaned up.
func (r *referencedNetworkEndpoint) tryIncRef() bool {
	// Retry the compare-and-swap until it succeeds or the count is observed
	// to be zero.
	for cur := atomic.LoadInt32(&r.refs); cur != 0; cur = atomic.LoadInt32(&r.refs) {
		if atomic.CompareAndSwapInt32(&r.refs, cur, cur+1) {
			return true
		}
	}
	return false
}