blob: 83e98bab97f41568c9f8e81ea352b5a029c03ae4 [file] [log] [blame]
// Copyright 2020 The gVisor Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// Package ipv6 contains the implementation of the ipv6 network protocol.
package ipv6
import (
"encoding/binary"
"fmt"
"hash/fnv"
"math"
"reflect"
"sort"
"sync/atomic"
"time"
"gvisor.dev/gvisor/pkg/sync"
"gvisor.dev/gvisor/pkg/tcpip"
"gvisor.dev/gvisor/pkg/tcpip/buffer"
"gvisor.dev/gvisor/pkg/tcpip/header"
"gvisor.dev/gvisor/pkg/tcpip/header/parse"
"gvisor.dev/gvisor/pkg/tcpip/network/hash"
"gvisor.dev/gvisor/pkg/tcpip/network/internal/fragmentation"
"gvisor.dev/gvisor/pkg/tcpip/network/internal/ip"
"gvisor.dev/gvisor/pkg/tcpip/stack"
)
const (
	// ReassembleTimeout controls how long a fragment will be held.
	// As per RFC 8200 section 4.5:
	//
	//   If insufficient fragments are received to complete reassembly of a packet
	//   within 60 seconds of the reception of the first-arriving fragment of that
	//   packet, reassembly of that packet must be abandoned.
	//
	// Linux also uses 60 seconds for reassembly timeout:
	// https://github.com/torvalds/linux/blob/47ec5303d73ea344e84f46660fff693c57641386/include/net/ipv6.h#L456
	ReassembleTimeout = 60 * time.Second

	// ProtocolNumber is the ipv6 protocol number.
	ProtocolNumber = header.IPv6ProtocolNumber

	// maxPayloadSize is the maximum size that can be encoded in the 16-bit
	// PayloadLength field of the ipv6 header.
	maxPayloadSize = 0xffff

	// DefaultTTL is the default hop limit for IPv6 Packets egressed by
	// Netstack.
	DefaultTTL = 64

	// buckets is the number of counters used to generate fragment
	// identifiers. When fragmenting, a counter is selected by taking the hash
	// of the route modulo buckets and is then atomically incremented.
	buckets = 2048
)
// policyTable is the default policy table defined in RFC 6724 section 2.1.
//
// A more human-readable version:
//
// Prefix Precedence Label
// ::1/128 50 0
// ::/0 40 1
// ::ffff:0:0/96 35 4
// 2002::/16 30 2
// 2001::/32 5 5
// fc00::/7 3 13
// ::/96 1 3
// fec0::/10 1 11
// 3ffe::/16 1 12
//
// The table is sorted by prefix length so longest-prefix match can be easily
// achieved.
//
// We willingly left out ::/96, fec0::/10 and 3ffe::/16 since those prefix
// assignments are deprecated.
//
// As per RFC 4291 section 2.5.5.1 (for ::/96),
//
// The "IPv4-Compatible IPv6 address" is now deprecated because the
// current IPv6 transition mechanisms no longer use these addresses.
// New or updated implementations are not required to support this
// address type.
//
// As per RFC 3879 section 4 (for fec0::/10),
//
// This document formally deprecates the IPv6 site-local unicast prefix
// defined in [RFC3513], i.e., 1111111011 binary or FEC0::/10.
//
// As per RFC 3701 section 1 (for 3ffe::/16),
//
// As clearly stated in [TEST-NEW], the addresses for the 6bone are
// temporary and will be reclaimed in the future. It further states
// that all users of these addresses (within the 3FFE::/16 prefix) will
// be required to renumber at some time in the future.
//
// and section 2,
//
// Thus after the pTLA allocation cutoff date January 1, 2004, it is
// REQUIRED that no new 6bone 3FFE pTLAs be allocated.
//
// MUST NOT BE MODIFIED.
var policyTable = [...]struct {
	// subnet is the prefix an address is tested against (via Contains).
	subnet tcpip.Subnet

	// label is the value getLabel returns for an address contained in subnet.
	label uint8
}{
	// ::1/128
	{
		subnet: header.IPv6Loopback.WithPrefix().Subnet(),
		label:  0,
	},
	// ::ffff:0:0/96
	{
		subnet: header.IPv4MappedIPv6Subnet,
		label:  4,
	},
	// 2001::/32 (Teredo prefix as per RFC 4380 section 2.6).
	{
		subnet: tcpip.AddressWithPrefix{
			Address:   "\x20\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00",
			PrefixLen: 32,
		}.Subnet(),
		label: 5,
	},
	// 2002::/16 (6to4 prefix as per RFC 3056 section 2).
	{
		subnet: tcpip.AddressWithPrefix{
			Address:   "\x20\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00",
			PrefixLen: 16,
		}.Subnet(),
		label: 2,
	},
	// fc00::/7 (Unique local addresses as per RFC 4193 section 3.1).
	{
		subnet: tcpip.AddressWithPrefix{
			Address:   "\xfc\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00",
			PrefixLen: 7,
		}.Subnet(),
		label: 13,
	},
	// ::/0
	//
	// This entry is last so that it only matches addresses that none of the
	// more specific prefixes above contain.
	{
		subnet: header.IPv6EmptySubnet,
		label:  1,
	},
}
// getLabel returns the label of the first policyTable entry whose subnet
// contains addr.
//
// It panics if no entry contains addr.
func getLabel(addr tcpip.Address) uint8 {
	for i := range policyTable {
		if entry := &policyTable[i]; entry.subnet.Contains(addr) {
			return entry.label
		}
	}

	panic(fmt.Sprintf("should have a label for address = %s", addr))
}
// Compile-time assertions that *endpoint satisfies the interfaces below.
var (
	_ stack.DuplicateAddressDetector      = (*endpoint)(nil)
	_ stack.LinkAddressResolver           = (*endpoint)(nil)
	_ stack.LinkResolvableNetworkEndpoint = (*endpoint)(nil)
	_ stack.GroupAddressableEndpoint      = (*endpoint)(nil)
	_ stack.AddressableEndpoint           = (*endpoint)(nil)
	_ stack.NetworkEndpoint               = (*endpoint)(nil)
	_ stack.NDPEndpoint                   = (*endpoint)(nil)
	_ NDPEndpoint                         = (*endpoint)(nil)
)
// endpoint is the IPv6 network endpoint. The compile-time assertions in this
// file require it to implement the stack's network endpoint, addressable
// endpoint, NDP and duplicate address detection interfaces.
type endpoint struct {
	// nic is the network interface the endpoint writes packets to and queries
	// for its ID, MTU, header length, enabled and loopback status.
	nic        stack.NetworkInterface
	dispatcher stack.TransportDispatcher
	// protocol is the owning IPv6 protocol; it provides the options,
	// forwarding state, fragment ID counters and the stack used by the
	// endpoint.
	protocol *protocol
	stack    *stack.Stack
	// stats holds the counters (e.g. stats.ip) updated on the send and receive
	// paths.
	stats sharedStats

	// enabled is set to 1 when the endpoint is enabled and 0 when it is
	// disabled.
	//
	// Must be accessed using atomic operations.
	enabled uint32

	// mu protects the addressable endpoint, NDP and MLD state below.
	mu struct {
		sync.RWMutex

		addressableEndpointState stack.AddressableEndpointState
		ndp                      ndpState
		mld                      mldState
	}

	// dad is used to check if an arbitrary address is already assigned to some
	// neighbor.
	//
	// Note: this is different from mu.ndp.dad which is used to perform DAD for
	// addresses that are assigned to the interface. Removing an address aborts
	// DAD; if we had used the same state, handlers for a removed address would
	// not be called with the actual DAD result.
	//
	// LOCK ORDERING: mu > dad.mu.
	dad struct {
		mu struct {
			sync.Mutex

			dad ip.DAD
		}
	}
}
// NICNameFromID is a function that returns a stable name for the specified NIC,
// even if different NIC IDs are used to refer to the same NIC in different
// program runs. It is used when generating opaque interface identifiers (IIDs).
// If the NIC was created with a name, it is passed to NICNameFromID.
//
// NICNameFromID SHOULD return unique NIC names so unique opaque IIDs are
// generated for the same prefix on different NICs.
type NICNameFromID func(tcpip.NICID, string) string
// OpaqueInterfaceIdentifierOptions holds the options related to the generation
// of opaque interface identifiers (IIDs) as defined by RFC 7217.
type OpaqueInterfaceIdentifierOptions struct {
	// NICNameFromID is a function that returns a stable name for a specified NIC,
	// even if the NIC ID changes over time.
	//
	// Must be specified to generate the opaque IID.
	NICNameFromID NICNameFromID

	// SecretKey is a pseudo-random number used as the secret key when generating
	// opaque IIDs as defined by RFC 7217. The key SHOULD be at least
	// header.OpaqueIIDSecretKeyMinBytes bytes and MUST follow minimum randomness
	// requirements for security as outlined by RFC 4086. SecretKey MUST NOT
	// change between program runs, unless explicitly changed.
	//
	// OpaqueInterfaceIdentifierOptions takes ownership of SecretKey. SecretKey
	// MUST NOT be modified after Stack is created.
	//
	// May be nil, but a nil value is highly discouraged to maintain
	// some level of randomness between nodes.
	SecretKey []byte
}
// CheckDuplicateAddress implements stack.DuplicateAddressDetector.
//
// It uses e.dad (guarded by e.dad.mu), not the e.mu.ndp.dad state that is used
// for addresses assigned to the interface. h is handed to the DAD state
// together with addr.
func (e *endpoint) CheckDuplicateAddress(addr tcpip.Address, h stack.DADCompletionHandler) stack.DADCheckAddressDisposition {
	e.dad.mu.Lock()
	defer e.dad.mu.Unlock()
	return e.dad.mu.dad.CheckDuplicateAddressLocked(addr, h)
}
// SetDADConfigurations implements stack.DuplicateAddressDetector.
//
// The same configurations are applied to both DAD states held by the
// endpoint: e.mu.ndp.dad and e.dad.mu.dad.
func (e *endpoint) SetDADConfigurations(c stack.DADConfigurations) {
	// Locks are taken in the documented order: e.mu before e.dad.mu.
	e.mu.Lock()
	defer e.mu.Unlock()
	e.dad.mu.Lock()
	defer e.dad.mu.Unlock()

	e.mu.ndp.dad.SetConfigsLocked(c)
	e.dad.mu.dad.SetConfigsLocked(c)
}
// DuplicateAddressProtocol implements stack.DuplicateAddressDetector.
//
// It always returns the IPv6 protocol number.
func (*endpoint) DuplicateAddressProtocol() tcpip.NetworkProtocolNumber {
	return ProtocolNumber
}
// HandleLinkResolutionFailure implements stack.LinkResolvableNetworkEndpoint.
//
// The whole of pkt is repackaged as the data of a new packet buffer, which is
// then reported to handleControl as a destination address unreachable error.
func (e *endpoint) HandleLinkResolutionFailure(pkt *stack.PacketBuffer) {
	// handleControl expects the entire offending packet to be in the packet
	// buffer's data field.
	offending := buffer.NewVectorisedView(pkt.Size(), pkt.Views())
	ctrlPkt := stack.NewPacketBuffer(stack.PacketBufferOptions{Data: offending})
	ctrlPkt.NICID = e.nic.ID()
	ctrlPkt.NetworkProtocolNumber = ProtocolNumber

	e.handleControl(&icmpv6DestinationAddressUnreachableSockError{}, ctrlPkt)
}
// onAddressAssignedLocked is invoked when addr has been assigned. Queued MLD
// reports are sent if addr is a link-local address; other addresses require
// no work.
//
// Precondition: e.mu must be exclusively locked.
func (e *endpoint) onAddressAssignedLocked(addr tcpip.Address) {
	// Only a link-local address can be used as the source of MLD messages, so
	// there is nothing to do for any other kind of address.
	if !header.IsV6LinkLocalAddress(addr) {
		return
	}

	// As per RFC 2710 section 3,
	//
	//   All MLD messages described in this document are sent with a link-local
	//   IPv6 Source Address, ...
	//
	// We just completed DAD for a link-local address, so attempt to send any
	// queued MLD reports. Note, we may have sent reports already for some of the
	// groups before we had a valid link-local address to use as the source for
	// the MLD messages, but that was only so that MLD snooping switches are aware
	// of our membership to groups - routers would not have handled those reports.
	//
	// As per RFC 3590 section 4,
	//
	//   MLD Report and Done messages are sent with a link-local address as
	//   the IPv6 source address, if a valid address is available on the
	//   interface. If a valid link-local address is not available (e.g., one
	//   has not been configured), the message is sent with the unspecified
	//   address (::) as the IPv6 source address.
	//
	//   Once a valid link-local address is available, a node SHOULD generate
	//   new MLD Report messages for all multicast addresses joined on the
	//   interface.
	//
	//   Routers receiving an MLD Report or Done message with the unspecified
	//   address as the IPv6 source address MUST silently discard the packet
	//   without taking any action on the packets contents.
	//
	//   Snooping switches MUST manage multicast forwarding state based on MLD
	//   Report and Done messages sent with the unspecified address as the
	//   IPv6 source address.
	e.mu.mld.sendQueuedReports()
}
// InvalidateDefaultRouter implements stack.NDPEndpoint.
//
// It holds e.mu exclusively while asking the NDP state to invalidate rtr.
func (e *endpoint) InvalidateDefaultRouter(rtr tcpip.Address) {
	e.mu.Lock()
	defer e.mu.Unlock()
	e.mu.ndp.invalidateDefaultRouter(rtr)
}
// SetNDPConfigurations implements NDPEndpoint.
//
// c is received by value; validate is called on that local copy before the
// lock is taken and the result is stored as the endpoint's NDP configuration.
// NOTE(review): validate presumably normalizes invalid fields in place;
// confirm against its definition.
func (e *endpoint) SetNDPConfigurations(c NDPConfigurations) {
	c.validate()
	e.mu.Lock()
	defer e.mu.Unlock()
	e.mu.ndp.configs = c
}
// hasTentativeAddr reports whether addr is known to e and is currently of
// kind stack.PermanentTentative.
func (e *endpoint) hasTentativeAddr(addr tcpip.Address) bool {
	// The lock is only needed for the lookup; the kind is read afterwards.
	e.mu.RLock()
	ep := e.getAddressRLocked(addr)
	e.mu.RUnlock()

	if ep == nil {
		return false
	}
	return ep.GetKind() == stack.PermanentTentative
}
// dupTentativeAddrDetected attempts to inform e that a tentative addr is a
// duplicate on a link.
//
// dupTentativeAddrDetected removes the tentative address if it exists. If the
// address was generated via SLAAC, an attempt is made to generate a new
// address.
//
// holderLinkAddr is reported in the DADDupAddrDetected result passed along
// when the address is removed. nonce is handed to the DAD state so that a
// looped-back copy of our own DAD message is not treated as a conflict.
//
// Returns:
//   - *tcpip.ErrBadAddress if addr is not found on e.
//   - *tcpip.ErrInvalidEndpointState if addr is not tentative.
//   - nil if the message is attributed to our own looped-back DAD message, or
//     if the duplicate address was removed successfully.
//   - the error from removing the address otherwise.
//
// It panics if no DAD state exists for the tentative address, or on an
// unrecognized DAD result or address config type.
func (e *endpoint) dupTentativeAddrDetected(addr tcpip.Address, holderLinkAddr tcpip.LinkAddress, nonce []byte) tcpip.Error {
	e.mu.Lock()
	defer e.mu.Unlock()

	addressEndpoint := e.getAddressRLocked(addr)
	if addressEndpoint == nil {
		return &tcpip.ErrBadAddress{}
	}

	if addressEndpoint.GetKind() != stack.PermanentTentative {
		return &tcpip.ErrInvalidEndpointState{}
	}

	switch result := e.mu.ndp.dad.ExtendIfNonceEqualLocked(addr, nonce); result {
	case ip.Extended:
		// The nonce we got back was the same we sent so we know the message
		// indicating a duplicate address was likely ours so do not consider
		// the address duplicate here.
		return nil
	case ip.AlreadyExtended:
		// See Extended.
		//
		// Our DAD message was looped back already.
		return nil
	case ip.NoDADStateFound:
		panic(fmt.Sprintf("expected DAD state for tentative address %s", addr))
	case ip.NonceDisabled:
		// If nonce is disabled then we have no way to know if the packet was
		// looped-back so we have to assume it indicates a duplicate address.
		fallthrough
	case ip.NonceNotEqual:
		// If the address is a SLAAC address, do not invalidate its SLAAC prefix as an
		// attempt will be made to generate a new address for it.
		if err := e.removePermanentEndpointLocked(addressEndpoint, false /* allowSLAACInvalidation */, &stack.DADDupAddrDetected{HolderLinkAddress: holderLinkAddr}); err != nil {
			return err
		}

		// The removed address's prefix selects which SLAAC prefix to regenerate
		// an address for.
		prefix := addressEndpoint.Subnet()

		switch t := addressEndpoint.ConfigType(); t {
		case stack.AddressConfigStatic:
			// Statically configured addresses are not regenerated.
		case stack.AddressConfigSlaac:
			e.mu.ndp.regenerateSLAACAddr(prefix)
		case stack.AddressConfigSlaacTemp:
			// Do not reset the generation attempts counter for the prefix as the
			// temporary address is being regenerated in response to a DAD conflict.
			e.mu.ndp.regenerateTempSLAACAddr(prefix, false /* resetGenAttempts */)
		default:
			panic(fmt.Sprintf("unrecognized address config type = %d", t))
		}

		return nil
	default:
		panic(fmt.Sprintf("unhandled result = %d", result))
	}
}
// transitionForwarding updates the endpoint's NDP state to reflect a change
// of the forwarding status to forwarding. It does nothing when the endpoint
// is not enabled.
//
// Must only be called when the forwarding status changes.
func (e *endpoint) transitionForwarding(forwarding bool) {
	e.mu.Lock()
	defer e.mu.Unlock()

	if !e.Enabled() {
		return
	}

	if !forwarding {
		// When transitioning into an IPv6 host, NDP router solicitations are
		// started.
		e.mu.ndp.startSolicitingRouters()
		return
	}

	// When transitioning into an IPv6 router, host-only state (NDP discovered
	// routers, discovered on-link prefixes, and auto-generated addresses) is
	// cleaned up/invalidated and NDP router solicitations are stopped.
	e.mu.ndp.stopSolicitingRouters()
	e.mu.ndp.cleanupState(true /* hostOnly */)
}
// Enable implements stack.NetworkEndpoint.
//
// It returns *tcpip.ErrNotPermitted if the NIC is not enabled and nil if the
// endpoint was already enabled. Otherwise it initializes MLD, joins the
// all-nodes multicast group, (re)starts DAD on permanent unicast addresses,
// optionally auto-generates a link-local address and, when not forwarding,
// starts soliciting routers. An error from starting DAD is returned as-is.
//
// NOTE(review): when starting DAD fails, the endpoint has already been marked
// enabled by setEnabled and is not reverted here - confirm this is intended.
func (e *endpoint) Enable() tcpip.Error {
	e.mu.Lock()
	defer e.mu.Unlock()

	// If the NIC is not enabled, the endpoint can't do anything meaningful so
	// don't enable the endpoint.
	if !e.nic.Enabled() {
		return &tcpip.ErrNotPermitted{}
	}

	// If the endpoint is already enabled, there is nothing for it to do.
	if !e.setEnabled(true) {
		return nil
	}

	// Groups may have been joined when the endpoint was disabled, or the
	// endpoint may have left groups from the perspective of MLD when the
	// endpoint was disabled. Either way, we need to let routers know to
	// send us multicast traffic.
	e.mu.mld.initializeAll()

	// Join the IPv6 All-Nodes Multicast group if the stack is configured to
	// use IPv6. This is required to ensure that this node properly receives
	// and responds to the various NDP messages that are destined to the
	// all-nodes multicast address. An example is the Neighbor Advertisement
	// when we perform Duplicate Address Detection, or Router Advertisement
	// when we do Router Discovery. See RFC 4862, section 5.4.2 and RFC 4861
	// section 4.2 for more information.
	//
	// Also auto-generate an IPv6 link-local address based on the endpoint's
	// link address if it is configured to do so. Note, each interface is
	// required to have IPv6 link-local unicast address, as per RFC 4291
	// section 2.1.

	// Join the All-Nodes multicast group before starting DAD as responses to DAD
	// (NDP NS) messages may be sent to the All-Nodes multicast group if the
	// source address of the NDP NS is the unspecified address, as per RFC 4861
	// section 7.2.4.
	if err := e.joinGroupLocked(header.IPv6AllNodesMulticastAddress); err != nil {
		// joinGroupLocked only returns an error if the group address is not a valid
		// IPv6 multicast address.
		panic(fmt.Sprintf("e.joinGroupLocked(%s): %s", header.IPv6AllNodesMulticastAddress, err))
	}

	// Perform DAD on all the unicast IPv6 endpoints that are in the permanent
	// state.
	//
	// Addresses may have already completed DAD but in the time since the endpoint
	// was last enabled, other devices may have acquired the same addresses.
	var err tcpip.Error
	e.mu.addressableEndpointState.ForEachEndpoint(func(addressEndpoint stack.AddressEndpoint) bool {
		addr := addressEndpoint.AddressWithPrefix().Address
		if !header.IsV6UnicastAddress(addr) {
			return true
		}

		switch addressEndpoint.GetKind() {
		case stack.Permanent:
			// Demote the address to tentative so that DAD is performed again.
			addressEndpoint.SetKind(stack.PermanentTentative)
			fallthrough
		case stack.PermanentTentative:
			// Returning false stops the iteration on the first DAD failure; the
			// error is reported through the captured err.
			err = e.mu.ndp.startDuplicateAddressDetection(addr, addressEndpoint)
			return err == nil
		default:
			return true
		}
	})
	if err != nil {
		return err
	}

	// Do not auto-generate an IPv6 link-local address for loopback devices.
	if e.protocol.options.AutoGenLinkLocal && !e.nic.IsLoopback() {
		// The valid and preferred lifetime is infinite for the auto-generated
		// link-local address.
		e.mu.ndp.doSLAAC(header.IPv6LinkLocalPrefix.Subnet(), header.NDPInfiniteLifetime, header.NDPInfiniteLifetime)
	}

	// If we are operating as a router, then do not solicit routers since we
	// won't process the RAs anyway.
	//
	// Routers do not process Router Advertisements (RA) the same way a host
	// does. That is, routers do not learn from RAs (e.g. on-link prefixes
	// and default routers). Therefore, soliciting RAs from other routers on
	// a link is unnecessary for routers.
	if !e.protocol.Forwarding() {
		e.mu.ndp.startSolicitingRouters()
	}

	return nil
}
// Enabled implements stack.NetworkEndpoint.
//
// It returns true only when both the NIC and the endpoint itself are enabled.
func (e *endpoint) Enabled() bool {
	return e.nic.Enabled() && e.isEnabled()
}
// isEnabled returns true if the endpoint is enabled, regardless of the
// enabled status of the NIC.
//
// The enabled flag is read atomically, so no lock is required.
func (e *endpoint) isEnabled() bool {
	return atomic.LoadUint32(&e.enabled) == 1
}
// setEnabled atomically stores the requested enabled status (1 for true, 0
// for false) in e.enabled.
//
// Returns true if the enabled status was updated, i.e. the previous value was
// the opposite of the requested one.
func (e *endpoint) setEnabled(v bool) bool {
	next, expectedPrev := uint32(0), uint32(1)
	if v {
		next, expectedPrev = 1, 0
	}
	return atomic.SwapUint32(&e.enabled, next) == expectedPrev
}
// Disable implements stack.NetworkEndpoint.
//
// It takes e.mu exclusively and delegates to disableLocked.
func (e *endpoint) Disable() {
	e.mu.Lock()
	defer e.mu.Unlock()
	e.disableLocked()
}
// disableLocked disables the endpoint: it stops router solicitation, aborts
// DAD for tentative unicast addresses, cleans up NDP state, leaves the
// all-nodes multicast group, soft-leaves all MLD groups and finally clears
// the enabled flag. It is a no-op if Enabled() reports false.
//
// Precondition: e.mu must be exclusively locked (Disable acquires it before
// calling).
func (e *endpoint) disableLocked() {
	if !e.Enabled() {
		return
	}

	e.mu.ndp.stopSolicitingRouters()
	// Stop DAD for all the tentative unicast addresses.
	e.mu.addressableEndpointState.ForEachEndpoint(func(addressEndpoint stack.AddressEndpoint) bool {
		if addressEndpoint.GetKind() != stack.PermanentTentative {
			return true
		}

		addr := addressEndpoint.AddressWithPrefix().Address
		if header.IsV6UnicastAddress(addr) {
			e.mu.ndp.stopDuplicateAddressDetection(addr, &stack.DADAborted{})
		}

		return true
	})
	e.mu.ndp.cleanupState(false /* hostOnly */)

	// The endpoint may have already left the multicast group.
	switch err := e.leaveGroupLocked(header.IPv6AllNodesMulticastAddress); err.(type) {
	case nil, *tcpip.ErrBadLocalAddress:
		// Success, or the group was not joined: both are acceptable.
	default:
		panic(fmt.Sprintf("unexpected error when leaving group = %s: %s", header.IPv6AllNodesMulticastAddress, err))
	}

	// Leave groups from the perspective of MLD so that routers know that
	// we are no longer interested in the group.
	e.mu.mld.softLeaveAll()

	// The flag is cleared last; it must still have been set since Enabled()
	// returned true above.
	if !e.setEnabled(false) {
		panic("should have only done work to disable the endpoint if it was enabled")
	}
}
// DefaultTTL is the default hop limit for this endpoint.
//
// The value is obtained from the owning protocol.
func (e *endpoint) DefaultTTL() uint8 {
	return e.protocol.DefaultTTL()
}
// MTU implements stack.NetworkEndpoint.MTU. It returns the network MTU
// calculated from the link-layer MTU and the minimum IPv6 header size, or 0
// if that calculation fails.
func (e *endpoint) MTU() uint32 {
	if mtu, err := calculateNetworkMTU(e.nic.MTU(), header.IPv6MinimumSize); err == nil {
		return mtu
	}
	return 0
}
// MaxHeaderLength returns the maximum length needed by ipv6 headers (and
// underlying protocols).
//
// The result is the NIC's maximum header length plus the minimum IPv6 header
// size; extension headers are not accounted for.
func (e *endpoint) MaxHeaderLength() uint16 {
	// TODO(gvisor.dev/issues/5035): The maximum header length returned here does
	// not open the possibility for the caller to know about size required for
	// extension headers.
	return e.nic.MaxHeaderLength() + header.IPv6MinimumSize
}
// addIPHeader pushes an IPv6 header, followed by the serialized
// extensionHeaders, onto pkt's network header and tags pkt with the IPv6
// protocol number.
//
// The encoded payload length is the current size of pkt plus the length of
// the extension headers. *tcpip.ErrMessageTooLong is returned, and pkt is
// left untouched, if that does not fit in the 16-bit PayloadLength field.
func addIPHeader(srcAddr, dstAddr tcpip.Address, pkt *stack.PacketBuffer, params stack.NetworkHeaderParams, extensionHeaders header.IPv6ExtHdrSerializer) tcpip.Error {
	extHdrsLen := extensionHeaders.Length()
	payloadLen := pkt.Size() + extHdrsLen
	if payloadLen > math.MaxUint16 {
		return &tcpip.ErrMessageTooLong{}
	}

	// Reserve room for the fixed header and the extension headers in one go.
	hdr := header.IPv6(pkt.NetworkHeader().Push(header.IPv6MinimumSize + extHdrsLen))
	fields := header.IPv6Fields{
		PayloadLength:     uint16(payloadLen),
		TransportProtocol: params.Protocol,
		HopLimit:          params.TTL,
		TrafficClass:      params.TOS,
		SrcAddr:           srcAddr,
		DstAddr:           dstAddr,
		ExtensionHeaders:  extensionHeaders,
	}
	hdr.Encode(&fields)

	pkt.NetworkProtocolNumber = ProtocolNumber
	return nil
}
// packetMustBeFragmented reports whether pkt has to be fragmented before it
// is sent: its transport header plus data must exceed networkMTU and GSO must
// not be in use (gso is nil or of type stack.GSONone).
func packetMustBeFragmented(pkt *stack.PacketBuffer, networkMTU uint32, gso *stack.GSO) bool {
	payloadLen := pkt.TransportHeader().View().Size() + pkt.Data().Size()
	if gso != nil && gso.Type != stack.GSONone {
		return false
	}
	return uint32(payloadLen) > networkMTU
}
// handleFragments fragments pkt and calls the handler function on each
// fragment. It returns the number of fragments handled and the number of
// fragments left to be processed. The IP header must already be present in the
// original packet. The transport header protocol number is required to avoid
// parsing the IPv6 extension headers.
//
// networkMTU is the space available for everything after the network header;
// each fragment carries at most the largest multiple of 8 bytes that fits in
// networkMTU once the fragment header is accounted for. gso is not used by
// this function.
//
// (0, 1, *tcpip.ErrMessageTooLong) is returned without calling handler when
// networkMTU cannot hold a fragment header plus 8 bytes of payload, or when
// the transport header does not fit in a single fragment. If handler fails,
// fragmentation stops and handler's error is returned together with the
// counts so far (the failed fragment is counted as remaining).
func (e *endpoint) handleFragments(r *stack.Route, gso *stack.GSO, networkMTU uint32, pkt *stack.PacketBuffer, transProto tcpip.TransportProtocolNumber, handler func(*stack.PacketBuffer) tcpip.Error) (int, int, tcpip.Error) {
	networkHeader := header.IPv6(pkt.NetworkHeader().View())

	// networkMTU is unsigned: without this check the subtraction below would
	// wrap around to a huge value for an MTU smaller than the fragment header
	// and slip past the minimum payload size check.
	if networkMTU < header.IPv6FragmentHeaderSize {
		return 0, 1, &tcpip.ErrMessageTooLong{}
	}

	// TODO(gvisor.dev/issue/3912): Once the Authentication or ESP Headers are
	// supported for outbound packets, their length should not affect the fragment
	// maximum payload length because they should only be transmitted once.
	fragmentPayloadLen := (networkMTU - header.IPv6FragmentHeaderSize) &^ 7
	if fragmentPayloadLen < header.IPv6FragmentExtHdrFragmentOffsetBytesPerUnit {
		// We need at least 8 bytes of space left for the fragmentable part because
		// the fragment payload must obviously be non-zero and must be a multiple
		// of 8 as per RFC 8200 section 4.5:
		//   Each complete fragment, except possibly the last ("rightmost") one, is
		//   an integer multiple of 8 octets long.
		return 0, 1, &tcpip.ErrMessageTooLong{}
	}

	if fragmentPayloadLen < uint32(pkt.TransportHeader().View().Size()) {
		// As per RFC 8200 Section 4.5, the Transport Header is expected to be small
		// enough to fit in the first fragment.
		return 0, 1, &tcpip.ErrMessageTooLong{}
	}

	pf := fragmentation.MakePacketFragmenter(pkt, fragmentPayloadLen, calculateFragmentReserve(pkt))
	// All fragments of this packet share one identifier, taken from the counter
	// selected by the route's hash.
	id := atomic.AddUint32(&e.protocol.ids[hashRoute(r, e.protocol.hashIV)%buckets], 1)

	var n int
	for {
		fragPkt, more := buildNextFragment(&pf, networkHeader, transProto, id)
		if err := handler(fragPkt); err != nil {
			// The fragment that just failed is counted as not processed.
			return n, pf.RemainingFragmentCount() + 1, err
		}
		n++
		if !more {
			return n, pf.RemainingFragmentCount(), nil
		}
	}
}
// WritePacket writes a packet to the given destination address and protocol.
//
// An IPv6 header built from r's local and remote addresses and params is
// added to pkt, which is then run through the iptables Output hook. A packet
// dropped by iptables is counted and reported as success (nil). A NATted
// packet whose new destination is owned by this stack is delivered locally;
// every other packet is handed to writePacket. An error from adding the
// header is returned as-is.
func (e *endpoint) WritePacket(r *stack.Route, gso *stack.GSO, params stack.NetworkHeaderParams, pkt *stack.PacketBuffer) tcpip.Error {
	if err := addIPHeader(r.LocalAddress, r.RemoteAddress, pkt, params, nil /* extensionHeaders */); err != nil {
		return err
	}

	// iptables filtering. All packets that reach here are locally
	// generated.
	outNicName := e.protocol.stack.FindNICNameFromID(e.nic.ID())
	if ok := e.protocol.stack.IPTables().Check(stack.Output, pkt, gso, r, "" /* preroutingAddr */, "" /* inNicName */, outNicName); !ok {
		// iptables is telling us to drop the packet.
		e.stats.ip.IPTablesOutputDropped.Increment()
		return nil
	}

	// If the packet is manipulated as per NAT Output rules, handle packet
	// based on destination address and do not send the packet to link
	// layer.
	//
	// TODO(gvisor.dev/issue/170): We should do this for every
	// packet, rather than only NATted packets, but removing this check
	// short circuits broadcasts before they are sent out to other hosts.
	if pkt.NatDone {
		netHeader := header.IPv6(pkt.NetworkHeader().View())
		if ep := e.protocol.findEndpointWithAddress(netHeader.DestinationAddress()); ep != nil {
			// Since we rewrote the packet but it is being routed back to us, we
			// can safely assume the checksum is valid.
			ep.handleLocalPacket(pkt, true /* canSkipRXChecksum */)
			return nil
		}
	}

	return e.writePacket(r, gso, pkt, params.Protocol, false /* headerIncluded */)
}
// writePacket sends pkt, which must already carry its IPv6 header, according
// to the loop flags of r: it is looped back locally when stack.PacketLoop is
// set and written to the NIC when stack.PacketOut is set, fragmenting first
// if required.
//
// protocol is the transport protocol of pkt and is only needed when
// fragmenting. headerIncluded tells whether the header was supplied by the
// caller of the stack rather than generated by it. The sent and error
// counters in e.stats.ip are updated to reflect the outcome.
func (e *endpoint) writePacket(r *stack.Route, gso *stack.GSO, pkt *stack.PacketBuffer, protocol tcpip.TransportProtocolNumber, headerIncluded bool) tcpip.Error {
	if r.Loop&stack.PacketLoop != 0 {
		// If the packet was generated by the stack (not a raw/packet endpoint
		// where a packet may be written with the header included), then we can
		// safely assume the checksum is valid.
		e.handleLocalPacket(pkt, !headerIncluded /* canSkipRXChecksum */)
	}
	if r.Loop&stack.PacketOut == 0 {
		return nil
	}

	ipStats := e.stats.ip

	networkMTU, err := calculateNetworkMTU(e.nic.MTU(), uint32(pkt.NetworkHeader().View().Size()))
	if err != nil {
		ipStats.OutgoingPacketErrors.Increment()
		return err
	}

	// Common case: the packet fits and is written as-is.
	if !packetMustBeFragmented(pkt, networkMTU, gso) {
		if err := e.nic.WritePacket(r, gso, ProtocolNumber, pkt); err != nil {
			ipStats.OutgoingPacketErrors.Increment()
			return err
		}
		ipStats.PacketsSent.Increment()
		return nil
	}

	// TODO(gvisor.dev/issue/3884): Evaluate whether we want to send each
	// fragment one by one using WritePacket() (current strategy) or if we
	// want to create a PacketBufferList from the fragments and feed it to
	// WritePackets(). It'll be faster but cost more memory.
	sendFragment := func(fragPkt *stack.PacketBuffer) tcpip.Error {
		return e.nic.WritePacket(r, gso, ProtocolNumber, fragPkt)
	}
	sent, remain, err := e.handleFragments(r, gso, networkMTU, pkt, protocol, sendFragment)
	ipStats.PacketsSent.IncrementBy(uint64(sent))
	ipStats.OutgoingPacketErrors.IncrementBy(uint64(remain))
	return err
}
// WritePackets implements stack.NetworkEndpoint.WritePackets.
//
// Every packet in pkts gets an IPv6 header and is replaced in the list by its
// fragments when it must be fragmented. The resulting list is run through the
// iptables Output hook: dropped packets are removed, NATted packets now
// destined to this stack are delivered locally, and the rest is written to
// the NIC as a batch.
//
// The returned count is the number of packets written to the NIC plus the
// ones delivered locally and the ones dropped by iptables. (pkts.Len(), nil)
// is returned without doing any work when r does not have stack.PacketOut
// set, and (0, err) when adding a header, computing the MTU or fragmenting
// fails.
//
// It panics if r has stack.PacketLoop set, which is not implemented.
func (e *endpoint) WritePackets(r *stack.Route, gso *stack.GSO, pkts stack.PacketBufferList, params stack.NetworkHeaderParams) (int, tcpip.Error) {
	if r.Loop&stack.PacketLoop != 0 {
		panic("not implemented")
	}
	if r.Loop&stack.PacketOut == 0 {
		return pkts.Len(), nil
	}

	stats := e.stats.ip
	linkMTU := e.nic.MTU()
	for pb := pkts.Front(); pb != nil; pb = pb.Next() {
		if err := addIPHeader(r.LocalAddress, r.RemoteAddress, pb, params, nil /* extensionHeaders */); err != nil {
			return 0, err
		}

		networkMTU, err := calculateNetworkMTU(linkMTU, uint32(pb.NetworkHeader().View().Size()))
		if err != nil {
			stats.OutgoingPacketErrors.IncrementBy(uint64(pkts.Len()))
			return 0, err
		}
		if packetMustBeFragmented(pb, networkMTU, gso) {
			// Keep track of the packet that is about to be fragmented so it can be
			// removed once the fragmentation is done.
			originalPkt := pb
			if _, _, err := e.handleFragments(r, gso, networkMTU, pb, params.Protocol, func(fragPkt *stack.PacketBuffer) tcpip.Error {
				// Modify the packet list in place with the new fragments.
				//
				// pb is advanced to the fragment just inserted so that fragments
				// stay in order and the outer loop resumes after the last one.
				pkts.InsertAfter(pb, fragPkt)
				pb = fragPkt
				return nil
			}); err != nil {
				stats.OutgoingPacketErrors.IncrementBy(uint64(pkts.Len()))
				return 0, err
			}
			// Remove the packet that was just fragmented and process the rest.
			pkts.Remove(originalPkt)
		}
	}

	// iptables filtering. All packets that reach here are locally
	// generated.
	outNicName := e.protocol.stack.FindNICNameFromID(e.nic.ID())
	dropped, natPkts := e.protocol.stack.IPTables().CheckPackets(stack.Output, pkts, gso, r, "" /* inNicName */, outNicName)
	stats.IPTablesOutputDropped.IncrementBy(uint64(len(dropped)))
	for pkt := range dropped {
		pkts.Remove(pkt)
	}

	// The NAT-ed packets may now be destined for us.
	locallyDelivered := 0
	for pkt := range natPkts {
		ep := e.protocol.findEndpointWithAddress(header.IPv6(pkt.NetworkHeader().View()).DestinationAddress())
		if ep == nil {
			// The NAT-ed packet is still destined for some remote node.
			continue
		}
		// Do not send the locally destined packet out the NIC.
		pkts.Remove(pkt)

		// Deliver the packet locally.
		ep.handleLocalPacket(pkt, true /* canSkipRXChecksum */)
		locallyDelivered++
	}

	// The rest of the packets can be delivered to the NIC as a batch.
	//
	// The length is captured before the write so that the number of packets
	// that failed to be written can be derived from it.
	pktsLen := pkts.Len()
	written, err := e.nic.WritePackets(r, gso, pkts, ProtocolNumber)
	stats.PacketsSent.IncrementBy(uint64(written))
	stats.OutgoingPacketErrors.IncrementBy(uint64(pktsLen - written))

	// Dropped packets aren't errors, so include them in the return value.
	return locallyDelivered + written + len(dropped), err
}
// WriteHeaderIncludedPacket implements stack.NetworkEndpoint.
//
// pkt's data must start with an IPv6 header supplied by the caller. Before
// the packet is sent, the header's payload length is overwritten with the
// actual size of the data following the fixed header, an unspecified source
// address is replaced with r.LocalAddress and the destination address is set
// to r.RemoteAddress.
//
// Returns:
//   - *tcpip.ErrMalformedHeader if the data is too short to hold an IPv6
//     header, or if the packet fails parsing or header validation.
//   - *tcpip.ErrMessageTooLong if the payload does not fit in the 16-bit
//     PayloadLength field.
//   - the result of writePacket otherwise.
func (e *endpoint) WriteHeaderIncludedPacket(r *stack.Route, pkt *stack.PacketBuffer) tcpip.Error {
	// The packet already has an IP header, but there are a few required checks.
	h, ok := pkt.Data().PullUp(header.IPv6MinimumSize)
	if !ok {
		return &tcpip.ErrMalformedHeader{}
	}
	ipHdr := header.IPv6(h)

	// Always set the payload length.
	//
	// The length must fit in the header's 16-bit field. Narrowing a larger
	// value would silently encode a truncated length that no longer matches the
	// packet, so such packets are rejected the same way addIPHeader rejects
	// them.
	pktSize := pkt.Data().Size()
	payloadLen := pktSize - header.IPv6MinimumSize
	if payloadLen > math.MaxUint16 {
		return &tcpip.ErrMessageTooLong{}
	}
	ipHdr.SetPayloadLength(uint16(payloadLen))

	// Set the source address when zero.
	if ipHdr.SourceAddress() == header.IPv6Any {
		ipHdr.SetSourceAddress(r.LocalAddress)
	}

	// Set the destination. If the packet already included a destination, it will
	// be part of the route anyways.
	ipHdr.SetDestinationAddress(r.RemoteAddress)

	// Populate the packet buffer's network header and don't allow an invalid
	// packet to be sent.
	//
	// Note that parsing only makes sure that the packet is well formed as per the
	// wire format. We also want to check if the header's fields are valid before
	// sending the packet.
	proto, _, _, _, ok := parse.IPv6(pkt)
	if !ok || !header.IPv6(pkt.NetworkHeader().View()).IsValid(pktSize) {
		return &tcpip.ErrMalformedHeader{}
	}

	return e.writePacket(r, nil /* gso */, pkt, proto, true /* headerIncluded */)
}
// forwardPacket attempts to forward a packet to its final destination.
//
// A packet whose hop limit is at most 1 is answered through
// e.protocol.returnError with a hop limit exceeded reason. A packet destined
// to an address owned by the stack is handed to the owning endpoint.
// Otherwise a route to the destination is looked up and a copy of the packet,
// with its hop limit decremented, is written to it. Errors from the route
// lookup and from the write are returned as-is.
func (e *endpoint) forwardPacket(pkt *stack.PacketBuffer) tcpip.Error {
	ipHdr := header.IPv6(pkt.NetworkHeader().View())

	ttl := ipHdr.HopLimit()
	if ttl <= 1 {
		// As per RFC 4443 section 3.3,
		//
		//   If a router receives a packet with a Hop Limit of zero, or if a
		//   router decrements a packet's Hop Limit to zero, it MUST discard the
		//   packet and originate an ICMPv6 Time Exceeded message with Code 0 to
		//   the source of the packet.  This indicates either a routing loop or
		//   too small an initial Hop Limit value.
		return e.protocol.returnError(&icmpReasonHopLimitExceeded{}, pkt)
	}

	// Check if the destination is owned by the stack.
	dst := ipHdr.DestinationAddress()
	if localEP := e.protocol.findEndpointWithAddress(dst); localEP != nil {
		localEP.handleValidatedPacket(ipHdr, pkt)
		return nil
	}

	route, err := e.protocol.stack.FindRoute(0, "", dst, ProtocolNumber, false /* multicastLoop */)
	if err != nil {
		return err
	}
	defer route.Release()

	// We need to do a deep copy of the IP packet because
	// WriteHeaderIncludedPacket takes ownership of the packet buffer, but we do
	// not own it.
	fwdHdr := header.IPv6(stack.PayloadSince(pkt.NetworkHeader()))

	// As per RFC 8200 section 3,
	//
	//   Hop Limit           8-bit unsigned integer. Decremented by 1 by
	//                       each node that forwards the packet.
	fwdHdr.SetHopLimit(ttl - 1)

	fwdPkt := stack.NewPacketBuffer(stack.PacketBufferOptions{
		ReserveHeaderBytes: int(route.MaxHeaderLength()),
		Data:               buffer.View(fwdHdr).ToVectorisedView(),
	})
	return route.WriteHeaderIncludedPacket(fwdPkt)
}
// HandlePacket is called by the link layer when new ipv6 packets arrive for
// this endpoint.
//
// Every packet is counted as received. It is then dropped, with the matching
// counter incremented, when the endpoint is disabled or the packet fails
// parsing and validation. On non-loopback NICs it is additionally dropped
// when it carries a loopback address (unless external loopback traffic is
// allowed), when HandleLocal is set and its source address is assigned to
// this endpoint, or when the iptables Prerouting hook rejects it. Packets
// that pass are handed to handleValidatedPacket.
func (e *endpoint) HandlePacket(pkt *stack.PacketBuffer) {
	stats := e.stats.ip

	stats.PacketsReceived.Increment()

	if !e.isEnabled() {
		stats.DisabledPacketsReceived.Increment()
		return
	}

	h, ok := e.protocol.parseAndValidate(pkt)
	if !ok {
		stats.MalformedPacketsReceived.Increment()
		return
	}

	if !e.nic.IsLoopback() {
		if !e.protocol.options.AllowExternalLoopbackTraffic {
			if header.IsV6LoopbackAddress(h.SourceAddress()) {
				stats.InvalidSourceAddressesReceived.Increment()
				return
			}

			if header.IsV6LoopbackAddress(h.DestinationAddress()) {
				stats.InvalidDestinationAddressesReceived.Increment()
				return
			}
		}

		if e.protocol.stack.HandleLocal() {
			addressEndpoint := e.AcquireAssignedAddress(header.IPv6(pkt.NetworkHeader().View()).SourceAddress(), e.nic.Promiscuous(), stack.CanBePrimaryEndpoint)
			if addressEndpoint != nil {
				// Only the existence of the address matters; release the reference
				// right away.
				addressEndpoint.DecRef()

				// The source address is one of our own, so we never should have gotten
				// a packet like this unless HandleLocal is false or our NIC is the
				// loopback interface.
				stats.InvalidSourceAddressesReceived.Increment()
				return
			}
		}

		// Loopback traffic skips the prerouting chain.
		inNicName := e.protocol.stack.FindNICNameFromID(e.nic.ID())
		if ok := e.protocol.stack.IPTables().Check(stack.Prerouting, pkt, nil, nil, e.MainAddress().Address, inNicName, "" /* outNicName */); !ok {
			// iptables is telling us to drop the packet.
			stats.IPTablesPreroutingDropped.Increment()
			return
		}
	}

	e.handleValidatedPacket(h, pkt)
}
// handleLocalPacket is the counterpart of HandlePacket for locally generated
// traffic: it neither runs the prerouting iptables hook nor checks for
// loopback traffic that originated from outside of the netstack (i.e. martian
// loopback packets).
//
// The packet is cloned to an inbound packet before being parsed, and
// canSkipRXChecksum is recorded on the clone as
// RXTransportChecksumValidated.
func (e *endpoint) handleLocalPacket(pkt *stack.PacketBuffer, canSkipRXChecksum bool) {
	ipStats := e.stats.ip
	ipStats.PacketsReceived.Increment()

	inbound := pkt.CloneToInbound()
	inbound.RXTransportChecksumValidated = canSkipRXChecksum

	hdr, valid := e.protocol.parseAndValidate(inbound)
	if valid {
		e.handleValidatedPacket(hdr, inbound)
		return
	}
	ipStats.MalformedPacketsReceived.Increment()
}
// handleValidatedPacket processes a packet whose IPv6 fixed header h has
// already been parsed and validated.
//
// Packets with a multicast source address are dropped. Packets that are not
// destined to an address assigned to this endpoint (or a multicast group it
// has joined) are forwarded when forwarding is enabled and dropped otherwise.
// The remaining packets go through the iptables Input hook, have their
// extension headers processed in order (reassembling fragments as needed) and
// are finally handed to ICMPv6 or to the transport dispatcher.
func (e *endpoint) handleValidatedPacket(h header.IPv6, pkt *stack.PacketBuffer) {
	pkt.NICID = e.nic.ID()
	stats := e.stats.ip

	srcAddr := h.SourceAddress()
	dstAddr := h.DestinationAddress()

	// As per RFC 4291 section 2.7:
	//   Multicast addresses must not be used as source addresses in IPv6
	//   packets or appear in any Routing header.
	if header.IsV6MulticastAddress(srcAddr) {
		stats.InvalidSourceAddressesReceived.Increment()
		return
	}

	// The destination address should be an address we own or a group we joined
	// for us to receive the packet. Otherwise, attempt to forward the packet.
	if addressEndpoint := e.AcquireAssignedAddress(dstAddr, e.nic.Promiscuous(), stack.CanBePrimaryEndpoint); addressEndpoint != nil {
		addressEndpoint.DecRef()
	} else if !e.IsInGroup(dstAddr) {
		if !e.protocol.Forwarding() {
			stats.InvalidDestinationAddressesReceived.Increment()
			return
		}

		// Forwarding is best effort; the error is intentionally dropped.
		_ = e.forwardPacket(pkt)
		return
	}

	// Create a VV to parse the packet. We don't plan to modify anything here.
	// vv consists of:
	// - Any IPv6 header bytes after the first 40 (i.e. extensions).
	// - The transport header, if present.
	// - Any other payload data.
	vv := pkt.NetworkHeader().View()[header.IPv6MinimumSize:].ToVectorisedView()
	vv.AppendView(pkt.TransportHeader().View())
	vv.AppendViews(pkt.Data().Views())
	it := header.MakeIPv6PayloadIterator(header.IPv6ExtensionHeaderIdentifier(h.NextHeader()), vv)

	// iptables filtering. All packets that reach here are intended for
	// this machine and need not be forwarded.
	inNicName := e.protocol.stack.FindNICNameFromID(e.nic.ID())
	if ok := e.protocol.stack.IPTables().Check(stack.Input, pkt, nil, nil, "" /* preroutingAddr */, inNicName, "" /* outNicName */); !ok {
		// iptables is telling us to drop the packet.
		stats.IPTablesInputDropped.Increment()
		return
	}

	var (
		hasFragmentHeader bool
		routerAlert       *header.IPv6RouterAlertOption
	)

	for {
		// Keep track of the start of the previous header so we can report the
		// special case of a Hop by Hop at a location other than at the start.
		previousHeaderStart := it.HeaderOffset()
		extHdr, done, err := it.Next()
		if err != nil {
			stats.MalformedPacketsReceived.Increment()
			return
		}
		if done {
			break
		}

		switch extHdr := extHdr.(type) {
		case header.IPv6HopByHopOptionsExtHdr:
			// As per RFC 8200 section 4.1, the Hop By Hop extension header is
			// restricted to appear immediately after an IPv6 fixed header.
			if previousHeaderStart != 0 {
				_ = e.protocol.returnError(&icmpReasonParameterProblem{
					code:    header.ICMPv6UnknownHeader,
					pointer: previousHeaderStart,
				}, pkt)
				return
			}

			optsIt := extHdr.Iter()

			for {
				opt, done, err := optsIt.Next()
				if err != nil {
					stats.MalformedPacketsReceived.Increment()
					return
				}
				if done {
					break
				}

				switch opt := opt.(type) {
				case *header.IPv6RouterAlertOption:
					if routerAlert != nil {
						// As per RFC 2711 section 3, there should be at most one Router
						// Alert option per packet.
						//
						//    There MUST only be one option of this type, regardless of
						//    value, per Hop-by-Hop header.
						stats.MalformedPacketsReceived.Increment()
						return
					}
					routerAlert = opt
					stats.OptionRouterAlertReceived.Increment()
				default:
					switch opt.UnknownAction() {
					case header.IPv6OptionUnknownActionSkip:
					case header.IPv6OptionUnknownActionDiscard:
						return
					case header.IPv6OptionUnknownActionDiscardSendICMPNoMulticastDest:
						if header.IsV6MulticastAddress(dstAddr) {
							return
						}
						fallthrough
					case header.IPv6OptionUnknownActionDiscardSendICMP:
						// This case satisfies a requirement of RFC 8200 section 4.2 which
						// states that an unknown option starting with bits [10] should:
						//
						//    discard the packet and, regardless of whether or not the
						//    packet's Destination Address was a multicast address, send an
						//    ICMP Parameter Problem, Code 2, message to the packet's
						//    Source Address, pointing to the unrecognized Option Type.
						_ = e.protocol.returnError(&icmpReasonParameterProblem{
							code:               header.ICMPv6UnknownOption,
							pointer:            it.ParseOffset() + optsIt.OptionOffset(),
							respondToMulticast: true,
						}, pkt)
						return
					default:
						panic(fmt.Sprintf("unrecognized action for an unrecognized Hop By Hop extension header option = %d", opt))
					}
				}
			}

		case header.IPv6RoutingExtHdr:
			// As per RFC 8200 section 4.4, if a node encounters a routing header with
			// an unrecognized routing type value, with a non-zero Segments Left
			// value, the node must discard the packet and send an ICMP Parameter
			// Problem, Code 0 to the packet's Source Address, pointing to the
			// unrecognized Routing Type.
			//
			// If the Segments Left is 0, the node must ignore the Routing extension
			// header and process the next header in the packet.
			//
			// Note, the stack does not yet handle any type of routing extension
			// header, so we just make sure Segments Left is zero before processing
			// the next extension header.
			if extHdr.SegmentsLeft() != 0 {
				_ = e.protocol.returnError(&icmpReasonParameterProblem{
					code:    header.ICMPv6ErroneousHeader,
					pointer: it.ParseOffset(),
				}, pkt)
				return
			}

		case header.IPv6FragmentExtHdr:
			hasFragmentHeader = true

			if extHdr.IsAtomic() {
				// This fragment extension header indicates that this packet is an
				// atomic fragment. An atomic fragment is a fragment that contains
				// all the data required to reassemble a full packet. As per RFC 6946,
				// atomic fragments must not interfere with "normal" fragmented traffic
				// so we skip processing the fragment instead of feeding it through the
				// reassembly process below.
				continue
			}

			fragmentFieldOffset := it.ParseOffset()

			// Don't consume the iterator if we have the first fragment because we
			// will use it to validate that the first fragment holds the upper layer
			// header.
			rawPayload := it.AsRawHeader(extHdr.FragmentOffset() != 0 /* consume */)

			if extHdr.FragmentOffset() == 0 {
				// Check that the iterator ends with a raw payload as the first fragment
				// should include all headers up to and including any upper layer
				// headers, as per RFC 8200 section 4.5; only upper layer data
				// (non-headers) should follow the fragment extension header.
				var lastHdr header.IPv6PayloadHeader

				for {
					// Drain the outer iterator, remembering the final header seen.
					nextHdr, done, err := it.Next()
					if err != nil {
						stats.MalformedPacketsReceived.Increment()
						stats.MalformedFragmentsReceived.Increment()
						return
					}
					if done {
						break
					}

					lastHdr = nextHdr
				}

				// If the last header is a raw header, then the last portion of the IPv6
				// payload is not a known IPv6 extension header. Note, this does not
				// mean that the last portion is an upper layer header or not an
				// extension header because:
				//  1) we do not yet support all extension headers
				//  2) we do not validate the upper layer header before reassembling.
				//
				// This check makes sure that the first fragment does not end with a
				// known IPv6 extension header after the Fragment extension header.
				//
				// TODO(#2196): Support IPv6 Authentication and Encapsulated
				// Security Payload extension headers.
				// TODO(#2333): Validate that the upper layer header is valid.
				switch lastHdr.(type) {
				case header.IPv6RawPayloadHeader:
				default:
					stats.MalformedPacketsReceived.Increment()
					stats.MalformedFragmentsReceived.Increment()
					return
				}
			}

			fragmentPayloadLen := rawPayload.Buf.Size()
			if fragmentPayloadLen == 0 {
				// Drop the packet as it's marked as a fragment but has no payload.
				stats.MalformedPacketsReceived.Increment()
				stats.MalformedFragmentsReceived.Increment()
				return
			}

			// As per RFC 2460 Section 4.5:
			//
			//    If the length of a fragment, as derived from the fragment packet's
			//    Payload Length field, is not a multiple of 8 octets and the M flag
			//    of that fragment is 1, then that fragment must be discarded and an
			//    ICMP Parameter Problem, Code 0, message should be sent to the source
			//    of the fragment, pointing to the Payload Length field of the
			//    fragment packet.
			if extHdr.More() && fragmentPayloadLen%header.IPv6FragmentExtHdrFragmentOffsetBytesPerUnit != 0 {
				stats.MalformedPacketsReceived.Increment()
				stats.MalformedFragmentsReceived.Increment()
				_ = e.protocol.returnError(&icmpReasonParameterProblem{
					code:    header.ICMPv6ErroneousHeader,
					pointer: header.IPv6PayloadLenOffset,
				}, pkt)
				return
			}

			// The packet is a fragment, let's try to reassemble it.
			start := extHdr.FragmentOffset() * header.IPv6FragmentExtHdrFragmentOffsetBytesPerUnit

			// As per RFC 2460 Section 4.5:
			//
			//    If the length and offset of a fragment are such that the Payload
			//    Length of the packet reassembled from that fragment would exceed
			//    65,535 octets, then that fragment must be discarded and an ICMP
			//    Parameter Problem, Code 0, message should be sent to the source of
			//    the fragment, pointing to the Fragment Offset field of the fragment
			//    packet.
			if int(start)+fragmentPayloadLen > header.IPv6MaximumPayloadSize {
				stats.MalformedPacketsReceived.Increment()
				stats.MalformedFragmentsReceived.Increment()
				_ = e.protocol.returnError(&icmpReasonParameterProblem{
					code:    header.ICMPv6ErroneousHeader,
					pointer: fragmentFieldOffset,
				}, pkt)
				return
			}

			// Note that pkt doesn't have its transport header set after reassembly,
			// and won't until DeliverNetworkPacket sets it.
			resPkt, proto, ready, err := e.protocol.fragmentation.Process(
				// IPv6 ignores the Protocol field since the ID only needs to be unique
				// across source-destination pairs, as per RFC 8200 section 4.5.
				fragmentation.FragmentID{
					Source:      srcAddr,
					Destination: dstAddr,
					ID:          extHdr.ID(),
				},
				start,
				start+uint16(fragmentPayloadLen)-1,
				extHdr.More(),
				uint8(rawPayload.Identifier),
				pkt,
			)
			if err != nil {
				stats.MalformedPacketsReceived.Increment()
				stats.MalformedFragmentsReceived.Increment()
				return
			}

			if ready {
				pkt = resPkt

				// We create a new iterator with the reassembled packet because we could
				// have more extension headers in the reassembled payload, as per RFC
				// 8200 section 4.5. We also use the NextHeader value from the first
				// fragment.
				data := pkt.Data()
				dataVV := buffer.NewVectorisedView(data.Size(), data.Views())
				it = header.MakeIPv6PayloadIterator(header.IPv6ExtensionHeaderIdentifier(proto), dataVV)
			}

		case header.IPv6DestinationOptionsExtHdr:
			optsIt := extHdr.Iter()

			for {
				opt, done, err := optsIt.Next()
				if err != nil {
					stats.MalformedPacketsReceived.Increment()
					return
				}
				if done {
					break
				}

				// We currently do not support any IPv6 Destination extension header
				// options.
				switch opt.UnknownAction() {
				case header.IPv6OptionUnknownActionSkip:
				case header.IPv6OptionUnknownActionDiscard:
					return
				case header.IPv6OptionUnknownActionDiscardSendICMPNoMulticastDest:
					if header.IsV6MulticastAddress(dstAddr) {
						return
					}
					fallthrough
				case header.IPv6OptionUnknownActionDiscardSendICMP:
					// This case satisfies a requirement of RFC 8200 section 4.2
					// which states that an unknown option starting with bits [10] should:
					//
					//    discard the packet and, regardless of whether or not the
					//    packet's Destination Address was a multicast address, send an
					//    ICMP Parameter Problem, Code 2, message to the packet's
					//    Source Address, pointing to the unrecognized Option Type.
					//
					_ = e.protocol.returnError(&icmpReasonParameterProblem{
						code:               header.ICMPv6UnknownOption,
						pointer:            it.ParseOffset() + optsIt.OptionOffset(),
						respondToMulticast: true,
					}, pkt)
					return
				default:
					panic(fmt.Sprintf("unrecognized action for an unrecognized Destination extension header option = %d", opt))
				}
			}

		case header.IPv6RawPayloadHeader:
			// If the last header in the payload isn't a known IPv6 extension header,
			// handle it as if it is transport layer data.

			// For unfragmented packets, extHdr still contains the transport header.
			// Get rid of it.
			//
			// For reassembled fragments, pkt.TransportHeader is unset, so this is a
			// no-op and pkt.Data begins with the transport header.
			extHdr.Buf.TrimFront(pkt.TransportHeader().View().Size())
			pkt.Data().Replace(extHdr.Buf)

			// Count the delivery exactly once, whichever upper layer ends up
			// handling the packet.
			stats.PacketsDelivered.Increment()
			if p := tcpip.TransportProtocolNumber(extHdr.Identifier); p == header.ICMPv6ProtocolNumber {
				pkt.TransportProtocolNumber = p
				e.handleICMP(pkt, hasFragmentHeader, routerAlert)
			} else {
				switch res := e.dispatcher.DeliverTransportPacket(p, pkt); res {
				case stack.TransportPacketHandled:
				case stack.TransportPacketDestinationPortUnreachable:
					// As per RFC 4443 section 3.1:
					//   A destination node SHOULD originate a Destination Unreachable
					//   message with Code 4 in response to a packet for which the
					//   transport protocol (e.g., UDP) has no listener, if that transport
					//   protocol has no alternative means to inform the sender.
					_ = e.protocol.returnError(&icmpReasonPortUnreachable{}, pkt)
				case stack.TransportPacketProtocolUnreachable:
					// As per RFC 8200 section 4. (page 7):
					//   Extension headers are numbered from IANA IP Protocol Numbers
					//   [IANA-PN], the same values used for IPv4 and IPv6.  When
					//   processing a sequence of Next Header values in a packet, the
					//   first one that is not an extension header [IANA-EH] indicates
					//   that the next item in the packet is the corresponding upper-layer
					//   header.
					// With more related information on page 8:
					//   If, as a result of processing a header, the destination node is
					//   required to proceed to the next header but the Next Header value
					//   in the current header is unrecognized by the node, it should
					//   discard the packet and send an ICMP Parameter Problem message to
					//   the source of the packet, with an ICMP Code value of 1
					//   ("unrecognized Next Header type encountered") and the ICMP
					//   Pointer field containing the offset of the unrecognized value
					//   within the original packet.
					//
					// Which when taken together indicate that an unknown protocol should
					// be treated as an unrecognized next header value.
					// The location of the Next Header field is in a different place in
					// the initial IPv6 header than it is in the extension headers so
					// treat it specially.
					prevHdrIDOffset := uint32(header.IPv6NextHeaderOffset)
					if previousHeaderStart != 0 {
						prevHdrIDOffset = previousHeaderStart
					}
					_ = e.protocol.returnError(&icmpReasonParameterProblem{
						code:    header.ICMPv6UnknownHeader,
						pointer: prevHdrIDOffset,
					}, pkt)
				default:
					panic(fmt.Sprintf("unrecognized result from DeliverTransportPacket = %d", res))
				}
			}

		default:
			// Since the iterator returns IPv6RawPayloadHeader for unknown Extension
			// Header IDs this should never happen unless we missed a supported type
			// here.
			panic(fmt.Sprintf("unrecognized type from it.Next() = %T", extHdr))
		}
	}
}
// Close releases the resources held by the endpoint: while holding e.mu it
// disables the endpoint and cleans up its addressable endpoint state, and then
// it asks the protocol to forget the endpoint for this NIC.
func (e *endpoint) Close() {
	e.mu.Lock()
	e.disableLocked()
	e.mu.addressableEndpointState.Cleanup()
	e.mu.Unlock()

	nicID := e.nic.ID()
	e.protocol.forgetEndpoint(nicID)
}
// NetworkProtocolNumber implements stack.NetworkEndpoint.NetworkProtocolNumber
// by reporting the number of the protocol that owns this endpoint.
func (e *endpoint) NetworkProtocolNumber() tcpip.NetworkProtocolNumber {
	number := e.protocol.Number()
	return number
}
// AddAndAcquirePermanentAddress implements stack.AddressableEndpoint.
//
// It takes e.mu for writing and defers to
// addAndAcquirePermanentAddressLocked.
func (e *endpoint) AddAndAcquirePermanentAddress(addr tcpip.AddressWithPrefix, peb stack.PrimaryEndpointBehavior, configType stack.AddressConfigType, deprecated bool) (stack.AddressEndpoint, tcpip.Error) {
	// TODO(b/169350103): add checks here after making sure we no longer receive
	// an empty address.
	e.mu.Lock()
	defer e.mu.Unlock()

	addressEndpoint, err := e.addAndAcquirePermanentAddressLocked(addr, peb, configType, deprecated)
	return addressEndpoint, err
}
// addAndAcquirePermanentAddressLocked is like AddAndAcquirePermanentAddress but
// with locking requirements.
//
// For unicast addresses it additionally marks the new address as
// PermanentTentative, starts duplicate address detection if the endpoint is
// enabled, and joins the address's solicited-node multicast group.
//
// Precondition: e.mu must be write locked.
func (e *endpoint) addAndAcquirePermanentAddressLocked(addr tcpip.AddressWithPrefix, peb stack.PrimaryEndpointBehavior, configType stack.AddressConfigType, deprecated bool) (stack.AddressEndpoint, tcpip.Error) {
	ep, err := e.mu.addressableEndpointState.AddAndAcquirePermanentAddress(addr, peb, configType, deprecated)
	switch {
	case err != nil:
		return nil, err
	case !header.IsV6UnicastAddress(addr.Address):
		// Nothing more to do for non-unicast addresses.
		return ep, nil
	}

	ep.SetKind(stack.PermanentTentative)

	if e.Enabled() {
		if dadErr := e.mu.ndp.startDuplicateAddressDetection(addr.Address, ep); dadErr != nil {
			return nil, dadErr
		}
	}

	group := header.SolicitedNodeAddr(addr.Address)
	if joinErr := e.joinGroupLocked(group); joinErr != nil {
		// joinGroupLocked only returns an error if the group address is not a valid
		// IPv6 multicast address.
		panic(fmt.Sprintf("e.joinGroupLocked(%s): %s", group, joinErr))
	}

	return ep, nil
}
// RemovePermanentAddress implements stack.AddressableEndpoint.
//
// It returns *tcpip.ErrBadLocalAddress when addr is unknown to the endpoint or
// is not a permanent address.
func (e *endpoint) RemovePermanentAddress(addr tcpip.Address) tcpip.Error {
	e.mu.Lock()
	defer e.mu.Unlock()

	ep := e.getAddressRLocked(addr)
	if ep == nil {
		return &tcpip.ErrBadLocalAddress{}
	}
	if !ep.GetKind().IsPermanent() {
		return &tcpip.ErrBadLocalAddress{}
	}

	return e.removePermanentEndpointLocked(ep, true /* allowSLAACInvalidation */, &stack.DADAborted{})
}
// removePermanentEndpointLocked removes the permanent address represented by
// addressEndpoint. If the address was configured through SLAAC (stable or
// temporary), the matching SLAAC resources are cleaned up and the integrator
// is notified before the endpoint itself is removed.
//
// Precondition: e.mu must be write locked.
func (e *endpoint) removePermanentEndpointLocked(addressEndpoint stack.AddressEndpoint, allowSLAACInvalidation bool, dadResult stack.DADResult) tcpip.Error {
	prefix := addressEndpoint.AddressWithPrefix()

	if cfg := addressEndpoint.ConfigType(); cfg == stack.AddressConfigSlaac {
		e.mu.ndp.cleanupSLAACAddrResourcesAndNotify(prefix, allowSLAACInvalidation)
	} else if cfg == stack.AddressConfigSlaacTemp {
		e.mu.ndp.cleanupTempSLAACAddrResourcesAndNotify(prefix)
	}

	return e.removePermanentEndpointInnerLocked(addressEndpoint, dadResult)
}
// removePermanentEndpointInnerLocked is like removePermanentEndpointLocked
// except it does not cleanup SLAAC address state.
//
// It stops duplicate address detection for the address, removes the permanent
// endpoint and leaves the address's solicited-node multicast group.
//
// Precondition: e.mu must be write locked.
func (e *endpoint) removePermanentEndpointInnerLocked(addressEndpoint stack.AddressEndpoint, dadResult stack.DADResult) tcpip.Error {
	prefix := addressEndpoint.AddressWithPrefix()
	e.mu.ndp.stopDuplicateAddressDetection(prefix.Address, dadResult)

	if removeErr := e.mu.addressableEndpointState.RemovePermanentEndpoint(addressEndpoint); removeErr != nil {
		return removeErr
	}

	group := header.SolicitedNodeAddr(prefix.Address)
	switch leaveErr := e.leaveGroupLocked(group).(type) {
	case *tcpip.ErrBadLocalAddress:
		// The endpoint may have already left the multicast group.
		return nil
	default:
		return leaveErr
	}
}
// hasPermanentAddressRLocked reports whether the endpoint has a permanent
// address equal to addr.
//
// Precondition: e.mu must be read or write locked.
func (e *endpoint) hasPermanentAddressRLocked(addr tcpip.Address) bool {
	ep := e.getAddressRLocked(addr)
	return ep != nil && ep.GetKind().IsPermanent()
}
// getAddressRLocked looks up localAddr in the addressable endpoint state and
// returns the result of that lookup.
//
// Precondition: e.mu must be read or write locked.
func (e *endpoint) getAddressRLocked(localAddr tcpip.Address) stack.AddressEndpoint {
	addressEndpoint := e.mu.addressableEndpointState.GetAddress(localAddr)
	return addressEndpoint
}
// MainAddress implements stack.AddressableEndpoint. The lookup is performed
// with e.mu read locked.
func (e *endpoint) MainAddress() tcpip.AddressWithPrefix {
	e.mu.RLock()
	defer e.mu.RUnlock()

	mainAddr := e.mu.addressableEndpointState.MainAddress()
	return mainAddr
}
// AcquireAssignedAddress implements stack.AddressableEndpoint.
//
// e.mu is taken for writing for the duration of the call.
func (e *endpoint) AcquireAssignedAddress(localAddr tcpip.Address, allowTemp bool, tempPEB stack.PrimaryEndpointBehavior) stack.AddressEndpoint {
	e.mu.Lock()
	defer e.mu.Unlock()

	addressEndpoint := e.acquireAddressOrCreateTempLocked(localAddr, allowTemp, tempPEB)
	return addressEndpoint
}
// acquireAddressOrCreateTempLocked is the lock-free counterpart of
// AcquireAssignedAddress; it forwards its arguments to the addressable
// endpoint state.
//
// Precondition: e.mu must be write locked.
func (e *endpoint) acquireAddressOrCreateTempLocked(localAddr tcpip.Address, allowTemp bool, tempPEB stack.PrimaryEndpointBehavior) stack.AddressEndpoint {
	addressEndpoint := e.mu.addressableEndpointState.AcquireAssignedAddress(localAddr, allowTemp, tempPEB)
	return addressEndpoint
}
// AcquireOutgoingPrimaryAddress implements stack.AddressableEndpoint.
//
// The selection itself is done by acquireOutgoingPrimaryAddressRLocked while
// e.mu is read locked.
func (e *endpoint) AcquireOutgoingPrimaryAddress(remoteAddr tcpip.Address, allowExpired bool) stack.AddressEndpoint {
	e.mu.RLock()
	defer e.mu.RUnlock()

	addressEndpoint := e.acquireOutgoingPrimaryAddressRLocked(remoteAddr, allowExpired)
	return addressEndpoint
}
// getLinkLocalAddressRLocked returns the first assigned (and not expired)
// link-local address found while walking the primary list of addresses. The
// zero tcpip.Address is returned if there is none.
//
// See stack.PrimaryEndpointBehavior for more details about the primary list.
//
// Precondition: e.mu must be read locked.
func (e *endpoint) getLinkLocalAddressRLocked() tcpip.Address {
	var found tcpip.Address

	e.mu.addressableEndpointState.ForEachPrimaryEndpoint(func(ep stack.AddressEndpoint) bool {
		if !ep.IsAssigned(false /* allowExpired */) {
			// Keep looking.
			return true
		}

		candidate := ep.AddressWithPrefix().Address
		if !header.IsV6LinkLocalAddress(candidate) {
			// Keep looking.
			return true
		}

		// Found one; stop iterating.
		found = candidate
		return false
	})

	return found
}
// acquireOutgoingPrimaryAddressRLocked is like AcquireOutgoingPrimaryAddress
// but with locking requirements.
//
// When remoteAddr is empty, selection is delegated to the addressable endpoint
// state. Otherwise the assigned primary addresses are ranked with the Source
// Address Selection rules of RFC 6724 section 5 that are implemented here
// (rules 1-3 and 6-8), and the most preferred address whose reference count
// can be incremented is returned. Candidates that tie on every rule keep their
// relative order from the primary endpoint list. nil is returned if no
// address could be acquired.
//
// Precondition: e.mu must be read locked.
func (e *endpoint) acquireOutgoingPrimaryAddressRLocked(remoteAddr tcpip.Address, allowExpired bool) stack.AddressEndpoint {
	// addrCandidate is a candidate for Source Address Selection, as per
	// RFC 6724 section 5.
	type addrCandidate struct {
		addressEndpoint stack.AddressEndpoint
		addr            tcpip.Address
		scope           header.IPv6AddressScope
		label           uint8
		matchingPrefix  uint8
	}

	if len(remoteAddr) == 0 {
		return e.mu.addressableEndpointState.AcquireOutgoingPrimaryAddress(remoteAddr, allowExpired)
	}

	// Create a candidate set of available addresses we can potentially use as a
	// source address.
	var cs []addrCandidate
	e.mu.addressableEndpointState.ForEachPrimaryEndpoint(func(addressEndpoint stack.AddressEndpoint) bool {
		// An address that is not assigned is not usable for outgoing
		// connections, so it is not a candidate.
		if !addressEndpoint.IsAssigned(allowExpired) {
			return true
		}

		addr := addressEndpoint.AddressWithPrefix().Address
		scope, err := header.ScopeForIPv6Address(addr)
		if err != nil {
			// Should never happen as we got addr from the primary IPv6 endpoint
			// list and ScopeForIPv6Address only returns an error if addr is not an
			// IPv6 address.
			panic(fmt.Sprintf("header.ScopeForIPv6Address(%s): %s", addr, err))
		}

		cs = append(cs, addrCandidate{
			addressEndpoint: addressEndpoint,
			addr:            addr,
			scope:           scope,
			label:           getLabel(addr),
			matchingPrefix:  remoteAddr.MatchingPrefix(addr),
		})

		return true
	})

	remoteScope, err := header.ScopeForIPv6Address(remoteAddr)
	if err != nil {
		// This function should never be called with an invalid IPv6 address.
		panic(fmt.Sprintf("header.ScopeForIPv6Address(%s): %s", remoteAddr, err))
	}

	remoteLabel := getLabel(remoteAddr)

	// Sort the addresses as per RFC 6724 section 5 rules 1-3 and 6-8.
	//
	// A stable sort is used so that candidates which are equal under every rule
	// stay in primary endpoint list order. The indices handed to the comparator
	// are current slice positions, not original ones, so they cannot be used as
	// a tie-breaker with an unstable sort.
	//
	// TODO(b/146021396): Implement rules 4, 5 of RFC 6724 section 5.
	sort.SliceStable(cs, func(i, j int) bool {
		sa := cs[i]
		sb := cs[j]

		// Prefer same address as per RFC 6724 section 5 rule 1.
		if sa.addr == remoteAddr {
			return true
		}
		if sb.addr == remoteAddr {
			return false
		}

		// Prefer appropriate scope as per RFC 6724 section 5 rule 2.
		if sa.scope < sb.scope {
			return sa.scope >= remoteScope
		} else if sb.scope < sa.scope {
			return sb.scope < remoteScope
		}

		// Avoid deprecated addresses as per RFC 6724 section 5 rule 3.
		if saDep, sbDep := sa.addressEndpoint.Deprecated(), sb.addressEndpoint.Deprecated(); saDep != sbDep {
			// If sa is not deprecated, it is preferred over sb.
			return sbDep
		}

		// Prefer matching label as per RFC 6724 section 5 rule 6.
		if saMatch, sbMatch := sa.label == remoteLabel, sb.label == remoteLabel; saMatch != sbMatch {
			// Exactly one of the two matches; prefer that one.
			return saMatch
		}

		// Prefer temporary addresses as per RFC 6724 section 5 rule 7.
		if saTemp, sbTemp := sa.addressEndpoint.ConfigType() == stack.AddressConfigSlaacTemp, sb.addressEndpoint.ConfigType() == stack.AddressConfigSlaacTemp; saTemp != sbTemp {
			return saTemp
		}

		// Use longest matching prefix as per RFC 6724 section 5 rule 8.
		if sa.matchingPrefix != sb.matchingPrefix {
			return sa.matchingPrefix > sb.matchingPrefix
		}

		// sa and sb are equal under every rule. Report "not less" and let the
		// stable sort keep the endpoint that is closest to the front of the
		// primary endpoint list first.
		return false
	})

	// Return the most preferred address that can have its reference count
	// incremented.
	for _, c := range cs {
		if c.addressEndpoint.IncRef() {
			return c.addressEndpoint
		}
	}

	return nil
}
// PrimaryAddresses implements stack.AddressableEndpoint. The addresses are
// read with e.mu read locked.
func (e *endpoint) PrimaryAddresses() []tcpip.AddressWithPrefix {
	e.mu.RLock()
	defer e.mu.RUnlock()

	addrs := e.mu.addressableEndpointState.PrimaryAddresses()
	return addrs
}
// PermanentAddresses implements stack.AddressableEndpoint. The addresses are
// read with e.mu read locked.
func (e *endpoint) PermanentAddresses() []tcpip.AddressWithPrefix {
	e.mu.RLock()
	defer e.mu.RUnlock()

	addrs := e.mu.addressableEndpointState.PermanentAddresses()
	return addrs
}
// JoinGroup implements stack.GroupAddressableEndpoint by calling
// joinGroupLocked with e.mu write locked.
func (e *endpoint) JoinGroup(addr tcpip.Address) tcpip.Error {
	e.mu.Lock()
	defer e.mu.Unlock()

	err := e.joinGroupLocked(addr)
	return err
}
// joinGroupLocked is like JoinGroup but with locking requirements.
//
// It returns *tcpip.ErrBadAddress when addr is not an IPv6 multicast address;
// otherwise the group is joined through MLD and nil is returned.
//
// Precondition: e.mu must be locked.
func (e *endpoint) joinGroupLocked(addr tcpip.Address) tcpip.Error {
	if header.IsV6MulticastAddress(addr) {
		e.mu.mld.joinGroup(addr)
		return nil
	}

	return &tcpip.ErrBadAddress{}
}
// LeaveGroup implements stack.GroupAddressableEndpoint by calling
// leaveGroupLocked with e.mu write locked.
func (e *endpoint) LeaveGroup(addr tcpip.Address) tcpip.Error {
	e.mu.Lock()
	defer e.mu.Unlock()

	err := e.leaveGroupLocked(addr)
	return err
}
// leaveGroupLocked is like LeaveGroup but with locking requirements; the
// request is passed straight to MLD and its result is returned.
//
// Precondition: e.mu must be locked.
func (e *endpoint) leaveGroupLocked(addr tcpip.Address) tcpip.Error {
	err := e.mu.mld.leaveGroup(addr)
	return err
}
// IsInGroup implements stack.GroupAddressableEndpoint. Membership is queried
// from MLD with e.mu read locked.
func (e *endpoint) IsInGroup(addr tcpip.Address) bool {
	e.mu.RLock()
	defer e.mu.RUnlock()

	joined := e.mu.mld.isInGroup(addr)
	return joined
}
// Stats implements stack.NetworkEndpoint. It returns a pointer to the
// endpoint's local statistics.
func (e *endpoint) Stats() stack.NetworkEndpointStats {
	local := &e.stats.localStats
	return local
}
// Compile-time assertions that *protocol satisfies the interfaces below.
var _ stack.ForwardingNetworkProtocol = (*protocol)(nil)
var _ stack.NetworkProtocol = (*protocol)(nil)
var _ fragmentation.TimeoutHandler = (*protocol)(nil)

// protocol holds the IPv6 state that is shared by every endpoint created
// through NewEndpoint.
type protocol struct {
	// stack is the networking stack this protocol belongs to.
	stack *stack.Stack
	// options are the protocol's configuration options.
	options Options

	mu struct {
		sync.RWMutex

		// eps is keyed by NICID to allow protocol methods to retrieve an endpoint
		// when handling a packet, by looking at which NIC handled the packet.
		eps map[tcpip.NICID]*endpoint
	}

	// NOTE(review): ids and hashIV are not used in this part of the file;
	// presumably they back the generation of fragment identification values —
	// TODO confirm.
	ids    []uint32
	hashIV uint32

	// defaultTTL is the current default TTL for the protocol. Only the
	// uint8 portion of it is meaningful.
	//
	// Must be accessed using atomic operations.
	defaultTTL uint32

	// forwarding is set to 1 when the protocol has forwarding enabled and 0
	// when it is disabled.
	//
	// Must be accessed using atomic operations.
	forwarding uint32

	// fragmentation reassembles received fragments.
	fragmentation *fragmentation.Fragmentation
}
// Number returns the ipv6 protocol number, ProtocolNumber.
func (*protocol) Number() tcpip.NetworkProtocolNumber {
	return ProtocolNumber
}
// MinimumPacketSize returns the minimum valid ipv6 packet size, which is
// header.IPv6MinimumSize.
func (*protocol) MinimumPacketSize() int {
	return header.IPv6MinimumSize
}
// DefaultPrefixLen returns the IPv6 default prefix length: the size of an
// IPv6 address expressed in bits.
func (*protocol) DefaultPrefixLen() int {
	const bitsPerByte = 8
	return header.IPv6AddressSize * bitsPerByte
}
// ParseAddresses implements NetworkProtocol.ParseAddresses by interpreting v
// as an IPv6 header and returning its source and destination addresses.
func (*protocol) ParseAddresses(v buffer.View) (src, dst tcpip.Address) {
	ipHdr := header.IPv6(v)
	src = ipHdr.SourceAddress()
	dst = ipHdr.DestinationAddress()
	return src, dst
}
// NewEndpoint creates a new ipv6 endpoint for nic, initializes its address,
// NDP, MLD, DAD and statistics state, and registers it with the protocol
// under the NIC's ID.
func (p *protocol) NewEndpoint(nic stack.NetworkInterface, dispatcher stack.TransportDispatcher) stack.NetworkEndpoint {
	const (
		// NDP options must be 8 octet aligned and the first 2 bytes are used for
		// the type and length fields leaving 6 octets as the minimum size for a
		// nonce option without padding.
		nonceSize = 6

		// As per RFC 7527 section 4.1,
		//
		//   If any probe is looped back within RetransTimer milliseconds after
		//   having sent DupAddrDetectTransmits NS(DAD) messages, the interface
		//   continues with another MAX_MULTICAST_SOLICIT number of NS(DAD)
		//   messages transmitted RetransTimer milliseconds apart.
		//
		// Value taken from RFC 4861 section 10.
		maxMulticastSolicit = 3
	)

	ep := &endpoint{
		nic:        nic,
		dispatcher: dispatcher,
		protocol:   p,
	}

	dadOpts := ip.DADOptions{
		Clock:              p.stack.Clock(),
		SecureRNG:          p.stack.SecureRNG(),
		NonceSize:          nonceSize,
		ExtendDADTransmits: maxMulticastSolicit,
		Protocol:           &ep.mu.ndp,
		NICID:              nic.ID(),
	}

	ep.mu.Lock()
	ep.mu.addressableEndpointState.Init(ep)
	ep.mu.ndp.init(ep, dadOpts)
	ep.mu.mld.init(ep)
	ep.dad.mu.Lock()
	ep.dad.mu.dad.Init(&ep.dad.mu, p.options.DADConfigs, dadOpts)
	ep.dad.mu.Unlock()
	ep.mu.Unlock()

	globalStats := p.stack.Stats()
	tcpip.InitStatCounters(reflect.ValueOf(&ep.stats.localStats).Elem())
	ep.stats.ip.Init(&ep.stats.localStats.IP, &globalStats.IP)
	ep.stats.icmp.init(&ep.stats.localStats.ICMP, &globalStats.ICMP.V6)

	p.mu.Lock()
	defer p.mu.Unlock()
	p.mu.eps[nic.ID()] = ep

	return ep
}
// findEndpointWithAddress returns an endpoint registered with the protocol
// that has addr assigned to it, or nil if there is no such endpoint.
//
// Temporary addresses are not created by the lookup, and the reference taken
// on a matching address is released before returning.
func (p *protocol) findEndpointWithAddress(addr tcpip.Address) *endpoint {
	p.mu.RLock()
	defer p.mu.RUnlock()

	for _, candidate := range p.mu.eps {
		assigned := candidate.AcquireAssignedAddress(addr, false /* allowTemp */, stack.NeverPrimaryEndpoint)
		if assigned == nil {
			continue
		}

		assigned.DecRef()
		return candidate
	}

	return nil
}
// forgetEndpoint removes the endpoint registered for nicID, if any, from the
// protocol's endpoint map.
func (p *protocol) forgetEndpoint(nicID tcpip.NICID) {
	p.mu.Lock()
	delete(p.mu.eps, nicID)
	p.mu.Unlock()
}
// SetOption implements NetworkProtocol.SetOption.
//
// Only *tcpip.DefaultTTLOption is supported; any other option yields
// *tcpip.ErrUnknownProtocolOption.
func (p *protocol) SetOption(option tcpip.SettableNetworkProtocolOption) tcpip.Error {
	if ttl, ok := option.(*tcpip.DefaultTTLOption); ok {
		p.SetDefaultTTL(uint8(*ttl))
		return nil
	}

	return &tcpip.ErrUnknownProtocolOption{}
}
// Option implements NetworkProtocol.Option.
//
// Only *tcpip.DefaultTTLOption is supported, which is filled in with the
// current default TTL; any other option yields
// *tcpip.ErrUnknownProtocolOption.
func (p *protocol) Option(option tcpip.GettableNetworkProtocolOption) tcpip.Error {
	if ttl, ok := option.(*tcpip.DefaultTTLOption); ok {
		*ttl = tcpip.DefaultTTLOption(p.DefaultTTL())
		return nil
	}

	return &tcpip.ErrUnknownProtocolOption{}
}
// SetDefaultTTL atomically stores ttl as the default TTL for endpoints created
// with this protocol.
func (p *protocol) SetDefaultTTL(ttl uint8) {
	widened := uint32(ttl)
	atomic.StoreUint32(&p.defaultTTL, widened)
}
// DefaultTTL atomically loads and returns the default TTL for endpoints
// created with this protocol.
func (p *protocol) DefaultTTL() uint8 {
	stored := atomic.LoadUint32(&p.defaultTTL)
	return uint8(stored)
}
// Close is a no-op for the ipv6 protocol.
//
// NOTE(review): the previous comment attributed this method to
// stack.TransportProtocol, but *protocol is asserted in this file to implement
// stack.NetworkProtocol — presumably that is the interface requiring Close;
// confirm.
func (*protocol) Close() {}
// Wait is a no-op for the ipv6 protocol.
//
// NOTE(review): the previous comment attributed this method to
// stack.TransportProtocol, but *protocol is asserted in this file to implement
// stack.NetworkProtocol — presumably that is the interface requiring Wait;
// confirm.
func (*protocol) Wait() {}
// parseAndValidate parses the packet (including its transport layer header,
// when one is expected to be present) and returns the parsed IP header.
//
// The boolean result is true if the IP header was successfully parsed and is
// valid for the packet's size excluding the link header.
func (p *protocol) parseAndValidate(pkt *stack.PacketBuffer) (header.IPv6, bool) {
	transProto, expectTransportHdr, parsed := p.Parse(pkt)
	if !parsed {
		return nil, false
	}

	ipHdr := header.IPv6(pkt.NetworkHeader().View())

	// Do not include the link header's size when calculating the size of the IP
	// packet.
	ipPacketSize := pkt.Size() - pkt.LinkHeader().View().Size()
	if !ipHdr.IsValid(ipPacketSize) {
		return nil, false
	}

	if !expectTransportHdr {
		return ipHdr, true
	}

	// The transport layer will handle unknown protocols and transport layer
	// parsing errors, so only a result outside of the known set is fatal.
	res := p.stack.ParsePacketBufferTransport(transProto, pkt)
	if res != stack.ParsedOK && res != stack.UnknownTransportProtocol && res != stack.TransportLayerParseError {
		panic(fmt.Sprintf("unexpected error parsing transport header = %d", res))
	}

	return ipHdr, true
}
// Parse implements stack.NetworkProtocol.Parse.
//
// hasTransportHdr is reported as true only when the parsed packet has a zero
// fragment offset and its more-fragments flag is unset.
func (*protocol) Parse(pkt *stack.PacketBuffer) (proto tcpip.TransportProtocolNumber, hasTransportHdr bool, ok bool) {
	nextHdr, _, fragOffset, fragMore, parsed := parse.IPv6(pkt)
	if !parsed {
		return 0, false, false
	}

	unfragmented := fragOffset == 0 && !fragMore
	return nextHdr, unfragmented, true
}
// Forwarding implements stack.ForwardingNetworkProtocol.
//
// The forwarding flag is loaded atomically; forwarding is reported as enabled
// when the low 8 bits of the flag equal 1.
func (p *protocol) Forwarding() bool {
	flag := atomic.LoadUint32(&p.forwarding)
	return uint8(flag) == 1
}
// setForwarding sets the forwarding status for the protocol.
//
// The flag is swapped atomically to 1 when v is true and to 0 otherwise.
//
// Returns true if the forwarding status was updated, that is, if the previous
// flag value was the opposite of the one being stored.
func (p *protocol) setForwarding(v bool) bool {
	// By default assume forwarding is being disabled: store 0 and expect the
	// previous value to have been 1.
	next, opposite := uint32(0), uint32(1)
	if v {
		next, opposite = 1, 0
	}
	return atomic.SwapUint32(&p.forwarding, next) == opposite
}
// SetForwarding implements stack.ForwardingNetworkProtocol.
//
// When the forwarding status actually changes, every endpoint in the
// protocol's endpoint map is transitioned to the new status. p.mu is held
// throughout.
func (p *protocol) SetForwarding(v bool) {
	p.mu.Lock()
	defer p.mu.Unlock()

	if changed := p.setForwarding(v); changed {
		for _, ep := range p.mu.eps {
			ep.transitionForwarding(v)
		}
	}
}
// calculateNetworkMTU calculates the network-layer payload MTU based on the
// link-layer payload MTU and the length of every IPv6 header.
//
// Note that this is different from the Payload Length field of the IPv6 header,
// which includes the length of the extension headers.
//
// Returns *tcpip.ErrInvalidEndpointState if linkMTU is below the IPv6 minimum
// MTU and *tcpip.ErrMalformedHeader if networkHeadersLen exceeds it. The result
// is capped at maxPayloadSize.
func calculateNetworkMTU(linkMTU, networkHeadersLen uint32) (uint32, tcpip.Error) {
	switch {
	case linkMTU < header.IPv6MinimumMTU:
		return 0, &tcpip.ErrInvalidEndpointState{}
	case networkHeadersLen > header.IPv6MinimumMTU:
		// As per RFC 7112 section 5, we should discard packets if their IPv6
		// header is bigger than 1280 bytes (ie, the minimum link MTU) since we
		// do not support PMTU discovery:
		//   Hosts that do not discover the Path MTU MUST limit the IPv6 Header
		//   Chain length to 1280 bytes. Limiting the IPv6 Header Chain length
		//   to 1280 bytes ensures that the header chain length does not exceed
		//   the IPv6 minimum MTU.
		return 0, &tcpip.ErrMalformedHeader{}
	}

	if mtu := linkMTU - networkHeadersLen; mtu <= maxPayloadSize {
		return mtu, nil
	}
	return maxPayloadSize, nil
}
// Options holds options to configure a new protocol.
type Options struct {
// NDPConfigs is the default NDP configurations used by interfaces.
NDPConfigs NDPConfigurations
// AutoGenLinkLocal determines whether or not the stack attempts to
// auto-generate a link-local address for newly enabled non-loopback
// NICs.
//
// Note, setting this to true does not mean that a link-local address is
// assigned right away, or at all. If Duplicate Address Detection is enabled,
// an address is only assigned if it successfully resolves. If it fails, no
// further attempts are made to auto-generate a link-local address.
//
// The generated link-local address follows RFC 4291 Appendix A guidelines.
AutoGenLinkLocal bool
// NDPDisp is the NDP event dispatcher that an integrator can provide to
// receive NDP related events.
NDPDisp NDPDispatcher
// OpaqueIIDOpts hold the options for generating opaque interface
// identifiers (IIDs) as outlined by RFC 7217.
OpaqueIIDOpts OpaqueInterfaceIdentifierOptions
// TempIIDSeed is used to seed the initial temporary interface identifier
// history value used to generate IIDs for temporary SLAAC addresses.
//
// Temporary SLAAC addresses are short-lived addresses which are unpredictable
// and random from the perspective of other nodes on the network. It is
// recommended that the seed be a random byte buffer of at least
// header.IIDSize bytes to make sure that temporary SLAAC addresses are
// sufficiently random. It should follow minimum randomness requirements for
// security as outlined by RFC 4086.
//
// Note: using a nil value, the same seed across netstack program runs, or a
// seed that is too small would reduce randomness and increase predictability,
// defeating the purpose of temporary SLAAC addresses.
TempIIDSeed []byte
// MLD holds options for MLD.
MLD MLDOptions
// DADConfigs holds the default DAD configurations used by IPv6 endpoints.
DADConfigs stack.DADConfigurations
// AllowExternalLoopbackTraffic indicates that inbound loopback packets (i.e.
// martian loopback packets) should be accepted.
AllowExternalLoopbackTraffic bool
}
// NewProtocolWithOptions returns an IPv6 network protocol factory configured
// with opts.
//
// opts.NDPConfigs is passed through validate, and the ids and hashIV values
// are generated, once when this function is called; every protocol created by
// the returned factory is given those same values.
func NewProtocolWithOptions(opts Options) stack.NetworkProtocolFactory {
	opts.NDPConfigs.validate()

	ids := hash.RandN32(buckets)
	hashIV := hash.RandN32(1)[0]

	factory := func(s *stack.Stack) stack.NetworkProtocol {
		proto := &protocol{
			stack:   s,
			options: opts,
			ids:     ids,
			hashIV:  hashIV,
		}
		// The protocol itself is handed to the fragmentation instance, so it
		// can only be created once proto exists.
		proto.fragmentation = fragmentation.NewFragmentation(
			header.IPv6FragmentExtHdrFragmentOffsetBytesPerUnit,
			fragmentation.HighFragThreshold,
			fragmentation.LowFragThreshold,
			ReassembleTimeout,
			s.Clock(),
			proto,
		)
		proto.mu.eps = make(map[tcpip.NICID]*endpoint)
		proto.SetDefaultTTL(DefaultTTL)
		return proto
	}
	return factory
}
// NewProtocol is equivalent to NewProtocolWithOptions with an empty Options.
//
// The factory built from the zero-value Options is invoked immediately with s.
func NewProtocol(s *stack.Stack) stack.NetworkProtocol {
	factory := NewProtocolWithOptions(Options{})
	return factory(s)
}
// calculateFragmentReserve returns the sum of pkt's available header bytes,
// the size of pkt's network header and the size of an IPv6 fragment header.
func calculateFragmentReserve(pkt *stack.PacketBuffer) int {
	reserve := pkt.AvailableHeaderBytes()
	reserve += pkt.NetworkHeader().View().Size()
	reserve += header.IPv6FragmentHeaderSize
	return reserve
}
// hashRoute calculates a hash value for the given route.
//
// The hash covers, in order, the route's local address, its remote address and
// the 32-bit hashIV encoded as little-endian.
func hashRoute(r *stack.Route, hashIV uint32) uint32 {
	var iv [4]byte
	binary.LittleEndian.PutUint32(iv[:], hashIV)

	// The FNV-1a was chosen because it is a fast hashing algorithm, and
	// cryptographic properties are not needed here.
	h := fnv.New32a()
	for _, chunk := range [][]byte{
		[]byte(r.LocalAddress),
		[]byte(r.RemoteAddress),
		iv[:],
	} {
		// hash.Hash documents that Write never returns an error, so a failure
		// here indicates a broken invariant rather than a recoverable
		// condition.
		if _, err := h.Write(chunk); err != nil {
			panic(fmt.Sprintf("Hash.Write: %s, but Hash' implementation of Write is not expected to ever return an error", err))
		}
	}
	return h.Sum32()
}
// buildNextFragment obtains the next fragment from pf and gives it a network
// header made of a copy of originalIPHeaders followed by a fragment extension
// header.
//
// The fragment extension header carries the fragment's offset (in units of
// header.IPv6FragmentExtHdrFragmentOffsetBytesPerUnit), the more-fragments
// flag and id. The copied IPv6 header's next header and payload length fields
// are then updated to account for the added extension header and the
// fragment's payload.
//
// Returns the fragment and whether more fragments remain, as reported by
// pf.BuildNextFragment.
func buildNextFragment(pf *fragmentation.PacketFragmenter, originalIPHeaders header.IPv6, transportProto tcpip.TransportProtocolNumber, id uint32) (*stack.PacketBuffer, bool) {
	fragPkt, offset, payloadLen, more := pf.BuildNextFragment()
	fragPkt.NetworkProtocolNumber = ProtocolNumber

	fragExtHdr := header.IPv6ExtHdrSerializer{&header.IPv6SerializableFragmentExtHdr{
		FragmentOffset: uint16(offset / header.IPv6FragmentExtHdrFragmentOffsetBytesPerUnit),
		M:              more,
		Identification: id,
	}}

	origLen := len(originalIPHeaders)
	totalLen := origLen + fragExtHdr.Length()
	ipHdrs := header.IPv6(fragPkt.NetworkHeader().Push(totalLen))

	// Copy the IPv6 header and any extension headers already populated.
	if n := copy(ipHdrs, originalIPHeaders); n != origLen {
		panic(fmt.Sprintf("wrong number of bytes copied into fragmentIPHeaders: got %d, want %d", n, origLen))
	}

	// The fragment extension header is written right after the copied headers.
	nextHeader, _ := fragExtHdr.Serialize(transportProto, ipHdrs[origLen:])
	ipHdrs.SetNextHeader(nextHeader)
	// The payload length covers everything after the fixed IPv6 header: the
	// extension headers plus the fragment's payload bytes.
	ipHdrs.SetPayloadLength(uint16(payloadLen + totalLen - header.IPv6MinimumSize))

	return fragPkt, more
}