Detect looped-back NDP DAD messages
...as per RFC 7527.
If a looped-back DAD message is received, do not fail DAD since our own
DAD message does not indicate that a neighbor has the address assigned.
Note, this is not a cherry-pick of d81fac9 which makes the same fix on
the master branch as the `master` and `releases/f1` branches have
diverged.
Bug: 71187
Change-Id: I75b56359f5d32bd34bffb0d158a2550b3cd2a8a5
diff --git a/pkg/tcpip/header/ndp_options.go b/pkg/tcpip/header/ndp_options.go
index 554242f..01144cb 100644
--- a/pkg/tcpip/header/ndp_options.go
+++ b/pkg/tcpip/header/ndp_options.go
@@ -42,6 +42,10 @@
// option, as per RFC 4861 section 4.6.2.
NDPPrefixInformationType NDPOptionIdentifier = 3
+ // NDPNonceOptionType is the type of the Nonce option, as per
+ // RFC 3971 section 5.3.2.
+ NDPNonceOptionType NDPOptionIdentifier = 14
+
// NDPRecursiveDNSServerOptionType is the type of the Recursive DNS
// Server option, as per RFC 8106 section 5.1.
NDPRecursiveDNSServerOptionType NDPOptionIdentifier = 25
@@ -231,6 +235,9 @@
case NDPTargetLinkLayerAddressOptionType:
return NDPTargetLinkLayerAddressOption(body), false, nil
+ case NDPNonceOptionType:
+ return NDPNonceOption(body), false, nil
+
case NDPPrefixInformationType:
// Make sure the length of a Prefix Information option
// body is ndpPrefixInformationLength, as per RFC 4861
@@ -416,6 +423,37 @@
return l
}
+// NDPNonceOption is the NDP Nonce Option as defined by RFC 3971 section 5.3.2.
+//
+// It is the first X bytes following the NDP option's Type and Length field
+// where X is the value in Length multiplied by lengthByteUnits - 2 bytes.
+type NDPNonceOption []byte
+
+// Type implements NDPOption.
+func (o NDPNonceOption) Type() NDPOptionIdentifier {
+ return NDPNonceOptionType
+}
+
+// Length implements NDPOption.
+func (o NDPNonceOption) Length() int {
+ return len(o)
+}
+
+// serializeInto implements NDPOption.
+func (o NDPNonceOption) serializeInto(b []byte) int {
+ return copy(b, o)
+}
+
+// String implements fmt.Stringer.
+func (o NDPNonceOption) String() string {
+ return fmt.Sprintf("%T(%x)", o, []byte(o))
+}
+
+// Nonce returns the nonce value this option holds.
+func (o NDPNonceOption) Nonce() []byte {
+ return []byte(o)
+}
+
// NDPSourceLinkLayerAddressOption is the NDP Source Link Layer Option
// as defined by RFC 4861 section 4.6.1.
//
diff --git a/pkg/tcpip/network/ipv6/icmp.go b/pkg/tcpip/network/ipv6/icmp.go
index 6ee1627..91f23fe 100644
--- a/pkg/tcpip/network/ipv6/icmp.go
+++ b/pkg/tcpip/network/ipv6/icmp.go
@@ -214,6 +214,18 @@
return
}
+ var it header.NDPOptionIterator
+ {
+ var err error
+ it, err = ns.Options().Iter(false /* check */)
+ if err != nil {
+ // Options are not valid as per the wire format, silently drop the
+ // packet.
+ received.Invalid.Increment()
+ return
+ }
+ }
+
if e.hasTentativeAddr(targetAddr) {
// If the target address is tentative and the source of the packet is a
// unicast (specified) address, then the source of the packet is
@@ -227,6 +239,22 @@
// stack know so it can handle such a scenario and do nothing further with
// the NS.
if srcAddr == header.IPv6Any {
+ var nonce []byte
+ for {
+ opt, done, err := it.Next()
+ if err != nil {
+ received.Invalid.Increment()
+ return
+ }
+ if done {
+ break
+ }
+ if n, ok := opt.(header.NDPNonceOption); ok {
+ nonce = n.Nonce()
+ break
+ }
+ }
+
// We would get an error if the address no longer exists or the address
// is no longer tentative (DAD resolved between the call to
// hasTentativeAddr and this point). Both of these are valid scenarios:
@@ -238,7 +266,7 @@
//
// TODO(gvisor.dev/issue/4046): Handle the scenario when a duplicate
// address is detected for an assigned address.
- if err := e.dupTentativeAddrDetected(targetAddr); err != nil && err != tcpip.ErrBadAddress && err != tcpip.ErrInvalidEndpointState {
+ if err := e.dupTentativeAddrDetected(targetAddr, nonce); err != nil && err != tcpip.ErrBadAddress && err != tcpip.ErrInvalidEndpointState {
panic(fmt.Sprintf("unexpected error handling duplicate tentative address: %s", err))
}
}
@@ -257,21 +285,10 @@
return
}
- var sourceLinkAddr tcpip.LinkAddress
- {
- it, err := ns.Options().Iter(false /* check */)
- if err != nil {
- // Options are not valid as per the wire format, silently drop the
- // packet.
- received.Invalid.Increment()
- return
- }
-
- sourceLinkAddr, ok = getSourceLinkAddr(it)
- if !ok {
- received.Invalid.Increment()
- return
- }
+ sourceLinkAddr, ok := getSourceLinkAddr(it)
+ if !ok {
+ received.Invalid.Increment()
+ return
}
// As per RFC 4861 section 4.3, the Source Link-Layer Address Option MUST
@@ -398,6 +415,10 @@
na := header.NDPNeighborAdvert(payload.ToView())
targetAddr := na.TargetAddress()
if e.hasTentativeAddr(targetAddr) {
+ // We only send a nonce value in DAD messages to check for loopedback
+ // messages so we use the empty nonce value here.
+ var nonce []byte
+
// We just got an NA from a node that owns an address we are performing
// DAD on, implying the address is not unique. In this case we let the
// stack know so it can handle such a scenario and do nothing furthur with
@@ -414,7 +435,7 @@
//
// TODO(gvisor.dev/issue/4046): Handle the scenario when a duplicate
// address is detected for an assigned address.
- if err := e.dupTentativeAddrDetected(targetAddr); err != nil && err != tcpip.ErrBadAddress && err != tcpip.ErrInvalidEndpointState {
+ if err := e.dupTentativeAddrDetected(targetAddr, nonce); err != nil && err != tcpip.ErrBadAddress && err != tcpip.ErrInvalidEndpointState {
panic(fmt.Sprintf("unexpected error handling duplicate tentative address: %s", err))
}
return
diff --git a/pkg/tcpip/network/ipv6/ipv6.go b/pkg/tcpip/network/ipv6/ipv6.go
index 0883956..f936b9b 100644
--- a/pkg/tcpip/network/ipv6/ipv6.go
+++ b/pkg/tcpip/network/ipv6/ipv6.go
@@ -305,7 +305,7 @@
// dupTentativeAddrDetected removes the tentative address if it exists. If the
// address was generated via SLAAC, an attempt is made to generate a new
// address.
-func (e *endpoint) dupTentativeAddrDetected(addr tcpip.Address) *tcpip.Error {
+func (e *endpoint) dupTentativeAddrDetected(addr tcpip.Address, nonce []byte) *tcpip.Error {
e.mu.Lock()
defer e.mu.Unlock()
@@ -318,27 +318,44 @@
return tcpip.ErrInvalidEndpointState
}
- // If the address is a SLAAC address, do not invalidate its SLAAC prefix as an
- // attempt will be made to generate a new address for it.
- if err := e.removePermanentEndpointLocked(addressEndpoint, false /* allowSLAACInvalidation */); err != nil {
- return err
- }
+ switch result := e.mu.ndp.extendIfNonceEqual(addr, nonce); result {
+ case newlyExtended:
+ // The nonce we got back was the same we sent so we know the message
+ // indicating a duplicate address was likely ours so do not consider
+ // the address duplicate here.
+ return nil
+ case alreadyExtended:
+ // See Extended.
+ //
+ // Our DAD message was looped back already.
+ return nil
+ case noDADStateFound:
+ panic(fmt.Sprintf("expected DAD state for tentative address %s", addr))
+ case nonceNotEqual:
+ // If the address is a SLAAC address, do not invalidate its SLAAC prefix as an
+ // attempt will be made to generate a new address for it.
+ if err := e.removePermanentEndpointLocked(addressEndpoint, false /* allowSLAACInvalidation */); err != nil {
+ return err
+ }
- prefix := addressEndpoint.Subnet()
+ prefix := addressEndpoint.Subnet()
- switch t := addressEndpoint.ConfigType(); t {
- case stack.AddressConfigStatic:
- case stack.AddressConfigSlaac:
- e.mu.ndp.regenerateSLAACAddr(prefix)
- case stack.AddressConfigSlaacTemp:
- // Do not reset the generation attempts counter for the prefix as the
- // temporary address is being regenerated in response to a DAD conflict.
- e.mu.ndp.regenerateTempSLAACAddr(prefix, false /* resetGenAttempts */)
+ switch t := addressEndpoint.ConfigType(); t {
+ case stack.AddressConfigStatic:
+ case stack.AddressConfigSlaac:
+ e.mu.ndp.regenerateSLAACAddr(prefix)
+ case stack.AddressConfigSlaacTemp:
+ // Do not reset the generation attempts counter for the prefix as the
+ // temporary address is being regenerated in response to a DAD conflict.
+ e.mu.ndp.regenerateTempSLAACAddr(prefix, false /* resetGenAttempts */)
+ default:
+ panic(fmt.Sprintf("unrecognized address config type = %d", t))
+ }
+
+ return nil
default:
- panic(fmt.Sprintf("unrecognized address config type = %d", t))
+ panic(fmt.Sprintf("unhandled result = %d", result))
}
-
- return nil
}
// transitionForwarding transitions the endpoint's forwarding status to
diff --git a/pkg/tcpip/network/ipv6/ndp.go b/pkg/tcpip/network/ipv6/ndp.go
index 6a64cc9..580e310 100644
--- a/pkg/tcpip/network/ipv6/ndp.go
+++ b/pkg/tcpip/network/ipv6/ndp.go
@@ -15,7 +15,9 @@
package ipv6
import (
+ "bytes"
"fmt"
+ "io"
"log"
"math/rand"
"time"
@@ -498,17 +500,28 @@
temporaryAddressDesyncFactor time.Duration
}
+type extendRequest int
+
+const (
+ notRequested extendRequest = iota
+ requested
+ extended
+)
+
// dadState holds the Duplicate Address Detection timer and channel to signal
// to the DAD goroutine that DAD should stop.
type dadState struct {
// The DAD timer to send the next NS message, or resolve the address.
- job *tcpip.Job
+ timer tcpip.Timer
// Used to let the DAD timer know that it has been stopped.
//
// Must only be read from or written to while protected by the lock of
// the IPv6 endpoint this dadState is associated with.
done *bool
+
+ nonce []byte
+ extendRequest extendRequest
}
// defaultRouterState holds data associated with a default router discovered by
@@ -608,6 +621,21 @@
maxGenerationAttempts uint8
}
+// NDP options must be 8 octet aligned and the first 2 bytes are used for
+// the type and length fields leaving 6 octets as the minimum size for a
+// nonce option without padding.
+const nonceSize = 6
+
+// As per RFC 7527 section 4.1,
+//
+// If any probe is looped back within RetransTimer milliseconds after
+// having sent DupAddrDetectTransmits NS(DAD) messages, the interface
+// continues with another MAX_MULTICAST_SOLICIT number of NS(DAD)
+// messages transmitted RetransTimer milliseconds apart.
+//
+// Value taken from RFC 4861 section 10.
+const maxMulticastSolicit = 3
+
// startDuplicateAddressDetection performs Duplicate Address Detection.
//
// This function must only be called by IPv6 addresses that are currently
@@ -651,8 +679,82 @@
return nil
}
+ // Protected by ndp.ep.mu.
+ done := false
+
state := dadState{
- job: ndp.ep.protocol.stack.NewJob(&ndp.ep.mu, func() {
+ done: &done,
+
+ // We initially start a timer to fire immediately because some of the DAD
+ // work cannot be done while holding the IPv6 endpoint's lock. This is
+ // effectively the same as starting a goroutine but we use a timer that
+ // fires immediately so we can reset it for the next DAD iteration.
+ timer: ndp.ep.protocol.stack.Clock().AfterFunc(0, func() {
+ dadDone := remaining == 0
+
+ nonce, earlyReturn := func() ([]byte, bool) {
+ ndp.ep.mu.Lock()
+ defer ndp.ep.mu.Unlock()
+
+ if done {
+ return nil, true
+ }
+
+ state, ok := ndp.dad[addr]
+ if !ok {
+ panic(fmt.Sprintf("ndpdad: DAD timer fired but missing state for %s on NIC(%d)", addr, ndp.ep.nic.ID()))
+ }
+
+ if addressEndpoint.GetKind() != stack.PermanentTentative {
+ // The endpoint should still be marked as tentative since we are still
+ // performing DAD on it.
+ panic(fmt.Sprintf("ndpdad: addr %s is no longer tentative on NIC(%d)", addr, ndp.ep.nic.ID()))
+ }
+
+ // As per RFC 7527 section 4
+ //
+ // If any probe is looped back within RetransTimer milliseconds
+ // after having sent DupAddrDetectTransmits NS(DAD) messages, the
+ // interface continues with another MAX_MULTICAST_SOLICIT number of
+ // NS(DAD) messages transmitted RetransTimer milliseconds apart.
+ if dadDone && state.extendRequest == requested {
+ dadDone = false
+ remaining = maxMulticastSolicit
+ state.extendRequest = extended
+ }
+
+ if !dadDone {
+ if state.nonce == nil {
+ state.nonce = make([]byte, nonceSize)
+ }
+
+ if n, err := io.ReadFull(ndp.ep.protocol.stack.SecureRNG(), state.nonce); err != nil {
+ panic(fmt.Sprintf("SecureRNG.Read(...): %s", err))
+ } else if n != len(state.nonce) {
+ panic(fmt.Sprintf("expected to read %d bytes from secure RNG, only read %d bytes", len(state.nonce), n))
+ }
+ }
+
+ ndp.dad[addr] = state
+ return state.nonce, false
+ }()
+ if earlyReturn {
+ return
+ }
+
+ var err *tcpip.Error
+ if !dadDone {
+ err = ndp.sendDADPacket(addr, addressEndpoint, nonce)
+ }
+
+ ndp.ep.mu.Lock()
+ defer ndp.ep.mu.Unlock()
+
+ if done {
+ // DAD was stopped.
+ return
+ }
+
state, ok := ndp.dad[addr]
if !ok {
panic(fmt.Sprintf("ndpdad: DAD timer fired but missing state for %s on NIC(%d)", addr, ndp.ep.nic.ID()))
@@ -664,13 +766,6 @@
panic(fmt.Sprintf("ndpdad: addr %s is no longer tentative on NIC(%d)", addr, ndp.ep.nic.ID()))
}
- dadDone := remaining == 0
-
- var err *tcpip.Error
- if !dadDone {
- err = ndp.sendDADPacket(addr, addressEndpoint)
- }
-
if dadDone {
// DAD has resolved.
addressEndpoint.SetKind(stack.Permanent)
@@ -678,7 +773,7 @@
// DAD is not done and we had no errors when sending the last NDP NS,
// schedule the next DAD timer.
remaining--
- state.job.Schedule(ndp.configs.RetransmitTimer)
+ state.timer.Reset(ndp.configs.RetransmitTimer)
return
}
@@ -703,11 +798,6 @@
}),
}
- // We initially start a timer to fire immediately because some of the DAD work
- // cannot be done while holding the IPv6 endpoint's lock. This is effectively
- // the same as starting a goroutine but we use a timer that fires immediately
- // so we can reset it for the next DAD iteration.
- state.job.Schedule(0)
ndp.dad[addr] = state
return nil
@@ -717,13 +807,18 @@
// addr.
//
// addr must be a tentative IPv6 address on ndp's IPv6 endpoint.
-func (ndp *ndpState) sendDADPacket(addr tcpip.Address, addressEndpoint stack.AddressEndpoint) *tcpip.Error {
+func (ndp *ndpState) sendDADPacket(addr tcpip.Address, addressEndpoint stack.AddressEndpoint, nonce []byte) *tcpip.Error {
snmc := header.SolicitedNodeAddr(addr)
- icmp := header.ICMPv6(buffer.NewView(header.ICMPv6NeighborSolicitMinimumSize))
+ ser := header.NDPOptionsSerializer{
+ header.NDPNonceOption(nonce),
+ }
+
+ icmp := header.ICMPv6(buffer.NewView(header.ICMPv6NeighborSolicitMinimumSize + ser.Length()))
icmp.SetType(header.ICMPv6NeighborSolicit)
ns := header.NDPNeighborSolicit(icmp.MessageBody())
ns.SetTargetAddress(addr)
+ ns.Options().Serialize(ser)
icmp.SetChecksum(header.ICMPv6Checksum(icmp, header.IPv6Any, snmc, buffer.VectorisedView{}))
pkt := stack.NewPacketBuffer(stack.PacketBufferOptions{
@@ -762,7 +857,8 @@
return
}
- dad.job.Cancel()
+ dad.timer.Stop()
+ *dad.done = true
delete(ndp.dad, addr)
// Let the integrator know DAD did not resolve.
@@ -771,6 +867,61 @@
}
}
+// extendIfNonceEqualDisposition enumerates the possible results from
+// extendIfNonceEqual.
+type extendIfNonceEqualDisposition int
+
+const (
+ // newlyExtended indicates that the DAD process was extended.
+ newlyExtended extendIfNonceEqualDisposition = iota
+
+ // alreadyExtended indicates that the DAD process was already extended.
+ alreadyExtended
+
+ // noDADStateFound indicates that DAD state was not found for the address.
+ noDADStateFound
+
+ // nonceNotEqual indicates that the nonce value passed and the nonce in the
+ // last send DAD message are not equal.
+ nonceNotEqual
+)
+
+// extendIfNonceEqual extends the DAD process if the provided nonce is the
+// same as the nonce sent in the last DAD message.
+//
+// The IPv6 endpoint that ndp belongs to MUST be locked.
+func (ndp *ndpState) extendIfNonceEqual(addr tcpip.Address, nonce []byte) extendIfNonceEqualDisposition {
+ s, ok := ndp.dad[addr]
+ if !ok {
+ return noDADStateFound
+ }
+
+ if s.extendRequest != notRequested {
+ return alreadyExtended
+ }
+
+ // As per RFC 7527 section 4
+ //
+ // If any probe is looped back within RetransTimer milliseconds after having
+ // sent DupAddrDetectTransmits NS(DAD) messages, the interface continues
+ // with another MAX_MULTICAST_SOLICIT number of NS(DAD) messages transmitted
+ // RetransTimer milliseconds apart.
+ //
+ // If a DAD message has already been sent and the nonce value we observed is
+ // the same as the nonce value we last sent, then we assume our probe was
+ // looped back and request an extension to the DAD process.
+ //
+ // Note, the first DAD message is sent asynchronously so we need to make sure
+ // that we sent a DAD message by checking if we have a nonce value set.
+ if s.nonce != nil && bytes.Equal(s.nonce, nonce) {
+ s.extendRequest = requested
+ ndp.dad[addr] = s
+ return newlyExtended
+ }
+
+ return nonceNotEqual
+}
+
// handleRA handles a Router Advertisement message that arrived on the NIC
// this ndp is for. Does nothing if the NIC is configured to not handle RAs.
//
diff --git a/pkg/tcpip/stack/stack.go b/pkg/tcpip/stack/stack.go
index 6d761b1..8efc8a1 100644
--- a/pkg/tcpip/stack/stack.go
+++ b/pkg/tcpip/stack/stack.go
@@ -23,6 +23,7 @@
"bytes"
"encoding/binary"
"fmt"
+ "io"
mathrand "math/rand"
"sync/atomic"
"time"
@@ -448,6 +449,9 @@
// used when a random number is required.
randomGenerator *mathrand.Rand
+ // secureRNG is a cryptographically secure random number generator.
+ secureRNG io.Reader
+
// sendBufferSize holds the min/default/max send buffer sizes for
// endpoints other than TCP.
sendBufferSize SendBufferSizeOption
@@ -526,6 +530,9 @@
// IPTables are the initial iptables rules. If nil, iptables will allow
// all traffic.
IPTables *IPTables
+
+ // SecureRNG is a cryptographically secure random number generator.
+ SecureRNG io.Reader
}
// TransportEndpointInfo holds useful information about a transport endpoint
@@ -634,6 +641,10 @@
opts.NUDConfigs.resetInvalidFields()
+ if opts.SecureRNG == nil {
+ opts.SecureRNG = rand.Reader
+ }
+
s := &Stack{
transportProtocols: make(map[tcpip.TransportProtocolNumber]*transportProtocolState),
networkProtocols: make(map[tcpip.NetworkProtocolNumber]NetworkProtocol),
@@ -653,6 +664,7 @@
uniqueIDGenerator: opts.UniqueID,
nudDisp: opts.NUDDisp,
randomGenerator: mathrand.New(randSrc),
+ secureRNG: opts.SecureRNG,
sendBufferSize: SendBufferSizeOption{
Min: MinBufferSize,
Default: DefaultBufferSize,
@@ -2019,6 +2031,12 @@
return s.randomGenerator
}
+// SecureRNG returns the stack's cryptographically secure random number
+// generator.
+func (s *Stack) SecureRNG() io.Reader {
+ return s.secureRNG
+}
+
func generateRandUint32() uint32 {
b := make([]byte, 4)
if _, err := rand.Read(b); err != nil {