Detect looped-back NDP DAD messages

...as per RFC 7527.

If a looped-back DAD message is received, do not fail DAD since our own
DAD message does not indicate that a neighbor has the address assigned.

Note, this is not a cherry-pick of d81fac9 which makes the same fix on
the master branch as the `master` and `releases/f1` branches have
diverged.

Bug: 71187
Change-Id: I75b56359f5d32bd34bffb0d158a2550b3cd2a8a5
diff --git a/pkg/tcpip/header/ndp_options.go b/pkg/tcpip/header/ndp_options.go
index 554242f..01144cb 100644
--- a/pkg/tcpip/header/ndp_options.go
+++ b/pkg/tcpip/header/ndp_options.go
@@ -42,6 +42,10 @@
 	// option, as per RFC 4861 section 4.6.2.
 	NDPPrefixInformationType NDPOptionIdentifier = 3
 
+	// NDPNonceOptionType is the type of the Nonce option, as per
+	// RFC 3971 section 5.3.2.
+	NDPNonceOptionType NDPOptionIdentifier = 14
+
 	// NDPRecursiveDNSServerOptionType is the type of the Recursive DNS
 	// Server option, as per RFC 8106 section 5.1.
 	NDPRecursiveDNSServerOptionType NDPOptionIdentifier = 25
@@ -231,6 +235,9 @@
 		case NDPTargetLinkLayerAddressOptionType:
 			return NDPTargetLinkLayerAddressOption(body), false, nil
 
+		case NDPNonceOptionType:
+			return NDPNonceOption(body), false, nil
+
 		case NDPPrefixInformationType:
 			// Make sure the length of a Prefix Information option
 			// body is ndpPrefixInformationLength, as per RFC 4861
@@ -416,6 +423,37 @@
 	return l
 }
 
+// NDPNonceOption is the NDP Nonce Option as defined by RFC 3971 section 5.3.2.
+//
+// It is the first X bytes following the NDP option's Type and Length field
+// where X is the value in Length multiplied by lengthByteUnits - 2 bytes.
+type NDPNonceOption []byte
+
+// Type implements NDPOption.
+func (o NDPNonceOption) Type() NDPOptionIdentifier {
+	return NDPNonceOptionType
+}
+
+// Length implements NDPOption.
+func (o NDPNonceOption) Length() int {
+	return len(o)
+}
+
+// serializeInto implements NDPOption.
+func (o NDPNonceOption) serializeInto(b []byte) int {
+	return copy(b, o)
+}
+
+// String implements fmt.Stringer.
+func (o NDPNonceOption) String() string {
+	return fmt.Sprintf("%T(%x)", o, []byte(o))
+}
+
+// Nonce returns the nonce value this option holds.
+func (o NDPNonceOption) Nonce() []byte {
+	return []byte(o)
+}
+
 // NDPSourceLinkLayerAddressOption is the NDP Source Link Layer Option
 // as defined by RFC 4861 section 4.6.1.
 //
diff --git a/pkg/tcpip/network/ipv6/icmp.go b/pkg/tcpip/network/ipv6/icmp.go
index 6ee1627..91f23fe 100644
--- a/pkg/tcpip/network/ipv6/icmp.go
+++ b/pkg/tcpip/network/ipv6/icmp.go
@@ -214,6 +214,18 @@
 			return
 		}
 
+		var it header.NDPOptionIterator
+		{
+			var err error
+			it, err = ns.Options().Iter(false /* check */)
+			if err != nil {
+				// Options are not valid as per the wire format, silently drop the
+				// packet.
+				received.Invalid.Increment()
+				return
+			}
+		}
+
 		if e.hasTentativeAddr(targetAddr) {
 			// If the target address is tentative and the source of the packet is a
 			// unicast (specified) address, then the source of the packet is
@@ -227,6 +239,22 @@
 			// stack know so it can handle such a scenario and do nothing further with
 			// the NS.
 			if srcAddr == header.IPv6Any {
+				var nonce []byte
+				for {
+					opt, done, err := it.Next()
+					if err != nil {
+						received.Invalid.Increment()
+						return
+					}
+					if done {
+						break
+					}
+					if n, ok := opt.(header.NDPNonceOption); ok {
+						nonce = n.Nonce()
+						break
+					}
+				}
+
 				// We would get an error if the address no longer exists or the address
 				// is no longer tentative (DAD resolved between the call to
 				// hasTentativeAddr and this point). Both of these are valid scenarios:
@@ -238,7 +266,7 @@
 				//
 				// TODO(gvisor.dev/issue/4046): Handle the scenario when a duplicate
 				// address is detected for an assigned address.
-				if err := e.dupTentativeAddrDetected(targetAddr); err != nil && err != tcpip.ErrBadAddress && err != tcpip.ErrInvalidEndpointState {
+				if err := e.dupTentativeAddrDetected(targetAddr, nonce); err != nil && err != tcpip.ErrBadAddress && err != tcpip.ErrInvalidEndpointState {
 					panic(fmt.Sprintf("unexpected error handling duplicate tentative address: %s", err))
 				}
 			}
@@ -257,21 +285,10 @@
 			return
 		}
 
-		var sourceLinkAddr tcpip.LinkAddress
-		{
-			it, err := ns.Options().Iter(false /* check */)
-			if err != nil {
-				// Options are not valid as per the wire format, silently drop the
-				// packet.
-				received.Invalid.Increment()
-				return
-			}
-
-			sourceLinkAddr, ok = getSourceLinkAddr(it)
-			if !ok {
-				received.Invalid.Increment()
-				return
-			}
+		sourceLinkAddr, ok := getSourceLinkAddr(it)
+		if !ok {
+			received.Invalid.Increment()
+			return
 		}
 
 		// As per RFC 4861 section 4.3, the Source Link-Layer Address Option MUST
@@ -398,6 +415,10 @@
 		na := header.NDPNeighborAdvert(payload.ToView())
 		targetAddr := na.TargetAddress()
 		if e.hasTentativeAddr(targetAddr) {
+			// We only send a nonce value in DAD messages to check for loopedback
+			// messages so we use the empty nonce value here.
+			var nonce []byte
+
 			// We just got an NA from a node that owns an address we are performing
 			// DAD on, implying the address is not unique. In this case we let the
 			// stack know so it can handle such a scenario and do nothing furthur with
@@ -414,7 +435,7 @@
 			//
 			// TODO(gvisor.dev/issue/4046): Handle the scenario when a duplicate
 			// address is detected for an assigned address.
-			if err := e.dupTentativeAddrDetected(targetAddr); err != nil && err != tcpip.ErrBadAddress && err != tcpip.ErrInvalidEndpointState {
+			if err := e.dupTentativeAddrDetected(targetAddr, nonce); err != nil && err != tcpip.ErrBadAddress && err != tcpip.ErrInvalidEndpointState {
 				panic(fmt.Sprintf("unexpected error handling duplicate tentative address: %s", err))
 			}
 			return
diff --git a/pkg/tcpip/network/ipv6/ipv6.go b/pkg/tcpip/network/ipv6/ipv6.go
index 0883956..f936b9b 100644
--- a/pkg/tcpip/network/ipv6/ipv6.go
+++ b/pkg/tcpip/network/ipv6/ipv6.go
@@ -305,7 +305,7 @@
 // dupTentativeAddrDetected removes the tentative address if it exists. If the
 // address was generated via SLAAC, an attempt is made to generate a new
 // address.
-func (e *endpoint) dupTentativeAddrDetected(addr tcpip.Address) *tcpip.Error {
+func (e *endpoint) dupTentativeAddrDetected(addr tcpip.Address, nonce []byte) *tcpip.Error {
 	e.mu.Lock()
 	defer e.mu.Unlock()
 
@@ -318,27 +318,44 @@
 		return tcpip.ErrInvalidEndpointState
 	}
 
-	// If the address is a SLAAC address, do not invalidate its SLAAC prefix as an
-	// attempt will be made to generate a new address for it.
-	if err := e.removePermanentEndpointLocked(addressEndpoint, false /* allowSLAACInvalidation */); err != nil {
-		return err
-	}
+	switch result := e.mu.ndp.extendIfNonceEqual(addr, nonce); result {
+	case newlyExtended:
+		// The nonce we got back was the same we sent so we know the message
+		// indicating a duplicate address was likely ours so do not consider
+		// the address duplicate here.
+		return nil
+	case alreadyExtended:
+		// See Extended.
+		//
+		// Our DAD message was looped back already.
+		return nil
+	case noDADStateFound:
+		panic(fmt.Sprintf("expected DAD state for tentative address %s", addr))
+	case nonceNotEqual:
+		// If the address is a SLAAC address, do not invalidate its SLAAC prefix as an
+		// attempt will be made to generate a new address for it.
+		if err := e.removePermanentEndpointLocked(addressEndpoint, false /* allowSLAACInvalidation */); err != nil {
+			return err
+		}
 
-	prefix := addressEndpoint.Subnet()
+		prefix := addressEndpoint.Subnet()
 
-	switch t := addressEndpoint.ConfigType(); t {
-	case stack.AddressConfigStatic:
-	case stack.AddressConfigSlaac:
-		e.mu.ndp.regenerateSLAACAddr(prefix)
-	case stack.AddressConfigSlaacTemp:
-		// Do not reset the generation attempts counter for the prefix as the
-		// temporary address is being regenerated in response to a DAD conflict.
-		e.mu.ndp.regenerateTempSLAACAddr(prefix, false /* resetGenAttempts */)
+		switch t := addressEndpoint.ConfigType(); t {
+		case stack.AddressConfigStatic:
+		case stack.AddressConfigSlaac:
+			e.mu.ndp.regenerateSLAACAddr(prefix)
+		case stack.AddressConfigSlaacTemp:
+			// Do not reset the generation attempts counter for the prefix as the
+			// temporary address is being regenerated in response to a DAD conflict.
+			e.mu.ndp.regenerateTempSLAACAddr(prefix, false /* resetGenAttempts */)
+		default:
+			panic(fmt.Sprintf("unrecognized address config type = %d", t))
+		}
+
+		return nil
 	default:
-		panic(fmt.Sprintf("unrecognized address config type = %d", t))
+		panic(fmt.Sprintf("unhandled result = %d", result))
 	}
-
-	return nil
 }
 
 // transitionForwarding transitions the endpoint's forwarding status to
diff --git a/pkg/tcpip/network/ipv6/ndp.go b/pkg/tcpip/network/ipv6/ndp.go
index 6a64cc9..580e310 100644
--- a/pkg/tcpip/network/ipv6/ndp.go
+++ b/pkg/tcpip/network/ipv6/ndp.go
@@ -15,7 +15,9 @@
 package ipv6
 
 import (
+	"bytes"
 	"fmt"
+	"io"
 	"log"
 	"math/rand"
 	"time"
@@ -498,17 +500,28 @@
 	temporaryAddressDesyncFactor time.Duration
 }
 
+type extendRequest int
+
+const (
+	notRequested extendRequest = iota
+	requested
+	extended
+)
+
 // dadState holds the Duplicate Address Detection timer and channel to signal
 // to the DAD goroutine that DAD should stop.
 type dadState struct {
 	// The DAD timer to send the next NS message, or resolve the address.
-	job *tcpip.Job
+	timer tcpip.Timer
 
 	// Used to let the DAD timer know that it has been stopped.
 	//
 	// Must only be read from or written to while protected by the lock of
 	// the IPv6 endpoint this dadState is associated with.
 	done *bool
+
+	nonce         []byte
+	extendRequest extendRequest
 }
 
 // defaultRouterState holds data associated with a default router discovered by
@@ -608,6 +621,21 @@
 	maxGenerationAttempts uint8
 }
 
+// NDP options must be 8 octet aligned and the first 2 bytes are used for
+// the type and length fields leaving 6 octets as the minimum size for a
+// nonce option without padding.
+const nonceSize = 6
+
+// As per RFC 7527 section 4.1,
+//
+//   If any probe is looped back within RetransTimer milliseconds after
+//   having sent DupAddrDetectTransmits NS(DAD) messages, the interface
+//   continues with another MAX_MULTICAST_SOLICIT number of NS(DAD)
+//   messages transmitted RetransTimer milliseconds apart.
+//
+// Value taken from RFC 4861 section 10.
+const maxMulticastSolicit = 3
+
 // startDuplicateAddressDetection performs Duplicate Address Detection.
 //
 // This function must only be called by IPv6 addresses that are currently
@@ -651,8 +679,82 @@
 		return nil
 	}
 
+	// Protected by ndp.ep.mu.
+	done := false
+
 	state := dadState{
-		job: ndp.ep.protocol.stack.NewJob(&ndp.ep.mu, func() {
+		done: &done,
+
+		// We initially start a timer to fire immediately because some of the DAD
+		// work cannot be done while holding the IPv6 endpoint's lock. This is
+		// effectively the same as starting a goroutine but we use a timer that
+		// fires immediately so we can reset it for the next DAD iteration.
+		timer: ndp.ep.protocol.stack.Clock().AfterFunc(0, func() {
+			dadDone := remaining == 0
+
+			nonce, earlyReturn := func() ([]byte, bool) {
+				ndp.ep.mu.Lock()
+				defer ndp.ep.mu.Unlock()
+
+				if done {
+					return nil, true
+				}
+
+				state, ok := ndp.dad[addr]
+				if !ok {
+					panic(fmt.Sprintf("ndpdad: DAD timer fired but missing state for %s on NIC(%d)", addr, ndp.ep.nic.ID()))
+				}
+
+				if addressEndpoint.GetKind() != stack.PermanentTentative {
+					// The endpoint should still be marked as tentative since we are still
+					// performing DAD on it.
+					panic(fmt.Sprintf("ndpdad: addr %s is no longer tentative on NIC(%d)", addr, ndp.ep.nic.ID()))
+				}
+
+				// As per RFC 7527 section 4
+				//
+				//   If any probe is looped back within RetransTimer milliseconds
+				//   after having sent DupAddrDetectTransmits NS(DAD) messages, the
+				//   interface continues with another MAX_MULTICAST_SOLICIT number of
+				//   NS(DAD) messages transmitted RetransTimer milliseconds apart.
+				if dadDone && state.extendRequest == requested {
+					dadDone = false
+					remaining = maxMulticastSolicit
+					state.extendRequest = extended
+				}
+
+				if !dadDone {
+					if state.nonce == nil {
+						state.nonce = make([]byte, nonceSize)
+					}
+
+					if n, err := io.ReadFull(ndp.ep.protocol.stack.SecureRNG(), state.nonce); err != nil {
+						panic(fmt.Sprintf("SecureRNG.Read(...): %s", err))
+					} else if n != len(state.nonce) {
+						panic(fmt.Sprintf("expected to read %d bytes from secure RNG, only read %d bytes", len(state.nonce), n))
+					}
+				}
+
+				ndp.dad[addr] = state
+				return state.nonce, false
+			}()
+			if earlyReturn {
+				return
+			}
+
+			var err *tcpip.Error
+			if !dadDone {
+				err = ndp.sendDADPacket(addr, addressEndpoint, nonce)
+			}
+
+			ndp.ep.mu.Lock()
+			defer ndp.ep.mu.Unlock()
+
+			if done {
+				// DAD was stopped.
+				return
+			}
+
 			state, ok := ndp.dad[addr]
 			if !ok {
 				panic(fmt.Sprintf("ndpdad: DAD timer fired but missing state for %s on NIC(%d)", addr, ndp.ep.nic.ID()))
@@ -664,13 +766,6 @@
 				panic(fmt.Sprintf("ndpdad: addr %s is no longer tentative on NIC(%d)", addr, ndp.ep.nic.ID()))
 			}
 
-			dadDone := remaining == 0
-
-			var err *tcpip.Error
-			if !dadDone {
-				err = ndp.sendDADPacket(addr, addressEndpoint)
-			}
-
 			if dadDone {
 				// DAD has resolved.
 				addressEndpoint.SetKind(stack.Permanent)
@@ -678,7 +773,7 @@
 				// DAD is not done and we had no errors when sending the last NDP NS,
 				// schedule the next DAD timer.
 				remaining--
-				state.job.Schedule(ndp.configs.RetransmitTimer)
+				state.timer.Reset(ndp.configs.RetransmitTimer)
 				return
 			}
 
@@ -703,11 +798,6 @@
 		}),
 	}
 
-	// We initially start a timer to fire immediately because some of the DAD work
-	// cannot be done while holding the IPv6 endpoint's lock. This is effectively
-	// the same as starting a goroutine but we use a timer that fires immediately
-	// so we can reset it for the next DAD iteration.
-	state.job.Schedule(0)
 	ndp.dad[addr] = state
 
 	return nil
@@ -717,13 +807,18 @@
 // addr.
 //
 // addr must be a tentative IPv6 address on ndp's IPv6 endpoint.
-func (ndp *ndpState) sendDADPacket(addr tcpip.Address, addressEndpoint stack.AddressEndpoint) *tcpip.Error {
+func (ndp *ndpState) sendDADPacket(addr tcpip.Address, addressEndpoint stack.AddressEndpoint, nonce []byte) *tcpip.Error {
 	snmc := header.SolicitedNodeAddr(addr)
 
-	icmp := header.ICMPv6(buffer.NewView(header.ICMPv6NeighborSolicitMinimumSize))
+	ser := header.NDPOptionsSerializer{
+		header.NDPNonceOption(nonce),
+	}
+
+	icmp := header.ICMPv6(buffer.NewView(header.ICMPv6NeighborSolicitMinimumSize + ser.Length()))
 	icmp.SetType(header.ICMPv6NeighborSolicit)
 	ns := header.NDPNeighborSolicit(icmp.MessageBody())
 	ns.SetTargetAddress(addr)
+	ns.Options().Serialize(ser)
 	icmp.SetChecksum(header.ICMPv6Checksum(icmp, header.IPv6Any, snmc, buffer.VectorisedView{}))
 
 	pkt := stack.NewPacketBuffer(stack.PacketBufferOptions{
@@ -762,7 +857,8 @@
 		return
 	}
 
-	dad.job.Cancel()
+	dad.timer.Stop()
+	*dad.done = true
 	delete(ndp.dad, addr)
 
 	// Let the integrator know DAD did not resolve.
@@ -771,6 +867,61 @@
 	}
 }
 
+// extendIfNonceEqualDisposition enumerates the possible results from
+// extendIfNonceEqual.
+type extendIfNonceEqualDisposition int
+
+const (
+	// newlyExtended indicates that the DAD process was extended.
+	newlyExtended extendIfNonceEqualDisposition = iota
+
+	// alreadyExtended indicates that the DAD process was already extended.
+	alreadyExtended
+
+	// noDADStateFound indicates that DAD state was not found for the address.
+	noDADStateFound
+
+	// nonceNotEqual indicates that the nonce value passed and the nonce in the
+	// last send DAD message are not equal.
+	nonceNotEqual
+)
+
+// extendIfNonceEqual extends the DAD process if the provided nonce is the
+// same as the nonce sent in the last DAD message.
+//
+// The IPv6 endpoint that ndp belongs to MUST be locked.
+func (ndp *ndpState) extendIfNonceEqual(addr tcpip.Address, nonce []byte) extendIfNonceEqualDisposition {
+	s, ok := ndp.dad[addr]
+	if !ok {
+		return noDADStateFound
+	}
+
+	if s.extendRequest != notRequested {
+		return alreadyExtended
+	}
+
+	// As per RFC 7527 section 4
+	//
+	//   If any probe is looped back within RetransTimer milliseconds after having
+	//   sent DupAddrDetectTransmits NS(DAD) messages, the interface continues
+	//   with another MAX_MULTICAST_SOLICIT number of NS(DAD) messages transmitted
+	//   RetransTimer milliseconds apart.
+	//
+	// If a DAD message has already been sent and the nonce value we observed is
+	// the same as the nonce value we last sent, then we assume our probe was
+	// looped back and request an extension to the DAD process.
+	//
+	// Note, the first DAD message is sent asynchronously so we need to make sure
+	// that we sent a DAD message by checking if we have a nonce value set.
+	if s.nonce != nil && bytes.Equal(s.nonce, nonce) {
+		s.extendRequest = requested
+		ndp.dad[addr] = s
+		return newlyExtended
+	}
+
+	return nonceNotEqual
+}
+
 // handleRA handles a Router Advertisement message that arrived on the NIC
 // this ndp is for. Does nothing if the NIC is configured to not handle RAs.
 //
diff --git a/pkg/tcpip/stack/stack.go b/pkg/tcpip/stack/stack.go
index 6d761b1..8efc8a1 100644
--- a/pkg/tcpip/stack/stack.go
+++ b/pkg/tcpip/stack/stack.go
@@ -23,6 +23,7 @@
 	"bytes"
 	"encoding/binary"
 	"fmt"
+	"io"
 	mathrand "math/rand"
 	"sync/atomic"
 	"time"
@@ -448,6 +449,9 @@
 	// used when a random number is required.
 	randomGenerator *mathrand.Rand
 
+	// secureRNG is a cryptographically secure random number generator.
+	secureRNG io.Reader
+
 	// sendBufferSize holds the min/default/max send buffer sizes for
 	// endpoints other than TCP.
 	sendBufferSize SendBufferSizeOption
@@ -526,6 +530,9 @@
 	// IPTables are the initial iptables rules. If nil, iptables will allow
 	// all traffic.
 	IPTables *IPTables
+
+	// SecureRNG is a cryptographically secure random number generator.
+	SecureRNG io.Reader
 }
 
 // TransportEndpointInfo holds useful information about a transport endpoint
@@ -634,6 +641,10 @@
 
 	opts.NUDConfigs.resetInvalidFields()
 
+	if opts.SecureRNG == nil {
+		opts.SecureRNG = rand.Reader
+	}
+
 	s := &Stack{
 		transportProtocols: make(map[tcpip.TransportProtocolNumber]*transportProtocolState),
 		networkProtocols:   make(map[tcpip.NetworkProtocolNumber]NetworkProtocol),
@@ -653,6 +664,7 @@
 		uniqueIDGenerator:  opts.UniqueID,
 		nudDisp:            opts.NUDDisp,
 		randomGenerator:    mathrand.New(randSrc),
+		secureRNG:          opts.SecureRNG,
 		sendBufferSize: SendBufferSizeOption{
 			Min:     MinBufferSize,
 			Default: DefaultBufferSize,
@@ -2019,6 +2031,12 @@
 	return s.randomGenerator
 }
 
+// SecureRNG returns the stack's cryptographically secure random number
+// generator.
+func (s *Stack) SecureRNG() io.Reader {
+	return s.secureRNG
+}
+
 func generateRandUint32() uint32 {
 	b := make([]byte, 4)
 	if _, err := rand.Read(b); err != nil {