Merge pull request #47775 from vvoland/v26.1-47771
[26.1 backport] Option to avoid deleting the kernel_ll address from bridges.
diff --git a/integration/networking/bridge_test.go b/integration/networking/bridge_test.go
index ae82f00..cb2abfa 100644
--- a/integration/networking/bridge_test.go
+++ b/integration/networking/bridge_test.go
@@ -469,7 +469,6 @@
skip.If(t, testEnv.DaemonInfo.OSType == "windows")
ctx := setupTest(t)
- d := daemon.New(t)
type testStep struct {
stepName string
@@ -487,13 +486,13 @@
{
stepName: "Set up initial UL prefix",
fixedCIDRV6: "fd1c:f1a0:5d8d:aaaa::/64",
- expAddrs: []string{"fd1c:f1a0:5d8d:aaaa::1/64", "fe80::1/64"},
+ expAddrs: []string{"fd1c:f1a0:5d8d:aaaa::1/64", "fe80::"},
},
{
// Modify that prefix, the default bridge's address must be deleted and re-added.
stepName: "Modify UL prefix - address change",
fixedCIDRV6: "fd1c:f1a0:5d8d:bbbb::/64",
- expAddrs: []string{"fd1c:f1a0:5d8d:bbbb::1/64", "fe80::1/64"},
+ expAddrs: []string{"fd1c:f1a0:5d8d:bbbb::1/64", "fe80::"},
},
{
// Modify the prefix length, the default bridge's address should not change.
@@ -501,7 +500,7 @@
fixedCIDRV6: "fd1c:f1a0:5d8d:bbbb::/80",
// The prefix length displayed by 'ip a' is not updated - it's informational, and
// can't be changed without unnecessarily deleting and re-adding the address.
- expAddrs: []string{"fd1c:f1a0:5d8d:bbbb::1/64", "fe80::1/64"},
+ expAddrs: []string{"fd1c:f1a0:5d8d:bbbb::1/64", "fe80::"},
},
},
},
@@ -511,14 +510,14 @@
{
stepName: "Standard LL subnet prefix",
fixedCIDRV6: "fe80::/64",
- expAddrs: []string{"fe80::1/64"},
+ expAddrs: []string{"fe80::"},
},
{
// Modify that prefix, the default bridge's address must be deleted and re-added.
// The bridge must still have an address in the required (standard) LL subnet.
stepName: "Nonstandard LL prefix - address change",
fixedCIDRV6: "fe80:1234::/32",
- expAddrs: []string{"fe80:1234::1/32", "fe80::1/64"},
+ expAddrs: []string{"fe80:1234::1/32", "fe80::"},
},
{
// Modify the prefix length, the addresses should not change.
@@ -526,32 +525,48 @@
fixedCIDRV6: "fe80:1234::/64",
// The prefix length displayed by 'ip a' is not updated - it's informational, and
// can't be changed without unnecessarily deleting and re-adding the address.
- expAddrs: []string{"fe80:1234::1/", "fe80::1/64"},
+ expAddrs: []string{"fe80:1234::1/", "fe80::"},
},
},
},
}
- for _, tc := range testcases {
- t.Run(tc.name, func(t *testing.T) {
- for _, step := range tc.steps {
- // Check that the daemon starts - regression test for:
- // https://github.com/moby/moby/issues/46829
- d.Start(t, "--experimental", "--ipv6", "--ip6tables", "--fixed-cidr-v6="+step.fixedCIDRV6)
- d.Stop(t)
+ for _, preserveKernelLL := range []bool{false, true} {
+ var dopts []daemon.Option
+ if preserveKernelLL {
+ dopts = append(dopts, daemon.WithEnvVars("DOCKER_BRIDGE_PRESERVE_KERNEL_LL=1"))
+ }
+ d := daemon.New(t, dopts...)
+ c := d.NewClientT(t)
- // Check that the expected addresses have been applied to the bridge. (Skip in
- // rootless mode, because the bridge is in a different network namespace.)
- if !testEnv.IsRootless() {
- res := testutil.RunCommand(ctx, "ip", "-6", "addr", "show", "docker0")
- assert.Equal(t, res.ExitCode, 0, step.stepName)
- stdout := res.Stdout()
- for _, expAddr := range step.expAddrs {
- assert.Check(t, is.Contains(stdout, expAddr))
+ for _, tc := range testcases {
+ for _, step := range tc.steps {
+ tcName := fmt.Sprintf("kernel_ll_%v/%s/%s", preserveKernelLL, tc.name, step.stepName)
+ t.Run(tcName, func(t *testing.T) {
+ ctx := testutil.StartSpan(ctx, t)
+ // Check that the daemon starts - regression test for:
+ // https://github.com/moby/moby/issues/46829
+ d.StartWithBusybox(ctx, t, "--experimental", "--ipv6", "--ip6tables", "--fixed-cidr-v6="+step.fixedCIDRV6)
+
+ // Start a container, so that the bridge is set "up" and gets a kernel_ll address.
+ cID := container.Run(ctx, t, c)
+ defer c.ContainerRemove(ctx, cID, containertypes.RemoveOptions{Force: true})
+
+ d.Stop(t)
+
+ // Check that the expected addresses have been applied to the bridge. (Skip in
+ // rootless mode, because the bridge is in a different network namespace.)
+ if !testEnv.IsRootless() {
+ res := testutil.RunCommand(ctx, "ip", "-6", "addr", "show", "docker0")
+ assert.Equal(t, res.ExitCode, 0, step.stepName)
+ stdout := res.Stdout()
+ for _, expAddr := range step.expAddrs {
+ assert.Check(t, is.Contains(stdout, expAddr))
+ }
}
- }
+ })
}
- })
+ }
}
}
diff --git a/libnetwork/drivers/bridge/interface_linux.go b/libnetwork/drivers/bridge/interface_linux.go
index 2a9c375..a78c02ca 100644
--- a/libnetwork/drivers/bridge/interface_linux.go
+++ b/libnetwork/drivers/bridge/interface_linux.go
@@ -5,6 +5,7 @@
"fmt"
"net"
"net/netip"
+ "os"
"github.com/containerd/log"
"github.com/docker/docker/errdefs"
@@ -73,18 +74,20 @@
func getRequiredIPv6Addrs(config *networkConfiguration) (requiredAddrs map[netip.Addr]netip.Prefix, err error) {
requiredAddrs = make(map[netip.Addr]netip.Prefix)
- // Always give the bridge 'fe80::1' - every interface is required to have an
- // address in 'fe80::/64'. Linux may assign an address, but we'll replace it with
- // 'fe80::1'. Then, if the configured prefix is 'fe80::/64', the IPAM pool
- // assigned address will not be a second address in the LL subnet.
- ra, ok := netiputil.ToPrefix(bridgeIPv6)
- if !ok {
- err = fmt.Errorf("Failed to convert Link-Local IPv6 address to netip.Prefix")
- return nil, err
+ if os.Getenv("DOCKER_BRIDGE_PRESERVE_KERNEL_LL") != "1" {
+ // Always give the bridge 'fe80::1' - every interface is required to have an
+ // address in 'fe80::/64'. Linux may assign an address, but we'll replace it with
+ // 'fe80::1'. Then, if the configured prefix is 'fe80::/64', the IPAM pool
+ // assigned address will not be a second address in the LL subnet.
+ ra, ok := netiputil.ToPrefix(bridgeIPv6)
+ if !ok {
+ err = fmt.Errorf("Failed to convert Link-Local IPv6 address to netip.Prefix")
+ return nil, err
+ }
+ requiredAddrs[ra.Addr()] = ra
}
- requiredAddrs[ra.Addr()] = ra
- ra, ok = netiputil.ToPrefix(config.AddressIPv6)
+ ra, ok := netiputil.ToPrefix(config.AddressIPv6)
if !ok {
err = fmt.Errorf("failed to convert bridge IPv6 address '%s' to netip.Prefix", config.AddressIPv6.String())
return nil, err
@@ -116,6 +119,14 @@
if !ok {
return errdefs.System(fmt.Errorf("Failed to convert IPv6 address '%s' to netip.Addr", config.AddressIPv6))
}
+ // Optionally, avoid deleting the kernel-assigned link local address.
+ // (Don't delete fe80::1 either - if it was previously assigned to the bridge, and the
+ // kernel_ll address was deleted, the bridge won't get a new kernel_ll address.)
+ if os.Getenv("DOCKER_BRIDGE_PRESERVE_KERNEL_LL") == "1" {
+ if p, _ := ea.Prefix(64); p == linkLocalPrefix {
+ continue
+ }
+ }
// Ignore the prefix length when comparing addresses, it's informational
// (RFC-5942 section 4), and removing/re-adding an address that's still valid
// would disrupt traffic on live-restore.
diff --git a/libnetwork/drivers/bridge/setup_ipv6_linux.go b/libnetwork/drivers/bridge/setup_ipv6_linux.go
index 779306f..2cb0da7 100644
--- a/libnetwork/drivers/bridge/setup_ipv6_linux.go
+++ b/libnetwork/drivers/bridge/setup_ipv6_linux.go
@@ -4,6 +4,7 @@
"context"
"fmt"
"net"
+ "net/netip"
"os"
"github.com/containerd/log"
@@ -13,6 +14,9 @@
// bridgeIPv6 is the default, link-local IPv6 address for the bridge (fe80::1/64)
var bridgeIPv6 = &net.IPNet{IP: net.ParseIP("fe80::1"), Mask: net.CIDRMask(64, 128)}
+// Standard link local prefix
+var linkLocalPrefix = netip.MustParsePrefix("fe80::/64")
+
const (
ipv6ForwardConfPerm = 0o644
ipv6ForwardConfDefault = "/proc/sys/net/ipv6/conf/default/forwarding"