Merge pull request #93 from ctelfer/18.09-backport-dsr

[18.09] Bump libnetwork to 6da50d19 for DSR load balancing changes
diff --git a/hack/dockerfile/install/proxy.installer b/hack/dockerfile/install/proxy.installer
index ed9ab53..f1d3f93 100755
--- a/hack/dockerfile/install/proxy.installer
+++ b/hack/dockerfile/install/proxy.installer
@@ -3,7 +3,7 @@
 # LIBNETWORK_COMMIT is used to build the docker-userland-proxy binary. When
 # updating the binary version, consider updating github.com/docker/libnetwork
 # in vendor.conf accordingly
-LIBNETWORK_COMMIT=f30a35b091cc2a431ef9856c75c343f75bb5f2e2
+LIBNETWORK_COMMIT=6da50d1978302f04c3e2089e29112ea24812f05b
 
 install_proxy() {
 	case "$1" in
diff --git a/vendor.conf b/vendor.conf
index 178c70e..6c9c512 100644
--- a/vendor.conf
+++ b/vendor.conf
@@ -37,7 +37,7 @@
 #get libnetwork packages
 
 # When updating, also update LIBNETWORK_COMMIT in hack/dockerfile/install/proxy accordingly
-github.com/docker/libnetwork a79d3687931697244b8e03485bf7b2042f8ec6b6
+github.com/docker/libnetwork 6da50d1978302f04c3e2089e29112ea24812f05b
 github.com/docker/go-events 9461782956ad83b30282bf90e31fa6a70c255ba9
 github.com/armon/go-radix e39d623f12e8e41c7b5529e9a9dd67a1e2261f80
 github.com/armon/go-metrics eb0af217e5e9747e41dd5303755356b62d28e3ec
diff --git a/vendor/github.com/docker/libnetwork/controller.go b/vendor/github.com/docker/libnetwork/controller.go
index f956ddb..2896011 100644
--- a/vendor/github.com/docker/libnetwork/controller.go
+++ b/vendor/github.com/docker/libnetwork/controller.go
@@ -700,6 +700,9 @@
 	return nil
 }
 
+// XXX  This should be made driver agnostic.  See comment below.
+const overlayDSROptionString = "dsr"
+
 // NewNetwork creates a new network of the specified network type. The options
 // are network specific and modeled in a generic way.
 func (c *controller) NewNetwork(networkType, name string, id string, options ...NetworkOption) (Network, error) {
@@ -723,15 +726,16 @@
 	defaultIpam := defaultIpamForNetworkType(networkType)
 	// Construct the network object
 	network := &network{
-		name:        name,
-		networkType: networkType,
-		generic:     map[string]interface{}{netlabel.GenericData: make(map[string]string)},
-		ipamType:    defaultIpam,
-		id:          id,
-		created:     time.Now(),
-		ctrlr:       c,
-		persist:     true,
-		drvOnce:     &sync.Once{},
+		name:             name,
+		networkType:      networkType,
+		generic:          map[string]interface{}{netlabel.GenericData: make(map[string]string)},
+		ipamType:         defaultIpam,
+		id:               id,
+		created:          time.Now(),
+		ctrlr:            c,
+		persist:          true,
+		drvOnce:          &sync.Once{},
+		loadBalancerMode: loadBalancerModeDefault,
 	}
 
 	network.processOptions(options...)
@@ -829,6 +833,21 @@
 		}
 	}()
 
+	// XXX If the driver type is "overlay" check the options for DSR
+	// being set.  If so, set the network's load balancing mode to DSR.
+	// This should really be done in a network option, but due to
+	// time pressure to get this in without adding changes to moby,
+	// swarm and CLI, it is being implemented as a driver-specific
+	// option.  Unfortunately, drivers can't influence the core
+	// "libnetwork.network" data type.  Hence we need this hack code
+	// to implement in this manner.
+	if gval, ok := network.generic[netlabel.GenericData]; ok && network.networkType == "overlay" {
+		optMap := gval.(map[string]string)
+		if _, ok := optMap[overlayDSROptionString]; ok {
+			network.loadBalancerMode = loadBalancerModeDSR
+		}
+	}
+
 addToStore:
 	// First store the endpoint count, then the network. To avoid to
 	// end up with a datastore containing a network and not an epCnt,
diff --git a/vendor/github.com/docker/libnetwork/ipvs/constants.go b/vendor/github.com/docker/libnetwork/ipvs/constants.go
index d36bec0..b6b7f2b 100644
--- a/vendor/github.com/docker/libnetwork/ipvs/constants.go
+++ b/vendor/github.com/docker/libnetwork/ipvs/constants.go
@@ -145,3 +145,23 @@
 	// addresses.
 	SourceHashing = "sh"
 )
+
+const (
+	// ConnFwdMask is a mask for the fwd methods
+	ConnFwdMask = 0x0007
+
+	// ConnFwdMasq denotes forwarding via masquerading/NAT
+	ConnFwdMasq = 0x0000
+
+	// ConnFwdLocalNode denotes forwarding to a local node
+	ConnFwdLocalNode = 0x0001
+
+	// ConnFwdTunnel denotes forwarding via a tunnel
+	ConnFwdTunnel = 0x0002
+
+	// ConnFwdDirectRoute denotes forwarding via direct routing
+	ConnFwdDirectRoute = 0x0003
+
+	// ConnFwdBypass denotes forwarding while bypassing the cache
+	ConnFwdBypass = 0x0004
+)
diff --git a/vendor/github.com/docker/libnetwork/network.go b/vendor/github.com/docker/libnetwork/network.go
index 052aa8f..0a4a227 100644
--- a/vendor/github.com/docker/libnetwork/network.go
+++ b/vendor/github.com/docker/libnetwork/network.go
@@ -199,43 +199,50 @@
 }
 
 type network struct {
-	ctrlr          *controller
-	name           string
-	networkType    string
-	id             string
-	created        time.Time
-	scope          string // network data scope
-	labels         map[string]string
-	ipamType       string
-	ipamOptions    map[string]string
-	addrSpace      string
-	ipamV4Config   []*IpamConf
-	ipamV6Config   []*IpamConf
-	ipamV4Info     []*IpamInfo
-	ipamV6Info     []*IpamInfo
-	enableIPv6     bool
-	postIPv6       bool
-	epCnt          *endpointCnt
-	generic        options.Generic
-	dbIndex        uint64
-	dbExists       bool
-	persist        bool
-	stopWatchCh    chan struct{}
-	drvOnce        *sync.Once
-	resolverOnce   sync.Once
-	resolver       []Resolver
-	internal       bool
-	attachable     bool
-	inDelete       bool
-	ingress        bool
-	driverTables   []networkDBTable
-	dynamic        bool
-	configOnly     bool
-	configFrom     string
-	loadBalancerIP net.IP
+	ctrlr            *controller
+	name             string
+	networkType      string
+	id               string
+	created          time.Time
+	scope            string // network data scope
+	labels           map[string]string
+	ipamType         string
+	ipamOptions      map[string]string
+	addrSpace        string
+	ipamV4Config     []*IpamConf
+	ipamV6Config     []*IpamConf
+	ipamV4Info       []*IpamInfo
+	ipamV6Info       []*IpamInfo
+	enableIPv6       bool
+	postIPv6         bool
+	epCnt            *endpointCnt
+	generic          options.Generic
+	dbIndex          uint64
+	dbExists         bool
+	persist          bool
+	stopWatchCh      chan struct{}
+	drvOnce          *sync.Once
+	resolverOnce     sync.Once
+	resolver         []Resolver
+	internal         bool
+	attachable       bool
+	inDelete         bool
+	ingress          bool
+	driverTables     []networkDBTable
+	dynamic          bool
+	configOnly       bool
+	configFrom       string
+	loadBalancerIP   net.IP
+	loadBalancerMode string
 	sync.Mutex
 }
 
+const (
+	loadBalancerModeNAT     = "NAT"
+	loadBalancerModeDSR     = "DSR"
+	loadBalancerModeDefault = loadBalancerModeNAT
+)
+
 func (n *network) Name() string {
 	n.Lock()
 	defer n.Unlock()
@@ -475,6 +482,7 @@
 	dstN.configOnly = n.configOnly
 	dstN.configFrom = n.configFrom
 	dstN.loadBalancerIP = n.loadBalancerIP
+	dstN.loadBalancerMode = n.loadBalancerMode
 
 	// copy labels
 	if dstN.labels == nil {
@@ -592,6 +600,7 @@
 	netMap["configOnly"] = n.configOnly
 	netMap["configFrom"] = n.configFrom
 	netMap["loadBalancerIP"] = n.loadBalancerIP
+	netMap["loadBalancerMode"] = n.loadBalancerMode
 	return json.Marshal(netMap)
 }
 
@@ -705,6 +714,10 @@
 	if v, ok := netMap["loadBalancerIP"]; ok {
 		n.loadBalancerIP = net.ParseIP(v.(string))
 	}
+	n.loadBalancerMode = loadBalancerModeDefault
+	if v, ok := netMap["loadBalancerMode"]; ok {
+		n.loadBalancerMode = v.(string)
+	}
 	// Reconcile old networks with the recently added `--ipv6` flag
 	if !n.enableIPv6 {
 		n.enableIPv6 = len(n.ipamV6Info) > 0
diff --git a/vendor/github.com/docker/libnetwork/osl/namespace_linux.go b/vendor/github.com/docker/libnetwork/osl/namespace_linux.go
index 45c4685..bfc5d31 100644
--- a/vendor/github.com/docker/libnetwork/osl/namespace_linux.go
+++ b/vendor/github.com/docker/libnetwork/osl/namespace_linux.go
@@ -384,6 +384,36 @@
 	return n.nlHandle.AddrDel(iface, &netlink.Addr{IPNet: ip})
 }
 
+func (n *networkNamespace) DisableARPForVIP(srcName string) (Err error) {
+	dstName := ""
+	for _, i := range n.Interfaces() {
+		if i.SrcName() == srcName {
+			dstName = i.DstName()
+			break
+		}
+	}
+	if dstName == "" {
+		return fmt.Errorf("failed to find interface %s in sandbox", srcName)
+	}
+
+	err := n.InvokeFunc(func() {
+		path := filepath.Join("/proc/sys/net/ipv4/conf", dstName, "arp_ignore")
+		if err := ioutil.WriteFile(path, []byte{'1', '\n'}, 0644); err != nil {
+			Err = fmt.Errorf("Failed to set %s to 1: %v", path, err)
+			return
+		}
+		path = filepath.Join("/proc/sys/net/ipv4/conf", dstName, "arp_announce")
+		if err := ioutil.WriteFile(path, []byte{'2', '\n'}, 0644); err != nil {
+			Err = fmt.Errorf("Failed to set %s to 2: %v", path, err)
+			return
+		}
+	})
+	if err != nil {
+		return err
+	}
+	return
+}
+
 func (n *networkNamespace) InvokeFunc(f func()) error {
 	return nsInvoke(n.nsPath(), func(nsFD int) error { return nil }, func(callerFD int) error {
 		f()
diff --git a/vendor/github.com/docker/libnetwork/osl/sandbox.go b/vendor/github.com/docker/libnetwork/osl/sandbox.go
index 5019e06..198cf64 100644
--- a/vendor/github.com/docker/libnetwork/osl/sandbox.go
+++ b/vendor/github.com/docker/libnetwork/osl/sandbox.go
@@ -51,6 +51,10 @@
 	// RemoveAliasIP removes the passed IP address from the named interface
 	RemoveAliasIP(ifName string, ip *net.IPNet) error
 
+	// DisableARPForVIP disables ARP replies and requests for VIP addresses
+	// on a particular interface
+	DisableARPForVIP(ifName string) error
+
 	// Add a static route to the sandbox.
 	AddStaticRoute(*types.StaticRoute) error
 
diff --git a/vendor/github.com/docker/libnetwork/sandbox.go b/vendor/github.com/docker/libnetwork/sandbox.go
index d98f359..03c9215 100644
--- a/vendor/github.com/docker/libnetwork/sandbox.go
+++ b/vendor/github.com/docker/libnetwork/sandbox.go
@@ -742,8 +742,17 @@
 
 	ep.Lock()
 	joinInfo := ep.joinInfo
+	vip := ep.virtualIP
+	lbModeIsDSR := ep.network.loadBalancerMode == loadBalancerModeDSR
 	ep.Unlock()
 
+	if len(vip) > 0 && lbModeIsDSR {
+		ipNet := &net.IPNet{IP: vip, Mask: net.CIDRMask(32, 32)}
+		if err := osSbox.RemoveAliasIP(osSbox.GetLoopbackIfaceName(), ipNet); err != nil {
+			logrus.WithError(err).Debugf("failed to remove virtual ip %v to loopback", ipNet)
+		}
+	}
+
 	if joinInfo == nil {
 		return
 	}
@@ -831,6 +840,7 @@
 	ep.Lock()
 	joinInfo := ep.joinInfo
 	i := ep.iface
+	lbModeIsDSR := ep.network.loadBalancerMode == loadBalancerModeDSR
 	ep.Unlock()
 
 	if ep.needResolver() {
@@ -854,6 +864,18 @@
 		if err := sb.osSbox.AddInterface(i.srcName, i.dstPrefix, ifaceOptions...); err != nil {
 			return fmt.Errorf("failed to add interface %s to sandbox: %v", i.srcName, err)
 		}
+
+		if len(ep.virtualIP) > 0 && lbModeIsDSR {
+			if sb.loadBalancerNID == "" {
+				if err := sb.osSbox.DisableARPForVIP(i.srcName); err != nil {
+					return fmt.Errorf("failed disable ARP for VIP: %v", err)
+				}
+			}
+			ipNet := &net.IPNet{IP: ep.virtualIP, Mask: net.CIDRMask(32, 32)}
+			if err := sb.osSbox.AddAliasIP(sb.osSbox.GetLoopbackIfaceName(), ipNet); err != nil {
+				return fmt.Errorf("failed to add virtual ip %v to loopback: %v", ipNet, err)
+			}
+		}
 	}
 
 	if joinInfo != nil {
diff --git a/vendor/github.com/docker/libnetwork/service_linux.go b/vendor/github.com/docker/libnetwork/service_linux.go
index a97d24b..562c3f7 100644
--- a/vendor/github.com/docker/libnetwork/service_linux.go
+++ b/vendor/github.com/docker/libnetwork/service_linux.go
@@ -143,7 +143,7 @@
 		}
 
 		logrus.Debugf("Creating service for vip %s fwMark %d ingressPorts %#v in sbox %.7s (%.7s)", lb.vip, lb.fwMark, lb.service.ingressPorts, sb.ID(), sb.ContainerID())
-		if err := invokeFWMarker(sb.Key(), lb.vip, lb.fwMark, lb.service.ingressPorts, eIP, false); err != nil {
+		if err := invokeFWMarker(sb.Key(), lb.vip, lb.fwMark, lb.service.ingressPorts, eIP, false, n.loadBalancerMode); err != nil {
 			logrus.Errorf("Failed to add firewall mark rule in sbox %.7s (%.7s): %v", sb.ID(), sb.ContainerID(), err)
 			return
 		}
@@ -159,6 +159,9 @@
 		Address:       ip,
 		Weight:        1,
 	}
+	if n.loadBalancerMode == loadBalancerModeDSR {
+		d.ConnectionFlags = ipvs.ConnFwdDirectRoute
+	}
 
 	// Remove the sched name before using the service to add
 	// destination.
@@ -204,6 +207,9 @@
 		Address:       ip,
 		Weight:        1,
 	}
+	if n.loadBalancerMode == loadBalancerModeDSR {
+		d.ConnectionFlags = ipvs.ConnFwdDirectRoute
+	}
 
 	if fullRemove {
 		if err := i.DelDestination(s, d); err != nil && err != syscall.ENOENT {
@@ -233,7 +239,7 @@
 			}
 		}
 
-		if err := invokeFWMarker(sb.Key(), lb.vip, lb.fwMark, lb.service.ingressPorts, eIP, true); err != nil {
+		if err := invokeFWMarker(sb.Key(), lb.vip, lb.fwMark, lb.service.ingressPorts, eIP, true, n.loadBalancerMode); err != nil {
 			logrus.Errorf("Failed to delete firewall mark rule in sbox %.7s (%.7s): %v", sb.ID(), sb.ContainerID(), err)
 		}
 
@@ -544,7 +550,7 @@
 
 // Invoke fwmarker reexec routine to mark vip destined packets with
 // the passed firewall mark.
-func invokeFWMarker(path string, vip net.IP, fwMark uint32, ingressPorts []*PortConfig, eIP *net.IPNet, isDelete bool) error {
+func invokeFWMarker(path string, vip net.IP, fwMark uint32, ingressPorts []*PortConfig, eIP *net.IPNet, isDelete bool, lbMode string) error {
 	var ingressPortsFile string
 
 	if len(ingressPorts) != 0 {
@@ -564,7 +570,7 @@
 
 	cmd := &exec.Cmd{
 		Path:   reexec.Self(),
-		Args:   append([]string{"fwmarker"}, path, vip.String(), fmt.Sprintf("%d", fwMark), addDelOpt, ingressPortsFile, eIP.String()),
+		Args:   append([]string{"fwmarker"}, path, vip.String(), fmt.Sprintf("%d", fwMark), addDelOpt, ingressPortsFile, eIP.String(), lbMode),
 		Stdout: os.Stdout,
 		Stderr: os.Stderr,
 	}
@@ -581,7 +587,7 @@
 	runtime.LockOSThread()
 	defer runtime.UnlockOSThread()
 
-	if len(os.Args) < 7 {
+	if len(os.Args) < 8 {
 		logrus.Error("invalid number of arguments..")
 		os.Exit(1)
 	}
@@ -623,7 +629,8 @@
 		os.Exit(5)
 	}
 
-	if addDelOpt == "-A" {
+	lbMode := os.Args[7]
+	if addDelOpt == "-A" && lbMode == loadBalancerModeNAT {
 		eIP, subnet, err := net.ParseCIDR(os.Args[6])
 		if err != nil {
 			logrus.Errorf("Failed to parse endpoint IP %s: %v", os.Args[6], err)