blob: 4e3f9ae76f0336f1bab7e78790aca09af5ba49f5 [file] [log] [blame]
package overlay
import (
"encoding/json"
"fmt"
"net"
"os"
"path/filepath"
"strconv"
"strings"
"sync"
"github.com/docker/libnetwork/datastore"
"github.com/docker/libnetwork/driverapi"
"github.com/docker/libnetwork/netlabel"
"github.com/docker/libnetwork/netutils"
"github.com/docker/libnetwork/osl"
"github.com/docker/libnetwork/resolvconf"
"github.com/docker/libnetwork/types"
"github.com/sirupsen/logrus"
)
var (
hostMode bool
networkOnce sync.Once
networkMu sync.Mutex
vniTbl = make(map[uint32]string)
)
type networkTable map[string]*network
type subnet struct {
once *sync.Once
vxlanName string
brName string
vni uint32
initErr error
subnetIP *net.IPNet
gwIP *net.IPNet
}
type subnetJSON struct {
SubnetIP string
GwIP string
Vni uint32
}
type network struct {
id string
dbIndex uint64
dbExists bool
sbox osl.Sandbox
endpoints endpointTable
driver *driver
joinCnt int
once *sync.Once
initEpoch int
initErr error
subnets []*subnet
secure bool
mtu int
sync.Mutex
}
func (d *driver) NetworkAllocate(id string, option map[string]string, ipV4Data, ipV6Data []driverapi.IPAMData) (map[string]string, error) {
return nil, types.NotImplementedErrorf("not implemented")
}
func (d *driver) NetworkFree(id string) error {
return types.NotImplementedErrorf("not implemented")
}
func (d *driver) CreateNetwork(id string, option map[string]interface{}, nInfo driverapi.NetworkInfo, ipV4Data, ipV6Data []driverapi.IPAMData) error {
if id == "" {
return fmt.Errorf("invalid network id")
}
if len(ipV4Data) == 0 || ipV4Data[0].Pool.String() == "0.0.0.0/0" {
return types.BadRequestErrorf("ipv4 pool is empty")
}
// Since we perform lazy configuration make sure we try
// configuring the driver when we enter CreateNetwork
if err := d.configure(); err != nil {
return err
}
n := &network{
id: id,
driver: d,
endpoints: endpointTable{},
once: &sync.Once{},
subnets: []*subnet{},
}
vnis := make([]uint32, 0, len(ipV4Data))
if gval, ok := option[netlabel.GenericData]; ok {
optMap := gval.(map[string]string)
if val, ok := optMap[netlabel.OverlayVxlanIDList]; ok {
logrus.Debugf("overlay: Received vxlan IDs: %s", val)
vniStrings := strings.Split(val, ",")
for _, vniStr := range vniStrings {
vni, err := strconv.Atoi(vniStr)
if err != nil {
return fmt.Errorf("invalid vxlan id value %q passed", vniStr)
}
vnis = append(vnis, uint32(vni))
}
}
if _, ok := optMap[secureOption]; ok {
n.secure = true
}
if val, ok := optMap[netlabel.DriverMTU]; ok {
var err error
if n.mtu, err = strconv.Atoi(val); err != nil {
return fmt.Errorf("failed to parse %v: %v", val, err)
}
if n.mtu < 0 {
return fmt.Errorf("invalid MTU value: %v", n.mtu)
}
}
}
// If we are getting vnis from libnetwork, either we get for
// all subnets or none.
if len(vnis) != 0 && len(vnis) < len(ipV4Data) {
return fmt.Errorf("insufficient vnis(%d) passed to overlay", len(vnis))
}
for i, ipd := range ipV4Data {
s := &subnet{
subnetIP: ipd.Pool,
gwIP: ipd.Gateway,
once: &sync.Once{},
}
if len(vnis) != 0 {
s.vni = vnis[i]
}
n.subnets = append(n.subnets, s)
}
if err := n.writeToStore(); err != nil {
return fmt.Errorf("failed to update data store for network %v: %v", n.id, err)
}
// Make sure no rule is on the way from any stale secure network
if !n.secure {
for _, vni := range vnis {
programMangle(vni, false)
}
}
if nInfo != nil {
if err := nInfo.TableEventRegister(ovPeerTable, driverapi.EndpointObject); err != nil {
return err
}
}
d.addNetwork(n)
return nil
}
func (d *driver) DeleteNetwork(nid string) error {
if nid == "" {
return fmt.Errorf("invalid network id")
}
// Make sure driver resources are initialized before proceeding
if err := d.configure(); err != nil {
return err
}
n := d.network(nid)
if n == nil {
return fmt.Errorf("could not find network with id %s", nid)
}
d.deleteNetwork(nid)
vnis, err := n.releaseVxlanID()
if err != nil {
return err
}
if n.secure {
for _, vni := range vnis {
programMangle(vni, false)
}
}
return nil
}
func (d *driver) ProgramExternalConnectivity(nid, eid string, options map[string]interface{}) error {
return nil
}
func (d *driver) RevokeExternalConnectivity(nid, eid string) error {
return nil
}
func (n *network) incEndpointCount() {
n.Lock()
defer n.Unlock()
n.joinCnt++
}
func (n *network) joinSandbox(restore bool) error {
// If there is a race between two go routines here only one will win
// the other will wait.
n.once.Do(func() {
// save the error status of initSandbox in n.initErr so that
// all the racing go routines are able to know the status.
n.initErr = n.initSandbox(restore)
})
return n.initErr
}
func (n *network) joinSubnetSandbox(s *subnet, restore bool) error {
s.once.Do(func() {
s.initErr = n.initSubnetSandbox(s, restore)
})
return s.initErr
}
func (n *network) leaveSandbox() {
n.Lock()
defer n.Unlock()
n.joinCnt--
if n.joinCnt != 0 {
return
}
// We are about to destroy sandbox since the container is leaving the network
// Reinitialize the once variable so that we will be able to trigger one time
// sandbox initialization(again) when another container joins subsequently.
n.once = &sync.Once{}
for _, s := range n.subnets {
s.once = &sync.Once{}
}
n.destroySandbox()
}
// to be called while holding network lock
func (n *network) destroySandbox() {
if n.sbox != nil {
for _, iface := range n.sbox.Info().Interfaces() {
if err := iface.Remove(); err != nil {
logrus.Debugf("Remove interface %s failed: %v", iface.SrcName(), err)
}
}
for _, s := range n.subnets {
if s.vxlanName != "" {
err := deleteInterface(s.vxlanName)
if err != nil {
logrus.Warnf("could not cleanup sandbox properly: %v", err)
}
}
}
n.sbox.Destroy()
n.sbox = nil
}
}
func networkOnceInit() {
if os.Getenv("_OVERLAY_HOST_MODE") != "" {
hostMode = true
return
}
err := createVxlan("testvxlan1", 1, 0)
if err != nil {
logrus.Errorf("Failed to create testvxlan1 interface: %v", err)
return
}
defer deleteInterface("testvxlan1")
}
func (n *network) generateVxlanName(s *subnet) string {
id := n.id
if len(n.id) > 12 {
id = n.id[:12]
}
return "vx_" + id + "_0"
}
func (n *network) generateBridgeName(s *subnet) string {
id := n.id
if len(n.id) > 5 {
id = n.id[:5]
}
return n.getBridgeNamePrefix(s) + "_" + id + "_0"
}
func (n *network) getBridgeNamePrefix(s *subnet) string {
return "ov_" + fmt.Sprintf("%06x", n.vxlanID(s))
}
func isOverlap(nw *net.IPNet) bool {
var nameservers []string
if rc, err := resolvconf.Get(); err == nil {
nameservers = resolvconf.GetNameserversAsCIDR(rc.Content)
}
if err := netutils.CheckNameserverOverlaps(nameservers, nw); err != nil {
return true
}
if err := netutils.CheckRouteOverlaps(nw); err != nil {
return true
}
return false
}
func (n *network) restoreSubnetSandbox(s *subnet, brName, vxlanName string) error {
sbox := n.sandbox()
// restore overlay osl sandbox
Ifaces := make(map[string][]osl.IfaceOption)
brIfaceOption := make([]osl.IfaceOption, 2)
brIfaceOption = append(brIfaceOption, sbox.InterfaceOptions().Address(s.gwIP))
brIfaceOption = append(brIfaceOption, sbox.InterfaceOptions().Bridge(true))
Ifaces[brName+"+br"] = brIfaceOption
err := sbox.Restore(Ifaces, nil, nil, nil)
if err != nil {
return err
}
Ifaces = make(map[string][]osl.IfaceOption)
vxlanIfaceOption := make([]osl.IfaceOption, 1)
vxlanIfaceOption = append(vxlanIfaceOption, sbox.InterfaceOptions().Master(brName))
Ifaces[vxlanName+"+vxlan"] = vxlanIfaceOption
err = sbox.Restore(Ifaces, nil, nil, nil)
if err != nil {
return err
}
return nil
}
func (n *network) addInterface(srcName, dstPrefix, name string, isBridge bool) error {
return nil
}
func (n *network) setupSubnetSandbox(s *subnet, brName, vxlanName string) error {
if hostMode {
// Try to delete stale bridge interface if it exists
if err := deleteInterface(brName); err != nil {
deleteInterfaceBySubnet(n.getBridgeNamePrefix(s), s)
}
if isOverlap(s.subnetIP) {
return fmt.Errorf("overlay subnet %s has conflicts in the host while running in host mode", s.subnetIP.String())
}
}
if !hostMode {
// Try to find this subnet's vni is being used in some
// other namespace by looking at vniTbl that we just
// populated in the once init. If a hit is found then
// it must a stale namespace from previous
// life. Destroy it completely and reclaim resourced.
networkMu.Lock()
path, ok := vniTbl[n.vxlanID(s)]
networkMu.Unlock()
if ok {
os.Remove(path)
networkMu.Lock()
delete(vniTbl, n.vxlanID(s))
networkMu.Unlock()
}
}
err := createVxlan(vxlanName, n.vxlanID(s), n.maxMTU())
if err != nil {
return err
}
return nil
}
func (n *network) initSubnetSandbox(s *subnet, restore bool) error {
brName := n.generateBridgeName(s)
vxlanName := n.generateVxlanName(s)
if restore {
n.restoreSubnetSandbox(s, brName, vxlanName)
} else {
n.setupSubnetSandbox(s, brName, vxlanName)
}
n.Lock()
s.vxlanName = vxlanName
s.brName = brName
n.Unlock()
return nil
}
func (n *network) cleanupStaleSandboxes() {
filepath.Walk(filepath.Dir(osl.GenerateKey("walk")),
func(path string, info os.FileInfo, err error) error {
_, fname := filepath.Split(path)
pList := strings.Split(fname, "-")
if len(pList) <= 1 {
return nil
}
pattern := pList[1]
if strings.Contains(n.id, pattern) {
// Now that we have destroyed this
// sandbox, remove all references to
// it in vniTbl so that we don't
// inadvertently destroy the sandbox
// created in this life.
networkMu.Lock()
for vni, tblPath := range vniTbl {
if tblPath == path {
delete(vniTbl, vni)
}
}
networkMu.Unlock()
}
return nil
})
}
func (n *network) initSandbox(restore bool) error {
n.Lock()
n.initEpoch++
n.Unlock()
networkOnce.Do(networkOnceInit)
if !restore {
// If there are any stale sandboxes related to this network
// from previous daemon life clean it up here
n.cleanupStaleSandboxes()
}
// In the restore case network sandbox already exist; but we don't know
// what epoch number it was created with. It has to be retrieved by
// searching the net namespaces.
var key string
if restore {
key = osl.GenerateKey("-" + n.id)
} else {
key = osl.GenerateKey(fmt.Sprintf("%d-", n.initEpoch) + n.id)
}
sbox, err := osl.NewSandbox(key, !hostMode, restore)
if err != nil {
return fmt.Errorf("could not get network sandbox (oper %t): %v", restore, err)
}
n.setSandbox(sbox)
if !restore {
n.driver.peerDbUpdateSandbox(n.id)
}
return nil
}
func (d *driver) addNetwork(n *network) {
d.Lock()
d.networks[n.id] = n
d.Unlock()
}
func (d *driver) deleteNetwork(nid string) {
d.Lock()
delete(d.networks, nid)
d.Unlock()
}
func (d *driver) network(nid string) *network {
d.Lock()
networks := d.networks
d.Unlock()
n, ok := networks[nid]
if !ok {
n = d.getNetworkFromStore(nid)
if n != nil {
n.driver = d
n.endpoints = endpointTable{}
n.once = &sync.Once{}
networks[nid] = n
}
}
return n
}
func (d *driver) getNetworkFromStore(nid string) *network {
if d.store == nil {
return nil
}
n := &network{id: nid}
if err := d.store.GetObject(datastore.Key(n.Key()...), n); err != nil {
return nil
}
return n
}
func (n *network) sandbox() osl.Sandbox {
n.Lock()
defer n.Unlock()
return n.sbox
}
func (n *network) setSandbox(sbox osl.Sandbox) {
n.Lock()
n.sbox = sbox
n.Unlock()
}
func (n *network) vxlanID(s *subnet) uint32 {
n.Lock()
defer n.Unlock()
return s.vni
}
func (n *network) setVxlanID(s *subnet, vni uint32) {
n.Lock()
s.vni = vni
n.Unlock()
}
func (n *network) Key() []string {
return []string{"overlay", "network", n.id}
}
func (n *network) KeyPrefix() []string {
return []string{"overlay", "network"}
}
func (n *network) Value() []byte {
m := map[string]interface{}{}
netJSON := []*subnetJSON{}
for _, s := range n.subnets {
sj := &subnetJSON{
SubnetIP: s.subnetIP.String(),
GwIP: s.gwIP.String(),
Vni: s.vni,
}
netJSON = append(netJSON, sj)
}
m["secure"] = n.secure
m["subnets"] = netJSON
m["mtu"] = n.mtu
b, err := json.Marshal(m)
if err != nil {
return []byte{}
}
return b
}
func (n *network) Index() uint64 {
return n.dbIndex
}
func (n *network) SetIndex(index uint64) {
n.dbIndex = index
n.dbExists = true
}
func (n *network) Exists() bool {
return n.dbExists
}
func (n *network) Skip() bool {
return false
}
func (n *network) SetValue(value []byte) error {
var (
m map[string]interface{}
newNet bool
isMap = true
netJSON = []*subnetJSON{}
)
if err := json.Unmarshal(value, &m); err != nil {
err := json.Unmarshal(value, &netJSON)
if err != nil {
return err
}
isMap = false
}
if len(n.subnets) == 0 {
newNet = true
}
if isMap {
if val, ok := m["secure"]; ok {
n.secure = val.(bool)
}
if val, ok := m["mtu"]; ok {
n.mtu = int(val.(float64))
}
bytes, err := json.Marshal(m["subnets"])
if err != nil {
return err
}
if err := json.Unmarshal(bytes, &netJSON); err != nil {
return err
}
}
for _, sj := range netJSON {
subnetIPstr := sj.SubnetIP
gwIPstr := sj.GwIP
vni := sj.Vni
subnetIP, _ := types.ParseCIDR(subnetIPstr)
gwIP, _ := types.ParseCIDR(gwIPstr)
if newNet {
s := &subnet{
subnetIP: subnetIP,
gwIP: gwIP,
vni: vni,
once: &sync.Once{},
}
n.subnets = append(n.subnets, s)
} else {
sNet := n.getMatchingSubnet(subnetIP)
if sNet != nil {
sNet.vni = vni
}
}
}
return nil
}
func (n *network) DataScope() string {
return datastore.GlobalScope
}
func (n *network) writeToStore() error {
if n.driver.store == nil {
return nil
}
return n.driver.store.PutObjectAtomic(n)
}
func (n *network) releaseVxlanID() ([]uint32, error) {
if len(n.subnets) == 0 {
return nil, nil
}
if n.driver.store != nil {
if err := n.driver.store.DeleteObjectAtomic(n); err != nil {
if err == datastore.ErrKeyModified || err == datastore.ErrKeyNotFound {
// In both the above cases we can safely assume that the key has been removed by some other
// instance and so simply get out of here
return nil, nil
}
return nil, fmt.Errorf("failed to delete network to vxlan id map: %v", err)
}
}
var vnis []uint32
for _, s := range n.subnets {
if n.driver.vxlanIdm != nil {
vni := n.vxlanID(s)
vnis = append(vnis, vni)
n.driver.vxlanIdm.Release(uint64(vni))
}
n.setVxlanID(s, 0)
}
return vnis, nil
}
func (n *network) obtainVxlanID(s *subnet) error {
//return if the subnet already has a vxlan id assigned
if s.vni != 0 {
return nil
}
if n.driver.store == nil {
return fmt.Errorf("no valid vxlan id and no datastore configured, cannot obtain vxlan id")
}
for {
if err := n.driver.store.GetObject(datastore.Key(n.Key()...), n); err != nil {
return fmt.Errorf("getting network %q from datastore failed %v", n.id, err)
}
if s.vni == 0 {
vxlanID, err := n.driver.vxlanIdm.GetID()
if err != nil {
return fmt.Errorf("failed to allocate vxlan id: %v", err)
}
n.setVxlanID(s, uint32(vxlanID))
if err := n.writeToStore(); err != nil {
n.driver.vxlanIdm.Release(uint64(n.vxlanID(s)))
n.setVxlanID(s, 0)
if err == datastore.ErrKeyModified {
continue
}
return fmt.Errorf("network %q failed to update data store: %v", n.id, err)
}
return nil
}
return nil
}
}
// contains return true if the passed ip belongs to one the network's
// subnets
func (n *network) contains(ip net.IP) bool {
for _, s := range n.subnets {
if s.subnetIP.Contains(ip) {
return true
}
}
return false
}
// getSubnetforIP returns the subnet to which the given IP belongs
func (n *network) getSubnetforIP(ip *net.IPNet) *subnet {
for _, s := range n.subnets {
// first check if the mask lengths are the same
i, _ := s.subnetIP.Mask.Size()
j, _ := ip.Mask.Size()
if i != j {
continue
}
if s.subnetIP.Contains(ip.IP) {
return s
}
}
return nil
}
// getMatchingSubnet return the network's subnet that matches the input
func (n *network) getMatchingSubnet(ip *net.IPNet) *subnet {
if ip == nil {
return nil
}
for _, s := range n.subnets {
// first check if the mask lengths are the same
i, _ := s.subnetIP.Mask.Size()
j, _ := ip.Mask.Size()
if i != j {
continue
}
if s.subnetIP.IP.Equal(ip.IP) {
return s
}
}
return nil
}