| // Copyright 2018 The gVisor Authors. |
| // |
| // Licensed under the Apache License, Version 2.0 (the "License"); |
| // you may not use this file except in compliance with the License. |
| // You may obtain a copy of the License at |
| // |
| // http://www.apache.org/licenses/LICENSE-2.0 |
| // |
| // Unless required by applicable law or agreed to in writing, software |
| // distributed under the License is distributed on an "AS IS" BASIS, |
| // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| // See the License for the specific language governing permissions and |
| // limitations under the License. |
| |
| // Package ports provides PortManager that manages allocating, reserving and releasing ports. |
| package ports |
| |
| import ( |
| "math" |
| "math/rand" |
| "sync/atomic" |
| |
| "gvisor.dev/gvisor/pkg/sync" |
| "gvisor.dev/gvisor/pkg/tcpip" |
| ) |
| |
| const ( |
| // FirstEphemeral is the first ephemeral port. |
| FirstEphemeral = 16000 |
| |
| // numEphemeralPorts it the mnumber of available ephemeral ports to |
| // Netstack. |
| numEphemeralPorts = math.MaxUint16 - FirstEphemeral + 1 |
| |
| anyIPAddress tcpip.Address = "" |
| ) |
| |
| type portDescriptor struct { |
| network tcpip.NetworkProtocolNumber |
| transport tcpip.TransportProtocolNumber |
| port uint16 |
| } |
| |
| // Flags represents the type of port reservation. |
| // |
| // +stateify savable |
| type Flags struct { |
| // MostRecent represents UDP SO_REUSEADDR. |
| MostRecent bool |
| |
| // LoadBalanced indicates SO_REUSEPORT. |
| // |
| // LoadBalanced takes precidence over MostRecent. |
| LoadBalanced bool |
| |
| // TupleOnly represents TCP SO_REUSEADDR. |
| TupleOnly bool |
| } |
| |
| // Bits converts the Flags to their bitset form. |
| func (f Flags) Bits() BitFlags { |
| var rf BitFlags |
| if f.MostRecent { |
| rf |= MostRecentFlag |
| } |
| if f.LoadBalanced { |
| rf |= LoadBalancedFlag |
| } |
| if f.TupleOnly { |
| rf |= TupleOnlyFlag |
| } |
| return rf |
| } |
| |
| // Effective returns the effective behavior of a flag config. |
| func (f Flags) Effective() Flags { |
| e := f |
| if e.LoadBalanced && e.MostRecent { |
| e.MostRecent = false |
| } |
| return e |
| } |
| |
| // PortManager manages allocating, reserving and releasing ports. |
| type PortManager struct { |
| mu sync.RWMutex |
| allocatedPorts map[portDescriptor]bindAddresses |
| |
| // hint is used to pick ports ephemeral ports in a stable order for |
| // a given port offset. |
| // |
| // hint must be accessed using the portHint/incPortHint helpers. |
| // TODO(gvisor.dev/issue/940): S/R this field. |
| hint uint32 |
| } |
| |
| // BitFlags is a bitset representation of Flags. |
| type BitFlags uint32 |
| |
| const ( |
| // MostRecentFlag represents Flags.MostRecent. |
| MostRecentFlag BitFlags = 1 << iota |
| |
| // LoadBalancedFlag represents Flags.LoadBalanced. |
| LoadBalancedFlag |
| |
| // TupleOnlyFlag represents Flags.TupleOnly. |
| TupleOnlyFlag |
| |
| // nextFlag is the value that the next added flag will have. |
| // |
| // It is used to calculate FlagMask below. It is also the number of |
| // valid flag states. |
| nextFlag |
| |
| // FlagMask is a bit mask for BitFlags. |
| FlagMask = nextFlag - 1 |
| |
| // MultiBindFlagMask contains the flags that allow binding the same |
| // tuple multiple times. |
| MultiBindFlagMask = MostRecentFlag | LoadBalancedFlag |
| ) |
| |
| // ToFlags converts the bitset into a Flags struct. |
| func (f BitFlags) ToFlags() Flags { |
| return Flags{ |
| MostRecent: f&MostRecentFlag != 0, |
| LoadBalanced: f&LoadBalancedFlag != 0, |
| TupleOnly: f&TupleOnlyFlag != 0, |
| } |
| } |
| |
| // FlagCounter counts how many references each flag combination has. |
| type FlagCounter struct { |
| // refs stores the count for each possible flag combination, (0 though |
| // FlagMask). |
| refs [nextFlag]int |
| } |
| |
| // AddRef increases the reference count for a specific flag combination. |
| func (c *FlagCounter) AddRef(flags BitFlags) { |
| c.refs[flags]++ |
| } |
| |
| // DropRef decreases the reference count for a specific flag combination. |
| func (c *FlagCounter) DropRef(flags BitFlags) { |
| c.refs[flags]-- |
| } |
| |
| // TotalRefs calculates the total number of references for all flag |
| // combinations. |
| func (c FlagCounter) TotalRefs() int { |
| var total int |
| for _, r := range c.refs { |
| total += r |
| } |
| return total |
| } |
| |
| // FlagRefs returns the number of references with all specified flags. |
| func (c FlagCounter) FlagRefs(flags BitFlags) int { |
| var total int |
| for i, r := range c.refs { |
| if BitFlags(i)&flags == flags { |
| total += r |
| } |
| } |
| return total |
| } |
| |
| // AllRefsHave returns if all references have all specified flags. |
| func (c FlagCounter) AllRefsHave(flags BitFlags) bool { |
| for i, r := range c.refs { |
| if BitFlags(i)&flags != flags && r > 0 { |
| return false |
| } |
| } |
| return true |
| } |
| |
| // IntersectionRefs returns the set of flags shared by all references. |
| func (c FlagCounter) IntersectionRefs() BitFlags { |
| intersection := FlagMask |
| for i, r := range c.refs { |
| if r > 0 { |
| intersection &= BitFlags(i) |
| } |
| } |
| return intersection |
| } |
| |
| type destination struct { |
| addr tcpip.Address |
| port uint16 |
| } |
| |
| func makeDestination(a tcpip.FullAddress) destination { |
| return destination{ |
| a.Addr, |
| a.Port, |
| } |
| } |
| |
| // portNode is never empty. When it has no elements, it is removed from the |
| // map that references it. |
| type portNode map[destination]FlagCounter |
| |
| // intersectionRefs calculates the intersection of flag bit values which affect |
| // the specified destination. |
| // |
| // If no destinations are present, all flag values are returned as there are no |
| // entries to limit possible flag values of a new entry. |
| // |
| // In addition to the intersection, the number of intersecting refs is |
| // returned. |
| func (p portNode) intersectionRefs(dst destination) (BitFlags, int) { |
| intersection := FlagMask |
| var count int |
| |
| for d, f := range p { |
| if d == dst { |
| intersection &= f.IntersectionRefs() |
| count++ |
| continue |
| } |
| // Wildcard destinations affect all destinations for TupleOnly. |
| if d.addr == anyIPAddress || dst.addr == anyIPAddress { |
| // Only bitwise and the TupleOnlyFlag. |
| intersection &= ((^TupleOnlyFlag) | f.IntersectionRefs()) |
| count++ |
| } |
| } |
| |
| return intersection, count |
| } |
| |
| // deviceNode is never empty. When it has no elements, it is removed from the |
| // map that references it. |
| type deviceNode map[tcpip.NICID]portNode |
| |
| // isAvailable checks whether binding is possible by device. If not binding to a |
| // device, check against all FlagCounters. If binding to a specific device, check |
| // against the unspecified device and the provided device. |
| // |
| // If either of the port reuse flags is enabled on any of the nodes, all nodes |
| // sharing a port must share at least one reuse flag. This matches Linux's |
| // behavior. |
| func (d deviceNode) isAvailable(flags Flags, bindToDevice tcpip.NICID, dst destination) bool { |
| flagBits := flags.Bits() |
| if bindToDevice == 0 { |
| intersection := FlagMask |
| for _, p := range d { |
| i, c := p.intersectionRefs(dst) |
| if c == 0 { |
| continue |
| } |
| intersection &= i |
| if intersection&flagBits == 0 { |
| // Can't bind because the (addr,port) was |
| // previously bound without reuse. |
| return false |
| } |
| } |
| return true |
| } |
| |
| intersection := FlagMask |
| |
| if p, ok := d[0]; ok { |
| var c int |
| intersection, c = p.intersectionRefs(dst) |
| if c > 0 && intersection&flagBits == 0 { |
| return false |
| } |
| } |
| |
| if p, ok := d[bindToDevice]; ok { |
| i, c := p.intersectionRefs(dst) |
| intersection &= i |
| if c > 0 && intersection&flagBits == 0 { |
| return false |
| } |
| } |
| |
| return true |
| } |
| |
| // bindAddresses is a set of IP addresses. |
| type bindAddresses map[tcpip.Address]deviceNode |
| |
| // isAvailable checks whether an IP address is available to bind to. If the |
| // address is the "any" address, check all other addresses. Otherwise, just |
| // check against the "any" address and the provided address. |
| func (b bindAddresses) isAvailable(addr tcpip.Address, flags Flags, bindToDevice tcpip.NICID, dst destination) bool { |
| if addr == anyIPAddress { |
| // If binding to the "any" address then check that there are no conflicts |
| // with all addresses. |
| for _, d := range b { |
| if !d.isAvailable(flags, bindToDevice, dst) { |
| return false |
| } |
| } |
| return true |
| } |
| |
| // Check that there is no conflict with the "any" address. |
| if d, ok := b[anyIPAddress]; ok { |
| if !d.isAvailable(flags, bindToDevice, dst) { |
| return false |
| } |
| } |
| |
| // Check that this is no conflict with the provided address. |
| if d, ok := b[addr]; ok { |
| if !d.isAvailable(flags, bindToDevice, dst) { |
| return false |
| } |
| } |
| |
| return true |
| } |
| |
| // NewPortManager creates new PortManager. |
| func NewPortManager() *PortManager { |
| return &PortManager{allocatedPorts: make(map[portDescriptor]bindAddresses)} |
| } |
| |
| // PickEphemeralPort randomly chooses a starting point and iterates over all |
| // possible ephemeral ports, allowing the caller to decide whether a given port |
| // is suitable for its needs, and stopping when a port is found or an error |
| // occurs. |
| func (s *PortManager) PickEphemeralPort(testPort func(p uint16) (bool, *tcpip.Error)) (port uint16, err *tcpip.Error) { |
| offset := uint32(rand.Int31n(numEphemeralPorts)) |
| return s.pickEphemeralPort(offset, numEphemeralPorts, testPort) |
| } |
| |
| // portHint atomically reads and returns the s.hint value. |
| func (s *PortManager) portHint() uint32 { |
| return atomic.LoadUint32(&s.hint) |
| } |
| |
| // incPortHint atomically increments s.hint by 1. |
| func (s *PortManager) incPortHint() { |
| atomic.AddUint32(&s.hint, 1) |
| } |
| |
| // PickEphemeralPortStable starts at the specified offset + s.portHint and |
| // iterates over all ephemeral ports, allowing the caller to decide whether a |
| // given port is suitable for its needs and stopping when a port is found or an |
| // error occurs. |
| func (s *PortManager) PickEphemeralPortStable(offset uint32, testPort func(p uint16) (bool, *tcpip.Error)) (port uint16, err *tcpip.Error) { |
| p, err := s.pickEphemeralPort(s.portHint()+offset, numEphemeralPorts, testPort) |
| if err == nil { |
| s.incPortHint() |
| } |
| return p, err |
| |
| } |
| |
| // pickEphemeralPort starts at the offset specified from the FirstEphemeral port |
| // and iterates over the number of ports specified by count and allows the |
| // caller to decide whether a given port is suitable for its needs, and stopping |
| // when a port is found or an error occurs. |
| func (s *PortManager) pickEphemeralPort(offset, count uint32, testPort func(p uint16) (bool, *tcpip.Error)) (port uint16, err *tcpip.Error) { |
| for i := uint32(0); i < count; i++ { |
| port = uint16(FirstEphemeral + (offset+i)%count) |
| ok, err := testPort(port) |
| if err != nil { |
| return 0, err |
| } |
| |
| if ok { |
| return port, nil |
| } |
| } |
| |
| return 0, tcpip.ErrNoPortAvailable |
| } |
| |
| // IsPortAvailable tests if the given port is available on all given protocols. |
| func (s *PortManager) IsPortAvailable(networks []tcpip.NetworkProtocolNumber, transport tcpip.TransportProtocolNumber, addr tcpip.Address, port uint16, flags Flags, bindToDevice tcpip.NICID, dest tcpip.FullAddress) bool { |
| s.mu.Lock() |
| defer s.mu.Unlock() |
| return s.isPortAvailableLocked(networks, transport, addr, port, flags, bindToDevice, makeDestination(dest)) |
| } |
| |
| func (s *PortManager) isPortAvailableLocked(networks []tcpip.NetworkProtocolNumber, transport tcpip.TransportProtocolNumber, addr tcpip.Address, port uint16, flags Flags, bindToDevice tcpip.NICID, dst destination) bool { |
| for _, network := range networks { |
| desc := portDescriptor{network, transport, port} |
| if addrs, ok := s.allocatedPorts[desc]; ok { |
| if !addrs.isAvailable(addr, flags, bindToDevice, dst) { |
| return false |
| } |
| } |
| } |
| return true |
| } |
| |
| // ReservePort marks a port/IP combination as reserved so that it cannot be |
| // reserved by another endpoint. If port is zero, ReservePort will search for |
| // an unreserved ephemeral port and reserve it, returning its value in the |
| // "port" return value. |
| // |
| // An optional testPort closure can be passed in which if provided will be used |
| // to test if the picked port can be used. The function should return true if |
| // the port is safe to use, false otherwise. |
| func (s *PortManager) ReservePort(networks []tcpip.NetworkProtocolNumber, transport tcpip.TransportProtocolNumber, addr tcpip.Address, port uint16, flags Flags, bindToDevice tcpip.NICID, dest tcpip.FullAddress, testPort func(port uint16) bool) (reservedPort uint16, err *tcpip.Error) { |
| s.mu.Lock() |
| defer s.mu.Unlock() |
| |
| dst := makeDestination(dest) |
| |
| // If a port is specified, just try to reserve it for all network |
| // protocols. |
| if port != 0 { |
| if !s.reserveSpecificPort(networks, transport, addr, port, flags, bindToDevice, dst) { |
| return 0, tcpip.ErrPortInUse |
| } |
| if testPort != nil && !testPort(port) { |
| s.releasePortLocked(networks, transport, addr, port, flags.Bits(), bindToDevice, dst) |
| return 0, tcpip.ErrPortInUse |
| } |
| return port, nil |
| } |
| |
| // A port wasn't specified, so try to find one. |
| return s.PickEphemeralPort(func(p uint16) (bool, *tcpip.Error) { |
| if !s.reserveSpecificPort(networks, transport, addr, p, flags, bindToDevice, dst) { |
| return false, nil |
| } |
| if testPort != nil && !testPort(p) { |
| s.releasePortLocked(networks, transport, addr, p, flags.Bits(), bindToDevice, dst) |
| return false, nil |
| } |
| return true, nil |
| }) |
| } |
| |
| // reserveSpecificPort tries to reserve the given port on all given protocols. |
| func (s *PortManager) reserveSpecificPort(networks []tcpip.NetworkProtocolNumber, transport tcpip.TransportProtocolNumber, addr tcpip.Address, port uint16, flags Flags, bindToDevice tcpip.NICID, dst destination) bool { |
| if !s.isPortAvailableLocked(networks, transport, addr, port, flags, bindToDevice, dst) { |
| return false |
| } |
| |
| flagBits := flags.Bits() |
| |
| // Reserve port on all network protocols. |
| for _, network := range networks { |
| desc := portDescriptor{network, transport, port} |
| m, ok := s.allocatedPorts[desc] |
| if !ok { |
| m = make(bindAddresses) |
| s.allocatedPorts[desc] = m |
| } |
| d, ok := m[addr] |
| if !ok { |
| d = make(deviceNode) |
| m[addr] = d |
| } |
| p := d[bindToDevice] |
| if p == nil { |
| p = make(portNode) |
| } |
| n := p[dst] |
| n.AddRef(flagBits) |
| p[dst] = n |
| d[bindToDevice] = p |
| } |
| |
| return true |
| } |
| |
| // ReserveTuple adds a port reservation for the tuple on all given protocol. |
| func (s *PortManager) ReserveTuple(networks []tcpip.NetworkProtocolNumber, transport tcpip.TransportProtocolNumber, addr tcpip.Address, port uint16, flags Flags, bindToDevice tcpip.NICID, dest tcpip.FullAddress) bool { |
| flagBits := flags.Bits() |
| dst := makeDestination(dest) |
| |
| s.mu.Lock() |
| defer s.mu.Unlock() |
| |
| // It is easier to undo the entire reservation, so if we find that the |
| // tuple can't be fully added, finish and undo the whole thing. |
| undo := false |
| |
| // Reserve port on all network protocols. |
| for _, network := range networks { |
| desc := portDescriptor{network, transport, port} |
| m, ok := s.allocatedPorts[desc] |
| if !ok { |
| m = make(bindAddresses) |
| s.allocatedPorts[desc] = m |
| } |
| d, ok := m[addr] |
| if !ok { |
| d = make(deviceNode) |
| m[addr] = d |
| } |
| p := d[bindToDevice] |
| if p == nil { |
| p = make(portNode) |
| } |
| |
| n := p[dst] |
| if n.TotalRefs() != 0 && n.IntersectionRefs()&flagBits == 0 { |
| // Tuple already exists. |
| undo = true |
| } |
| n.AddRef(flagBits) |
| p[dst] = n |
| d[bindToDevice] = p |
| } |
| |
| if undo { |
| // releasePortLocked decrements the counts (rather than setting |
| // them to zero), so it will undo the incorrect incrementing |
| // above. |
| s.releasePortLocked(networks, transport, addr, port, flagBits, bindToDevice, dst) |
| return false |
| } |
| |
| return true |
| } |
| |
| // ReleasePort releases the reservation on a port/IP combination so that it can |
| // be reserved by other endpoints. |
| func (s *PortManager) ReleasePort(networks []tcpip.NetworkProtocolNumber, transport tcpip.TransportProtocolNumber, addr tcpip.Address, port uint16, flags Flags, bindToDevice tcpip.NICID, dest tcpip.FullAddress) { |
| s.mu.Lock() |
| defer s.mu.Unlock() |
| |
| s.releasePortLocked(networks, transport, addr, port, flags.Bits(), bindToDevice, makeDestination(dest)) |
| } |
| |
| func (s *PortManager) releasePortLocked(networks []tcpip.NetworkProtocolNumber, transport tcpip.TransportProtocolNumber, addr tcpip.Address, port uint16, flags BitFlags, bindToDevice tcpip.NICID, dst destination) { |
| for _, network := range networks { |
| desc := portDescriptor{network, transport, port} |
| if m, ok := s.allocatedPorts[desc]; ok { |
| d, ok := m[addr] |
| if !ok { |
| continue |
| } |
| p, ok := d[bindToDevice] |
| if !ok { |
| continue |
| } |
| n, ok := p[dst] |
| if !ok { |
| continue |
| } |
| n.DropRef(flags) |
| if n.TotalRefs() > 0 { |
| p[dst] = n |
| continue |
| } |
| delete(p, dst) |
| if len(p) > 0 { |
| continue |
| } |
| delete(d, bindToDevice) |
| if len(d) > 0 { |
| continue |
| } |
| delete(m, addr) |
| if len(m) > 0 { |
| continue |
| } |
| delete(s.allocatedPorts, desc) |
| } |
| } |
| } |