blob: d77ed1c3a524115fc4b135ad16bf954b8a9dbf4a [file] [log] [blame]
// Copyright 2019 The gVisor Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package stack
import (
"fmt"
"strings"
"sync"
"gvisor.dev/gvisor/pkg/tcpip"
"gvisor.dev/gvisor/pkg/tcpip/header"
)
// A Hook specifies one of the hooks built into the network stack.
//
// Userspace app Userspace app
// ^ |
// | v
// [Input] [Output]
// ^ |
// | v
// | routing
// | |
// | v
// ----->[Prerouting]----->routing----->[Forward]---------[Postrouting]----->
type Hook uint
const (
// Prerouting happens before a packet is routed to applications or to
// be forwarded.
Prerouting Hook = iota
// Input happens before a packet reaches an application.
Input
// Forward happens once it's decided that a packet should be forwarded
// to another host.
Forward
// Output happens after a packet is written by an application to be
// sent out.
Output
// Postrouting happens just before a packet goes out on the wire.
Postrouting
// NumHooks is the total number of hooks.
NumHooks
)
// A RuleVerdict is what a rule decides should be done with a packet.
type RuleVerdict int
const (
// RuleAccept indicates the packet should continue through netstack.
RuleAccept RuleVerdict = iota
// RuleDrop indicates the packet should be dropped.
RuleDrop
// RuleJump indicates the packet should jump to another chain.
RuleJump
// RuleReturn indicates the packet should return to the previous chain.
RuleReturn
)
// IPTables holds all the tables for a netstack.
//
// +stateify savable
type IPTables struct {
connections ConnTrack
reaper tcpip.Timer
mu sync.RWMutex
// v4Tables and v6tables map tableIDs to tables. They hold builtin
// tables only, not user tables.
//
// mu protects the array of tables, but not the tables themselves.
// +checklocks:mu
v4Tables [NumTables]Table
//
// mu protects the array of tables, but not the tables themselves.
// +checklocks:mu
v6Tables [NumTables]Table
// modified is whether tables have been modified at least once. It is
// used to elide the iptables performance overhead for workloads that
// don't utilize iptables.
//
// +checklocks:mu
modified bool
}
// VisitTargets traverses all the targets of all tables and replaces each with
// transform(target).
func (it *IPTables) VisitTargets(transform func(Target) Target) {
it.mu.Lock()
defer it.mu.Unlock()
for tid := range it.v4Tables {
for i, rule := range it.v4Tables[tid].Rules {
it.v4Tables[tid].Rules[i].Target = transform(rule.Target)
}
}
for tid := range it.v6Tables {
for i, rule := range it.v6Tables[tid].Rules {
it.v6Tables[tid].Rules[i].Target = transform(rule.Target)
}
}
}
// A Table defines a set of chains and hooks into the network stack.
//
// It is a list of Rules, entry points (BuiltinChains), and error handlers
// (Underflows). As packets traverse netstack, they hit hooks. When a packet
// hits a hook, iptables compares it to Rules starting from that hook's entry
// point. So if a packet hits the Input hook, we look up the corresponding
// entry point in BuiltinChains and jump to that point.
//
// If the Rule doesn't match the packet, iptables continues to the next Rule.
// If a Rule does match, it can issue a verdict on the packet (e.g. RuleAccept
// or RuleDrop) that causes the packet to stop traversing iptables. It can also
// jump to other rules or perform custom actions based on Rule.Target.
//
// Underflow Rules are invoked when a chain returns without reaching a verdict.
//
// +stateify savable
type Table struct {
// Rules holds the rules that make up the table.
Rules []Rule
// BuiltinChains maps builtin chains to their entrypoint rule in Rules.
BuiltinChains [NumHooks]int
// Underflows maps builtin chains to their underflow rule in Rules
// (i.e. the rule to execute if the chain returns without a verdict).
Underflows [NumHooks]int
}
// ValidHooks returns a bitmap of the builtin hooks for the given table.
func (table *Table) ValidHooks() uint32 {
hooks := uint32(0)
for hook, ruleIdx := range table.BuiltinChains {
if ruleIdx != HookUnset {
hooks |= 1 << hook
}
}
return hooks
}
// A Rule is a packet processing rule. It consists of two pieces. First it
// contains zero or more matchers, each of which is a specification of which
// packets this rule applies to. If there are no matchers in the rule, it
// applies to any packet.
//
// +stateify savable
type Rule struct {
// Filter holds basic IP filtering fields common to every rule.
Filter IPHeaderFilter
// Matchers is the list of matchers for this rule.
Matchers []Matcher
// Target is the action to invoke if all the matchers match the packet.
Target Target
}
// IPHeaderFilter performs basic IP header matching common to every rule.
//
// +stateify savable
type IPHeaderFilter struct {
// Protocol matches the transport protocol.
Protocol tcpip.TransportProtocolNumber
// CheckProtocol determines whether the Protocol field should be
// checked during matching.
CheckProtocol bool
// Dst matches the destination IP address.
Dst tcpip.Address
// DstMask masks bits of the destination IP address when comparing with
// Dst.
DstMask tcpip.Address
// DstInvert inverts the meaning of the destination IP check, i.e. when
// true the filter will match packets that fail the destination
// comparison.
DstInvert bool
// Src matches the source IP address.
Src tcpip.Address
// SrcMask masks bits of the source IP address when comparing with Src.
SrcMask tcpip.Address
// SrcInvert inverts the meaning of the source IP check, i.e. when true the
// filter will match packets that fail the source comparison.
SrcInvert bool
// InputInterface matches the name of the incoming interface for the packet.
InputInterface string
// InputInterfaceMask masks the characters of the interface name when
// comparing with InputInterface.
InputInterfaceMask string
// InputInterfaceInvert inverts the meaning of incoming interface check,
// i.e. when true the filter will match packets that fail the incoming
// interface comparison.
InputInterfaceInvert bool
// OutputInterface matches the name of the outgoing interface for the packet.
OutputInterface string
// OutputInterfaceMask masks the characters of the interface name when
// comparing with OutputInterface.
OutputInterfaceMask string
// OutputInterfaceInvert inverts the meaning of outgoing interface check,
// i.e. when true the filter will match packets that fail the outgoing
// interface comparison.
OutputInterfaceInvert bool
}
// match returns whether pkt matches the filter.
//
// Preconditions: pkt.NetworkHeader is set and is at least of the minimal IPv4
// or IPv6 header length.
func (fl IPHeaderFilter) match(pkt *PacketBuffer, hook Hook, inNicName, outNicName string) bool {
// Extract header fields.
var (
transProto tcpip.TransportProtocolNumber
dstAddr tcpip.Address
srcAddr tcpip.Address
)
switch proto := pkt.NetworkProtocolNumber; proto {
case header.IPv4ProtocolNumber:
hdr := header.IPv4(pkt.NetworkHeader().View())
transProto = hdr.TransportProtocol()
dstAddr = hdr.DestinationAddress()
srcAddr = hdr.SourceAddress()
case header.IPv6ProtocolNumber:
hdr := header.IPv6(pkt.NetworkHeader().View())
transProto = hdr.TransportProtocol()
dstAddr = hdr.DestinationAddress()
srcAddr = hdr.SourceAddress()
default:
panic(fmt.Sprintf("unknown network protocol with EtherType: %d", proto))
}
// Check the transport protocol.
if fl.CheckProtocol && fl.Protocol != transProto {
return false
}
// Check the addresses.
if !filterAddress(dstAddr, fl.DstMask, fl.Dst, fl.DstInvert) ||
!filterAddress(srcAddr, fl.SrcMask, fl.Src, fl.SrcInvert) {
return false
}
switch hook {
case Prerouting, Input:
return matchIfName(inNicName, fl.InputInterface, fl.InputInterfaceInvert)
case Output:
return matchIfName(outNicName, fl.OutputInterface, fl.OutputInterfaceInvert)
case Forward:
if !matchIfName(inNicName, fl.InputInterface, fl.InputInterfaceInvert) {
return false
}
if !matchIfName(outNicName, fl.OutputInterface, fl.OutputInterfaceInvert) {
return false
}
return true
case Postrouting:
return true
default:
panic(fmt.Sprintf("unknown hook: %d", hook))
}
}
func matchIfName(nicName string, ifName string, invert bool) bool {
n := len(ifName)
if n == 0 {
// If the interface name is omitted in the filter, any interface will match.
return true
}
// If the interface name ends with '+', any interface which begins with the
// name should be matched.
var matches bool
if strings.HasSuffix(ifName, "+") {
matches = strings.HasPrefix(nicName, ifName[:n-1])
} else {
matches = nicName == ifName
}
return matches != invert
}
// NetworkProtocol returns the protocol (IPv4 or IPv6) on to which the header
// applies.
func (fl IPHeaderFilter) NetworkProtocol() tcpip.NetworkProtocolNumber {
switch len(fl.Src) {
case header.IPv4AddressSize:
return header.IPv4ProtocolNumber
case header.IPv6AddressSize:
return header.IPv6ProtocolNumber
}
panic(fmt.Sprintf("invalid address in IPHeaderFilter: %s", fl.Src))
}
// filterAddress returns whether addr matches the filter.
func filterAddress(addr, mask, filterAddr tcpip.Address, invert bool) bool {
matches := true
for i := range filterAddr {
if addr[i]&mask[i] != filterAddr[i] {
matches = false
break
}
}
return matches != invert
}
// A Matcher is the interface for matching packets.
type Matcher interface {
// Match returns whether the packet matches and whether the packet
// should be "hotdropped", i.e. dropped immediately. This is usually
// used for suspicious packets.
//
// Precondition: packet.NetworkHeader is set.
Match(hook Hook, packet *PacketBuffer, inputInterfaceName, outputInterfaceName string) (matches bool, hotdrop bool)
}
// A Target is the interface for taking an action for a packet.
type Target interface {
// Action takes an action on the packet and returns a verdict on how
// traversal should (or should not) continue. If the return value is
// Jump, it also returns the index of the rule to jump to.
Action(*PacketBuffer, Hook, *Route, AddressableEndpoint) (RuleVerdict, int)
}