blob: 52890f6eb65a8cafdd40894a05b72f5bba5c71f8 [file] [log] [blame]
// Copyright 2019 The gVisor Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package stack
import (
"fmt"
"time"
"gvisor.dev/gvisor/pkg/tcpip"
"gvisor.dev/gvisor/pkg/tcpip/header"
)
// TableID identifies a specific table.
type TableID int
// Each value identifies a specific table.
const (
NATID TableID = iota
MangleID
FilterID
NumTables
)
// HookUnset indicates that there is no hook set for an entrypoint or
// underflow.
const HookUnset = -1
// reaperDelay is how long to wait before starting to reap connections.
const reaperDelay = 5 * time.Second
// DefaultTables returns a default set of tables. Each chain is set to accept
// all packets.
func DefaultTables() *IPTables {
return &IPTables{
v4Tables: [NumTables]Table{
NATID: {
Rules: []Rule{
{Target: &AcceptTarget{NetworkProtocol: header.IPv4ProtocolNumber}},
{Target: &AcceptTarget{NetworkProtocol: header.IPv4ProtocolNumber}},
{Target: &AcceptTarget{NetworkProtocol: header.IPv4ProtocolNumber}},
{Target: &AcceptTarget{NetworkProtocol: header.IPv4ProtocolNumber}},
{Target: &ErrorTarget{NetworkProtocol: header.IPv4ProtocolNumber}},
},
BuiltinChains: [NumHooks]int{
Prerouting: 0,
Input: 1,
Forward: HookUnset,
Output: 2,
Postrouting: 3,
},
Underflows: [NumHooks]int{
Prerouting: 0,
Input: 1,
Forward: HookUnset,
Output: 2,
Postrouting: 3,
},
},
MangleID: {
Rules: []Rule{
{Target: &AcceptTarget{NetworkProtocol: header.IPv4ProtocolNumber}},
{Target: &AcceptTarget{NetworkProtocol: header.IPv4ProtocolNumber}},
{Target: &ErrorTarget{NetworkProtocol: header.IPv4ProtocolNumber}},
},
BuiltinChains: [NumHooks]int{
Prerouting: 0,
Output: 1,
},
Underflows: [NumHooks]int{
Prerouting: 0,
Input: HookUnset,
Forward: HookUnset,
Output: 1,
Postrouting: HookUnset,
},
},
FilterID: {
Rules: []Rule{
{Target: &AcceptTarget{NetworkProtocol: header.IPv4ProtocolNumber}},
{Target: &AcceptTarget{NetworkProtocol: header.IPv4ProtocolNumber}},
{Target: &AcceptTarget{NetworkProtocol: header.IPv4ProtocolNumber}},
{Target: &ErrorTarget{NetworkProtocol: header.IPv4ProtocolNumber}},
},
BuiltinChains: [NumHooks]int{
Prerouting: HookUnset,
Input: 0,
Forward: 1,
Output: 2,
Postrouting: HookUnset,
},
Underflows: [NumHooks]int{
Prerouting: HookUnset,
Input: 0,
Forward: 1,
Output: 2,
Postrouting: HookUnset,
},
},
},
v6Tables: [NumTables]Table{
NATID: {
Rules: []Rule{
{Target: &AcceptTarget{NetworkProtocol: header.IPv6ProtocolNumber}},
{Target: &AcceptTarget{NetworkProtocol: header.IPv6ProtocolNumber}},
{Target: &AcceptTarget{NetworkProtocol: header.IPv6ProtocolNumber}},
{Target: &AcceptTarget{NetworkProtocol: header.IPv6ProtocolNumber}},
{Target: &ErrorTarget{NetworkProtocol: header.IPv6ProtocolNumber}},
},
BuiltinChains: [NumHooks]int{
Prerouting: 0,
Input: 1,
Forward: HookUnset,
Output: 2,
Postrouting: 3,
},
Underflows: [NumHooks]int{
Prerouting: 0,
Input: 1,
Forward: HookUnset,
Output: 2,
Postrouting: 3,
},
},
MangleID: {
Rules: []Rule{
{Target: &AcceptTarget{NetworkProtocol: header.IPv6ProtocolNumber}},
{Target: &AcceptTarget{NetworkProtocol: header.IPv6ProtocolNumber}},
{Target: &ErrorTarget{NetworkProtocol: header.IPv6ProtocolNumber}},
},
BuiltinChains: [NumHooks]int{
Prerouting: 0,
Output: 1,
},
Underflows: [NumHooks]int{
Prerouting: 0,
Input: HookUnset,
Forward: HookUnset,
Output: 1,
Postrouting: HookUnset,
},
},
FilterID: {
Rules: []Rule{
{Target: &AcceptTarget{NetworkProtocol: header.IPv6ProtocolNumber}},
{Target: &AcceptTarget{NetworkProtocol: header.IPv6ProtocolNumber}},
{Target: &AcceptTarget{NetworkProtocol: header.IPv6ProtocolNumber}},
{Target: &ErrorTarget{NetworkProtocol: header.IPv6ProtocolNumber}},
},
BuiltinChains: [NumHooks]int{
Prerouting: HookUnset,
Input: 0,
Forward: 1,
Output: 2,
Postrouting: HookUnset,
},
Underflows: [NumHooks]int{
Prerouting: HookUnset,
Input: 0,
Forward: 1,
Output: 2,
Postrouting: HookUnset,
},
},
},
priorities: [NumHooks][]TableID{
Prerouting: {MangleID, NATID},
Input: {NATID, FilterID},
Output: {MangleID, NATID, FilterID},
},
connections: ConnTrack{
seed: generateRandUint32(),
},
reaperDone: make(chan struct{}, 1),
}
}
// EmptyFilterTable returns a Table with no rules and the filter table chains
// mapped to HookUnset.
func EmptyFilterTable() Table {
return Table{
Rules: []Rule{},
BuiltinChains: [NumHooks]int{
Prerouting: HookUnset,
Postrouting: HookUnset,
},
Underflows: [NumHooks]int{
Prerouting: HookUnset,
Postrouting: HookUnset,
},
}
}
// EmptyNATTable returns a Table with no rules and the filter table chains
// mapped to HookUnset.
func EmptyNATTable() Table {
return Table{
Rules: []Rule{},
BuiltinChains: [NumHooks]int{
Forward: HookUnset,
},
Underflows: [NumHooks]int{
Forward: HookUnset,
},
}
}
// GetTable returns a table with the given id and IP version. It panics when an
// invalid id is provided.
func (it *IPTables) GetTable(id TableID, ipv6 bool) Table {
it.mu.RLock()
defer it.mu.RUnlock()
if ipv6 {
return it.v6Tables[id]
}
return it.v4Tables[id]
}
// ReplaceTable replaces or inserts table by name. It panics when an invalid id
// is provided.
func (it *IPTables) ReplaceTable(id TableID, table Table, ipv6 bool) tcpip.Error {
it.mu.Lock()
defer it.mu.Unlock()
// If iptables is being enabled, initialize the conntrack table and
// reaper.
if !it.modified {
it.connections.init()
it.startReaper(reaperDelay)
}
it.modified = true
if ipv6 {
it.v6Tables[id] = table
} else {
it.v4Tables[id] = table
}
return nil
}
// A chainVerdict is what a table decides should be done with a packet.
type chainVerdict int
const (
// chainAccept indicates the packet should continue through netstack.
chainAccept chainVerdict = iota
// chainAccept indicates the packet should be dropped.
chainDrop
// chainReturn indicates the packet should return to the calling chain
// or the underflow rule of a builtin chain.
chainReturn
)
// Check runs pkt through the rules for hook. It returns true when the packet
// should continue traversing the network stack and false when it should be
// dropped.
//
// TODO(gvisor.dev/issue/170): PacketBuffer should hold the GSO and route, from
// which address can be gathered. Currently, address is only needed for
// prerouting.
//
// Precondition: pkt.NetworkHeader is set.
func (it *IPTables) Check(hook Hook, pkt *PacketBuffer, gso *GSO, r *Route, preroutingAddr tcpip.Address, inNicName, outNicName string) bool {
if pkt.NetworkProtocolNumber != header.IPv4ProtocolNumber && pkt.NetworkProtocolNumber != header.IPv6ProtocolNumber {
return true
}
// Many users never configure iptables. Spare them the cost of rule
// traversal if rules have never been set.
it.mu.RLock()
defer it.mu.RUnlock()
if !it.modified {
return true
}
// Packets are manipulated only if connection and matching
// NAT rule exists.
shouldTrack := it.connections.handlePacket(pkt, hook, gso, r)
// Go through each table containing the hook.
priorities := it.priorities[hook]
for _, tableID := range priorities {
// If handlePacket already NATed the packet, we don't need to
// check the NAT table.
if tableID == NATID && pkt.NatDone {
continue
}
var table Table
if pkt.NetworkProtocolNumber == header.IPv6ProtocolNumber {
table = it.v6Tables[tableID]
} else {
table = it.v4Tables[tableID]
}
ruleIdx := table.BuiltinChains[hook]
switch verdict := it.checkChain(hook, pkt, table, ruleIdx, gso, r, preroutingAddr, inNicName, outNicName); verdict {
// If the table returns Accept, move on to the next table.
case chainAccept:
continue
// The Drop verdict is final.
case chainDrop:
return false
case chainReturn:
// Any Return from a built-in chain means we have to
// call the underflow.
underflow := table.Rules[table.Underflows[hook]]
switch v, _ := underflow.Target.Action(pkt, &it.connections, hook, gso, r, preroutingAddr); v {
case RuleAccept:
continue
case RuleDrop:
return false
case RuleJump, RuleReturn:
panic("Underflows should only return RuleAccept or RuleDrop.")
default:
panic(fmt.Sprintf("Unknown verdict: %d", v))
}
default:
panic(fmt.Sprintf("Unknown verdict %v.", verdict))
}
}
// If this connection should be tracked, try to add an entry for it. If
// traversing the nat table didn't end in adding an entry,
// maybeInsertNoop will add a no-op entry for the connection. This is
// needeed when establishing connections so that the SYN/ACK reply to an
// outgoing SYN is delivered to the correct endpoint rather than being
// redirected by a prerouting rule.
//
// From the iptables documentation: "If there is no rule, a `null'
// binding is created: this usually does not map the packet, but exists
// to ensure we don't map another stream over an existing one."
if shouldTrack {
it.connections.maybeInsertNoop(pkt, hook)
}
// Every table returned Accept.
return true
}
// beforeSave is invoked by stateify.
func (it *IPTables) beforeSave() {
// Ensure the reaper exits cleanly.
it.reaperDone <- struct{}{}
// Prevent others from modifying the connection table.
it.connections.mu.Lock()
}
// afterLoad is invoked by stateify.
func (it *IPTables) afterLoad() {
it.startReaper(reaperDelay)
}
// startReaper starts a goroutine that wakes up periodically to reap timed out
// connections.
func (it *IPTables) startReaper(interval time.Duration) {
go func() { // S/R-SAFE: reaperDone is signalled when iptables is saved.
bucket := 0
for {
select {
case <-it.reaperDone:
return
case <-time.After(interval):
bucket, interval = it.connections.reapUnused(bucket, interval)
}
}
}()
}
// CheckPackets runs pkts through the rules for hook and returns a map of packets that
// should not go forward.
//
// Preconditions:
// * pkt is a IPv4 packet of at least length header.IPv4MinimumSize.
// * pkt.NetworkHeader is not nil.
//
// NOTE: unlike the Check API the returned map contains packets that should be
// dropped.
func (it *IPTables) CheckPackets(hook Hook, pkts PacketBufferList, gso *GSO, r *Route, inNicName, outNicName string) (drop map[*PacketBuffer]struct{}, natPkts map[*PacketBuffer]struct{}) {
for pkt := pkts.Front(); pkt != nil; pkt = pkt.Next() {
if !pkt.NatDone {
if ok := it.Check(hook, pkt, gso, r, "", inNicName, outNicName); !ok {
if drop == nil {
drop = make(map[*PacketBuffer]struct{})
}
drop[pkt] = struct{}{}
}
if pkt.NatDone {
if natPkts == nil {
natPkts = make(map[*PacketBuffer]struct{})
}
natPkts[pkt] = struct{}{}
}
}
}
return drop, natPkts
}
// Preconditions:
// * pkt is a IPv4 packet of at least length header.IPv4MinimumSize.
// * pkt.NetworkHeader is not nil.
func (it *IPTables) checkChain(hook Hook, pkt *PacketBuffer, table Table, ruleIdx int, gso *GSO, r *Route, preroutingAddr tcpip.Address, inNicName, outNicName string) chainVerdict {
// Start from ruleIdx and walk the list of rules until a rule gives us
// a verdict.
for ruleIdx < len(table.Rules) {
switch verdict, jumpTo := it.checkRule(hook, pkt, table, ruleIdx, gso, r, preroutingAddr, inNicName, outNicName); verdict {
case RuleAccept:
return chainAccept
case RuleDrop:
return chainDrop
case RuleReturn:
return chainReturn
case RuleJump:
// "Jumping" to the next rule just means we're
// continuing on down the list.
if jumpTo == ruleIdx+1 {
ruleIdx++
continue
}
switch verdict := it.checkChain(hook, pkt, table, jumpTo, gso, r, preroutingAddr, inNicName, outNicName); verdict {
case chainAccept:
return chainAccept
case chainDrop:
return chainDrop
case chainReturn:
ruleIdx++
continue
default:
panic(fmt.Sprintf("Unknown verdict: %d", verdict))
}
default:
panic(fmt.Sprintf("Unknown verdict: %d", verdict))
}
}
// We got through the entire table without a decision. Default to DROP
// for safety.
return chainDrop
}
// Preconditions:
// * pkt is a IPv4 packet of at least length header.IPv4MinimumSize.
// * pkt.NetworkHeader is not nil.
func (it *IPTables) checkRule(hook Hook, pkt *PacketBuffer, table Table, ruleIdx int, gso *GSO, r *Route, preroutingAddr tcpip.Address, inNicName, outNicName string) (RuleVerdict, int) {
rule := table.Rules[ruleIdx]
// Check whether the packet matches the IP header filter.
if !rule.Filter.match(pkt, hook, inNicName, outNicName) {
// Continue on to the next rule.
return RuleJump, ruleIdx + 1
}
// Go through each rule matcher. If they all match, run
// the rule target.
for _, matcher := range rule.Matchers {
matches, hotdrop := matcher.Match(hook, pkt, inNicName, outNicName)
if hotdrop {
return RuleDrop, 0
}
if !matches {
// Continue on to the next rule.
return RuleJump, ruleIdx + 1
}
}
// All the matchers matched, so run the target.
return rule.Target.Action(pkt, &it.connections, hook, gso, r, preroutingAddr)
}
// OriginalDst returns the original destination of redirected connections. It
// returns an error if the connection doesn't exist or isn't redirected.
func (it *IPTables) OriginalDst(epID TransportEndpointID, netProto tcpip.NetworkProtocolNumber) (tcpip.Address, uint16, tcpip.Error) {
it.mu.RLock()
defer it.mu.RUnlock()
if !it.modified {
return "", 0, &tcpip.ErrNotConnected{}
}
return it.connections.originalDst(epID, netProto)
}