| // Copyright 2018 Google Inc. |
| // |
| // Licensed under the Apache License, Version 2.0 (the "License"); |
| // you may not use this file except in compliance with the License. |
| // You may obtain a copy of the License at |
| // |
| // http://www.apache.org/licenses/LICENSE-2.0 |
| // |
| // Unless required by applicable law or agreed to in writing, software |
| // distributed under the License is distributed on an "AS IS" BASIS, |
| // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| // See the License for the specific language governing permissions and |
| // limitations under the License. |
| |
| // +build linux |
| |
| // Package fdbased provides the implementation of data-link layer endpoints |
| // backed by boundary-preserving file descriptors (e.g., TUN devices, |
| // seqpacket/datagram sockets). |
| // |
| // FD based endpoints can be used in the networking stack by calling New() to |
| // create a new endpoint, and then passing it as an argument to |
| // Stack.CreateNIC(). |
| package fdbased |
| |
| import ( |
| "syscall" |
| |
| "github.com/google/netstack/tcpip" |
| "github.com/google/netstack/tcpip/buffer" |
| "github.com/google/netstack/tcpip/header" |
| "github.com/google/netstack/tcpip/link/rawfile" |
| "github.com/google/netstack/tcpip/stack" |
| ) |
| |
// BufConfig lists, in order, the size in bytes of each view making up the
// vectorised buffer that a packet is read into from the NIC. The ten entries
// add up to 65664 bytes.
var BufConfig = []int{
	128, 256, 256, 512, 1024,
	2048, 4096, 8192, 16384, 32768,
}
| |
| type endpoint struct { |
| // fd is the file descriptor used to send and receive packets. |
| fd int |
| |
| // mtu (maximum transmission unit) is the maximum size of a packet. |
| mtu uint32 |
| |
| // hdrSize specifies the link-layer header size. If set to 0, no header |
| // is added/removed; otherwise an ethernet header is used. |
| hdrSize int |
| |
| // addr is the address of the endpoint. |
| addr tcpip.LinkAddress |
| |
| // caps holds the endpoint capabilities. |
| caps stack.LinkEndpointCapabilities |
| |
| // closed is a function to be called when the FD's peer (if any) closes |
| // its end of the communication pipe. |
| closed func(*tcpip.Error) |
| |
| iovecs []syscall.Iovec |
| views []buffer.View |
| dispatcher stack.NetworkDispatcher |
| |
| // handleLocal indicates whether packets destined to itself should be |
| // handled by the netstack internally (true) or be forwarded to the FD |
| // endpoint (false). |
| handleLocal bool |
| } |
| |
| // Options specify the details about the fd-based endpoint to be created. |
| type Options struct { |
| FD int |
| MTU uint32 |
| EthernetHeader bool |
| ChecksumOffload bool |
| ClosedFunc func(*tcpip.Error) |
| Address tcpip.LinkAddress |
| SaveRestore bool |
| DisconnectOk bool |
| HandleLocal bool |
| } |
| |
| // New creates a new fd-based endpoint. |
| // |
| // Makes fd non-blocking, but does not take ownership of fd, which must remain |
| // open for the lifetime of the returned endpoint. |
| func New(opts *Options) tcpip.LinkEndpointID { |
| syscall.SetNonblock(opts.FD, true) |
| |
| caps := stack.LinkEndpointCapabilities(0) |
| if opts.ChecksumOffload { |
| caps |= stack.CapabilityChecksumOffload |
| } |
| |
| hdrSize := 0 |
| if opts.EthernetHeader { |
| hdrSize = header.EthernetMinimumSize |
| caps |= stack.CapabilityResolutionRequired |
| } |
| |
| if opts.SaveRestore { |
| caps |= stack.CapabilitySaveRestore |
| } |
| |
| if opts.DisconnectOk { |
| caps |= stack.CapabilityDisconnectOk |
| } |
| |
| e := &endpoint{ |
| fd: opts.FD, |
| mtu: opts.MTU, |
| caps: caps, |
| closed: opts.ClosedFunc, |
| addr: opts.Address, |
| hdrSize: hdrSize, |
| views: make([]buffer.View, len(BufConfig)), |
| iovecs: make([]syscall.Iovec, len(BufConfig)), |
| handleLocal: opts.HandleLocal, |
| } |
| return stack.RegisterLinkEndpoint(e) |
| } |
| |
| // Attach launches the goroutine that reads packets from the file descriptor and |
| // dispatches them via the provided dispatcher. |
| func (e *endpoint) Attach(dispatcher stack.NetworkDispatcher) { |
| e.dispatcher = dispatcher |
| // Link endpoints are not savable. When transportation endpoints are |
| // saved, they stop sending outgoing packets and all incoming packets |
| // are rejected. |
| go e.dispatchLoop() |
| } |
| |
| // IsAttached implements stack.LinkEndpoint.IsAttached. |
| func (e *endpoint) IsAttached() bool { |
| return e.dispatcher != nil |
| } |
| |
| // MTU implements stack.LinkEndpoint.MTU. It returns the value initialized |
| // during construction. |
| func (e *endpoint) MTU() uint32 { |
| return e.mtu |
| } |
| |
| // Capabilities implements stack.LinkEndpoint.Capabilities. |
| func (e *endpoint) Capabilities() stack.LinkEndpointCapabilities { |
| return e.caps |
| } |
| |
| // MaxHeaderLength returns the maximum size of the link-layer header. |
| func (e *endpoint) MaxHeaderLength() uint16 { |
| return uint16(e.hdrSize) |
| } |
| |
| // LinkAddress returns the link address of this endpoint. |
| func (e *endpoint) LinkAddress() tcpip.LinkAddress { |
| return e.addr |
| } |
| |
| // WritePacket writes outbound packets to the file descriptor. If it is not |
| // currently writable, the packet is dropped. |
| func (e *endpoint) WritePacket(r *stack.Route, hdr buffer.Prependable, payload buffer.VectorisedView, protocol tcpip.NetworkProtocolNumber) *tcpip.Error { |
| if e.handleLocal && r.LocalAddress != "" && r.LocalAddress == r.RemoteAddress { |
| views := make([]buffer.View, 1, 1+len(payload.Views())) |
| views[0] = hdr.View() |
| views = append(views, payload.Views()...) |
| vv := buffer.NewVectorisedView(len(views[0])+payload.Size(), views) |
| e.dispatcher.DeliverNetworkPacket(e, r.RemoteLinkAddress, r.LocalLinkAddress, protocol, vv) |
| return nil |
| } |
| if e.hdrSize > 0 { |
| // Add ethernet header if needed. |
| eth := header.Ethernet(hdr.Prepend(header.EthernetMinimumSize)) |
| ethHdr := &header.EthernetFields{ |
| DstAddr: r.RemoteLinkAddress, |
| Type: protocol, |
| } |
| |
| // Preserve the src address if it's set in the route. |
| if r.LocalLinkAddress != "" { |
| ethHdr.SrcAddr = r.LocalLinkAddress |
| } else { |
| ethHdr.SrcAddr = e.addr |
| } |
| eth.Encode(ethHdr) |
| } |
| |
| if payload.Size() == 0 { |
| return rawfile.NonBlockingWrite(e.fd, hdr.View()) |
| } |
| |
| return rawfile.NonBlockingWrite2(e.fd, hdr.View(), payload.ToView()) |
| } |
| |
| func (e *endpoint) capViews(n int, buffers []int) int { |
| c := 0 |
| for i, s := range buffers { |
| c += s |
| if c >= n { |
| e.views[i].CapLength(s - (c - n)) |
| return i + 1 |
| } |
| } |
| return len(buffers) |
| } |
| |
| func (e *endpoint) allocateViews(bufConfig []int) { |
| for i, v := range e.views { |
| if v != nil { |
| break |
| } |
| b := buffer.NewView(bufConfig[i]) |
| e.views[i] = b |
| e.iovecs[i] = syscall.Iovec{ |
| Base: &b[0], |
| Len: uint64(len(b)), |
| } |
| } |
| } |
| |
| // dispatch reads one packet from the file descriptor and dispatches it. |
| func (e *endpoint) dispatch(largeV buffer.View) (bool, *tcpip.Error) { |
| e.allocateViews(BufConfig) |
| |
| n, err := rawfile.BlockingReadv(e.fd, e.iovecs) |
| if err != nil { |
| return false, err |
| } |
| |
| if n <= e.hdrSize { |
| return false, nil |
| } |
| |
| var ( |
| p tcpip.NetworkProtocolNumber |
| remoteLinkAddr, localLinkAddr tcpip.LinkAddress |
| ) |
| if e.hdrSize > 0 { |
| eth := header.Ethernet(e.views[0]) |
| p = eth.Type() |
| remoteLinkAddr = eth.SourceAddress() |
| localLinkAddr = eth.DestinationAddress() |
| } else { |
| // We don't get any indication of what the packet is, so try to guess |
| // if it's an IPv4 or IPv6 packet. |
| switch header.IPVersion(e.views[0]) { |
| case header.IPv4Version: |
| p = header.IPv4ProtocolNumber |
| case header.IPv6Version: |
| p = header.IPv6ProtocolNumber |
| default: |
| return true, nil |
| } |
| } |
| |
| used := e.capViews(n, BufConfig) |
| vv := buffer.NewVectorisedView(n, e.views[:used]) |
| vv.TrimFront(e.hdrSize) |
| |
| e.dispatcher.DeliverNetworkPacket(e, remoteLinkAddr, localLinkAddr, p, vv) |
| |
| // Prepare e.views for another packet: release used views. |
| for i := 0; i < used; i++ { |
| e.views[i] = nil |
| } |
| |
| return true, nil |
| } |
| |
| // dispatchLoop reads packets from the file descriptor in a loop and dispatches |
| // them to the network stack. |
| func (e *endpoint) dispatchLoop() *tcpip.Error { |
| v := buffer.NewView(header.MaxIPPacketSize) |
| for { |
| cont, err := e.dispatch(v) |
| if err != nil || !cont { |
| if e.closed != nil { |
| e.closed(err) |
| } |
| return err |
| } |
| } |
| } |
| |
| // InjectableEndpoint is an injectable fd-based endpoint. The endpoint writes |
| // to the FD, but does not read from it. All reads come from injected packets. |
| type InjectableEndpoint struct { |
| endpoint |
| |
| dispatcher stack.NetworkDispatcher |
| } |
| |
| // Attach saves the stack network-layer dispatcher for use later when packets |
| // are injected. |
| func (e *InjectableEndpoint) Attach(dispatcher stack.NetworkDispatcher) { |
| e.dispatcher = dispatcher |
| } |
| |
| // Inject injects an inbound packet. |
| func (e *InjectableEndpoint) Inject(protocol tcpip.NetworkProtocolNumber, vv buffer.VectorisedView) { |
| e.dispatcher.DeliverNetworkPacket(e, "" /* remoteLinkAddr */, "" /* localLinkAddr */, protocol, vv) |
| } |
| |
| // NewInjectable creates a new fd-based InjectableEndpoint. |
| func NewInjectable(fd int, mtu uint32) (tcpip.LinkEndpointID, *InjectableEndpoint) { |
| syscall.SetNonblock(fd, true) |
| |
| e := &InjectableEndpoint{endpoint: endpoint{ |
| fd: fd, |
| mtu: mtu, |
| }} |
| |
| return stack.RegisterLinkEndpoint(e), e |
| } |