Casey Callendrello a124fb36e6 bridge: various fixes
* Don't set the MAC, send gratuitous arp instead
* Set the bridge's MAC to itself
* Only disable DAD when necessary
2017-08-28 18:12:49 +02:00

503 lines
14 KiB
Go

// Copyright 2014 CNI authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package main
import (
"encoding/json"
"errors"
"fmt"
"net"
"runtime"
"syscall"
"io/ioutil"
"github.com/containernetworking/cni/pkg/skel"
"github.com/containernetworking/cni/pkg/types"
"github.com/containernetworking/cni/pkg/types/current"
"github.com/containernetworking/cni/pkg/version"
"github.com/containernetworking/plugins/pkg/ip"
"github.com/containernetworking/plugins/pkg/ipam"
"github.com/containernetworking/plugins/pkg/ns"
"github.com/containernetworking/plugins/pkg/utils"
"github.com/j-keck/arping"
"github.com/vishvananda/netlink"
)
const defaultBrName = "cni0"
type NetConf struct {
types.NetConf
BrName string `json:"bridge"`
IsGW bool `json:"isGateway"`
IsDefaultGW bool `json:"isDefaultGateway"`
ForceAddress bool `json:"forceAddress"`
IPMasq bool `json:"ipMasq"`
MTU int `json:"mtu"`
HairpinMode bool `json:"hairpinMode"`
PromiscMode bool `json:"promiscMode"`
}
type gwInfo struct {
gws []net.IPNet
family int
defaultRouteFound bool
}
func init() {
// this ensures that main runs only on main thread (thread group leader).
// since namespace ops (unshare, setns) are done for a single thread, we
// must ensure that the goroutine does not jump from OS thread to thread
runtime.LockOSThread()
}
func loadNetConf(bytes []byte) (*NetConf, string, error) {
n := &NetConf{
BrName: defaultBrName,
}
if err := json.Unmarshal(bytes, n); err != nil {
return nil, "", fmt.Errorf("failed to load netconf: %v", err)
}
return n, n.CNIVersion, nil
}
// calcGateways processes the results from the IPAM plugin and does the
// following for each IP family:
// - Calculates and compiles a list of gateway addresses
// - Adds a default route if needed
func calcGateways(result *current.Result, n *NetConf) (*gwInfo, *gwInfo, error) {
gwsV4 := &gwInfo{}
gwsV6 := &gwInfo{}
for _, ipc := range result.IPs {
// Determine if this config is IPv4 or IPv6
var gws *gwInfo
defaultNet := &net.IPNet{}
switch {
case ipc.Address.IP.To4() != nil:
gws = gwsV4
gws.family = netlink.FAMILY_V4
defaultNet.IP = net.IPv4zero
case len(ipc.Address.IP) == net.IPv6len:
gws = gwsV6
gws.family = netlink.FAMILY_V6
defaultNet.IP = net.IPv6zero
default:
return nil, nil, fmt.Errorf("Unknown IP object: %v", ipc)
}
defaultNet.Mask = net.IPMask(defaultNet.IP)
// All IPs currently refer to the container interface
ipc.Interface = current.Int(2)
// If not provided, calculate the gateway address corresponding
// to the selected IP address
if ipc.Gateway == nil && n.IsGW {
ipc.Gateway = calcGatewayIP(&ipc.Address)
}
// Add a default route for this family using the current
// gateway address if necessary.
if n.IsDefaultGW && !gws.defaultRouteFound {
for _, route := range result.Routes {
if route.GW != nil && defaultNet.String() == route.Dst.String() {
gws.defaultRouteFound = true
break
}
}
if !gws.defaultRouteFound {
result.Routes = append(
result.Routes,
&types.Route{Dst: *defaultNet, GW: ipc.Gateway},
)
gws.defaultRouteFound = true
}
}
// Append this gateway address to the list of gateways
if n.IsGW {
gw := net.IPNet{
IP: ipc.Gateway,
Mask: ipc.Address.Mask,
}
gws.gws = append(gws.gws, gw)
}
}
return gwsV4, gwsV6, nil
}
func ensureBridgeAddr(br *netlink.Bridge, family int, ipn *net.IPNet, forceAddress bool) error {
addrs, err := netlink.AddrList(br, family)
if err != nil && err != syscall.ENOENT {
return fmt.Errorf("could not get list of IP addresses: %v", err)
}
ipnStr := ipn.String()
for _, a := range addrs {
// string comp is actually easiest for doing IPNet comps
if a.IPNet.String() == ipnStr {
return nil
}
// Multiple IPv6 addresses are allowed on the bridge if the
// corresponding subnets do not overlap. For IPv4 or for
// overlapping IPv6 subnets, reconfigure the IP address if
// forceAddress is true, otherwise throw an error.
if family == netlink.FAMILY_V4 || a.IPNet.Contains(ipn.IP) || ipn.Contains(a.IPNet.IP) {
if forceAddress {
if err = deleteBridgeAddr(br, a.IPNet); err != nil {
return err
}
} else {
return fmt.Errorf("%q already has an IP address different from %v", br.Name, ipnStr)
}
}
}
addr := &netlink.Addr{IPNet: ipn, Label: ""}
if err := netlink.AddrAdd(br, addr); err != nil {
return fmt.Errorf("could not add IP address to %q: %v", br.Name, err)
}
// Set the bridge's MAC to itself. Otherwise, the bridge will take the
// lowest-numbered mac on the bridge, and will change as ifs churn
if err := netlink.LinkSetHardwareAddr(br, br.HardwareAddr); err != nil {
return fmt.Errorf("could not set bridge's mac: %v", err)
}
return nil
}
func deleteBridgeAddr(br *netlink.Bridge, ipn *net.IPNet) error {
addr := &netlink.Addr{IPNet: ipn, Label: ""}
if err := netlink.AddrDel(br, addr); err != nil {
return fmt.Errorf("could not remove IP address from %q: %v", br.Name, err)
}
return nil
}
func bridgeByName(name string) (*netlink.Bridge, error) {
l, err := netlink.LinkByName(name)
if err != nil {
return nil, fmt.Errorf("could not lookup %q: %v", name, err)
}
br, ok := l.(*netlink.Bridge)
if !ok {
return nil, fmt.Errorf("%q already exists but is not a bridge", name)
}
return br, nil
}
func ensureBridge(brName string, mtu int, promiscMode bool) (*netlink.Bridge, error) {
br := &netlink.Bridge{
LinkAttrs: netlink.LinkAttrs{
Name: brName,
MTU: mtu,
// Let kernel use default txqueuelen; leaving it unset
// means 0, and a zero-length TX queue messes up FIFO
// traffic shapers which use TX queue length as the
// default packet limit
TxQLen: -1,
},
}
err := netlink.LinkAdd(br)
if err != nil && err != syscall.EEXIST {
return nil, fmt.Errorf("could not add %q: %v", brName, err)
}
if promiscMode {
if err := netlink.SetPromiscOn(br); err != nil {
return nil, fmt.Errorf("could not set promiscuous mode on %q: %v", brName, err)
}
}
// Re-fetch link to read all attributes and if it already existed,
// ensure it's really a bridge with similar configuration
br, err = bridgeByName(brName)
if err != nil {
return nil, err
}
if err := netlink.LinkSetUp(br); err != nil {
return nil, err
}
return br, nil
}
func setupVeth(netns ns.NetNS, br *netlink.Bridge, ifName string, mtu int, hairpinMode bool) (*current.Interface, *current.Interface, error) {
contIface := &current.Interface{}
hostIface := &current.Interface{}
err := netns.Do(func(hostNS ns.NetNS) error {
// create the veth pair in the container and move host end into host netns
hostVeth, containerVeth, err := ip.SetupVeth(ifName, mtu, hostNS)
if err != nil {
return err
}
contIface.Name = containerVeth.Name
contIface.Mac = containerVeth.HardwareAddr.String()
contIface.Sandbox = netns.Path()
hostIface.Name = hostVeth.Name
return nil
})
if err != nil {
return nil, nil, err
}
// need to lookup hostVeth again as its index has changed during ns move
hostVeth, err := netlink.LinkByName(hostIface.Name)
if err != nil {
return nil, nil, fmt.Errorf("failed to lookup %q: %v", hostIface.Name, err)
}
hostIface.Mac = hostVeth.Attrs().HardwareAddr.String()
// connect host veth end to the bridge
if err := netlink.LinkSetMaster(hostVeth, br); err != nil {
return nil, nil, fmt.Errorf("failed to connect %q to bridge %v: %v", hostVeth.Attrs().Name, br.Attrs().Name, err)
}
// set hairpin mode
if err = netlink.LinkSetHairpin(hostVeth, hairpinMode); err != nil {
return nil, nil, fmt.Errorf("failed to setup hairpin mode for %v: %v", hostVeth.Attrs().Name, err)
}
return hostIface, contIface, nil
}
func calcGatewayIP(ipn *net.IPNet) net.IP {
nid := ipn.IP.Mask(ipn.Mask)
return ip.NextIP(nid)
}
func setupBridge(n *NetConf) (*netlink.Bridge, *current.Interface, error) {
// create bridge if necessary
br, err := ensureBridge(n.BrName, n.MTU, n.PromiscMode)
if err != nil {
return nil, nil, fmt.Errorf("failed to create bridge %q: %v", n.BrName, err)
}
return br, &current.Interface{
Name: br.Attrs().Name,
Mac: br.Attrs().HardwareAddr.String(),
}, nil
}
// disableIPV6DAD disables IPv6 Duplicate Address Detection (DAD)
// for an interface, if the interface does not support enhanced_dad.
// We do this because interfaces with hairpin mode will see their own DAD packets
func disableIPV6DAD(ifName string) error {
// ehanced_dad sends a nonce with the DAD packets, so that we can safely
// ignore ourselves
enh, err := ioutil.ReadFile(fmt.Sprintf("/proc/sys/net/ipv6/conf/%s/enhanced_dad", ifName))
if err == nil && string(enh) == "1\n" {
return nil
}
f := fmt.Sprintf("/proc/sys/net/ipv6/conf/%s/accept_dad", ifName)
return ioutil.WriteFile(f, []byte("0"), 0644)
}
func enableIPForward(family int) error {
if family == netlink.FAMILY_V4 {
return ip.EnableIP4Forward()
}
return ip.EnableIP6Forward()
}
func cmdAdd(args *skel.CmdArgs) error {
n, cniVersion, err := loadNetConf(args.StdinData)
if err != nil {
return err
}
if n.IsDefaultGW {
n.IsGW = true
}
if n.HairpinMode && n.PromiscMode {
return fmt.Errorf("cannot set hairpin mode and promiscous mode at the same time.")
}
br, brInterface, err := setupBridge(n)
if err != nil {
return err
}
netns, err := ns.GetNS(args.Netns)
if err != nil {
return fmt.Errorf("failed to open netns %q: %v", args.Netns, err)
}
defer netns.Close()
hostInterface, containerInterface, err := setupVeth(netns, br, args.IfName, n.MTU, n.HairpinMode)
if err != nil {
return err
}
// run the IPAM plugin and get back the config to apply
r, err := ipam.ExecAdd(n.IPAM.Type, args.StdinData)
if err != nil {
return err
}
// Convert whatever the IPAM result was into the current Result type
result, err := current.NewResultFromResult(r)
if err != nil {
return err
}
if len(result.IPs) == 0 {
return errors.New("IPAM plugin returned missing IP config")
}
result.Interfaces = []*current.Interface{brInterface, hostInterface, containerInterface}
// Gather gateway information for each IP family
gwsV4, gwsV6, err := calcGateways(result, n)
if err != nil {
return err
}
// Configure the container hardware address and IP address(es)
if err := netns.Do(func(_ ns.NetNS) error {
contVeth, err := net.InterfaceByName(args.IfName)
if err != nil {
return err
}
// Disable IPv6 DAD just in case hairpin mode is enabled on the
// bridge. Hairpin mode causes echos of neighbor solicitation
// packets, which causes DAD failures.
for _, ipc := range result.IPs {
if ipc.Version == "6" && (n.HairpinMode || n.PromiscMode) {
if err := disableIPV6DAD(args.IfName); err != nil {
return err
}
break
}
}
// Add the IP to the interface
if err := ipam.ConfigureIface(args.IfName, result); err != nil {
return err
}
// Send a gratuitous arp
for _, ipc := range result.IPs {
if ipc.Version == "4" {
_ = arping.GratuitousArpOverIface(ipc.Address.IP, *contVeth)
}
}
return nil
}); err != nil {
return err
}
if n.IsGW {
var firstV4Addr net.IP
// Set the IP address(es) on the bridge and enable forwarding
for _, gws := range []*gwInfo{gwsV4, gwsV6} {
for _, gw := range gws.gws {
if gw.IP.To4() != nil && firstV4Addr == nil {
firstV4Addr = gw.IP
}
err = ensureBridgeAddr(br, gws.family, &gw, n.ForceAddress)
if err != nil {
return fmt.Errorf("failed to set bridge addr: %v", err)
}
}
if gws.gws != nil {
if err = enableIPForward(gws.family); err != nil {
return fmt.Errorf("failed to enable forwarding: %v", err)
}
}
}
}
if n.IPMasq {
chain := utils.FormatChainName(n.Name, args.ContainerID)
comment := utils.FormatComment(n.Name, args.ContainerID)
for _, ipc := range result.IPs {
if err = ip.SetupIPMasq(ip.Network(&ipc.Address), chain, comment); err != nil {
return err
}
}
}
// Refetch the bridge since its MAC address may change when the first
// veth is added or after its IP address is set
br, err = bridgeByName(n.BrName)
if err != nil {
return err
}
brInterface.Mac = br.Attrs().HardwareAddr.String()
result.DNS = n.DNS
return types.PrintResult(result, cniVersion)
}
func cmdDel(args *skel.CmdArgs) error {
n, _, err := loadNetConf(args.StdinData)
if err != nil {
return err
}
if err := ipam.ExecDel(n.IPAM.Type, args.StdinData); err != nil {
return err
}
if args.Netns == "" {
return nil
}
// There is a netns so try to clean up. Delete can be called multiple times
// so don't return an error if the device is already removed.
// If the device isn't there then don't try to clean up IP masq either.
var ipn *net.IPNet
err = ns.WithNetNSPath(args.Netns, func(_ ns.NetNS) error {
var err error
ipn, err = ip.DelLinkByNameAddr(args.IfName, netlink.FAMILY_ALL)
if err != nil && err == ip.ErrLinkNotFound {
return nil
}
return err
})
if err != nil {
return err
}
if ipn != nil && n.IPMasq {
chain := utils.FormatChainName(n.Name, args.ContainerID)
comment := utils.FormatComment(n.Name, args.ContainerID)
err = ip.TeardownIPMasq(ipn, chain, comment)
}
return err
}
func main() {
skel.PluginMain(cmdAdd, cmdDel, version.All)
}