Miguel Duarte Barroso edab9efdea tap: allow for a tap device to be created as a bridge port
This extends the tap plugin API enabling the user to instruct the CNI
plugin the created tap device must be set as a port of an *existing*
linux bridge on the pod network namespace.

This is helpful for KubeVirt, allowing network connectivity to be
extended from the pod's interface into the Virtual Machine running
inside the pod.

Signed-off-by: Miguel Duarte Barroso <mdbarroso@redhat.com>
2023-05-19 16:26:14 +02:00

459 lines
12 KiB
Go

// Copyright 2022 CNI authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package main
import (
"encoding/json"
"errors"
"fmt"
"net"
"os/exec"
"runtime"
"strconv"
"syscall"
"github.com/opencontainers/selinux/go-selinux"
"github.com/vishvananda/netlink"
"golang.org/x/sys/unix"
"github.com/containernetworking/cni/pkg/skel"
"github.com/containernetworking/cni/pkg/types"
current "github.com/containernetworking/cni/pkg/types/100"
"github.com/containernetworking/cni/pkg/version"
"github.com/containernetworking/plugins/pkg/ip"
"github.com/containernetworking/plugins/pkg/ipam"
"github.com/containernetworking/plugins/pkg/ns"
bv "github.com/containernetworking/plugins/pkg/utils/buildversion"
"github.com/containernetworking/plugins/pkg/utils/sysctl"
)
type NetConf struct {
types.NetConf
MultiQueue bool `json:"multiQueue"`
MTU int `json:"mtu"`
Mac string `json:"mac,omitempty"`
Owner *uint32 `json:"owner,omitempty"`
Group *uint32 `json:"group,omitempty"`
SelinuxContext string `json:"selinuxContext,omitempty"`
Bridge string `json:"bridge,omitempty"`
Args *struct{} `json:"args,omitempty"`
RuntimeConfig struct {
Mac string `json:"mac,omitempty"`
} `json:"runtimeConfig,omitempty"`
}
// MacEnvArgs represents CNI_ARG
type MacEnvArgs struct {
types.CommonArgs
MAC types.UnmarshallableString `json:"mac,omitempty"`
}
func init() {
// this ensures that main runs only on main thread (thread group leader).
// since namespace ops (unshare, setns) are done for a single thread, we
// must ensure that the goroutine does not jump from OS thread to thread
runtime.LockOSThread()
}
func loadConf(args *skel.CmdArgs) (*NetConf, string, error) {
n := &NetConf{}
if err := json.Unmarshal(args.StdinData, n); err != nil {
return nil, "", fmt.Errorf("failed to load netconf: %v", err)
}
if args.Args != "" {
e := MacEnvArgs{}
err := types.LoadArgs(args.Args, &e)
if err != nil {
return nil, "", err
}
if e.MAC != "" {
n.Mac = string(e.MAC)
}
}
if n.RuntimeConfig.Mac != "" {
n.Mac = n.RuntimeConfig.Mac
}
return n, n.CNIVersion, nil
}
// We want to share the parent process std{in|out|err} - fds 0 through 2.
// Since the FDs are inherited on fork / exec, we close on exec all others.
func closeFileDescriptorsOnExec() {
minFDToCloseOnExec := 3
maxFDToCloseOnExec := 256
for fd := minFDToCloseOnExec; fd < maxFDToCloseOnExec; fd++ {
syscall.CloseOnExec(fd)
}
}
// Due to issues with the vishvananda/netlink library (fix pending) it is not possible to create an ownerless/groupless
// tap device. Until the issue is fixed, the workaround for creating a tap device with no owner/group is to use the iptool
func createTapWithIptool(tmpName string, mtu int, multiqueue bool, mac string, owner *uint32, group *uint32) error {
closeFileDescriptorsOnExec()
tapDeviceArgs := []string{"tuntap", "add", "mode", "tap", "name", tmpName}
if multiqueue {
tapDeviceArgs = append(tapDeviceArgs, "multi_queue")
}
if owner != nil {
tapDeviceArgs = append(tapDeviceArgs, "user", fmt.Sprintf("%d", *owner))
}
if group != nil {
tapDeviceArgs = append(tapDeviceArgs, "group", fmt.Sprintf("%d", *group))
}
output, err := exec.Command("ip", tapDeviceArgs...).CombinedOutput()
if err != nil {
return fmt.Errorf("failed to run command %s: %v", output, err)
}
tapDeviceArgs = []string{"link", "set", tmpName}
if mtu != 0 {
tapDeviceArgs = append(tapDeviceArgs, "mtu", strconv.Itoa(mtu))
}
if mac != "" {
tapDeviceArgs = append(tapDeviceArgs, "address", mac)
}
output, err = exec.Command("ip", tapDeviceArgs...).CombinedOutput()
if err != nil {
return fmt.Errorf("failed to run command %s: %v", output, err)
}
return nil
}
func createLinkWithNetlink(tmpName string, mtu int, nsFd int, multiqueue bool, mac string, owner *uint32, group *uint32) error {
linkAttrs := netlink.LinkAttrs{
Name: tmpName,
Namespace: netlink.NsFd(nsFd),
}
if mtu != 0 {
linkAttrs.MTU = mtu
}
mv := &netlink.Tuntap{
LinkAttrs: linkAttrs,
Mode: netlink.TUNTAP_MODE_TAP,
}
if owner != nil {
mv.Owner = *owner
}
if group != nil {
mv.Group = *group
}
if mac != "" {
addr, err := net.ParseMAC(mac)
if err != nil {
return fmt.Errorf("invalid args %v for MAC addr: %v", mac, err)
}
linkAttrs.HardwareAddr = addr
}
mv.Flags = netlink.TUNTAP_VNET_HDR | unix.IFF_TAP
if multiqueue {
mv.Flags = netlink.TUNTAP_MULTI_QUEUE_DEFAULTS | mv.Flags
}
if err := netlink.LinkAdd(mv); err != nil {
return fmt.Errorf("failed to create tap: %v", err)
}
return nil
}
func createLink(tmpName string, conf *NetConf, netns ns.NetNS) error {
switch {
case conf.SelinuxContext != "":
if err := selinux.SetExecLabel(conf.SelinuxContext); err != nil {
return fmt.Errorf("failed set socket label: %v", err)
}
return createTapWithIptool(tmpName, conf.MTU, conf.MultiQueue, conf.Mac, conf.Owner, conf.Group)
case conf.Owner == nil || conf.Group == nil:
return createTapWithIptool(tmpName, conf.MTU, conf.MultiQueue, conf.Mac, conf.Owner, conf.Group)
default:
return createLinkWithNetlink(tmpName, conf.MTU, int(netns.Fd()), conf.MultiQueue, conf.Mac, conf.Owner, conf.Group)
}
}
func createTap(conf *NetConf, ifName string, netns ns.NetNS) (*current.Interface, error) {
tap := &current.Interface{}
// due to kernel bug we have to create with tmpName or it might
// collide with the name on the host and error out
tmpName, err := ip.RandomVethName()
if err != nil {
return nil, err
}
err = netns.Do(func(_ ns.NetNS) error {
err := createLink(tmpName, conf, netns)
if err != nil {
return err
}
if err = ip.RenameLink(tmpName, ifName); err != nil {
link, err := netlink.LinkByName(tmpName)
if err != nil {
netlink.LinkDel(link)
return fmt.Errorf("failed to rename tap to %q: %v", ifName, err)
}
}
tap.Name = ifName
// Re-fetch link to get all properties/attributes
link, err := netlink.LinkByName(ifName)
if err != nil {
return fmt.Errorf("failed to refetch tap %q: %v", ifName, err)
}
if conf.Bridge != "" {
bridge, err := netlink.LinkByName(conf.Bridge)
if err != nil {
return fmt.Errorf("failed to get bridge %s: %v", conf.Bridge, err)
}
tapDev := link
if err := netlink.LinkSetMaster(tapDev, bridge); err != nil {
return fmt.Errorf("failed to set tap %s as a port of bridge %s: %v", tap.Name, conf.Bridge, err)
}
}
err = netlink.LinkSetUp(link)
if err != nil {
return fmt.Errorf("failed to set tap interface up: %v", err)
}
tap.Mac = link.Attrs().HardwareAddr.String()
tap.Sandbox = netns.Path()
return nil
})
if err != nil {
return nil, err
}
return tap, nil
}
func cmdAdd(args *skel.CmdArgs) error {
n, cniVersion, err := loadConf(args)
if err != nil {
return err
}
isLayer3 := n.IPAM.Type != ""
netns, err := ns.GetNS(args.Netns)
if err != nil {
return fmt.Errorf("failed to open netns %q: %v", netns, err)
}
defer netns.Close()
tapInterface, err := createTap(n, args.IfName, netns)
if err != nil {
return err
}
// Delete link if err to avoid link leak in this ns
defer func() {
if err != nil {
netns.Do(func(_ ns.NetNS) error {
return ip.DelLinkByName(args.IfName)
})
}
}()
// Assume L2 interface only
result := &current.Result{
CNIVersion: current.ImplementedSpecVersion,
Interfaces: []*current.Interface{tapInterface},
}
if isLayer3 {
// run the IPAM plugin and get back the config to apply
r, err := ipam.ExecAdd(n.IPAM.Type, args.StdinData)
if err != nil {
return err
}
// Invoke ipam del if err to avoid ip leak
defer func() {
if err != nil {
ipam.ExecDel(n.IPAM.Type, args.StdinData)
}
}()
// Convert whatever the IPAM result was into the current Result type
ipamResult, err := current.NewResultFromResult(r)
if err != nil {
return err
}
if len(ipamResult.IPs) == 0 {
return errors.New("IPAM plugin returned missing IP config")
}
result.IPs = ipamResult.IPs
result.Routes = ipamResult.Routes
for _, ipc := range result.IPs {
// All addresses apply to the container tap interface
ipc.Interface = current.Int(0)
}
err = netns.Do(func(_ ns.NetNS) error {
_, _ = sysctl.Sysctl(fmt.Sprintf("net/ipv4/conf/%s/arp_notify", args.IfName), "1")
return ipam.ConfigureIface(args.IfName, result)
})
if err != nil {
return err
}
} else {
// For L2 just change interface status to up
err = netns.Do(func(_ ns.NetNS) error {
tapInterfaceLink, err := netlink.LinkByName(args.IfName)
if err != nil {
return fmt.Errorf("failed to find interface name %q: %v", tapInterface.Name, err)
}
if err := netlink.LinkSetUp(tapInterfaceLink); err != nil {
return fmt.Errorf("failed to set %q UP: %v", args.IfName, err)
}
return nil
})
if err != nil {
return err
}
}
result.DNS = n.DNS
return types.PrintResult(result, cniVersion)
}
func cmdDel(args *skel.CmdArgs) error {
n, _, err := loadConf(args)
if err != nil {
return err
}
isLayer3 := n.IPAM.Type != ""
if isLayer3 {
err = ipam.ExecDel(n.IPAM.Type, args.StdinData)
if err != nil {
return err
}
}
if args.Netns == "" {
return nil
}
// There is a netns so try to clean up. Delete can be called multiple times
// so don't return an error if the device is already removed.
err = ns.WithNetNSPath(args.Netns, func(_ ns.NetNS) error {
if err := ip.DelLinkByName(args.IfName); err != nil {
if err != ip.ErrLinkNotFound {
return err
}
}
return nil
})
if err != nil {
// if NetNs is passed down by the Cloud Orchestration Engine, or if it called multiple times
// so don't return an error if the device is already removed.
// https://github.com/kubernetes/kubernetes/issues/43014#issuecomment-287164444
_, ok := err.(ns.NSPathNotExistErr)
if ok {
return nil
}
return err
}
return err
}
func main() {
skel.PluginMain(cmdAdd, cmdCheck, cmdDel, version.All, bv.BuildString("tap"))
}
func cmdCheck(args *skel.CmdArgs) error {
n, _, err := loadConf(args)
if err != nil {
return err
}
isLayer3 := n.IPAM.Type != ""
netns, err := ns.GetNS(args.Netns)
if err != nil {
return fmt.Errorf("failed to open netns %q: %v", args.Netns, err)
}
defer netns.Close()
if isLayer3 {
// run the IPAM plugin and get back the config to apply
err = ipam.ExecCheck(n.IPAM.Type, args.StdinData)
if err != nil {
return err
}
}
// Parse previous result.
if n.NetConf.RawPrevResult == nil {
return fmt.Errorf("Required prevResult missing")
}
if err := version.ParsePrevResult(&n.NetConf); err != nil {
return err
}
result, err := current.NewResultFromResult(n.PrevResult)
if err != nil {
return err
}
var contMap current.Interface
// Find interfaces for names whe know, tap device name inside container
for _, intf := range result.Interfaces {
if args.IfName == intf.Name {
if args.Netns == intf.Sandbox {
contMap = *intf
continue
}
}
}
// The namespace must be the same as what was configured
if args.Netns != contMap.Sandbox {
return fmt.Errorf("Sandbox in prevResult %s doesn't match configured netns: %s",
contMap.Sandbox, args.Netns)
}
// Check prevResults for ips, routes and dns against values found in the container
return netns.Do(func(_ ns.NetNS) error {
err = ip.ValidateExpectedInterfaceIPs(args.IfName, result.IPs)
if err != nil {
return err
}
err = ip.ValidateExpectedRoute(result.Routes)
if err != nil {
return err
}
return nil
})
}