host-device: use temp network namespace for rename

Using a temporary name / doing a fast rename causes
some race conditions with udev and NetworkManager:
https://gitlab.freedesktop.org/NetworkManager/NetworkManager/-/issues/1599

Signed-off-by: Etienne Champetier <e.champetier@ateme.com>
This commit is contained in:
Etienne Champetier 2024-08-13 17:32:21 -04:00 committed by Casey Callendrello
parent 3a49cff1f6
commit a4b80cc634
2 changed files with 221 additions and 136 deletions

View File

@ -31,6 +31,10 @@ func GetCurrentNS() (NetNS, error) {
// return an unexpected network namespace. // return an unexpected network namespace.
runtime.LockOSThread() runtime.LockOSThread()
defer runtime.UnlockOSThread() defer runtime.UnlockOSThread()
return getCurrentNSNoLock()
}
func getCurrentNSNoLock() (NetNS, error) {
return GetNS(getCurrentThreadNetNSPath()) return GetNS(getCurrentThreadNetNSPath())
} }
@ -152,6 +156,54 @@ func GetNS(nspath string) (NetNS, error) {
return &netNS{file: fd}, nil return &netNS{file: fd}, nil
} }
// Returns a new empty NetNS.
// Calling Close() let the kernel garbage collect the network namespace.
func TempNetNS() (NetNS, error) {
var tempNS NetNS
var err error
var wg sync.WaitGroup
wg.Add(1)
// Create the new namespace in a new goroutine so that if we later fail
// to switch the namespace back to the original one, we can safely
// leave the thread locked to die without a risk of the current thread
// left lingering with incorrect namespace.
go func() {
defer wg.Done()
runtime.LockOSThread()
var threadNS NetNS
// save a handle to current network namespace
threadNS, err = getCurrentNSNoLock()
if err != nil {
err = fmt.Errorf("failed to open current namespace: %v", err)
return
}
defer threadNS.Close()
// create the temporary network namespace
err = unix.Unshare(unix.CLONE_NEWNET)
if err != nil {
return
}
// get a handle to the temporary network namespace
tempNS, err = getCurrentNSNoLock()
err2 := threadNS.Set()
if err2 == nil {
// Unlock the current thread only when we successfully switched back
// to the original namespace; otherwise leave the thread locked which
// will force the runtime to scrap the current thread, that is maybe
// not as optimal but at least always safe to do.
runtime.UnlockOSThread()
}
}()
wg.Wait()
return tempNS, err
}
func (ns *netNS) Path() string { func (ns *netNS) Path() string {
return ns.file.Name() return ns.file.Name()
} }
@ -173,7 +225,7 @@ func (ns *netNS) Do(toRun func(NetNS) error) error {
} }
containedCall := func(hostNS NetNS) error { containedCall := func(hostNS NetNS) error {
threadNS, err := GetCurrentNS() threadNS, err := getCurrentNSNoLock()
if err != nil { if err != nil {
return fmt.Errorf("failed to open current netns: %v", err) return fmt.Errorf("failed to open current netns: %v", err)
} }

View File

@ -230,118 +230,121 @@ func cmdDel(args *skel.CmdArgs) error {
return nil return nil
} }
// setTempName sets a temporary name for netdevice, returns updated Link object or error func moveLinkIn(hostDev netlink.Link, containerNs ns.NetNS, containerIfName string) (netlink.Link, error) {
// if occurred.
func setTempName(dev netlink.Link) (netlink.Link, error) {
tempName := fmt.Sprintf("%s%d", "temp_", dev.Attrs().Index)
// rename to tempName
if err := netlink.LinkSetName(dev, tempName); err != nil {
return nil, fmt.Errorf("failed to rename device %q to %q: %v", dev.Attrs().Name, tempName, err)
}
// Get updated Link obj
tempDev, err := netlink.LinkByName(tempName)
if err != nil {
return nil, fmt.Errorf("failed to find %q after rename to %q: %v", dev.Attrs().Name, tempName, err)
}
return tempDev, nil
}
func moveLinkIn(hostDev netlink.Link, containerNs ns.NetNS, ifName string) (netlink.Link, error) {
origLinkFlags := hostDev.Attrs().Flags
hostDevName := hostDev.Attrs().Name hostDevName := hostDev.Attrs().Name
defaultNs, err := ns.GetCurrentNS()
// With recent kernels we could do all changes in a single netlink call,
// but on failure the device is left in a partially modified state.
// Doing changes one by one allow us to (try to) rollback to the initial state.
// Create a temporary namespace to rename (and modify) the device in.
// We were previously using a temporary name, but rapid rename leads to
// race condition with udev and NetworkManager.
tempNS, err := ns.TempNetNS()
if err != nil { if err != nil {
return nil, fmt.Errorf("failed to get host namespace: %v", err) return nil, fmt.Errorf("failed to create tempNS: %v", err)
} }
defer tempNS.Close()
// Devices can be renamed only when down // Restore original up state in case of error
if err = netlink.LinkSetDown(hostDev); err != nil { // This must be done in the hostNS as moving
return nil, fmt.Errorf("failed to set %q down: %v", hostDev.Attrs().Name, err) // device between namespaces sets the link down
} if hostDev.Attrs().Flags&net.FlagUp == net.FlagUp {
// restore original link state in case of error
defer func() { defer func() {
if err != nil { if err != nil {
if origLinkFlags&net.FlagUp == net.FlagUp && hostDev != nil { // lookup the device again (index might have changed)
if hostDev, err := netlink.LinkByName(hostDevName); err == nil {
_ = netlink.LinkSetUp(hostDev) _ = netlink.LinkSetUp(hostDev)
} }
} }
}() }()
hostDev, err = setTempName(hostDev)
if err != nil {
return nil, fmt.Errorf("failed to rename device %q to temporary name: %v", hostDevName, err)
} }
// restore original netdev name in case of error // Move the host device into tempNS
defer func() { if err = netlink.LinkSetNsFd(hostDev, int(tempNS.Fd())); err != nil {
if err != nil && hostDev != nil { return nil, fmt.Errorf("failed to move %q to tempNS: %v", hostDevName, err)
_ = netlink.LinkSetName(hostDev, hostDevName)
}
}()
if err = netlink.LinkSetNsFd(hostDev, int(containerNs.Fd())); err != nil {
return nil, fmt.Errorf("failed to move %q to container ns: %v", hostDev.Attrs().Name, err)
} }
var contDev netlink.Link var contDev netlink.Link
tempDevName := hostDev.Attrs().Name
if err = containerNs.Do(func(_ ns.NetNS) error { // In a container in container scenario, hostNS is not the initial net namespace,
var err error // but host / container naming is easier to follow.
contDev, err = netlink.LinkByName(tempDevName) if err = tempNS.Do(func(hostNS ns.NetNS) error {
// lookup the device in tempNS (index might have changed)
tempNSDev, err := netlink.LinkByName(hostDevName)
if err != nil { if err != nil {
return fmt.Errorf("failed to find %q: %v", tempDevName, err) return fmt.Errorf("failed to find %q in tempNS: %v", hostDevName, err)
} }
// move netdev back to host namespace in case of error // detroying a non empty tempNS would move physical devices back to the initial net namespace,
// not the namespace of the "parent" process, and virtual devices would be destroyed,
// so we need to actively move the device back to hostNS on error
defer func() { defer func() {
if err != nil { if err != nil && tempNSDev != nil {
_ = netlink.LinkSetNsFd(contDev, int(defaultNs.Fd())) _ = netlink.LinkSetNsFd(tempNSDev, int(hostNS.Fd()))
// we need to get updated link object as link was moved back to host namepsace }
_ = defaultNs.Do(func(_ ns.NetNS) error { }()
hostDev, _ = netlink.LinkByName(tempDevName)
return nil // Rename the device to the wanted name
}) if err = netlink.LinkSetName(tempNSDev, containerIfName); err != nil {
return fmt.Errorf("failed to rename host device %q to %q: %v", hostDevName, containerIfName, err)
}
// Restore the original device name in case of error
defer func() {
if err != nil && tempNSDev != nil {
_ = netlink.LinkSetName(tempNSDev, hostDevName)
} }
}() }()
// Save host device name into the container device's alias property // Save host device name into the container device's alias property
if err = netlink.LinkSetAlias(contDev, hostDevName); err != nil { if err = netlink.LinkSetAlias(tempNSDev, hostDevName); err != nil {
return fmt.Errorf("failed to set alias to %q: %v", tempDevName, err) return fmt.Errorf("failed to set alias to %q: %v", hostDevName, err)
}
// Rename container device to respect args.IfName
if err = netlink.LinkSetName(contDev, ifName); err != nil {
return fmt.Errorf("failed to rename device %q to %q: %v", tempDevName, ifName, err)
} }
// restore tempDevName in case of error // Remove the alias on error
defer func() { defer func() {
if err != nil { if err != nil && tempNSDev != nil {
_ = netlink.LinkSetName(contDev, tempDevName) _ = netlink.LinkSetAlias(tempNSDev, "")
} }
}() }()
// Bring container device up // Move the device to the containerNS
if err = netlink.LinkSetNsFd(tempNSDev, int(containerNs.Fd())); err != nil {
return fmt.Errorf("failed to move %q (host: %q) to container NS: %v", containerIfName, hostDevName, err)
}
// Lookup the device again on error, the index might have changed
defer func() {
if err != nil {
tempNSDev, _ = netlink.LinkByName(containerIfName)
}
}()
err = containerNs.Do(func(_ ns.NetNS) error {
var err error
contDev, err = netlink.LinkByName(containerIfName)
if err != nil {
return fmt.Errorf("failed to find %q in container NS: %v", containerIfName, err)
}
// Move the interface back to tempNS on error
defer func() {
if err != nil {
_ = netlink.LinkSetNsFd(contDev, int(tempNS.Fd()))
}
}()
// Bring the device up
// This must be done in the containerNS
if err = netlink.LinkSetUp(contDev); err != nil { if err = netlink.LinkSetUp(contDev); err != nil {
return fmt.Errorf("failed to set %q up: %v", ifName, err) return fmt.Errorf("failed to set %q up: %v", containerIfName, err)
} }
// bring device down in case of error
defer func() {
if err != nil {
_ = netlink.LinkSetDown(contDev)
}
}()
// Retrieve link again to get up-to-date name and attributes
contDev, err = netlink.LinkByName(ifName)
if err != nil {
return fmt.Errorf("failed to find %q: %v", ifName, err)
}
return nil return nil
})
return err
}); err != nil { }); err != nil {
return nil, err return nil, err
} }
@ -349,47 +352,49 @@ func moveLinkIn(hostDev netlink.Link, containerNs ns.NetNS, ifName string) (netl
return contDev, nil return contDev, nil
} }
func moveLinkOut(containerNs ns.NetNS, ifName string) error { func moveLinkOut(containerNs ns.NetNS, containerIfName string) error {
defaultNs, err := ns.GetCurrentNS() // Create a temporary namespace to rename (and modify) the device in.
// We were previously using a temporary name, but multiple rapid renames
// leads to race condition with udev and NetworkManager.
tempNS, err := ns.TempNetNS()
if err != nil { if err != nil {
return err return fmt.Errorf("failed to create tempNS: %v", err)
} }
defer defaultNs.Close() defer tempNS.Close()
var contDev netlink.Link
// Restore original up state in case of error
// This must be done in the containerNS as moving
// device between namespaces sets the link down
defer func() {
if err != nil && contDev != nil && contDev.Attrs().Flags&net.FlagUp == net.FlagUp {
containerNs.Do(func(_ ns.NetNS) error {
// lookup the device again (index might have changed)
if contDev, err := netlink.LinkByName(containerIfName); err == nil {
_ = netlink.LinkSetUp(contDev)
}
return nil
})
}
}()
var tempName string
var origDev netlink.Link
err = containerNs.Do(func(_ ns.NetNS) error { err = containerNs.Do(func(_ ns.NetNS) error {
dev, err := netlink.LinkByName(ifName) var err error
// Lookup the device in the containerNS
contDev, err = netlink.LinkByName(containerIfName)
if err != nil { if err != nil {
return fmt.Errorf("failed to find %q: %v", ifName, err) return fmt.Errorf("failed to find %q in containerNS: %v", containerIfName, err)
}
origDev = dev
// Devices can be renamed only when down
if err = netlink.LinkSetDown(dev); err != nil {
return fmt.Errorf("failed to set %q down: %v", ifName, err)
} }
defer func() { // Verify we have the original name
// If moving the device to the host namespace fails, set its name back to ifName so that this if contDev.Attrs().Alias == "" {
// function can be retried. Also bring the device back up, unless it was already down before. return fmt.Errorf("failed to find original ifname for %q (alias is not set)", containerIfName)
if err != nil {
_ = netlink.LinkSetName(dev, ifName)
if dev.Attrs().Flags&net.FlagUp == net.FlagUp {
_ = netlink.LinkSetUp(dev)
} }
}
}()
newLink, err := setTempName(dev) // Move the device to the tempNS
if err != nil { if err = netlink.LinkSetNsFd(contDev, int(tempNS.Fd())); err != nil {
return fmt.Errorf("failed to rename device %q to temporary name: %v", ifName, err) return fmt.Errorf("failed to move %q to tempNS: %v", containerIfName, err)
}
dev = newLink
tempName = dev.Attrs().Name
if err = netlink.LinkSetNsFd(dev, int(defaultNs.Fd())); err != nil {
return fmt.Errorf("failed to move %q to host netns: %v", tempName, err)
} }
return nil return nil
}) })
@ -397,29 +402,57 @@ func moveLinkOut(containerNs ns.NetNS, ifName string) error {
return err return err
} }
// Rename the device to its original name from the host namespace err = tempNS.Do(func(hostNS ns.NetNS) error {
tempDev, err := netlink.LinkByName(tempName) // Lookup the device in tempNS (index might have changed)
tempNSDev, err := netlink.LinkByName(containerIfName)
if err != nil { if err != nil {
return fmt.Errorf("failed to find %q in host namespace: %v", tempName, err) return fmt.Errorf("failed to find %q in tempNS: %v", containerIfName, err)
} }
if err = netlink.LinkSetName(tempDev, tempDev.Attrs().Alias); err != nil { // Move the device back to containerNS on error
// move device back to container ns so it may be retired
defer func() { defer func() {
_ = netlink.LinkSetNsFd(tempDev, int(containerNs.Fd()))
_ = containerNs.Do(func(_ ns.NetNS) error {
lnk, err := netlink.LinkByName(tempName)
if err != nil { if err != nil {
return err _ = netlink.LinkSetNsFd(tempNSDev, int(containerNs.Fd()))
} }
_ = netlink.LinkSetName(lnk, ifName) }()
if origDev.Attrs().Flags&net.FlagUp == net.FlagUp {
_ = netlink.LinkSetUp(lnk) hostDevName := tempNSDev.Attrs().Alias
// Rename container device to hostDevName
if err = netlink.LinkSetName(tempNSDev, hostDevName); err != nil {
return fmt.Errorf("failed to rename device %q to %q: %v", containerIfName, hostDevName, err)
} }
// Rename the device back to containerIfName on error
defer func() {
if err != nil {
_ = netlink.LinkSetName(tempNSDev, containerIfName)
}
}()
// Unset device's alias property
if err = netlink.LinkSetAlias(tempNSDev, ""); err != nil {
return fmt.Errorf("failed to unset alias of %q: %v", hostDevName, err)
}
// Set back the device alias to hostDevName on error
defer func() {
if err != nil {
_ = netlink.LinkSetAlias(tempNSDev, hostDevName)
}
}()
// Finally move the device to the hostNS
if err = netlink.LinkSetNsFd(tempNSDev, int(hostNS.Fd())); err != nil {
return fmt.Errorf("failed to move %q to hostNS: %v", hostDevName, err)
}
// As we don't know the previous state, leave the link down
return nil return nil
}) })
}() if err != nil {
return fmt.Errorf("failed to restore %q to original name %q: %v", tempName, tempDev.Attrs().Alias, err) return err
} }
return nil return nil