ns: add interface, use it, and fix thread-related namespace switch issues

Add a namespace object interface for somewhat cleaner code when creating and switching between network namespaces. All created namespaces are now mounted in /var/run/netns to ensure they have persistent inodes and paths that can be passed around between plugin components without relying on the current namespace being correct. Also remove the thread-locking arguments from the ns package per https://github.com/appc/cni/issues/183 by doing all the namespace changes in a separate goroutine that locks/unlocks itself, instead of the caller having to track OS thread locking.
2016-04-05 11:10:31 -05:00
parent c019ed25d2
commit f2922c5c1f
10 changed files with 334 additions and 420 deletions
--- a/ns/README.md
+++ b/ns/README.md
@@ -0,0 +1,31 @@
+### Namespaces, Threads, and Go
+On Linux each OS thread can have a different network namespace.  Go's thread scheduling model switches goroutines between OS threads based on OS thread load and whether the goroutine would block other goroutines.  This can result in a goroutine switching network namespaces without notice and lead to errors in your code.
+
+### Namespace Switching
+Switching namespaces with the `ns.Set()` method is not recommended without additional strategies to prevent unexpected namespace changes when your goroutines switch OS threads.
+
+Go provides the `runtime.LockOSThread()` function to ensure a specific goroutine executes on its current OS thread and prevents any other goroutine from running in that thread until the locked one exits.  Careful usage of `LockOSThread()` and goroutines can provide good control over which network namespace a given goroutine executes in.
+
+For example, you cannot rely on the `ns.Set()` namespace being the current namespace after the `Set()` call unless you do two things.  First, the goroutine calling `Set()` must have previously called `LockOSThread()`.  Second, you must ensure `runtime.UnlockOSThread()` is not called somewhere in-between.  You also cannot rely on the initial network namespace remaining the current network namespace if any other code in your program switches namespaces, unless you have already called `LockOSThread()` in that goroutine.  Note that `LockOSThread()` prevents the Go scheduler from optimally scheduling goroutines for best performance, so `LockOSThread()` should only be used in small, isolated goroutines that release the lock quickly.
+
+### Do() The Recommended Thing
+The `ns.Do()` method provides control over network namespaces for you by implementing these strategies. All code dependent on a particular network namespace should be wrapped in the `ns.Do()` method to ensure the correct namespace is selected for the duration of your code.  For example:
+
+```go
+targetNs, err := ns.NewNS()
+if err != nil {
+    return err
+}
+err = targetNs.Do(func(hostNs ns.NetNS) error {
+	dummy := &netlink.Dummy{
+		LinkAttrs: netlink.LinkAttrs{
+			Name: "dummy0",
+		},
+	}
+	return netlink.LinkAdd(dummy)
+})
+```
+
+### Further Reading
+ - https://github.com/golang/go/wiki/LockOSThread
+ - http://morsmachine.dk/go-scheduler
--- a/ns/consts_linux_386.go
+++ b/ns/consts_linux_386.go
@@ -1,17 +0,0 @@
-// Copyright 2015 CNI authors
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-package ns
-
-const setNsNr = 346
--- a/ns/consts_linux_amd64.go
+++ b/ns/consts_linux_amd64.go
@@ -1,17 +0,0 @@
-// Copyright 2015 CNI authors
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-package ns
-
-const setNsNr = 308
--- a/ns/consts_linux_arm.go
+++ b/ns/consts_linux_arm.go
@@ -1,17 +0,0 @@
-// Copyright 2015 CNI authors
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-package ns
-
-const setNsNr = 375
--- a/ns/ns.go
+++ b/ns/ns.go
@@ -15,17 +15,211 @@
 package ns

 import (
+	"crypto/rand"
 	"fmt"
 	"os"
+	"path"
 	"runtime"
-	"syscall"
+	"sync"
+
+	"golang.org/x/sys/unix"
 )

-// SetNS sets the network namespace on a target file.
-func SetNS(f *os.File, flags uintptr) error {
-	_, _, err := syscall.RawSyscall(setNsNr, f.Fd(), flags, 0)
-	if err != 0 {
-		return err
+type NetNS interface {
+	// Executes the passed closure in this object's network namespace,
+	// attemtping to restore the original namespace before returning.
+	// However, since each OS thread can have a different network namespace,
+	// and Go's thread scheduling is highly variable, callers cannot
+	// guarantee any specific namespace is set unless operations that
+	// require that namespace are wrapped with Do().  Also, no code called
+	// from Do() should call runtime.UnlockOSThread(), or the risk
+	// of executing code in an incorrect namespace will be greater.  See
+	// https://github.com/golang/go/wiki/LockOSThread for further details.
+	Do(toRun func(NetNS) error) error
+
+	// Sets the current network namespace to this object's network namespace.
+	// Note that since Go's thread scheduling is highly variable, callers
+	// cannot guarantee the requested namespace will be the current namespace
+	// after this function is called; to ensure this wrap operations that
+	// require the namespace with Do() instead.
+	Set() error
+
+	// Returns the filesystem path representing this object's network namespace
+	Path() string
+
+	// Returns a file descriptor representing this object's network namespace
+	Fd() uintptr
+
+	// Cleans up this instance of the network namespace; if this instance
+	// is the last user the namespace will be destroyed
+	Close() error
+}
+
+type netNS struct {
+	file    *os.File
+	mounted bool
+}
+
+func getCurrentThreadNetNSPath() string {
+	// /proc/self/ns/net returns the namespace of the main thread, not
+	// of whatever thread this goroutine is running on.  Make sure we
+	// use the thread's net namespace since the thread is switching around
+	return fmt.Sprintf("/proc/%d/task/%d/ns/net", os.Getpid(), unix.Gettid())
+}
+
+// Returns an object representing the current OS thread's network namespace
+func GetCurrentNS() (NetNS, error) {
+	return GetNS(getCurrentThreadNetNSPath())
+}
+
+// Returns an object representing the namespace referred to by @path
+func GetNS(nspath string) (NetNS, error) {
+	fd, err := os.Open(nspath)
+	if err != nil {
+		return nil, err
+	}
+	return &netNS{file: fd}, nil
+}
+
+// Creates a new persistent network namespace and returns an object
+// representing that namespace, without switching to it
+func NewNS() (NetNS, error) {
+	const nsRunDir = "/var/run/netns"
+
+	b := make([]byte, 16)
+	_, err := rand.Reader.Read(b)
+	if err != nil {
+		return nil, fmt.Errorf("failed to generate random netns name: %v", err)
+	}
+
+	err = os.MkdirAll(nsRunDir, 0755)
+	if err != nil {
+		return nil, err
+	}
+
+	// create an empty file at the mount point
+	nsName := fmt.Sprintf("cni-%x-%x-%x-%x-%x", b[0:4], b[4:6], b[6:8], b[8:10], b[10:])
+	nsPath := path.Join(nsRunDir, nsName)
+	mountPointFd, err := os.Create(nsPath)
+	if err != nil {
+		return nil, err
+	}
+	mountPointFd.Close()
+
+	// Ensure the mount point is cleaned up on errors; if the namespace
+	// was successfully mounted this will have no effect because the file
+	// is in-use
+	defer os.RemoveAll(nsPath)
+
+	var wg sync.WaitGroup
+	wg.Add(1)
+
+	// do namespace work in a dedicated goroutine, so that we can safely
+	// Lock/Unlock OSThread without upsetting the lock/unlock state of
+	// the caller of this function
+	var fd *os.File
+	go (func() {
+		defer wg.Done()
+		runtime.LockOSThread()
+
+		var origNS NetNS
+		origNS, err = GetNS(getCurrentThreadNetNSPath())
+		if err != nil {
+			return
+		}
+		defer origNS.Close()
+
+		// create a new netns on the current thread
+		err = unix.Unshare(unix.CLONE_NEWNET)
+		if err != nil {
+			return
+		}
+		defer origNS.Set()
+
+		// bind mount the new netns from the current thread onto the mount point
+		err = unix.Mount(getCurrentThreadNetNSPath(), nsPath, "none", unix.MS_BIND, "")
+		if err != nil {
+			return
+		}
+
+		fd, err = os.Open(nsPath)
+		if err != nil {
+			return
+		}
+	})()
+	wg.Wait()
+
+	if err != nil {
+		unix.Unmount(nsPath, unix.MNT_DETACH)
+		return nil, fmt.Errorf("failed to create namespace: %v", err)
+	}
+
+	return &netNS{file: fd, mounted: true}, nil
+}
+
+func (ns *netNS) Path() string {
+	return ns.file.Name()
+}
+
+func (ns *netNS) Fd() uintptr {
+	return ns.file.Fd()
+}
+
+func (ns *netNS) Close() error {
+	ns.file.Close()
+
+	if ns.mounted {
+		if err := unix.Unmount(ns.file.Name(), unix.MNT_DETACH); err != nil {
+			return fmt.Errorf("Failed to unmount namespace %s: %v", ns.file.Name(), err)
+		}
+		if err := os.RemoveAll(ns.file.Name()); err != nil {
+			return fmt.Errorf("Failed to clean up namespace %s: %v", ns.file.Name(), err)
+		}
+	}
+	return nil
+}
+
+func (ns *netNS) Do(toRun func(NetNS) error) error {
+	containedCall := func(hostNS NetNS) error {
+		threadNS, err := GetNS(getCurrentThreadNetNSPath())
+		if err != nil {
+			return fmt.Errorf("failed to open current netns: %v", err)
+		}
+		defer threadNS.Close()
+
+		// switch to target namespace
+		if err = ns.Set(); err != nil {
+			return fmt.Errorf("error switching to ns %v: %v", ns.file.Name(), err)
+		}
+		defer threadNS.Set() // switch back
+
+		return toRun(hostNS)
+	}
+
+	// save a handle to current network namespace
+	hostNS, err := GetNS(getCurrentThreadNetNSPath())
+	if err != nil {
+		return fmt.Errorf("Failed to open current namespace: %v", err)
+	}
+	defer hostNS.Close()
+
+	var wg sync.WaitGroup
+	wg.Add(1)
+
+	var innerError error
+	go func() {
+		defer wg.Done()
+		runtime.LockOSThread()
+		innerError = containedCall(hostNS)
+	}()
+	wg.Wait()
+
+	return innerError
+}
+
+func (ns *netNS) Set() error {
+	if _, _, err := unix.Syscall(unix.SYS_SETNS, ns.Fd(), uintptr(unix.CLONE_NEWNET), 0); err != 0 {
+		return fmt.Errorf("Error switching to ns %v: %v", ns.file.Name(), err)
 	}

 	return nil
@@ -33,46 +227,11 @@ func SetNS(f *os.File, flags uintptr) error {

 // WithNetNSPath executes the passed closure under the given network
 // namespace, restoring the original namespace afterwards.
-// Changing namespaces must be done on a goroutine that has been
-// locked to an OS thread. If lockThread arg is true, this function
-// locks the goroutine prior to change namespace and unlocks before
-// returning
-func WithNetNSPath(nspath string, lockThread bool, f func(*os.File) error) error {
-	ns, err := os.Open(nspath)
+func WithNetNSPath(nspath string, toRun func(NetNS) error) error {
+	ns, err := GetNS(nspath)
 	if err != nil {
 		return fmt.Errorf("Failed to open %v: %v", nspath, err)
 	}
 	defer ns.Close()
-	return WithNetNS(ns, lockThread, f)
-}
-
-// WithNetNS executes the passed closure under the given network
-// namespace, restoring the original namespace afterwards.
-// Changing namespaces must be done on a goroutine that has been
-// locked to an OS thread. If lockThread arg is true, this function
-// locks the goroutine prior to change namespace and unlocks before
-// returning.  If the closure returns an error, WithNetNS attempts to
-// restore the original namespace before returning.
-func WithNetNS(ns *os.File, lockThread bool, f func(*os.File) error) error {
-	if lockThread {
-		runtime.LockOSThread()
-		defer runtime.UnlockOSThread()
-	}
-	// save a handle to current (host) network namespace
-	thisNS, err := os.Open("/proc/self/ns/net")
-	if err != nil {
-		return fmt.Errorf("Failed to open /proc/self/ns/net: %v", err)
-	}
-	defer thisNS.Close()
-
-	if err = SetNS(ns, syscall.CLONE_NEWNET); err != nil {
-		return fmt.Errorf("Error switching to ns %v: %v", ns.Name(), err)
-	}
-	defer SetNS(thisNS, syscall.CLONE_NEWNET) // switch back
-
-	if err = f(thisNS); err != nil {
-		return err
-	}
-
-	return nil
+	return ns.Do(toRun)
 }
--- a/ns/ns_test.go
+++ b/ns/ns_test.go
@@ -17,109 +17,123 @@ package ns_test
 import (
 	"errors"
 	"fmt"
-	"math/rand"
 	"os"
-	"os/exec"
 	"path/filepath"

 	"github.com/containernetworking/cni/pkg/ns"
-	"github.com/containernetworking/cni/pkg/testhelpers"
 	. "github.com/onsi/ginkgo"
 	. "github.com/onsi/gomega"
+	"golang.org/x/sys/unix"
 )

+func getInodeCurNetNS() (uint64, error) {
+	curNS, err := ns.GetCurrentNS()
+	if err != nil {
+		return 0, err
+	}
+	defer curNS.Close()
+	return getInodeNS(curNS)
+}
+
+func getInodeNS(netns ns.NetNS) (uint64, error) {
+	return getInodeFd(int(netns.Fd()))
+}
+
+func getInode(path string) (uint64, error) {
+	file, err := os.Open(path)
+	if err != nil {
+		return 0, err
+	}
+	defer file.Close()
+	return getInodeFd(int(file.Fd()))
+}
+
+func getInodeFd(fd int) (uint64, error) {
+	stat := &unix.Stat_t{}
+	err := unix.Fstat(fd, stat)
+	return stat.Ino, err
+}
+
 var _ = Describe("Linux namespace operations", func() {
 	Describe("WithNetNS", func() {
 		var (
-			targetNetNSName string
-			targetNetNSPath string
-			targetNetNS     *os.File
+			originalNetNS ns.NetNS
+			targetNetNS   ns.NetNS
 		)

 		BeforeEach(func() {
 			var err error

-			targetNetNSName = fmt.Sprintf("test-netns-%d", rand.Int())
-
-			err = exec.Command("ip", "netns", "add", targetNetNSName).Run()
+			originalNetNS, err = ns.NewNS()
 			Expect(err).NotTo(HaveOccurred())

-			targetNetNSPath = filepath.Join("/var/run/netns/", targetNetNSName)
-			targetNetNS, err = os.Open(targetNetNSPath)
+			targetNetNS, err = ns.NewNS()
 			Expect(err).NotTo(HaveOccurred())
 		})

 		AfterEach(func() {
 			Expect(targetNetNS.Close()).To(Succeed())
-
-			err := exec.Command("ip", "netns", "del", targetNetNSName).Run()
-			Expect(err).NotTo(HaveOccurred())
+			Expect(originalNetNS.Close()).To(Succeed())
 		})

 		It("executes the callback within the target network namespace", func() {
-			expectedInode, err := testhelpers.GetInode(targetNetNSPath)
+			expectedInode, err := getInodeNS(targetNetNS)
 			Expect(err).NotTo(HaveOccurred())

-			var actualInode uint64
-			var innerErr error
-			err = ns.WithNetNS(targetNetNS, false, func(*os.File) error {
-				actualInode, innerErr = testhelpers.GetInodeCurNetNS()
+			err = targetNetNS.Do(func(ns.NetNS) error {
+				defer GinkgoRecover()
+
+				actualInode, err := getInodeCurNetNS()
+				Expect(err).NotTo(HaveOccurred())
+				Expect(actualInode).To(Equal(expectedInode))
 				return nil
 			})
 			Expect(err).NotTo(HaveOccurred())
-
-			Expect(innerErr).NotTo(HaveOccurred())
-			Expect(actualInode).To(Equal(expectedInode))
 		})

 		It("provides the original namespace as the argument to the callback", func() {
-			hostNSInode, err := testhelpers.GetInodeCurNetNS()
-			Expect(err).NotTo(HaveOccurred())
+			// Ensure we start in originalNetNS
+			err := originalNetNS.Do(func(ns.NetNS) error {
+				defer GinkgoRecover()

-			var inputNSInode uint64
-			var innerErr error
-			err = ns.WithNetNS(targetNetNS, false, func(inputNS *os.File) error {
-				inputNSInode, err = testhelpers.GetInodeF(inputNS)
+				origNSInode, err := getInodeNS(originalNetNS)
+				Expect(err).NotTo(HaveOccurred())
+
+				err = targetNetNS.Do(func(hostNS ns.NetNS) error {
+					defer GinkgoRecover()
+
+					hostNSInode, err := getInodeNS(hostNS)
+					Expect(err).NotTo(HaveOccurred())
+					Expect(hostNSInode).To(Equal(origNSInode))
+					return nil
+				})
 				return nil
 			})
 			Expect(err).NotTo(HaveOccurred())
-
-			Expect(innerErr).NotTo(HaveOccurred())
-			Expect(inputNSInode).To(Equal(hostNSInode))
-		})
-
-		It("restores the calling thread to the original network namespace", func() {
-			preTestInode, err := testhelpers.GetInodeCurNetNS()
-			Expect(err).NotTo(HaveOccurred())
-
-			err = ns.WithNetNS(targetNetNS, false, func(*os.File) error {
-				return nil
-			})
-			Expect(err).NotTo(HaveOccurred())
-
-			postTestInode, err := testhelpers.GetInodeCurNetNS()
-			Expect(err).NotTo(HaveOccurred())
-
-			Expect(postTestInode).To(Equal(preTestInode))
 		})

 		Context("when the callback returns an error", func() {
 			It("restores the calling thread to the original namespace before returning", func() {
-				preTestInode, err := testhelpers.GetInodeCurNetNS()
-				Expect(err).NotTo(HaveOccurred())
+				err := originalNetNS.Do(func(ns.NetNS) error {
+					defer GinkgoRecover()

-				_ = ns.WithNetNS(targetNetNS, false, func(*os.File) error {
-					return errors.New("potato")
+					preTestInode, err := getInodeCurNetNS()
+					Expect(err).NotTo(HaveOccurred())
+
+					_ = targetNetNS.Do(func(ns.NetNS) error {
+						return errors.New("potato")
+					})
+
+					postTestInode, err := getInodeCurNetNS()
+					Expect(err).NotTo(HaveOccurred())
+					Expect(postTestInode).To(Equal(preTestInode))
+					return nil
 				})
-
-				postTestInode, err := testhelpers.GetInodeCurNetNS()
 				Expect(err).NotTo(HaveOccurred())
-
-				Expect(postTestInode).To(Equal(preTestInode))
 			})

 			It("returns the error from the callback", func() {
-				err := ns.WithNetNS(targetNetNS, false, func(*os.File) error {
+				err := targetNetNS.Do(func(ns.NetNS) error {
 					return errors.New("potato")
 				})
 				Expect(err).To(MatchError("potato"))
@@ -128,16 +142,40 @@ var _ = Describe("Linux namespace operations", func() {

 		Describe("validating inode mapping to namespaces", func() {
 			It("checks that different namespaces have different inodes", func() {
-				hostNSInode, err := testhelpers.GetInodeCurNetNS()
+				origNSInode, err := getInodeNS(originalNetNS)
 				Expect(err).NotTo(HaveOccurred())

-				testNsInode, err := testhelpers.GetInode(targetNetNSPath)
+				testNsInode, err := getInodeNS(targetNetNS)
 				Expect(err).NotTo(HaveOccurred())

-				Expect(hostNSInode).NotTo(Equal(0))
 				Expect(testNsInode).NotTo(Equal(0))
-				Expect(testNsInode).NotTo(Equal(hostNSInode))
+				Expect(testNsInode).NotTo(Equal(origNSInode))
+			})
+
+			It("should not leak a closed netns onto any threads in the process", func() {
+				By("creating a new netns")
+				createdNetNS, err := ns.NewNS()
+				Expect(err).NotTo(HaveOccurred())
+
+				By("discovering the inode of the created netns")
+				createdNetNSInode, err := getInodeNS(createdNetNS)
+				Expect(err).NotTo(HaveOccurred())
+				createdNetNS.Close()
+
+				By("comparing against the netns inode of every thread in the process")
+				for _, netnsPath := range allNetNSInCurrentProcess() {
+					netnsInode, err := getInode(netnsPath)
+					Expect(err).NotTo(HaveOccurred())
+					Expect(netnsInode).NotTo(Equal(createdNetNSInode))
+				}
 			})
 		})
 	})
 })
+
+func allNetNSInCurrentProcess() []string {
+	pid := unix.Getpid()
+	paths, err := filepath.Glob(fmt.Sprintf("/proc/%d/task/*/ns/net", pid))
+	Expect(err).NotTo(HaveOccurred())
+	return paths
+}