// Copyright 2023 CNI authors // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. package ip import ( "context" "fmt" "net" "strings" "sigs.k8s.io/knftables" "github.com/containernetworking/cni/pkg/types" "github.com/containernetworking/plugins/pkg/utils" ) const ( ipMasqTableName = "cni_plugins_masquerade" ipMasqChainName = "masq_checks" ) // The nftables ipmasq implementation is mostly like the iptables implementation, with // minor updates to fix a bug (adding `ifname`) and to allow future GC support. // // We add a rule for each mapping, with a comment containing a hash of its identifiers, // so that we can later reliably delete the rules we want. (This is important because in // edge cases, it's possible the plugin might see "ADD container A with IP 192.168.1.3", // followed by "ADD container B with IP 192.168.1.3" followed by "DEL container A with IP // 192.168.1.3", and we need to make sure that the DEL causes us to delete the rule for // container A, and not the rule for container B.) // // It would be more nftables-y to have a chain with a single rule doing a lookup against a // set with an element per mapping, rather than having a chain with a rule per mapping. // But there's no easy, non-racy way to say "delete the element 192.168.1.3 from the set, // but only if it was added for container A, not if it was added for container B". // hashForNetwork returns a unique hash for this network func hashForNetwork(network string) string { return utils.MustFormatHashWithPrefix(16, "", network) } // hashForInstance returns a unique hash identifying the rules for this // network/ifname/containerID func hashForInstance(network, ifname, containerID string) string { return hashForNetwork(network) + "-" + utils.MustFormatHashWithPrefix(16, "", ifname+":"+containerID) } // commentForInstance returns a comment string that begins with a unique hash and // ends with a (possibly-truncated) human-readable description. func commentForInstance(network, ifname, containerID string) string { comment := fmt.Sprintf("%s, net: %s, if: %s, id: %s", hashForInstance(network, ifname, containerID), strings.ReplaceAll(network, `"`, ``), strings.ReplaceAll(ifname, `"`, ``), strings.ReplaceAll(containerID, `"`, ``), ) if len(comment) > knftables.CommentLengthMax { comment = comment[:knftables.CommentLengthMax] } return comment } // setupIPMasqNFTables is the nftables-based implementation of SetupIPMasqForNetwork func setupIPMasqNFTables(ipn *net.IPNet, network, ifname, containerID string) error { nft, err := knftables.New(knftables.InetFamily, ipMasqTableName) if err != nil { return err } return setupIPMasqNFTablesWithInterface(nft, ipn, network, ifname, containerID) } func setupIPMasqNFTablesWithInterface(nft knftables.Interface, ipn *net.IPNet, network, ifname, containerID string) error { staleRules, err := findRules(nft, hashForInstance(network, ifname, containerID)) if err != nil { return err } tx := nft.NewTransaction() // Ensure that our table and chains exist. tx.Add(&knftables.Table{ Comment: knftables.PtrTo("Masquerading for plugins from github.com/containernetworking/plugins"), }) tx.Add(&knftables.Chain{ Name: ipMasqChainName, Comment: knftables.PtrTo("Masquerade traffic from certain IPs to any (non-multicast) IP outside their subnet"), }) // Ensure that the postrouting chain exists and has the correct rules. (Has to be // done after creating ipMasqChainName, so we can jump to it.) tx.Add(&knftables.Chain{ Name: "postrouting", Type: knftables.PtrTo(knftables.NATType), Hook: knftables.PtrTo(knftables.PostroutingHook), Priority: knftables.PtrTo(knftables.SNATPriority), }) tx.Flush(&knftables.Chain{ Name: "postrouting", }) tx.Add(&knftables.Rule{ Chain: "postrouting", Rule: "ip daddr == 224.0.0.0/4 return", }) tx.Add(&knftables.Rule{ Chain: "postrouting", Rule: "ip6 daddr == ff00::/8 return", }) tx.Add(&knftables.Rule{ Chain: "postrouting", Rule: knftables.Concat( "goto", ipMasqChainName, ), }) // Delete stale rules, add new rules to masquerade chain for _, rule := range staleRules { tx.Delete(rule) } ip := "ip" if ipn.IP.To4() == nil { ip = "ip6" } // e.g. if ipn is "192.168.1.4/24", then dstNet is "192.168.1.0/24" dstNet := &net.IPNet{IP: ipn.IP.Mask(ipn.Mask), Mask: ipn.Mask} tx.Add(&knftables.Rule{ Chain: ipMasqChainName, Rule: knftables.Concat( ip, "saddr", "==", ipn.IP, ip, "daddr", "!=", dstNet, "masquerade", ), Comment: knftables.PtrTo(commentForInstance(network, ifname, containerID)), }) return nft.Run(context.TODO(), tx) } // teardownIPMasqNFTables is the nftables-based implementation of TeardownIPMasqForNetwork func teardownIPMasqNFTables(ipn *net.IPNet, network, ifname, containerID string) error { nft, err := knftables.New(knftables.InetFamily, ipMasqTableName) if err != nil { return err } return teardownIPMasqNFTablesWithInterface(nft, ipn, network, ifname, containerID) } func teardownIPMasqNFTablesWithInterface(nft knftables.Interface, _ *net.IPNet, network, ifname, containerID string) error { rules, err := findRules(nft, hashForInstance(network, ifname, containerID)) if err != nil { return err } else if len(rules) == 0 { return nil } tx := nft.NewTransaction() for _, rule := range rules { tx.Delete(rule) } return nft.Run(context.TODO(), tx) } // gcIPMasqNFTables is the nftables-based implementation of GCIPMasqForNetwork func gcIPMasqNFTables(network string, attachments []types.GCAttachment) error { nft, err := knftables.New(knftables.InetFamily, ipMasqTableName) if err != nil { return err } return gcIPMasqNFTablesWithInterface(nft, network, attachments) } func gcIPMasqNFTablesWithInterface(nft knftables.Interface, network string, attachments []types.GCAttachment) error { // Find all rules for the network rules, err := findRules(nft, hashForNetwork(network)) if err != nil { return err } else if len(rules) == 0 { return nil } // Compute the comments for all elements of attachments validAttachments := map[string]bool{} for _, attachment := range attachments { validAttachments[commentForInstance(network, attachment.IfName, attachment.ContainerID)] = true } // Delete anything in rules that isn't in validAttachments tx := nft.NewTransaction() for _, rule := range rules { if !validAttachments[*rule.Comment] { tx.Delete(rule) } } return nft.Run(context.TODO(), tx) } // findRules finds rules with comments that start with commentPrefix. func findRules(nft knftables.Interface, commentPrefix string) ([]*knftables.Rule, error) { rules, err := nft.ListRules(context.TODO(), ipMasqChainName) if err != nil { if knftables.IsNotFound(err) { // If ipMasqChainName doesn't exist yet, that's fine return nil, nil } return nil, err } matchingRules := make([]*knftables.Rule, 0, 1) for _, rule := range rules { if rule.Comment != nil && strings.HasPrefix(*rule.Comment, commentPrefix) { matchingRules = append(matchingRules, rule) } } return matchingRules, nil }