diff --git a/plugins/meta/portmap/README.md b/plugins/meta/portmap/README.md index fc6b86c9..748c5080 100644 --- a/plugins/meta/portmap/README.md +++ b/plugins/meta/portmap/README.md @@ -8,6 +8,8 @@ You should use this plugin as part of a network configuration list. It accepts the following configuration options: * `snat` - boolean, default true. If true or omitted, set up the SNAT chains +* `markMasqBit` - int, (0-31), default 13. The mark bit to use for masquerading (see section SNAT). Cannot be set when `externalSetMarkChain` is used. +* `externalSetMarkChain` - string, default nil. If you already have a Masquerade mark chain (e.g. Kubernetes), specify it here. This will use that instead of creating a separate chain. When this is set, `markMasqBit` must be unspecified. * `conditionsV4`, `conditionsV6` - array of strings. A list of arbitrary `iptables` matches to add to the per-container rule. This may be useful if you wish to exclude specific IPs from port-mapping @@ -15,7 +17,7 @@ exclude specific IPs from port-mapping The plugin expects to receive the actual list of port mappings via the `portMappings` [capability argument](https://github.com/containernetworking/cni/blob/master/CONVENTIONS.md) -So a sample standalone config list (with the file extension .conflist) might +A sample standalone config list for Kubernetes (with the file extension .conflist) might look like: ```json @@ -39,21 +41,31 @@ look like: { "type": "portmap", "capabilities": {"portMappings": true}, - "snat": false, - "conditionsV4": ["!", "-d", "192.0.2.0/24"], - "conditionsV6": ["!", "-d", "fc00::/7"] + "externalSetMarkChain": "KUBE-MARK-MASQ" } ] } ``` +A configuration file with all options set: +```json +{ + "type": "portmap", + "capabilities": {"portMappings": true}, + "snat": true, + "markMasqBit": 13, + "externalSetMarkChain": "CNI-HOSTPORT-SETMARK", + "conditionsV4": ["!", "-d", "192.0.2.0/24"], + "conditionsV6": ["!", "-d", "fc00::/7"] +} +``` + ## Rule structure The plugin sets up two sequences of chains and rules - one "primary" DNAT sequence to rewrite the destination, and one additional SNAT sequence that -rewrites the source address for packets from localhost. The sequence is somewhat -complex to minimize the number of rules non-forwarded packets must traverse. +will masquerade traffic as needed. ### DNAT @@ -68,50 +80,54 @@ rules look like this: - `--dst-type LOCAL -j CNI-HOSTPORT-DNAT` `CNI-HOSTPORT-DNAT` chain: -- `${ConditionsV4/6} -j CNI-DN-xxxxxx` (where xxxxxx is a function of the ContainerID and network name) +- `${ConditionsV4/6} -p tcp --destination-ports 8080,8043 -j CNI-DN-xxxxxx` (where xxxxxx is a function of the ContainerID and network name) -`CNI-DN-xxxxxx` chain: -- `-p tcp --dport 8080 -j DNAT --to-destination 172.16.30.2:80` +`CNI-HOSTPORT-SETMARK` chain: +- `-j MARK --set-xmark 0x2000/0x2000` + +`CNI-DN-xxxxxx` chain: +- `-p tcp -s 172.16.30.2 --dport 8080 -j CNI-HOSTPORT-SETMARK` (masquerade hairpin traffic) +- `-p tcp -s 127.0.0.1 --dport 8080 -j CNI-HOSTPORT-SETMARK` (masquerade localhost traffic) +- `-p tcp --dport 8080 -j DNAT --to-destination 172.16.30.2:80` (rewrite destination) +- `-p tcp -s 172.16.30.2 --dport 8043 -j CNI-HOSTPORT-SETMARK` +- `-p tcp -s 127.0.0.1 --dport 8043 -j CNI-HOSTPORT-SETMARK` - `-p tcp --dport 8043 -j DNAT --to-destination 172.16.30.2:443` New connections to the host will have to traverse every rule, so large numbers of port forwards may have a performance impact. This won't affect established connections, just the first packet. -### SNAT -The SNAT rule enables port-forwarding from the localhost IP on the host. -This rule rewrites (masquerades) the source address for connections from -localhost. If this rule did not exist, a connection to `localhost:80` would -still have a source IP of 127.0.0.1 when received by the container, so no -packets would respond. Again, it is a sequence of 3 chains. Because SNAT has to -occur in the `POSTROUTING` chain, the packet has already been through the DNAT -chain. +### SNAT (Masquerade) +Some packets also need to have the source address rewritten: +* connections from localhost +* Hairpin traffic back to the container. + +In the DNAT chain, a bit is set on the mark for packets that need snat. This +chain performs that masquerading. By default, bit 13 is set, but this is +configurable. If you are using other tools that also use the iptables mark, +you should make sure this doesn't conflict. + +Some container runtimes, most notably Kubernetes, already have a set of rules +for masquerading when a specific mark bit is set. If so enabled, the plugin +will use that chain instead. `POSTROUTING`: -- `-s 127.0.0.1 ! -d 127.0.0.1 -j CNI-HOSTPORT-SNAT` +- `-j CNI-HOSTPORT-MASQ` -`CNI-HOSTPORT-SNAT`: -- `-j CNI-SN-xxxxx` - -`CNI-SN-xxxxx`: -- `-p tcp -s 127.0.0.1 -d 172.16.30.2 --dport 80 -j MASQUERADE` -- `-p tcp -s 127.0.0.1 -d 172.16.30.2 --dport 443 -j MASQUERADE` - -Only new connections from the host, where the source address is 127.0.0.1 but -not the destination will traverse this chain. It is unlikely that any packets -will reach these rules without being SNATted, so the cost should be minimal. +`CNI-HOSTPORT-MASQ`: +- `--mark 0x2000 -j MASQUERADE` Because MASQUERADE happens in POSTROUTING, it means that packets with source ip -127.0.0.1 need to pass a routing boundary. By default, that is not allowed -in Linux. So, need to enable the sysctl `net.ipv4.conf.IFNAME.route_localnet`, -where IFNAME is the name of the host-side interface that routes traffic to the -container. +127.0.0.1 need to first pass a routing boundary before being masqueraded. By +default, that is not allowed in Linux. So, the plugin needs to enable the sysctl +`net.ipv4.conf.IFNAME.route_localnet`, where IFNAME is the name of the host-side +interface that routes traffic to the container. -There is no equivalent to `route_localnet` for ipv6, so SNAT does not work -for ipv6. If you need port forwarding from localhost, your container must have -an ipv4 address. +There is no equivalent to `route_localnet` for ipv6, so connections to ::1 +will not be portmapped for ipv6. If you need port forwarding from localhost, +your container must have an ipv4 address. ## Known issues - ipsets could improve efficiency -- SNAT does not work with ipv6. +- forwarding from localhost does not work with ipv6. diff --git a/plugins/meta/portmap/chain.go b/plugins/meta/portmap/chain.go index f8a53a45..5ebfe6a3 100644 --- a/plugins/meta/portmap/chain.go +++ b/plugins/meta/portmap/chain.go @@ -25,12 +25,14 @@ import ( type chain struct { table string name string - entryRule []string // the rule that enters this chain entryChains []string // the chains to add the entry rule + + entryRules [][]string // the rules that "point" to this chain + rules [][]string // the rules this chain contains } // setup idempotently creates the chain. It will not error if the chain exists. -func (c *chain) setup(ipt *iptables.IPTables, rules [][]string) error { +func (c *chain) setup(ipt *iptables.IPTables) error { // create the chain exists, err := chainExists(ipt, c.table, c.name) if err != nil { @@ -43,17 +45,21 @@ func (c *chain) setup(ipt *iptables.IPTables, rules [][]string) error { } // Add the rules to the chain - for i := len(rules) - 1; i >= 0; i-- { - if err := prependUnique(ipt, c.table, c.name, rules[i]); err != nil { + for i := len(c.rules) - 1; i >= 0; i-- { + if err := prependUnique(ipt, c.table, c.name, c.rules[i]); err != nil { return err } } - // Add the entry rules - entryRule := append(c.entryRule, "-j", c.name) + // Add the entry rules to the entry chains for _, entryChain := range c.entryChains { - if err := prependUnique(ipt, c.table, entryChain, entryRule); err != nil { - return err + for i := len(c.entryRules) - 1; i >= 0; i-- { + r := []string{} + r = append(r, c.entryRules[i]...) + r = append(r, "-j", c.name) + if err := prependUnique(ipt, c.table, entryChain, r); err != nil { + return err + } } } diff --git a/plugins/meta/portmap/chain_test.go b/plugins/meta/portmap/chain_test.go index 5cc4cf64..bff85c12 100644 --- a/plugins/meta/portmap/chain_test.go +++ b/plugins/meta/portmap/chain_test.go @@ -49,8 +49,12 @@ var _ = Describe("chain tests", func() { testChain = chain{ table: TABLE, name: chainName, - entryRule: []string{"-d", "203.0.113.1"}, entryChains: []string{tlChainName}, + entryRules: [][]string{{"-d", "203.0.113.1"}}, + rules: [][]string{ + {"-m", "comment", "--comment", "test 1", "-j", "RETURN"}, + {"-m", "comment", "--comment", "test 2", "-j", "RETURN"}, + }, } ipt, err = iptables.NewWithProtocol(iptables.ProtocolIPv4) @@ -90,11 +94,7 @@ var _ = Describe("chain tests", func() { Expect(err).NotTo(HaveOccurred()) // Create the chain - chainRules := [][]string{ - {"-m", "comment", "--comment", "test 1", "-j", "RETURN"}, - {"-m", "comment", "--comment", "test 2", "-j", "RETURN"}, - } - err = testChain.setup(ipt, chainRules) + err = testChain.setup(ipt) Expect(err).NotTo(HaveOccurred()) // Verify the chain exists @@ -151,15 +151,11 @@ var _ = Describe("chain tests", func() { It("creates chains idempotently", func() { defer cleanup() - // Create the chain - chainRules := [][]string{ - {"-m", "comment", "--comment", "test", "-j", "RETURN"}, - } - err := testChain.setup(ipt, chainRules) + err := testChain.setup(ipt) Expect(err).NotTo(HaveOccurred()) // Create it again! - err = testChain.setup(ipt, chainRules) + err = testChain.setup(ipt) Expect(err).NotTo(HaveOccurred()) // Make sure there are only two rules @@ -167,18 +163,14 @@ var _ = Describe("chain tests", func() { rules, err := ipt.List(TABLE, testChain.name) Expect(err).NotTo(HaveOccurred()) - Expect(len(rules)).To(Equal(2)) + Expect(len(rules)).To(Equal(3)) }) It("deletes chains idempotently", func() { defer cleanup() - // Create the chain - chainRules := [][]string{ - {"-m", "comment", "--comment", "test", "-j", "RETURN"}, - } - err := testChain.setup(ipt, chainRules) + err := testChain.setup(ipt) Expect(err).NotTo(HaveOccurred()) err = testChain.teardown(ipt) diff --git a/plugins/meta/portmap/main.go b/plugins/meta/portmap/main.go index 6424a777..dfc52994 100644 --- a/plugins/meta/portmap/main.go +++ b/plugins/meta/portmap/main.go @@ -47,10 +47,12 @@ type PortMapEntry struct { type PortMapConf struct { types.NetConf - SNAT *bool `json:"snat,omitempty"` - ConditionsV4 *[]string `json:"conditionsV4"` - ConditionsV6 *[]string `json:"conditionsV6"` - RuntimeConfig struct { + SNAT *bool `json:"snat,omitempty"` + ConditionsV4 *[]string `json:"conditionsV4"` + ConditionsV6 *[]string `json:"conditionsV6"` + MarkMasqBit *int `json:"markMasqBit"` + ExternalSetMarkChain *string `json:"externalSetMarkChain"` + RuntimeConfig struct { PortMaps []PortMapEntry `json:"portMappings,omitempty"` } `json:"runtimeConfig,omitempty"` RawPrevResult map[string]interface{} `json:"prevResult,omitempty"` @@ -63,6 +65,10 @@ type PortMapConf struct { ContIPv6 net.IP `json:"-"` } +// The default mark bit to signal that masquerading is required +// Kubernetes uses 14 and 15, Calico uses 20-31. +const DefaultMarkBit = 13 + func cmdAdd(args *skel.CmdArgs) error { netConf, err := parseConfig(args.StdinData, args.IfName) if err != nil { @@ -145,6 +151,19 @@ func parseConfig(stdin []byte, ifName string) (*PortMapConf, error) { conf.SNAT = &tvar } + if conf.MarkMasqBit != nil && conf.ExternalSetMarkChain != nil { + return nil, fmt.Errorf("Cannot specify externalSetMarkChain and markMasqBit") + } + + if conf.MarkMasqBit == nil { + bvar := DefaultMarkBit // go constants are "special" + conf.MarkMasqBit = &bvar + } + + if *conf.MarkMasqBit < 0 || *conf.MarkMasqBit > 31 { + return nil, fmt.Errorf("MasqMarkBit must be between 0 and 31") + } + // Reject invalid port numbers for _, pm := range conf.RuntimeConfig.PortMaps { if pm.ContainerPort <= 0 { diff --git a/plugins/meta/portmap/portmap.go b/plugins/meta/portmap/portmap.go index 133dfef2..870552f6 100644 --- a/plugins/meta/portmap/portmap.go +++ b/plugins/meta/portmap/portmap.go @@ -17,6 +17,7 @@ package main import ( "fmt" "net" + "sort" "strconv" "github.com/containernetworking/plugins/pkg/utils/sysctl" @@ -24,33 +25,26 @@ import ( ) // This creates the chains to be added to iptables. The basic structure is -// a bit complex for efficiencies sake. We create 2 chains: a summary chain +// a bit complex for efficiency's sake. We create 2 chains: a summary chain // that is shared between invocations, and an invocation (container)-specific // chain. This minimizes the number of operations on the top level, but allows // for easy cleanup. // -// We also create DNAT chains to rewrite destinations, and SNAT chains so that -// connections to localhost work. -// // The basic setup (all operations are on the nat table) is: // // DNAT case (rewrite destination IP and port): -// PREROUTING, OUTPUT: --dst-type local -j CNI-HOSTPORT_DNAT -// CNI-HOSTPORT-DNAT: -j CNI-DN-abcd123 +// PREROUTING, OUTPUT: --dst-type local -j CNI-HOSTPORT-DNAT +// CNI-HOSTPORT-DNAT: --destination-ports 8080,8081 -j CNI-DN-abcd123 // CNI-DN-abcd123: -p tcp --dport 8080 -j DNAT --to-destination 192.0.2.33:80 // CNI-DN-abcd123: -p tcp --dport 8081 -j DNAT ... -// -// SNAT case (rewrite source IP from localhost after dnat): -// POSTROUTING: -s 127.0.0.1 ! -d 127.0.0.1 -j CNI-HOSTPORT-SNAT -// CNI-HOSTPORT-SNAT: -j CNI-SN-abcd123 -// CNI-SN-abcd123: -p tcp -s 127.0.0.1 -d 192.0.2.33 --dport 80 -j MASQUERADE -// CNI-SN-abcd123: -p tcp -s 127.0.0.1 -d 192.0.2.33 --dport 90 -j MASQUERADE // The names of the top-level summary chains. // These should never be changed, or else upgrading will require manual // intervention. const TopLevelDNATChainName = "CNI-HOSTPORT-DNAT" -const TopLevelSNATChainName = "CNI-HOSTPORT-SNAT" +const SetMarkChainName = "CNI-HOSTPORT-SETMARK" +const MarkMasqChainName = "CNI-HOSTPORT-MASQ" +const OldTopLevelSNATChainName = "CNI-HOSTPORT-SNAT" // forwardPorts establishes port forwarding to a given container IP. // containerIP can be either v4 or v6. @@ -59,48 +53,35 @@ func forwardPorts(config *PortMapConf, containerIP net.IP) error { var ipt *iptables.IPTables var err error - var conditions *[]string if isV6 { ipt, err = iptables.NewWithProtocol(iptables.ProtocolIPv6) - conditions = config.ConditionsV6 } else { ipt, err = iptables.NewWithProtocol(iptables.ProtocolIPv4) - conditions = config.ConditionsV4 } if err != nil { return fmt.Errorf("failed to open iptables: %v", err) } - toplevelDnatChain := genToplevelDnatChain() - if err := toplevelDnatChain.setup(ipt, nil); err != nil { - return fmt.Errorf("failed to create top-level DNAT chain: %v", err) - } + // Enable masquerading for traffic as necessary. + // The DNAT chain sets a mark bit for traffic that needs masq: + // - connections from localhost + // - hairpin traffic back to the container + // Idempotently create the rule that masquerades traffic with this mark. + // Need to do this first; the DNAT rules reference these chains + if *config.SNAT { + if config.ExternalSetMarkChain == nil { + setMarkChain := genSetMarkChain(*config.MarkMasqBit) + if err := setMarkChain.setup(ipt); err != nil { + return fmt.Errorf("unable to create chain %s: %v", setMarkChain.name, err) + } - dnatChain := genDnatChain(config.Name, config.ContainerID, conditions) - _ = dnatChain.teardown(ipt) // If we somehow collide on this container ID + network, cleanup - - dnatRules := dnatRules(config.RuntimeConfig.PortMaps, containerIP) - if err := dnatChain.setup(ipt, dnatRules); err != nil { - return fmt.Errorf("unable to setup DNAT: %v", err) - } - - // Enable SNAT for connections to localhost. - // This won't work for ipv6, since the kernel doesn't have the equvalent - // route_localnet sysctl. - if *config.SNAT && !isV6 { - toplevelSnatChain := genToplevelSnatChain(isV6) - if err := toplevelSnatChain.setup(ipt, nil); err != nil { - return fmt.Errorf("failed to create top-level SNAT chain: %v", err) + masqChain := genMarkMasqChain(*config.MarkMasqBit) + if err := masqChain.setup(ipt); err != nil { + return fmt.Errorf("unable to create chain %s: %v", setMarkChain.name, err) + } } - snatChain := genSnatChain(config.Name, config.ContainerID) - _ = snatChain.teardown(ipt) - - snatRules := snatRules(config.RuntimeConfig.PortMaps, containerIP) - if err := snatChain.setup(ipt, snatRules); err != nil { - return fmt.Errorf("unable to setup SNAT: %v", err) - } if !isV6 { // Set the route_localnet bit on the host interface, so that // 127/8 can cross a routing boundary. @@ -113,6 +94,20 @@ func forwardPorts(config *PortMapConf, containerIP net.IP) error { } } + // Generate the DNAT (actual port forwarding) rules + toplevelDnatChain := genToplevelDnatChain() + if err := toplevelDnatChain.setup(ipt); err != nil { + return fmt.Errorf("failed to create top-level DNAT chain: %v", err) + } + + dnatChain := genDnatChain(config.Name, config.ContainerID) + // First, idempotently tear down this chain in case there was some + // sort of collision or bad state. + fillDnatRules(&dnatChain, config, containerIP) + if err := dnatChain.setup(ipt); err != nil { + return fmt.Errorf("unable to setup DNAT: %v", err) + } + return nil } @@ -124,106 +119,153 @@ func genToplevelDnatChain() chain { return chain{ table: "nat", name: TopLevelDNATChainName, - entryRule: []string{ + entryRules: [][]string{{ "-m", "addrtype", "--dst-type", "LOCAL", - }, + }}, entryChains: []string{"PREROUTING", "OUTPUT"}, } } // genDnatChain creates the per-container chain. // Conditions are any static entry conditions for the chain. -func genDnatChain(netName, containerID string, conditions *[]string) chain { - name := formatChainName("DN-", netName, containerID) - comment := fmt.Sprintf(`dnat name: "%s" id: "%s"`, netName, containerID) - - ch := chain{ - table: "nat", - name: name, - entryRule: []string{ - "-m", "comment", - "--comment", comment, - }, +func genDnatChain(netName, containerID string) chain { + return chain{ + table: "nat", + name: formatChainName("DN-", netName, containerID), entryChains: []string{TopLevelDNATChainName}, } - if conditions != nil && len(*conditions) != 0 { - ch.entryRule = append(ch.entryRule, *conditions...) - } - - return ch } // dnatRules generates the destination NAT rules, one per port, to direct // traffic from hostip:hostport to podip:podport -func dnatRules(entries []PortMapEntry, containerIP net.IP) [][]string { - out := make([][]string, 0, len(entries)) +func fillDnatRules(c *chain, config *PortMapConf, containerIP net.IP) { + isV6 := (containerIP.To4() == nil) + comment := trimComment(fmt.Sprintf(`dnat name: "%s" id: "%s"`, config.Name, config.ContainerID)) + entries := config.RuntimeConfig.PortMaps + setMarkChainName := SetMarkChainName + if config.ExternalSetMarkChain != nil { + setMarkChainName = *config.ExternalSetMarkChain + } + + //Generate the dnat entry rules. We'll use multiport, but it ony accepts + // up to 15 rules, so partition the list if needed. + // Do it in a stable order for testing + protoPorts := groupByProto(entries) + protos := []string{} + for proto := range protoPorts { + protos = append(protos, proto) + } + sort.Strings(protos) + for _, proto := range protos { + for _, portSpec := range splitPortList(protoPorts[proto]) { + r := []string{ + "-m", "comment", + "--comment", comment, + "-m", "multiport", + "-p", proto, + "--destination-ports", portSpec, + } + + if isV6 && config.ConditionsV6 != nil && len(*config.ConditionsV6) > 0 { + r = append(r, *config.ConditionsV6...) + } else if !isV6 && config.ConditionsV4 != nil && len(*config.ConditionsV4) > 0 { + r = append(r, *config.ConditionsV4...) + } + c.entryRules = append(c.entryRules, r) + } + } + + // For every entry, generate 3 rules: + // - mark hairpin for masq + // - mark localhost for masq (for v4) + // - do dnat + // the ordering is important here; the mark rules must be first. + c.rules = make([][]string, 0, 3*len(entries)) for _, entry := range entries { - rule := []string{ + ruleBase := []string{ "-p", entry.Protocol, "--dport", strconv.Itoa(entry.HostPort)} - if entry.HostIP != "" { - rule = append(rule, + ruleBase = append(ruleBase, "-d", entry.HostIP) } - rule = append(rule, + // Add mark-to-masquerade rules for hairpin and localhost + if *config.SNAT { + // hairpin + hpRule := make([]string, len(ruleBase), len(ruleBase)+4) + copy(hpRule, ruleBase) + + hpRule = append(hpRule, + "-s", containerIP.String(), + "-j", setMarkChainName, + ) + c.rules = append(c.rules, hpRule) + + if !isV6 { + // localhost + localRule := make([]string, len(ruleBase), len(ruleBase)+4) + copy(localRule, ruleBase) + + localRule = append(localRule, + "-s", "127.0.0.1", + "-j", setMarkChainName, + ) + c.rules = append(c.rules, localRule) + } + } + + // The actual dnat rule + dnatRule := make([]string, len(ruleBase), len(ruleBase)+4) + copy(dnatRule, ruleBase) + dnatRule = append(dnatRule, "-j", "DNAT", - "--to-destination", fmtIpPort(containerIP, entry.ContainerPort)) - - out = append(out, rule) - } - return out -} - -// genToplevelSnatChain creates the top-level summary snat chain. -// IMPORTANT: do not change this, or else upgrading plugins will require -// manual intervention -func genToplevelSnatChain(isV6 bool) chain { - return chain{ - table: "nat", - name: TopLevelSNATChainName, - entryRule: []string{ - "-s", localhostIP(isV6), - "!", "-d", localhostIP(isV6), - }, - entryChains: []string{"POSTROUTING"}, + "--to-destination", fmtIpPort(containerIP, entry.ContainerPort), + ) + c.rules = append(c.rules, dnatRule) } } -// genSnatChain creates the snat (localhost) chain for this container. -func genSnatChain(netName, containerID string) chain { - name := formatChainName("SN-", netName, containerID) - comment := fmt.Sprintf(`snat name: "%s" id: "%s"`, netName, containerID) - - return chain{ +// genSetMarkChain creates the SETMARK chain - the chain that sets the +// "to-be-masqueraded" mark and returns. +// Chains are idempotent, so we'll always create this. +func genSetMarkChain(markBit int) chain { + markValue := 1 << uint(markBit) + markDef := fmt.Sprintf("%#x/%#x", markValue, markValue) + ch := chain{ table: "nat", - name: name, - entryRule: []string{ + name: SetMarkChainName, + rules: [][]string{{ "-m", "comment", - "--comment", comment, - }, - entryChains: []string{TopLevelSNATChainName}, + "--comment", "CNI portfwd masquerade mark", + "-j", "MARK", + "--set-xmark", markDef, + }}, } + return ch } -// snatRules sets up masquerading for connections to localhost:hostport, -// rewriting the source so that returning packets are correct. -func snatRules(entries []PortMapEntry, containerIP net.IP) [][]string { - isV6 := (containerIP.To4() == nil) - - out := make([][]string, 0, len(entries)) - for _, entry := range entries { - out = append(out, []string{ - "-p", entry.Protocol, - "-s", localhostIP(isV6), - "-d", containerIP.String(), - "--dport", strconv.Itoa(entry.ContainerPort), +// genMarkMasqChain creates the chain that masquerades all packets marked +// in the SETMARK chain +func genMarkMasqChain(markBit int) chain { + markValue := 1 << uint(markBit) + markDef := fmt.Sprintf("%#x/%#x", markValue, markValue) + ch := chain{ + table: "nat", + name: MarkMasqChainName, + entryChains: []string{"POSTROUTING"}, + entryRules: [][]string{{ + "-m", "comment", + "--comment", "CNI portfwd requiring masquerade", + }}, + rules: [][]string{{ + "-m", "mark", + "--mark", markDef, "-j", "MASQUERADE", - }) + }}, } - return out + return ch } // enableLocalnetRouting tells the kernel not to treat 127/8 as a martian, @@ -234,6 +276,18 @@ func enableLocalnetRouting(ifName string) error { return err } +// genOldSnatChain is no longer used, but used to be created. We'll try and +// tear it down in case the plugin version changed between ADD and DEL +func genOldSnatChain(netName, containerID string) chain { + name := formatChainName("SN-", netName, containerID) + + return chain{ + table: "nat", + name: name, + entryChains: []string{OldTopLevelSNATChainName}, + } +} + // unforwardPorts deletes any iptables rules created by this plugin. // It should be idempotent - it will not error if the chain does not exist. // @@ -245,8 +299,10 @@ func enableLocalnetRouting(ifName string) error { // So, we first check that iptables is "generally OK" by doing a check. If // not, we ignore the error, unless neither v4 nor v6 are OK. func unforwardPorts(config *PortMapConf) error { - dnatChain := genDnatChain(config.Name, config.ContainerID, nil) - snatChain := genSnatChain(config.Name, config.ContainerID) + dnatChain := genDnatChain(config.Name, config.ContainerID) + + // Might be lying around from old versions + oldSnatChain := genOldSnatChain(config.Name, config.ContainerID) ip4t := maybeGetIptables(false) ip6t := maybeGetIptables(true) @@ -258,16 +314,14 @@ func unforwardPorts(config *PortMapConf) error { if err := dnatChain.teardown(ip4t); err != nil { return fmt.Errorf("could not teardown ipv4 dnat: %v", err) } - if err := snatChain.teardown(ip4t); err != nil { - return fmt.Errorf("could not teardown ipv4 snat: %v", err) - } + oldSnatChain.teardown(ip4t) } if ip6t != nil { if err := dnatChain.teardown(ip6t); err != nil { return fmt.Errorf("could not teardown ipv6 dnat: %v", err) } - // no SNAT teardown because it doesn't work for v6 + oldSnatChain.teardown(ip6t) } return nil } diff --git a/plugins/meta/portmap/portmap_integ_test.go b/plugins/meta/portmap/portmap_integ_test.go index 91a7518f..7bd9e0cf 100644 --- a/plugins/meta/portmap/portmap_integ_test.go +++ b/plugins/meta/portmap/portmap_integ_test.go @@ -15,12 +15,14 @@ package main import ( + "bytes" "fmt" "math/rand" "net" "os" + "os/exec" "path/filepath" - "time" + "strconv" "github.com/containernetworking/cni/libcni" "github.com/containernetworking/cni/pkg/types/current" @@ -124,13 +126,20 @@ var _ = Describe("portmap integration tests", func() { // we'll also manually check the iptables chains ipt, err := iptables.NewWithProtocol(iptables.ProtocolIPv4) Expect(err).NotTo(HaveOccurred()) - dnatChainName := genDnatChain("cni-portmap-unit-test", runtimeConfig.ContainerID, nil).name + dnatChainName := genDnatChain("cni-portmap-unit-test", runtimeConfig.ContainerID).name // Create the network resI, err := cniConf.AddNetworkList(configList, &runtimeConfig) Expect(err).NotTo(HaveOccurred()) defer deleteNetwork() + // Undo Docker's forwarding policy + cmd := exec.Command("iptables", "-t", "filter", + "-P", "FORWARD", "ACCEPT") + cmd.Stderr = GinkgoWriter + err = cmd.Run() + Expect(err).NotTo(HaveOccurred()) + // Check the chain exists _, err = ipt.List("nat", dnatChainName) Expect(err).NotTo(HaveOccurred()) @@ -155,13 +164,16 @@ var _ = Describe("portmap integration tests", func() { hostIP, hostPort, contIP, containerPort) // Sanity check: verify that the container is reachable directly - contOK := testEchoServer(fmt.Sprintf("%s:%d", contIP.String(), containerPort)) + contOK := testEchoServer(contIP.String(), containerPort, "") // Verify that a connection to the forwarded port works - dnatOK := testEchoServer(fmt.Sprintf("%s:%d", hostIP, hostPort)) + dnatOK := testEchoServer(hostIP, hostPort, "") // Verify that a connection to localhost works - snatOK := testEchoServer(fmt.Sprintf("%s:%d", "127.0.0.1", hostPort)) + snatOK := testEchoServer("127.0.0.1", hostPort, "") + + // verify that hairpin works + hairpinOK := testEchoServer(hostIP, hostPort, targetNS.Path()) // Cleanup session.Terminate() @@ -182,6 +194,9 @@ var _ = Describe("portmap integration tests", func() { if !snatOK { Fail("connection to 127.0.0.1 was not forwarded") } + if !hairpinOK { + Fail("Hairpin connection failed") + } close(done) @@ -189,40 +204,33 @@ var _ = Describe("portmap integration tests", func() { }) // testEchoServer returns true if we found an echo server on the port -func testEchoServer(address string) bool { - fmt.Fprintln(GinkgoWriter, "dialing", address) - conn, err := net.Dial("tcp", address) - if err != nil { - fmt.Fprintln(GinkgoWriter, "connection to", address, "failed:", err) - return false - } - defer conn.Close() - - conn.SetDeadline(time.Now().Add(TIMEOUT * time.Second)) - fmt.Fprintln(GinkgoWriter, "connected to", address) - +func testEchoServer(address string, port int, netns string) bool { message := "Aliquid melius quam pessimum optimum non est." - _, err = fmt.Fprint(conn, message) + + bin, err := exec.LookPath("nc") + Expect(err).NotTo(HaveOccurred()) + var cmd *exec.Cmd + if netns != "" { + netns = filepath.Base(netns) + cmd = exec.Command("ip", "netns", "exec", netns, bin, "-v", address, strconv.Itoa(port)) + } else { + cmd = exec.Command("nc", address, strconv.Itoa(port)) + } + cmd.Stdin = bytes.NewBufferString(message) + cmd.Stderr = GinkgoWriter + out, err := cmd.Output() if err != nil { - fmt.Fprintln(GinkgoWriter, "sending message to", address, " failed:", err) + fmt.Fprintln(GinkgoWriter, "got non-zero exit from ", cmd.Args) return false } - conn.SetDeadline(time.Now().Add(TIMEOUT * time.Second)) - fmt.Fprintln(GinkgoWriter, "reading...") - response := make([]byte, len(message)) - _, err = conn.Read(response) - if err != nil { - fmt.Fprintln(GinkgoWriter, "receiving message from", address, " failed:", err) + if string(out) != message { + fmt.Fprintln(GinkgoWriter, "returned message didn't match?") + fmt.Fprintln(GinkgoWriter, string(out)) return false } - fmt.Fprintln(GinkgoWriter, "read...") - if string(response) == message { - return true - } - fmt.Fprintln(GinkgoWriter, "returned message didn't match?") - return false + return true } func getLocalIP() string { diff --git a/plugins/meta/portmap/portmap_test.go b/plugins/meta/portmap/portmap_test.go index cf5ed5fb..d47b483c 100644 --- a/plugins/meta/portmap/portmap_test.go +++ b/plugins/meta/portmap/portmap_test.go @@ -15,6 +15,7 @@ package main import ( + "fmt" "net" . "github.com/onsi/ginkgo" @@ -25,13 +26,6 @@ var _ = Describe("portmapping configuration", func() { netName := "testNetName" containerID := "icee6giejonei6sohng6ahngee7laquohquee9shiGo7fohferakah3Feiyoolu2pei7ciPhoh7shaoX6vai3vuf0ahfaeng8yohb9ceu0daez5hashee8ooYai5wa3y" - mappings := []PortMapEntry{ - {80, 90, "tcp", ""}, - {1000, 2000, "udp", ""}, - } - ipv4addr := net.ParseIP("192.2.0.1") - ipv6addr := net.ParseIP("2001:db8::1") - Context("config parsing", func() { It("Correctly parses an ADD config", func() { configBytes := []byte(`{ @@ -156,101 +150,179 @@ var _ = Describe("portmapping configuration", func() { Describe("Generating chains", func() { Context("for DNAT", func() { - It("generates a correct container chain", func() { - ch := genDnatChain(netName, containerID, &[]string{"-m", "hello"}) + It("generates a correct standard container chain", func() { + ch := genDnatChain(netName, containerID) Expect(ch).To(Equal(chain{ - table: "nat", - name: "CNI-DN-bfd599665540dd91d5d28", - entryRule: []string{ - "-m", "comment", - "--comment", `dnat name: "testNetName" id: "` + containerID + `"`, - "-m", "hello", - }, + table: "nat", + name: "CNI-DN-bfd599665540dd91d5d28", entryChains: []string{TopLevelDNATChainName}, })) + configBytes := []byte(`{ + "name": "test", + "type": "portmap", + "cniVersion": "0.3.1", + "runtimeConfig": { + "portMappings": [ + { "hostPort": 8080, "containerPort": 80, "protocol": "tcp"}, + { "hostPort": 8081, "containerPort": 80, "protocol": "tcp"}, + { "hostPort": 8080, "containerPort": 81, "protocol": "udp"}, + { "hostPort": 8082, "containerPort": 82, "protocol": "udp"} + ] + }, + "snat": true, + "conditionsV4": ["a", "b"], + "conditionsV6": ["c", "d"] +}`) + + conf, err := parseConfig(configBytes, "foo") + Expect(err).NotTo(HaveOccurred()) + conf.ContainerID = containerID + + ch = genDnatChain(conf.Name, containerID) + Expect(ch).To(Equal(chain{ + table: "nat", + name: "CNI-DN-67e92b96e692a494b6b85", + entryChains: []string{"CNI-HOSTPORT-DNAT"}, + })) + + fillDnatRules(&ch, conf, net.ParseIP("10.0.0.2")) + + Expect(ch.entryRules).To(Equal([][]string{ + {"-m", "comment", "--comment", + fmt.Sprintf("dnat name: \"test\" id: \"%s\"", containerID), + "-m", "multiport", + "-p", "tcp", + "--destination-ports", "8080,8081", + "a", "b"}, + {"-m", "comment", "--comment", + fmt.Sprintf("dnat name: \"test\" id: \"%s\"", containerID), + "-m", "multiport", + "-p", "udp", + "--destination-ports", "8080,8082", + "a", "b"}, + })) + + Expect(ch.rules).To(Equal([][]string{ + {"-p", "tcp", "--dport", "8080", "-s", "10.0.0.2", "-j", "CNI-HOSTPORT-SETMARK"}, + {"-p", "tcp", "--dport", "8080", "-s", "127.0.0.1", "-j", "CNI-HOSTPORT-SETMARK"}, + {"-p", "tcp", "--dport", "8080", "-j", "DNAT", "--to-destination", "10.0.0.2:80"}, + {"-p", "tcp", "--dport", "8081", "-s", "10.0.0.2", "-j", "CNI-HOSTPORT-SETMARK"}, + {"-p", "tcp", "--dport", "8081", "-s", "127.0.0.1", "-j", "CNI-HOSTPORT-SETMARK"}, + {"-p", "tcp", "--dport", "8081", "-j", "DNAT", "--to-destination", "10.0.0.2:80"}, + {"-p", "udp", "--dport", "8080", "-s", "10.0.0.2", "-j", "CNI-HOSTPORT-SETMARK"}, + {"-p", "udp", "--dport", "8080", "-s", "127.0.0.1", "-j", "CNI-HOSTPORT-SETMARK"}, + {"-p", "udp", "--dport", "8080", "-j", "DNAT", "--to-destination", "10.0.0.2:81"}, + {"-p", "udp", "--dport", "8082", "-s", "10.0.0.2", "-j", "CNI-HOSTPORT-SETMARK"}, + {"-p", "udp", "--dport", "8082", "-s", "127.0.0.1", "-j", "CNI-HOSTPORT-SETMARK"}, + {"-p", "udp", "--dport", "8082", "-j", "DNAT", "--to-destination", "10.0.0.2:82"}, + })) + + ch.rules = nil + ch.entryRules = nil + + fillDnatRules(&ch, conf, net.ParseIP("2001:db8::2")) + + Expect(ch.rules).To(Equal([][]string{ + {"-p", "tcp", "--dport", "8080", "-s", "2001:db8::2", "-j", "CNI-HOSTPORT-SETMARK"}, + {"-p", "tcp", "--dport", "8080", "-j", "DNAT", "--to-destination", "[2001:db8::2]:80"}, + {"-p", "tcp", "--dport", "8081", "-s", "2001:db8::2", "-j", "CNI-HOSTPORT-SETMARK"}, + {"-p", "tcp", "--dport", "8081", "-j", "DNAT", "--to-destination", "[2001:db8::2]:80"}, + {"-p", "udp", "--dport", "8080", "-s", "2001:db8::2", "-j", "CNI-HOSTPORT-SETMARK"}, + {"-p", "udp", "--dport", "8080", "-j", "DNAT", "--to-destination", "[2001:db8::2]:81"}, + {"-p", "udp", "--dport", "8082", "-s", "2001:db8::2", "-j", "CNI-HOSTPORT-SETMARK"}, + {"-p", "udp", "--dport", "8082", "-j", "DNAT", "--to-destination", "[2001:db8::2]:82"}, + })) + + // Disable snat, generate rules + ch.rules = nil + ch.entryRules = nil + fvar := false + conf.SNAT = &fvar + + fillDnatRules(&ch, conf, net.ParseIP("10.0.0.2")) + Expect(ch.rules).To(Equal([][]string{ + {"-p", "tcp", "--dport", "8080", "-j", "DNAT", "--to-destination", "10.0.0.2:80"}, + {"-p", "tcp", "--dport", "8081", "-j", "DNAT", "--to-destination", "10.0.0.2:80"}, + {"-p", "udp", "--dport", "8080", "-j", "DNAT", "--to-destination", "10.0.0.2:81"}, + {"-p", "udp", "--dport", "8082", "-j", "DNAT", "--to-destination", "10.0.0.2:82"}, + })) + }) + + It("generates a correct chain with external mark", func() { + ch := genDnatChain(netName, containerID) + + Expect(ch).To(Equal(chain{ + table: "nat", + name: "CNI-DN-bfd599665540dd91d5d28", + entryChains: []string{TopLevelDNATChainName}, + })) + configBytes := []byte(`{ + "name": "test", + "type": "portmap", + "cniVersion": "0.3.1", + "runtimeConfig": { + "portMappings": [ + { "hostPort": 8080, "containerPort": 80, "protocol": "tcp"} + ] + }, + "externalSetMarkChain": "PLZ-SET-MARK", + "conditionsV4": ["a", "b"], + "conditionsV6": ["c", "d"] +}`) + + conf, err := parseConfig(configBytes, "foo") + Expect(err).NotTo(HaveOccurred()) + conf.ContainerID = containerID + + ch = genDnatChain(conf.Name, containerID) + fillDnatRules(&ch, conf, net.ParseIP("10.0.0.2")) + Expect(ch.rules).To(Equal([][]string{ + {"-p", "tcp", "--dport", "8080", "-s", "10.0.0.2", "-j", "PLZ-SET-MARK"}, + {"-p", "tcp", "--dport", "8080", "-s", "127.0.0.1", "-j", "PLZ-SET-MARK"}, + {"-p", "tcp", "--dport", "8080", "-j", "DNAT", "--to-destination", "10.0.0.2:80"}, + })) }) It("generates a correct top-level chain", func() { ch := genToplevelDnatChain() Expect(ch).To(Equal(chain{ - table: "nat", - name: "CNI-HOSTPORT-DNAT", - entryRule: []string{ - "-m", "addrtype", - "--dst-type", "LOCAL", - }, + table: "nat", + name: "CNI-HOSTPORT-DNAT", entryChains: []string{"PREROUTING", "OUTPUT"}, + entryRules: [][]string{{"-m", "addrtype", "--dst-type", "LOCAL"}}, })) }) - }) - - Context("for SNAT", func() { - It("generates a correct container chain", func() { - ch := genSnatChain(netName, containerID) + It("generates the correct mark chains", func() { + masqBit := 5 + ch := genSetMarkChain(masqBit) Expect(ch).To(Equal(chain{ table: "nat", - name: "CNI-SN-bfd599665540dd91d5d28", - entryRule: []string{ + name: "CNI-HOSTPORT-SETMARK", + rules: [][]string{{ "-m", "comment", - "--comment", `snat name: "testNetName" id: "` + containerID + `"`, - }, - entryChains: []string{TopLevelSNATChainName}, + "--comment", "CNI portfwd masquerade mark", + "-j", "MARK", + "--set-xmark", "0x20/0x20", + }}, })) - }) - It("generates a correct top-level chain", func() { - Context("for ipv4", func() { - ch := genToplevelSnatChain(false) - Expect(ch).To(Equal(chain{ - table: "nat", - name: "CNI-HOSTPORT-SNAT", - entryRule: []string{ - "-s", "127.0.0.1", - "!", "-d", "127.0.0.1", - }, - entryChains: []string{"POSTROUTING"}, - })) - }) - }) - }) - }) - - Describe("Forwarding rules", func() { - Context("for DNAT", func() { - It("generates correct ipv4 rules", func() { - rules := dnatRules(mappings, ipv4addr) - Expect(rules).To(Equal([][]string{ - {"-p", "tcp", "--dport", "80", "-j", "DNAT", "--to-destination", "192.2.0.1:90"}, - {"-p", "udp", "--dport", "1000", "-j", "DNAT", "--to-destination", "192.2.0.1:2000"}, - })) - }) - It("generates correct ipv6 rules", func() { - rules := dnatRules(mappings, ipv6addr) - Expect(rules).To(Equal([][]string{ - {"-p", "tcp", "--dport", "80", "-j", "DNAT", "--to-destination", "[2001:db8::1]:90"}, - {"-p", "udp", "--dport", "1000", "-j", "DNAT", "--to-destination", "[2001:db8::1]:2000"}, - })) - }) - }) - - Context("for SNAT", func() { - - It("generates correct ipv4 rules", func() { - rules := snatRules(mappings, ipv4addr) - Expect(rules).To(Equal([][]string{ - {"-p", "tcp", "-s", "127.0.0.1", "-d", "192.2.0.1", "--dport", "90", "-j", "MASQUERADE"}, - {"-p", "udp", "-s", "127.0.0.1", "-d", "192.2.0.1", "--dport", "2000", "-j", "MASQUERADE"}, - })) - }) - - It("generates correct ipv6 rules", func() { - rules := snatRules(mappings, ipv6addr) - Expect(rules).To(Equal([][]string{ - {"-p", "tcp", "-s", "::1", "-d", "2001:db8::1", "--dport", "90", "-j", "MASQUERADE"}, - {"-p", "udp", "-s", "::1", "-d", "2001:db8::1", "--dport", "2000", "-j", "MASQUERADE"}, + ch = genMarkMasqChain(masqBit) + Expect(ch).To(Equal(chain{ + table: "nat", + name: "CNI-HOSTPORT-MASQ", + entryChains: []string{"POSTROUTING"}, + entryRules: [][]string{{ + "-m", "comment", + "--comment", "CNI portfwd requiring masquerade", + }}, + rules: [][]string{{ + "-m", "mark", + "--mark", "0x20/0x20", + "-j", "MASQUERADE", + }}, })) }) }) diff --git a/plugins/meta/portmap/utils.go b/plugins/meta/portmap/utils.go index a0c9b33b..6d6bc298 100644 --- a/plugins/meta/portmap/utils.go +++ b/plugins/meta/portmap/utils.go @@ -18,6 +18,8 @@ import ( "crypto/sha512" "fmt" "net" + "strconv" + "strings" "github.com/vishvananda/netlink" ) @@ -65,3 +67,51 @@ func formatChainName(prefix, name, id string) string { chain := fmt.Sprintf("CNI-%s%x", prefix, chainBytes) return chain[:maxChainNameLength] } + +// groupByProto groups port numbers by protocol +func groupByProto(entries []PortMapEntry) map[string][]int { + if len(entries) == 0 { + return map[string][]int{} + } + out := map[string][]int{} + for _, e := range entries { + _, ok := out[e.Protocol] + if ok { + out[e.Protocol] = append(out[e.Protocol], e.HostPort) + } else { + out[e.Protocol] = []int{e.HostPort} + } + } + + return out +} + +// splitPortList splits a list of integers in to one or more comma-separated +// string values, for use by multiport. Multiport only allows up to 15 ports +// per entry. +func splitPortList(l []int) []string { + out := []string{} + + acc := []string{} + for _, i := range l { + acc = append(acc, strconv.Itoa(i)) + if len(acc) == 15 { + out = append(out, strings.Join(acc, ",")) + acc = []string{} + } + } + + if len(acc) > 0 { + out = append(out, strings.Join(acc, ",")) + } + return out +} + +// trimComment makes sure no comment is over the iptables limit of 255 chars +func trimComment(val string) string { + if len(val) <= 255 { + return val + } + + return val[0:253] + "..." +}