From 94be1cfaabec001d2f5c61a1832436e7fb49f9ef Mon Sep 17 00:00:00 2001 From: Eugene Yakubovich Date: Thu, 17 Sep 2015 11:51:14 -0700 Subject: [PATCH 1/5] revendoring netlink --- Godeps/Godeps.json | 2 +- .../github.com/vishvananda/netlink/README.md | 8 +- .../github.com/vishvananda/netlink/addr.go | 4 +- .../vishvananda/netlink/addr_linux.go | 18 +- .../github.com/vishvananda/netlink/class.go | 110 +++++ .../vishvananda/netlink/class_linux.go | 144 ++++++ .../vishvananda/netlink/class_test.go | 102 +++++ .../github.com/vishvananda/netlink/filter.go | 55 +++ .../vishvananda/netlink/filter_linux.go | 191 ++++++++ .../vishvananda/netlink/filter_test.go | 91 ++++ .../github.com/vishvananda/netlink/link.go | 40 +- .../vishvananda/netlink/link_linux.go | 101 +++-- .../vishvananda/netlink/link_test.go | 188 +++++++- .../vishvananda/netlink/neigh_linux.go | 2 +- .../vishvananda/netlink/nl/link_linux.go | 10 +- .../vishvananda/netlink/nl/nl_linux.go | 27 +- .../vishvananda/netlink/nl/route_linux.go | 9 + .../vishvananda/netlink/nl/tc_linux.go | 425 ++++++++++++++++++ .../vishvananda/netlink/nl/tc_linux_test.go | 173 +++++++ .../vishvananda/netlink/nl/xfrm_linux.go | 7 +- .../vishvananda/netlink/protinfo.go | 2 +- .../github.com/vishvananda/netlink/qdisc.go | 167 +++++++ .../vishvananda/netlink/qdisc_linux.go | 316 +++++++++++++ .../vishvananda/netlink/qdisc_test.go | 171 +++++++ .../github.com/vishvananda/netlink/route.go | 6 + .../vishvananda/netlink/route_linux.go | 124 ++--- .../vishvananda/netlink/route_test.go | 62 +++ .../vishvananda/netlink/xfrm_policy_linux.go | 2 +- .../vishvananda/netlink/xfrm_state_linux.go | 2 +- 29 files changed, 2442 insertions(+), 117 deletions(-) create mode 100644 Godeps/_workspace/src/github.com/vishvananda/netlink/class.go create mode 100644 Godeps/_workspace/src/github.com/vishvananda/netlink/class_linux.go create mode 100644 Godeps/_workspace/src/github.com/vishvananda/netlink/class_test.go create mode 100644 Godeps/_workspace/src/github.com/vishvananda/netlink/filter.go create mode 100644 Godeps/_workspace/src/github.com/vishvananda/netlink/filter_linux.go create mode 100644 Godeps/_workspace/src/github.com/vishvananda/netlink/filter_test.go create mode 100644 Godeps/_workspace/src/github.com/vishvananda/netlink/nl/tc_linux.go create mode 100644 Godeps/_workspace/src/github.com/vishvananda/netlink/nl/tc_linux_test.go create mode 100644 Godeps/_workspace/src/github.com/vishvananda/netlink/qdisc.go create mode 100644 Godeps/_workspace/src/github.com/vishvananda/netlink/qdisc_linux.go create mode 100644 Godeps/_workspace/src/github.com/vishvananda/netlink/qdisc_test.go diff --git a/Godeps/Godeps.json b/Godeps/Godeps.json index 56691c81..d1de7671 100644 --- a/Godeps/Godeps.json +++ b/Godeps/Godeps.json @@ -24,7 +24,7 @@ }, { "ImportPath": "github.com/vishvananda/netlink", - "Rev": "ae3e7dba57271b4e976c4f91637861ee477135e2" + "Rev": "ecf47fd5739b3d2c3daf7c89c4b9715a2605c21b" }, { "ImportPath": "golang.org/x/sys/unix", diff --git a/Godeps/_workspace/src/github.com/vishvananda/netlink/README.md b/Godeps/_workspace/src/github.com/vishvananda/netlink/README.md index 555f8865..8cd50a93 100644 --- a/Godeps/_workspace/src/github.com/vishvananda/netlink/README.md +++ b/Godeps/_workspace/src/github.com/vishvananda/netlink/README.md @@ -43,13 +43,19 @@ import ( ) func main() { - mybridge := &netlink.Bridge{netlink.LinkAttrs{Name: "foo"}} + la := netlink.NewLinkAttrs() + la.Name = "foo" + mybridge := &netlink.Bridge{la}} _ := netlink.LinkAdd(mybridge) eth1, _ := netlink.LinkByName("eth1") netlink.LinkSetMaster(eth1, mybridge) } ``` +Note `NewLinkAttrs` constructor, it sets default values in structure. For now +it sets only `TxQLen` to `-1`, so kernel will set default by itself. If you're +using simple initialization(`LinkAttrs{Name: "foo"}`) `TxQLen` will be set to +`0` unless you specify it like `LinkAttrs{Name: "foo", TxQLen: 1000}`. Add a new ip address to loopback: diff --git a/Godeps/_workspace/src/github.com/vishvananda/netlink/addr.go b/Godeps/_workspace/src/github.com/vishvananda/netlink/addr.go index 5c12f4e9..9bbaf508 100644 --- a/Godeps/_workspace/src/github.com/vishvananda/netlink/addr.go +++ b/Godeps/_workspace/src/github.com/vishvananda/netlink/addr.go @@ -14,8 +14,8 @@ type Addr struct { } // String returns $ip/$netmask $label -func (addr Addr) String() string { - return fmt.Sprintf("%s %s", addr.IPNet, addr.Label) +func (a Addr) String() string { + return fmt.Sprintf("%s %s", a.IPNet, a.Label) } // ParseAddr parses the string representation of an address in the diff --git a/Godeps/_workspace/src/github.com/vishvananda/netlink/addr_linux.go b/Godeps/_workspace/src/github.com/vishvananda/netlink/addr_linux.go index dd26f4ae..19aac0fb 100644 --- a/Godeps/_workspace/src/github.com/vishvananda/netlink/addr_linux.go +++ b/Godeps/_workspace/src/github.com/vishvananda/netlink/addr_linux.go @@ -81,7 +81,7 @@ func AddrList(link Link, family int) ([]Addr, error) { index = base.Index } - res := make([]Addr, 0) + var res []Addr for _, m := range msgs { msg := nl.DeserializeIfAddrmsg(m) @@ -95,11 +95,17 @@ func AddrList(link Link, family int) ([]Addr, error) { return nil, err } + var local, dst *net.IPNet var addr Addr for _, attr := range attrs { switch attr.Attr.Type { case syscall.IFA_ADDRESS: - addr.IPNet = &net.IPNet{ + dst = &net.IPNet{ + IP: attr.Value, + Mask: net.CIDRMask(int(msg.Prefixlen), 8*len(attr.Value)), + } + case syscall.IFA_LOCAL: + local = &net.IPNet{ IP: attr.Value, Mask: net.CIDRMask(int(msg.Prefixlen), 8*len(attr.Value)), } @@ -107,6 +113,14 @@ func AddrList(link Link, family int) ([]Addr, error) { addr.Label = string(attr.Value[:len(attr.Value)-1]) } } + + // IFA_LOCAL should be there but if not, fall back to IFA_ADDRESS + if local != nil { + addr.IPNet = local + } else { + addr.IPNet = dst + } + res = append(res, addr) } diff --git a/Godeps/_workspace/src/github.com/vishvananda/netlink/class.go b/Godeps/_workspace/src/github.com/vishvananda/netlink/class.go new file mode 100644 index 00000000..35bdb331 --- /dev/null +++ b/Godeps/_workspace/src/github.com/vishvananda/netlink/class.go @@ -0,0 +1,110 @@ +package netlink + +import ( + "fmt" +) + +type Class interface { + Attrs() *ClassAttrs + Type() string +} + +// Class represents a netlink class. A filter is associated with a link, +// has a handle and a parent. The root filter of a device should have a +// parent == HANDLE_ROOT. +type ClassAttrs struct { + LinkIndex int + Handle uint32 + Parent uint32 + Leaf uint32 +} + +func (q ClassAttrs) String() string { + return fmt.Sprintf("{LinkIndex: %d, Handle: %s, Parent: %s, Leaf: %s}", q.LinkIndex, HandleStr(q.Handle), HandleStr(q.Parent), q.Leaf) +} + +type HtbClassAttrs struct { + // TODO handle all attributes + Rate uint64 + Ceil uint64 + Buffer uint32 + Cbuffer uint32 + Quantum uint32 + Level uint32 + Prio uint32 +} + +func (q HtbClassAttrs) String() string { + return fmt.Sprintf("{Rate: %d, Ceil: %d, Buffer: %d, Cbuffer: %d}", q.Rate, q.Ceil, q.Buffer, q.Cbuffer) +} + +// Htb class +type HtbClass struct { + ClassAttrs + Rate uint64 + Ceil uint64 + Buffer uint32 + Cbuffer uint32 + Quantum uint32 + Level uint32 + Prio uint32 +} + +func NewHtbClass(attrs ClassAttrs, cattrs HtbClassAttrs) *HtbClass { + mtu := 1600 + rate := cattrs.Rate / 8 + ceil := cattrs.Ceil / 8 + buffer := cattrs.Buffer + cbuffer := cattrs.Cbuffer + if ceil == 0 { + ceil = rate + } + + if buffer == 0 { + buffer = uint32(float64(rate)/Hz() + float64(mtu)) + } + buffer = uint32(Xmittime(rate, buffer)) + + if cbuffer == 0 { + cbuffer = uint32(float64(ceil)/Hz() + float64(mtu)) + } + cbuffer = uint32(Xmittime(ceil, cbuffer)) + + return &HtbClass{ + ClassAttrs: attrs, + Rate: rate, + Ceil: ceil, + Buffer: buffer, + Cbuffer: cbuffer, + Quantum: 10, + Level: 0, + Prio: 0, + } +} + +func (q HtbClass) String() string { + return fmt.Sprintf("{Rate: %d, Ceil: %d, Buffer: %d, Cbuffer: %d}", q.Rate, q.Ceil, q.Buffer, q.Cbuffer) +} + +func (class *HtbClass) Attrs() *ClassAttrs { + return &class.ClassAttrs +} + +func (class *HtbClass) Type() string { + return "htb" +} + +// GenericClass classes represent types that are not currently understood +// by this netlink library. +type GenericClass struct { + ClassAttrs + ClassType string +} + +func (class *GenericClass) Attrs() *ClassAttrs { + return &class.ClassAttrs +} + +func (class *GenericClass) Type() string { + return class.ClassType +} diff --git a/Godeps/_workspace/src/github.com/vishvananda/netlink/class_linux.go b/Godeps/_workspace/src/github.com/vishvananda/netlink/class_linux.go new file mode 100644 index 00000000..3dcc542b --- /dev/null +++ b/Godeps/_workspace/src/github.com/vishvananda/netlink/class_linux.go @@ -0,0 +1,144 @@ +package netlink + +import ( + "syscall" + + "github.com/vishvananda/netlink/nl" +) + +// ClassDel will delete a class from the system. +// Equivalent to: `tc class del $class` +func ClassDel(class Class) error { + req := nl.NewNetlinkRequest(syscall.RTM_DELTCLASS, syscall.NLM_F_ACK) + base := class.Attrs() + msg := &nl.TcMsg{ + Family: nl.FAMILY_ALL, + Ifindex: int32(base.LinkIndex), + Handle: base.Handle, + Parent: base.Parent, + } + req.AddData(msg) + + _, err := req.Execute(syscall.NETLINK_ROUTE, 0) + return err +} + +// ClassAdd will add a class to the system. +// Equivalent to: `tc class add $class` +func ClassAdd(class Class) error { + req := nl.NewNetlinkRequest(syscall.RTM_NEWTCLASS, syscall.NLM_F_CREATE|syscall.NLM_F_EXCL|syscall.NLM_F_ACK) + base := class.Attrs() + msg := &nl.TcMsg{ + Family: nl.FAMILY_ALL, + Ifindex: int32(base.LinkIndex), + Handle: base.Handle, + Parent: base.Parent, + } + req.AddData(msg) + req.AddData(nl.NewRtAttr(nl.TCA_KIND, nl.ZeroTerminated(class.Type()))) + + options := nl.NewRtAttr(nl.TCA_OPTIONS, nil) + if htb, ok := class.(*HtbClass); ok { + opt := nl.TcHtbCopt{} + opt.Rate.Rate = uint32(htb.Rate) + opt.Ceil.Rate = uint32(htb.Ceil) + opt.Buffer = htb.Buffer + opt.Cbuffer = htb.Cbuffer + opt.Quantum = htb.Quantum + opt.Level = htb.Level + opt.Prio = htb.Prio + // TODO: Handle Debug properly. For now default to 0 + nl.NewRtAttrChild(options, nl.TCA_HTB_PARMS, opt.Serialize()) + } + req.AddData(options) + _, err := req.Execute(syscall.NETLINK_ROUTE, 0) + return err +} + +// ClassList gets a list of classes in the system. +// Equivalent to: `tc class show`. +// Generally retunrs nothing if link and parent are not specified. +func ClassList(link Link, parent uint32) ([]Class, error) { + req := nl.NewNetlinkRequest(syscall.RTM_GETTCLASS, syscall.NLM_F_DUMP) + msg := &nl.TcMsg{ + Family: nl.FAMILY_ALL, + Parent: parent, + } + if link != nil { + base := link.Attrs() + ensureIndex(base) + msg.Ifindex = int32(base.Index) + } + req.AddData(msg) + + msgs, err := req.Execute(syscall.NETLINK_ROUTE, syscall.RTM_NEWTCLASS) + if err != nil { + return nil, err + } + + var res []Class + for _, m := range msgs { + msg := nl.DeserializeTcMsg(m) + + attrs, err := nl.ParseRouteAttr(m[msg.Len():]) + if err != nil { + return nil, err + } + + base := ClassAttrs{ + LinkIndex: int(msg.Ifindex), + Handle: msg.Handle, + Parent: msg.Parent, + } + + var class Class + classType := "" + for _, attr := range attrs { + switch attr.Attr.Type { + case nl.TCA_KIND: + classType = string(attr.Value[:len(attr.Value)-1]) + switch classType { + case "htb": + class = &HtbClass{} + default: + class = &GenericClass{ClassType: classType} + } + case nl.TCA_OPTIONS: + switch classType { + case "htb": + data, err := nl.ParseRouteAttr(attr.Value) + if err != nil { + return nil, err + } + _, err = parseHtbClassData(class, data) + if err != nil { + return nil, err + } + } + } + } + *class.Attrs() = base + res = append(res, class) + } + + return res, nil +} + +func parseHtbClassData(class Class, data []syscall.NetlinkRouteAttr) (bool, error) { + htb := class.(*HtbClass) + detailed := false + for _, datum := range data { + switch datum.Attr.Type { + case nl.TCA_HTB_PARMS: + opt := nl.DeserializeTcHtbCopt(datum.Value) + htb.Rate = uint64(opt.Rate.Rate) + htb.Ceil = uint64(opt.Ceil.Rate) + htb.Buffer = opt.Buffer + htb.Cbuffer = opt.Cbuffer + htb.Quantum = opt.Quantum + htb.Level = opt.Level + htb.Prio = opt.Prio + } + } + return detailed, nil +} diff --git a/Godeps/_workspace/src/github.com/vishvananda/netlink/class_test.go b/Godeps/_workspace/src/github.com/vishvananda/netlink/class_test.go new file mode 100644 index 00000000..d163f73e --- /dev/null +++ b/Godeps/_workspace/src/github.com/vishvananda/netlink/class_test.go @@ -0,0 +1,102 @@ +package netlink + +import ( + "testing" +) + +func TestClassAddDel(t *testing.T) { + tearDown := setUpNetlinkTest(t) + defer tearDown() + if err := LinkAdd(&Ifb{LinkAttrs{Name: "foo"}}); err != nil { + t.Fatal(err) + } + if err := LinkAdd(&Ifb{LinkAttrs{Name: "bar"}}); err != nil { + t.Fatal(err) + } + link, err := LinkByName("foo") + if err != nil { + t.Fatal(err) + } + if err := LinkSetUp(link); err != nil { + t.Fatal(err) + } + attrs := QdiscAttrs{ + LinkIndex: link.Attrs().Index, + Handle: MakeHandle(0xffff, 0), + Parent: HANDLE_ROOT, + } + qdisc := NewHtb(attrs) + if err := QdiscAdd(qdisc); err != nil { + t.Fatal(err) + } + qdiscs, err := QdiscList(link) + if err != nil { + t.Fatal(err) + } + if len(qdiscs) != 1 { + t.Fatal("Failed to add qdisc") + } + _, ok := qdiscs[0].(*Htb) + if !ok { + t.Fatal("Qdisc is the wrong type") + } + + classattrs := ClassAttrs{ + LinkIndex: link.Attrs().Index, + Parent: MakeHandle(0xffff, 0), + Handle: MakeHandle(0xffff, 2), + } + + htbclassattrs := HtbClassAttrs{ + Rate: 1234000, + Cbuffer: 1690, + } + class := NewHtbClass(classattrs, htbclassattrs) + if err := ClassAdd(class); err != nil { + t.Fatal(err) + } + classes, err := ClassList(link, MakeHandle(0xffff, 2)) + if err != nil { + t.Fatal(err) + } + if len(classes) != 1 { + t.Fatal("Failed to add class") + } + + htb, ok := classes[0].(*HtbClass) + if !ok { + t.Fatal("Class is the wrong type") + } + if htb.Rate != class.Rate { + t.Fatal("Rate doesn't match") + } + if htb.Ceil != class.Ceil { + t.Fatal("Ceil doesn't match") + } + if htb.Buffer != class.Buffer { + t.Fatal("Buffer doesn't match") + } + if htb.Cbuffer != class.Cbuffer { + t.Fatal("Cbuffer doesn't match") + } + if err := ClassDel(class); err != nil { + t.Fatal(err) + } + classes, err = ClassList(link, MakeHandle(0xffff, 0)) + if err != nil { + t.Fatal(err) + } + if len(classes) != 0 { + t.Fatal("Failed to remove class") + } + if err := QdiscDel(qdisc); err != nil { + t.Fatal(err) + } + qdiscs, err = QdiscList(link) + if err != nil { + t.Fatal(err) + } + if len(qdiscs) != 0 { + t.Fatal("Failed to remove qdisc") + } +} diff --git a/Godeps/_workspace/src/github.com/vishvananda/netlink/filter.go b/Godeps/_workspace/src/github.com/vishvananda/netlink/filter.go new file mode 100644 index 00000000..83ad7007 --- /dev/null +++ b/Godeps/_workspace/src/github.com/vishvananda/netlink/filter.go @@ -0,0 +1,55 @@ +package netlink + +import ( + "fmt" +) + +type Filter interface { + Attrs() *FilterAttrs + Type() string +} + +// Filter represents a netlink filter. A filter is associated with a link, +// has a handle and a parent. The root filter of a device should have a +// parent == HANDLE_ROOT. +type FilterAttrs struct { + LinkIndex int + Handle uint32 + Parent uint32 + Priority uint16 // lower is higher priority + Protocol uint16 // syscall.ETH_P_* +} + +func (q FilterAttrs) String() string { + return fmt.Sprintf("{LinkIndex: %d, Handle: %s, Parent: %s, Priority: %d, Protocol: %d}", q.LinkIndex, HandleStr(q.Handle), HandleStr(q.Parent), q.Priority, q.Protocol) +} + +// U32 filters on many packet related properties +type U32 struct { + FilterAttrs + // Currently only supports redirecting to another interface + RedirIndex int +} + +func (filter *U32) Attrs() *FilterAttrs { + return &filter.FilterAttrs +} + +func (filter *U32) Type() string { + return "u32" +} + +// GenericFilter filters represent types that are not currently understood +// by this netlink library. +type GenericFilter struct { + FilterAttrs + FilterType string +} + +func (filter *GenericFilter) Attrs() *FilterAttrs { + return &filter.FilterAttrs +} + +func (filter *GenericFilter) Type() string { + return filter.FilterType +} diff --git a/Godeps/_workspace/src/github.com/vishvananda/netlink/filter_linux.go b/Godeps/_workspace/src/github.com/vishvananda/netlink/filter_linux.go new file mode 100644 index 00000000..1ec69870 --- /dev/null +++ b/Godeps/_workspace/src/github.com/vishvananda/netlink/filter_linux.go @@ -0,0 +1,191 @@ +package netlink + +import ( + "fmt" + "syscall" + + "github.com/vishvananda/netlink/nl" +) + +// FilterDel will delete a filter from the system. +// Equivalent to: `tc filter del $filter` +func FilterDel(filter Filter) error { + req := nl.NewNetlinkRequest(syscall.RTM_DELTFILTER, syscall.NLM_F_ACK) + base := filter.Attrs() + msg := &nl.TcMsg{ + Family: nl.FAMILY_ALL, + Ifindex: int32(base.LinkIndex), + Handle: base.Handle, + Parent: base.Parent, + Info: MakeHandle(base.Priority, nl.Swap16(base.Protocol)), + } + req.AddData(msg) + + _, err := req.Execute(syscall.NETLINK_ROUTE, 0) + return err +} + +// FilterAdd will add a filter to the system. +// Equivalent to: `tc filter add $filter` +func FilterAdd(filter Filter) error { + req := nl.NewNetlinkRequest(syscall.RTM_NEWTFILTER, syscall.NLM_F_CREATE|syscall.NLM_F_EXCL|syscall.NLM_F_ACK) + base := filter.Attrs() + msg := &nl.TcMsg{ + Family: nl.FAMILY_ALL, + Ifindex: int32(base.LinkIndex), + Handle: base.Handle, + Parent: base.Parent, + Info: MakeHandle(base.Priority, nl.Swap16(base.Protocol)), + } + req.AddData(msg) + req.AddData(nl.NewRtAttr(nl.TCA_KIND, nl.ZeroTerminated(filter.Type()))) + + options := nl.NewRtAttr(nl.TCA_OPTIONS, nil) + if u32, ok := filter.(*U32); ok { + // match all + sel := nl.TcU32Sel{ + Nkeys: 1, + Flags: nl.TC_U32_TERMINAL, + } + sel.Keys = append(sel.Keys, nl.TcU32Key{}) + nl.NewRtAttrChild(options, nl.TCA_U32_SEL, sel.Serialize()) + actions := nl.NewRtAttrChild(options, nl.TCA_U32_ACT, nil) + table := nl.NewRtAttrChild(actions, nl.TCA_ACT_TAB, nil) + nl.NewRtAttrChild(table, nl.TCA_KIND, nl.ZeroTerminated("mirred")) + // redirect to other interface + mir := nl.TcMirred{ + Action: nl.TC_ACT_STOLEN, + Eaction: nl.TCA_EGRESS_REDIR, + Ifindex: uint32(u32.RedirIndex), + } + aopts := nl.NewRtAttrChild(table, nl.TCA_OPTIONS, nil) + nl.NewRtAttrChild(aopts, nl.TCA_MIRRED_PARMS, mir.Serialize()) + } + req.AddData(options) + _, err := req.Execute(syscall.NETLINK_ROUTE, 0) + return err +} + +// FilterList gets a list of filters in the system. +// Equivalent to: `tc filter show`. +// Generally retunrs nothing if link and parent are not specified. +func FilterList(link Link, parent uint32) ([]Filter, error) { + req := nl.NewNetlinkRequest(syscall.RTM_GETTFILTER, syscall.NLM_F_DUMP) + msg := &nl.TcMsg{ + Family: nl.FAMILY_ALL, + Parent: parent, + } + if link != nil { + base := link.Attrs() + ensureIndex(base) + msg.Ifindex = int32(base.Index) + } + req.AddData(msg) + + msgs, err := req.Execute(syscall.NETLINK_ROUTE, syscall.RTM_NEWTFILTER) + if err != nil { + return nil, err + } + + var res []Filter + for _, m := range msgs { + msg := nl.DeserializeTcMsg(m) + + attrs, err := nl.ParseRouteAttr(m[msg.Len():]) + if err != nil { + return nil, err + } + + base := FilterAttrs{ + LinkIndex: int(msg.Ifindex), + Handle: msg.Handle, + Parent: msg.Parent, + } + base.Priority, base.Protocol = MajorMinor(msg.Info) + base.Protocol = nl.Swap16(base.Protocol) + + var filter Filter + filterType := "" + detailed := false + for _, attr := range attrs { + switch attr.Attr.Type { + case nl.TCA_KIND: + filterType = string(attr.Value[:len(attr.Value)-1]) + switch filterType { + case "u32": + filter = &U32{} + default: + filter = &GenericFilter{FilterType: filterType} + } + case nl.TCA_OPTIONS: + switch filterType { + case "u32": + data, err := nl.ParseRouteAttr(attr.Value) + if err != nil { + return nil, err + } + detailed, err = parseU32Data(filter, data) + if err != nil { + return nil, err + } + } + } + } + // only return the detailed version of the filter + if detailed { + *filter.Attrs() = base + res = append(res, filter) + } + } + + return res, nil +} + +func parseU32Data(filter Filter, data []syscall.NetlinkRouteAttr) (bool, error) { + native = nl.NativeEndian() + u32 := filter.(*U32) + detailed := false + for _, datum := range data { + switch datum.Attr.Type { + case nl.TCA_U32_SEL: + detailed = true + sel := nl.DeserializeTcU32Sel(datum.Value) + // only parse if we have a very basic redirect + if sel.Flags&nl.TC_U32_TERMINAL == 0 || sel.Nkeys != 1 { + return detailed, nil + } + case nl.TCA_U32_ACT: + table, err := nl.ParseRouteAttr(datum.Value) + if err != nil { + return detailed, err + } + if len(table) != 1 || table[0].Attr.Type != nl.TCA_ACT_TAB { + return detailed, fmt.Errorf("Action table not formed properly") + } + aattrs, err := nl.ParseRouteAttr(table[0].Value) + for _, aattr := range aattrs { + switch aattr.Attr.Type { + case nl.TCA_KIND: + actionType := string(aattr.Value[:len(aattr.Value)-1]) + // only parse if the action is mirred + if actionType != "mirred" { + return detailed, nil + } + case nl.TCA_OPTIONS: + adata, err := nl.ParseRouteAttr(aattr.Value) + if err != nil { + return detailed, err + } + for _, adatum := range adata { + switch adatum.Attr.Type { + case nl.TCA_MIRRED_PARMS: + mir := nl.DeserializeTcMirred(adatum.Value) + u32.RedirIndex = int(mir.Ifindex) + } + } + } + } + } + } + return detailed, nil +} diff --git a/Godeps/_workspace/src/github.com/vishvananda/netlink/filter_test.go b/Godeps/_workspace/src/github.com/vishvananda/netlink/filter_test.go new file mode 100644 index 00000000..206699cf --- /dev/null +++ b/Godeps/_workspace/src/github.com/vishvananda/netlink/filter_test.go @@ -0,0 +1,91 @@ +package netlink + +import ( + "syscall" + "testing" +) + +func TestFilterAddDel(t *testing.T) { + tearDown := setUpNetlinkTest(t) + defer tearDown() + if err := LinkAdd(&Ifb{LinkAttrs{Name: "foo"}}); err != nil { + t.Fatal(err) + } + if err := LinkAdd(&Ifb{LinkAttrs{Name: "bar"}}); err != nil { + t.Fatal(err) + } + link, err := LinkByName("foo") + if err != nil { + t.Fatal(err) + } + if err := LinkSetUp(link); err != nil { + t.Fatal(err) + } + redir, err := LinkByName("bar") + if err != nil { + t.Fatal(err) + } + if err := LinkSetUp(redir); err != nil { + t.Fatal(err) + } + qdisc := &Ingress{ + QdiscAttrs: QdiscAttrs{ + LinkIndex: link.Attrs().Index, + Handle: MakeHandle(0xffff, 0), + Parent: HANDLE_INGRESS, + }, + } + if err := QdiscAdd(qdisc); err != nil { + t.Fatal(err) + } + qdiscs, err := QdiscList(link) + if err != nil { + t.Fatal(err) + } + if len(qdiscs) != 1 { + t.Fatal("Failed to add qdisc") + } + _, ok := qdiscs[0].(*Ingress) + if !ok { + t.Fatal("Qdisc is the wrong type") + } + filter := &U32{ + FilterAttrs: FilterAttrs{ + LinkIndex: link.Attrs().Index, + Parent: MakeHandle(0xffff, 0), + Priority: 1, + Protocol: syscall.ETH_P_IP, + }, + RedirIndex: redir.Attrs().Index, + } + if err := FilterAdd(filter); err != nil { + t.Fatal(err) + } + filters, err := FilterList(link, MakeHandle(0xffff, 0)) + if err != nil { + t.Fatal(err) + } + if len(filters) != 1 { + t.Fatal("Failed to add filter") + } + if err := FilterDel(filter); err != nil { + t.Fatal(err) + } + filters, err = FilterList(link, MakeHandle(0xffff, 0)) + if err != nil { + t.Fatal(err) + } + if len(filters) != 0 { + t.Fatal("Failed to remove filter") + } + if err := QdiscDel(qdisc); err != nil { + t.Fatal(err) + } + qdiscs, err = QdiscList(link) + if err != nil { + t.Fatal(err) + } + if len(qdiscs) != 0 { + t.Fatal("Failed to remove qdisc") + } +} diff --git a/Godeps/_workspace/src/github.com/vishvananda/netlink/link.go b/Godeps/_workspace/src/github.com/vishvananda/netlink/link.go index 0cb6fc37..18fd1759 100644 --- a/Godeps/_workspace/src/github.com/vishvananda/netlink/link.go +++ b/Godeps/_workspace/src/github.com/vishvananda/netlink/link.go @@ -19,7 +19,7 @@ type ( type LinkAttrs struct { Index int MTU int - TxQLen uint32 // Transmit Queue Length + TxQLen int // Transmit Queue Length Name string HardwareAddr net.HardwareAddr Flags net.Flags @@ -28,6 +28,13 @@ type LinkAttrs struct { Namespace interface{} // nil | NsPid | NsFd } +// NewLinkAttrs returns LinkAttrs structure filled with default values +func NewLinkAttrs() LinkAttrs { + return LinkAttrs{ + TxQLen: -1, + } +} + // Device links cannot be created via netlink. These links // are links created by udev like 'lo' and 'etho0' type Device struct { @@ -55,6 +62,19 @@ func (dummy *Dummy) Type() string { return "dummy" } +// Ifb links are advanced dummy devices for packet filtering +type Ifb struct { + LinkAttrs +} + +func (ifb *Ifb) Attrs() *LinkAttrs { + return &ifb.LinkAttrs +} + +func (ifb *Ifb) Type() string { + return "ifb" +} + // Bridge links are simple linux bridges type Bridge struct { LinkAttrs @@ -107,6 +127,15 @@ func (macvlan *Macvlan) Type() string { return "macvlan" } +// Macvtap - macvtap is a virtual interfaces based on macvlan +type Macvtap struct { + Macvlan +} + +func (macvtap Macvtap) Type() string { + return "macvtap" +} + // Veth devices must specify PeerName on create type Veth struct { LinkAttrs @@ -121,18 +150,18 @@ func (veth *Veth) Type() string { return "veth" } -// Generic links represent types that are not currently understood +// GenericLink links represent types that are not currently understood // by this netlink library. -type Generic struct { +type GenericLink struct { LinkAttrs LinkType string } -func (generic *Generic) Attrs() *LinkAttrs { +func (generic *GenericLink) Attrs() *LinkAttrs { return &generic.LinkAttrs } -func (generic *Generic) Type() string { +func (generic *GenericLink) Type() string { return generic.LinkType } @@ -150,6 +179,7 @@ type Vxlan struct { L2miss bool L3miss bool NoAge bool + GBP bool Age int Limit int Port int diff --git a/Godeps/_workspace/src/github.com/vishvananda/netlink/link_linux.go b/Godeps/_workspace/src/github.com/vishvananda/netlink/link_linux.go index 8b0da0d5..842a21f3 100644 --- a/Godeps/_workspace/src/github.com/vishvananda/netlink/link_linux.go +++ b/Godeps/_workspace/src/github.com/vishvananda/netlink/link_linux.go @@ -48,7 +48,7 @@ func LinkSetUp(link Link) error { return err } -// LinkSetUp disables link device. +// LinkSetDown disables link device. // Equivalent to: `ip link set $link down` func LinkSetDown(link Link) error { base := link.Attrs() @@ -73,10 +73,7 @@ func LinkSetMTU(link Link, mtu int) error { req := nl.NewNetlinkRequest(syscall.RTM_SETLINK, syscall.NLM_F_ACK) msg := nl.NewIfInfomsg(syscall.AF_UNSPEC) - msg.Type = syscall.RTM_SETLINK - msg.Flags = syscall.NLM_F_REQUEST msg.Index = int32(base.Index) - msg.Change = nl.DEFAULT_CHANGE req.AddData(msg) b := make([]byte, 4) @@ -97,10 +94,7 @@ func LinkSetName(link Link, name string) error { req := nl.NewNetlinkRequest(syscall.RTM_SETLINK, syscall.NLM_F_ACK) msg := nl.NewIfInfomsg(syscall.AF_UNSPEC) - msg.Type = syscall.RTM_SETLINK - msg.Flags = syscall.NLM_F_REQUEST msg.Index = int32(base.Index) - msg.Change = nl.DEFAULT_CHANGE req.AddData(msg) data := nl.NewRtAttr(syscall.IFLA_IFNAME, []byte(name)) @@ -118,10 +112,7 @@ func LinkSetHardwareAddr(link Link, hwaddr net.HardwareAddr) error { req := nl.NewNetlinkRequest(syscall.RTM_SETLINK, syscall.NLM_F_ACK) msg := nl.NewIfInfomsg(syscall.AF_UNSPEC) - msg.Type = syscall.RTM_SETLINK - msg.Flags = syscall.NLM_F_REQUEST msg.Index = int32(base.Index) - msg.Change = nl.DEFAULT_CHANGE req.AddData(msg) data := nl.NewRtAttr(syscall.IFLA_ADDRESS, []byte(hwaddr)) @@ -151,10 +142,7 @@ func LinkSetMasterByIndex(link Link, masterIndex int) error { req := nl.NewNetlinkRequest(syscall.RTM_SETLINK, syscall.NLM_F_ACK) msg := nl.NewIfInfomsg(syscall.AF_UNSPEC) - msg.Type = syscall.RTM_SETLINK - msg.Flags = syscall.NLM_F_REQUEST msg.Index = int32(base.Index) - msg.Change = nl.DEFAULT_CHANGE req.AddData(msg) b := make([]byte, 4) @@ -176,10 +164,7 @@ func LinkSetNsPid(link Link, nspid int) error { req := nl.NewNetlinkRequest(syscall.RTM_SETLINK, syscall.NLM_F_ACK) msg := nl.NewIfInfomsg(syscall.AF_UNSPEC) - msg.Type = syscall.RTM_SETLINK - msg.Flags = syscall.NLM_F_REQUEST msg.Index = int32(base.Index) - msg.Change = nl.DEFAULT_CHANGE req.AddData(msg) b := make([]byte, 4) @@ -192,7 +177,7 @@ func LinkSetNsPid(link Link, nspid int) error { return err } -// LinkSetNsPid puts the device into a new network namespace. The +// LinkSetNsFd puts the device into a new network namespace. The // fd must be an open file descriptor to a network namespace. // Similar to: `ip link set $link netns $ns` func LinkSetNsFd(link Link, fd int) error { @@ -201,10 +186,7 @@ func LinkSetNsFd(link Link, fd int) error { req := nl.NewNetlinkRequest(syscall.RTM_SETLINK, syscall.NLM_F_ACK) msg := nl.NewIfInfomsg(syscall.AF_UNSPEC) - msg.Type = syscall.RTM_SETLINK - msg.Flags = syscall.NLM_F_REQUEST msg.Index = int32(base.Index) - msg.Change = nl.DEFAULT_CHANGE req.AddData(msg) b := make([]byte, 4) @@ -266,6 +248,10 @@ func addVxlanAttrs(vxlan *Vxlan, linkInfo *nl.RtAttr) { nl.NewRtAttrChild(data, nl.IFLA_VXLAN_L2MISS, boolAttr(vxlan.L2miss)) nl.NewRtAttrChild(data, nl.IFLA_VXLAN_L3MISS, boolAttr(vxlan.L3miss)) + if vxlan.GBP { + nl.NewRtAttrChild(data, nl.IFLA_VXLAN_GBP, boolAttr(vxlan.GBP)) + } + if vxlan.NoAge { nl.NewRtAttrChild(data, nl.IFLA_VXLAN_AGEING, nl.Uint32Attr(0)) } else if vxlan.Age > 0 { @@ -321,6 +307,11 @@ func LinkAdd(link Link) error { req.AddData(mtu) } + if base.TxQLen >= 0 { + qlen := nl.NewRtAttr(syscall.IFLA_TXQLEN, nl.Uint32Attr(uint32(base.TxQLen))) + req.AddData(qlen) + } + if base.Namespace != nil { var attr *nl.RtAttr switch base.Namespace.(type) { @@ -338,8 +329,6 @@ func LinkAdd(link Link) error { linkInfo := nl.NewRtAttr(syscall.IFLA_LINKINFO, nil) nl.NewRtAttrChild(linkInfo, nl.IFLA_INFO_KIND, nl.NonZeroTerminated(link.Type())) - nl.NewRtAttrChild(linkInfo, syscall.IFLA_TXQLEN, nl.Uint32Attr(base.TxQLen)) - if vlan, ok := link.(*Vlan); ok { b := make([]byte, 2) native.PutUint16(b, uint16(vlan.VlanId)) @@ -350,10 +339,13 @@ func LinkAdd(link Link) error { peer := nl.NewRtAttrChild(data, nl.VETH_INFO_PEER, nil) nl.NewIfInfomsgChild(peer, syscall.AF_UNSPEC) nl.NewRtAttrChild(peer, syscall.IFLA_IFNAME, nl.ZeroTerminated(veth.PeerName)) - nl.NewRtAttrChild(peer, syscall.IFLA_TXQLEN, nl.Uint32Attr(base.TxQLen)) + if base.TxQLen >= 0 { + nl.NewRtAttrChild(peer, syscall.IFLA_TXQLEN, nl.Uint32Attr(uint32(base.TxQLen))) + } if base.MTU > 0 { nl.NewRtAttrChild(peer, syscall.IFLA_MTU, nl.Uint32Attr(uint32(base.MTU))) } + } else if vxlan, ok := link.(*Vxlan); ok { addVxlanAttrs(vxlan, linkInfo) } else if ipv, ok := link.(*IPVlan); ok { @@ -501,6 +493,8 @@ func linkDeserialize(m []byte) (Link, error) { switch linkType { case "dummy": link = &Dummy{} + case "ifb": + link = &Ifb{} case "bridge": link = &Bridge{} case "vlan": @@ -513,8 +507,10 @@ func linkDeserialize(m []byte) (Link, error) { link = &IPVlan{} case "macvlan": link = &Macvlan{} + case "macvtap": + link = &Macvtap{} default: - link = &Generic{LinkType: linkType} + link = &GenericLink{LinkType: linkType} } case nl.IFLA_INFO_DATA: data, err := nl.ParseRouteAttr(info.Value) @@ -530,6 +526,8 @@ func linkDeserialize(m []byte) (Link, error) { parseIPVlanData(link, data) case "macvlan": parseMacvlanData(link, data) + case "macvtap": + parseMacvtapData(link, data) } } } @@ -552,7 +550,7 @@ func linkDeserialize(m []byte) (Link, error) { case syscall.IFLA_MASTER: base.MasterIndex = int(native.Uint32(attr.Value[0:4])) case syscall.IFLA_TXQLEN: - base.TxQLen = native.Uint32(attr.Value[0:4]) + base.TxQLen = int(native.Uint32(attr.Value[0:4])) } } // Links that don't have IFLA_INFO_KIND are hardware devices @@ -579,8 +577,7 @@ func LinkList() ([]Link, error) { return nil, err } - res := make([]Link, 0) - + var res []Link for _, m := range msgs { link, err := linkDeserialize(m) if err != nil { @@ -592,6 +589,46 @@ func LinkList() ([]Link, error) { return res, nil } +// LinkUpdate is used to pass information back from LinkSubscribe() +type LinkUpdate struct { + nl.IfInfomsg + Link +} + +// LinkSubscribe takes a chan down which notifications will be sent +// when links change. Close the 'done' chan to stop subscription. +func LinkSubscribe(ch chan<- LinkUpdate, done <-chan struct{}) error { + s, err := nl.Subscribe(syscall.NETLINK_ROUTE, syscall.RTNLGRP_LINK) + if err != nil { + return err + } + if done != nil { + go func() { + <-done + s.Close() + }() + } + go func() { + defer close(ch) + for { + msgs, err := s.Receive() + if err != nil { + return + } + for _, m := range msgs { + ifmsg := nl.DeserializeIfInfomsg(m.Data) + link, err := linkDeserialize(m.Data) + if err != nil { + return + } + ch <- LinkUpdate{IfInfomsg: *ifmsg, Link: link} + } + } + }() + + return nil +} + func LinkSetHairpin(link Link, mode bool) error { return setProtinfoAttr(link, mode, nl.IFLA_BRPORT_MODE) } @@ -622,10 +659,7 @@ func setProtinfoAttr(link Link, mode bool, attr int) error { req := nl.NewNetlinkRequest(syscall.RTM_SETLINK, syscall.NLM_F_ACK) msg := nl.NewIfInfomsg(syscall.AF_BRIDGE) - msg.Type = syscall.RTM_SETLINK - msg.Flags = syscall.NLM_F_REQUEST msg.Index = int32(base.Index) - msg.Change = nl.DEFAULT_CHANGE req.AddData(msg) br := nl.NewRtAttr(syscall.IFLA_PROTINFO|syscall.NLA_F_NESTED, nil) @@ -678,6 +712,8 @@ func parseVxlanData(link Link, data []syscall.NetlinkRouteAttr) { vxlan.L2miss = int8(datum.Value[0]) != 0 case nl.IFLA_VXLAN_L3MISS: vxlan.L3miss = int8(datum.Value[0]) != 0 + case nl.IFLA_VXLAN_GBP: + vxlan.GBP = int8(datum.Value[0]) != 0 case nl.IFLA_VXLAN_AGEING: vxlan.Age = int(native.Uint32(datum.Value[0:4])) vxlan.NoAge = vxlan.Age == 0 @@ -706,6 +742,11 @@ func parseIPVlanData(link Link, data []syscall.NetlinkRouteAttr) { } } +func parseMacvtapData(link Link, data []syscall.NetlinkRouteAttr) { + macv := link.(*Macvtap) + parseMacvlanData(&macv.Macvlan, data) +} + func parseMacvlanData(link Link, data []syscall.NetlinkRouteAttr) { macv := link.(*Macvlan) for _, datum := range data { diff --git a/Godeps/_workspace/src/github.com/vishvananda/netlink/link_test.go b/Godeps/_workspace/src/github.com/vishvananda/netlink/link_test.go index 41349ca4..3b640f1b 100644 --- a/Godeps/_workspace/src/github.com/vishvananda/netlink/link_test.go +++ b/Godeps/_workspace/src/github.com/vishvananda/netlink/link_test.go @@ -3,12 +3,17 @@ package netlink import ( "bytes" "net" + "syscall" "testing" + "time" "github.com/vishvananda/netns" ) -const testTxQLen uint32 = 100 +const ( + testTxQLen int = 100 + defaultTxQLen int = 1000 +) func testLinkAddDel(t *testing.T, link Link) { links, err := LinkList() @@ -50,9 +55,9 @@ func testLinkAddDel(t *testing.T, link Link) { } } - if veth, ok := link.(*Veth); ok { - if veth.TxQLen != testTxQLen { - t.Fatalf("TxQLen is %d, should be %d", veth.TxQLen, testTxQLen) + if veth, ok := result.(*Veth); ok { + if rBase.TxQLen != base.TxQLen { + t.Fatalf("qlen is %d, should be %d", rBase.TxQLen, base.TxQLen) } if rBase.MTU != base.MTU { t.Fatalf("MTU is %d, should be %d", rBase.MTU, base.MTU) @@ -147,6 +152,9 @@ func compareVxlan(t *testing.T, expected, actual *Vxlan) { if actual.L3miss != expected.L3miss { t.Fatal("Vxlan.L3miss doesn't match") } + if actual.GBP != expected.GBP { + t.Fatal("Vxlan.GBP doesn't match") + } if expected.NoAge { if !actual.NoAge { t.Fatal("Vxlan.NoAge doesn't match") @@ -177,6 +185,13 @@ func TestLinkAddDelDummy(t *testing.T) { testLinkAddDel(t, &Dummy{LinkAttrs{Name: "foo"}}) } +func TestLinkAddDelIfb(t *testing.T) { + tearDown := setUpNetlinkTest(t) + defer tearDown() + + testLinkAddDel(t, &Ifb{LinkAttrs{Name: "foo"}}) +} + func TestLinkAddDelBridge(t *testing.T) { tearDown := setUpNetlinkTest(t) defer tearDown() @@ -219,6 +234,27 @@ func TestLinkAddDelMacvlan(t *testing.T) { } } +func TestLinkAddDelMacvtap(t *testing.T) { + tearDown := setUpNetlinkTest(t) + defer tearDown() + + parent := &Dummy{LinkAttrs{Name: "foo"}} + if err := LinkAdd(parent); err != nil { + t.Fatal(err) + } + + testLinkAddDel(t, &Macvtap{ + Macvlan: Macvlan{ + LinkAttrs: LinkAttrs{Name: "bar", ParentIndex: parent.Attrs().Index}, + Mode: MACVLAN_MODE_PRIVATE, + }, + }) + + if err := LinkDel(parent); err != nil { + t.Fatal(err) + } +} + func TestLinkAddDelVeth(t *testing.T) { tearDown := setUpNetlinkTest(t) defer tearDown() @@ -226,6 +262,99 @@ func TestLinkAddDelVeth(t *testing.T) { testLinkAddDel(t, &Veth{LinkAttrs{Name: "foo", TxQLen: testTxQLen, MTU: 1400}, "bar"}) } +func TestLinkAddVethWithDefaultTxQLen(t *testing.T) { + tearDown := setUpNetlinkTest(t) + defer tearDown() + la := NewLinkAttrs() + la.Name = "foo" + + veth := &Veth{LinkAttrs: la, PeerName: "bar"} + if err := LinkAdd(veth); err != nil { + t.Fatal(err) + } + link, err := LinkByName("foo") + if err != nil { + t.Fatal(err) + } + if veth, ok := link.(*Veth); !ok { + t.Fatalf("unexpected link type: %T", link) + } else { + if veth.TxQLen != defaultTxQLen { + t.Fatalf("TxQLen is %d, should be %d", veth.TxQLen, defaultTxQLen) + } + } + peer, err := LinkByName("bar") + if err != nil { + t.Fatal(err) + } + if veth, ok := peer.(*Veth); !ok { + t.Fatalf("unexpected link type: %T", link) + } else { + if veth.TxQLen != defaultTxQLen { + t.Fatalf("TxQLen is %d, should be %d", veth.TxQLen, defaultTxQLen) + } + } +} + +func TestLinkAddVethWithZeroTxQLen(t *testing.T) { + tearDown := setUpNetlinkTest(t) + defer tearDown() + la := NewLinkAttrs() + la.Name = "foo" + la.TxQLen = 0 + + veth := &Veth{LinkAttrs: la, PeerName: "bar"} + if err := LinkAdd(veth); err != nil { + t.Fatal(err) + } + link, err := LinkByName("foo") + if err != nil { + t.Fatal(err) + } + if veth, ok := link.(*Veth); !ok { + t.Fatalf("unexpected link type: %T", link) + } else { + if veth.TxQLen != 0 { + t.Fatalf("TxQLen is %d, should be %d", veth.TxQLen, 0) + } + } + peer, err := LinkByName("bar") + if err != nil { + t.Fatal(err) + } + if veth, ok := peer.(*Veth); !ok { + t.Fatalf("unexpected link type: %T", link) + } else { + if veth.TxQLen != 0 { + t.Fatalf("TxQLen is %d, should be %d", veth.TxQLen, 0) + } + } +} + +func TestLinkAddDummyWithTxQLen(t *testing.T) { + tearDown := setUpNetlinkTest(t) + defer tearDown() + la := NewLinkAttrs() + la.Name = "foo" + la.TxQLen = 1500 + + dummy := &Dummy{LinkAttrs: la} + if err := LinkAdd(dummy); err != nil { + t.Fatal(err) + } + link, err := LinkByName("foo") + if err != nil { + t.Fatal(err) + } + if dummy, ok := link.(*Dummy); !ok { + t.Fatalf("unexpected link type: %T", link) + } else { + if dummy.TxQLen != 1500 { + t.Fatalf("TxQLen is %d, should be %d", dummy.TxQLen, 1500) + } + } +} + func TestLinkAddDelBridgeMaster(t *testing.T) { tearDown := setUpNetlinkTest(t) defer tearDown() @@ -542,3 +671,54 @@ func TestLinkSet(t *testing.T) { t.Fatalf("hardware address not changed!") } } + +func expectLinkUpdate(ch <-chan LinkUpdate, ifaceName string, up bool) bool { + for { + timeout := time.After(time.Minute) + select { + case update := <-ch: + if ifaceName == update.Link.Attrs().Name && (update.IfInfomsg.Flags&syscall.IFF_UP != 0) == up { + return true + } + case <-timeout: + return false + } + } +} + +func TestLinkSubscribe(t *testing.T) { + tearDown := setUpNetlinkTest(t) + defer tearDown() + + ch := make(chan LinkUpdate) + done := make(chan struct{}) + defer close(done) + if err := LinkSubscribe(ch, done); err != nil { + t.Fatal(err) + } + + link := &Veth{LinkAttrs{Name: "foo", TxQLen: testTxQLen, MTU: 1400}, "bar"} + if err := LinkAdd(link); err != nil { + t.Fatal(err) + } + + if !expectLinkUpdate(ch, "foo", false) { + t.Fatal("Add update not received as expected") + } + + if err := LinkSetUp(link); err != nil { + t.Fatal(err) + } + + if !expectLinkUpdate(ch, "foo", true) { + t.Fatal("Link Up update not received as expected") + } + + if err := LinkDel(link); err != nil { + t.Fatal(err) + } + + if !expectLinkUpdate(ch, "foo", false) { + t.Fatal("Del update not received as expected") + } +} diff --git a/Godeps/_workspace/src/github.com/vishvananda/netlink/neigh_linux.go b/Godeps/_workspace/src/github.com/vishvananda/netlink/neigh_linux.go index 1fdaa3a3..620a0ee7 100644 --- a/Godeps/_workspace/src/github.com/vishvananda/netlink/neigh_linux.go +++ b/Godeps/_workspace/src/github.com/vishvananda/netlink/neigh_linux.go @@ -141,7 +141,7 @@ func NeighList(linkIndex, family int) ([]Neigh, error) { return nil, err } - res := make([]Neigh, 0) + var res []Neigh for _, m := range msgs { ndm := deserializeNdmsg(m) if linkIndex != 0 && int(ndm.Index) != linkIndex { diff --git a/Godeps/_workspace/src/github.com/vishvananda/netlink/nl/link_linux.go b/Godeps/_workspace/src/github.com/vishvananda/netlink/nl/link_linux.go index 64ef5fdb..1f9ab088 100644 --- a/Godeps/_workspace/src/github.com/vishvananda/netlink/nl/link_linux.go +++ b/Godeps/_workspace/src/github.com/vishvananda/netlink/nl/link_linux.go @@ -47,7 +47,15 @@ const ( IFLA_VXLAN_PORT IFLA_VXLAN_GROUP6 IFLA_VXLAN_LOCAL6 - IFLA_VXLAN_MAX = IFLA_VXLAN_LOCAL6 + IFLA_VXLAN_UDP_CSUM + IFLA_VXLAN_UDP_ZERO_CSUM6_TX + IFLA_VXLAN_UDP_ZERO_CSUM6_RX + IFLA_VXLAN_REMCSUM_TX + IFLA_VXLAN_REMCSUM_RX + IFLA_VXLAN_GBP + IFLA_VXLAN_REMCSUM_NOPARTIAL + IFLA_VXLAN_FLOWBASED + IFLA_VXLAN_MAX = IFLA_VXLAN_FLOWBASED ) const ( diff --git a/Godeps/_workspace/src/github.com/vishvananda/netlink/nl/nl_linux.go b/Godeps/_workspace/src/github.com/vishvananda/netlink/nl/nl_linux.go index 72f28137..8dbd92b8 100644 --- a/Godeps/_workspace/src/github.com/vishvananda/netlink/nl/nl_linux.go +++ b/Godeps/_workspace/src/github.com/vishvananda/netlink/nl/nl_linux.go @@ -39,8 +39,9 @@ func NativeEndian() binary.ByteOrder { var x uint32 = 0x01020304 if *(*byte)(unsafe.Pointer(&x)) == 0x01 { nativeEndian = binary.BigEndian + } else { + nativeEndian = binary.LittleEndian } - nativeEndian = binary.LittleEndian } return nativeEndian } @@ -141,7 +142,7 @@ func (a *RtAttr) Len() int { } // Serialize the RtAttr into a byte array -// This can't ust unsafe.cast because it must iterate through children. +// This can't just unsafe.cast because it must iterate through children. func (a *RtAttr) Serialize() []byte { native := NativeEndian() @@ -172,16 +173,16 @@ type NetlinkRequest struct { } // Serialize the Netlink Request into a byte array -func (msg *NetlinkRequest) Serialize() []byte { +func (req *NetlinkRequest) Serialize() []byte { length := syscall.SizeofNlMsghdr - dataBytes := make([][]byte, len(msg.Data)) - for i, data := range msg.Data { + dataBytes := make([][]byte, len(req.Data)) + for i, data := range req.Data { dataBytes[i] = data.Serialize() length = length + len(dataBytes[i]) } - msg.Len = uint32(length) + req.Len = uint32(length) b := make([]byte, length) - hdr := (*(*[syscall.SizeofNlMsghdr]byte)(unsafe.Pointer(msg)))[:] + hdr := (*(*[syscall.SizeofNlMsghdr]byte)(unsafe.Pointer(req)))[:] next := syscall.SizeofNlMsghdr copy(b[0:next], hdr) for _, data := range dataBytes { @@ -193,9 +194,9 @@ func (msg *NetlinkRequest) Serialize() []byte { return b } -func (msg *NetlinkRequest) AddData(data NetlinkRequestData) { +func (req *NetlinkRequest) AddData(data NetlinkRequestData) { if data != nil { - msg.Data = append(msg.Data, data) + req.Data = append(req.Data, data) } } @@ -218,11 +219,11 @@ func (req *NetlinkRequest) Execute(sockType int, resType uint16) ([][]byte, erro return nil, err } - res := make([][]byte, 0) + var res [][]byte done: for { - msgs, err := s.Recieve() + msgs, err := s.Receive() if err != nil { return nil, err } @@ -294,7 +295,7 @@ func getNetlinkSocket(protocol int) (*NetlinkSocket, error) { // Create a netlink socket with a given protocol (e.g. NETLINK_ROUTE) // and subscribe it to multicast groups passed in variable argument list. -// Returns the netlink socket on whic hReceive() method can be called +// Returns the netlink socket on which Receive() method can be called // to retrieve the messages from the kernel. func Subscribe(protocol int, groups ...uint) (*NetlinkSocket, error) { fd, err := syscall.Socket(syscall.AF_NETLINK, syscall.SOCK_RAW, protocol) @@ -329,7 +330,7 @@ func (s *NetlinkSocket) Send(request *NetlinkRequest) error { return nil } -func (s *NetlinkSocket) Recieve() ([]syscall.NetlinkMessage, error) { +func (s *NetlinkSocket) Receive() ([]syscall.NetlinkMessage, error) { rb := make([]byte, syscall.Getpagesize()) nr, _, err := syscall.Recvfrom(s.fd, rb, 0) if err != nil { diff --git a/Godeps/_workspace/src/github.com/vishvananda/netlink/nl/route_linux.go b/Godeps/_workspace/src/github.com/vishvananda/netlink/nl/route_linux.go index 5dde998e..447e83e5 100644 --- a/Godeps/_workspace/src/github.com/vishvananda/netlink/nl/route_linux.go +++ b/Godeps/_workspace/src/github.com/vishvananda/netlink/nl/route_linux.go @@ -20,6 +20,15 @@ func NewRtMsg() *RtMsg { } } +func NewRtDelMsg() *RtMsg { + return &RtMsg{ + RtMsg: syscall.RtMsg{ + Table: syscall.RT_TABLE_MAIN, + Scope: syscall.RT_SCOPE_NOWHERE, + }, + } +} + func (msg *RtMsg) Len() int { return syscall.SizeofRtMsg } diff --git a/Godeps/_workspace/src/github.com/vishvananda/netlink/nl/tc_linux.go b/Godeps/_workspace/src/github.com/vishvananda/netlink/nl/tc_linux.go new file mode 100644 index 00000000..890979e0 --- /dev/null +++ b/Godeps/_workspace/src/github.com/vishvananda/netlink/nl/tc_linux.go @@ -0,0 +1,425 @@ +package nl + +import ( + "unsafe" +) + +// Message types +const ( + TCA_UNSPEC = iota + TCA_KIND + TCA_OPTIONS + TCA_STATS + TCA_XSTATS + TCA_RATE + TCA_FCNT + TCA_STATS2 + TCA_STAB + TCA_MAX = TCA_STAB +) + +const ( + TCA_ACT_TAB = 1 + TCAA_MAX = 1 +) + +const ( + TCA_PRIO_UNSPEC = iota + TCA_PRIO_MQ + TCA_PRIO_MAX = TCA_PRIO_MQ +) + +const ( + SizeofTcMsg = 0x14 + SizeofTcActionMsg = 0x04 + SizeofTcPrioMap = 0x14 + SizeofTcRateSpec = 0x0c + SizeofTcTbfQopt = 2*SizeofTcRateSpec + 0x0c + SizeofTcHtbCopt = 2*SizeofTcRateSpec + 0x14 + SizeofTcHtbGlob = 0x14 + SizeofTcU32Key = 0x10 + SizeofTcU32Sel = 0x10 // without keys + SizeofTcMirred = 0x1c +) + +// struct tcmsg { +// unsigned char tcm_family; +// unsigned char tcm__pad1; +// unsigned short tcm__pad2; +// int tcm_ifindex; +// __u32 tcm_handle; +// __u32 tcm_parent; +// __u32 tcm_info; +// }; + +type TcMsg struct { + Family uint8 + Pad [3]byte + Ifindex int32 + Handle uint32 + Parent uint32 + Info uint32 +} + +func (msg *TcMsg) Len() int { + return SizeofTcMsg +} + +func DeserializeTcMsg(b []byte) *TcMsg { + return (*TcMsg)(unsafe.Pointer(&b[0:SizeofTcMsg][0])) +} + +func (x *TcMsg) Serialize() []byte { + return (*(*[SizeofTcMsg]byte)(unsafe.Pointer(x)))[:] +} + +// struct tcamsg { +// unsigned char tca_family; +// unsigned char tca__pad1; +// unsigned short tca__pad2; +// }; + +type TcActionMsg struct { + Family uint8 + Pad [3]byte +} + +func (msg *TcActionMsg) Len() int { + return SizeofTcActionMsg +} + +func DeserializeTcActionMsg(b []byte) *TcActionMsg { + return (*TcActionMsg)(unsafe.Pointer(&b[0:SizeofTcActionMsg][0])) +} + +func (x *TcActionMsg) Serialize() []byte { + return (*(*[SizeofTcActionMsg]byte)(unsafe.Pointer(x)))[:] +} + +const ( + TC_PRIO_MAX = 15 +) + +// struct tc_prio_qopt { +// int bands; /* Number of bands */ +// __u8 priomap[TC_PRIO_MAX+1]; /* Map: logical priority -> PRIO band */ +// }; + +type TcPrioMap struct { + Bands int32 + Priomap [TC_PRIO_MAX + 1]uint8 +} + +func (msg *TcPrioMap) Len() int { + return SizeofTcPrioMap +} + +func DeserializeTcPrioMap(b []byte) *TcPrioMap { + return (*TcPrioMap)(unsafe.Pointer(&b[0:SizeofTcPrioMap][0])) +} + +func (x *TcPrioMap) Serialize() []byte { + return (*(*[SizeofTcPrioMap]byte)(unsafe.Pointer(x)))[:] +} + +const ( + TCA_TBF_UNSPEC = iota + TCA_TBF_PARMS + TCA_TBF_RTAB + TCA_TBF_PTAB + TCA_TBF_RATE64 + TCA_TBF_PRATE64 + TCA_TBF_BURST + TCA_TBF_PBURST + TCA_TBF_MAX = TCA_TBF_PBURST +) + +// struct tc_ratespec { +// unsigned char cell_log; +// __u8 linklayer; /* lower 4 bits */ +// unsigned short overhead; +// short cell_align; +// unsigned short mpu; +// __u32 rate; +// }; + +type TcRateSpec struct { + CellLog uint8 + Linklayer uint8 + Overhead uint16 + CellAlign int16 + Mpu uint16 + Rate uint32 +} + +func (msg *TcRateSpec) Len() int { + return SizeofTcRateSpec +} + +func DeserializeTcRateSpec(b []byte) *TcRateSpec { + return (*TcRateSpec)(unsafe.Pointer(&b[0:SizeofTcRateSpec][0])) +} + +func (x *TcRateSpec) Serialize() []byte { + return (*(*[SizeofTcRateSpec]byte)(unsafe.Pointer(x)))[:] +} + +// struct tc_tbf_qopt { +// struct tc_ratespec rate; +// struct tc_ratespec peakrate; +// __u32 limit; +// __u32 buffer; +// __u32 mtu; +// }; + +type TcTbfQopt struct { + Rate TcRateSpec + Peakrate TcRateSpec + Limit uint32 + Buffer uint32 + Mtu uint32 +} + +func (msg *TcTbfQopt) Len() int { + return SizeofTcTbfQopt +} + +func DeserializeTcTbfQopt(b []byte) *TcTbfQopt { + return (*TcTbfQopt)(unsafe.Pointer(&b[0:SizeofTcTbfQopt][0])) +} + +func (x *TcTbfQopt) Serialize() []byte { + return (*(*[SizeofTcTbfQopt]byte)(unsafe.Pointer(x)))[:] +} + +const ( + TCA_HTB_UNSPEC = iota + TCA_HTB_PARMS + TCA_HTB_INIT + TCA_HTB_CTAB + TCA_HTB_RTAB + TCA_HTB_DIRECT_QLEN + TCA_HTB_RATE64 + TCA_HTB_CEIL64 + TCA_HTB_MAX = TCA_HTB_CEIL64 +) + +//struct tc_htb_opt { +// struct tc_ratespec rate; +// struct tc_ratespec ceil; +// __u32 buffer; +// __u32 cbuffer; +// __u32 quantum; +// __u32 level; /* out only */ +// __u32 prio; +//}; + +type TcHtbCopt struct { + Rate TcRateSpec + Ceil TcRateSpec + Buffer uint32 + Cbuffer uint32 + Quantum uint32 + Level uint32 + Prio uint32 +} + +func (msg *TcHtbCopt) Len() int { + return SizeofTcHtbCopt +} + +func DeserializeTcHtbCopt(b []byte) *TcHtbCopt { + return (*TcHtbCopt)(unsafe.Pointer(&b[0:SizeofTcHtbCopt][0])) +} + +func (x *TcHtbCopt) Serialize() []byte { + return (*(*[SizeofTcHtbCopt]byte)(unsafe.Pointer(x)))[:] +} + +type TcHtbGlob struct { + Version uint32 + Rate2Quantum uint32 + Defcls uint32 + Debug uint32 + DirectPkts uint32 +} + +func (msg *TcHtbGlob) Len() int { + return SizeofTcHtbGlob +} + +func DeserializeTcHtbGlob(b []byte) *TcHtbGlob { + return (*TcHtbGlob)(unsafe.Pointer(&b[0:SizeofTcHtbGlob][0])) +} + +func (x *TcHtbGlob) Serialize() []byte { + return (*(*[SizeofTcHtbGlob]byte)(unsafe.Pointer(x)))[:] +} + +const ( + TCA_U32_UNSPEC = iota + TCA_U32_CLASSID + TCA_U32_HASH + TCA_U32_LINK + TCA_U32_DIVISOR + TCA_U32_SEL + TCA_U32_POLICE + TCA_U32_ACT + TCA_U32_INDEV + TCA_U32_PCNT + TCA_U32_MARK + TCA_U32_MAX = TCA_U32_MARK +) + +// struct tc_u32_key { +// __be32 mask; +// __be32 val; +// int off; +// int offmask; +// }; + +type TcU32Key struct { + Mask uint32 // big endian + Val uint32 // big endian + Off int32 + OffMask int32 +} + +func (msg *TcU32Key) Len() int { + return SizeofTcU32Key +} + +func DeserializeTcU32Key(b []byte) *TcU32Key { + return (*TcU32Key)(unsafe.Pointer(&b[0:SizeofTcU32Key][0])) +} + +func (x *TcU32Key) Serialize() []byte { + return (*(*[SizeofTcU32Key]byte)(unsafe.Pointer(x)))[:] +} + +// struct tc_u32_sel { +// unsigned char flags; +// unsigned char offshift; +// unsigned char nkeys; +// +// __be16 offmask; +// __u16 off; +// short offoff; +// +// short hoff; +// __be32 hmask; +// struct tc_u32_key keys[0]; +// }; + +const ( + TC_U32_TERMINAL = 1 << iota + TC_U32_OFFSET = 1 << iota + TC_U32_VAROFFSET = 1 << iota + TC_U32_EAT = 1 << iota +) + +type TcU32Sel struct { + Flags uint8 + Offshift uint8 + Nkeys uint8 + Pad uint8 + Offmask uint16 // big endian + Off uint16 + Offoff int16 + Hoff int16 + Hmask uint32 // big endian + Keys []TcU32Key +} + +func (msg *TcU32Sel) Len() int { + return SizeofTcU32Sel + int(msg.Nkeys)*SizeofTcU32Key +} + +func DeserializeTcU32Sel(b []byte) *TcU32Sel { + x := &TcU32Sel{} + copy((*(*[SizeofTcU32Sel]byte)(unsafe.Pointer(x)))[:], b) + next := SizeofTcU32Sel + var i uint8 + for i = 0; i < x.Nkeys; i++ { + x.Keys = append(x.Keys, *DeserializeTcU32Key(b[next:])) + next += SizeofTcU32Key + } + return x +} + +func (x *TcU32Sel) Serialize() []byte { + // This can't just unsafe.cast because it must iterate through keys. + buf := make([]byte, x.Len()) + copy(buf, (*(*[SizeofTcU32Sel]byte)(unsafe.Pointer(x)))[:]) + next := SizeofTcU32Sel + for _, key := range x.Keys { + keyBuf := key.Serialize() + copy(buf[next:], keyBuf) + next += SizeofTcU32Key + } + return buf +} + +const ( + TCA_ACT_MIRRED = 8 +) + +const ( + TCA_MIRRED_UNSPEC = iota + TCA_MIRRED_TM + TCA_MIRRED_PARMS + TCA_MIRRED_MAX = TCA_MIRRED_PARMS +) + +const ( + TCA_EGRESS_REDIR = 1 /* packet redirect to EGRESS*/ + TCA_EGRESS_MIRROR = 2 /* mirror packet to EGRESS */ + TCA_INGRESS_REDIR = 3 /* packet redirect to INGRESS*/ + TCA_INGRESS_MIRROR = 4 /* mirror packet to INGRESS */ +) + +const ( + TC_ACT_UNSPEC = int32(-1) + TC_ACT_OK = 0 + TC_ACT_RECLASSIFY = 1 + TC_ACT_SHOT = 2 + TC_ACT_PIPE = 3 + TC_ACT_STOLEN = 4 + TC_ACT_QUEUED = 5 + TC_ACT_REPEAT = 6 + TC_ACT_JUMP = 0x10000000 +) + +// #define tc_gen \ +// __u32 index; \ +// __u32 capab; \ +// int action; \ +// int refcnt; \ +// int bindcnt +// struct tc_mirred { +// tc_gen; +// int eaction; /* one of IN/EGRESS_MIRROR/REDIR */ +// __u32 ifindex; /* ifindex of egress port */ +// }; + +type TcMirred struct { + Index uint32 + Capab uint32 + Action int32 + Refcnt int32 + Bindcnt int32 + Eaction int32 + Ifindex uint32 +} + +func (msg *TcMirred) Len() int { + return SizeofTcMirred +} + +func DeserializeTcMirred(b []byte) *TcMirred { + return (*TcMirred)(unsafe.Pointer(&b[0:SizeofTcMirred][0])) +} + +func (x *TcMirred) Serialize() []byte { + return (*(*[SizeofTcMirred]byte)(unsafe.Pointer(x)))[:] +} diff --git a/Godeps/_workspace/src/github.com/vishvananda/netlink/nl/tc_linux_test.go b/Godeps/_workspace/src/github.com/vishvananda/netlink/nl/tc_linux_test.go new file mode 100644 index 00000000..148b2b02 --- /dev/null +++ b/Godeps/_workspace/src/github.com/vishvananda/netlink/nl/tc_linux_test.go @@ -0,0 +1,173 @@ +package nl + +import ( + "bytes" + "crypto/rand" + "encoding/binary" + "testing" +) + +/* TcMsg */ +func (msg *TcMsg) write(b []byte) { + native := NativeEndian() + b[0] = msg.Family + copy(b[1:4], msg.Pad[:]) + native.PutUint32(b[4:8], uint32(msg.Ifindex)) + native.PutUint32(b[8:12], msg.Handle) + native.PutUint32(b[12:16], msg.Parent) + native.PutUint32(b[16:20], msg.Info) +} + +func (msg *TcMsg) serializeSafe() []byte { + length := SizeofTcMsg + b := make([]byte, length) + msg.write(b) + return b +} + +func deserializeTcMsgSafe(b []byte) *TcMsg { + var msg = TcMsg{} + binary.Read(bytes.NewReader(b[0:SizeofTcMsg]), NativeEndian(), &msg) + return &msg +} + +func TestTcMsgDeserializeSerialize(t *testing.T) { + var orig = make([]byte, SizeofTcMsg) + rand.Read(orig) + safemsg := deserializeTcMsgSafe(orig) + msg := DeserializeTcMsg(orig) + testDeserializeSerialize(t, orig, safemsg, msg) +} + +/* TcActionMsg */ +func (msg *TcActionMsg) write(b []byte) { + b[0] = msg.Family + copy(b[1:4], msg.Pad[:]) +} + +func (msg *TcActionMsg) serializeSafe() []byte { + length := SizeofTcActionMsg + b := make([]byte, length) + msg.write(b) + return b +} + +func deserializeTcActionMsgSafe(b []byte) *TcActionMsg { + var msg = TcActionMsg{} + binary.Read(bytes.NewReader(b[0:SizeofTcActionMsg]), NativeEndian(), &msg) + return &msg +} + +func TestTcActionMsgDeserializeSerialize(t *testing.T) { + var orig = make([]byte, SizeofTcActionMsg) + rand.Read(orig) + safemsg := deserializeTcActionMsgSafe(orig) + msg := DeserializeTcActionMsg(orig) + testDeserializeSerialize(t, orig, safemsg, msg) +} + +/* TcRateSpec */ +func (msg *TcRateSpec) write(b []byte) { + native := NativeEndian() + b[0] = msg.CellLog + b[1] = msg.Linklayer + native.PutUint16(b[2:4], msg.Overhead) + native.PutUint16(b[4:6], uint16(msg.CellAlign)) + native.PutUint16(b[6:8], msg.Mpu) + native.PutUint32(b[8:12], msg.Rate) +} + +func (msg *TcRateSpec) serializeSafe() []byte { + length := SizeofTcRateSpec + b := make([]byte, length) + msg.write(b) + return b +} + +func deserializeTcRateSpecSafe(b []byte) *TcRateSpec { + var msg = TcRateSpec{} + binary.Read(bytes.NewReader(b[0:SizeofTcRateSpec]), NativeEndian(), &msg) + return &msg +} + +func TestTcRateSpecDeserializeSerialize(t *testing.T) { + var orig = make([]byte, SizeofTcRateSpec) + rand.Read(orig) + safemsg := deserializeTcRateSpecSafe(orig) + msg := DeserializeTcRateSpec(orig) + testDeserializeSerialize(t, orig, safemsg, msg) +} + +/* TcTbfQopt */ +func (msg *TcTbfQopt) write(b []byte) { + native := NativeEndian() + msg.Rate.write(b[0:SizeofTcRateSpec]) + start := SizeofTcRateSpec + msg.Peakrate.write(b[start : start+SizeofTcRateSpec]) + start += SizeofTcRateSpec + native.PutUint32(b[start:start+4], msg.Limit) + start += 4 + native.PutUint32(b[start:start+4], msg.Buffer) + start += 4 + native.PutUint32(b[start:start+4], msg.Mtu) +} + +func (msg *TcTbfQopt) serializeSafe() []byte { + length := SizeofTcTbfQopt + b := make([]byte, length) + msg.write(b) + return b +} + +func deserializeTcTbfQoptSafe(b []byte) *TcTbfQopt { + var msg = TcTbfQopt{} + binary.Read(bytes.NewReader(b[0:SizeofTcTbfQopt]), NativeEndian(), &msg) + return &msg +} + +func TestTcTbfQoptDeserializeSerialize(t *testing.T) { + var orig = make([]byte, SizeofTcTbfQopt) + rand.Read(orig) + safemsg := deserializeTcTbfQoptSafe(orig) + msg := DeserializeTcTbfQopt(orig) + testDeserializeSerialize(t, orig, safemsg, msg) +} + +/* TcHtbCopt */ +func (msg *TcHtbCopt) write(b []byte) { + native := NativeEndian() + msg.Rate.write(b[0:SizeofTcRateSpec]) + start := SizeofTcRateSpec + msg.Ceil.write(b[start : start+SizeofTcRateSpec]) + start += SizeofTcRateSpec + native.PutUint32(b[start:start+4], msg.Buffer) + start += 4 + native.PutUint32(b[start:start+4], msg.Cbuffer) + start += 4 + native.PutUint32(b[start:start+4], msg.Quantum) + start += 4 + native.PutUint32(b[start:start+4], msg.Level) + start += 4 + native.PutUint32(b[start:start+4], msg.Prio) +} + +func (msg *TcHtbCopt) serializeSafe() []byte { + length := SizeofTcHtbCopt + b := make([]byte, length) + msg.write(b) + return b +} + +func deserializeTcHtbCoptSafe(b []byte) *TcHtbCopt { + var msg = TcHtbCopt{} + binary.Read(bytes.NewReader(b[0:SizeofTcHtbCopt]), NativeEndian(), &msg) + return &msg +} + +func TestTcHtbCoptDeserializeSerialize(t *testing.T) { + var orig = make([]byte, SizeofTcHtbCopt) + rand.Read(orig) + safemsg := deserializeTcHtbCoptSafe(orig) + msg := DeserializeTcHtbCopt(orig) + testDeserializeSerialize(t, orig, safemsg, msg) +} diff --git a/Godeps/_workspace/src/github.com/vishvananda/netlink/nl/xfrm_linux.go b/Godeps/_workspace/src/github.com/vishvananda/netlink/nl/xfrm_linux.go index d9531308..d24637d2 100644 --- a/Godeps/_workspace/src/github.com/vishvananda/netlink/nl/xfrm_linux.go +++ b/Godeps/_workspace/src/github.com/vishvananda/netlink/nl/xfrm_linux.go @@ -104,9 +104,8 @@ func (x *XfrmAddress) ToIPNet(prefixlen uint8) *net.IPNet { ip := x.ToIP() if GetIPFamily(ip) == FAMILY_V4 { return &net.IPNet{IP: ip, Mask: net.CIDRMask(int(prefixlen), 32)} - } else { - return &net.IPNet{IP: ip, Mask: net.CIDRMask(int(prefixlen), 128)} } + return &net.IPNet{IP: ip, Mask: net.CIDRMask(int(prefixlen), 128)} } func (x *XfrmAddress) FromIP(ip net.IP) { @@ -125,8 +124,8 @@ func DeserializeXfrmAddress(b []byte) *XfrmAddress { return (*XfrmAddress)(unsafe.Pointer(&b[0:SizeofXfrmAddress][0])) } -func (msg *XfrmAddress) Serialize() []byte { - return (*(*[SizeofXfrmAddress]byte)(unsafe.Pointer(msg)))[:] +func (x *XfrmAddress) Serialize() []byte { + return (*(*[SizeofXfrmAddress]byte)(unsafe.Pointer(x)))[:] } // struct xfrm_selector { diff --git a/Godeps/_workspace/src/github.com/vishvananda/netlink/protinfo.go b/Godeps/_workspace/src/github.com/vishvananda/netlink/protinfo.go index 79396da7..f39ab8f4 100644 --- a/Godeps/_workspace/src/github.com/vishvananda/netlink/protinfo.go +++ b/Godeps/_workspace/src/github.com/vishvananda/netlink/protinfo.go @@ -16,7 +16,7 @@ type Protinfo struct { // String returns a list of enabled flags func (prot *Protinfo) String() string { - boolStrings := make([]string, 0) + var boolStrings []string if prot.Hairpin { boolStrings = append(boolStrings, "Hairpin") } diff --git a/Godeps/_workspace/src/github.com/vishvananda/netlink/qdisc.go b/Godeps/_workspace/src/github.com/vishvananda/netlink/qdisc.go new file mode 100644 index 00000000..41a4aa8e --- /dev/null +++ b/Godeps/_workspace/src/github.com/vishvananda/netlink/qdisc.go @@ -0,0 +1,167 @@ +package netlink + +import ( + "fmt" +) + +const ( + HANDLE_NONE = 0 + HANDLE_INGRESS = 0xFFFFFFF1 + HANDLE_ROOT = 0xFFFFFFFF + PRIORITY_MAP_LEN = 16 +) + +type Qdisc interface { + Attrs() *QdiscAttrs + Type() string +} + +// Qdisc represents a netlink qdisc. A qdisc is associated with a link, +// has a handle, a parent and a refcnt. The root qdisc of a device should +// have parent == HANDLE_ROOT. +type QdiscAttrs struct { + LinkIndex int + Handle uint32 + Parent uint32 + Refcnt uint32 // read only +} + +func (q QdiscAttrs) String() string { + return fmt.Sprintf("{LinkIndex: %d, Handle: %s, Parent: %s, Refcnt: %s}", q.LinkIndex, HandleStr(q.Handle), HandleStr(q.Parent), q.Refcnt) +} + +func MakeHandle(major, minor uint16) uint32 { + return (uint32(major) << 16) | uint32(minor) +} + +func MajorMinor(handle uint32) (uint16, uint16) { + return uint16((handle & 0xFFFF0000) >> 16), uint16(handle & 0x0000FFFFF) +} + +func HandleStr(handle uint32) string { + switch handle { + case HANDLE_NONE: + return "none" + case HANDLE_INGRESS: + return "ingress" + case HANDLE_ROOT: + return "root" + default: + major, minor := MajorMinor(handle) + return fmt.Sprintf("%x:%x", major, minor) + } +} + +// PfifoFast is the default qdisc created by the kernel if one has not +// been defined for the interface +type PfifoFast struct { + QdiscAttrs + Bands uint8 + PriorityMap [PRIORITY_MAP_LEN]uint8 +} + +func (qdisc *PfifoFast) Attrs() *QdiscAttrs { + return &qdisc.QdiscAttrs +} + +func (qdisc *PfifoFast) Type() string { + return "pfifo_fast" +} + +// Prio is a basic qdisc that works just like PfifoFast +type Prio struct { + QdiscAttrs + Bands uint8 + PriorityMap [PRIORITY_MAP_LEN]uint8 +} + +func NewPrio(attrs QdiscAttrs) *Prio { + return &Prio{ + QdiscAttrs: attrs, + Bands: 3, + PriorityMap: [PRIORITY_MAP_LEN]uint8{1, 2, 2, 2, 1, 2, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1}, + } +} + +func (qdisc *Prio) Attrs() *QdiscAttrs { + return &qdisc.QdiscAttrs +} + +func (qdisc *Prio) Type() string { + return "prio" +} + +// Htb is a classful qdisc that rate limits based on tokens +type Htb struct { + QdiscAttrs + Version uint32 + Rate2Quantum uint32 + Defcls uint32 + Debug uint32 + DirectPkts uint32 +} + +func NewHtb(attrs QdiscAttrs) *Htb { + return &Htb{ + QdiscAttrs: attrs, + Version: 3, + Defcls: 0, + Rate2Quantum: 10, + Debug: 0, + DirectPkts: 0, + } +} + +func (qdisc *Htb) Attrs() *QdiscAttrs { + return &qdisc.QdiscAttrs +} + +func (qdisc *Htb) Type() string { + return "htb" +} + +// Tbf is a classless qdisc that rate limits based on tokens +type Tbf struct { + QdiscAttrs + // TODO: handle 64bit rate properly + Rate uint64 + Limit uint32 + Buffer uint32 + // TODO: handle other settings +} + +func (qdisc *Tbf) Attrs() *QdiscAttrs { + return &qdisc.QdiscAttrs +} + +func (qdisc *Tbf) Type() string { + return "tbf" +} + +// Ingress is a qdisc for adding ingress filters +type Ingress struct { + QdiscAttrs +} + +func (qdisc *Ingress) Attrs() *QdiscAttrs { + return &qdisc.QdiscAttrs +} + +func (qdisc *Ingress) Type() string { + return "ingress" +} + +// GenericQdisc qdiscs represent types that are not currently understood +// by this netlink library. +type GenericQdisc struct { + QdiscAttrs + QdiscType string +} + +func (qdisc *GenericQdisc) Attrs() *QdiscAttrs { + return &qdisc.QdiscAttrs +} + +func (qdisc *GenericQdisc) Type() string { + return qdisc.QdiscType +} diff --git a/Godeps/_workspace/src/github.com/vishvananda/netlink/qdisc_linux.go b/Godeps/_workspace/src/github.com/vishvananda/netlink/qdisc_linux.go new file mode 100644 index 00000000..a16eb99b --- /dev/null +++ b/Godeps/_workspace/src/github.com/vishvananda/netlink/qdisc_linux.go @@ -0,0 +1,316 @@ +package netlink + +import ( + "fmt" + "io/ioutil" + "strconv" + "strings" + "syscall" + + "github.com/vishvananda/netlink/nl" +) + +// QdiscDel will delete a qdisc from the system. +// Equivalent to: `tc qdisc del $qdisc` +func QdiscDel(qdisc Qdisc) error { + req := nl.NewNetlinkRequest(syscall.RTM_DELQDISC, syscall.NLM_F_ACK) + base := qdisc.Attrs() + msg := &nl.TcMsg{ + Family: nl.FAMILY_ALL, + Ifindex: int32(base.LinkIndex), + Handle: base.Handle, + Parent: base.Parent, + } + req.AddData(msg) + + _, err := req.Execute(syscall.NETLINK_ROUTE, 0) + return err +} + +// QdiscAdd will add a qdisc to the system. +// Equivalent to: `tc qdisc add $qdisc` +func QdiscAdd(qdisc Qdisc) error { + req := nl.NewNetlinkRequest(syscall.RTM_NEWQDISC, syscall.NLM_F_CREATE|syscall.NLM_F_EXCL|syscall.NLM_F_ACK) + base := qdisc.Attrs() + msg := &nl.TcMsg{ + Family: nl.FAMILY_ALL, + Ifindex: int32(base.LinkIndex), + Handle: base.Handle, + Parent: base.Parent, + } + req.AddData(msg) + req.AddData(nl.NewRtAttr(nl.TCA_KIND, nl.ZeroTerminated(qdisc.Type()))) + + options := nl.NewRtAttr(nl.TCA_OPTIONS, nil) + if prio, ok := qdisc.(*Prio); ok { + tcmap := nl.TcPrioMap{ + Bands: int32(prio.Bands), + Priomap: prio.PriorityMap, + } + options = nl.NewRtAttr(nl.TCA_OPTIONS, tcmap.Serialize()) + } else if tbf, ok := qdisc.(*Tbf); ok { + opt := nl.TcTbfQopt{} + // TODO: handle rate > uint32 + opt.Rate.Rate = uint32(tbf.Rate) + opt.Limit = tbf.Limit + opt.Buffer = tbf.Buffer + nl.NewRtAttrChild(options, nl.TCA_TBF_PARMS, opt.Serialize()) + } else if htb, ok := qdisc.(*Htb); ok { + opt := nl.TcHtbGlob{} + opt.Version = htb.Version + opt.Rate2Quantum = htb.Rate2Quantum + opt.Defcls = htb.Defcls + // TODO: Handle Debug properly. For now default to 0 + opt.Debug = htb.Debug + opt.DirectPkts = htb.DirectPkts + nl.NewRtAttrChild(options, nl.TCA_HTB_INIT, opt.Serialize()) + // nl.NewRtAttrChild(options, nl.TCA_HTB_DIRECT_QLEN, opt.Serialize()) + } else if _, ok := qdisc.(*Ingress); ok { + // ingress filters must use the proper handle + if msg.Parent != HANDLE_INGRESS { + return fmt.Errorf("Ingress filters must set Parent to HANDLE_INGRESS") + } + } + req.AddData(options) + _, err := req.Execute(syscall.NETLINK_ROUTE, 0) + return err +} + +// QdiscList gets a list of qdiscs in the system. +// Equivalent to: `tc qdisc show`. +// The list can be filtered by link. +func QdiscList(link Link) ([]Qdisc, error) { + req := nl.NewNetlinkRequest(syscall.RTM_GETQDISC, syscall.NLM_F_DUMP) + index := int32(0) + if link != nil { + base := link.Attrs() + ensureIndex(base) + index = int32(base.Index) + } + msg := &nl.TcMsg{ + Family: nl.FAMILY_ALL, + Ifindex: index, + } + req.AddData(msg) + + msgs, err := req.Execute(syscall.NETLINK_ROUTE, syscall.RTM_NEWQDISC) + if err != nil { + return nil, err + } + + var res []Qdisc + for _, m := range msgs { + msg := nl.DeserializeTcMsg(m) + + attrs, err := nl.ParseRouteAttr(m[msg.Len():]) + if err != nil { + return nil, err + } + + // skip qdiscs from other interfaces + if link != nil && msg.Ifindex != index { + continue + } + + base := QdiscAttrs{ + LinkIndex: int(msg.Ifindex), + Handle: msg.Handle, + Parent: msg.Parent, + Refcnt: msg.Info, + } + var qdisc Qdisc + qdiscType := "" + for _, attr := range attrs { + switch attr.Attr.Type { + case nl.TCA_KIND: + qdiscType = string(attr.Value[:len(attr.Value)-1]) + switch qdiscType { + case "pfifo_fast": + qdisc = &PfifoFast{} + case "prio": + qdisc = &Prio{} + case "tbf": + qdisc = &Tbf{} + case "ingress": + qdisc = &Ingress{} + case "htb": + qdisc = &Htb{} + default: + qdisc = &GenericQdisc{QdiscType: qdiscType} + } + case nl.TCA_OPTIONS: + switch qdiscType { + case "pfifo_fast": + // pfifo returns TcPrioMap directly without wrapping it in rtattr + if err := parsePfifoFastData(qdisc, attr.Value); err != nil { + return nil, err + } + case "prio": + // prio returns TcPrioMap directly without wrapping it in rtattr + if err := parsePrioData(qdisc, attr.Value); err != nil { + return nil, err + } + case "tbf": + data, err := nl.ParseRouteAttr(attr.Value) + if err != nil { + return nil, err + } + if err := parseTbfData(qdisc, data); err != nil { + return nil, err + } + case "htb": + data, err := nl.ParseRouteAttr(attr.Value) + if err != nil { + return nil, err + } + if err := parseHtbData(qdisc, data); err != nil { + return nil, err + } + + // no options for ingress + } + } + } + *qdisc.Attrs() = base + res = append(res, qdisc) + } + + return res, nil +} + +func parsePfifoFastData(qdisc Qdisc, value []byte) error { + pfifo := qdisc.(*PfifoFast) + tcmap := nl.DeserializeTcPrioMap(value) + pfifo.PriorityMap = tcmap.Priomap + pfifo.Bands = uint8(tcmap.Bands) + return nil +} + +func parsePrioData(qdisc Qdisc, value []byte) error { + prio := qdisc.(*Prio) + tcmap := nl.DeserializeTcPrioMap(value) + prio.PriorityMap = tcmap.Priomap + prio.Bands = uint8(tcmap.Bands) + return nil +} + +func parseHtbData(qdisc Qdisc, data []syscall.NetlinkRouteAttr) error { + native = nl.NativeEndian() + htb := qdisc.(*Htb) + for _, datum := range data { + switch datum.Attr.Type { + case nl.TCA_HTB_INIT: + opt := nl.DeserializeTcHtbGlob(datum.Value) + htb.Version = opt.Version + htb.Rate2Quantum = opt.Rate2Quantum + htb.Defcls = opt.Defcls + htb.Debug = opt.Debug + htb.DirectPkts = opt.DirectPkts + case nl.TCA_HTB_DIRECT_QLEN: + // TODO + //htb.DirectQlen = native.uint32(datum.Value) + } + } + return nil +} +func parseTbfData(qdisc Qdisc, data []syscall.NetlinkRouteAttr) error { + native = nl.NativeEndian() + tbf := qdisc.(*Tbf) + for _, datum := range data { + switch datum.Attr.Type { + case nl.TCA_TBF_PARMS: + opt := nl.DeserializeTcTbfQopt(datum.Value) + tbf.Rate = uint64(opt.Rate.Rate) + tbf.Limit = opt.Limit + tbf.Buffer = opt.Buffer + case nl.TCA_TBF_RATE64: + tbf.Rate = native.Uint64(datum.Value[0:4]) + } + } + return nil +} + +const ( + TIME_UNITS_PER_SEC = 1000000 +) + +var ( + tickInUsec float64 = 0.0 + clockFactor float64 = 0.0 + hz float64 = 0.0 +) + +func initClock() { + data, err := ioutil.ReadFile("/proc/net/psched") + if err != nil { + return + } + parts := strings.Split(strings.TrimSpace(string(data)), " ") + if len(parts) < 3 { + return + } + var vals [3]uint64 + for i := range vals { + val, err := strconv.ParseUint(parts[i], 16, 32) + if err != nil { + return + } + vals[i] = val + } + // compatibility + if vals[2] == 1000000000 { + vals[0] = vals[1] + } + clockFactor = float64(vals[2]) / TIME_UNITS_PER_SEC + tickInUsec = float64(vals[0]) / float64(vals[1]) * clockFactor + hz = float64(vals[0]) +} + +func TickInUsec() float64 { + if tickInUsec == 0.0 { + initClock() + } + return tickInUsec +} + +func ClockFactor() float64 { + if clockFactor == 0.0 { + initClock() + } + return clockFactor +} + +func Hz() float64 { + if hz == 0.0 { + initClock() + } + return hz +} + +func time2Tick(time uint32) uint32 { + return uint32(float64(time) * TickInUsec()) +} + +func tick2Time(tick uint32) uint32 { + return uint32(float64(tick) / TickInUsec()) +} + +func time2Ktime(time uint32) uint32 { + return uint32(float64(time) * ClockFactor()) +} + +func ktime2Time(ktime uint32) uint32 { + return uint32(float64(ktime) / ClockFactor()) +} + +func burst(rate uint64, buffer uint32) uint32 { + return uint32(float64(rate) * float64(tick2Time(buffer)) / TIME_UNITS_PER_SEC) +} + +func latency(rate uint64, limit, buffer uint32) float64 { + return TIME_UNITS_PER_SEC*(float64(limit)/float64(rate)) - float64(tick2Time(buffer)) +} + +func Xmittime(rate uint64, size uint32) float64 { + return TickInUsec() * TIME_UNITS_PER_SEC * (float64(size) / float64(rate)) +} diff --git a/Godeps/_workspace/src/github.com/vishvananda/netlink/qdisc_test.go b/Godeps/_workspace/src/github.com/vishvananda/netlink/qdisc_test.go new file mode 100644 index 00000000..1dc92dbb --- /dev/null +++ b/Godeps/_workspace/src/github.com/vishvananda/netlink/qdisc_test.go @@ -0,0 +1,171 @@ +package netlink + +import ( + "testing" +) + +func TestTbfAddDel(t *testing.T) { + tearDown := setUpNetlinkTest(t) + defer tearDown() + if err := LinkAdd(&Ifb{LinkAttrs{Name: "foo"}}); err != nil { + t.Fatal(err) + } + link, err := LinkByName("foo") + if err != nil { + t.Fatal(err) + } + if err := LinkSetUp(link); err != nil { + t.Fatal(err) + } + qdisc := &Tbf{ + QdiscAttrs: QdiscAttrs{ + LinkIndex: link.Attrs().Index, + Handle: MakeHandle(1, 0), + Parent: HANDLE_ROOT, + }, + Rate: 131072, + Limit: 1220703, + Buffer: 16793, + } + if err := QdiscAdd(qdisc); err != nil { + t.Fatal(err) + } + qdiscs, err := QdiscList(link) + if err != nil { + t.Fatal(err) + } + if len(qdiscs) != 1 { + t.Fatal("Failed to add qdisc") + } + tbf, ok := qdiscs[0].(*Tbf) + if !ok { + t.Fatal("Qdisc is the wrong type") + } + if tbf.Rate != qdisc.Rate { + t.Fatal("Rate doesn't match") + } + if tbf.Limit != qdisc.Limit { + t.Fatal("Limit doesn't match") + } + if tbf.Buffer != qdisc.Buffer { + t.Fatal("Buffer doesn't match") + } + if err := QdiscDel(qdisc); err != nil { + t.Fatal(err) + } + qdiscs, err = QdiscList(link) + if err != nil { + t.Fatal(err) + } + if len(qdiscs) != 0 { + t.Fatal("Failed to remove qdisc") + } +} + +func TestHtbAddDel(t *testing.T) { + tearDown := setUpNetlinkTest(t) + defer tearDown() + if err := LinkAdd(&Ifb{LinkAttrs{Name: "foo"}}); err != nil { + t.Fatal(err) + } + link, err := LinkByName("foo") + if err != nil { + t.Fatal(err) + } + if err := LinkSetUp(link); err != nil { + t.Fatal(err) + } + + attrs := QdiscAttrs{ + LinkIndex: link.Attrs().Index, + Handle: MakeHandle(1, 0), + Parent: HANDLE_ROOT, + } + + qdisc := NewHtb(attrs) + qdisc.Rate2Quantum = 5 + if err := QdiscAdd(qdisc); err != nil { + t.Fatal(err) + } + + /* + cmd := exec.Command("tc", "qdisc") + out, err := cmd.CombinedOutput() + if err == nil { + fmt.Printf("%s\n", out) + } + */ + qdiscs, err := QdiscList(link) + if err != nil { + t.Fatal(err) + } + if len(qdiscs) != 1 { + t.Fatal("Failed to add qdisc") + } + htb, ok := qdiscs[0].(*Htb) + if !ok { + t.Fatal("Qdisc is the wrong type") + } + if htb.Defcls != qdisc.Defcls { + t.Fatal("Defcls doesn't match") + } + if htb.Rate2Quantum != qdisc.Rate2Quantum { + t.Fatal("Rate2Quantum doesn't match") + } + if htb.Debug != qdisc.Debug { + t.Fatal("Debug doesn't match") + } + if err := QdiscDel(qdisc); err != nil { + t.Fatal(err) + } + qdiscs, err = QdiscList(link) + if err != nil { + t.Fatal(err) + } + if len(qdiscs) != 0 { + t.Fatal("Failed to remove qdisc") + } +} +func TestPrioAddDel(t *testing.T) { + tearDown := setUpNetlinkTest(t) + defer tearDown() + if err := LinkAdd(&Ifb{LinkAttrs{Name: "foo"}}); err != nil { + t.Fatal(err) + } + link, err := LinkByName("foo") + if err != nil { + t.Fatal(err) + } + if err := LinkSetUp(link); err != nil { + t.Fatal(err) + } + qdisc := NewPrio(QdiscAttrs{ + LinkIndex: link.Attrs().Index, + Handle: MakeHandle(1, 0), + Parent: HANDLE_ROOT, + }) + if err := QdiscAdd(qdisc); err != nil { + t.Fatal(err) + } + qdiscs, err := QdiscList(link) + if err != nil { + t.Fatal(err) + } + if len(qdiscs) != 1 { + t.Fatal("Failed to add qdisc") + } + _, ok := qdiscs[0].(*Prio) + if !ok { + t.Fatal("Qdisc is the wrong type") + } + if err := QdiscDel(qdisc); err != nil { + t.Fatal(err) + } + qdiscs, err = QdiscList(link) + if err != nil { + t.Fatal(err) + } + if len(qdiscs) != 0 { + t.Fatal("Failed to remove qdisc") + } +} diff --git a/Godeps/_workspace/src/github.com/vishvananda/netlink/route.go b/Godeps/_workspace/src/github.com/vishvananda/netlink/route.go index 6218546f..578270fb 100644 --- a/Godeps/_workspace/src/github.com/vishvananda/netlink/route.go +++ b/Godeps/_workspace/src/github.com/vishvananda/netlink/route.go @@ -33,3 +33,9 @@ func (r Route) String() string { return fmt.Sprintf("{Ifindex: %d Dst: %s Src: %s Gw: %s}", r.LinkIndex, r.Dst, r.Src, r.Gw) } + +// RouteUpdate is sent when a route changes - type is RTM_NEWROUTE or RTM_DELROUTE +type RouteUpdate struct { + Type uint16 + Route +} diff --git a/Godeps/_workspace/src/github.com/vishvananda/netlink/route_linux.go b/Godeps/_workspace/src/github.com/vishvananda/netlink/route_linux.go index 43872aa4..693e6cbc 100644 --- a/Godeps/_workspace/src/github.com/vishvananda/netlink/route_linux.go +++ b/Godeps/_workspace/src/github.com/vishvananda/netlink/route_linux.go @@ -14,22 +14,21 @@ import ( // Equivalent to: `ip route add $route` func RouteAdd(route *Route) error { req := nl.NewNetlinkRequest(syscall.RTM_NEWROUTE, syscall.NLM_F_CREATE|syscall.NLM_F_EXCL|syscall.NLM_F_ACK) - return routeHandle(route, req) + return routeHandle(route, req, nl.NewRtMsg()) } -// RouteAdd will delete a route from the system. +// RouteDel will delete a route from the system. // Equivalent to: `ip route del $route` func RouteDel(route *Route) error { req := nl.NewNetlinkRequest(syscall.RTM_DELROUTE, syscall.NLM_F_ACK) - return routeHandle(route, req) + return routeHandle(route, req, nl.NewRtDelMsg()) } -func routeHandle(route *Route, req *nl.NetlinkRequest) error { +func routeHandle(route *Route, req *nl.NetlinkRequest, msg *nl.RtMsg) error { if (route.Dst == nil || route.Dst.IP == nil) && route.Src == nil && route.Gw == nil { return fmt.Errorf("one of Dst.IP, Src, or Gw must not be nil") } - msg := nl.NewRtMsg() msg.Scope = uint8(route.Scope) family := -1 var rtAttrs []*nl.RtAttr @@ -118,8 +117,7 @@ func RouteList(link Link, family int) ([]Route, error) { index = base.Index } - native := nl.NativeEndian() - res := make([]Route, 0) + var res []Route for _, m := range msgs { msg := nl.DeserializeRtMsg(m) @@ -133,31 +131,14 @@ func RouteList(link Link, family int) ([]Route, error) { continue } - attrs, err := nl.ParseRouteAttr(m[msg.Len():]) + route, err := deserializeRoute(m) if err != nil { return nil, err } - route := Route{Scope: Scope(msg.Scope)} - for _, attr := range attrs { - switch attr.Attr.Type { - case syscall.RTA_GATEWAY: - route.Gw = net.IP(attr.Value) - case syscall.RTA_PREFSRC: - route.Src = net.IP(attr.Value) - case syscall.RTA_DST: - route.Dst = &net.IPNet{ - IP: attr.Value, - Mask: net.CIDRMask(int(msg.Dst_len), 8*len(attr.Value)), - } - case syscall.RTA_OIF: - routeIndex := int(native.Uint32(attr.Value[0:4])) - if link != nil && routeIndex != index { - // Ignore routes from other interfaces - continue - } - route.LinkIndex = routeIndex - } + if link != nil && route.LinkIndex != index { + // Ignore routes from other interfaces + continue } res = append(res, route) } @@ -165,6 +146,36 @@ func RouteList(link Link, family int) ([]Route, error) { return res, nil } +// deserializeRoute decodes a binary netlink message into a Route struct +func deserializeRoute(m []byte) (Route, error) { + route := Route{} + msg := nl.DeserializeRtMsg(m) + attrs, err := nl.ParseRouteAttr(m[msg.Len():]) + if err != nil { + return route, err + } + route.Scope = Scope(msg.Scope) + + native := nl.NativeEndian() + for _, attr := range attrs { + switch attr.Attr.Type { + case syscall.RTA_GATEWAY: + route.Gw = net.IP(attr.Value) + case syscall.RTA_PREFSRC: + route.Src = net.IP(attr.Value) + case syscall.RTA_DST: + route.Dst = &net.IPNet{ + IP: attr.Value, + Mask: net.CIDRMask(int(msg.Dst_len), 8*len(attr.Value)), + } + case syscall.RTA_OIF: + routeIndex := int(native.Uint32(attr.Value[0:4])) + route.LinkIndex = routeIndex + } + } + return route, nil +} + // RouteGet gets a route to a specific destination from the host system. // Equivalent to: 'ip route get'. func RouteGet(destination net.IP) ([]Route, error) { @@ -192,34 +203,47 @@ func RouteGet(destination net.IP) ([]Route, error) { return nil, err } - native := nl.NativeEndian() - res := make([]Route, 0) + var res []Route for _, m := range msgs { - msg := nl.DeserializeRtMsg(m) - attrs, err := nl.ParseRouteAttr(m[msg.Len():]) + route, err := deserializeRoute(m) if err != nil { return nil, err } - - route := Route{} - for _, attr := range attrs { - switch attr.Attr.Type { - case syscall.RTA_GATEWAY: - route.Gw = net.IP(attr.Value) - case syscall.RTA_PREFSRC: - route.Src = net.IP(attr.Value) - case syscall.RTA_DST: - route.Dst = &net.IPNet{ - IP: attr.Value, - Mask: net.CIDRMask(int(msg.Dst_len), 8*len(attr.Value)), - } - case syscall.RTA_OIF: - routeIndex := int(native.Uint32(attr.Value[0:4])) - route.LinkIndex = routeIndex - } - } res = append(res, route) } return res, nil } + +// RouteSubscribe takes a chan down which notifications will be sent +// when routes are added or deleted. Close the 'done' chan to stop subscription. +func RouteSubscribe(ch chan<- RouteUpdate, done <-chan struct{}) error { + s, err := nl.Subscribe(syscall.NETLINK_ROUTE, syscall.RTNLGRP_IPV4_ROUTE, syscall.RTNLGRP_IPV6_ROUTE) + if err != nil { + return err + } + if done != nil { + go func() { + <-done + s.Close() + }() + } + go func() { + defer close(ch) + for { + msgs, err := s.Receive() + if err != nil { + return + } + for _, m := range msgs { + route, err := deserializeRoute(m.Data) + if err != nil { + return + } + ch <- RouteUpdate{Type: m.Header.Type, Route: route} + } + } + }() + + return nil +} diff --git a/Godeps/_workspace/src/github.com/vishvananda/netlink/route_test.go b/Godeps/_workspace/src/github.com/vishvananda/netlink/route_test.go index f02bef8c..3d170ccb 100644 --- a/Godeps/_workspace/src/github.com/vishvananda/netlink/route_test.go +++ b/Godeps/_workspace/src/github.com/vishvananda/netlink/route_test.go @@ -2,7 +2,9 @@ package netlink import ( "net" + "syscall" "testing" + "time" ) func TestRouteAddDel(t *testing.T) { @@ -82,3 +84,63 @@ func TestRouteAddIncomplete(t *testing.T) { t.Fatal("Adding incomplete route should fail") } } + +func expectRouteUpdate(ch <-chan RouteUpdate, t uint16, dst net.IP) bool { + for { + timeout := time.After(time.Minute) + select { + case update := <-ch: + if update.Type == t && update.Route.Dst.IP.Equal(dst) { + return true + } + case <-timeout: + return false + } + } +} + +func TestRouteSubscribe(t *testing.T) { + tearDown := setUpNetlinkTest(t) + defer tearDown() + + ch := make(chan RouteUpdate) + done := make(chan struct{}) + defer close(done) + if err := RouteSubscribe(ch, done); err != nil { + t.Fatal(err) + } + + // get loopback interface + link, err := LinkByName("lo") + if err != nil { + t.Fatal(err) + } + + // bring the interface up + if err = LinkSetUp(link); err != nil { + t.Fatal(err) + } + + // add a gateway route + _, dst, err := net.ParseCIDR("192.168.0.0/24") + + ip := net.ParseIP("127.1.1.1") + route := Route{LinkIndex: link.Attrs().Index, Dst: dst, Src: ip} + err = RouteAdd(&route) + if err != nil { + t.Fatal(err) + } + + if !expectRouteUpdate(ch, syscall.RTM_NEWROUTE, dst.IP) { + t.Fatal("Add update not received as expected") + } + + err = RouteDel(&route) + if err != nil { + t.Fatal(err) + } + + if !expectRouteUpdate(ch, syscall.RTM_DELROUTE, dst.IP) { + t.Fatal("Del update not received as expected") + } +} diff --git a/Godeps/_workspace/src/github.com/vishvananda/netlink/xfrm_policy_linux.go b/Godeps/_workspace/src/github.com/vishvananda/netlink/xfrm_policy_linux.go index 6fe1b637..2daf6dc8 100644 --- a/Godeps/_workspace/src/github.com/vishvananda/netlink/xfrm_policy_linux.go +++ b/Godeps/_workspace/src/github.com/vishvananda/netlink/xfrm_policy_linux.go @@ -84,7 +84,7 @@ func XfrmPolicyList(family int) ([]XfrmPolicy, error) { return nil, err } - res := make([]XfrmPolicy, 0) + var res []XfrmPolicy for _, m := range msgs { msg := nl.DeserializeXfrmUserpolicyInfo(m) diff --git a/Godeps/_workspace/src/github.com/vishvananda/netlink/xfrm_state_linux.go b/Godeps/_workspace/src/github.com/vishvananda/netlink/xfrm_state_linux.go index 0f1fbd0e..5f44ec85 100644 --- a/Godeps/_workspace/src/github.com/vishvananda/netlink/xfrm_state_linux.go +++ b/Godeps/_workspace/src/github.com/vishvananda/netlink/xfrm_state_linux.go @@ -118,7 +118,7 @@ func XfrmStateList(family int) ([]XfrmState, error) { return nil, err } - res := make([]XfrmState, 0) + var res []XfrmState for _, m := range msgs { msg := nl.DeserializeXfrmUsersaInfo(m) From 7d8d6b2a7e3cf13b2137a6a771d1a4d6a1fcdfbd Mon Sep 17 00:00:00 2001 From: Eugene Yakubovich Date: Thu, 17 Sep 2015 11:49:21 -0700 Subject: [PATCH 2/5] Use single IP on the host for ptp veths Instead of allocating a /31 for each container, use the same IP on the host side for all veths. This is very similar how real point-to-point devices work (using donor IPs). --- plugins/main/ptp/ptp.go | 64 +++++++++++++++++++++++++++++++++++++++-- 1 file changed, 62 insertions(+), 2 deletions(-) diff --git a/plugins/main/ptp/ptp.go b/plugins/main/ptp/ptp.go index f14b55ea..58d52ef9 100644 --- a/plugins/main/ptp/ptp.go +++ b/plugins/main/ptp/ptp.go @@ -46,6 +46,17 @@ type NetConf struct { } func setupContainerVeth(netns, ifName string, mtu int, pr *types.Result) (string, error) { + // The IPAM result will be something like IP=192.168.3.5/24, GW=192.168.3.1. + // What we want is really a point-to-point link but veth does not support IFF_POINTOPONT. + // Next best thing would be to let it ARP but set interface to 192.168.3.5/32 and + // add a route like "192.168.3.0/24 via 192.168.3.1 dev $ifName". + // Unfortunately that won't work as the GW will be outside the interface's subnet. + + // Our solution is to configure the interface with 192.168.3.5/24, then delete the + // "192.168.3.0/24 dev $ifName" route that was automatically added. Then we add + // "192.168.3.1/32 dev $ifName" and "192.168.3.0/24 via 192.168.3.1 dev $ifName". + // In other words we force all traffic to ARP via the gateway except for GW itself. + var hostVethName string err := ns.WithNetNSPath(netns, false, func(hostNS *os.File) error { hostVeth, _, err := ip.SetupVeth(ifName, mtu, hostNS) @@ -53,11 +64,56 @@ func setupContainerVeth(netns, ifName string, mtu int, pr *types.Result) (string return err } - err = ipam.ConfigureIface(ifName, pr) + if err = ipam.ConfigureIface(ifName, pr); err != nil { + return err + } + + contVeth, err := netlink.LinkByName(ifName) if err != nil { return err } + // Delete the route that was automatically added + route := netlink.Route{ + LinkIndex: contVeth.Attrs().Index, + Dst: &net.IPNet{ + IP: pr.IP4.IP.IP.Mask(pr.IP4.IP.Mask), + Mask: pr.IP4.IP.Mask, + }, + Scope: netlink.SCOPE_LINK, + Src: pr.IP4.IP.IP, + } + + if err := netlink.RouteDel(&route); err != nil { + return err + } + + for _, r := range []netlink.Route{ + netlink.Route{ + LinkIndex: contVeth.Attrs().Index, + Dst: &net.IPNet{ + IP: pr.IP4.Gateway, + Mask: net.CIDRMask(32, 32), + }, + Scope: netlink.SCOPE_LINK, + Src: pr.IP4.IP.IP, + }, + netlink.Route{ + LinkIndex: contVeth.Attrs().Index, + Dst: &net.IPNet{ + IP: pr.IP4.IP.IP.Mask(pr.IP4.IP.Mask), + Mask: pr.IP4.IP.Mask, + }, + Scope: netlink.SCOPE_UNIVERSE, + Gw: pr.IP4.Gateway, + Src: pr.IP4.IP.IP, + }, + } { + if err := netlink.RouteAdd(&r); err != nil { + return err + } + } + hostVethName = hostVeth.Attrs().Name return nil @@ -75,13 +131,17 @@ func setupHostVeth(vethName string, ipConf *types.IPConfig) error { // TODO(eyakubovich): IPv6 ipn := &net.IPNet{ IP: ipConf.Gateway, - Mask: net.CIDRMask(31, 32), + Mask: net.CIDRMask(32, 32), } addr := &netlink.Addr{IPNet: ipn, Label: ""} if err = netlink.AddrAdd(veth, addr); err != nil { return fmt.Errorf("failed to add IP addr (%#v) to veth: %v", ipn, err) } + ipn = &net.IPNet{ + IP: ipConf.IP.IP, + Mask: net.CIDRMask(32, 32), + } // dst happens to be the same as IP/net of host veth if err = ip.AddHostRoute(ipn, nil, veth); err != nil && !os.IsExist(err) { return fmt.Errorf("failed to add route on host: %v", err) From 60be55a7d1ea49913cb3953b60d1eb5aabd879f1 Mon Sep 17 00:00:00 2001 From: Eugene Yakubovich Date: Thu, 17 Sep 2015 14:56:13 -0700 Subject: [PATCH 3/5] remove host-local-ptp plugin --- build | 4 -- plugins/ipam/host-local/allocator.go | 56 ---------------------------- plugins/ipam/host-local/main.go | 14 +------ 3 files changed, 1 insertion(+), 73 deletions(-) diff --git a/build b/build index 99f42fb4..e2bb47ab 100755 --- a/build +++ b/build @@ -27,7 +27,3 @@ for d in $PLUGINS; do go install ${REPO_PATH}/$d fi done - -if [ ! -h $GOBIN/host-local-ptp ]; then - ln -s host-local $GOBIN/host-local-ptp -fi diff --git a/plugins/ipam/host-local/allocator.go b/plugins/ipam/host-local/allocator.go index 56c79479..9756a117 100644 --- a/plugins/ipam/host-local/allocator.go +++ b/plugins/ipam/host-local/allocator.go @@ -133,50 +133,6 @@ func (a *IPAllocator) Get(id string) (*types.IPConfig, error) { return nil, fmt.Errorf("no IP addresses available in network: %s", a.conf.Name) } -// Allocates both an IP and the Gateway IP, i.e. a /31 -// This is used for Point-to-Point links -func (a *IPAllocator) GetPtP(id string) (*types.IPConfig, error) { - a.store.Lock() - defer a.store.Unlock() - - for cur := a.start; !cur.Equal(a.end); cur = ip.NextIP(cur) { - // we're looking for unreserved even, odd pair - if !evenIP(cur) { - continue - } - - gw := cur - reserved, err := a.store.Reserve(id, gw) - if err != nil { - return nil, err - } - if reserved { - cur = ip.NextIP(cur) - if cur.Equal(a.end) { - break - } - - reserved, err := a.store.Reserve(id, cur) - if err != nil { - return nil, err - } - if reserved { - // found them both! - _, bits := a.conf.Subnet.Mask.Size() - mask := net.CIDRMask(bits-1, bits) - - return &types.IPConfig{ - IP: net.IPNet{cur, mask}, - Gateway: gw, - Routes: a.conf.Routes, - }, nil - } - } - } - - return nil, fmt.Errorf("no ip addresses available in network: %s", a.conf.Name) -} - // Releases all IPs allocated for the container with given ID func (a *IPAllocator) Release(id string) error { a.store.Lock() @@ -204,15 +160,3 @@ func networkRange(ipnet *net.IPNet) (net.IP, net.IP, error) { } return ipnet.IP, end, nil } - -func evenIP(ip net.IP) bool { - i := ip.To4() - if i == nil { - i = ip.To16() - if i == nil { - panic("IP is not v4 or v6") - } - } - - return i[len(i)-1]%2 == 0 -} diff --git a/plugins/ipam/host-local/main.go b/plugins/ipam/host-local/main.go index 18c97561..06c95c9c 100644 --- a/plugins/ipam/host-local/main.go +++ b/plugins/ipam/host-local/main.go @@ -15,8 +15,6 @@ package main import ( - "errors" - "github.com/appc/cni/plugins/ipam/host-local/backend/disk" "github.com/appc/cni/pkg/skel" @@ -51,17 +49,7 @@ func cmdAdd(args *skel.CmdArgs) error { return err } - var ipConf *types.IPConfig - - switch ipamConf.Type { - case "host-local": - ipConf, err = allocator.Get(args.ContainerID) - case "host-local-ptp": - ipConf, err = allocator.GetPtP(args.ContainerID) - default: - return errors.New("Unsupported IPAM plugin type") - } - + ipConf, err := allocator.Get(args.ContainerID) if err != nil { return err } From 6737bc8207fd58727a46bba2cfb74f4e9391ad4f Mon Sep 17 00:00:00 2001 From: Stefan Junker Date: Wed, 23 Sep 2015 22:11:25 +0200 Subject: [PATCH 4/5] update ptp docs to reflect changes in plugin --- Documentation/ptp.md | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/Documentation/ptp.md b/Documentation/ptp.md index a3270bcf..e7f2ffe7 100644 --- a/Documentation/ptp.md +++ b/Documentation/ptp.md @@ -3,10 +3,8 @@ ## Overview The ptp plugin creates a point-to-point link between a container and the host by using a veth device. One end of the veth pair is placed inside a container and the other end resides on the host. -Both ends receive an IP address out of a /31 range. -The IP of the host end becomes the gateway address inside the container. - -Because ptp plugin requires a pair of IP addresses for each container, it should be used in conjunction with host-local-ptp IPAM plugin. +The host-local IPAM plugin can be used to allocate an IP address to the container. +The traffic of the container interface will be routed through the interface of the host. ## Example network configuration ``` @@ -14,7 +12,7 @@ Because ptp plugin requires a pair of IP addresses for each container, it should "name": "mynet", "type": "ptp", "ipam": { - "type": "host-local-ptp", + "type": "host-local", "subnet": "10.1.1.0/24" } } From 231d2d5a27f3ba54219c3f0b1c8ef2c5dab4faaf Mon Sep 17 00:00:00 2001 From: Stefan Junker Date: Wed, 23 Sep 2015 11:03:22 +0200 Subject: [PATCH 5/5] plugins/ptp: allow host veth to be UP --- pkg/ip/link.go | 10 +++------- plugins/main/ptp/ptp.go | 4 ++++ 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/pkg/ip/link.go b/pkg/ip/link.go index 3936ed9d..e97dcd28 100644 --- a/pkg/ip/link.go +++ b/pkg/ip/link.go @@ -77,8 +77,9 @@ func RandomVethName() (string, error) { return fmt.Sprintf("veth%x", entropy), nil } -// SetupVeth sets up a virtual ethernet link. -// Should be in container netns. +// SetupVeth creates the virtual ethernet pair and sets up the container's end in the container netns. +// Setting up the host end up has to be done in the host netns outside of this function. +// This is because moving the host veth end will cause it to be brought down automatically when it is moved to the host netns. func SetupVeth(contVethName string, mtu int, hostNS *os.File) (hostVeth, contVeth netlink.Link, err error) { var hostVethName string hostVethName, contVeth, err = makeVeth(contVethName, mtu) @@ -97,11 +98,6 @@ func SetupVeth(contVethName string, mtu int, hostNS *os.File) (hostVeth, contVet return } - if err = netlink.LinkSetUp(hostVeth); err != nil { - err = fmt.Errorf("failed to set %q up: %v", contVethName, err) - return - } - if err = netlink.LinkSetNsFd(hostVeth, int(hostNS.Fd())); err != nil { err = fmt.Errorf("failed to move veth to host netns: %v", err) return diff --git a/plugins/main/ptp/ptp.go b/plugins/main/ptp/ptp.go index 58d52ef9..c91418aa 100644 --- a/plugins/main/ptp/ptp.go +++ b/plugins/main/ptp/ptp.go @@ -128,6 +128,10 @@ func setupHostVeth(vethName string, ipConf *types.IPConfig) error { return fmt.Errorf("failed to lookup %q: %v", vethName, err) } + if err = netlink.LinkSetUp(veth); err != nil { + return fmt.Errorf("failed to set %q up: %v", vethName, err) + } + // TODO(eyakubovich): IPv6 ipn := &net.IPNet{ IP: ipConf.Gateway,