From f233410d207c2985dd8a34c18a8c140200134d31 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Tue, 13 Jan 2015 17:39:32 -0800 Subject: [PATCH 1/9] update kernel headers to 3.19 net-next --- include/linux/if_bridge.h | 2 ++ include/linux/l2tp.h | 1 + include/linux/libc-compat.h | 6 ++++++ include/linux/rtnetlink.h | 3 +++ 4 files changed, 12 insertions(+) diff --git a/include/linux/if_bridge.h b/include/linux/if_bridge.h index 19ff22af..913bd8e3 100644 --- a/include/linux/if_bridge.h +++ b/include/linux/if_bridge.h @@ -125,6 +125,8 @@ enum { #define BRIDGE_VLAN_INFO_MASTER (1<<0) /* Operate on Bridge device as well */ #define BRIDGE_VLAN_INFO_PVID (1<<1) /* VLAN is PVID, ingress untagged */ #define BRIDGE_VLAN_INFO_UNTAGGED (1<<2) /* VLAN egresses untagged */ +#define BRIDGE_VLAN_INFO_RANGE_BEGIN (1<<3) /* VLAN is start of vlan range */ +#define BRIDGE_VLAN_INFO_RANGE_END (1<<4) /* VLAN is end of vlan range */ struct bridge_vlan_info { __u16 flags; diff --git a/include/linux/l2tp.h b/include/linux/l2tp.h index c0e116a4..5b0e36d0 100644 --- a/include/linux/l2tp.h +++ b/include/linux/l2tp.h @@ -176,5 +176,6 @@ enum l2tp_seqmode { */ #define L2TP_GENL_NAME "l2tp" #define L2TP_GENL_VERSION 0x1 +#define L2TP_GENL_MCGROUP "l2tp" #endif /* _LINUX_L2TP_H_ */ diff --git a/include/linux/libc-compat.h b/include/linux/libc-compat.h index 9e860a0c..990332e0 100644 --- a/include/linux/libc-compat.h +++ b/include/linux/libc-compat.h @@ -70,6 +70,8 @@ #define __UAPI_DEF_IPV6_MREQ 0 #define __UAPI_DEF_IPPROTO_V6 0 #define __UAPI_DEF_IPV6_OPTIONS 0 +#define __UAPI_DEF_IN6_PKTINFO 0 +#define __UAPI_DEF_IP6_MTUINFO 0 #else @@ -84,6 +86,8 @@ #define __UAPI_DEF_IPV6_MREQ 1 #define __UAPI_DEF_IPPROTO_V6 1 #define __UAPI_DEF_IPV6_OPTIONS 1 +#define __UAPI_DEF_IN6_PKTINFO 1 +#define __UAPI_DEF_IP6_MTUINFO 1 #endif /* _NETINET_IN_H */ @@ -106,6 +110,8 @@ #define __UAPI_DEF_IPV6_MREQ 1 #define __UAPI_DEF_IPPROTO_V6 1 #define __UAPI_DEF_IPV6_OPTIONS 1 +#define __UAPI_DEF_IN6_PKTINFO 1 
+#define __UAPI_DEF_IP6_MTUINFO 1 /* Definitions for xattr.h */ #define __UAPI_DEF_XATTR 1 diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h index 9aa5c2f9..9111053f 100644 --- a/include/linux/rtnetlink.h +++ b/include/linux/rtnetlink.h @@ -389,6 +389,8 @@ enum { #define RTAX_INITRWND RTAX_INITRWND RTAX_QUICKACK, #define RTAX_QUICKACK RTAX_QUICKACK + RTAX_CC_ALGO, +#define RTAX_CC_ALGO RTAX_CC_ALGO __RTAX_MAX }; @@ -632,6 +634,7 @@ struct tcamsg { /* New extended info filters for IFLA_EXT_MASK */ #define RTEXT_FILTER_VF (1 << 0) #define RTEXT_FILTER_BRVLAN (1 << 1) +#define RTEXT_FILTER_BRVLAN_COMPRESSED (1 << 2) /* End of information exported to user level */ From 6ef87f9cce213cae66098d08e0abc36d67b95244 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Fri, 9 Jan 2015 00:13:06 +0100 Subject: [PATCH 2/9] ip: route: add congestion control metric This patch adds configuration and dumping of congestion control metric for ip route, for example: ip route add dev foo congctl [lock] dctcp Reference: http://thread.gmane.org/gmane.linux.network/344733 Signed-off-by: Daniel Borkmann --- ip/iproute.c | 22 ++++++++++++++++++---- man/man8/ip-route.8.in | 19 ++++++++++++++++++- 2 files changed, 36 insertions(+), 5 deletions(-) diff --git a/ip/iproute.c b/ip/iproute.c index 5a496a91..76d8e36c 100644 --- a/ip/iproute.c +++ b/ip/iproute.c @@ -53,6 +53,7 @@ static const char *mx_names[RTAX_MAX+1] = { [RTAX_RTO_MIN] = "rto_min", [RTAX_INITRWND] = "initrwnd", [RTAX_QUICKACK] = "quickack", + [RTAX_CC_ALGO] = "congctl", }; static void usage(void) __attribute__((noreturn)); @@ -80,8 +81,7 @@ static void usage(void) fprintf(stderr, " [ window NUMBER] [ cwnd NUMBER ] [ initcwnd NUMBER ]\n"); fprintf(stderr, " [ ssthresh NUMBER ] [ realms REALM ] [ src ADDRESS ]\n"); fprintf(stderr, " [ rto_min TIME ] [ hoplimit NUMBER ] [ initrwnd NUMBER ]\n"); - fprintf(stderr, " [ features FEATURES ]\n"); - fprintf(stderr, " [ quickack BOOL ]\n"); + fprintf(stderr, " [ features 
FEATURES ] [ quickack BOOL ] [ congctl NAME ]\n"); fprintf(stderr, "TYPE := [ unicast | local | broadcast | multicast | throw |\n"); fprintf(stderr, " unreachable | prohibit | blackhole | nat ]\n"); fprintf(stderr, "TABLE_ID := [ local | main | default | all | NUMBER ]\n"); @@ -536,7 +536,7 @@ int print_route(const struct sockaddr_nl *who, struct nlmsghdr *n, void *arg) mxlock = *(unsigned*)RTA_DATA(mxrta[RTAX_LOCK]); for (i=2; i<= RTAX_MAX; i++) { - unsigned val; + __u32 val; if (mxrta[i] == NULL) continue; @@ -545,10 +545,12 @@ int print_route(const struct sockaddr_nl *who, struct nlmsghdr *n, void *arg) fprintf(fp, " %s", mx_names[i]); else fprintf(fp, " metric %d", i); + if (mxlock & (1< Date: Thu, 5 Feb 2015 10:20:58 -0800 Subject: [PATCH 3/9] update kernel headers based on net-next 3.21 Pull in headers from later tree --- include/linux/if_link.h | 4 ++++ include/linux/neighbour.h | 1 + include/linux/rtnetlink.h | 5 +++++ 3 files changed, 10 insertions(+) diff --git a/include/linux/if_link.h b/include/linux/if_link.h index 167ec34b..ac64724c 100644 --- a/include/linux/if_link.h +++ b/include/linux/if_link.h @@ -146,6 +146,7 @@ enum { IFLA_PHYS_PORT_ID, IFLA_CARRIER_CHANGES, IFLA_PHYS_SWITCH_ID, + IFLA_LINK_NETNSID, __IFLA_MAX }; @@ -368,6 +369,9 @@ enum { IFLA_VXLAN_UDP_CSUM, IFLA_VXLAN_UDP_ZERO_CSUM6_TX, IFLA_VXLAN_UDP_ZERO_CSUM6_RX, + IFLA_VXLAN_REMCSUM_TX, + IFLA_VXLAN_REMCSUM_RX, + IFLA_VXLAN_GBP, __IFLA_VXLAN_MAX }; #define IFLA_VXLAN_MAX (__IFLA_VXLAN_MAX - 1) diff --git a/include/linux/neighbour.h b/include/linux/neighbour.h index f3d77f9f..3873a355 100644 --- a/include/linux/neighbour.h +++ b/include/linux/neighbour.h @@ -25,6 +25,7 @@ enum { NDA_VNI, NDA_IFINDEX, NDA_MASTER, + NDA_LINK_NETNSID, __NDA_MAX }; diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h index 9111053f..3eb78105 100644 --- a/include/linux/rtnetlink.h +++ b/include/linux/rtnetlink.h @@ -132,6 +132,11 @@ enum { RTM_GETMDB = 86, #define RTM_GETMDB RTM_GETMDB + 
RTM_NEWNSID = 88, +#define RTM_NEWNSID RTM_NEWNSID + RTM_GETNSID = 90, +#define RTM_GETNSID RTM_GETNSID + __RTM_MAX, #define RTM_MAX (((__RTM_MAX + 3) & ~3) - 1) }; From 2eb90dc7622caab72a84897592bbc02375cdd4f0 Mon Sep 17 00:00:00 2001 From: Thomas Graf Date: Thu, 15 Jan 2015 14:54:25 +0100 Subject: [PATCH 4/9] vxlan: Group policy extension Signed-off-by: Thomas Graf --- ip/iplink_vxlan.c | 11 +++++++++++ man/man8/ip-link.8.in | 45 +++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 56 insertions(+) diff --git a/ip/iplink_vxlan.c b/ip/iplink_vxlan.c index 9cc3ec38..473ff97a 100644 --- a/ip/iplink_vxlan.c +++ b/ip/iplink_vxlan.c @@ -30,6 +30,7 @@ static void print_explain(FILE *f) fprintf(f, " [ [no]l2miss ] [ [no]l3miss ]\n"); fprintf(f, " [ ageing SECONDS ] [ maxaddress NUMBER ]\n"); fprintf(f, " [ [no]udpcsum ] [ [no]udp6zerocsumtx ] [ [no]udp6zerocsumrx ]\n"); + fprintf(f, " [ gbp ]\n"); fprintf(f, "\n"); fprintf(f, "Where: VNI := 0-16777215\n"); fprintf(f, " ADDR := { IP_ADDRESS | any }\n"); @@ -68,6 +69,7 @@ static int vxlan_parse_opt(struct link_util *lu, int argc, char **argv, __u8 udpcsum = 0; __u8 udp6zerocsumtx = 0; __u8 udp6zerocsumrx = 0; + __u8 gbp = 0; int dst_port_set = 0; struct ifla_vxlan_port_range range = { 0, 0 }; @@ -197,6 +199,8 @@ static int vxlan_parse_opt(struct link_util *lu, int argc, char **argv, udp6zerocsumrx = 1; } else if (!matches(*argv, "noudp6zerocsumrx")) { udp6zerocsumrx = 0; + } else if (!matches(*argv, "gbp")) { + gbp = 1; } else if (matches(*argv, "help") == 0) { explain(); return -1; @@ -268,6 +272,10 @@ static int vxlan_parse_opt(struct link_util *lu, int argc, char **argv, if (dstport) addattr16(n, 1024, IFLA_VXLAN_PORT, htons(dstport)); + if (gbp) + addattr_l(n, 1024, IFLA_VXLAN_GBP, NULL, 0); + + return 0; } @@ -398,6 +406,9 @@ static void vxlan_print_opt(struct link_util *lu, FILE *f, struct rtattr *tb[]) if (tb[IFLA_VXLAN_UDP_ZERO_CSUM6_RX] && rta_getattr_u8(tb[IFLA_VXLAN_UDP_ZERO_CSUM6_RX])) 
fputs("udp6zerocsumrx ", f); + + if (tb[IFLA_VXLAN_GBP]) + fputs("gbp ", f); } static void vxlan_print_help(struct link_util *lu, int argc, char **argv, diff --git a/man/man8/ip-link.8.in b/man/man8/ip-link.8.in index 313d6f23..8e312978 100644 --- a/man/man8/ip-link.8.in +++ b/man/man8/ip-link.8.in @@ -276,6 +276,8 @@ the following additional arguments are supported: .BI ageing " SECONDS " .R " ] [ " .BI maxaddress " NUMBER " +.R " ] [ " +.B gbp .R " ]" .in +8 @@ -348,6 +350,49 @@ are entered into the VXLAN device forwarding database. .BI maxaddress " NUMBER" - specifies the maximum number of FDB entries. +.sp +.B gbp +- enables the Group Policy extension (VXLAN-GBP). + +.in +4 +Allows transporting group policy context across VXLAN network peers. +If enabled, includes the mark of a packet in the VXLAN header for outgoing +packets and fills the packet mark based on the information found in the +VXLAN header for incoming packets. + +Format of upper 16 bits of packet mark (flags): + +.in +2 ++-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +.br +|-|-|-|-|-|-|-|-|-|D|-|-|A|-|-|-| +.br ++-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + +.B D := +Don't Learn bit. When set, this bit indicates that the egress +VTEP MUST NOT learn the source address of the encapsulated frame. + +.B A := +Indicates that the group policy has already been applied to +this packet. Policies MUST NOT be applied by devices when the A bit is set. +.in -2 + +Format of lower 16 bits of packet mark (policy ID): + +.in +2 ++-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +.br +| Group Policy ID | +.br ++-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +.in -2 + +Example: + iptables -A OUTPUT [...] 
-j MARK --set-mark 0x800FF + +.in -4 + .in -8 .TP From 1d129d191a3a632e05cf440c15aaffe23e0fa798 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Mon, 19 Jan 2015 16:56:29 +0100 Subject: [PATCH 5/9] tc: push bpf common code into separate file Signed-off-by: Jiri Pirko --- tc/Makefile | 2 +- tc/f_bpf.c | 136 ++++-------------------------------------------- tc/tc_bpf.c | 146 ++++++++++++++++++++++++++++++++++++++++++++++++++++ tc/tc_bpf.h | 28 ++++++++++ 4 files changed, 186 insertions(+), 126 deletions(-) create mode 100644 tc/tc_bpf.c create mode 100644 tc/tc_bpf.h diff --git a/tc/Makefile b/tc/Makefile index 9412094f..15f68ce0 100644 --- a/tc/Makefile +++ b/tc/Makefile @@ -1,5 +1,5 @@ TCOBJ= tc.o tc_qdisc.o tc_class.o tc_filter.o tc_util.o \ - tc_monitor.o m_police.o m_estimator.o m_action.o \ + tc_monitor.o tc_bpf.o m_police.o m_estimator.o m_action.o \ m_ematch.o emp_ematch.yacc.o emp_ematch.lex.o include ../Config diff --git a/tc/f_bpf.c b/tc/f_bpf.c index 48635a70..e2af94e3 100644 --- a/tc/f_bpf.c +++ b/tc/f_bpf.c @@ -26,6 +26,7 @@ #include "utils.h" #include "tc_util.h" +#include "tc_bpf.h" static void explain(void) { @@ -44,130 +45,6 @@ static void explain(void) fprintf(stderr, "NOTE: CLASSID is parsed as hexadecimal input.\n"); } -static int bpf_parse_string(char *arg, bool from_file, __u16 *bpf_len, - char **bpf_string, bool *need_release, - const char separator) -{ - char sp; - - if (from_file) { - size_t tmp_len, op_len = sizeof("65535 255 255 4294967295,"); - char *tmp_string; - FILE *fp; - - tmp_len = sizeof("4096,") + BPF_MAXINSNS * op_len; - tmp_string = malloc(tmp_len); - if (tmp_string == NULL) - return -ENOMEM; - - memset(tmp_string, 0, tmp_len); - - fp = fopen(arg, "r"); - if (fp == NULL) { - perror("Cannot fopen"); - free(tmp_string); - return -ENOENT; - } - - if (!fgets(tmp_string, tmp_len, fp)) { - free(tmp_string); - fclose(fp); - return -EIO; - } - - fclose(fp); - - *need_release = true; - *bpf_string = tmp_string; - } else { - *need_release = 
false; - *bpf_string = arg; - } - - if (sscanf(*bpf_string, "%hu%c", bpf_len, &sp) != 2 || - sp != separator) { - if (*need_release) - free(*bpf_string); - return -EINVAL; - } - - return 0; -} - -static int bpf_parse_ops(int argc, char **argv, struct nlmsghdr *n, - bool from_file) -{ - char *bpf_string, *token, separator = ','; - struct sock_filter bpf_ops[BPF_MAXINSNS]; - int ret = 0, i = 0; - bool need_release; - __u16 bpf_len = 0; - - if (argc < 1) - return -EINVAL; - if (bpf_parse_string(argv[0], from_file, &bpf_len, &bpf_string, - &need_release, separator)) - return -EINVAL; - if (bpf_len == 0 || bpf_len > BPF_MAXINSNS) { - ret = -EINVAL; - goto out; - } - - token = bpf_string; - while ((token = strchr(token, separator)) && (++token)[0]) { - if (i >= bpf_len) { - fprintf(stderr, "Real program length exceeds encoded " - "length parameter!\n"); - ret = -EINVAL; - goto out; - } - - if (sscanf(token, "%hu %hhu %hhu %u,", - &bpf_ops[i].code, &bpf_ops[i].jt, - &bpf_ops[i].jf, &bpf_ops[i].k) != 4) { - fprintf(stderr, "Error at instruction %d!\n", i); - ret = -EINVAL; - goto out; - } - - i++; - } - - if (i != bpf_len) { - fprintf(stderr, "Parsed program length is less than encoded" - "length parameter!\n"); - ret = -EINVAL; - goto out; - } - - addattr_l(n, MAX_MSG, TCA_BPF_OPS_LEN, &bpf_len, sizeof(bpf_len)); - addattr_l(n, MAX_MSG, TCA_BPF_OPS, &bpf_ops, - bpf_len * sizeof(struct sock_filter)); -out: - if (need_release) - free(bpf_string); - - return ret; -} - -static void bpf_print_ops(FILE *f, struct rtattr *bpf_ops, __u16 len) -{ - struct sock_filter *ops = (struct sock_filter *) RTA_DATA(bpf_ops); - int i; - - if (len == 0) - return; - - fprintf(f, "bytecode \'%u,", len); - - for (i = 0; i < len - 1; i++) - fprintf(f, "%hu %hhu %hhu %u,", ops[i].code, ops[i].jt, - ops[i].jf, ops[i].k); - - fprintf(f, "%hu %hhu %hhu %u\'\n", ops[i].code, ops[i].jt, - ops[i].jf, ops[i].k); -} - static int bpf_parse_opt(struct filter_util *qu, char *handle, int argc, char **argv, 
struct nlmsghdr *n) { @@ -195,6 +72,10 @@ static int bpf_parse_opt(struct filter_util *qu, char *handle, while (argc > 0) { if (matches(*argv, "run") == 0) { bool from_file; + struct sock_filter bpf_ops[BPF_MAXINSNS]; + __u16 bpf_len; + int ret; + NEXT_ARG(); if (strcmp(*argv, "bytecode-file") == 0) { from_file = true; @@ -206,10 +87,15 @@ static int bpf_parse_opt(struct filter_util *qu, char *handle, return -1; } NEXT_ARG(); - if (bpf_parse_ops(argc, argv, n, from_file)) { + ret = bpf_parse_ops(argc, argv, bpf_ops, from_file); + if (ret < 0) { fprintf(stderr, "Illegal \"bytecode\"\n"); return -1; } + bpf_len = ret; + addattr16(n, MAX_MSG, TCA_BPF_OPS_LEN, bpf_len); + addattr_l(n, MAX_MSG, TCA_BPF_OPS, &bpf_ops, + bpf_len * sizeof(struct sock_filter)); } else if (matches(*argv, "classid") == 0 || strcmp(*argv, "flowid") == 0) { unsigned handle; diff --git a/tc/tc_bpf.c b/tc/tc_bpf.c new file mode 100644 index 00000000..c6901d6c --- /dev/null +++ b/tc/tc_bpf.c @@ -0,0 +1,146 @@ +/* + * tc_bpf.c BPF common code + * + * This program is free software; you can distribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. 
+ * + * Authors: Daniel Borkmann + * Jiri Pirko + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "utils.h" +#include "tc_util.h" +#include "tc_bpf.h" + +int bpf_parse_string(char *arg, bool from_file, __u16 *bpf_len, + char **bpf_string, bool *need_release, + const char separator) +{ + char sp; + + if (from_file) { + size_t tmp_len, op_len = sizeof("65535 255 255 4294967295,"); + char *tmp_string; + FILE *fp; + + tmp_len = sizeof("4096,") + BPF_MAXINSNS * op_len; + tmp_string = malloc(tmp_len); + if (tmp_string == NULL) + return -ENOMEM; + + memset(tmp_string, 0, tmp_len); + + fp = fopen(arg, "r"); + if (fp == NULL) { + perror("Cannot fopen"); + free(tmp_string); + return -ENOENT; + } + + if (!fgets(tmp_string, tmp_len, fp)) { + free(tmp_string); + fclose(fp); + return -EIO; + } + + fclose(fp); + + *need_release = true; + *bpf_string = tmp_string; + } else { + *need_release = false; + *bpf_string = arg; + } + + if (sscanf(*bpf_string, "%hu%c", bpf_len, &sp) != 2 || + sp != separator) { + if (*need_release) + free(*bpf_string); + return -EINVAL; + } + + return 0; +} + +int bpf_parse_ops(int argc, char **argv, struct sock_filter *bpf_ops, + bool from_file) +{ + char *bpf_string, *token, separator = ','; + int ret = 0, i = 0; + bool need_release; + __u16 bpf_len = 0; + + if (argc < 1) + return -EINVAL; + if (bpf_parse_string(argv[0], from_file, &bpf_len, &bpf_string, + &need_release, separator)) + return -EINVAL; + if (bpf_len == 0 || bpf_len > BPF_MAXINSNS) { + ret = -EINVAL; + goto out; + } + + token = bpf_string; + while ((token = strchr(token, separator)) && (++token)[0]) { + if (i >= bpf_len) { + fprintf(stderr, "Real program length exceeds encoded " + "length parameter!\n"); + ret = -EINVAL; + goto out; + } + + if (sscanf(token, "%hu %hhu %hhu %u,", + &bpf_ops[i].code, &bpf_ops[i].jt, + &bpf_ops[i].jf, &bpf_ops[i].k) != 4) { + fprintf(stderr, "Error at instruction %d!\n", i); + ret = -EINVAL; + goto 
out; + } + + i++; + } + + if (i != bpf_len) { + fprintf(stderr, "Parsed program length is less than encoded" + "length parameter!\n"); + ret = -EINVAL; + goto out; + } + ret = bpf_len; + +out: + if (need_release) + free(bpf_string); + + return ret; +} + +void bpf_print_ops(FILE *f, struct rtattr *bpf_ops, __u16 len) +{ + struct sock_filter *ops = (struct sock_filter *) RTA_DATA(bpf_ops); + int i; + + if (len == 0) + return; + + fprintf(f, "bytecode \'%u,", len); + + for (i = 0; i < len - 1; i++) + fprintf(f, "%hu %hhu %hhu %u,", ops[i].code, ops[i].jt, + ops[i].jf, ops[i].k); + + fprintf(f, "%hu %hhu %hhu %u\'\n", ops[i].code, ops[i].jt, + ops[i].jf, ops[i].k); +} diff --git a/tc/tc_bpf.h b/tc/tc_bpf.h new file mode 100644 index 00000000..08cca927 --- /dev/null +++ b/tc/tc_bpf.h @@ -0,0 +1,28 @@ +/* + * tc_bpf.h BPF common code + * + * This program is free software; you can distribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. 
+ * + * Authors: Daniel Borkmann + * Jiri Pirko + */ + +#ifndef _TC_BPF_H_ +#define _TC_BPF_H_ 1 + +#include +#include +#include +#include + +int bpf_parse_string(char *arg, bool from_file, __u16 *bpf_len, + char **bpf_string, bool *need_release, + const char separator); +int bpf_parse_ops(int argc, char **argv, struct sock_filter *bpf_ops, + bool from_file); +void bpf_print_ops(FILE *f, struct rtattr *bpf_ops, __u16 len); + +#endif From 86ab59a6660f12302049cb3ad88fb2c2c9a716f2 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Mon, 19 Jan 2015 16:56:30 +0100 Subject: [PATCH 6/9] tc: add support for BPF based actions Signed-off-by: Jiri Pirko --- include/linux/tc_act/tc_bpf.h | 31 ++++++ tc/Makefile | 1 + tc/m_bpf.c | 183 ++++++++++++++++++++++++++++++++++ 3 files changed, 215 insertions(+) create mode 100644 include/linux/tc_act/tc_bpf.h create mode 100644 tc/m_bpf.c diff --git a/include/linux/tc_act/tc_bpf.h b/include/linux/tc_act/tc_bpf.h new file mode 100644 index 00000000..5288bd77 --- /dev/null +++ b/include/linux/tc_act/tc_bpf.h @@ -0,0 +1,31 @@ +/* + * Copyright (c) 2015 Jiri Pirko + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ */ + +#ifndef __LINUX_TC_BPF_H +#define __LINUX_TC_BPF_H + +#include + +#define TCA_ACT_BPF 13 + +struct tc_act_bpf { + tc_gen; +}; + +enum { + TCA_ACT_BPF_UNSPEC, + TCA_ACT_BPF_TM, + TCA_ACT_BPF_PARMS, + TCA_ACT_BPF_OPS_LEN, + TCA_ACT_BPF_OPS, + __TCA_ACT_BPF_MAX, +}; +#define TCA_ACT_BPF_MAX (__TCA_ACT_BPF_MAX - 1) + +#endif diff --git a/tc/Makefile b/tc/Makefile index 15f68ce0..d831a153 100644 --- a/tc/Makefile +++ b/tc/Makefile @@ -46,6 +46,7 @@ TCMODULES += m_skbedit.o TCMODULES += m_csum.o TCMODULES += m_simple.o TCMODULES += m_vlan.o +TCMODULES += m_bpf.o TCMODULES += p_ip.o TCMODULES += p_icmp.o TCMODULES += p_tcp.o diff --git a/tc/m_bpf.c b/tc/m_bpf.c new file mode 100644 index 00000000..611135ea --- /dev/null +++ b/tc/m_bpf.c @@ -0,0 +1,183 @@ +/* + * m_bpf.c BPF based action module + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * Authors: Jiri Pirko + */ + +#include +#include +#include +#include +#include +#include + +#include "utils.h" +#include "rt_names.h" +#include "tc_util.h" +#include "tc_bpf.h" + +static void explain(void) +{ + fprintf(stderr, "Usage: ... bpf ...\n"); + fprintf(stderr, "\n"); + fprintf(stderr, " [inline]: run bytecode BPF_BYTECODE\n"); + fprintf(stderr, " [from file]: run bytecode-file FILE\n"); + fprintf(stderr, "\n"); + fprintf(stderr, "Where BPF_BYTECODE := \'s,c t f k,c t f k,c t f k,...\'\n"); + fprintf(stderr, " c,t,f,k and s are decimals; s denotes number of 4-tuples\n"); + fprintf(stderr, "Where FILE points to a file containing the BPF_BYTECODE string\n"); + fprintf(stderr, "\nACTION_SPEC := ... 
look at individual actions\n"); + fprintf(stderr, "NOTE: CLASSID is parsed as hexadecimal input.\n"); +} + +static void usage(void) +{ + explain(); + exit(-1); +} + +static int parse_bpf(struct action_util *a, int *argc_p, char ***argv_p, + int tca_id, struct nlmsghdr *n) +{ + int argc = *argc_p; + char **argv = *argv_p; + struct rtattr *tail; + struct tc_act_bpf parm = { 0 }; + struct sock_filter bpf_ops[BPF_MAXINSNS]; + __u16 bpf_len = 0; + + if (matches(*argv, "bpf") != 0) + return -1; + + NEXT_ARG(); + + while (argc > 0) { + if (matches(*argv, "run") == 0) { + bool from_file; + int ret; + + NEXT_ARG(); + if (strcmp(*argv, "bytecode-file") == 0) { + from_file = true; + } else if (strcmp(*argv, "bytecode") == 0) { + from_file = false; + } else { + fprintf(stderr, "unexpected \"%s\"\n", *argv); + explain(); + return -1; + } + NEXT_ARG(); + ret = bpf_parse_ops(argc, argv, bpf_ops, from_file); + if (ret < 0) { + fprintf(stderr, "Illegal \"bytecode\"\n"); + return -1; + } + bpf_len = ret; + } else if (matches(*argv, "help") == 0) { + usage(); + } else { + break; + } + argc--; + argv++; + } + + parm.action = TC_ACT_PIPE; + if (argc) { + if (matches(*argv, "reclassify") == 0) { + parm.action = TC_ACT_RECLASSIFY; + NEXT_ARG(); + } else if (matches(*argv, "pipe") == 0) { + parm.action = TC_ACT_PIPE; + NEXT_ARG(); + } else if (matches(*argv, "drop") == 0 || + matches(*argv, "shot") == 0) { + parm.action = TC_ACT_SHOT; + NEXT_ARG(); + } else if (matches(*argv, "continue") == 0) { + parm.action = TC_ACT_UNSPEC; + NEXT_ARG(); + } else if (matches(*argv, "pass") == 0) { + parm.action = TC_ACT_OK; + NEXT_ARG(); + } + } + + if (argc) { + if (matches(*argv, "index") == 0) { + NEXT_ARG(); + if (get_u32(&parm.index, *argv, 10)) { + fprintf(stderr, "bpf: Illegal \"index\"\n"); + return -1; + } + argc--; + argv++; + } + } + + if (!bpf_len) { + fprintf(stderr, "bpf: Bytecode needs to be passed\n"); + explain(); + return -1; + } + + tail = NLMSG_TAIL(n); + addattr_l(n, MAX_MSG, 
tca_id, NULL, 0); + addattr_l(n, MAX_MSG, TCA_ACT_BPF_PARMS, &parm, sizeof(parm)); + addattr16(n, MAX_MSG, TCA_ACT_BPF_OPS_LEN, bpf_len); + addattr_l(n, MAX_MSG, TCA_ACT_BPF_OPS, &bpf_ops, + bpf_len * sizeof(struct sock_filter)); + tail->rta_len = (char *)NLMSG_TAIL(n) - (char *)tail; + + *argc_p = argc; + *argv_p = argv; + return 0; +} + +static int print_bpf(struct action_util *au, FILE *f, struct rtattr *arg) +{ + struct rtattr *tb[TCA_ACT_BPF_MAX + 1]; + struct tc_act_bpf *parm; + + if (arg == NULL) + return -1; + + parse_rtattr_nested(tb, TCA_ACT_BPF_MAX, arg); + + if (!tb[TCA_ACT_BPF_PARMS]) { + fprintf(f, "[NULL bpf parameters]"); + return -1; + } + parm = RTA_DATA(tb[TCA_ACT_BPF_PARMS]); + + fprintf(f, " bpf "); + + if (tb[TCA_ACT_BPF_OPS] && tb[TCA_ACT_BPF_OPS_LEN]) + bpf_print_ops(f, tb[TCA_ACT_BPF_OPS], + rta_getattr_u16(tb[TCA_ACT_BPF_OPS_LEN])); + + fprintf(f, "\n\tindex %d ref %d bind %d", parm->index, parm->refcnt, + parm->bindcnt); + + if (show_stats) { + if (tb[TCA_ACT_BPF_TM]) { + struct tcf_t *tm = RTA_DATA(tb[TCA_ACT_BPF_TM]); + print_tm(f, tm); + } + } + + fprintf(f, "\n "); + + return 0; +} + +struct action_util bpf_action_util = { + .id = "bpf", + .parse_aopt = parse_bpf, + .print_aopt = print_bpf, +}; From 3ac0d36ddde9715a8d027a919eb564f7b42c6c40 Mon Sep 17 00:00:00 2001 From: Roopa Prabhu Date: Sun, 25 Jan 2015 18:26:24 -0800 Subject: [PATCH 7/9] iproute2: bridge: support vlan range adds This patch adds vlan range support to bridge add command using the newly added vinfo flags BRIDGE_VLAN_INFO_RANGE_BEGIN and BRIDGE_VLAN_INFO_RANGE_END. 
$bridge vlan show port vlan ids br0 1 PVID Egress Untagged dummy0 1 PVID Egress Untagged $bridge vlan add vid 10-15 dev dummy0 port vlan ids br0 1 PVID Egress Untagged dummy0 1 PVID Egress Untagged 10 11 12 13 14 15 $bridge vlan del vid 14 dev dummy0 $bridge vlan show port vlan ids br0 1 PVID Egress Untagged dummy0 1 PVID Egress Untagged 10 11 12 13 15 $bridge vlan del vid 10-15 dev dummy0 $bridge vlan show port vlan ids br0 1 PVID Egress Untagged dummy0 1 PVID Egress Untagged Signed-off-by: Roopa Prabhu Signed-off-by: Wilson Kok --- bridge/vlan.c | 44 ++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 40 insertions(+), 4 deletions(-) diff --git a/bridge/vlan.c b/bridge/vlan.c index 3bd7b0db..88992e63 100644 --- a/bridge/vlan.c +++ b/bridge/vlan.c @@ -32,6 +32,7 @@ static int vlan_modify(int cmd, int argc, char **argv) } req; char *d = NULL; short vid = -1; + short vid_end = -1; struct rtattr *afspec; struct bridge_vlan_info vinfo; unsigned short flags = 0; @@ -49,8 +50,18 @@ static int vlan_modify(int cmd, int argc, char **argv) NEXT_ARG(); d = *argv; } else if (strcmp(*argv, "vid") == 0) { + char *p; NEXT_ARG(); - vid = atoi(*argv); + p = strchr(*argv, '-'); + if (p) { + *p = '\0'; + p++; + vid = atoi(*argv); + vid_end = atoi(p); + vinfo.flags |= BRIDGE_VLAN_INFO_RANGE_BEGIN; + } else { + vid = atoi(*argv); + } } else if (strcmp(*argv, "self") == 0) { flags |= BRIDGE_FLAGS_SELF; } else if (strcmp(*argv, "master") == 0) { @@ -83,15 +94,40 @@ static int vlan_modify(int cmd, int argc, char **argv) return -1; } - vinfo.vid = vid; + if (vinfo.flags & BRIDGE_VLAN_INFO_RANGE_BEGIN) { + if (vid_end == -1 || vid_end >= 4096 || vid >= vid_end) { + fprintf(stderr, "Invalid VLAN range \"%hu-%hu\"\n", + vid, vid_end); + return -1; + } + if (vinfo.flags & BRIDGE_VLAN_INFO_PVID) { + fprintf(stderr, + "pvid cannot be configured for a vlan range\n"); + return -1; + } + } afspec = addattr_nest(&req.n, sizeof(req), IFLA_AF_SPEC); if (flags) addattr16(&req.n, sizeof(req), 
IFLA_BRIDGE_FLAGS, flags); - addattr_l(&req.n, sizeof(req), IFLA_BRIDGE_VLAN_INFO, &vinfo, - sizeof(vinfo)); + vinfo.vid = vid; + if (vid_end != -1) { + /* send vlan range start */ + addattr_l(&req.n, sizeof(req), IFLA_BRIDGE_VLAN_INFO, &vinfo, + sizeof(vinfo)); + vinfo.flags &= ~BRIDGE_VLAN_INFO_RANGE_BEGIN; + + /* Now send the vlan range end */ + vinfo.flags |= BRIDGE_VLAN_INFO_RANGE_END; + vinfo.vid = vid_end; + addattr_l(&req.n, sizeof(req), IFLA_BRIDGE_VLAN_INFO, &vinfo, + sizeof(vinfo)); + } else { + addattr_l(&req.n, sizeof(req), IFLA_BRIDGE_VLAN_INFO, &vinfo, + sizeof(vinfo)); + } addattr_nest_end(&req.n, afspec); From a2f7934dd04f8e762cc7a3f5f2e7a1edd4a93643 Mon Sep 17 00:00:00 2001 From: Roopa Prabhu Date: Sun, 25 Jan 2015 18:26:25 -0800 Subject: [PATCH 8/9] iproute2: bridge vlan show new option to print ranges Introduce new option -c[ompressvlans] to request vlan ranges from kernel (pls suggest better option names if this does not look ok) $bridge vlan show port vlan ids dummy0 1 PVID Egress Untagged dummy1 1 PVID Egress Untagged 2 3 4 5 6 7 9 10 12 br0 1 PVID Egress Untagged $bridge help Usage: bridge [ OPTIONS ] OBJECT { COMMAND | help } where OBJECT := { link | fdb | mdb | vlan | monitor } OPTIONS := { -V[ersion] | -s[tatistics] | -d[etails] | -o[neline] | -t[imestamp] | -n[etns] name | -c[ompressvlans] } $bridge -c vlan show port vlan ids dummy0 1 PVID Egress Untagged dummy1 1 PVID Egress Untagged 2-7 9-10 12 br0 1 PVID Egress Untagged Signed-off-by: Roopa Prabhu --- bridge/br_common.h | 1 + bridge/bridge.c | 6 +++++- bridge/vlan.c | 11 +++++++++-- 3 files changed, 15 insertions(+), 3 deletions(-) diff --git a/bridge/br_common.h b/bridge/br_common.h index 12fce3ef..169a162d 100644 --- a/bridge/br_common.h +++ b/bridge/br_common.h @@ -16,4 +16,5 @@ extern int preferred_family; extern int show_stats; extern int show_details; extern int timestamp; +extern int compress_vlans; extern struct rtnl_handle rth; diff --git a/bridge/bridge.c b/bridge/bridge.c 
index 5fcc552b..88469ca2 100644 --- a/bridge/bridge.c +++ b/bridge/bridge.c @@ -21,6 +21,7 @@ int resolve_hosts; int oneline = 0; int show_stats; int show_details; +int compress_vlans; int timestamp; char * _SL_ = NULL; @@ -32,7 +33,8 @@ static void usage(void) "Usage: bridge [ OPTIONS ] OBJECT { COMMAND | help }\n" "where OBJECT := { link | fdb | mdb | vlan | monitor }\n" " OPTIONS := { -V[ersion] | -s[tatistics] | -d[etails] |\n" -" -o[neline] | -t[imestamp] | -n[etns] name }\n"); +" -o[neline] | -t[imestamp] | -n[etns] name |\n" +" -c[ompressvlans] }\n"); exit(-1); } @@ -117,6 +119,8 @@ main(int argc, char **argv) NEXT_ARG(); if (netns_switch(argv[1])) exit(-1); + } else if (matches(opt, "-compressvlans") == 0) { + ++compress_vlans; } else { fprintf(stderr, "Option \"%s\" is unknown, try \"bridge help\".\n", opt); exit(-1); diff --git a/bridge/vlan.c b/bridge/vlan.c index 88992e63..9f6c84ee 100644 --- a/bridge/vlan.c +++ b/bridge/vlan.c @@ -182,7 +182,12 @@ static int print_vlan(const struct sockaddr_nl *who, continue; vinfo = RTA_DATA(i); - fprintf(fp, "\t %hu", vinfo->vid); + if (vinfo->flags & BRIDGE_VLAN_INFO_RANGE_END) + fprintf(fp, "-%hu", vinfo->vid); + else + fprintf(fp, "\t %hu", vinfo->vid); + if (vinfo->flags & BRIDGE_VLAN_INFO_RANGE_BEGIN) + continue; if (vinfo->flags & BRIDGE_VLAN_INFO_PVID) fprintf(fp, " PVID"); if (vinfo->flags & BRIDGE_VLAN_INFO_UNTAGGED) @@ -218,7 +223,9 @@ static int vlan_show(int argc, char **argv) } if (rtnl_wilddump_req_filter(&rth, PF_BRIDGE, RTM_GETLINK, - RTEXT_FILTER_BRVLAN) < 0) { + (compress_vlans ? + RTEXT_FILTER_BRVLAN_COMPRESSED : + RTEXT_FILTER_BRVLAN)) < 0) { perror("Cannont send dump request"); exit(1); } From 95ce04bc86c2299ea8fd466d521ba34f930d05df Mon Sep 17 00:00:00 2001 From: Vadim Kochan Date: Sun, 8 Feb 2015 08:58:43 +0200 Subject: [PATCH 9/9] ss: Show stats from specified network namespace Added new '-N NSNAME, --net=NSNAME' option to show socket stats from the specified network namespace name. 
Signed-off-by: Vadim Kochan --- man/man8/ss.8 | 3 +++ misc/Makefile | 4 ++++ misc/ss.c | 9 ++++++++- 3 files changed, 15 insertions(+), 1 deletion(-) diff --git a/man/man8/ss.8 b/man/man8/ss.8 index 450649ab..b7fbaefa 100644 --- a/man/man8/ss.8 +++ b/man/man8/ss.8 @@ -84,6 +84,9 @@ context of the creating process, however the context shown will reflect any policy role, type and/or range transition rules applied, and is therefore a useful reference. .TP +.B \-N NSNAME, \-\-net=NSNAME +Switch to the specified network namespace name. +.TP .B \-b, \-\-bpf Show socket BPF filters (only administrators are allowed to get these information). .TP diff --git a/misc/Makefile b/misc/Makefile index 004bcc3a..b7ecba90 100644 --- a/misc/Makefile +++ b/misc/Makefile @@ -10,6 +10,10 @@ ifeq ($(HAVE_SELINUX),y) CFLAGS += $(shell pkg-config --cflags libselinux) -DHAVE_SELINUX endif +ifeq ($(IP_CONFIG_SETNS),y) + CFLAGS += -DHAVE_SETNS +endif + all: $(TARGETS) ss: $(SSOBJ) diff --git a/misc/ss.c b/misc/ss.c index 7fc0a99e..0a6a65ee 100644 --- a/misc/ss.c +++ b/misc/ss.c @@ -31,6 +31,7 @@ #include "rt_names.h" #include "ll_map.h" #include "libnetlink.h" +#include "namespace.h" #include "SNAPSHOT.h" #include @@ -3246,6 +3247,7 @@ static void _usage(FILE *dest) " -b, --bpf show bpf filter socket information\n" " -Z, --context display process SELinux security contexts\n" " -z, --contexts display process and socket SELinux security contexts\n" +" -N, --net switch to the specified network namespace name\n" "\n" " -4, --ipv4 display only IP version 4 sockets\n" " -6, --ipv6 display only IP version 6 sockets\n" @@ -3345,6 +3347,7 @@ static const struct option long_opts[] = { { "help", 0, 0, 'h' }, { "context", 0, 0, 'Z' }, { "contexts", 0, 0, 'z' }, + { "net", 1, 0, 'N' }, { 0 } }; @@ -3360,7 +3363,7 @@ int main(int argc, char *argv[]) struct filter dbs_filter = {}; int state_filter = 0; - while ((ch = getopt_long(argc, argv, "dhaletuwxnro460spbf:miA:D:F:vVzZ", + while ((ch = 
getopt_long(argc, argv, "dhaletuwxnro460spbf:miA:D:F:vVzZN:", long_opts, NULL)) != EOF) { switch(ch) { case 'n': @@ -3532,6 +3535,10 @@ int main(int argc, char *argv[]) show_proc_ctx++; user_ent_hash_build(); break; + case 'N': + if (netns_switch(optarg)) + exit(1); + break; case 'h': case '?': help();