From d36d9d41d6e66c130b0632b82ea74c3ae4d0fbe3 Mon Sep 17 00:00:00 2001 From: Reese Moore Date: Wed, 4 Feb 2015 14:04:48 -0500 Subject: [PATCH 01/23] iproute2: ip-link.8.in: Spelling fixes In the ip-link(8) man page, for the gretap, ip6gre, and ip6gretap types, the word tunnel was incorrectly spelled 'tuunel'. Signed-off-by: Reese Moore --- man/man8/ip-link.8.in | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/man/man8/ip-link.8.in b/man/man8/ip-link.8.in index 1209b55d..313d6f23 100644 --- a/man/man8/ip-link.8.in +++ b/man/man8/ip-link.8.in @@ -221,13 +221,13 @@ Link types: - Virtual tunnel interface GRE over IPv4 .sp .BR gretap -- Virtual L2 tuunel interface GRE over IPv4 +- Virtual L2 tunnel interface GRE over IPv4 .sp .BR ip6gre -- Virtual tuunel interface GRE over IPv6 +- Virtual tunnel interface GRE over IPv6 .sp .BR ip6gretap -- Virtual L2 tuunel interface GRE over IPv6 +- Virtual L2 tunnel interface GRE over IPv6 .in -8 .TP From ffff693130e9d6e4184df27835012e46c6abd58d Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Thu, 15 Jan 2015 11:36:24 +0100 Subject: [PATCH 02/23] lib: fix warning in namespace.h MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Warning was: In file included from bridge.c:16:0: ../include/namespace.h:33:12: warning: ‘setns’ defined but not used [-Wunused-function] CC: Vadim Kochan Fixes: eb67e4498aec ("lib: Add netns_switch func for change network namespace") Signed-off-by: Nicolas Dichtel --- include/namespace.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/namespace.h b/include/namespace.h index b8c5cad6..28a4223b 100644 --- a/include/namespace.h +++ b/include/namespace.h @@ -30,7 +30,7 @@ #endif #ifndef HAVE_SETNS -static int setns(int fd, int nstype) +static inline int setns(int fd, int nstype) { #ifdef __NR_setns return syscall(__NR_setns, fd, nstype); From 1ff6b16e2d5b291fd3cc5405c492d9b27d596d0a Mon Sep 17 00:00:00 2001 From: Nicolas 
Dichtel Date: Thu, 15 Jan 2015 11:36:25 +0100 Subject: [PATCH 03/23] lib: fix setns() function when !HAVE_SETNS When HAVE_SETNS is not set, iproute2 provides a local implementation of this function based on __NR_setns. This macro is defined in sys/syscall.h, which was not included, thus the local implementation always returned -1. CC: Vadim Kochan Fixes: eb67e4498aec ("lib: Add netns_switch func for change network namespace") Signed-off-by: Nicolas Dichtel --- include/namespace.h | 1 + 1 file changed, 1 insertion(+) diff --git a/include/namespace.h b/include/namespace.h index 28a4223b..52f7fbd7 100644 --- a/include/namespace.h +++ b/include/namespace.h @@ -3,6 +3,7 @@ #include #include +#include <sys/syscall.h> #include #define NETNS_RUN_DIR "/var/run/netns" From f42a45747071116649d5b70d432efd2dedc280bd Mon Sep 17 00:00:00 2001 From: Vadim Kochan Date: Thu, 8 Jan 2015 19:32:22 +0200 Subject: [PATCH 04/23] ss: Filter inet dgram sockets with established state by default As inet dgram sockets (udp, raw) can call connect(...) - they might be set in ESTABLISHED state. So keep the original behaviour of 'ss' which filtered them by ESTABLISHED state by default. So: $ ss -u or $ ss -w Will show only ESTABLISHED UDP sockets by default. 
Signed-off-by: Vadim Kochan --- misc/ss.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/misc/ss.c b/misc/ss.c index f434f57f..40dc1887 100644 --- a/misc/ss.c +++ b/misc/ss.c @@ -170,11 +170,11 @@ static const struct filter default_dbs[MAX_DB] = { .families = (1 << AF_INET) | (1 << AF_INET6), }, [UDP_DB] = { - .states = (1 << SS_CLOSE), + .states = (1 << SS_ESTABLISHED), .families = (1 << AF_INET) | (1 << AF_INET6), }, [RAW_DB] = { - .states = (1 << SS_CLOSE), + .states = (1 << SS_ESTABLISHED), .families = (1 << AF_INET) | (1 << AF_INET6), }, [UNIX_DG_DB] = { From 4cec9db0b4085c1b2e8072bac2d07843a9285ac1 Mon Sep 17 00:00:00 2001 From: Vadim Kochan Date: Thu, 15 Jan 2015 18:59:22 +0200 Subject: [PATCH 05/23] tests: Add few 'ip link' related tests Added two tests which checks the following fixed issues: 1) Bug when not possible add new virtual interface via: $ ip link add dev XXX type It was fixed a few releases ago. 2) Crash on older kernels when VF rate info does not exist: $ ip link show Used dump file from William Dauchy : testsuite/tests/ip/link/dev_wo_vf_rate.nl So 'ip link show' replaced by 'ip -d monitor file ...' which does the same thing. Also added new func in testsuite/lib/generic.sh to gen new random dev name. Added 'clean' dependency on running all tests. 
Signed-off-by: Vadim Kochan --- testsuite/Makefile | 3 ++- testsuite/lib/generic.sh | 8 +++++++- testsuite/tests/ip/link/dev_wo_vf_rate.nl | Bin 0 -> 14076 bytes testsuite/tests/ip/link/new_link.t | 11 +++++++++++ testsuite/tests/ip/link/show_dev_wo_vf_rate.t | 6 ++++++ 5 files changed, 26 insertions(+), 2 deletions(-) create mode 100644 testsuite/tests/ip/link/dev_wo_vf_rate.nl create mode 100755 testsuite/tests/ip/link/new_link.t create mode 100755 testsuite/tests/ip/link/show_dev_wo_vf_rate.t diff --git a/testsuite/Makefile b/testsuite/Makefile index 2ba95473..a2c8a2d9 100644 --- a/testsuite/Makefile +++ b/testsuite/Makefile @@ -31,12 +31,13 @@ listtests: alltests: $(TESTS) clean: + @echo "Removing $(RESULTS_DIR) dir ..." @rm -rf $(RESULTS_DIR) distclean: clean echo "Entering iproute2" && cd iproute2 && $(MAKE) distclean && cd ..; -$(TESTS): +$(TESTS): clean @mkdir -p $(RESULTS_DIR) @for d in $(TESTS_DIR); do \ diff --git a/testsuite/lib/generic.sh b/testsuite/lib/generic.sh index 8f76e492..3473cc13 100644 --- a/testsuite/lib/generic.sh +++ b/testsuite/lib/generic.sh @@ -62,8 +62,9 @@ ts_ip() TMP_OUT=`mktemp /tmp/tc_testsuite.XXXXXX` || exit $IP $@ 2> $TMP_ERR > $TMP_OUT + RET=$? - if [ -s $TMP_ERR ]; then + if [ -s $TMP_ERR ] || [ "$RET" != "0" ]; then ts_err "${SCRIPT}: ${DESC} failed:" ts_err "command: $IP $@" ts_err "stderr output:" @@ -91,3 +92,8 @@ ts_qdisc_available() return 1; fi } + +rand_dev() +{ + echo "dev-$(tr -dc "[:alpha:]" < /dev/urandom | head -c 6)" +} diff --git a/testsuite/tests/ip/link/dev_wo_vf_rate.nl b/testsuite/tests/ip/link/dev_wo_vf_rate.nl new file mode 100644 index 0000000000000000000000000000000000000000..40fa87ff1b158fb972e064efb38f76b0e9a56697 GIT binary patch literal 14076 zcmeI2QD_`h6o$|4X4B1fZMT}ns!(GsCQqft8lx5o_Nk#2gn}Z4n3_#@VcVp2BSi%l zK`m6|(o55*6q! 
zb)4tkkUXi*T+a%)Z-E|A^#$7Mln*Km1sYXlo*q(Vi3aKD;<4q*a)q|j2GuucGkCJq zDyp+|k0Sdi@-ln2m0iT|)VKO4ZE=*})4fVJbioD$cFa;AC2zZuy`N6-ST&5X6EATo z_D|Z(Qu>&6?e@!KMAvgf!`ULof*!D8j*?c3=uZxngEPAQq%()pN?GQnhhmRa(B~Ye z(A8pTJ~c$m=QA9IL@)C6>*%|64N6VG20Ecbn#D)dZ_ng7{!(c-0=xKL9cc>-r(+vLpn!)W(%rl8In%!oLR?bA34VJ ztQhmlKitHza7C5aH$^e#k$UUmB_c2eF@vZ9V+O7g?SGr-@R?S2`HvUO)6{;^Q==<_ z>do$84D>@_0|GlJ{cNG$mN0hAbE5V)&o%c8v{}vJPc*{gl%a9wPJSu7_~x%4nmyh7 z_^&agr0#58Q&RsW`ra<_Op1tpM9YpZCUc_~rNvu9zm6*12K{mj{n*D3`lUV&UV1%g zUn49sc-a(QK6ek%rHYrAh=1`}t|P~0gqN_`DKAAobcv6sGkGaYxZrel&nWIuE#r2B zQ{U?X846^*F1%akI<{ai+lX9uP#2Sz_NB|1*yo1jC0w(3`6zUChnGJ%y!^2}FE4v} zc^~`4;Y(kR5Iyaa-ff1{cPQW`^h00+0y_{tTe`))?Zm!YQ?#QXUVc;E_S@cCu&z#d zdFYwH@;|S(*YAYC>a-Sl`4vy<)WPKPQdrOom((4>evgfMfjQGT^D3Lh0ZZ}AW9+z?D zXmZ^_?wic9FI~pOJ~u3LVB2EOW6;%a=E$1Kk2%Oo5Afy_nE*KKh=T|@j8vM;k=z${ zAbz%Vi+kINeYd7)M@eSR)Y+6d(PlHaw}ia>H+p(HfVkbdu%FDqn$%*>E?h?+nbSm` zBkLNQIT&+rc8UjWMkW9b#~|u?nxjWp4m*%2w(S1}t)aWyBr|8__2e=~QsrjA^hg|B z>&K+}_#|!wF=yI2C&_CojH$(($KiG#nZwWRGbGPC{FsB(11wnSB6B3q5#~g(C2@gq z?3E;$In!rS=EM~T5?gD9IZJ$x;_1w-ZxZl*frmMh7@s>~&a^sjlu+V*A;kT@P*nwP z)ceBsZ)C3Q^FIF$JSm0-FzA%Z;a&|wPT;8nJIq$aL=Chozw)~#u`0)nM z9;_Va3E1c>Z}z9W2|Le8=S_7_%A2rrnUsEU@uumM-{Ad%XRe18dh$aOw@YwX6@yD57x9$}LrV!yl{-g+KTPWWMs+?8L;AedW&T$F&Wg?nt>4HrA8Q zozu*nt1D^V6Lvn6(yyD`DH+^Z&)jJouhr}PC3rvcoyj*o+V Date: Tue, 20 Jan 2015 16:14:23 +0200 Subject: [PATCH 06/23] ss: Unify meminfo output Signed-off-by: Vadim Kochan --- misc/ss.c | 30 ++++++++++++++++++------------ 1 file changed, 18 insertions(+), 12 deletions(-) diff --git a/misc/ss.c b/misc/ss.c index 40dc1887..ce151be3 100644 --- a/misc/ss.c +++ b/misc/ss.c @@ -1613,8 +1613,24 @@ static char *sprint_bw(char *buf, double bw) static void print_skmeminfo(struct rtattr *tb[], int attrtype) { const __u32 *skmeminfo; - if (!tb[attrtype]) + + if (!tb[attrtype]) { + if (attrtype == INET_DIAG_SKMEMINFO) { + if (!tb[INET_DIAG_MEMINFO]) + return; + + const struct inet_diag_meminfo *minfo = + RTA_DATA(tb[INET_DIAG_MEMINFO]); + + 
printf(" mem:(r%u,w%u,f%u,t%u)", + minfo->idiag_rmem, + minfo->idiag_wmem, + minfo->idiag_fmem, + minfo->idiag_tmem); + } return; + } + skmeminfo = RTA_DATA(tb[attrtype]); printf(" skmem:(r%u,rb%u,t%u,tb%u,f%u,w%u,o%u", @@ -1639,17 +1655,7 @@ static void tcp_show_info(const struct nlmsghdr *nlh, struct inet_diag_msg *r, char b1[64]; double rtt = 0; - if (tb[INET_DIAG_SKMEMINFO]) { - print_skmeminfo(tb, INET_DIAG_SKMEMINFO); - } else if (tb[INET_DIAG_MEMINFO]) { - const struct inet_diag_meminfo *minfo - = RTA_DATA(tb[INET_DIAG_MEMINFO]); - printf(" mem:(r%u,w%u,f%u,t%u)", - minfo->idiag_rmem, - minfo->idiag_wmem, - minfo->idiag_fmem, - minfo->idiag_tmem); - } + print_skmeminfo(tb, INET_DIAG_SKMEMINFO); if (tb[INET_DIAG_INFO]) { struct tcp_info *info; From 8250bc9ff4e55a3ef397ed8c7612f1392d164295 Mon Sep 17 00:00:00 2001 From: Vadim Kochan Date: Tue, 20 Jan 2015 16:14:24 +0200 Subject: [PATCH 07/23] ss: Unify inet sockets output Signed-off-by: Vadim Kochan --- misc/ss.c | 667 ++++++++++++++++++++++++++++-------------------------- 1 file changed, 350 insertions(+), 317 deletions(-) diff --git a/misc/ss.c b/misc/ss.c index ce151be3..7fc0a99e 100644 --- a/misc/ss.c +++ b/misc/ss.c @@ -689,23 +689,59 @@ static const char *sstate_namel[] = { [SS_CLOSING] = "closing", }; +struct dctcpstat +{ + unsigned int ce_state; + unsigned int alpha; + unsigned int ab_ecn; + unsigned int ab_tot; + bool enabled; +}; + struct tcpstat { - inet_prefix local; - inet_prefix remote; - int lport; - int rport; - int state; - int rq, wq; - int timer; - int timeout; - int retrs; - unsigned ino; - int probes; - unsigned uid; - int refcnt; - unsigned long long sk; - int rto, ato, qack, cwnd, ssthresh; + inet_prefix local; + inet_prefix remote; + int lport; + int rport; + int state; + int rq, wq; + unsigned ino; + unsigned uid; + int refcnt; + unsigned int iface; + unsigned long long sk; + int timer; + int timeout; + int probes; + char *cong_alg; + double rto, ato, rtt, rttvar; + int qack, cwnd, 
ssthresh, backoff; + double send_bps; + int snd_wscale; + int rcv_wscale; + int mss; + unsigned int lastsnd; + unsigned int lastrcv; + unsigned int lastack; + double pacing_rate; + double pacing_rate_max; + unsigned int unacked; + unsigned int retrans; + unsigned int retrans_total; + unsigned int lost; + unsigned int sacked; + unsigned int fackets; + unsigned int reordering; + double rcv_rtt; + int rcv_space; + bool has_ts_opt; + bool has_sack_opt; + bool has_ecn_opt; + bool has_ecnseen_opt; + bool has_fastopen_opt; + bool has_wscale_opt; + struct dctcpstat *dctcp; }; static const char *tmr_name[] = { @@ -744,12 +780,6 @@ static const char *print_ms_timer(int timeout) return buf; } -static const char *print_hz_timer(int timeout) -{ - int hz = get_user_hz(); - return print_ms_timer(((timeout*1000) + hz-1)/hz); -} - struct scache { struct scache *next; @@ -1439,125 +1469,250 @@ out: return res; } -static int tcp_show_line(char *line, const struct filter *f, int family) +static char *proto_name(int protocol) +{ + switch (protocol) { + case IPPROTO_UDP: + return "udp"; + case IPPROTO_TCP: + return "tcp"; + case IPPROTO_DCCP: + return "dccp"; + } + + return "???"; +} + +static void inet_stats_print(struct tcpstat *s, int protocol) +{ + char *buf = NULL; + + if (netid_width) + printf("%-*s ", netid_width, proto_name(protocol)); + if (state_width) + printf("%-*s ", state_width, sstate_name[s->state]); + + printf("%-6d %-6d ", s->rq, s->wq); + + formatted_print(&s->local, s->lport, s->iface); + formatted_print(&s->remote, s->rport, 0); + + if (show_options) { + if (s->timer) { + if (s->timer > 4) + s->timer = 5; + printf(" timer:(%s,%s,%d)", + tmr_name[s->timer], + print_ms_timer(s->timeout), + s->retrans); + } + } + + if (show_proc_ctx || show_sock_ctx) { + if (find_entry(s->ino, &buf, + (show_proc_ctx & show_sock_ctx) ? 
+ PROC_SOCK_CTX : PROC_CTX) > 0) { + printf(" users:(%s)", buf); + free(buf); + } + } else if (show_users) { + if (find_entry(s->ino, &buf, USERS) > 0) { + printf(" users:(%s)", buf); + free(buf); + } + } +} + +static int proc_parse_inet_addr(char *loc, char *rem, int family, struct tcpstat *s) +{ + s->local.family = s->remote.family = family; + if (family == AF_INET) { + sscanf(loc, "%x:%x", s->local.data, (unsigned*)&s->lport); + sscanf(rem, "%x:%x", s->remote.data, (unsigned*)&s->rport); + s->local.bytelen = s->remote.bytelen = 4; + return 0; + } else { + sscanf(loc, "%08x%08x%08x%08x:%x", + s->local.data, + s->local.data + 1, + s->local.data + 2, + s->local.data + 3, + &s->lport); + sscanf(rem, "%08x%08x%08x%08x:%x", + s->remote.data, + s->remote.data + 1, + s->remote.data + 2, + s->remote.data + 3, + &s->rport); + s->local.bytelen = s->remote.bytelen = 16; + return 0; + } + return -1; +} + +static int proc_inet_split_line(char *line, char **loc, char **rem, char **data) { - struct tcpstat s; - char *loc, *rem, *data; - char opt[256]; - int n; char *p; if ((p = strchr(line, ':')) == NULL) return -1; - loc = p+2; - if ((p = strchr(loc, ':')) == NULL) + *loc = p+2; + if ((p = strchr(*loc, ':')) == NULL) return -1; - p[5] = 0; - rem = p+6; - if ((p = strchr(rem, ':')) == NULL) + p[5] = 0; + *rem = p+6; + if ((p = strchr(*rem, ':')) == NULL) return -1; + p[5] = 0; - data = p+6; + *data = p+6; + return 0; +} - do { - int state = (data[1] >= 'A') ? (data[1] - 'A' + 10) : (data[1] - '0'); +static char *sprint_bw(char *buf, double bw) +{ + if (bw > 1000000.) + sprintf(buf,"%.1fM", bw / 1000000.); + else if (bw > 1000.) 
+ sprintf(buf,"%.1fK", bw / 1000.); + else + sprintf(buf, "%g", bw); - if (!(f->states & (1<has_ts_opt) + printf(" ts"); + if (s->has_sack_opt) + printf(" sack"); + if (s->has_ecn_opt) + printf(" ecn"); + if (s->has_ecnseen_opt) + printf(" ecnseen"); + if (s->has_fastopen_opt) + printf(" fastopen"); + if (s->cong_alg) + printf(" %s", s->cong_alg); + if (s->has_wscale_opt) + printf(" wscale:%d,%d", s->snd_wscale, s->rcv_wscale); + if (s->rto) + printf(" rto:%g", s->rto); + if (s->backoff) + printf(" backoff:%u", s->backoff); + if (s->rtt) + printf(" rtt:%g/%g", s->rtt, s->rttvar); + if (s->ato) + printf(" ato:%g", s->ato); + + if (s->qack) + printf(" qack:%d", s->qack); + if (s->qack & 1) + printf(" bidir"); + + if (s->mss) + printf(" mss:%d", s->mss); + if (s->cwnd && s->cwnd != 2) + printf(" cwnd:%d", s->cwnd); + if (s->ssthresh) + printf(" ssthresh:%d", s->ssthresh); + + if (s->dctcp && s->dctcp->enabled) { + struct dctcpstat *dctcp = s->dctcp; + + printf(" ce_state %u alpha %u ab_ecn %u ab_tot %u", + dctcp->ce_state, dctcp->alpha, dctcp->ab_ecn, + dctcp->ab_tot); + } else if (s->dctcp) { + printf(" fallback_mode"); } + if (s->send_bps) + printf(" send %sbps", sprint_bw(b1, s->send_bps)); + if (s->lastsnd) + printf(" lastsnd:%u", s->lastsnd); + if (s->lastrcv) + printf(" lastrcv:%u", s->lastrcv); + if (s->lastack) + printf(" lastack:%u", s->lastack); + + if (s->pacing_rate) { + printf(" pacing_rate %sbps", sprint_bw(b1, s->pacing_rate)); + if (s->pacing_rate_max) + printf("/%sbps", sprint_bw(b1, + s->pacing_rate_max)); + } + + if (s->unacked) + printf(" unacked:%u", s->unacked); + if (s->retrans || s->retrans_total) + printf(" retrans:%u/%u", s->retrans, s->retrans_total); + if (s->lost) + printf(" lost:%u", s->lost); + if (s->sacked && s->state != SS_LISTEN) + printf(" sacked:%u", s->sacked); + if (s->fackets) + printf(" fackets:%u", s->fackets); + if (s->reordering != 3) + printf(" reordering:%d", s->reordering); + if (s->rcv_rtt) + printf(" rcv_rtt:%g", 
s->rcv_rtt); + if (s->rcv_space) + printf(" rcv_space:%d", s->rcv_space); +} + +static int tcp_show_line(char *line, const struct filter *f, int family) +{ + int rto = 0, ato = 0; + struct tcpstat s = {}; + char *loc, *rem, *data; + char opt[256]; + int n; + int hz = get_user_hz(); + + if (proc_inet_split_line(line, &loc, &rem, &data)) + return -1; + + int state = (data[1] >= 'A') ? (data[1] - 'A' + 10) : (data[1] - '0'); + if (!(f->states & (1 << state))) + return 0; + + proc_parse_inet_addr(loc, rem, family, &s); + if (f->f && run_ssfilter(f->f, &s) == 0) return 0; opt[0] = 0; n = sscanf(data, "%x %x:%x %x:%x %x %d %d %u %d %llx %d %d %d %d %d %[^\n]\n", &s.state, &s.wq, &s.rq, - &s.timer, &s.timeout, &s.retrs, &s.uid, &s.probes, &s.ino, - &s.refcnt, &s.sk, &s.rto, &s.ato, &s.qack, + &s.timer, &s.timeout, &s.retrans, &s.uid, &s.probes, &s.ino, + &s.refcnt, &s.sk, &rto, &ato, &s.qack, &s.cwnd, &s.ssthresh, opt); if (n < 17) opt[0] = 0; if (n < 12) { - s.rto = 0; + rto = 0; s.cwnd = 2; s.ssthresh = -1; - s.ato = s.qack = 0; + ato = s.qack = 0; } - if (netid_width) - printf("%-*s ", netid_width, "tcp"); - if (state_width) - printf("%-*s ", state_width, sstate_name[s.state]); + s.retrans = s.timer != 1 ? s.probes : s.retrans; + s.timeout = (s.timeout * 1000 + hz - 1) / hz; + s.ato = (double)ato / hz; + s.qack /= 2; + s.rto = (double)rto; + s.ssthresh = s.ssthresh == -1 ? 0 : s.ssthresh; + s.rto = s.rto != 3 * hz ? s.rto / hz : 0; - printf("%-6d %-6d ", s.rq, s.wq); - - formatted_print(&s.local, s.lport, 0); - formatted_print(&s.remote, s.rport, 0); - - if (show_options) { - if (s.timer) { - if (s.timer > 4) - s.timer = 5; - printf(" timer:(%s,%s,%d)", - tmr_name[s.timer], - print_hz_timer(s.timeout), - s.timer != 1 ? 
s.probes : s.retrs); - } - } - if (show_tcpinfo) { - int hz = get_user_hz(); - if (s.rto && s.rto != 3*hz) - printf(" rto:%g", (double)s.rto/hz); - if (s.ato) - printf(" ato:%g", (double)s.ato/hz); - if (s.cwnd != 2) - printf(" cwnd:%d", s.cwnd); - if (s.ssthresh != -1) - printf(" ssthresh:%d", s.ssthresh); - if (s.qack/2) - printf(" qack:%d", s.qack/2); - if (s.qack&1) - printf(" bidir"); - } - char *buf = NULL; - if (show_proc_ctx || show_sock_ctx) { - if (find_entry(s.ino, &buf, - (show_proc_ctx & show_sock_ctx) ? - PROC_SOCK_CTX : PROC_CTX) > 0) { - printf(" users:(%s)", buf); - free(buf); - } - } else if (show_users) { - if (find_entry(s.ino, &buf, USERS) > 0) { - printf(" users:(%s)", buf); - free(buf); - } - } + inet_stats_print(&s, IPPROTO_TCP); if (show_details) { if (s.uid) @@ -1567,8 +1722,11 @@ static int tcp_show_line(char *line, const struct filter *f, int family) if (opt[0]) printf(" opt:\"%s\"", opt); } - printf("\n"); + if (show_tcpinfo) + tcp_stats_print(&s); + + printf("\n"); return 0; } @@ -1598,18 +1756,6 @@ outerr: return ferror(fp) ? -1 : 0; } -static char *sprint_bw(char *buf, double bw) -{ - if (bw > 1000000.) - sprintf(buf,"%.1fM", bw / 1000000.); - else if (bw > 1000.) 
- sprintf(buf,"%.1fK", bw / 1000.); - else - sprintf(buf, "%g", bw); - - return buf; -} - static void print_skmeminfo(struct rtattr *tb[], int attrtype) { const __u32 *skmeminfo; @@ -1649,11 +1795,13 @@ static void print_skmeminfo(struct rtattr *tb[], int attrtype) printf(")"); } +#define TCPI_HAS_OPT(info, opt) !!(info->tcpi_options & (opt)) + static void tcp_show_info(const struct nlmsghdr *nlh, struct inet_diag_msg *r, struct rtattr *tb[]) { - char b1[64]; double rtt = 0; + struct tcpstat s = {}; print_skmeminfo(tb, INET_DIAG_SKMEMINFO); @@ -1670,39 +1818,49 @@ static void tcp_show_info(const struct nlmsghdr *nlh, struct inet_diag_msg *r, info = RTA_DATA(tb[INET_DIAG_INFO]); if (show_options) { - if (info->tcpi_options & TCPI_OPT_TIMESTAMPS) - printf(" ts"); - if (info->tcpi_options & TCPI_OPT_SACK) - printf(" sack"); - if (info->tcpi_options & TCPI_OPT_ECN) - printf(" ecn"); - if (info->tcpi_options & TCPI_OPT_ECN_SEEN) - printf(" ecnseen"); - if (info->tcpi_options & TCPI_OPT_SYN_DATA) - printf(" fastopen"); + s.has_ts_opt = TCPI_HAS_OPT(info, TCPI_OPT_TIMESTAMPS); + s.has_sack_opt = TCPI_HAS_OPT(info, TCPI_OPT_SACK); + s.has_ecn_opt = TCPI_HAS_OPT(info, TCPI_OPT_ECN); + s.has_ecnseen_opt = TCPI_HAS_OPT(info, TCPI_OPT_ECN_SEEN); + s.has_fastopen_opt = TCPI_HAS_OPT(info, TCPI_OPT_SYN_DATA); } - if (tb[INET_DIAG_CONG]) - printf(" %s", rta_getattr_str(tb[INET_DIAG_CONG])); + if (tb[INET_DIAG_CONG]) { + const char *cong_attr = rta_getattr_str(tb[INET_DIAG_CONG]); + s.cong_alg = malloc(strlen(cong_attr) + 1); + strcpy(s.cong_alg, cong_attr); + } + + if (TCPI_HAS_OPT(info, TCPI_OPT_WSCALE)) { + s.has_wscale_opt = true; + s.snd_wscale = info->tcpi_snd_wscale; + s.rcv_wscale = info->tcpi_rcv_wscale; + } - if (info->tcpi_options & TCPI_OPT_WSCALE) - printf(" wscale:%d,%d", info->tcpi_snd_wscale, - info->tcpi_rcv_wscale); if (info->tcpi_rto && info->tcpi_rto != 3000000) - printf(" rto:%g", (double)info->tcpi_rto/1000); - if (info->tcpi_backoff) - printf(" backoff:%u", 
info->tcpi_backoff); - if (info->tcpi_rtt) - printf(" rtt:%g/%g", (double)info->tcpi_rtt/1000, - (double)info->tcpi_rttvar/1000); - if (info->tcpi_ato) - printf(" ato:%g", (double)info->tcpi_ato/1000); - if (info->tcpi_snd_mss) - printf(" mss:%d", info->tcpi_snd_mss); - if (info->tcpi_snd_cwnd != 2) - printf(" cwnd:%d", info->tcpi_snd_cwnd); + s.rto = (double)info->tcpi_rto / 1000; + + s.backoff = info->tcpi_backoff; + s.rtt = (double)info->tcpi_rtt / 1000; + s.rttvar = (double)info->tcpi_rttvar / 1000; + s.ato = (double)info->tcpi_ato / 1000; + s.mss = info->tcpi_snd_mss; + s.rcv_space = info->tcpi_rcv_space; + s.rcv_rtt = (double)info->tcpi_rcv_rtt / 1000; + s.lastsnd = info->tcpi_last_data_sent; + s.lastrcv = info->tcpi_last_data_recv; + s.lastack = info->tcpi_last_ack_recv; + s.unacked = info->tcpi_unacked; + s.retrans = info->tcpi_retrans; + s.retrans_total = info->tcpi_total_retrans; + s.lost = info->tcpi_lost; + s.sacked = info->tcpi_sacked; + s.reordering = info->tcpi_reordering; + s.fackets = info->tcpi_fackets; + s.cwnd = info->tcpi_snd_cwnd; + if (info->tcpi_snd_ssthresh < 0xFFFF) - printf(" ssthresh:%d", info->tcpi_snd_ssthresh); + s.ssthresh = info->tcpi_snd_ssthresh; rtt = (double) info->tcpi_rtt; if (tb[INET_DIAG_VEGASINFO]) { @@ -1710,89 +1868,51 @@ static void tcp_show_info(const struct nlmsghdr *nlh, struct inet_diag_msg *r, = RTA_DATA(tb[INET_DIAG_VEGASINFO]); if (vinfo->tcpv_enabled && - vinfo->tcpv_rtt && vinfo->tcpv_rtt != 0x7fffffff) + vinfo->tcpv_rtt && vinfo->tcpv_rtt != 0x7fffffff) rtt = vinfo->tcpv_rtt; } if (tb[INET_DIAG_DCTCPINFO]) { + struct dctcpstat *dctcp = malloc(sizeof(struct + dctcpstat)); + const struct tcp_dctcp_info *dinfo = RTA_DATA(tb[INET_DIAG_DCTCPINFO]); - if (dinfo->dctcp_enabled) { - printf(" ce_state %u alpha %u ab_ecn %u ab_tot %u", - dinfo->dctcp_ce_state, dinfo->dctcp_alpha, - dinfo->dctcp_ab_ecn, dinfo->dctcp_ab_tot); - } else { - printf(" fallback_mode"); - } + dctcp->enabled = !!dinfo->dctcp_enabled; + 
dctcp->ce_state = dinfo->dctcp_ce_state; + dctcp->alpha = dinfo->dctcp_alpha; + dctcp->ab_ecn = dinfo->dctcp_ab_ecn; + dctcp->ab_tot = dinfo->dctcp_ab_tot; + s.dctcp = dctcp; } if (rtt > 0 && info->tcpi_snd_mss && info->tcpi_snd_cwnd) { - printf(" send %sbps", - sprint_bw(b1, (double) info->tcpi_snd_cwnd * - (double) info->tcpi_snd_mss * 8000000. - / rtt)); + s.send_bps = (double) info->tcpi_snd_cwnd * + (double)info->tcpi_snd_mss * 8000000. / rtt; } - if (info->tcpi_last_data_sent) - printf(" lastsnd:%u", info->tcpi_last_data_sent); - - if (info->tcpi_last_data_recv) - printf(" lastrcv:%u", info->tcpi_last_data_recv); - - if (info->tcpi_last_ack_recv) - printf(" lastack:%u", info->tcpi_last_ack_recv); - if (info->tcpi_pacing_rate && - info->tcpi_pacing_rate != ~0ULL) { - printf(" pacing_rate %sbps", - sprint_bw(b1, info->tcpi_pacing_rate * 8.)); + info->tcpi_pacing_rate != ~0ULL) { + s.pacing_rate = info->tcpi_pacing_rate * 8.; if (info->tcpi_max_pacing_rate && - info->tcpi_max_pacing_rate != ~0ULL) - printf("/%sbps", - sprint_bw(b1, info->tcpi_max_pacing_rate * 8.)); + info->tcpi_max_pacing_rate != ~0ULL) + s.pacing_rate_max = info->tcpi_max_pacing_rate * 8.; } - if (info->tcpi_unacked) - printf(" unacked:%u", info->tcpi_unacked); - if (info->tcpi_retrans || info->tcpi_total_retrans) - printf(" retrans:%u/%u", info->tcpi_retrans, - info->tcpi_total_retrans); - if (info->tcpi_lost) - printf(" lost:%u", info->tcpi_lost); - if (info->tcpi_sacked && r->idiag_state != SS_LISTEN) - printf(" sacked:%u", info->tcpi_sacked); - if (info->tcpi_fackets) - printf(" fackets:%u", info->tcpi_fackets); - if (info->tcpi_reordering != 3) - printf(" reordering:%d", info->tcpi_reordering); - if (info->tcpi_rcv_rtt) - printf(" rcv_rtt:%g", (double) info->tcpi_rcv_rtt/1000); - if (info->tcpi_rcv_space) - printf(" rcv_space:%d", info->tcpi_rcv_space); - + tcp_stats_print(&s); + if (s.dctcp) + free(s.dctcp); + if (s.cong_alg) + free(s.cong_alg); } } -static char *proto_name(int protocol) 
-{ - switch (protocol) { - case IPPROTO_UDP: - return "udp"; - case IPPROTO_TCP: - return "tcp"; - case IPPROTO_DCCP: - return "dccp"; - } - - return "???"; -} - static int inet_show_sock(struct nlmsghdr *nlh, struct filter *f, int protocol) { struct rtattr * tb[INET_DIAG_MAX+1]; struct inet_diag_msg *r = NLMSG_DATA(nlh); - struct tcpstat s; + struct tcpstat s = {}; parse_rtattr(tb, INET_DIAG_MAX, (struct rtattr*)(r+1), nlh->nlmsg_len - NLMSG_LENGTH(sizeof(*r))); @@ -1801,52 +1921,28 @@ static int inet_show_sock(struct nlmsghdr *nlh, struct filter *f, int protocol) s.local.family = s.remote.family = r->idiag_family; s.lport = ntohs(r->id.idiag_sport); s.rport = ntohs(r->id.idiag_dport); + s.wq = r->idiag_wqueue; + s.rq = r->idiag_rqueue; + s.timer = r->idiag_timer; + s.timeout = r->idiag_expires; + s.retrans = r->idiag_retrans; + s.ino = r->idiag_inode; + s.uid = r->idiag_uid; + s.iface = r->id.idiag_if; + if (s.local.family == AF_INET) { s.local.bytelen = s.remote.bytelen = 4; } else { s.local.bytelen = s.remote.bytelen = 16; } + memcpy(s.local.data, r->id.idiag_src, s.local.bytelen); memcpy(s.remote.data, r->id.idiag_dst, s.local.bytelen); if (f && f->f && run_ssfilter(f->f, &s) == 0) return 0; - if (netid_width) - printf("%-*s ", netid_width, proto_name(protocol)); - if (state_width) - printf("%-*s ", state_width, sstate_name[s.state]); - - printf("%-6d %-6d ", r->idiag_rqueue, r->idiag_wqueue); - - formatted_print(&s.local, s.lport, r->id.idiag_if); - formatted_print(&s.remote, s.rport, 0); - - if (show_options) { - if (r->idiag_timer) { - if (r->idiag_timer > 4) - r->idiag_timer = 5; - printf(" timer:(%s,%s,%d)", - tmr_name[r->idiag_timer], - print_ms_timer(r->idiag_expires), - r->idiag_retrans); - } - } - char *buf = NULL; - - if (show_proc_ctx || show_sock_ctx) { - if (find_entry(r->idiag_inode, &buf, - (show_proc_ctx & show_sock_ctx) ? 
- PROC_SOCK_CTX : PROC_CTX) > 0) { - printf(" users:(%s)", buf); - free(buf); - } - } else if (show_users) { - if (find_entry(r->idiag_inode, &buf, USERS) > 0) { - printf(" users:(%s)", buf); - free(buf); - } - } + inet_stats_print(&s, protocol); if (show_details) { if (r->idiag_uid) @@ -1862,13 +1958,13 @@ static int inet_show_sock(struct nlmsghdr *nlh, struct filter *f, int protocol) printf(" %c-%c", mask & 1 ? '-' : '<', mask & 2 ? '-' : '>'); } } + if (show_mem || show_tcpinfo) { printf("\n\t"); tcp_show_info(nlh, r, tb); } printf("\n"); - return 0; } @@ -2189,53 +2285,19 @@ outerr: static int dgram_show_line(char *line, const struct filter *f, int family) { - struct tcpstat s; + struct tcpstat s = {}; char *loc, *rem, *data; char opt[256]; int n; - char *p; - if ((p = strchr(line, ':')) == NULL) + if (proc_inet_split_line(line, &loc, &rem, &data)) return -1; - loc = p+2; - if ((p = strchr(loc, ':')) == NULL) - return -1; - p[5] = 0; - rem = p+6; + int state = (data[1] >= 'A') ? (data[1] - 'A' + 10) : (data[1] - '0'); + if (!(f->states & (1 << state))) + return 0; - if ((p = strchr(rem, ':')) == NULL) - return -1; - p[5] = 0; - data = p+6; - - do { - int state = (data[1] >= 'A') ? (data[1] - 'A' + 10) : (data[1] - '0'); - - if (!(f->states & (1<f && run_ssfilter(f->f, &s) == 0) return 0; @@ -2249,31 +2311,7 @@ static int dgram_show_line(char *line, const struct filter *f, int family) if (n < 9) opt[0] = 0; - if (netid_width) - printf("%-*s ", netid_width, dg_proto); - if (state_width) - printf("%-*s ", state_width, sstate_name[s.state]); - - printf("%-6d %-6d ", s.rq, s.wq); - - formatted_print(&s.local, s.lport, 0); - formatted_print(&s.remote, s.rport, 0); - - char *buf = NULL; - - if (show_proc_ctx || show_sock_ctx) { - if (find_entry(s.ino, &buf, - (show_proc_ctx & show_sock_ctx) ? 
- PROC_SOCK_CTX : PROC_CTX) > 0) { - printf(" users:(%s)", buf); - free(buf); - } - } else if (show_users) { - if (find_entry(s.ino, &buf, USERS) > 0) { - printf(" users:(%s)", buf); - free(buf); - } - } + inet_stats_print(&s, IPPROTO_UDP); if (show_details) { if (s.uid) @@ -2283,12 +2321,11 @@ static int dgram_show_line(char *line, const struct filter *f, int family) if (opt[0]) printf(" opt:\"%s\"", opt); } - printf("\n"); + printf("\n"); return 0; } - static int udp_show(struct filter *f) { FILE *fp = NULL; @@ -2357,7 +2394,6 @@ outerr: } while (0); } - struct unixstat { struct unixstat *next; @@ -2371,12 +2407,9 @@ struct unixstat char *name; }; - - int unix_state_map[] = { SS_CLOSE, SS_SYN_SENT, SS_ESTABLISHED, SS_CLOSING }; - #define MAX_UNIX_REMEMBER (1024*1024/sizeof(struct unixstat)) static void unix_list_free(struct unixstat *list) From 4c7d75de95f5f20ab106460cfe86218b2dd87a77 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Thu, 5 Feb 2015 10:17:50 -0800 Subject: [PATCH 08/23] can: update kernel header Sanitized header from upstream 3.20-rc kernel --- include/linux/can/netlink.h | 1 + 1 file changed, 1 insertion(+) diff --git a/include/linux/can/netlink.h b/include/linux/can/netlink.h index 25fd52cc..6d4ec2a9 100644 --- a/include/linux/can/netlink.h +++ b/include/linux/can/netlink.h @@ -98,6 +98,7 @@ struct can_ctrlmode { #define CAN_CTRLMODE_BERR_REPORTING 0x10 /* Bus-error reporting */ #define CAN_CTRLMODE_FD 0x20 /* CAN FD mode */ #define CAN_CTRLMODE_PRESUME_ACK 0x40 /* Ignore missing CAN ACKs */ +#define CAN_CTRLMODE_FD_NON_ISO 0x80 /* CAN FD in non-ISO mode */ /* * CAN device statistics From 8c58d4036b4f4a14c2c65149d4ff65273a72c743 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Thu, 5 Feb 2015 10:20:58 -0800 Subject: [PATCH 09/23] update kernel headers based on net-next 3.21 Pull in headers from later tree --- include/linux/if_link.h | 4 ++++ include/linux/neighbour.h | 1 + include/linux/rtnetlink.h | 5 +++++ 3 files changed, 10 
insertions(+) diff --git a/include/linux/if_link.h b/include/linux/if_link.h index 167ec34b..ac64724c 100644 --- a/include/linux/if_link.h +++ b/include/linux/if_link.h @@ -146,6 +146,7 @@ enum { IFLA_PHYS_PORT_ID, IFLA_CARRIER_CHANGES, IFLA_PHYS_SWITCH_ID, + IFLA_LINK_NETNSID, __IFLA_MAX }; @@ -368,6 +369,9 @@ enum { IFLA_VXLAN_UDP_CSUM, IFLA_VXLAN_UDP_ZERO_CSUM6_TX, IFLA_VXLAN_UDP_ZERO_CSUM6_RX, + IFLA_VXLAN_REMCSUM_TX, + IFLA_VXLAN_REMCSUM_RX, + IFLA_VXLAN_GBP, __IFLA_VXLAN_MAX }; #define IFLA_VXLAN_MAX (__IFLA_VXLAN_MAX - 1) diff --git a/include/linux/neighbour.h b/include/linux/neighbour.h index f3d77f9f..3873a355 100644 --- a/include/linux/neighbour.h +++ b/include/linux/neighbour.h @@ -25,6 +25,7 @@ enum { NDA_VNI, NDA_IFINDEX, NDA_MASTER, + NDA_LINK_NETNSID, __NDA_MAX }; diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h index 9111053f..3eb78105 100644 --- a/include/linux/rtnetlink.h +++ b/include/linux/rtnetlink.h @@ -132,6 +132,11 @@ enum { RTM_GETMDB = 86, #define RTM_GETMDB RTM_GETMDB + RTM_NEWNSID = 88, +#define RTM_NEWNSID RTM_NEWNSID + RTM_GETNSID = 90, +#define RTM_GETNSID RTM_GETNSID + __RTM_MAX, #define RTM_MAX (((__RTM_MAX + 3) & ~3) - 1) }; From e998e118ddc3a0ab1b325ad7ed4abe59b83e684a Mon Sep 17 00:00:00 2001 From: Vadim Kochan Date: Sun, 18 Jan 2015 16:10:17 +0200 Subject: [PATCH 10/23] lib: Exec func on each netns Added possibility to run some func on each netns. 
Signed-off-by: Vadim Kochan --- include/namespace.h | 6 ++++++ include/utils.h | 4 ++++ lib/namespace.c | 22 ++++++++++++++++++++++ lib/utils.c | 28 ++++++++++++++++++++++++++++ 4 files changed, 60 insertions(+) diff --git a/include/namespace.h b/include/namespace.h index 52f7fbd7..a2ac7dcc 100644 --- a/include/namespace.h +++ b/include/namespace.h @@ -44,5 +44,11 @@ static inline int setns(int fd, int nstype) extern int netns_switch(char *netns); extern int netns_get_fd(const char *netns); +extern int netns_foreach(int (*func)(char *nsname, void *arg), void *arg); + +struct netns_func { + int (*func)(char *nsname, void *arg); + void *arg; +}; #endif /* __NAMESPACE_H__ */ diff --git a/include/utils.h b/include/utils.h index e1fe7cfc..a8817d30 100644 --- a/include/utils.h +++ b/include/utils.h @@ -5,6 +5,7 @@ #include #include #include +#include #include "libnetlink.h" #include "ll_map.h" @@ -162,4 +163,7 @@ int iplink_parse(int argc, char **argv, struct iplink_req *req, char **name, char **type, char **link, char **dev, int *group, int *index); +extern int do_each_netns(int (*func)(char *nsname, void *arg), void *arg, + bool show_label); + #endif /* __UTILS_H__ */ diff --git a/lib/namespace.c b/lib/namespace.c index 65c1e3d7..c03a103a 100644 --- a/lib/namespace.c +++ b/lib/namespace.c @@ -99,3 +99,25 @@ int netns_get_fd(const char *name) } return open(path, O_RDONLY); } + +int netns_foreach(int (*func)(char *nsname, void *arg), void *arg) +{ + DIR *dir; + struct dirent *entry; + + dir = opendir(NETNS_RUN_DIR); + if (!dir) + return -1; + + while ((entry = readdir(dir)) != NULL) { + if (strcmp(entry->d_name, ".") == 0) + continue; + if (strcmp(entry->d_name, "..") == 0) + continue; + if (func(entry->d_name, arg)) + break; + } + + closedir(dir); + return 0; +} diff --git a/lib/utils.c b/lib/utils.c index f65ceaaf..efebe189 100644 --- a/lib/utils.c +++ b/lib/utils.c @@ -31,6 +31,7 @@ #include "utils.h" +#include "namespace.h" int timestamp_short = 0; @@ -878,3 +879,30 
@@ void print_nlmsg_timestamp(FILE *fp, const struct nlmsghdr *n) tstr[strlen(tstr)-1] = 0; fprintf(fp, "Timestamp: %s %lu us\n", tstr, usecs); } + +static int on_netns(char *nsname, void *arg) +{ + struct netns_func *f = arg; + + if (netns_switch(nsname)) + return -1; + + return f->func(nsname, f->arg); +} + +static int on_netns_label(char *nsname, void *arg) +{ + printf("\nnetns: %s\n", nsname); + return on_netns(nsname, arg); +} + +int do_each_netns(int (*func)(char *nsname, void *arg), void *arg, + bool show_label) +{ + struct netns_func nsf = { .func = func, .arg = arg }; + + if (show_label) + return netns_foreach(on_netns_label, &nsf); + + return netns_foreach(on_netns, &nsf); +} From b13ba03f54e95de2b2209763f874ac6713d13dd1 Mon Sep 17 00:00:00 2001 From: Vadim Kochan Date: Sun, 18 Jan 2015 16:10:18 +0200 Subject: [PATCH 11/23] ip netns: Allow exec on each netns This change allows executing some cmd on each named netns (except default) by specifying '-all' option: # ip -all netns exec ip link Each command executes synchronously. Exit status is not considered, so there might be a case that some CMD can fail on some netns but succeed on the others. 
EXAMPLES: 1) Show link info on all netns: $ ip -all netns exec ip link netns: test_net 1: lo: mtu 65536 qdisc noop state DOWN mode DEFAULT group default link/loopback 00:00:00:00:00:00 brd 00:00:00:00:00:00 4: tap0: mtu 1500 qdisc pfifo_fast state DOWN mode DEFAULT group default qlen 500 link/ether 1a:19:6f:25:eb:85 brd ff:ff:ff:ff:ff:ff netns: home0 1: lo: mtu 65536 qdisc noop state DOWN mode DEFAULT group default link/loopback 00:00:00:00:00:00 brd 00:00:00:00:00:00 4: tap0: mtu 1500 qdisc pfifo_fast state DOWN mode DEFAULT group default qlen 500 link/ether ea:1a:59:40:d3:29 brd ff:ff:ff:ff:ff:ff netns: lan0 1: lo: mtu 65536 qdisc noop state DOWN mode DEFAULT group default link/loopback 00:00:00:00:00:00 brd 00:00:00:00:00:00 4: tap0: mtu 1500 qdisc pfifo_fast state DOWN mode DEFAULT group default qlen 500 link/ether ce:49:d5:46:81:ea brd ff:ff:ff:ff:ff:ff 2) Set UP tap0 device for the all netns: $ ip -all netns exec ip link set dev tap0 up netns: test_net netns: home0 netns: lan0 Signed-off-by: Vadim Kochan --- include/utils.h | 1 + ip/ip.c | 5 +++- ip/ipnetns.c | 72 +++++++++++++++++++++++++++------------------ man/man8/ip-netns.8 | 16 ++++++++-- man/man8/ip.8 | 7 ++++- 5 files changed, 67 insertions(+), 34 deletions(-) diff --git a/include/utils.h b/include/utils.h index a8817d30..3da22837 100644 --- a/include/utils.h +++ b/include/utils.h @@ -24,6 +24,7 @@ extern int timestamp_short; extern char * _SL_; extern int max_flush_loops; extern int batch_mode; +extern bool do_all; #ifndef IPPROTO_ESP #define IPPROTO_ESP 50 diff --git a/ip/ip.c b/ip/ip.c index 850a0017..da16b15f 100644 --- a/ip/ip.c +++ b/ip/ip.c @@ -36,6 +36,7 @@ char * _SL_ = NULL; int force = 0; int max_flush_loops = 10; int batch_mode = 0; +bool do_all = false; struct rtnl_handle rth = { .fd = -1 }; @@ -55,7 +56,7 @@ static void usage(void) " -4 | -6 | -I | -D | -B | -0 |\n" " -l[oops] { maximum-addr-flush-attempts } |\n" " -o[neline] | -t[imestamp] | -ts[hort] | -b[atch] [filename] |\n" -" 
-rc[vbuf] [size] | -n[etns] name }\n"); +" -rc[vbuf] [size] | -n[etns] name | -a[ll] }\n"); exit(-1); } @@ -270,6 +271,8 @@ int main(int argc, char **argv) NEXT_ARG(); if (netns_switch(argv[1])) exit(-1); + } else if (matches(opt, "-all") == 0) { + do_all = true; } else { fprintf(stderr, "Option \"%s\" is unknown, try \"ip -help\".\n", opt); exit(-1); diff --git a/ip/ipnetns.c b/ip/ipnetns.c index 123318eb..59a93213 100644 --- a/ip/ipnetns.c +++ b/ip/ipnetns.c @@ -26,7 +26,7 @@ static int usage(void) fprintf(stderr, " ip netns delete NAME\n"); fprintf(stderr, " ip netns identify [PID]\n"); fprintf(stderr, " ip netns pids NAME\n"); - fprintf(stderr, " ip netns exec NAME cmd ...\n"); + fprintf(stderr, " ip [-all] netns exec [NAME] cmd ...\n"); fprintf(stderr, " ip netns monitor\n"); exit(-1); } @@ -51,29 +51,10 @@ static int netns_list(int argc, char **argv) return 0; } -static int netns_exec(int argc, char **argv) +static int cmd_exec(const char *cmd, char **argv, bool do_fork) { - /* Setup the proper environment for apps that are not netns - * aware, and execute a program in that environment. - */ - const char *cmd; - - if (argc < 1) { - fprintf(stderr, "No netns name specified\n"); - return -1; - } - if (argc < 2) { - fprintf(stderr, "No command specified\n"); - return -1; - } - cmd = argv[1]; - - if (netns_switch(argv[0])) - return -1; - fflush(stdout); - - if (batch_mode) { + if (do_fork) { int status; pid_t pid; @@ -91,23 +72,56 @@ static int netns_exec(int argc, char **argv) } if (WIFEXITED(status)) { - /* ip must return the status of the child, - * but do_cmd() will add a minus to this, - * so let's add another one here to cancel it. 
- */ - return -WEXITSTATUS(status); + return WEXITSTATUS(status); } exit(1); } } - if (execvp(cmd, argv + 1) < 0) + if (execvp(cmd, argv) < 0) fprintf(stderr, "exec of \"%s\" failed: %s\n", - cmd, strerror(errno)); + cmd, strerror(errno)); _exit(1); } +static int on_netns_exec(char *nsname, void *arg) +{ + char **argv = arg; + cmd_exec(argv[1], argv + 1, true); + return 0; +} + +static int netns_exec(int argc, char **argv) +{ + /* Setup the proper environment for apps that are not netns + * aware, and execute a program in that environment. + */ + const char *cmd; + + if (argc < 1 && !do_all) { + fprintf(stderr, "No netns name specified\n"); + return -1; + } + if ((argc < 2 && !do_all) || (argc < 1 && do_all)) { + fprintf(stderr, "No command specified\n"); + return -1; + } + + if (do_all) + return do_each_netns(on_netns_exec, --argv, 1); + + if (netns_switch(argv[0])) + return -1; + + /* ip must return the status of the child, + * but do_cmd() will add a minus to this, + * so let's add another one here to cancel it. + */ + cmd = argv[1]; + return -cmd_exec(cmd, argv + 1, !!batch_mode); +} + static int is_pid(const char *str) { int ch; diff --git a/man/man8/ip-netns.8 b/man/man8/ip-netns.8 index 74343ed6..7b9571ea 100644 --- a/man/man8/ip-netns.8 +++ b/man/man8/ip-netns.8 @@ -28,8 +28,8 @@ ip-netns \- process network namespace management .I NETNSNAME .ti -8 -.BR "ip netns exec " -.I NETNSNAME command ... +.BR "ip [-all] netns exec " +.RI "[ " NETNSNAME " ] " command ... .ti -8 .BR "ip netns monitor" @@ -98,7 +98,7 @@ This command walks through proc and finds all of the process who have the named network namespace as their primary network namespace. .TP -.B ip netns exec NAME cmd ... - Run cmd in the named network namespace +.B ip [-all] netns exec [ NAME ] cmd ... 
- Run cmd in the named network namespace .sp This command allows applications that are network namespace unaware to be run in something other than the default network namespace with @@ -107,6 +107,16 @@ in the customary global locations. A network namespace and bind mounts are used to move files from their network namespace specific location to their default locations without affecting other processes. +If +.B -all +option was specified then +.B cmd +will be executed synchronously on each named network namespace even if +.B cmd +fails on some of them. Network namespace name is printed on each +.B cmd +executing. + .TP .B ip netns monitor - Report as network namespace names are added and deleted .sp diff --git a/man/man8/ip.8 b/man/man8/ip.8 index 0bae59e7..016e8c66 100644 --- a/man/man8/ip.8 +++ b/man/man8/ip.8 @@ -32,7 +32,8 @@ ip \- show / manipulate routing, devices, policy routing and tunnels \fB\-f\fR[\fIamily\fR] { .BR inet " | " inet6 " | " ipx " | " dnet " | " link " } | " \fB\-o\fR[\fIneline\fR] | -\fB\-n\fR[\fIetns\fR] name } +\fB\-n\fR[\fIetns\fR] name | +\fB\-a\fR[\fIll\fR] } .SH OPTIONS @@ -155,6 +156,10 @@ to .RI "-n[etns] " NETNS " [ " OPTIONS " ] " OBJECT " { " COMMAND " | " .BR help " }" +.TP +.BR "\-a" , " \-all" +executes specified command over all objects, it depends if command supports this option. 
+ .SH IP - COMMAND SYNTAX .SS From 33724939097b8ebb5c37cc0dc2b5e57fe342c8fe Mon Sep 17 00:00:00 2001 From: Vadim Kochan Date: Sun, 18 Jan 2015 16:10:19 +0200 Subject: [PATCH 12/23] ip netns: Delete all netns Allow delete all namespace names by: $ ip -all netns del Signed-off-by: Vadim Kochan --- ip/ipnetns.c | 26 ++++++++++++++++---------- man/man8/ip-netns.8 | 12 ++++++++++-- 2 files changed, 26 insertions(+), 12 deletions(-) diff --git a/ip/ipnetns.c b/ip/ipnetns.c index 59a93213..e4038ea7 100644 --- a/ip/ipnetns.c +++ b/ip/ipnetns.c @@ -23,7 +23,7 @@ static int usage(void) { fprintf(stderr, "Usage: ip netns list\n"); fprintf(stderr, " ip netns add NAME\n"); - fprintf(stderr, " ip netns delete NAME\n"); + fprintf(stderr, " ip [-all] netns delete [NAME]\n"); fprintf(stderr, " ip netns identify [PID]\n"); fprintf(stderr, " ip netns pids NAME\n"); fprintf(stderr, " ip [-all] netns exec [NAME] cmd ...\n"); @@ -259,18 +259,11 @@ static int netns_identify(int argc, char **argv) } -static int netns_delete(int argc, char **argv) +static int on_netns_del(char *nsname, void *arg) { - const char *name; char netns_path[MAXPATHLEN]; - if (argc < 1) { - fprintf(stderr, "No netns name specified\n"); - return -1; - } - - name = argv[0]; - snprintf(netns_path, sizeof(netns_path), "%s/%s", NETNS_RUN_DIR, name); + snprintf(netns_path, sizeof(netns_path), "%s/%s", NETNS_RUN_DIR, nsname); umount2(netns_path, MNT_DETACH); if (unlink(netns_path) < 0) { fprintf(stderr, "Cannot remove namespace file \"%s\": %s\n", @@ -280,6 +273,19 @@ static int netns_delete(int argc, char **argv) return 0; } +static int netns_delete(int argc, char **argv) +{ + if (argc < 1 && !do_all) { + fprintf(stderr, "No netns name specified\n"); + return -1; + } + + if (do_all) + return netns_foreach(on_netns_del, NULL); + + return on_netns_del(argv[0], NULL); +} + static int create_netns_dir(void) { /* Create the base netns directory if it doesn't exist */ diff --git a/man/man8/ip-netns.8 b/man/man8/ip-netns.8 
index 7b9571ea..8e6999c0 100644 --- a/man/man8/ip-netns.8 +++ b/man/man8/ip-netns.8 @@ -16,9 +16,13 @@ ip-netns \- process network namespace management .BR "ip netns" " { " list " } " .ti -8 -.BR "ip netns" " { " add " | " delete " } " +.B ip netns add .I NETNSNAME +.ti -8 +.B ip [-all] netns del +.RI "[ " NETNSNAME " ]" + .ti -8 .BR "ip netns identify" .RI "[ " PID " ]" @@ -76,7 +80,7 @@ If NAME is available in /var/run/netns/ this command creates a new network namespace and assigns NAME. .TP -.B ip netns delete NAME - delete the name of a network namespace +.B ip [-all] netns delete [ NAME ] - delete the name of a network namespace(s) .sp If NAME is present in /var/run/netns it is umounted and the mount point is removed. If this is the last user of the network namespace the @@ -84,6 +88,10 @@ network namespace will be freed, otherwise the network namespace persists until it has no more users. ip netns delete may fail if the mount point is in use in another mount namespace. +If +.B -all +option was specified then all the network namespace names will be removed. + .TP .B ip netns identify [PID] - Report network namespaces names for process .sp From 5e5055bc26aea47073e9071fbc6ddb822ff89c58 Mon Sep 17 00:00:00 2001 From: Andreas Henriksson Date: Fri, 23 Jan 2015 13:10:33 +0100 Subject: [PATCH 13/23] iproute2/ip: fix up filter when printing addresses "ip addr show up" would exclude the interface (link), but include the addresses of down interfaces (which looked like they were indented under a different interface). This fixes the filtering. 
For a full example see the original bug report at: http://bugs.debian.org/776040 Reported-by: Paul Slootman CC: 776040@bugs.debian.org Signed-off-by: Andreas Henriksson --- ip/ipaddress.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/ip/ipaddress.c b/ip/ipaddress.c index d5e863dd..3730424a 100644 --- a/ip/ipaddress.c +++ b/ip/ipaddress.c @@ -970,7 +970,8 @@ struct nlmsg_chain struct nlmsg_list *tail; }; -static int print_selected_addrinfo(int ifindex, struct nlmsg_list *ainfo, FILE *fp) +static int print_selected_addrinfo(struct ifinfomsg *ifi, + struct nlmsg_list *ainfo, FILE *fp) { for ( ;ainfo ; ainfo = ainfo->next) { struct nlmsghdr *n = &ainfo->h; @@ -982,10 +983,13 @@ static int print_selected_addrinfo(int ifindex, struct nlmsg_list *ainfo, FILE * if (n->nlmsg_len < NLMSG_LENGTH(sizeof(ifa))) return -1; - if (ifa->ifa_index != ifindex || + if (ifa->ifa_index != ifi->ifi_index || (filter.family && filter.family != ifa->ifa_family)) continue; + if (filter.up && !(ifi->ifi_flags&IFF_UP)) + continue; + print_addrinfo(NULL, n, fp); } return 0; @@ -1446,7 +1450,7 @@ static int ipaddr_list_flush_or_save(int argc, char **argv, int action) if (no_link || (res = print_linkinfo(NULL, &l->h, stdout)) >= 0) { struct ifinfomsg *ifi = NLMSG_DATA(&l->h); if (filter.family != AF_PACKET) - print_selected_addrinfo(ifi->ifi_index, + print_selected_addrinfo(ifi, ainfo.head, stdout); if (res > 0 && !do_link && show_stats) print_link_stats(stdout, &l->h); From 2eb90dc7622caab72a84897592bbc02375cdd4f0 Mon Sep 17 00:00:00 2001 From: Thomas Graf Date: Thu, 15 Jan 2015 14:54:25 +0100 Subject: [PATCH 14/23] vxlan: Group policy extension Signed-off-by: Thomas Graf --- ip/iplink_vxlan.c | 11 +++++++++++ man/man8/ip-link.8.in | 45 +++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 56 insertions(+) diff --git a/ip/iplink_vxlan.c b/ip/iplink_vxlan.c index 9cc3ec38..473ff97a 100644 --- a/ip/iplink_vxlan.c +++ b/ip/iplink_vxlan.c @@ -30,6 +30,7 @@ 
static void print_explain(FILE *f) fprintf(f, " [ [no]l2miss ] [ [no]l3miss ]\n"); fprintf(f, " [ ageing SECONDS ] [ maxaddress NUMBER ]\n"); fprintf(f, " [ [no]udpcsum ] [ [no]udp6zerocsumtx ] [ [no]udp6zerocsumrx ]\n"); + fprintf(f, " [ gbp ]\n"); fprintf(f, "\n"); fprintf(f, "Where: VNI := 0-16777215\n"); fprintf(f, " ADDR := { IP_ADDRESS | any }\n"); @@ -68,6 +69,7 @@ static int vxlan_parse_opt(struct link_util *lu, int argc, char **argv, __u8 udpcsum = 0; __u8 udp6zerocsumtx = 0; __u8 udp6zerocsumrx = 0; + __u8 gbp = 0; int dst_port_set = 0; struct ifla_vxlan_port_range range = { 0, 0 }; @@ -197,6 +199,8 @@ static int vxlan_parse_opt(struct link_util *lu, int argc, char **argv, udp6zerocsumrx = 1; } else if (!matches(*argv, "noudp6zerocsumrx")) { udp6zerocsumrx = 0; + } else if (!matches(*argv, "gbp")) { + gbp = 1; } else if (matches(*argv, "help") == 0) { explain(); return -1; @@ -268,6 +272,10 @@ static int vxlan_parse_opt(struct link_util *lu, int argc, char **argv, if (dstport) addattr16(n, 1024, IFLA_VXLAN_PORT, htons(dstport)); + if (gbp) + addattr_l(n, 1024, IFLA_VXLAN_GBP, NULL, 0); + + return 0; } @@ -398,6 +406,9 @@ static void vxlan_print_opt(struct link_util *lu, FILE *f, struct rtattr *tb[]) if (tb[IFLA_VXLAN_UDP_ZERO_CSUM6_RX] && rta_getattr_u8(tb[IFLA_VXLAN_UDP_ZERO_CSUM6_RX])) fputs("udp6zerocsumrx ", f); + + if (tb[IFLA_VXLAN_GBP]) + fputs("gbp ", f); } static void vxlan_print_help(struct link_util *lu, int argc, char **argv, diff --git a/man/man8/ip-link.8.in b/man/man8/ip-link.8.in index 313d6f23..8e312978 100644 --- a/man/man8/ip-link.8.in +++ b/man/man8/ip-link.8.in @@ -276,6 +276,8 @@ the following additional arguments are supported: .BI ageing " SECONDS " .R " ] [ " .BI maxaddress " NUMBER " +.R " ] [ " +.B gbp .R " ]" .in +8 @@ -348,6 +350,49 @@ are entered into the VXLAN device forwarding database. .BI maxaddress " NUMBER" - specifies the maximum number of FDB entries. +.sp +.B gbp +- enables the Group Policy extension (VXLAN-GBP). 
+ +.in +4 +Allows to transport group policy context across VXLAN network peers. +If enabled, includes the mark of a packet in the VXLAN header for outgoing +packets and fills the packet mark based on the information found in the +VXLAN header for incoming packets. + +Format of upper 16 bits of packet mark (flags): + +.in +2 ++-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +.br +|-|-|-|-|-|-|-|-|-|D|-|-|A|-|-|-| +.br ++-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + +.B D := +Don't Learn bit. When set, this bit indicates that the egress +VTEP MUST NOT learn the source address of the encapsulated frame. + +.B A := +Indicates that the group policy has already been applied to +this packet. Policies MUST NOT be applied by devices when the A bit is set. +.in -2 + +Format of lower 16 bits of packet mark (policy ID): + +.in +2 ++-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +.br +| Group Policy ID | +.br ++-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +.in -2 + +Example: + iptables -A OUTPUT [...] -j MARK --set-mark 0x800FF + +.in -4 + .in -8 .TP From 82a307e835a0b7431c0367737757522dedc606fe Mon Sep 17 00:00:00 2001 From: Oliver Hartkopp Date: Thu, 22 Jan 2015 19:04:33 +0100 Subject: [PATCH 15/23] can: Add support for CAN FD non-ISO feature This patch makes CAN_CTRLMODE_FD_NON_ISO netlink feature configurable. During the CAN FD standardization process within the ISO it turned out that the failure detection capability has to be improved. The CAN in Automation organization (CiA) defined the already implemented CAN FD controllers as 'non-ISO' and the upcoming improved CAN FD controllers as 'ISO' compliant. See at http://www.can-cia.com/index.php?id=1937 Starting with the - currently non-ISO - driver for M_CAN v3.0.1 introduced in Linux 3.18 this bit needs to be propagated to userspace. In future drivers this bit will become configurable depending on the CAN FD controllers capabilities. 
Signed-off-by: Oliver Hartkopp --- ip/iplink_can.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/ip/iplink_can.c b/ip/iplink_can.c index fb503321..f1b089df 100644 --- a/ip/iplink_can.c +++ b/ip/iplink_can.c @@ -37,6 +37,7 @@ static void print_usage(FILE *f) "\t[ one-shot { on | off } ]\n" "\t[ berr-reporting { on | off } ]\n" "\t[ fd { on | off } ]\n" + "\t[ fd-non-iso { on | off } ]\n" "\t[ presume-ack { on | off } ]\n" "\n" "\t[ restart-ms TIME-MS ]\n" @@ -100,6 +101,7 @@ static void print_ctrlmode(FILE *f, __u32 cm) _PF(CAN_CTRLMODE_ONE_SHOT, "ONE-SHOT"); _PF(CAN_CTRLMODE_BERR_REPORTING, "BERR-REPORTING"); _PF(CAN_CTRLMODE_FD, "FD"); + _PF(CAN_CTRLMODE_FD_NON_ISO, "FD-NON-ISO"); _PF(CAN_CTRLMODE_PRESUME_ACK, "PRESUME-ACK"); #undef _PF if (cm) @@ -203,6 +205,10 @@ static int can_parse_opt(struct link_util *lu, int argc, char **argv, NEXT_ARG(); set_ctrlmode("fd", *argv, &cm, CAN_CTRLMODE_FD); + } else if (matches(*argv, "fd-non-iso") == 0) { + NEXT_ARG(); + set_ctrlmode("fd-non-iso", *argv, &cm, + CAN_CTRLMODE_FD_NON_ISO); } else if (matches(*argv, "presume-ack") == 0) { NEXT_ARG(); set_ctrlmode("presume-ack", *argv, &cm, From 1d129d191a3a632e05cf440c15aaffe23e0fa798 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Mon, 19 Jan 2015 16:56:29 +0100 Subject: [PATCH 16/23] tc: push bpf common code into separate file Signed-off-by: Jiri Pirko --- tc/Makefile | 2 +- tc/f_bpf.c | 136 ++++-------------------------------------------- tc/tc_bpf.c | 146 ++++++++++++++++++++++++++++++++++++++++++++++++++++ tc/tc_bpf.h | 28 ++++++++++ 4 files changed, 186 insertions(+), 126 deletions(-) create mode 100644 tc/tc_bpf.c create mode 100644 tc/tc_bpf.h diff --git a/tc/Makefile b/tc/Makefile index 9412094f..15f68ce0 100644 --- a/tc/Makefile +++ b/tc/Makefile @@ -1,5 +1,5 @@ TCOBJ= tc.o tc_qdisc.o tc_class.o tc_filter.o tc_util.o \ - tc_monitor.o m_police.o m_estimator.o m_action.o \ + tc_monitor.o tc_bpf.o m_police.o m_estimator.o m_action.o \ m_ematch.o emp_ematch.yacc.o 
emp_ematch.lex.o include ../Config diff --git a/tc/f_bpf.c b/tc/f_bpf.c index 48635a70..e2af94e3 100644 --- a/tc/f_bpf.c +++ b/tc/f_bpf.c @@ -26,6 +26,7 @@ #include "utils.h" #include "tc_util.h" +#include "tc_bpf.h" static void explain(void) { @@ -44,130 +45,6 @@ static void explain(void) fprintf(stderr, "NOTE: CLASSID is parsed as hexadecimal input.\n"); } -static int bpf_parse_string(char *arg, bool from_file, __u16 *bpf_len, - char **bpf_string, bool *need_release, - const char separator) -{ - char sp; - - if (from_file) { - size_t tmp_len, op_len = sizeof("65535 255 255 4294967295,"); - char *tmp_string; - FILE *fp; - - tmp_len = sizeof("4096,") + BPF_MAXINSNS * op_len; - tmp_string = malloc(tmp_len); - if (tmp_string == NULL) - return -ENOMEM; - - memset(tmp_string, 0, tmp_len); - - fp = fopen(arg, "r"); - if (fp == NULL) { - perror("Cannot fopen"); - free(tmp_string); - return -ENOENT; - } - - if (!fgets(tmp_string, tmp_len, fp)) { - free(tmp_string); - fclose(fp); - return -EIO; - } - - fclose(fp); - - *need_release = true; - *bpf_string = tmp_string; - } else { - *need_release = false; - *bpf_string = arg; - } - - if (sscanf(*bpf_string, "%hu%c", bpf_len, &sp) != 2 || - sp != separator) { - if (*need_release) - free(*bpf_string); - return -EINVAL; - } - - return 0; -} - -static int bpf_parse_ops(int argc, char **argv, struct nlmsghdr *n, - bool from_file) -{ - char *bpf_string, *token, separator = ','; - struct sock_filter bpf_ops[BPF_MAXINSNS]; - int ret = 0, i = 0; - bool need_release; - __u16 bpf_len = 0; - - if (argc < 1) - return -EINVAL; - if (bpf_parse_string(argv[0], from_file, &bpf_len, &bpf_string, - &need_release, separator)) - return -EINVAL; - if (bpf_len == 0 || bpf_len > BPF_MAXINSNS) { - ret = -EINVAL; - goto out; - } - - token = bpf_string; - while ((token = strchr(token, separator)) && (++token)[0]) { - if (i >= bpf_len) { - fprintf(stderr, "Real program length exceeds encoded " - "length parameter!\n"); - ret = -EINVAL; - goto out; - } - 
- if (sscanf(token, "%hu %hhu %hhu %u,", - &bpf_ops[i].code, &bpf_ops[i].jt, - &bpf_ops[i].jf, &bpf_ops[i].k) != 4) { - fprintf(stderr, "Error at instruction %d!\n", i); - ret = -EINVAL; - goto out; - } - - i++; - } - - if (i != bpf_len) { - fprintf(stderr, "Parsed program length is less than encoded" - "length parameter!\n"); - ret = -EINVAL; - goto out; - } - - addattr_l(n, MAX_MSG, TCA_BPF_OPS_LEN, &bpf_len, sizeof(bpf_len)); - addattr_l(n, MAX_MSG, TCA_BPF_OPS, &bpf_ops, - bpf_len * sizeof(struct sock_filter)); -out: - if (need_release) - free(bpf_string); - - return ret; -} - -static void bpf_print_ops(FILE *f, struct rtattr *bpf_ops, __u16 len) -{ - struct sock_filter *ops = (struct sock_filter *) RTA_DATA(bpf_ops); - int i; - - if (len == 0) - return; - - fprintf(f, "bytecode \'%u,", len); - - for (i = 0; i < len - 1; i++) - fprintf(f, "%hu %hhu %hhu %u,", ops[i].code, ops[i].jt, - ops[i].jf, ops[i].k); - - fprintf(f, "%hu %hhu %hhu %u\'\n", ops[i].code, ops[i].jt, - ops[i].jf, ops[i].k); -} - static int bpf_parse_opt(struct filter_util *qu, char *handle, int argc, char **argv, struct nlmsghdr *n) { @@ -195,6 +72,10 @@ static int bpf_parse_opt(struct filter_util *qu, char *handle, while (argc > 0) { if (matches(*argv, "run") == 0) { bool from_file; + struct sock_filter bpf_ops[BPF_MAXINSNS]; + __u16 bpf_len; + int ret; + NEXT_ARG(); if (strcmp(*argv, "bytecode-file") == 0) { from_file = true; @@ -206,10 +87,15 @@ static int bpf_parse_opt(struct filter_util *qu, char *handle, return -1; } NEXT_ARG(); - if (bpf_parse_ops(argc, argv, n, from_file)) { + ret = bpf_parse_ops(argc, argv, bpf_ops, from_file); + if (ret < 0) { fprintf(stderr, "Illegal \"bytecode\"\n"); return -1; } + bpf_len = ret; + addattr16(n, MAX_MSG, TCA_BPF_OPS_LEN, bpf_len); + addattr_l(n, MAX_MSG, TCA_BPF_OPS, &bpf_ops, + bpf_len * sizeof(struct sock_filter)); } else if (matches(*argv, "classid") == 0 || strcmp(*argv, "flowid") == 0) { unsigned handle; diff --git a/tc/tc_bpf.c b/tc/tc_bpf.c 
new file mode 100644 index 00000000..c6901d6c --- /dev/null +++ b/tc/tc_bpf.c @@ -0,0 +1,146 @@ +/* + * tc_bpf.c BPF common code + * + * This program is free software; you can distribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * Authors: Daniel Borkmann + * Jiri Pirko + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "utils.h" +#include "tc_util.h" +#include "tc_bpf.h" + +int bpf_parse_string(char *arg, bool from_file, __u16 *bpf_len, + char **bpf_string, bool *need_release, + const char separator) +{ + char sp; + + if (from_file) { + size_t tmp_len, op_len = sizeof("65535 255 255 4294967295,"); + char *tmp_string; + FILE *fp; + + tmp_len = sizeof("4096,") + BPF_MAXINSNS * op_len; + tmp_string = malloc(tmp_len); + if (tmp_string == NULL) + return -ENOMEM; + + memset(tmp_string, 0, tmp_len); + + fp = fopen(arg, "r"); + if (fp == NULL) { + perror("Cannot fopen"); + free(tmp_string); + return -ENOENT; + } + + if (!fgets(tmp_string, tmp_len, fp)) { + free(tmp_string); + fclose(fp); + return -EIO; + } + + fclose(fp); + + *need_release = true; + *bpf_string = tmp_string; + } else { + *need_release = false; + *bpf_string = arg; + } + + if (sscanf(*bpf_string, "%hu%c", bpf_len, &sp) != 2 || + sp != separator) { + if (*need_release) + free(*bpf_string); + return -EINVAL; + } + + return 0; +} + +int bpf_parse_ops(int argc, char **argv, struct sock_filter *bpf_ops, + bool from_file) +{ + char *bpf_string, *token, separator = ','; + int ret = 0, i = 0; + bool need_release; + __u16 bpf_len = 0; + + if (argc < 1) + return -EINVAL; + if (bpf_parse_string(argv[0], from_file, &bpf_len, &bpf_string, + &need_release, separator)) + return -EINVAL; + if (bpf_len == 0 || bpf_len > BPF_MAXINSNS) { + ret = -EINVAL; + goto out; + } + + token = bpf_string; + while 
((token = strchr(token, separator)) && (++token)[0]) { + if (i >= bpf_len) { + fprintf(stderr, "Real program length exceeds encoded " + "length parameter!\n"); + ret = -EINVAL; + goto out; + } + + if (sscanf(token, "%hu %hhu %hhu %u,", + &bpf_ops[i].code, &bpf_ops[i].jt, + &bpf_ops[i].jf, &bpf_ops[i].k) != 4) { + fprintf(stderr, "Error at instruction %d!\n", i); + ret = -EINVAL; + goto out; + } + + i++; + } + + if (i != bpf_len) { + fprintf(stderr, "Parsed program length is less than encoded" + "length parameter!\n"); + ret = -EINVAL; + goto out; + } + ret = bpf_len; + +out: + if (need_release) + free(bpf_string); + + return ret; +} + +void bpf_print_ops(FILE *f, struct rtattr *bpf_ops, __u16 len) +{ + struct sock_filter *ops = (struct sock_filter *) RTA_DATA(bpf_ops); + int i; + + if (len == 0) + return; + + fprintf(f, "bytecode \'%u,", len); + + for (i = 0; i < len - 1; i++) + fprintf(f, "%hu %hhu %hhu %u,", ops[i].code, ops[i].jt, + ops[i].jf, ops[i].k); + + fprintf(f, "%hu %hhu %hhu %u\'\n", ops[i].code, ops[i].jt, + ops[i].jf, ops[i].k); +} diff --git a/tc/tc_bpf.h b/tc/tc_bpf.h new file mode 100644 index 00000000..08cca927 --- /dev/null +++ b/tc/tc_bpf.h @@ -0,0 +1,28 @@ +/* + * tc_bpf.h BPF common code + * + * This program is free software; you can distribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. 
+ * + * Authors: Daniel Borkmann + * Jiri Pirko + */ + +#ifndef _TC_BPF_H_ +#define _TC_BPF_H_ 1 + +#include +#include +#include +#include + +int bpf_parse_string(char *arg, bool from_file, __u16 *bpf_len, + char **bpf_string, bool *need_release, + const char separator); +int bpf_parse_ops(int argc, char **argv, struct sock_filter *bpf_ops, + bool from_file); +void bpf_print_ops(FILE *f, struct rtattr *bpf_ops, __u16 len); + +#endif From 86ab59a6660f12302049cb3ad88fb2c2c9a716f2 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Mon, 19 Jan 2015 16:56:30 +0100 Subject: [PATCH 17/23] tc: add support for BPF based actions Signed-off-by: Jiri Pirko --- include/linux/tc_act/tc_bpf.h | 31 ++++++ tc/Makefile | 1 + tc/m_bpf.c | 183 ++++++++++++++++++++++++++++++++++ 3 files changed, 215 insertions(+) create mode 100644 include/linux/tc_act/tc_bpf.h create mode 100644 tc/m_bpf.c diff --git a/include/linux/tc_act/tc_bpf.h b/include/linux/tc_act/tc_bpf.h new file mode 100644 index 00000000..5288bd77 --- /dev/null +++ b/include/linux/tc_act/tc_bpf.h @@ -0,0 +1,31 @@ +/* + * Copyright (c) 2015 Jiri Pirko + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ */ + +#ifndef __LINUX_TC_BPF_H +#define __LINUX_TC_BPF_H + +#include + +#define TCA_ACT_BPF 13 + +struct tc_act_bpf { + tc_gen; +}; + +enum { + TCA_ACT_BPF_UNSPEC, + TCA_ACT_BPF_TM, + TCA_ACT_BPF_PARMS, + TCA_ACT_BPF_OPS_LEN, + TCA_ACT_BPF_OPS, + __TCA_ACT_BPF_MAX, +}; +#define TCA_ACT_BPF_MAX (__TCA_ACT_BPF_MAX - 1) + +#endif diff --git a/tc/Makefile b/tc/Makefile index 15f68ce0..d831a153 100644 --- a/tc/Makefile +++ b/tc/Makefile @@ -46,6 +46,7 @@ TCMODULES += m_skbedit.o TCMODULES += m_csum.o TCMODULES += m_simple.o TCMODULES += m_vlan.o +TCMODULES += m_bpf.o TCMODULES += p_ip.o TCMODULES += p_icmp.o TCMODULES += p_tcp.o diff --git a/tc/m_bpf.c b/tc/m_bpf.c new file mode 100644 index 00000000..611135ea --- /dev/null +++ b/tc/m_bpf.c @@ -0,0 +1,183 @@ +/* + * m_bpf.c BPF based action module + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * Authors: Jiri Pirko + */ + +#include +#include +#include +#include +#include +#include + +#include "utils.h" +#include "rt_names.h" +#include "tc_util.h" +#include "tc_bpf.h" + +static void explain(void) +{ + fprintf(stderr, "Usage: ... bpf ...\n"); + fprintf(stderr, "\n"); + fprintf(stderr, " [inline]: run bytecode BPF_BYTECODE\n"); + fprintf(stderr, " [from file]: run bytecode-file FILE\n"); + fprintf(stderr, "\n"); + fprintf(stderr, "Where BPF_BYTECODE := \'s,c t f k,c t f k,c t f k,...\'\n"); + fprintf(stderr, " c,t,f,k and s are decimals; s denotes number of 4-tuples\n"); + fprintf(stderr, "Where FILE points to a file containing the BPF_BYTECODE string\n"); + fprintf(stderr, "\nACTION_SPEC := ... 
look at individual actions\n"); + fprintf(stderr, "NOTE: CLASSID is parsed as hexadecimal input.\n"); +} + +static void usage(void) +{ + explain(); + exit(-1); +} + +static int parse_bpf(struct action_util *a, int *argc_p, char ***argv_p, + int tca_id, struct nlmsghdr *n) +{ + int argc = *argc_p; + char **argv = *argv_p; + struct rtattr *tail; + struct tc_act_bpf parm = { 0 }; + struct sock_filter bpf_ops[BPF_MAXINSNS]; + __u16 bpf_len = 0; + + if (matches(*argv, "bpf") != 0) + return -1; + + NEXT_ARG(); + + while (argc > 0) { + if (matches(*argv, "run") == 0) { + bool from_file; + int ret; + + NEXT_ARG(); + if (strcmp(*argv, "bytecode-file") == 0) { + from_file = true; + } else if (strcmp(*argv, "bytecode") == 0) { + from_file = false; + } else { + fprintf(stderr, "unexpected \"%s\"\n", *argv); + explain(); + return -1; + } + NEXT_ARG(); + ret = bpf_parse_ops(argc, argv, bpf_ops, from_file); + if (ret < 0) { + fprintf(stderr, "Illegal \"bytecode\"\n"); + return -1; + } + bpf_len = ret; + } else if (matches(*argv, "help") == 0) { + usage(); + } else { + break; + } + argc--; + argv++; + } + + parm.action = TC_ACT_PIPE; + if (argc) { + if (matches(*argv, "reclassify") == 0) { + parm.action = TC_ACT_RECLASSIFY; + NEXT_ARG(); + } else if (matches(*argv, "pipe") == 0) { + parm.action = TC_ACT_PIPE; + NEXT_ARG(); + } else if (matches(*argv, "drop") == 0 || + matches(*argv, "shot") == 0) { + parm.action = TC_ACT_SHOT; + NEXT_ARG(); + } else if (matches(*argv, "continue") == 0) { + parm.action = TC_ACT_UNSPEC; + NEXT_ARG(); + } else if (matches(*argv, "pass") == 0) { + parm.action = TC_ACT_OK; + NEXT_ARG(); + } + } + + if (argc) { + if (matches(*argv, "index") == 0) { + NEXT_ARG(); + if (get_u32(&parm.index, *argv, 10)) { + fprintf(stderr, "bpf: Illegal \"index\"\n"); + return -1; + } + argc--; + argv++; + } + } + + if (!bpf_len) { + fprintf(stderr, "bpf: Bytecode needs to be passed\n"); + explain(); + return -1; + } + + tail = NLMSG_TAIL(n); + addattr_l(n, MAX_MSG, 
tca_id, NULL, 0); + addattr_l(n, MAX_MSG, TCA_ACT_BPF_PARMS, &parm, sizeof(parm)); + addattr16(n, MAX_MSG, TCA_ACT_BPF_OPS_LEN, bpf_len); + addattr_l(n, MAX_MSG, TCA_ACT_BPF_OPS, &bpf_ops, + bpf_len * sizeof(struct sock_filter)); + tail->rta_len = (char *)NLMSG_TAIL(n) - (char *)tail; + + *argc_p = argc; + *argv_p = argv; + return 0; +} + +static int print_bpf(struct action_util *au, FILE *f, struct rtattr *arg) +{ + struct rtattr *tb[TCA_ACT_BPF_MAX + 1]; + struct tc_act_bpf *parm; + + if (arg == NULL) + return -1; + + parse_rtattr_nested(tb, TCA_ACT_BPF_MAX, arg); + + if (!tb[TCA_ACT_BPF_PARMS]) { + fprintf(f, "[NULL bpf parameters]"); + return -1; + } + parm = RTA_DATA(tb[TCA_ACT_BPF_PARMS]); + + fprintf(f, " bpf "); + + if (tb[TCA_ACT_BPF_OPS] && tb[TCA_ACT_BPF_OPS_LEN]) + bpf_print_ops(f, tb[TCA_ACT_BPF_OPS], + rta_getattr_u16(tb[TCA_ACT_BPF_OPS_LEN])); + + fprintf(f, "\n\tindex %d ref %d bind %d", parm->index, parm->refcnt, + parm->bindcnt); + + if (show_stats) { + if (tb[TCA_ACT_BPF_TM]) { + struct tcf_t *tm = RTA_DATA(tb[TCA_ACT_BPF_TM]); + print_tm(f, tm); + } + } + + fprintf(f, "\n "); + + return 0; +} + +struct action_util bpf_action_util = { + .id = "bpf", + .parse_aopt = parse_bpf, + .print_aopt = print_bpf, +}; From 3ac0d36ddde9715a8d027a919eb564f7b42c6c40 Mon Sep 17 00:00:00 2001 From: Roopa Prabhu Date: Sun, 25 Jan 2015 18:26:24 -0800 Subject: [PATCH 18/23] iproute2: bridge: support vlan range adds This patch adds vlan range support to bridge add command using the newly added vinfo flags BRIDGE_VLAN_INFO_RANGE_BEGIN and BRIDGE_VLAN_INFO_RANGE_END. 
$bridge vlan show port vlan ids br0 1 PVID Egress Untagged dummy0 1 PVID Egress Untagged $bridge vlan add vid 10-15 dev dummy0 port vlan ids br0 1 PVID Egress Untagged dummy0 1 PVID Egress Untagged 10 11 12 13 14 15 $bridge vlan del vid 14 dev dummy0 $bridge vlan show port vlan ids br0 1 PVID Egress Untagged dummy0 1 PVID Egress Untagged 10 11 12 13 15 $bridge vlan del vid 10-15 dev dummy0 $bridge vlan show port vlan ids br0 1 PVID Egress Untagged dummy0 1 PVID Egress Untagged Signed-off-by: Roopa Prabhu Signed-off-by: Wilson Kok --- bridge/vlan.c | 44 ++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 40 insertions(+), 4 deletions(-) diff --git a/bridge/vlan.c b/bridge/vlan.c index 3bd7b0db..88992e63 100644 --- a/bridge/vlan.c +++ b/bridge/vlan.c @@ -32,6 +32,7 @@ static int vlan_modify(int cmd, int argc, char **argv) } req; char *d = NULL; short vid = -1; + short vid_end = -1; struct rtattr *afspec; struct bridge_vlan_info vinfo; unsigned short flags = 0; @@ -49,8 +50,18 @@ static int vlan_modify(int cmd, int argc, char **argv) NEXT_ARG(); d = *argv; } else if (strcmp(*argv, "vid") == 0) { + char *p; NEXT_ARG(); - vid = atoi(*argv); + p = strchr(*argv, '-'); + if (p) { + *p = '\0'; + p++; + vid = atoi(*argv); + vid_end = atoi(p); + vinfo.flags |= BRIDGE_VLAN_INFO_RANGE_BEGIN; + } else { + vid = atoi(*argv); + } } else if (strcmp(*argv, "self") == 0) { flags |= BRIDGE_FLAGS_SELF; } else if (strcmp(*argv, "master") == 0) { @@ -83,15 +94,40 @@ static int vlan_modify(int cmd, int argc, char **argv) return -1; } - vinfo.vid = vid; + if (vinfo.flags & BRIDGE_VLAN_INFO_RANGE_BEGIN) { + if (vid_end == -1 || vid_end >= 4096 || vid >= vid_end) { + fprintf(stderr, "Invalid VLAN range \"%hu-%hu\"\n", + vid, vid_end); + return -1; + } + if (vinfo.flags & BRIDGE_VLAN_INFO_PVID) { + fprintf(stderr, + "pvid cannot be configured for a vlan range\n"); + return -1; + } + } afspec = addattr_nest(&req.n, sizeof(req), IFLA_AF_SPEC); if (flags) addattr16(&req.n, sizeof(req), 
IFLA_BRIDGE_FLAGS, flags); - addattr_l(&req.n, sizeof(req), IFLA_BRIDGE_VLAN_INFO, &vinfo, - sizeof(vinfo)); + vinfo.vid = vid; + if (vid_end != -1) { + /* send vlan range start */ + addattr_l(&req.n, sizeof(req), IFLA_BRIDGE_VLAN_INFO, &vinfo, + sizeof(vinfo)); + vinfo.flags &= ~BRIDGE_VLAN_INFO_RANGE_BEGIN; + + /* Now send the vlan range end */ + vinfo.flags |= BRIDGE_VLAN_INFO_RANGE_END; + vinfo.vid = vid_end; + addattr_l(&req.n, sizeof(req), IFLA_BRIDGE_VLAN_INFO, &vinfo, + sizeof(vinfo)); + } else { + addattr_l(&req.n, sizeof(req), IFLA_BRIDGE_VLAN_INFO, &vinfo, + sizeof(vinfo)); + } addattr_nest_end(&req.n, afspec); From a2f7934dd04f8e762cc7a3f5f2e7a1edd4a93643 Mon Sep 17 00:00:00 2001 From: Roopa Prabhu Date: Sun, 25 Jan 2015 18:26:25 -0800 Subject: [PATCH 19/23] iproute2: bridge vlan show new option to print ranges Introduce new option -c[ompressvlans] to request vlan ranges from kernel (pls suggest better option names if this does not look ok) $bridge vlan show port vlan ids dummy0 1 PVID Egress Untagged dummy1 1 PVID Egress Untagged 2 3 4 5 6 7 9 10 12 br0 1 PVID Egress Untagged $bridge help Usage: bridge [ OPTIONS ] OBJECT { COMMAND | help } where OBJECT := { link | fdb | mdb | vlan | monitor } OPTIONS := { -V[ersion] | -s[tatistics] | -d[etails] | -o[neline] | -t[imestamp] | -n[etns] name | -c[ompressvlans] } $bridge -c vlan show port vlan ids dummy0 1 PVID Egress Untagged dummy1 1 PVID Egress Untagged 2-7 9-10 12 br0 1 PVID Egress Untagged Signed-off-by: Roopa Prabhu --- bridge/br_common.h | 1 + bridge/bridge.c | 6 +++++- bridge/vlan.c | 11 +++++++++-- 3 files changed, 15 insertions(+), 3 deletions(-) diff --git a/bridge/br_common.h b/bridge/br_common.h index 12fce3ef..169a162d 100644 --- a/bridge/br_common.h +++ b/bridge/br_common.h @@ -16,4 +16,5 @@ extern int preferred_family; extern int show_stats; extern int show_details; extern int timestamp; +extern int compress_vlans; extern struct rtnl_handle rth; diff --git a/bridge/bridge.c b/bridge/bridge.c 
index 5fcc552b..88469ca2 100644 --- a/bridge/bridge.c +++ b/bridge/bridge.c @@ -21,6 +21,7 @@ int resolve_hosts; int oneline = 0; int show_stats; int show_details; +int compress_vlans; int timestamp; char * _SL_ = NULL; @@ -32,7 +33,8 @@ static void usage(void) "Usage: bridge [ OPTIONS ] OBJECT { COMMAND | help }\n" "where OBJECT := { link | fdb | mdb | vlan | monitor }\n" " OPTIONS := { -V[ersion] | -s[tatistics] | -d[etails] |\n" -" -o[neline] | -t[imestamp] | -n[etns] name }\n"); +" -o[neline] | -t[imestamp] | -n[etns] name |\n" +" -c[ompressvlans] }\n"); exit(-1); } @@ -117,6 +119,8 @@ main(int argc, char **argv) NEXT_ARG(); if (netns_switch(argv[1])) exit(-1); + } else if (matches(opt, "-compressvlans") == 0) { + ++compress_vlans; } else { fprintf(stderr, "Option \"%s\" is unknown, try \"bridge help\".\n", opt); exit(-1); diff --git a/bridge/vlan.c b/bridge/vlan.c index 88992e63..9f6c84ee 100644 --- a/bridge/vlan.c +++ b/bridge/vlan.c @@ -182,7 +182,12 @@ static int print_vlan(const struct sockaddr_nl *who, continue; vinfo = RTA_DATA(i); - fprintf(fp, "\t %hu", vinfo->vid); + if (vinfo->flags & BRIDGE_VLAN_INFO_RANGE_END) + fprintf(fp, "-%hu", vinfo->vid); + else + fprintf(fp, "\t %hu", vinfo->vid); + if (vinfo->flags & BRIDGE_VLAN_INFO_RANGE_BEGIN) + continue; if (vinfo->flags & BRIDGE_VLAN_INFO_PVID) fprintf(fp, " PVID"); if (vinfo->flags & BRIDGE_VLAN_INFO_UNTAGGED) @@ -218,7 +223,9 @@ static int vlan_show(int argc, char **argv) } if (rtnl_wilddump_req_filter(&rth, PF_BRIDGE, RTM_GETLINK, - RTEXT_FILTER_BRVLAN) < 0) { + (compress_vlans ? 
+ RTEXT_FILTER_BRVLAN_COMPRESSED : + RTEXT_FILTER_BRVLAN)) < 0) { perror("Cannont send dump request"); exit(1); } From 858dbb208e3934525674252a6b6cf7d36a9de191 Mon Sep 17 00:00:00 2001 From: Tom Herbert Date: Thu, 29 Jan 2015 08:51:58 -0800 Subject: [PATCH 20/23] ip link: Add support for remote checksum offload to IP tunnels This patch adds support for remote checksum offload configuration for IPIP, SIT, and GRE tunnels. This patch adds a [no]encap-remcsum option to the ip link command, which is applicable when configuring tunnels that use GUE. http://tools.ietf.org/html/draft-herbert-remotecsumoffload-00 Example: ip link add name tun1 type gre remote 192.168.1.1 local 192.168.1.2 \ ttl 225 encap fou encap-sport auto encap-dport 7777 encap-csum \ encap-remcsum This would create a GRE tunnel in GUE encapsulation where the source port is automatically selected (based on hash of inner packet), checksums in the encapsulating UDP header are enabled (needed for remote checksum offload), and remote checksum offload is configured to be used on the tunnel (affects TX side).
Signed-off-by: Tom Herbert --- ip/link_gre.c | 11 ++++++++++- ip/link_iptnl.c | 11 ++++++++++- 2 files changed, 20 insertions(+), 2 deletions(-) diff --git a/ip/link_gre.c b/ip/link_gre.c index 47b64cb8..1d783876 100644 --- a/ip/link_gre.c +++ b/ip/link_gre.c @@ -31,7 +31,7 @@ static void print_usage(FILE *f) fprintf(f, " [ ttl TTL ] [ tos TOS ] [ [no]pmtudisc ] [ dev PHYS_DEV ]\n"); fprintf(f, " [ noencap ] [ encap { fou | gue | none } ]\n"); fprintf(f, " [ encap-sport PORT ] [ encap-dport PORT ]\n"); - fprintf(f, " [ [no]encap-csum ] [ [no]encap-csum6 ]\n"); + fprintf(f, " [ [no]encap-csum ] [ [no]encap-csum6 ] [ [no]encap-remcsum ]\n"); fprintf(f, "\n"); fprintf(f, "Where: NAME := STRING\n"); fprintf(f, " ADDR := { IP_ADDRESS | any }\n"); @@ -287,6 +287,10 @@ get_failed: encapflags |= TUNNEL_ENCAP_FLAG_CSUM6; } else if (strcmp(*argv, "noencap-udp6-csum") == 0) { encapflags |= ~TUNNEL_ENCAP_FLAG_CSUM6; + } else if (strcmp(*argv, "encap-remcsum") == 0) { + encapflags |= TUNNEL_ENCAP_FLAG_REMCSUM; + } else if (strcmp(*argv, "noencap-remcsum") == 0) { + encapflags &= ~TUNNEL_ENCAP_FLAG_REMCSUM; } else usage(); argc--; argv++; @@ -445,6 +449,11 @@ static void gre_print_opt(struct link_util *lu, FILE *f, struct rtattr *tb[]) fputs("encap-csum6 ", f); else fputs("noencap-csum6 ", f); + + if (flags & TUNNEL_ENCAP_FLAG_REMCSUM) + fputs("encap-remcsum ", f); + else + fputs("noencap-remcsum ", f); } } diff --git a/ip/link_iptnl.c b/ip/link_iptnl.c index 94871178..cab174f9 100644 --- a/ip/link_iptnl.c +++ b/ip/link_iptnl.c @@ -31,7 +31,7 @@ static void print_usage(FILE *f, int sit) fprintf(f, " [ 6rd-prefix ADDR ] [ 6rd-relay_prefix ADDR ] [ 6rd-reset ]\n"); fprintf(f, " [ noencap ] [ encap { fou | gue | none } ]\n"); fprintf(f, " [ encap-sport PORT ] [ encap-dport PORT ]\n"); - fprintf(f, " [ [no]encap-csum ] [ [no]encap-csum6 ]\n"); + fprintf(f, " [ [no]encap-csum ] [ [no]encap-csum6 ] [ [no]encap-remcsum ]\n"); if (sit) { fprintf(f, " [ mode { ip6ip | ipip | any } ]\n");
fprintf(f, " [ isatap ]\n"); @@ -256,6 +256,10 @@ get_failed: encapflags |= TUNNEL_ENCAP_FLAG_CSUM6; } else if (strcmp(*argv, "noencap-udp6-csum") == 0) { encapflags &= ~TUNNEL_ENCAP_FLAG_CSUM6; + } else if (strcmp(*argv, "encap-remcsum") == 0) { + encapflags |= TUNNEL_ENCAP_FLAG_REMCSUM; + } else if (strcmp(*argv, "noencap-remcsum") == 0) { + encapflags &= ~TUNNEL_ENCAP_FLAG_REMCSUM; } else if (strcmp(*argv, "6rd-prefix") == 0) { inet_prefix prefix; NEXT_ARG(); @@ -438,6 +442,11 @@ static void iptunnel_print_opt(struct link_util *lu, FILE *f, struct rtattr *tb[ fputs("encap-csum6 ", f); else fputs("noencap-csum6 ", f); + + if (flags & TUNNEL_ENCAP_FLAG_REMCSUM) + fputs("encap-remcsum ", f); + else + fputs("noencap-remcsum ", f); } } From 90f1df715eb38bbbb4554ebbad783d67690a38b2 Mon Sep 17 00:00:00 2001 From: Tom Herbert Date: Thu, 29 Jan 2015 08:52:01 -0800 Subject: [PATCH 21/23] iproute: Descriptions of fou and gue options in ip-link man pages Add section for additional arguments to GRE, IPIP, and SIT types that are related to Foo-over-UDP and Generic UDP Encapsulation. Also, added an example GUE configuration in the examples section. Signed-off-by: Tom Herbert --- man/man8/ip-link.8.in | 68 ++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 67 insertions(+), 1 deletion(-) diff --git a/man/man8/ip-link.8.in b/man/man8/ip-link.8.in index 313d6f23..239f7649 100644 --- a/man/man8/ip-link.8.in +++ b/man/man8/ip-link.8.in @@ -350,6 +350,63 @@ are entered into the VXLAN device forwarding database. 
.in -8 +.TP +GRE, IPIP, SIT Type Support +For a link of types +.I GRE/IPIP/SIT +the following additional arguments are supported: + +.BI "ip link add " DEVICE +.BR type " { gre | ipip | sit } " +.BI " remote " ADDR " local " ADDR +.R " [ " +.BR encap " { fou | gue | none } " +.R " ] [ " +.BI "encap-sport { " PORT " | auto } " +.R " ] [ " +.BI "encap-dport " PORT +.R " ] [ " +.I " [no]encap-csum " +.R " ] [ " +.I " [no]encap-remcsum " +.R " ]" + +.in +8 +.sp +.BI remote " ADDR " +- specifies the remote address of the tunnel. + +.sp +.BI local " ADDR " +- specifies the fixed local address for tunneled packets. +It must be an address on another interface on this host. + +.sp +.BR encap " { fou | gue | none } " +- specifies type of secondary UDP encapsulation. "fou" indicates +Foo-Over-UDP, "gue" indicates Generic UDP Encapsulation. + +.sp +.BI "encap-sport { " PORT " | auto } " +- specifies the source port in UDP encapsulation. +.IR PORT +indicates the port by number, "auto" +indicates that the port number should be chosen automatically +(the kernel picks a flow based on the flow hash of the +encapsulated packet). + +.sp +.I [no]encap-csum +- specifies if UDP checksums are enabled in the secondary +encapsulation. + +.sp +.I [no]encap-remcsum +- specifies if Remote Checksum Offload is enabled. This is only +applicable for Generic UDP Encapsulation. + +.in -8 + .TP IP6GRE/IP6GRETAP Type Support For a link of type @@ -386,7 +443,7 @@ the following additional arguments are supported: .sp .BI local " ADDR " - specifies the fixed local IPv6 address for tunneled packets. -It must be and address on another interface on this host. +It must be an address on another interface on this host. .sp .BI [i|o]seq @@ -762,6 +819,15 @@ Removes vlan device. ip link help gre .RS 4 Display help for the gre link type. 
+.RE +.PP +ip link add name tun1 type ipip remote 192.168.1.1 +local 192.168.1.2 ttl 225 encap gue encap-sport auto +encap-dport 5555 encap-csum encap-remcsum +.RS 4 +Creates an IPIP that is encapsulated with Generic UDP Encapsulation, +and the outer UDP checksum and remote checksum offload are enabled. + .RE .SH SEE ALSO From 95ce04bc86c2299ea8fd466d521ba34f930d05df Mon Sep 17 00:00:00 2001 From: Vadim Kochan Date: Sun, 8 Feb 2015 08:58:43 +0200 Subject: [PATCH 22/23] ss: Show stats from specified network namespace Added new '-N NSNAME, --net=NSNAME' option to show socket stats from the specified network namespace name. Signed-off-by: Vadim Kochan --- man/man8/ss.8 | 3 +++ misc/Makefile | 4 ++++ misc/ss.c | 9 ++++++++- 3 files changed, 15 insertions(+), 1 deletion(-) diff --git a/man/man8/ss.8 b/man/man8/ss.8 index 450649ab..b7fbaefa 100644 --- a/man/man8/ss.8 +++ b/man/man8/ss.8 @@ -84,6 +84,9 @@ context of the creating process, however the context shown will reflect any policy role, type and/or range transition rules applied, and is therefore a useful reference. .TP +.B \-N NSNAME, \-\-net=NSNAME +Switch to the specified network namespace name. +.TP .B \-b, \-\-bpf Show socket BPF filters (only administrators are allowed to get these information). 
.TP diff --git a/misc/Makefile b/misc/Makefile index 004bcc3a..b7ecba90 100644 --- a/misc/Makefile +++ b/misc/Makefile @@ -10,6 +10,10 @@ ifeq ($(HAVE_SELINUX),y) CFLAGS += $(shell pkg-config --cflags libselinux) -DHAVE_SELINUX endif +ifeq ($(IP_CONFIG_SETNS),y) + CFLAGS += -DHAVE_SETNS +endif + all: $(TARGETS) ss: $(SSOBJ) diff --git a/misc/ss.c b/misc/ss.c index 7fc0a99e..0a6a65ee 100644 --- a/misc/ss.c +++ b/misc/ss.c @@ -31,6 +31,7 @@ #include "rt_names.h" #include "ll_map.h" #include "libnetlink.h" +#include "namespace.h" #include "SNAPSHOT.h" #include @@ -3246,6 +3247,7 @@ static void _usage(FILE *dest) " -b, --bpf show bpf filter socket information\n" " -Z, --context display process SELinux security contexts\n" " -z, --contexts display process and socket SELinux security contexts\n" +" -N, --net switch to the specified network namespace name\n" "\n" " -4, --ipv4 display only IP version 4 sockets\n" " -6, --ipv6 display only IP version 6 sockets\n" @@ -3345,6 +3347,7 @@ static const struct option long_opts[] = { { "help", 0, 0, 'h' }, { "context", 0, 0, 'Z' }, { "contexts", 0, 0, 'z' }, + { "net", 1, 0, 'N' }, { 0 } }; @@ -3360,7 +3363,7 @@ int main(int argc, char *argv[]) struct filter dbs_filter = {}; int state_filter = 0; - while ((ch = getopt_long(argc, argv, "dhaletuwxnro460spbf:miA:D:F:vVzZ", + while ((ch = getopt_long(argc, argv, "dhaletuwxnro460spbf:miA:D:F:vVzZN:", long_opts, NULL)) != EOF) { switch(ch) { case 'n': @@ -3532,6 +3535,10 @@ int main(int argc, char *argv[]) show_proc_ctx++; user_ent_hash_build(); break; + case 'N': + if (netns_switch(optarg)) + exit(1); + break; case 'h': case '?': help(); From 46d364fe8fa9fd6f5ca3989b572a2548d9caf56a Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Tue, 10 Feb 2015 15:14:32 -0800 Subject: [PATCH 23/23] v3.19.0 --- include/SNAPSHOT.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/SNAPSHOT.h b/include/SNAPSHOT.h index 46e6d4ae..35bda66a 100644 --- a/include/SNAPSHOT.h +++ 
b/include/SNAPSHOT.h @@ -1 +1 @@ -static const char SNAPSHOT[] = "141224"; +static const char SNAPSHOT[] = "150210";