From d48ed3f4518046cc6033224575c7ade55e73f9aa Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Fri, 13 Sep 2013 10:31:41 -0700 Subject: [PATCH 01/25] nstat: add json output format New command line flag to output statistics in JSON format. In our environment, we have scripts that parse output of commands. It is better to use a format supported by existing parsers. --- misc/nstat.c | 46 ++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 40 insertions(+), 6 deletions(-) diff --git a/misc/nstat.c b/misc/nstat.c index b6859787..5a4cfe94 100644 --- a/misc/nstat.c +++ b/misc/nstat.c @@ -33,6 +33,7 @@ int dump_zeros = 0; int reset_history = 0; int ignore_history = 0; int no_output = 0; +int json_output = 0; int no_update = 0; int scan_interval = 0; int time_constant = 0; @@ -255,11 +256,17 @@ static void load_netstat(void) } } + static void dump_kern_db(FILE *fp, int to_hist) { struct nstat_ent *n, *h; + const char *eol = "\n"; + h = hist_db; - fprintf(fp, "#%s\n", info_source); + if (json_output) + fprintf(fp, "{ \"%s\": [", info_source); + else + fprintf(fp, "#%s\n", info_source); for (n=kern_db; n; n=n->next) { unsigned long long val = n->val; if (!dump_zeros && !val && !n->rate) @@ -276,15 +283,30 @@ static void dump_kern_db(FILE *fp, int to_hist) } } } - fprintf(fp, "%-32s%-16llu%6.1f\n", n->id, val, n->rate); + + if (json_output) { + fprintf(fp, "%s { \"id\":\"%s\", \"val\":%llu," + " \"rate\":%.1f }", + eol, n->id, val, n->rate); + eol = ",\n"; + } else + fprintf(fp, "%-32s%-16llu%6.1f\n", n->id, val, n->rate); } + if (json_output) + fprintf(fp, "\n] }\n"); } static void dump_incr_db(FILE *fp) { struct nstat_ent *n, *h; + const char *eol = "\n"; + h = hist_db; - fprintf(fp, "#%s\n", info_source); + if (json_output) + fprintf(fp, "{ \"%s\": [", info_source); + else + fprintf(fp, "#%s\n", info_source); + for (n=kern_db; n; n=n->next) { int ovfl = 0; unsigned long long val = n->val; @@ -304,9 +326,18 @@ static void dump_incr_db(FILE *fp) continue; if 
(!match(n->id)) continue; - fprintf(fp, "%-32s%-16llu%6.1f%s\n", n->id, val, - n->rate, ovfl?" (overflow)":""); + + if (json_output) { + fprintf(fp, "%s { \"id\":\"%s\", \"val\":%llu," + " \"rate\":%.1f, \"overflow\":%d }", + eol, n->id, val, n->rate, ovfl); + eol = ",\n"; + } else + fprintf(fp, "%-32s%-16llu%6.1f%s\n", n->id, val, + n->rate, ovfl?" (overflow)":""); } + if (json_output) + fprintf(fp, "\n] }\n"); } static int children; @@ -451,7 +482,7 @@ int main(int argc, char *argv[]) int ch; int fd; - while ((ch = getopt(argc, argv, "h?vVzrnasd:t:")) != EOF) { + while ((ch = getopt(argc, argv, "h?vVzrnasd:t:j")) != EOF) { switch(ch) { case 'z': dump_zeros = 1; @@ -478,6 +509,9 @@ int main(int argc, char *argv[]) exit(-1); } break; + case 'j': + json_output = 1; + break; case 'v': case 'V': printf("nstat utility, iproute2-ss%s\n", SNAPSHOT); From bc113e46a3cd06973b37b13c06db3f5f459502f8 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 29 Aug 2013 19:30:36 -0700 Subject: [PATCH 02/25] pkt_sched: fq: Fair Queue packet scheduler Support for FQ packet scheduler $ tc qd add dev eth0 root fq help Usage: ... fq [ limit PACKETS ] [ flow_limit PACKETS ] [ quantum BYTES ] [ initial_quantum BYTES ] [ maxrate RATE ] [ buckets NUMBER ] [ [no]pacing ] $ tc -s -d qd qdisc fq 8002: dev eth0 root refcnt 32 limit 10000p flow_limit 100p buckets 256 quantum 3028 initial_quantum 15140 Sent 216532416 bytes 148395 pkt (dropped 0, overlimits 0 requeues 14) backlog 0b 0p requeues 14 511 flows (511 inactive, 0 throttled) 110 gc, 0 highprio, 0 retrans, 1143 throttled, 0 flows_plimit limit : max number of packets on whole Qdisc (default 10000) flow_limit : max number of packets per flow (default 100) quantum : the max deficit per RR round (default is 2 MTU) initial_quantum : initial credit for new flows (default is 10 MTU) maxrate : max per flow rate (default : unlimited) buckets : number of RB trees (default : 1024) in hash table. 
(consumes 8 bytes per bucket) [no]pacing : disable/enable pacing (default is enable) Usage : tc qdisc add dev $ETH root fq tc qdisc del dev $ETH root 2>/dev/null tc qdisc add dev $ETH root handle 1: mq for i in `seq 1 4` do tc qdisc add dev $ETH parent 1:$i est 1sec 4sec fq done Signed-off-by: Eric Dumazet --- tc/Makefile | 1 + tc/q_fq.c | 279 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 280 insertions(+) create mode 100644 tc/q_fq.c diff --git a/tc/Makefile b/tc/Makefile index f26e7646..1eeabd8c 100644 --- a/tc/Makefile +++ b/tc/Makefile @@ -50,6 +50,7 @@ TCMODULES += em_meta.o TCMODULES += q_mqprio.o TCMODULES += q_codel.o TCMODULES += q_fq_codel.o +TCMODULES += q_fq.o ifeq ($(TC_CONFIG_IPSET), y) ifeq ($(TC_CONFIG_XT), y) diff --git a/tc/q_fq.c b/tc/q_fq.c new file mode 100644 index 00000000..c1f658e0 --- /dev/null +++ b/tc/q_fq.c @@ -0,0 +1,279 @@ +/* + * Fair Queue + * + * Copyright (C) 2013 Eric Dumazet + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions, and the following disclaimer, + * without modification. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. The names of the authors may not be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * Alternatively, provided that this notice is retained in full, this + * software may be distributed under the terms of the GNU General + * Public License ("GPL") version 2, in which case the provisions of the + * GPL apply INSTEAD OF those given above. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH + * DAMAGE. + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "utils.h" +#include "tc_util.h" + +static void explain(void) +{ + fprintf(stderr, "Usage: ... 
fq [ limit PACKETS ] [ flow_limit PACKETS ]\n"); + fprintf(stderr, " [ quantum BYTES ] [ initial_quantum BYTES ]\n"); + fprintf(stderr, " [ maxrate RATE ] [ buckets NUMBER ]\n"); + fprintf(stderr, " [ [no]pacing ]\n"); +} + +static unsigned int ilog2(unsigned int val) +{ + unsigned int res = 0; + + val--; + while (val) { + res++; + val >>= 1; + } + return res; +} + +static int fq_parse_opt(struct qdisc_util *qu, int argc, char **argv, + struct nlmsghdr *n) +{ + unsigned int plimit = ~0U; + unsigned int flow_plimit = ~0U; + unsigned int quantum = ~0U; + unsigned int initial_quantum = ~0U; + unsigned int buckets = 0; + unsigned int maxrate = ~0U; + unsigned int defrate = ~0U; + int pacing = -1; + struct rtattr *tail; + + while (argc > 0) { + if (strcmp(*argv, "limit") == 0) { + NEXT_ARG(); + if (get_unsigned(&plimit, *argv, 0)) { + fprintf(stderr, "Illegal \"limit\"\n"); + return -1; + } + } else if (strcmp(*argv, "flow_limit") == 0) { + NEXT_ARG(); + if (get_unsigned(&flow_plimit, *argv, 0)) { + fprintf(stderr, "Illegal \"flow_limit\"\n"); + return -1; + } + } else if (strcmp(*argv, "buckets") == 0) { + NEXT_ARG(); + if (get_unsigned(&buckets, *argv, 0)) { + fprintf(stderr, "Illegal \"buckets\"\n"); + return -1; + } + } else if (strcmp(*argv, "maxrate") == 0) { + NEXT_ARG(); + if (get_rate(&maxrate, *argv)) { + fprintf(stderr, "Illegal \"maxrate\"\n"); + return -1; + } + } else if (strcmp(*argv, "defrate") == 0) { + NEXT_ARG(); + if (get_rate(&defrate, *argv)) { + fprintf(stderr, "Illegal \"defrate\"\n"); + return -1; + } + } else if (strcmp(*argv, "quantum") == 0) { + NEXT_ARG(); + if (get_unsigned(&quantum, *argv, 0)) { + fprintf(stderr, "Illegal \"quantum\"\n"); + return -1; + } + } else if (strcmp(*argv, "initial_quantum") == 0) { + NEXT_ARG(); + if (get_unsigned(&initial_quantum, *argv, 0)) { + fprintf(stderr, "Illegal \"initial_quantum\"\n"); + return -1; + } + } else if (strcmp(*argv, "pacing") == 0) { + pacing = 1; + } else if (strcmp(*argv, "nopacing") == 
0) { + pacing = 0; + } else if (strcmp(*argv, "help") == 0) { + explain(); + return -1; + } else { + fprintf(stderr, "What is \"%s\"?\n", *argv); + explain(); + return -1; + } + argc--; argv++; + } + + tail = NLMSG_TAIL(n); + addattr_l(n, 1024, TCA_OPTIONS, NULL, 0); + if (buckets) { + unsigned int log = ilog2(buckets); + + addattr_l(n, 1024, TCA_FQ_BUCKETS_LOG, + &log, sizeof(log)); + } + if (plimit != ~0U) + addattr_l(n, 1024, TCA_FQ_PLIMIT, + &plimit, sizeof(plimit)); + if (flow_plimit != ~0U) + addattr_l(n, 1024, TCA_FQ_FLOW_PLIMIT, + &flow_plimit, sizeof(flow_plimit)); + if (quantum != ~0U) + addattr_l(n, 1024, TCA_FQ_QUANTUM, &quantum, sizeof(quantum)); + if (initial_quantum != ~0U) + addattr_l(n, 1024, TCA_FQ_INITIAL_QUANTUM, + &initial_quantum, sizeof(initial_quantum)); + if (pacing != -1) + addattr_l(n, 1024, TCA_FQ_RATE_ENABLE, + &pacing, sizeof(pacing)); + if (maxrate != ~0U) + addattr_l(n, 1024, TCA_FQ_FLOW_MAX_RATE, + &maxrate, sizeof(maxrate)); + if (defrate != ~0U) + addattr_l(n, 1024, TCA_FQ_FLOW_DEFAULT_RATE, + &defrate, sizeof(defrate)); + tail->rta_len = (void *) NLMSG_TAIL(n) - (void *) tail; + return 0; +} + +static int fq_print_opt(struct qdisc_util *qu, FILE *f, struct rtattr *opt) +{ + struct rtattr *tb[TCA_FQ_MAX + 1]; + unsigned int plimit, flow_plimit; + unsigned int buckets_log; + int pacing; + unsigned int rate, quantum; + SPRINT_BUF(b1); + + if (opt == NULL) + return 0; + + parse_rtattr_nested(tb, TCA_FQ_MAX, opt); + + if (tb[TCA_FQ_PLIMIT] && + RTA_PAYLOAD(tb[TCA_FQ_PLIMIT]) >= sizeof(__u32)) { + plimit = rta_getattr_u32(tb[TCA_FQ_PLIMIT]); + fprintf(f, "limit %up ", plimit); + } + if (tb[TCA_FQ_FLOW_PLIMIT] && + RTA_PAYLOAD(tb[TCA_FQ_FLOW_PLIMIT]) >= sizeof(__u32)) { + flow_plimit = rta_getattr_u32(tb[TCA_FQ_FLOW_PLIMIT]); + fprintf(f, "flow_limit %up ", flow_plimit); + } + if (tb[TCA_FQ_BUCKETS_LOG] && + RTA_PAYLOAD(tb[TCA_FQ_BUCKETS_LOG]) >= sizeof(__u32)) { + buckets_log = rta_getattr_u32(tb[TCA_FQ_BUCKETS_LOG]); + fprintf(f, 
"buckets %u ", 1U << buckets_log); + } + if (tb[TCA_FQ_RATE_ENABLE] && + RTA_PAYLOAD(tb[TCA_FQ_RATE_ENABLE]) >= sizeof(int)) { + pacing = rta_getattr_u32(tb[TCA_FQ_RATE_ENABLE]); + if (pacing == 0) + fprintf(f, "nopacing "); + } + if (tb[TCA_FQ_QUANTUM] && + RTA_PAYLOAD(tb[TCA_FQ_QUANTUM]) >= sizeof(__u32)) { + quantum = rta_getattr_u32(tb[TCA_FQ_QUANTUM]); + fprintf(f, "quantum %u ", quantum); + } + if (tb[TCA_FQ_INITIAL_QUANTUM] && + RTA_PAYLOAD(tb[TCA_FQ_INITIAL_QUANTUM]) >= sizeof(__u32)) { + quantum = rta_getattr_u32(tb[TCA_FQ_INITIAL_QUANTUM]); + fprintf(f, "initial_quantum %u ", quantum); + } + if (tb[TCA_FQ_FLOW_MAX_RATE] && + RTA_PAYLOAD(tb[TCA_FQ_FLOW_MAX_RATE]) >= sizeof(__u32)) { + rate = rta_getattr_u32(tb[TCA_FQ_FLOW_MAX_RATE]); + + if (rate != ~0U) + fprintf(f, "maxrate %s ", sprint_rate(rate, b1)); + } + if (tb[TCA_FQ_FLOW_DEFAULT_RATE] && + RTA_PAYLOAD(tb[TCA_FQ_FLOW_DEFAULT_RATE]) >= sizeof(__u32)) { + rate = rta_getattr_u32(tb[TCA_FQ_FLOW_DEFAULT_RATE]); + + if (rate != 0) + fprintf(f, "defrate %s ", sprint_rate(rate, b1)); + } + + return 0; +} + +static int fq_print_xstats(struct qdisc_util *qu, FILE *f, + struct rtattr *xstats) +{ + struct tc_fq_qd_stats *st; + + if (xstats == NULL) + return 0; + + if (RTA_PAYLOAD(xstats) < sizeof(*st)) + return -1; + + st = RTA_DATA(xstats); + + fprintf(f, " %u flows (%u inactive, %u throttled)", + st->flows, st->inactive_flows, st->throttled_flows); + + if (st->time_next_delayed_flow > 0) + fprintf(f, ", next packet delay %llu ns", st->time_next_delayed_flow); + + fprintf(f, "\n %llu gc, %llu highprio", + st->gc_flows, st->highprio_packets); + + if (st->tcp_retrans) + fprintf(f, ", %llu retrans", st->tcp_retrans); + + fprintf(f, ", %llu throttled", st->throttled); + + if (st->flows_plimit) + fprintf(f, ", %llu flows_plimit", st->flows_plimit); + + if (st->pkts_too_long || st->allocation_errors) + fprintf(f, "\n %llu too long pkts, %llu alloc errors\n", + st->pkts_too_long, st->allocation_errors); + + return 
0; +} + +struct qdisc_util fq_qdisc_util = { + .id = "fq", + .parse_qopt = fq_parse_opt, + .print_qopt = fq_print_opt, + .print_xstats = fq_print_xstats, +}; From 54e9c3a34d4ba8a0890f3bf21d708342329461b5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Petr=20P=C3=ADsa=C5=99?= Date: Thu, 19 Sep 2013 10:41:26 +0200 Subject: [PATCH 03/25] iproute2: bridge: document mdb MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This augments bridge(8) manual page with `bridge mdb' and `bridge monitor mdb' commands which have been added recently. Signed-off-by: Petr Písař --- man/man8/bridge.8 | 88 +++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 85 insertions(+), 3 deletions(-) diff --git a/man/man8/bridge.8 b/man/man8/bridge.8 index 66678b5b..9a34804f 100644 --- a/man/man8/bridge.8 +++ b/man/man8/bridge.8 @@ -13,7 +13,7 @@ bridge \- show / manipulate bridge addresses and devices .ti -8 .IR OBJECT " := { " -.BR link " | " fdb " | " vlan " | " monitor " }" +.BR link " | " fdb " | " mdb " | " vlan " | " monitor " }" .sp .ti -8 @@ -64,6 +64,21 @@ bridge \- show / manipulate bridge addresses and devices .B dev .IR DEV " ]" +.ti -8 +.BR "bridge mdb" " { " add " | " del " } " +.B dev +.IR DEV +.B port +.IR PORT +.B grp +.IR GROUP " [ " +.BR permanent " | " temp " ]" + +.ti -8 +.BR "bridge mdb show " [ " +.B dev +.IR DEV " ]" + .ti -8 .BR "bridge vlan" " { " add " | " del " } " .B dev @@ -79,7 +94,7 @@ bridge \- show / manipulate bridge addresses and devices .IR DEV " ]" .ti -8 -.BR "bridge monitor" " [ " all " | " neigh " | " link " ]" +.BR "bridge monitor" " [ " all " | " neigh " | " link " | " mdb " ]" .SH OPTIONS @@ -109,6 +124,10 @@ As a rule, the information is statistics or some time values. .B fdb - Forwarding Database entry. +.TP +.B mdb +- Multicast group database entry. + .TP .B vlan - VLAN filter list. @@ -326,6 +345,69 @@ With the option, the command becomes verbose. 
It prints out the last updated and last used time for each entry. +.SH bridge mdb - multicast group database management + +.B mdb +objects contain known IP multicast group addresses on a link. + +.P +The corresponding commands display mdb entries, add new entries, +and delete old ones. + +.SS bridge mdb add - add a new multicast group database entry + +This command creates a new mdb entry. + +.TP +.BI dev " DEV" +the interface where this group address is associated. + +.TP +.BI port " PORT" +the port whose link is known to have members of this multicast group. + +.TP +.BI grp " GROUP" +the IP multicast group address whose members reside on the link connected to +the port. + +.B permanent +- the mdb entry is permanent +.sp + +.B temp +- the mdb entry is temporary (default) +.sp + +.in -8 +.SS bridge mdb delete - delete a multicast group database entry +This command removes an existing mdb entry. + +.PP +The arguments are the same as with +.BR "bridge mdb add" . + +.SS bridge mdb show - list multicast group database entries + +This command displays the current multicast group membership table. The table +is populated by IGMP and MLD snooping in the bridge driver automatically. It +can be altered by +.B bridge mdb add +and +.B bridge mdb del +commands manually too. + +.TP +.BI dev " DEV" +the interface only whose entries should be listed. Default is to list all +bridge interfaces. + +.PP +With the +.B -details +option, the command becomes verbose. It prints out the ports known to have +a connected router. + .SH bridge vlan - VLAN filter list .B vlan @@ -395,7 +477,7 @@ command is the first in the command line and then the object list follows: .I OBJECT-LIST is the list of object types that we want to monitor. It may contain -.BR link ", and " fdb "." +.BR link ", " fdb ", and " mdb "." 
If no .B file argument is given, From 8f7574edd85851f7a9b775d9ca7aadc28b19f13a Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 17 Sep 2013 04:19:03 -0700 Subject: [PATCH 04/25] tc: support TCA_STATS_RATE_EST64 Since linux-3.11, rate estimator can provide TCA_STATS_RATE_EST64 when rate (bytes per second) is above 2^32 (~34 Gbits) Change tc to use this attribute for high rates. Signed-off-by: Eric Dumazet --- tc/tc_util.c | 26 ++++++++++++++++++++------ tc/tc_util.h | 4 ++-- 2 files changed, 22 insertions(+), 8 deletions(-) diff --git a/tc/tc_util.c b/tc/tc_util.c index 8114c97c..be3ed071 100644 --- a/tc/tc_util.c +++ b/tc/tc_util.c @@ -171,20 +171,24 @@ int get_rate(unsigned *rate, const char *str) return 0; } -void print_rate(char *buf, int len, __u32 rate) +void print_rate(char *buf, int len, __u64 rate) { double tmp = (double)rate*8; extern int use_iec; if (use_iec) { - if (tmp >= 1000.0*1024.0*1024.0) + if (tmp >= 1000.0*1024.0*1024.0*1024.0) + snprintf(buf, len, "%.0fGibit", tmp/(1024.0*1024.0*1024.0)); + else if (tmp >= 1000.0*1024.0*1024.0) snprintf(buf, len, "%.0fMibit", tmp/(1024.0*1024.0)); else if (tmp >= 1000.0*1024) snprintf(buf, len, "%.0fKibit", tmp/1024); else snprintf(buf, len, "%.0fbit", tmp); } else { - if (tmp >= 1000.0*1000000.0) + if (tmp >= 1000.0*1000000000.0) + snprintf(buf, len, "%.0fGbit", tmp/1000000000.0); + else if (tmp >= 1000.0*1000000.0) snprintf(buf, len, "%.0fMbit", tmp/1000000.0); else if (tmp >= 1000.0 * 1000.0) snprintf(buf, len, "%.0fKbit", tmp/1000.0); @@ -193,7 +197,7 @@ void print_rate(char *buf, int len, __u32 rate) } } -char * sprint_rate(__u32 rate, char *buf) +char * sprint_rate(__u64 rate, char *buf) { print_rate(buf, SPRINT_BSIZE-1, rate); return buf; @@ -460,9 +464,19 @@ void print_tcstats2_attr(FILE *fp, struct rtattr *rta, char *prefix, struct rtat q.drops, q.overlimits, q.requeues); } - if (tbs[TCA_STATS_RATE_EST64]) { + struct gnet_stats_rate_est64 re = {0}; + + memcpy(&re, 
RTA_DATA(tbs[TCA_STATS_RATE_EST64]), + MIN(RTA_PAYLOAD(tbs[TCA_STATS_RATE_EST64]), + sizeof(re))); + fprintf(fp, "\n%srate %s %llupps ", + prefix, sprint_rate(re.bps, b1), re.pps); + } else if (tbs[TCA_STATS_RATE_EST]) { struct gnet_stats_rate_est re = {0}; - memcpy(&re, RTA_DATA(tbs[TCA_STATS_RATE_EST]), MIN(RTA_PAYLOAD(tbs[TCA_STATS_RATE_EST]), sizeof(re))); + + memcpy(&re, RTA_DATA(tbs[TCA_STATS_RATE_EST]), + MIN(RTA_PAYLOAD(tbs[TCA_STATS_RATE_EST]), sizeof(re))); fprintf(fp, "\n%srate %s %upps ", prefix, sprint_rate(re.bps, b1), re.pps); } diff --git a/tc/tc_util.h b/tc/tc_util.h index 4f544363..7c3709f5 100644 --- a/tc/tc_util.h +++ b/tc/tc_util.h @@ -63,12 +63,12 @@ extern int get_size_and_cell(unsigned *size, int *cell_log, char *str); extern int get_time(unsigned *time, const char *str); extern int get_linklayer(unsigned *val, const char *arg); -extern void print_rate(char *buf, int len, __u32 rate); +extern void print_rate(char *buf, int len, __u64 rate); extern void print_size(char *buf, int len, __u32 size); extern void print_qdisc_handle(char *buf, int len, __u32 h); extern void print_time(char *buf, int len, __u32 time); extern void print_linklayer(char *buf, int len, unsigned linklayer); -extern char * sprint_rate(__u32 rate, char *buf); +extern char * sprint_rate(__u64 rate, char *buf); extern char * sprint_size(__u32 size, char *buf); extern char * sprint_qdisc_handle(__u32 h, char *buf); extern char * sprint_tc_classid(__u32 h, char *buf); From b43f3318280a7b4b8c110d8fef7d5e6ebf2843fd Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 13 Sep 2013 09:10:01 -0700 Subject: [PATCH 05/25] htb: add support for direct_qlen attribute TCA_HTB_DIRECT_QLEN attribute is supported since linux-3.10 HTB classes use an internal pfifo queue, which limit was not reported by tc, and value inherited from device tx_queue_len at setup time. With this patch, tc displays the value and can change it. 
Signed-off-by: Eric Dumazet --- tc/q_htb.c | 21 +++++++++++++++++++-- 1 file changed, 19 insertions(+), 2 deletions(-) diff --git a/tc/q_htb.c b/tc/q_htb.c index 7b6f9082..6737ddb0 100644 --- a/tc/q_htb.c +++ b/tc/q_htb.c @@ -31,9 +31,11 @@ static void explain(void) { fprintf(stderr, "Usage: ... qdisc add ... htb [default N] [r2q N]\n" + " [direct_qlen P]\n" " default minor id of class to which unclassified packets are sent {0}\n" " r2q DRR quantums are computed as rate in Bps/r2q {10}\n" " debug string of 16 numbers each 0-3 {0}\n\n" + " direct_qlen Limit of the direct queue {in packets}\n" "... class add ... htb rate R1 [burst B1] [mpu B] [overhead O]\n" " [prio P] [slot S] [pslot PS]\n" " [ceil R2] [cburst B2] [mtu MTU] [quantum Q]\n" @@ -108,6 +110,7 @@ static int htb_parse_class_opt(struct qdisc_util *qu, int argc, char **argv, str unsigned mtu; unsigned short mpu = 0; unsigned short overhead = 0; + unsigned int direct_qlen = ~0U; unsigned int linklayer = LINKLAYER_ETHERNET; /* Assume ethernet */ struct rtattr *tail; @@ -125,6 +128,11 @@ static int htb_parse_class_opt(struct qdisc_util *qu, int argc, char **argv, str if (get_u32(&mtu, *argv, 10)) { explain1("mtu"); return -1; } + } else if (matches(*argv, "direct_qlen") == 0) { + NEXT_ARG(); + if (get_u32(&direct_qlen, *argv, 10)) { + explain1("direct_qlen"); return -1; + } } else if (matches(*argv, "mpu") == 0) { NEXT_ARG(); if (get_u16(&mpu, *argv, 10)) { @@ -230,6 +238,9 @@ static int htb_parse_class_opt(struct qdisc_util *qu, int argc, char **argv, str opt.cbuffer = tc_calc_xmittime(opt.ceil.rate, cbuffer); tail = NLMSG_TAIL(n); + if (direct_qlen != ~0U) + addattr_l(n, 1024, TCA_HTB_DIRECT_QLEN, + &direct_qlen, sizeof(direct_qlen)); addattr_l(n, 1024, TCA_OPTIONS, NULL, 0); addattr_l(n, 2024, TCA_HTB_PARMS, &opt, sizeof(opt)); addattr_l(n, 3024, TCA_HTB_RTAB, rtab, 1024); @@ -240,7 +251,7 @@ static int htb_parse_class_opt(struct qdisc_util *qu, int argc, char **argv, str static int htb_print_opt(struct 
qdisc_util *qu, FILE *f, struct rtattr *opt) { - struct rtattr *tb[TCA_HTB_RTAB+1]; + struct rtattr *tb[TCA_HTB_MAX + 1]; struct tc_htb_opt *hopt; struct tc_htb_glob *gopt; double buffer,cbuffer; @@ -253,7 +264,7 @@ static int htb_print_opt(struct qdisc_util *qu, FILE *f, struct rtattr *opt) if (opt == NULL) return 0; - parse_rtattr_nested(tb, TCA_HTB_RTAB, opt); + parse_rtattr_nested(tb, TCA_HTB_MAX, opt); if (tb[TCA_HTB_PARMS]) { hopt = RTA_DATA(tb[TCA_HTB_PARMS]); @@ -302,6 +313,12 @@ static int htb_print_opt(struct qdisc_util *qu, FILE *f, struct rtattr *opt) if (show_details) fprintf(f," ver %d.%d",gopt->version >> 16,gopt->version & 0xffff); } + if (tb[TCA_HTB_DIRECT_QLEN] && + RTA_PAYLOAD(tb[TCA_HTB_DIRECT_QLEN]) >= sizeof(__u32)) { + __u32 direct_qlen = rta_getattr_u32(tb[TCA_HTB_DIRECT_QLEN]); + + fprintf(f, " direct_qlen %u", direct_qlen); + } return 0; } From 6b2ed935ae4e6a0bafa5a314cc177470529cd629 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Mon, 23 Sep 2013 13:15:49 -0700 Subject: [PATCH 06/25] Update to 3.12-rc1 headers --- include/linux/fib_rules.h | 4 ++-- include/linux/if_bridge.h | 3 ++- include/linux/if_link.h | 3 +++ include/linux/if_tun.h | 6 ++++++ include/linux/pkt_sched.h | 41 +++++++++++++++++++++++++++++++++++++++ include/linux/tcp.h | 1 + 6 files changed, 55 insertions(+), 3 deletions(-) diff --git a/include/linux/fib_rules.h b/include/linux/fib_rules.h index 51da65b6..2b82d7e3 100644 --- a/include/linux/fib_rules.h +++ b/include/linux/fib_rules.h @@ -44,8 +44,8 @@ enum { FRA_FWMARK, /* mark */ FRA_FLOW, /* flow/class id */ FRA_UNUSED6, - FRA_UNUSED7, - FRA_UNUSED8, + FRA_SUPPRESS_IFGROUP, + FRA_SUPPRESS_PREFIXLEN, FRA_TABLE, /* Extended table id */ FRA_FWMASK, /* mask for netfilter mark */ FRA_OIFNAME, diff --git a/include/linux/if_bridge.h b/include/linux/if_bridge.h index d37e53c3..d2de4e67 100644 --- a/include/linux/if_bridge.h +++ b/include/linux/if_bridge.h @@ -14,6 +14,7 @@ #define _LINUX_IF_BRIDGE_H #include +#include 
#define SYSFS_BRIDGE_ATTR "bridge" #define SYSFS_BRIDGE_FDB "brforward" @@ -88,7 +89,7 @@ struct __port_info { }; struct __fdb_entry { - __u8 mac_addr[6]; + __u8 mac_addr[ETH_ALEN]; __u8 port_no; __u8 is_local; __u32 ageing_timer_value; diff --git a/include/linux/if_link.h b/include/linux/if_link.h index d07aecaa..ee4f2ba9 100644 --- a/include/linux/if_link.h +++ b/include/linux/if_link.h @@ -143,6 +143,7 @@ enum { IFLA_NUM_TX_QUEUES, IFLA_NUM_RX_QUEUES, IFLA_CARRIER, + IFLA_PHYS_PORT_ID, __IFLA_MAX }; @@ -311,6 +312,8 @@ enum { IFLA_VXLAN_L2MISS, IFLA_VXLAN_L3MISS, IFLA_VXLAN_PORT, /* destination port */ + IFLA_VXLAN_GROUP6, + IFLA_VXLAN_LOCAL6, __IFLA_VXLAN_MAX }; #define IFLA_VXLAN_MAX (__IFLA_VXLAN_MAX - 1) diff --git a/include/linux/if_tun.h b/include/linux/if_tun.h index dc13de35..75cc8ace 100644 --- a/include/linux/if_tun.h +++ b/include/linux/if_tun.h @@ -56,6 +56,8 @@ #define TUNGETVNETHDRSZ _IOR('T', 215, int) #define TUNSETVNETHDRSZ _IOW('T', 216, int) #define TUNSETQUEUE _IOW('T', 217, int) +#define TUNSETIFINDEX _IOW('T', 218, unsigned int) +#define TUNGETFILTER _IOR('T', 219, struct sock_fprog) /* TUNSETIFF ifr flags */ #define IFF_TUN 0x0001 @@ -70,6 +72,10 @@ #define IFF_DETACH_QUEUE 0x0400 /* read-only flag */ #define IFF_PERSIST 0x0800 +#define IFF_NOFILTER 0x1000 + +/* Socket options */ +#define TUN_TX_TIMESTAMP 1 /* Features for GSO (TUNSETOFFLOAD). */ #define TUN_F_CSUM 0x01 /* You can hand me unchecksummed packets. 
*/ diff --git a/include/linux/pkt_sched.h b/include/linux/pkt_sched.h index 09d62b92..9b829134 100644 --- a/include/linux/pkt_sched.h +++ b/include/linux/pkt_sched.h @@ -744,4 +744,45 @@ struct tc_fq_codel_xstats { }; }; +/* FQ */ + +enum { + TCA_FQ_UNSPEC, + + TCA_FQ_PLIMIT, /* limit of total number of packets in queue */ + + TCA_FQ_FLOW_PLIMIT, /* limit of packets per flow */ + + TCA_FQ_QUANTUM, /* RR quantum */ + + TCA_FQ_INITIAL_QUANTUM, /* RR quantum for new flow */ + + TCA_FQ_RATE_ENABLE, /* enable/disable rate limiting */ + + TCA_FQ_FLOW_DEFAULT_RATE,/* for sockets with unspecified sk_rate, + * use the following rate + */ + + TCA_FQ_FLOW_MAX_RATE, /* per flow max rate */ + + TCA_FQ_BUCKETS_LOG, /* log2(number of buckets) */ + __TCA_FQ_MAX +}; + +#define TCA_FQ_MAX (__TCA_FQ_MAX - 1) + +struct tc_fq_qd_stats { + __u64 gc_flows; + __u64 highprio_packets; + __u64 tcp_retrans; + __u64 throttled; + __u64 flows_plimit; + __u64 pkts_too_long; + __u64 allocation_errors; + __s64 time_next_delayed_flow; + __u32 flows; + __u32 inactive_flows; + __u32 throttled_flows; + __u32 pad; +}; #endif diff --git a/include/linux/tcp.h b/include/linux/tcp.h index 1eb04d35..8df6bd7f 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -111,6 +111,7 @@ enum { #define TCP_REPAIR_OPTIONS 22 #define TCP_FASTOPEN 23 /* Enable FastOpen on listeners */ #define TCP_TIMESTAMP 24 +#define TCP_NOTSENT_LOWAT 25 /* limit number of unsent bytes in write queue */ struct tcp_repair_opt { __u32 opt_code; From 404582c8ebc99920bdfe8ba0fdef8ee4ee4d9783 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Tue, 24 Sep 2013 11:54:45 -0700 Subject: [PATCH 07/25] nstat: revise json output Also add long options --- misc/nstat.c | 53 ++++++++++++++++++++++++++++++++++++++-------------- 1 file changed, 39 insertions(+), 14 deletions(-) diff --git a/misc/nstat.c b/misc/nstat.c index 5a4cfe94..d3f8621d 100644 --- a/misc/nstat.c +++ b/misc/nstat.c @@ -26,6 +26,7 @@ #include #include #include +#include 
#include @@ -264,9 +265,10 @@ static void dump_kern_db(FILE *fp, int to_hist) h = hist_db; if (json_output) - fprintf(fp, "{ \"%s\": [", info_source); + fprintf(fp, "{ \"%s\":{", info_source); else fprintf(fp, "#%s\n", info_source); + for (n=kern_db; n; n=n->next) { unsigned long long val = n->val; if (!dump_zeros && !val && !n->rate) @@ -285,15 +287,14 @@ static void dump_kern_db(FILE *fp, int to_hist) } if (json_output) { - fprintf(fp, "%s { \"id\":\"%s\", \"val\":%llu," - " \"rate\":%.1f }", - eol, n->id, val, n->rate); + fprintf(fp, "%s \"%s\":%llu", + eol, n->id, val); eol = ",\n"; } else fprintf(fp, "%-32s%-16llu%6.1f\n", n->id, val, n->rate); } if (json_output) - fprintf(fp, "\n] }\n"); + fprintf(fp, "\n} }\n"); } static void dump_incr_db(FILE *fp) @@ -303,7 +304,7 @@ static void dump_incr_db(FILE *fp) h = hist_db; if (json_output) - fprintf(fp, "{ \"%s\": [", info_source); + fprintf(fp, "{ \"%s\":{", info_source); else fprintf(fp, "#%s\n", info_source); @@ -328,16 +329,15 @@ static void dump_incr_db(FILE *fp) continue; if (json_output) { - fprintf(fp, "%s { \"id\":\"%s\", \"val\":%llu," - " \"rate\":%.1f, \"overflow\":%d }", - eol, n->id, val, n->rate, ovfl); + fprintf(fp, "%s \"%s\":%llu", + eol, n->id, val); eol = ",\n"; } else fprintf(fp, "%-32s%-16llu%6.1f%s\n", n->id, val, n->rate, ovfl?" 
(overflow)":""); } if (json_output) - fprintf(fp, "\n] }\n"); + fprintf(fp, "\n} }\n"); } static int children; @@ -468,11 +468,33 @@ static void usage(void) __attribute__((noreturn)); static void usage(void) { fprintf(stderr, -"Usage: nstat [ -h?vVzrnasd:t: ] [ PATTERN [ PATTERN ] ]\n" - ); +"Usage: nstat [OPTION] [ PATTERN [ PATTERN ] ]\n" +" -h, --help this message\n" +" -a, --ignore ignore history\n" +" -d, --scan=SECS sample every statistics every SECS\n" +" -j, --json format output in JSON\n" +" -n, --nooutput do history only\n" +" -r, --reset reset history\n" +" -s, --noupdate don\'t update history\n" +" -t, --interval=SECS report average over the last SECS\n" +" -V, --version output version information\n" +" -z, --zeros show entries with zero activity\n"); exit(-1); } +static const struct option longopts[] = { + { "help", 0, 0, 'h' }, + { "ignore", 0, 0, 'a' }, + { "scan", 1, 0, 'd'}, + { "nooutput", 0, 0, 'n' }, + { "json", 0, 0, 'j' }, + { "reset", 0, 0, 'r' }, + { "noupdate", 0, 0, 's' }, + { "interval", 1, 0, 't' }, + { "version", 0, 0, 'V' }, + { "zeros", 0, 0, 'z' }, + { 0 } +}; int main(int argc, char *argv[]) { @@ -482,7 +504,8 @@ int main(int argc, char *argv[]) int ch; int fd; - while ((ch = getopt(argc, argv, "h?vVzrnasd:t:j")) != EOF) { + while ((ch = getopt_long(argc, argv, "h?vVzrnasd:t:j", + longopts, NULL)) != EOF) { switch(ch) { case 'z': dump_zeros = 1; @@ -648,8 +671,10 @@ int main(int argc, char *argv[]) if (!no_update) { ftruncate(fileno(hist_fp), 0); rewind(hist_fp); + + json_output = 0; dump_kern_db(hist_fp, 1); - fflush(hist_fp); + fclose(hist_fp); } exit(0); } From ec3e625c410feaad1b3da4c8e4b8e91069a862ba Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Tue, 24 Sep 2013 11:55:13 -0700 Subject: [PATCH 08/25] ifstat: add json output format --- misc/ifstat.c | 131 ++++++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 112 insertions(+), 19 deletions(-) diff --git a/misc/ifstat.c b/misc/ifstat.c index 6d0ad8cc..a47c0461 
100644 --- a/misc/ifstat.c +++ b/misc/ifstat.c @@ -38,6 +38,7 @@ int dump_zeros = 0; int reset_history = 0; int ignore_history = 0; int no_output = 0; +int json_output = 0; int no_update = 0; int scan_interval = 0; int time_constant = 0; @@ -61,6 +62,32 @@ struct ifstat_ent __u32 ival[MAXS]; }; +static const char *stats[MAXS] = { + "rx_packets", + "tx_packets", + "rx_bytes", + "tx_bytes", + "rx_errors", + "tx_errors", + "rx_dropped", + "tx_dropped", + "multicast", + "collisions", + "rx_length_errors", + "rx_over_errors", + "rx_crc_errors", + "rx_frame_errors", + "rx_fifo_errors", + "rx_missed_errors", + "tx_aborted_errors", + "tx_carrier_errors", + "tx_fifo_errors", + "tx_heartbeat_errors", + "tx_window_errors", + "rx_compressed", + "tx_compressed" +}; + struct ifstat_ent *kern_db; struct ifstat_ent *hist_db; @@ -212,8 +239,13 @@ static void load_raw_table(FILE *fp) static void dump_raw_db(FILE *fp, int to_hist) { struct ifstat_ent *n, *h; + const char *eol = "\n"; + h = hist_db; - fprintf(fp, "#%s\n", info_source); + if (json_output) + fprintf(fp, "{ \"%s\":{", info_source); + else + fprintf(fp, "#%s\n", info_source); for (n=kern_db; n; n=n->next) { int i; @@ -232,10 +264,22 @@ static void dump_raw_db(FILE *fp, int to_hist) } } } - fprintf(fp, "%d %s ", n->ifindex, n->name); - for (i=0; iname); + eol = ",\n"; + for (i=0; iifindex, n->name); + for (i=0; i giga) fprintf(fp, "%7lluM ", vals[i]/mega); else if (vals[i] > mega) @@ -265,7 +310,7 @@ static void format_rate(FILE *fp, unsigned long long *vals, fprintf(fp, "%-6u ", (unsigned)rates[i]); } -static void format_pair(FILE *fp, unsigned long long *vals, int i, int k) +static void format_pair(FILE *fp, const unsigned long long *vals, int i, int k) { char temp[64]; if (vals[i] > giga) @@ -328,10 +373,27 @@ static void print_head(FILE *fp) } } -static void print_one_if(FILE *fp, struct ifstat_ent *n, - unsigned long long *vals) +static void print_one_json(FILE *fp, const struct ifstat_ent *n, + const unsigned long 
long *vals) +{ + int i, m; + const char *sep = " "; + + m = show_errors ? 20 : 10; + fprintf(fp, " \"%s\":{", n->name); + for (i=0; i < m && stats[i]; i++) { + fprintf(fp, "%s\"%s\":%llu", + sep, stats[i], vals[i]); + sep = ", "; + } + fprintf(fp, " }"); +} + +static void print_one_if(FILE *fp, const struct ifstat_ent *n, + const unsigned long long *vals) { int i; + fprintf(fp, "%-15s ", n->name); for (i=0; i<4; i++) format_rate(fp, vals, n->rate, i); @@ -375,27 +437,42 @@ static void print_one_if(FILE *fp, struct ifstat_ent *n, } } - static void dump_kern_db(FILE *fp) { struct ifstat_ent *n; + const char *eol = "\n"; - print_head(fp); + if (json_output) + fprintf(fp, "{ \"%s\": {", info_source); + else + print_head(fp); for (n=kern_db; n; n=n->next) { if (!match(n->name)) continue; - print_one_if(fp, n, n->val); + + if (json_output) { + fprintf(fp, "%s", eol); + eol = ",\n"; + print_one_json(fp, n, n->val); + } else + print_one_if(fp, n, n->val); } + if (json_output) + fprintf(fp, "\n} }\n"); } static void dump_incr_db(FILE *fp) { struct ifstat_ent *n, *h; - h = hist_db; + const char *eol = "\n"; - print_head(fp); + h = hist_db; + if (json_output) + fprintf(fp, "{ \"%s\":{", info_source); + else + print_head(fp); for (n=kern_db; n; n=n->next) { int i; @@ -414,8 +491,16 @@ static void dump_incr_db(FILE *fp) } if (!match(n->name)) continue; - print_one_if(fp, n, vals); + + if (json_output) { + fprintf(fp, "%s", eol); + eol = ",\n"; + print_one_json(fp, n, n->val); + } else + print_one_if(fp, n, vals); } + if (json_output) + fprintf(fp, "\n} }\n"); } @@ -559,9 +644,10 @@ static void usage(void) " -a, --ignore ignore history\n" " -d, --scan=SECS sample every statistics every SECS\n" " -e, --errors show errors\n" +" -j, --json format output in JSON\n" " -n, --nooutput do history only\n" " -r, --reset reset history\n" -" -s, --noupdate don;t update history\n" +" -s, --noupdate don\'t update history\n" " -t, --interval=SECS report average over the last SECS\n" " -V, 
--version output version information\n" " -z, --zeros show entries with zero activity\n"); @@ -575,6 +661,7 @@ static const struct option longopts[] = { { "scan", 1, 0, 'd'}, { "errors", 0, 0, 'e' }, { "nooutput", 0, 0, 'n' }, + { "json", 0, 0, 'j' }, { "reset", 0, 0, 'r' }, { "noupdate", 0, 0, 's' }, { "interval", 1, 0, 't' }, @@ -591,7 +678,7 @@ int main(int argc, char *argv[]) int ch; int fd; - while ((ch = getopt_long(argc, argv, "hvVzrnasd:t:eK", + while ((ch = getopt_long(argc, argv, "hjvVzrnasd:t:e", longopts, NULL)) != EOF) { switch(ch) { case 'z': @@ -612,6 +699,9 @@ int main(int argc, char *argv[]) case 'e': show_errors = 1; break; + case 'j': + json_output = 1; + break; case 'd': scan_interval = atoi(optarg) * 1000; if (scan_interval <= 0) { @@ -759,11 +849,14 @@ int main(int argc, char *argv[]) else dump_incr_db(stdout); } + if (!no_update) { ftruncate(fileno(hist_fp), 0); rewind(hist_fp); + + json_output = 0; dump_raw_db(hist_fp, 1); - fflush(hist_fp); + fclose(hist_fp); } exit(0); } From a4f9e8df37b23341821d8725520abfcb52b9dab9 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Tue, 24 Sep 2013 11:55:27 -0700 Subject: [PATCH 09/25] lnstat: add json output format --- misc/lnstat.c | 171 ++++++++++++++++++++++++++++++-------------------- 1 file changed, 104 insertions(+), 67 deletions(-) diff --git a/misc/lnstat.c b/misc/lnstat.c index 90198b73..d6415efb 100644 --- a/misc/lnstat.c +++ b/misc/lnstat.c @@ -41,7 +41,8 @@ static struct option opts[] = { { "version", 0, NULL, 'V' }, { "count", 1, NULL, 'c' }, - { "dump", 1, NULL, 'd' }, + { "dump", 0, NULL, 'd' }, + { "json", 0, NULL, 'j' }, { "file", 1, NULL, 'f' }, { "help", 0, NULL, 'h' }, { "interval", 1, NULL, 'i' }, @@ -63,6 +64,8 @@ static int usage(char *name, int exit_code) "Print number of intervals\n"); fprintf(stderr, "\t-d --dump\t\t" "Dump list of available files/keys\n"); + fprintf(stderr, "\t-j --json\t\t" + "Display in JSON format\n"); fprintf(stderr, "\t-f --file \tStatistics file to 
use\n"); fprintf(stderr, "\t-h --help\t\tThis help message\n"); fprintf(stderr, "\t-i --interval \t" @@ -94,7 +97,7 @@ static void print_line(FILE *of, const struct lnstat_file *lnstat_files, int i; for (i = 0; i < fp->num; i++) { - struct lnstat_field *lf = fp->params[i].lf; + const struct lnstat_field *lf = fp->params[i].lf; char formatbuf[255]; snprintf(formatbuf, sizeof(formatbuf)-1, "%%%ulu|", @@ -104,6 +107,30 @@ static void print_line(FILE *of, const struct lnstat_file *lnstat_files, fputc('\n', of); } +static void print_json(FILE *of, const struct lnstat_file *lnstat_files, + const struct field_params *fp) +{ + int i; + const char *sep; + const char *base = NULL; + + fputs("{\n", of); + for (i = 0; i < fp->num; i++) { + const struct lnstat_field *lf = fp->params[i].lf; + + if (!base || lf->file->basename != base) { + if (base) fputs("},\n", of); + base = lf->file->basename; + sep = "\n\t"; + fprintf(of, " \"%s\":{", base); + } + fprintf(of, "%s\"%s\":%lu", sep, + lf->name, lf->result); + sep = ",\n\t"; + } + fputs("}\n}\n", of); +} + /* find lnstat_field according to user specification */ static int map_field_params(struct lnstat_file *lnstat_files, struct field_params *fps, int interval) @@ -218,15 +245,16 @@ int main(int argc, char **argv) { struct lnstat_file *lnstat_files; const char *basename; - int c; + int i, c; int interval = DEFAULT_INTERVAL; int hdr = 2; enum { MODE_DUMP, + MODE_JSON, MODE_NORMAL, } mode = MODE_NORMAL; - unsigned long count = 1; + struct table_hdr *header; static struct field_params fp; int num_req_files = 0; char *req_files[LNSTAT_MAX_FILES]; @@ -248,70 +276,73 @@ int main(int argc, char **argv) num_req_files = 1; } - while ((c = getopt_long(argc, argv,"Vc:df:h?i:k:s:w:", + while ((c = getopt_long(argc, argv,"Vc:djf:h?i:k:s:w:", opts, NULL)) != -1) { - int i, len = 0; + int len = 0; char *tmp, *tok; switch (c) { - case 'c': - count = strtoul(optarg, NULL, 0); + case 'c': + count = strtoul(optarg, NULL, 0); + break; + case 'd': + 
mode = MODE_DUMP; + break; + case 'j': + mode = MODE_JSON; + break; + case 'f': + req_files[num_req_files++] = strdup(optarg); + break; + case '?': + case 'h': + usage(argv[0], 0); + break; + case 'i': + sscanf(optarg, "%u", &interval); + break; + case 'k': + tmp = strdup(optarg); + if (!tmp) break; - case 'd': - mode = MODE_DUMP; - break; - case 'f': - req_files[num_req_files++] = strdup(optarg); - break; - case '?': - case 'h': - usage(argv[0], 0); - break; - case 'i': - sscanf(optarg, "%u", &interval); - break; - case 'k': - tmp = strdup(optarg); - if (!tmp) + for (tok = strtok(tmp, ","); + tok; + tok = strtok(NULL, ",")) { + if (fp.num >= MAX_FIELDS) { + fprintf(stderr, + "WARN: too many keys" + " requested: (%d max)\n", + MAX_FIELDS); break; - for (tok = strtok(tmp, ","); - tok; - tok = strtok(NULL, ",")) { - if (fp.num >= MAX_FIELDS) { - fprintf(stderr, - "WARN: too many keys" - " requested: (%d max)\n", - MAX_FIELDS); - break; - } - fp.params[fp.num++].name = tok; } + fp.params[fp.num++].name = tok; + } + break; + case 's': + sscanf(optarg, "%u", &hdr); + break; + case 'w': + tmp = strdup(optarg); + if (!tmp) break; - case 's': - sscanf(optarg, "%u", &hdr); - break; - case 'w': - tmp = strdup(optarg); - if (!tmp) - break; - i = 0; - for (tok = strtok(tmp, ","); - tok; - tok = strtok(NULL, ",")) { - len = strtoul(tok, NULL, 0); - if (len > FIELD_WIDTH_MAX) - len = FIELD_WIDTH_MAX; + i = 0; + for (tok = strtok(tmp, ","); + tok; + tok = strtok(NULL, ",")) { + len = strtoul(tok, NULL, 0); + if (len > FIELD_WIDTH_MAX) + len = FIELD_WIDTH_MAX; + fp.params[i].print.width = len; + i++; + } + if (i == 1) { + for (i = 0; i < MAX_FIELDS; i++) fp.params[i].print.width = len; - i++; - } - if (i == 1) { - for (i = 0; i < MAX_FIELDS; i++) - fp.params[i].print.width = len; - } - break; - default: - usage(argv[0], 1); - break; + } + break; + default: + usage(argv[0], 1); + break; } } @@ -319,13 +350,12 @@ int main(int argc, char **argv) (const char **) req_files); switch 
(mode) { - int i; - struct table_hdr *header; case MODE_DUMP: lnstat_dump(stderr, lnstat_files); break; - case MODE_NORMAL: + case MODE_NORMAL: + case MODE_JSON: if (!map_field_params(lnstat_files, &fp, interval)) exit(1); @@ -334,16 +364,23 @@ int main(int argc, char **argv) exit(1); if (interval < 1 ) - interval=1; + interval = 1; for (i = 0; i < count; i++) { - if ((hdr > 1 && (! (i % 20))) || (hdr == 1 && i == 0)) - print_hdr(stdout, header); lnstat_update(lnstat_files); - print_line(stdout, lnstat_files, &fp); + if (mode == MODE_JSON) + print_json(stdout, lnstat_files, &fp); + else { + if ((hdr > 1 && + (! (i % 20))) || (hdr == 1 && i == 0)) + print_hdr(stdout, header); + print_line(stdout, lnstat_files, &fp); + } fflush(stdout); - sleep(interval); + if (i < count - 1) + sleep(interval); } + break; } return 1; From f3abcfed464b16b1149992b7ecdca585d3b7838d Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Tue, 24 Sep 2013 12:00:41 -0700 Subject: [PATCH 10/25] lnstat, nstat, ifstat: update man pages New json option --- man/man8/lnstat.8 | 3 +++ man/man8/rtacct.8 | 22 ++++++++++++---------- 2 files changed, 15 insertions(+), 10 deletions(-) diff --git a/man/man8/lnstat.8 b/man/man8/lnstat.8 index a76940b1..699ddf4c 100644 --- a/man/man8/lnstat.8 +++ b/man/man8/lnstat.8 @@ -33,6 +33,9 @@ Statistics file to use. .B \-i, \-\-interval Set interval to 'intv' seconds. .TP +.B \-j, \-\-json +Display results in JSON format +.TP .B \-k, \-\-keys k,k,k,... Display only keys specified. .TP diff --git a/man/man8/rtacct.8 b/man/man8/rtacct.8 index fb9afe85..c3ab03de 100644 --- a/man/man8/rtacct.8 +++ b/man/man8/rtacct.8 @@ -15,33 +15,35 @@ and are simple tools to monitor kernel snmp counters and network interface statistics. .SH OPTIONS -.TP --h -? +.B \-h, \-\-help Print help .TP --v -V +.B \-V, \-\-version Print version .TP --z +.B \-z, \-\-zero Dump zero counters too. By default they are not shown. .TP --r +.B \-r, \-\-reset Reset history. 
.TP --n +.B \-n, \-\-nooutput Do not display anything, only update history. .TP --a +.B \-a, \-\-ignore Dump absolute values of counters. The default is to calculate increments since the previous use. .TP --s +.B \-s, \-\-noupdate Do not update history, so that the next time you will see counters including values accumulated to the moment of this measurement too. +.B \-j, \-\-json +Display results in JSON format. .TP --d +.B \-d, \-\-interval Run in daemon mode collecting statistics. is interval between measurements in seconds. .TP --t + Time interval to average rates. Default value is 60 seconds. +.TP .SH SEE ALSO lnstat(8) From 101847446e6a5e3ca370e65ebc462584934fce0e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Petr=20P=C3=ADsa=C5=99?= Date: Wed, 25 Sep 2013 09:45:45 +0200 Subject: [PATCH 11/25] iproute2: bridge: Close file with bridge monitor file MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The `bridge monitor file FILENAME' reads dumped netlink messages from a file. But it forgot to close the file after using it. This patch fixes it. Signed-off-by: Petr Písař --- bridge/monitor.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/bridge/monitor.c b/bridge/monitor.c index e96fcaf7..76e7d477 100644 --- a/bridge/monitor.c +++ b/bridge/monitor.c @@ -132,12 +132,15 @@ int do_monitor(int argc, char **argv) if (file) { FILE *fp; + int err; fp = fopen(file, "r"); if (fp == NULL) { perror("Cannot fopen"); exit(-1); } - return rtnl_from_file(fp, accept_msg, stdout); + err = rtnl_from_file(fp, accept_msg, stdout); + fclose(fp); + return err; } if (rtnl_open(&rth, groups) < 0) From 99500b56d94dfa735a3d088fdbdde6c0c2638e78 Mon Sep 17 00:00:00 2001 From: Fan Du Date: Mon, 30 Sep 2013 21:09:05 -0700 Subject: [PATCH 12/25] xfrm: use memcpy to suppress gcc phony buffer overflow warning. 
This bug is reported at the link below: https://bugzilla.redhat.com/show_bug.cgi?id=982761 A simplified command from its original reproducing method in bugzilla: ip xfrm state add src 10.0.0.2 dst 10.0.0.1 proto ah spi 0x12345678 auth md5 12 will cause the spew below from gcc. Reported-by: Sohny Thomas --- ip/xfrm_state.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ip/xfrm_state.c b/ip/xfrm_state.c index 389942c0..f4ad4cb1 100644 --- a/ip/xfrm_state.c +++ b/ip/xfrm_state.c @@ -162,7 +162,7 @@ static int xfrm_algo_parse(struct xfrm_algo *alg, enum xfrm_attr_type_t type, if (len > max) invarg("ALGO-KEYMAT value makes buffer overflow\n", key); - strncpy(buf, key, len); + memcpy(buf, key, len); } } From 29ff4d2e1136c7b4766fd8162c6985b3b1a53eed Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Mon, 30 Sep 2013 21:28:09 -0700 Subject: [PATCH 13/25] tc: add default action to kernel headers --- include/linux/tc_act/tc_defact.h | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) create mode 100644 include/linux/tc_act/tc_defact.h diff --git a/include/linux/tc_act/tc_defact.h b/include/linux/tc_act/tc_defact.h new file mode 100644 index 00000000..6f65d07c --- /dev/null +++ b/include/linux/tc_act/tc_defact.h @@ -0,0 +1,19 @@ +#ifndef __LINUX_TC_DEF_H +#define __LINUX_TC_DEF_H + +#include + +struct tc_defact { + tc_gen; +}; + +enum { + TCA_DEF_UNSPEC, + TCA_DEF_TM, + TCA_DEF_PARMS, + TCA_DEF_DATA, + __TCA_DEF_MAX +}; +#define TCA_DEF_MAX (__TCA_DEF_MAX - 1) + +#endif From 087f46ee4ebd178a2a8562989fd9a4e02c93f406 Mon Sep 17 00:00:00 2001 From: Jamal Hadi Salim Date: Sun, 29 Sep 2013 07:33:42 -0400 Subject: [PATCH 14/25] tc: introduce simple action The simple action has already been in the kernel for years as an example. This complements it with user space control.
Signed-off-by: Jamal Hadi Salim --- tc/Makefile | 1 + tc/m_simple.c | 202 ++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 203 insertions(+) create mode 100644 tc/m_simple.c diff --git a/tc/Makefile b/tc/Makefile index 1eeabd8c..f54a955b 100644 --- a/tc/Makefile +++ b/tc/Makefile @@ -38,6 +38,7 @@ TCMODULES += m_nat.o TCMODULES += m_pedit.o TCMODULES += m_skbedit.o TCMODULES += m_csum.o +TCMODULES += m_simple.o TCMODULES += p_ip.o TCMODULES += p_icmp.o TCMODULES += p_tcp.o diff --git a/tc/m_simple.c b/tc/m_simple.c new file mode 100644 index 00000000..02244406 --- /dev/null +++ b/tc/m_simple.c @@ -0,0 +1,202 @@ +/* + * m_simple.c simple action + * + * This program is free software; you can distribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * Authors: J Hadi Salim + * + * Pedagogical example. Adds a string that will be printed every time + * the simple instance is hit. + * Use this as a skeleton action and keep modifying it to meet your needs. + * Look at linux/tc_act/tc_defact.h for the different component ids and + * definitions used in this action + * + * example use, yell "Incoming ICMP!" every time you see an incoming ICMP on + * eth0. Steps are: + * 1) Add an ingress qdisc point to eth0 + * 2) Start a chain on ingress of eth0 that first matches ICMP then invokes + * the simple action to shout.
+ * 3) display stats and show that no packet has been seen by the action + * 4) Send one ping packet to google (expect to receive a response back) + * 5) grep the logs to see the logged message + * 6) display stats again and observe increment by 1 + * + hadi@noma1:$ tc qdisc add dev eth0 ingress + hadi@noma1:$tc filter add dev eth0 parent ffff: protocol ip prio 5 \ + u32 match ip protocol 1 0xff flowid 1:1 action simple "Incoming ICMP" + + hadi@noma1:$ sudo tc -s filter ls dev eth0 parent ffff: + filter protocol ip pref 5 u32 + filter protocol ip pref 5 u32 fh 800: ht divisor 1 + filter protocol ip pref 5 u32 fh 800::800 order 2048 key ht 800 bkt 0 flowid 1:1 + match 00010000/00ff0000 at 8 + action order 1: Simple + index 4 ref 1 bind 1 installed 29 sec used 29 sec + Action statistics: + Sent 0 bytes 0 pkt (dropped 0, overlimits 0 requeues 0) + backlog 0b 0p requeues 0 + + + hadi@noma1$ ping -c 1 www.google.ca + PING www.google.ca (74.125.225.120) 56(84) bytes of data. + 64 bytes from ord08s08-in-f24.1e100.net (74.125.225.120): icmp_req=1 ttl=53 time=31.3 ms + + --- www.google.ca ping statistics --- + 1 packets transmitted, 1 received, 0% packet loss, time 0ms + rtt min/avg/max/mdev = 31.316/31.316/31.316/0.000 ms + + hadi@noma1$ dmesg | grep simple + [135354.473951] simple: Incoming ICMP_1 + + hadi@noma1$ sudo tc/tc -s filter ls dev eth0 parent ffff: + filter protocol ip pref 5 u32 + filter protocol ip pref 5 u32 fh 800: ht divisor 1 + filter protocol ip pref 5 u32 fh 800::800 order 2048 key ht 800 bkt 0 flowid 1:1 + match 00010000/00ff0000 at 8 + action order 1: Simple + index 4 ref 1 bind 1 installed 206 sec used 67 sec + Action statistics: + Sent 84 bytes 1 pkt (dropped 0, overlimits 0 requeues 0) + backlog 0b 0p requeues 0 +*/ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "utils.h" +#include "tc_util.h" +#include + +#ifndef SIMP_MAX_DATA +#define SIMP_MAX_DATA 32 +#endif +static void explain(void) +{ + 
fprintf(stderr, "Usage: ... simple STRING\n" + "STRING being an arbitrary string\n" + "example: \"simple blah\"\n"); +} + +static void usage(void) +{ + explain(); + exit(-1); +} + +static int +parse_simple(struct action_util *a, int *argc_p, char ***argv_p, int tca_id, + struct nlmsghdr *n) +{ + struct tc_defact sel = {}; + int argc = *argc_p; + char **argv = *argv_p; + int ok = 0; + struct rtattr *tail; + char *simpdata = NULL; + + + while (argc > 0) { + if (matches(*argv, "simple") == 0) { + NEXT_ARG(); + simpdata = *argv; + ok = 1; + argc--; + argv++; + break; + } else if (matches(*argv, "help") == 0) { + usage(); + } else { + break; + } + + } + + if (!ok) { + explain(); + return -1; + } + + if (argc) { + if (matches(*argv, "index") == 0) { + NEXT_ARG(); + if (get_u32(&sel.index, *argv, 10)) { + fprintf(stderr, "simple: Illegal \"index\"\n"); + return -1; + } + argc--; + argv++; + } + } + + if (strlen(simpdata) > (SIMP_MAX_DATA - 1)) { + fprintf(stderr, "simple: Illegal string len %ld <%s> \n", + strlen(simpdata), simpdata); + return -1; + } + + sel.action = TC_ACT_PIPE; + + tail = NLMSG_TAIL(n); + addattr_l(n, MAX_MSG, tca_id, NULL, 0); + addattr_l(n, MAX_MSG, TCA_DEF_PARMS, &sel, sizeof(sel)); + addattr_l(n, MAX_MSG, TCA_DEF_DATA, simpdata, SIMP_MAX_DATA); + tail->rta_len = (char *)NLMSG_TAIL(n) - (char *)tail; + + *argc_p = argc; + *argv_p = argv; + return 0; +} + +static int print_simple(struct action_util *au, FILE * f, struct rtattr *arg) +{ + struct tc_defact *sel; + struct rtattr *tb[TCA_DEF_MAX + 1]; + char *simpdata; + + if (arg == NULL) + return -1; + + parse_rtattr_nested(tb, TCA_DEF_MAX, arg); + + if (tb[TCA_DEF_PARMS] == NULL) { + fprintf(f, "[NULL simple parameters]"); + return -1; + } + sel = RTA_DATA(tb[TCA_DEF_PARMS]); + + if (tb[TCA_DEF_DATA] == NULL) { + fprintf(f, "[missing simple string]"); + return -1; + } + + simpdata = RTA_DATA(tb[TCA_DEF_DATA]); + + fprintf(f, "Simple <%s>\n", simpdata); + fprintf(f, "\t index %d ref %d bind %d", 
sel->index, + sel->refcnt, sel->bindcnt); + + if (show_stats) { + if (tb[TCA_DEF_TM]) { + struct tcf_t *tm = RTA_DATA(tb[TCA_DEF_TM]); + print_tm(f, tm); + fprintf(f, "\n"); + } + } + + return 0; +} + +struct action_util simple_action_util = { + .id = "simple", + .parse_aopt = parse_simple, + .print_aopt = print_simple, +}; From e26520e5c1c29af977283aed781d7865dc6b1f73 Mon Sep 17 00:00:00 2001 From: Jamal Hadi Salim Date: Sun, 29 Sep 2013 07:39:23 -0400 Subject: [PATCH 15/25] action: typo nat fix If you taketh you giveth. I Went the LinuxWay and copied this for m_simple.c and noticed this one typo (I wonder where it came from?;->). Signed-off-by: Jamal Hadi Salim --- tc/m_nat.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tc/m_nat.c b/tc/m_nat.c index 01ec0328..d502a819 100644 --- a/tc/m_nat.c +++ b/tc/m_nat.c @@ -146,7 +146,7 @@ parse_nat(struct action_util *a, int *argc_p, char ***argv_p, int tca_id, struct if (matches(*argv, "index") == 0) { NEXT_ARG(); if (get_u32(&sel.index, *argv, 10)) { - fprintf(stderr, "Pedit: Illegal \"index\"\n"); + fprintf(stderr, "Nat: Illegal \"index\"\n"); return -1; } argc--; From af89576d7a8c4235f68efd15c28092f090e36f43 Mon Sep 17 00:00:00 2001 From: "xeb@mail.ru" Date: Sat, 28 Sep 2013 11:32:51 +0400 Subject: [PATCH 16/25] iproute2: GRE over IPv6 tunnel support. GRE over IPv6 tunnel support. 
Signed-off-by: Dmitry Kozlov --- ip/Makefile | 3 +- ip/ip6tunnel.c | 131 +++++++++++++--- ip/iplink.c | 7 +- ip/link_gre6.c | 398 +++++++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 516 insertions(+), 23 deletions(-) create mode 100644 ip/link_gre6.c diff --git a/ip/Makefile b/ip/Makefile index 48bd4a16..f10d22fc 100644 --- a/ip/Makefile +++ b/ip/Makefile @@ -5,7 +5,7 @@ IPOBJ=ip.o ipaddress.o ipaddrlabel.o iproute.o iprule.o ipnetns.o \ iplink_vlan.o link_veth.o link_gre.o iplink_can.o \ iplink_macvlan.o iplink_macvtap.o ipl2tp.o link_vti.o \ iplink_vxlan.o tcp_metrics.o iplink_ipoib.o ipnetconf.o link_ip6tnl.o \ - link_iptnl.o + link_iptnl.o link_gre6.o RTMONOBJ=rtmon.o @@ -23,7 +23,6 @@ all: $(TARGETS) $(SCRIPTS) ip: $(IPOBJ) $(LIBNETLINK) - rtmon: $(RTMONOBJ) install: all diff --git a/ip/ip6tunnel.c b/ip/ip6tunnel.c index 216e982f..463be42b 100644 --- a/ip/ip6tunnel.c +++ b/ip/ip6tunnel.c @@ -48,11 +48,12 @@ static void usage(void) __attribute__((noreturn)); static void usage(void) { fprintf(stderr, "Usage: ip -f inet6 tunnel { add | change | del | show } [ NAME ]\n"); - fprintf(stderr, " [ mode { ip6ip6 | ipip6 | any } ]\n"); + fprintf(stderr, " [ mode { ip6ip6 | ipip6 | ip6gre | any } ]\n"); fprintf(stderr, " [ remote ADDR local ADDR ] [ dev PHYS_DEV ]\n"); fprintf(stderr, " [ encaplimit ELIM ]\n"); fprintf(stderr ," [ hoplimit TTL ] [ tclass TCLASS ] [ flowlabel FLOWLABEL ]\n"); fprintf(stderr, " [ dscp inherit ]\n"); + fprintf(stderr, " [ [i|o]seq ] [ [i|o]key KEY ] [ [i|o]csum ]\n"); fprintf(stderr, "\n"); fprintf(stderr, "Where: NAME := STRING\n"); fprintf(stderr, " ADDR := IPV6_ADDRESS\n"); @@ -62,10 +63,11 @@ static void usage(void) DEFAULT_TNL_HOP_LIMIT); fprintf(stderr, " TCLASS := { 0x0..0xff | inherit }\n"); fprintf(stderr, " FLOWLABEL := { 0x0..0xfffff | inherit }\n"); + fprintf(stderr, " KEY := { DOTTED_QUAD | NUMBER }\n"); exit(-1); } -static void print_tunnel(struct ip6_tnl_parm *p) +static void print_tunnel(struct ip6_tnl_parm2 
*p) { char remote[64]; char local[64]; @@ -104,9 +106,29 @@ static void print_tunnel(struct ip6_tnl_parm *p) if (p->flags & IP6_TNL_F_RCV_DSCP_COPY) printf(" dscp inherit"); + + if (p->proto == IPPROTO_GRE) { + if ((p->i_flags&GRE_KEY) && (p->o_flags&GRE_KEY) && p->o_key == p->i_key) + printf(" key %u", ntohl(p->i_key)); + else if ((p->i_flags|p->o_flags)&GRE_KEY) { + if (p->i_flags&GRE_KEY) + printf(" ikey %u ", ntohl(p->i_key)); + if (p->o_flags&GRE_KEY) + printf(" okey %u ", ntohl(p->o_key)); + } + + if (p->i_flags&GRE_SEQ) + printf("%s Drop packets out of sequence.\n", _SL_); + if (p->i_flags&GRE_CSUM) + printf("%s Checksum in received packet is required.", _SL_); + if (p->o_flags&GRE_SEQ) + printf("%s Sequence packets on output.", _SL_); + if (p->o_flags&GRE_CSUM) + printf("%s Checksum output packets.", _SL_); + } } -static int parse_args(int argc, char **argv, int cmd, struct ip6_tnl_parm *p) +static int parse_args(int argc, char **argv, int cmd, struct ip6_tnl_parm2 *p) { int count = 0; char medium[IFNAMSIZ]; @@ -124,6 +146,9 @@ static int parse_args(int argc, char **argv, int cmd, struct ip6_tnl_parm *p) strcmp(*argv, "ipip6") == 0 || strcmp(*argv, "ip4ip6") == 0) p->proto = IPPROTO_IPIP; + else if (strcmp(*argv, "ip6gre") == 0 || + strcmp(*argv, "gre/ipv6") == 0) + p->proto = IPPROTO_GRE; else if (strcmp(*argv, "any/ipv6") == 0 || strcmp(*argv, "any") == 0) p->proto = 0; @@ -202,6 +227,60 @@ static int parse_args(int argc, char **argv, int cmd, struct ip6_tnl_parm *p) if (strcmp(*argv, "inherit") != 0) invarg("not inherit", *argv); p->flags |= IP6_TNL_F_RCV_DSCP_COPY; + } else if (strcmp(*argv, "key") == 0) { + unsigned uval; + NEXT_ARG(); + p->i_flags |= GRE_KEY; + p->o_flags |= GRE_KEY; + if (strchr(*argv, '.')) + p->i_key = p->o_key = get_addr32(*argv); + else { + if (get_unsigned(&uval, *argv, 0)<0) { + fprintf(stderr, "invalid value of \"key\"\n"); + exit(-1); + } + p->i_key = p->o_key = htonl(uval); + } + } else if (strcmp(*argv, "ikey") == 0) { + 
unsigned uval; + NEXT_ARG(); + p->i_flags |= GRE_KEY; + if (strchr(*argv, '.')) + p->i_key = get_addr32(*argv); + else { + if (get_unsigned(&uval, *argv, 0)<0) { + fprintf(stderr, "invalid value of \"ikey\"\n"); + exit(-1); + } + p->i_key = htonl(uval); + } + } else if (strcmp(*argv, "okey") == 0) { + unsigned uval; + NEXT_ARG(); + p->o_flags |= GRE_KEY; + if (strchr(*argv, '.')) + p->o_key = get_addr32(*argv); + else { + if (get_unsigned(&uval, *argv, 0)<0) { + fprintf(stderr, "invalid value of \"okey\"\n"); + exit(-1); + } + p->o_key = htonl(uval); + } + } else if (strcmp(*argv, "seq") == 0) { + p->i_flags |= GRE_SEQ; + p->o_flags |= GRE_SEQ; + } else if (strcmp(*argv, "iseq") == 0) { + p->i_flags |= GRE_SEQ; + } else if (strcmp(*argv, "oseq") == 0) { + p->o_flags |= GRE_SEQ; + } else if (strcmp(*argv, "csum") == 0) { + p->i_flags |= GRE_CSUM; + p->o_flags |= GRE_CSUM; + } else if (strcmp(*argv, "icsum") == 0) { + p->i_flags |= GRE_CSUM; + } else if (strcmp(*argv, "ocsum") == 0) { + p->o_flags |= GRE_CSUM; } else { if (strcmp(*argv, "name") == 0) { NEXT_ARG(); @@ -212,7 +291,7 @@ static int parse_args(int argc, char **argv, int cmd, struct ip6_tnl_parm *p) duparg2("name", *argv); strncpy(p->name, *argv, IFNAMSIZ - 1); if (cmd == SIOCCHGTUNNEL && count == 0) { - struct ip6_tnl_parm old_p; + struct ip6_tnl_parm2 old_p; memset(&old_p, 0, sizeof(old_p)); if (tnl_get_ioctl(*argv, &old_p)) return -1; @@ -230,7 +309,7 @@ static int parse_args(int argc, char **argv, int cmd, struct ip6_tnl_parm *p) return 0; } -static void ip6_tnl_parm_init(struct ip6_tnl_parm *p, int apply_default) +static void ip6_tnl_parm_init(struct ip6_tnl_parm2 *p, int apply_default) { memset(p, 0, sizeof(*p)); p->proto = IPPROTO_IPV6; @@ -244,8 +323,8 @@ static void ip6_tnl_parm_init(struct ip6_tnl_parm *p, int apply_default) * @p1: user specified parameter * @p2: database entry */ -static int ip6_tnl_parm_match(const struct ip6_tnl_parm *p1, - const struct ip6_tnl_parm *p2) +static int 
ip6_tnl_parm_match(const struct ip6_tnl_parm2 *p1, + const struct ip6_tnl_parm2 *p2) { return ((!p1->link || p1->link == p2->link) && (!p1->name[0] || strcmp(p1->name, p2->name) == 0) && @@ -263,7 +342,7 @@ static int ip6_tnl_parm_match(const struct ip6_tnl_parm *p1, (!p1->flags || (p1->flags & p2->flags))); } -static int do_tunnels_list(struct ip6_tnl_parm *p) +static int do_tunnels_list(struct ip6_tnl_parm2 *p) { char buf[512]; int err = -1; @@ -287,7 +366,7 @@ static int do_tunnels_list(struct ip6_tnl_parm *p) rx_fifo, rx_frame, tx_bytes, tx_packets, tx_errs, tx_drops, tx_fifo, tx_colls, tx_carrier, rx_multi; - struct ip6_tnl_parm p1; + struct ip6_tnl_parm2 p1; char *ptr; buf[sizeof(buf) - 1] = '\0'; @@ -312,10 +391,12 @@ static int do_tunnels_list(struct ip6_tnl_parm *p) fprintf(stderr, "Failed to get type of \"%s\"\n", name); continue; } - if (type != ARPHRD_TUNNEL6) + if (type != ARPHRD_TUNNEL6 && type != ARPHRD_IP6GRE) continue; memset(&p1, 0, sizeof(p1)); ip6_tnl_parm_init(&p1, 0); + if (type == ARPHRD_IP6GRE) + p1.proto = IPPROTO_GRE; strcpy(p1.name, name); p1.link = ll_name_to_index(p1.name); if (p1.link == 0) @@ -346,7 +427,7 @@ static int do_tunnels_list(struct ip6_tnl_parm *p) static int do_show(int argc, char **argv) { - struct ip6_tnl_parm p; + struct ip6_tnl_parm2 p; ll_init_map(&rth); ip6_tnl_parm_init(&p, 0); @@ -369,28 +450,44 @@ static int do_show(int argc, char **argv) static int do_add(int cmd, int argc, char **argv) { - struct ip6_tnl_parm p; + struct ip6_tnl_parm2 p; ip6_tnl_parm_init(&p, 1); if (parse_args(argc, argv, cmd, &p) < 0) return -1; - return tnl_add_ioctl(cmd, - cmd == SIOCCHGTUNNEL && p.name[0] ? 
- p.name : "ip6tnl0", p.name, &p); + switch (p.proto) { + case IPPROTO_IPIP: + case IPPROTO_IPV6: + return tnl_add_ioctl(cmd, "ip6tnl0", p.name, &p); + case IPPROTO_GRE: + return tnl_add_ioctl(cmd, "ip6gre0", p.name, &p); + default: + fprintf(stderr, "cannot determine tunnel mode (ip6ip6, ipip6 or gre)\n"); + } + return -1; } static int do_del(int argc, char **argv) { - struct ip6_tnl_parm p; + struct ip6_tnl_parm2 p; ip6_tnl_parm_init(&p, 1); if (parse_args(argc, argv, SIOCDELTUNNEL, &p) < 0) return -1; - return tnl_del_ioctl(p.name[0] ? p.name : "ip6tnl0", p.name, &p); + switch (p.proto) { + case IPPROTO_IPIP: + case IPPROTO_IPV6: + return tnl_del_ioctl("ip6tnl0", p.name, &p); + case IPPROTO_GRE: + return tnl_del_ioctl("ip6gre0", p.name, &p); + default: + return tnl_del_ioctl(p.name, p.name, &p); + } + return -1; } int do_ip6tunnel(int argc, char **argv) diff --git a/ip/iplink.c b/ip/iplink.c index ada9d425..16cb6fed 100644 --- a/ip/iplink.c +++ b/ip/iplink.c @@ -85,7 +85,8 @@ void iplink_usage(void) if (iplink_have_newlink()) { fprintf(stderr, "\n"); fprintf(stderr, "TYPE := { vlan | veth | vcan | dummy | ifb | macvlan | can |\n"); - fprintf(stderr, " bridge | ipoib | ip6tnl | ipip | sit | vxlan }\n"); + fprintf(stderr, " bridge | ipoib | ip6tnl | ipip | sit | vxlan |\n"); + fprintf(stderr, " gre | gretap | ip6gre | ip6gretap }\n"); } exit(-1); } @@ -243,7 +244,7 @@ static int iplink_parse_vf(int vf, int *argcp, char ***argvp, } ivt.vf = vf; addattr_l(&req->n, sizeof(*req), IFLA_VF_TX_RATE, &ivt, sizeof(ivt)); - + } else if (matches(*argv, "spoofchk") == 0) { struct ifla_vf_spoofchk ivs; NEXT_ARG(); @@ -286,7 +287,6 @@ static int iplink_parse_vf(int vf, int *argcp, char ***argvp, return 0; } - int iplink_parse(int argc, char **argv, struct iplink_req *req, char **name, char **type, char **link, char **dev, int *group) { @@ -811,7 +811,6 @@ static int set_address(struct ifreq *ifr, int brd) return 0; } - static int do_set(int argc, char **argv) { char *dev = 
NULL; diff --git a/ip/link_gre6.c b/ip/link_gre6.c new file mode 100644 index 00000000..4c9c5365 --- /dev/null +++ b/ip/link_gre6.c @@ -0,0 +1,398 @@ +/* + * link_gre6.c gre driver module + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * Authors: Dmitry Kozlov + * + */ + +#include +#include +#include +#include +#include + +#include +#include +#include + +#include "rt_names.h" +#include "utils.h" +#include "ip_common.h" +#include "tunnel.h" + +#define IP6_FLOWINFO_TCLASS htonl(0x0FF00000) +#define IP6_FLOWINFO_FLOWLABEL htonl(0x000FFFFF) + +#define DEFAULT_TNL_HOP_LIMIT (64) + +static void usage(void) __attribute__((noreturn)); +static void usage(void) +{ + fprintf(stderr, "Usage: ip link { add | set | change | replace | del } NAME\n"); + fprintf(stderr, " type { ip6gre | ip6gretap } [ remote ADDR ] [ local ADDR ]\n"); + fprintf(stderr, " [ [i|o]seq ] [ [i|o]key KEY ] [ [i|o]csum ]\n"); + fprintf(stderr, " [ hoplimit TTL ] [ encaplimit ELIM ]\n"); + fprintf(stderr, " [ tclass TCLASS ] [ flowlabel FLOWLABEL ]\n"); + fprintf(stderr, " [ dscp inherit ] [ dev PHYS_DEV ]\n"); + fprintf(stderr, "\n"); + fprintf(stderr, "Where: NAME := STRING\n"); + fprintf(stderr, " ADDR := IPV6_ADDRESS\n"); + fprintf(stderr, " TTL := { 0..255 } (default=%d)\n", + DEFAULT_TNL_HOP_LIMIT); + fprintf(stderr, " KEY := { DOTTED_QUAD | NUMBER }\n"); + fprintf(stderr, " ELIM := { none | 0..255 }(default=%d)\n", + IPV6_DEFAULT_TNL_ENCAP_LIMIT); + fprintf(stderr, " TCLASS := { 0x0..0xff | inherit }\n"); + fprintf(stderr, " FLOWLABEL := { 0x0..0xfffff | inherit }\n"); + exit(-1); +} + +static int gre_parse_opt(struct link_util *lu, int argc, char **argv, + struct nlmsghdr *n) +{ + struct { + struct nlmsghdr n; + struct ifinfomsg i; + char buf[1024]; + } req; + struct ifinfomsg *ifi = 
(struct ifinfomsg *)(n + 1); + struct rtattr *tb[IFLA_MAX + 1]; + struct rtattr *linkinfo[IFLA_INFO_MAX+1]; + struct rtattr *greinfo[IFLA_GRE_MAX + 1]; + __u16 iflags = 0; + __u16 oflags = 0; + unsigned ikey = 0; + unsigned okey = 0; + struct in6_addr raddr = IN6ADDR_ANY_INIT; + struct in6_addr laddr = IN6ADDR_ANY_INIT; + unsigned link = 0; + unsigned flowinfo = 0; + unsigned flags = 0; + __u8 hop_limit = DEFAULT_TNL_HOP_LIMIT; + __u8 encap_limit = IPV6_DEFAULT_TNL_ENCAP_LIMIT; + int len; + + if (!(n->nlmsg_flags & NLM_F_CREATE)) { + memset(&req, 0, sizeof(req)); + + req.n.nlmsg_len = NLMSG_LENGTH(sizeof(*ifi)); + req.n.nlmsg_flags = NLM_F_REQUEST; + req.n.nlmsg_type = RTM_GETLINK; + req.i.ifi_family = preferred_family; + req.i.ifi_index = ifi->ifi_index; + + if (rtnl_talk(&rth, &req.n, 0, 0, &req.n) < 0) { +get_failed: + fprintf(stderr, + "Failed to get existing tunnel info.\n"); + return -1; + } + + len = req.n.nlmsg_len; + len -= NLMSG_LENGTH(sizeof(*ifi)); + if (len < 0) + goto get_failed; + + parse_rtattr(tb, IFLA_MAX, IFLA_RTA(&req.i), len); + + if (!tb[IFLA_LINKINFO]) + goto get_failed; + + parse_rtattr_nested(linkinfo, IFLA_INFO_MAX, tb[IFLA_LINKINFO]); + + if (!linkinfo[IFLA_INFO_DATA]) + goto get_failed; + + parse_rtattr_nested(greinfo, IFLA_GRE_MAX, + linkinfo[IFLA_INFO_DATA]); + + if (greinfo[IFLA_GRE_IKEY]) + ikey = rta_getattr_u32(greinfo[IFLA_GRE_IKEY]); + + if (greinfo[IFLA_GRE_OKEY]) + okey = rta_getattr_u32(greinfo[IFLA_GRE_OKEY]); + + if (greinfo[IFLA_GRE_IFLAGS]) + iflags = rta_getattr_u16(greinfo[IFLA_GRE_IFLAGS]); + + if (greinfo[IFLA_GRE_OFLAGS]) + oflags = rta_getattr_u16(greinfo[IFLA_GRE_OFLAGS]); + + if (greinfo[IFLA_GRE_LOCAL]) + memcpy(&laddr, RTA_DATA(greinfo[IFLA_GRE_LOCAL]), sizeof(laddr)); + + if (greinfo[IFLA_GRE_REMOTE]) + memcpy(&raddr, RTA_DATA(greinfo[IFLA_GRE_REMOTE]), sizeof(raddr)); + + if (greinfo[IFLA_GRE_TTL]) + hop_limit = rta_getattr_u8(greinfo[IFLA_GRE_TTL]); + + if (greinfo[IFLA_GRE_LINK]) + link = 
rta_getattr_u32(greinfo[IFLA_GRE_LINK]); + + if (greinfo[IFLA_GRE_ENCAP_LIMIT]) + encap_limit = rta_getattr_u8(greinfo[IFLA_GRE_ENCAP_LIMIT]); + + if (greinfo[IFLA_GRE_FLOWINFO]) + flowinfo = rta_getattr_u32(greinfo[IFLA_GRE_FLOWINFO]); + + if (greinfo[IFLA_GRE_FLAGS]) + flags = rta_getattr_u32(greinfo[IFLA_GRE_FLAGS]); + } + + while (argc > 0) { + if (!matches(*argv, "key")) { + unsigned uval; + + NEXT_ARG(); + iflags |= GRE_KEY; + oflags |= GRE_KEY; + if (strchr(*argv, '.')) + uval = get_addr32(*argv); + else { + if (get_unsigned(&uval, *argv, 0) < 0) { + fprintf(stderr, + "Invalid value for \"key\"\n"); + exit(-1); + } + uval = htonl(uval); + } + + ikey = okey = uval; + } else if (!matches(*argv, "ikey")) { + unsigned uval; + + NEXT_ARG(); + iflags |= GRE_KEY; + if (strchr(*argv, '.')) + uval = get_addr32(*argv); + else { + if (get_unsigned(&uval, *argv, 0)<0) { + fprintf(stderr, "invalid value of \"ikey\"\n"); + exit(-1); + } + uval = htonl(uval); + } + ikey = uval; + } else if (!matches(*argv, "okey")) { + unsigned uval; + + NEXT_ARG(); + oflags |= GRE_KEY; + if (strchr(*argv, '.')) + uval = get_addr32(*argv); + else { + if (get_unsigned(&uval, *argv, 0)<0) { + fprintf(stderr, "invalid value of \"okey\"\n"); + exit(-1); + } + uval = htonl(uval); + } + okey = uval; + } else if (!matches(*argv, "seq")) { + iflags |= GRE_SEQ; + oflags |= GRE_SEQ; + } else if (!matches(*argv, "iseq")) { + iflags |= GRE_SEQ; + } else if (!matches(*argv, "oseq")) { + oflags |= GRE_SEQ; + } else if (!matches(*argv, "csum")) { + iflags |= GRE_CSUM; + oflags |= GRE_CSUM; + } else if (!matches(*argv, "icsum")) { + iflags |= GRE_CSUM; + } else if (!matches(*argv, "ocsum")) { + oflags |= GRE_CSUM; + } else if (!matches(*argv, "remote")) { + inet_prefix addr; + NEXT_ARG(); + get_prefix(&addr, *argv, preferred_family); + if (addr.family == AF_UNSPEC) + invarg("\"remote\" address family is AF_UNSPEC", *argv); + memcpy(&raddr, &addr.data, sizeof(raddr)); + } else if (!matches(*argv, "local")) 
{ + inet_prefix addr; + NEXT_ARG(); + get_prefix(&addr, *argv, preferred_family); + if (addr.family == AF_UNSPEC) + invarg("\"local\" address family is AF_UNSPEC", *argv); + memcpy(&laddr, &addr.data, sizeof(laddr)); + } else if (!matches(*argv, "dev")) { + NEXT_ARG(); + link = if_nametoindex(*argv); + if (link == 0) + exit(-1); + } else if (!matches(*argv, "ttl") || + !matches(*argv, "hoplimit")) { + __u8 uval; + NEXT_ARG(); + if (get_u8(&uval, *argv, 0)) + invarg("invalid TTL", *argv); + hop_limit = uval; + } else if (!matches(*argv, "tos") || + !matches(*argv, "tclass") || + !matches(*argv, "dsfield")) { + __u8 uval; + NEXT_ARG(); + if (strcmp(*argv, "inherit") == 0) + flags |= IP6_TNL_F_USE_ORIG_TCLASS; + else { + if (get_u8(&uval, *argv, 16)) + invarg("invalid TClass", *argv); + flowinfo |= htonl((__u32)uval << 20) & IP6_FLOWINFO_TCLASS; + flags &= ~IP6_TNL_F_USE_ORIG_TCLASS; + } + } else if (strcmp(*argv, "flowlabel") == 0 || + strcmp(*argv, "fl") == 0) { + __u32 uval; + NEXT_ARG(); + if (strcmp(*argv, "inherit") == 0) + flags |= IP6_TNL_F_USE_ORIG_FLOWLABEL; + else { + if (get_u32(&uval, *argv, 16)) + invarg("invalid Flowlabel", *argv); + if (uval > 0xFFFFF) + invarg("invalid Flowlabel", *argv); + flowinfo |= htonl(uval) & IP6_FLOWINFO_FLOWLABEL; + flags &= ~IP6_TNL_F_USE_ORIG_FLOWLABEL; + } + } else if (strcmp(*argv, "dscp") == 0) { + NEXT_ARG(); + if (strcmp(*argv, "inherit") != 0) + invarg("not inherit", *argv); + flags |= IP6_TNL_F_RCV_DSCP_COPY; + } else + usage(); + argc--; argv++; + } + + addattr32(n, 1024, IFLA_GRE_IKEY, ikey); + addattr32(n, 1024, IFLA_GRE_OKEY, okey); + addattr_l(n, 1024, IFLA_GRE_IFLAGS, &iflags, 2); + addattr_l(n, 1024, IFLA_GRE_OFLAGS, &oflags, 2); + addattr_l(n, 1024, IFLA_GRE_LOCAL, &laddr, sizeof(laddr)); + addattr_l(n, 1024, IFLA_GRE_REMOTE, &raddr, sizeof(raddr)); + if (link) + addattr32(n, 1024, IFLA_GRE_LINK, link); + addattr_l(n, 1024, IFLA_GRE_TTL, &hop_limit, 1); + addattr_l(n, 1024, IFLA_GRE_ENCAP_LIMIT, &encap_limit, 
1); + addattr_l(n, 1024, IFLA_GRE_FLOWINFO, &flowinfo, 4); + addattr_l(n, 1024, IFLA_GRE_FLAGS, &flags, 4); + + return 0; +} + +static void gre_print_opt(struct link_util *lu, FILE *f, struct rtattr *tb[]) +{ + char s1[1024]; + char s2[64]; + const char *local = "any"; + const char *remote = "any"; + unsigned iflags = 0; + unsigned oflags = 0; + unsigned flags = 0; + unsigned flowinfo = 0; + struct in6_addr in6_addr_any = IN6ADDR_ANY_INIT; + + if (!tb) + return; + + if (tb[IFLA_GRE_FLAGS]) + flags = rta_getattr_u32(tb[IFLA_GRE_FLAGS]); + + if (tb[IFLA_GRE_FLOWINFO]) + flowinfo = rta_getattr_u32(tb[IFLA_GRE_FLOWINFO]); + + if (tb[IFLA_GRE_REMOTE]) { + struct in6_addr addr; + memcpy(&addr, RTA_DATA(tb[IFLA_GRE_REMOTE]), sizeof(addr)); + + if (memcmp(&addr, &in6_addr_any, sizeof(addr))) + remote = format_host(AF_INET6, sizeof(addr), &addr, s1, sizeof(s1)); + } + + fprintf(f, "remote %s ", remote); + + if (tb[IFLA_GRE_LOCAL]) { + struct in6_addr addr; + memcpy(&addr, RTA_DATA(tb[IFLA_GRE_LOCAL]), sizeof(addr)); + + if (memcmp(&addr, &in6_addr_any, sizeof(addr))) + local = format_host(AF_INET6, sizeof(addr), &addr, s1, sizeof(s1)); + } + + fprintf(f, "local %s ", local); + + if (tb[IFLA_GRE_LINK] && rta_getattr_u32(tb[IFLA_GRE_LINK])) { + unsigned link = rta_getattr_u32(tb[IFLA_GRE_LINK]); + const char *n = if_indextoname(link, s2); + + if (n) + fprintf(f, "dev %s ", n); + else + fprintf(f, "dev %u ", link); + } + + if (tb[IFLA_GRE_TTL] && rta_getattr_u8(tb[IFLA_GRE_TTL])) + fprintf(f, "hoplimit %d ", rta_getattr_u8(tb[IFLA_GRE_TTL])); + + if (flags & IP6_TNL_F_IGN_ENCAP_LIMIT) + fprintf(f, "encaplimit none "); + else if (tb[IFLA_GRE_ENCAP_LIMIT]) { + int encap_limit = rta_getattr_u8(tb[IFLA_GRE_ENCAP_LIMIT]); + + fprintf(f, "encaplimit %d ", encap_limit); + } + + if (flags & IP6_TNL_F_USE_ORIG_FLOWLABEL) + fprintf(f, "flowlabel inherit "); + else + fprintf(f, "flowlabel 0x%05x ", ntohl(flowinfo & IP6_FLOWINFO_FLOWLABEL)); + + if (flags & IP6_TNL_F_RCV_DSCP_COPY) +
fprintf(f, "dscp inherit "); + + if (tb[IFLA_GRE_IFLAGS]) + iflags = rta_getattr_u16(tb[IFLA_GRE_IFLAGS]); + + if (tb[IFLA_GRE_OFLAGS]) + oflags = rta_getattr_u16(tb[IFLA_GRE_OFLAGS]); + + if ((iflags & GRE_KEY) && tb[IFLA_GRE_IKEY]) { + inet_ntop(AF_INET, RTA_DATA(tb[IFLA_GRE_IKEY]), s2, sizeof(s2)); + fprintf(f, "ikey %s ", s2); + } + + if ((oflags & GRE_KEY) && tb[IFLA_GRE_OKEY]) { + inet_ntop(AF_INET, RTA_DATA(tb[IFLA_GRE_OKEY]), s2, sizeof(s2)); + fprintf(f, "okey %s ", s2); + } + + if (iflags & GRE_SEQ) + fputs("iseq ", f); + if (oflags & GRE_SEQ) + fputs("oseq ", f); + if (iflags & GRE_CSUM) + fputs("icsum ", f); + if (oflags & GRE_CSUM) + fputs("ocsum ", f); +} + +struct link_util ip6gre_link_util = { + .id = "ip6gre", + .maxattr = IFLA_GRE_MAX, + .parse_opt = gre_parse_opt, + .print_opt = gre_print_opt, +}; + +struct link_util ip6gretap_link_util = { + .id = "ip6gretap", + .maxattr = IFLA_GRE_MAX, + .parse_opt = gre_parse_opt, + .print_opt = gre_print_opt, +}; From 4e20cc55e9c1196d0ef97e81357e5fb43712a65e Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Mon, 30 Sep 2013 21:40:05 -0700 Subject: [PATCH 17/25] ipv6 gre: add entry to ether types --- lib/ll_types.c | 1 + 1 file changed, 1 insertion(+) diff --git a/lib/ll_types.c b/lib/ll_types.c index 448892bf..0c391836 100644 --- a/lib/ll_types.c +++ b/lib/ll_types.c @@ -103,6 +103,7 @@ __PF(IEEE802154, ieee802.15.4) __PF(PHONET, phonet) __PF(PHONET_PIPE, phonet_pipe) __PF(CAIF, caif) +__PF(IP6GRE, gre6) __PF(NONE, none) __PF(VOID,void) From 9abde37cde92e9b84f2f72aad9a03635972ffdf3 Mon Sep 17 00:00:00 2001 From: "xeb@mail.ru" Date: Tue, 1 Oct 2013 11:18:34 +0400 Subject: [PATCH 18/25] iproute2: ip6gre: update man pages Update man pages with ip6gre info. 
Signed-off-by: Dmitry Kozlov --- man/man8/ip-link.8.in | 124 +++++++++++++++++++++++++++++++++++++++++- man/man8/ip-tunnel.8 | 4 +- 2 files changed, 125 insertions(+), 3 deletions(-) diff --git a/man/man8/ip-link.8.in b/man/man8/ip-link.8.in index 76f92ddb..8b68c78d 100644 --- a/man/man8/ip-link.8.in +++ b/man/man8/ip-link.8.in @@ -62,7 +62,11 @@ ip-link \- network device configuration .BR vxlan " |" .BR ip6tnl " |" .BR ipip " |" -.BR sit " ]" +.BR sit " |" +.BR gre " |" +.BR gretap " |" +.BR ip6gre " |" +.BR ip6gretap " ]" .ti -8 .BI "ip link delete " DEVICE @@ -186,6 +190,18 @@ Link types: .sp .BR sit - Virtual tunnel interface IPv6 over IPv4 +.sp +.BR gre +- Virtual tunnel interface GRE over IPv4 +.sp +.BR gretap +- Virtual L2 tunnel interface GRE over IPv4 +.sp +.BR ip6gre +- Virtual tunnel interface GRE over IPv6 +.sp +.BR ip6gretap +- Virtual L2 tunnel interface GRE over IPv6 .in -8 .TP @@ -292,6 +308,112 @@ are entered into the VXLAN device forwarding database. .in -8 +.TP +IP6GRE/IP6GRETAP Type Support +For a link of type +.I IP6GRE/IP6GRETAP +the following additional arguments are supported: + +.BI "ip link add " DEVICE +.BI type " { ip6gre | ip6gretap } " remote " ADDR " local " ADDR +.R " [ " +.I "[i|o]seq" +.R " ] [ " +.I "[i|o]key" KEY +.R " ] [ " +.I " [i|o]csum " +.R " ] [ " +.BI hoplimit " TTL " +.R " ] [ " +.BI encaplimit " ELIM " +.R " ] [ " +.BI tclass " TCLASS " +.R " ] [ " +.BI flowlabel " FLOWLABEL " +.R " ] [ " +.BI "dscp inherit" +.R " ] [ " +.BI dev " PHYS_DEV " +.R " ]" + +.in +8 +.sp +.BI remote " ADDR " +- specifies the remote IPv6 address of the tunnel. + +.sp +.BI local " ADDR " +- specifies the fixed local IPv6 address for tunneled packets. +It must be an address on another interface on this host. + +.sp +.BI [i|o]seq +- serialize packets. +The +.B oseq +flag enables sequencing of outgoing packets. +The +.B iseq +flag requires that all input packets are serialized. 
+ +.sp +.BI [i|o]key " KEY" +- use keyed GRE with key +.IR KEY ". 
"KEY +is either a number or an IPv4 address-like dotted quad. +The +.B key +parameter specifies the same key to use in both directions. +The +.BR ikey " and " okey +parameters specify different keys for input and output. + +.sp +.BI [i|o]csum +- generate/require checksums for tunneled packets. +The +.B ocsum +flag calculates checksums for outgoing packets. +The +.B icsum +flag requires that all input packets have the correct +checksum. The +.B csum +flag is equivalent to the combination +.BR "icsum ocsum" . + +.sp +.BI hoplimit " TTL" +- specifies Hop Limit value to use in outgoing packets. + +.sp +.BI encaplimit " ELIM" +- specifies a fixed encapsulation limit. Default is 4. + +.sp +.BI flowlabel " FLOWLABEL" +- specifies a fixed flowlabel. + +.sp +.BI tclass " TCLASS" +- specifies the traffic class field on +tunneled packets, which can be specified as either a two-digit +hex value (e.g. c0) or a predefined string (e.g. internet). +The value +.B inherit +causes the field to be copied from the original IP header. The +values +.BI "inherit/" STRING +or +.BI "inherit/" 00 ".." ff +will set the field to +.I STRING +or +.IR 00 ".." ff +when tunneling non-IP packets. The default value is 00. + +.in -8 + .SS ip link delete - delete virtual link .I DEVICE specifies the virtual device to act operate on. diff --git a/man/man8/ip-tunnel.8 b/man/man8/ip-tunnel.8 index 697e80d5..fc2d6bde 100644 --- a/man/man8/ip-tunnel.8 +++ b/man/man8/ip-tunnel.8 @@ -50,7 +50,7 @@ ip-tunnel - tunnel configuration .ti -8 .IR MODE " := " -.RB " { " ipip " | " gre " | " sit " | " isatap " | " ip6ip6 " | " ipip6 " | " any " }" +.RB " { " ipip " | " gre " | " sit " | " isatap " | " ip6ip6 " | " ipip6 " | " ip6gre " | " any " }" .ti -8 .IR ADDR " := { " IP_ADDRESS " |" @@ -110,7 +110,7 @@ Modes for IPv4 encapsulation available: .BR ipip ", " sit ", " isatap " and " gre "." .br Modes for IPv6 encapsulation available: -.BR ip6ip6 ", " ipip6 " and " any "." 
+.BR ip6ip6 ", " ipip6 ", " ip6gre ", and " any "." .TP .BI remote " ADDRESS" From b557416532f3db745cb9cceaaf343b4bc5b57003 Mon Sep 17 00:00:00 2001 From: Christophe Gouault Date: Tue, 8 Oct 2013 05:56:54 -0700 Subject: [PATCH 19/25] xfrm: enable to set non-wildcard mark 0 on SAs and SPs ip xfrm considers that the user-defined mark is "any" as soon as (mark.v & mark.m == 0), which prevents from specifying non-wildcard marks that include the value 0 (typically 0/0xffffffff). Yet, matching exactly mark 0 is useful for instance to separate vti policies from global policies. Always configure the user mark if mark.m != 0. Signed-off-by: Christophe Gouault --- ip/xfrm_policy.c | 2 +- ip/xfrm_state.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/ip/xfrm_policy.c b/ip/xfrm_policy.c index 36e33c98..a8d8b98b 100644 --- a/ip/xfrm_policy.c +++ b/ip/xfrm_policy.c @@ -373,7 +373,7 @@ static int xfrm_policy_modify(int cmd, unsigned flags, int argc, char **argv) (void *)tmpls_buf, tmpls_len); } - if (mark.m & mark.v) { + if (mark.m) { int r = addattr_l(&req.n, sizeof(req.buf), XFRMA_MARK, (void *)&mark, sizeof(mark)); if (r < 0) { diff --git a/ip/xfrm_state.c b/ip/xfrm_state.c index f4ad4cb1..c4d2bf67 100644 --- a/ip/xfrm_state.c +++ b/ip/xfrm_state.c @@ -528,7 +528,7 @@ static int xfrm_state_modify(int cmd, unsigned flags, int argc, char **argv) exit(1); } - if (mark.m & mark.v) { + if (mark.m) { int r = addattr_l(&req.n, sizeof(req.buf), XFRMA_MARK, (void *)&mark, sizeof(mark)); if (r < 0) { From 1253a10a63e417c5a7774c127aea1a3b0dd09e88 Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Tue, 8 Oct 2013 07:59:45 -0700 Subject: [PATCH 20/25] iplink: update available type list macvtap and vti were missing. 
Signed-off-by: Nicolas Dichtel --- ip/iplink.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/ip/iplink.c b/ip/iplink.c index 16cb6fed..6cde731a 100644 --- a/ip/iplink.c +++ b/ip/iplink.c @@ -84,9 +84,9 @@ void iplink_usage(void) if (iplink_have_newlink()) { fprintf(stderr, "\n"); - fprintf(stderr, "TYPE := { vlan | veth | vcan | dummy | ifb | macvlan | can |\n"); - fprintf(stderr, " bridge | ipoib | ip6tnl | ipip | sit | vxlan |\n"); - fprintf(stderr, " gre | gretap | ip6gre | ip6gretap }\n"); + fprintf(stderr, "TYPE := { vlan | veth | vcan | dummy | ifb | macvlan | macvtap |\n"); + fprintf(stderr, " can | bridge | ipoib | ip6tnl | ipip | sit | vxlan |\n"); + fprintf(stderr, " gre | gretap | ip6gre | ip6gretap | vti }\n"); } exit(-1); } From 0a502b21e30be835dcad8d9c6023a41da8709eb1 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Sun, 27 Oct 2013 12:26:47 -0700 Subject: [PATCH 21/25] Fix handling of qdis without options Some qdisc like htb want the parse_qopt to be called even if no options present. 
Fixes regression caused by: e9e78b0db0e023035e346ba67de838be851eb665 is the first bad commit commit e9e78b0db0e023035e346ba67de838be851eb665 Author: Stephen Hemminger Date: Mon Aug 26 08:41:19 2013 -0700 tc: allow qdisc without options --- tc/tc_qdisc.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/tc/tc_qdisc.c b/tc/tc_qdisc.c index 3002a56c..e3048580 100644 --- a/tc/tc_qdisc.c +++ b/tc/tc_qdisc.c @@ -137,15 +137,16 @@ static int tc_qdisc_modify(int cmd, unsigned flags, int argc, char **argv) if (est.ewma_log) addattr_l(&req.n, sizeof(req), TCA_RATE, &est, sizeof(est)); - if (argc) { - if (q) { - if (!q->parse_qopt) { - fprintf(stderr, "qdisc '%s' does not support option parsing\n", k); - return -1; - } + if (q) { + if (q->parse_qopt) { if (q->parse_qopt(q, argc, argv, &req.n)) return 1; - } else { + } else if (argc) { + fprintf(stderr, "qdisc '%s' does not support option parsing\n", k); + return -1; + } + } else { + if (argc) { if (matches(*argv, "help") == 0) usage(); From 734c0ca2cabf96e2ac1abe6e1b0968d0b5b03b11 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Sun, 27 Oct 2013 12:28:38 -0700 Subject: [PATCH 22/25] htb: remove old unused duplicate qdisc name Alexey had htb2 as name for version in ancient code. 
--- tc/q_htb.c | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/tc/q_htb.c b/tc/q_htb.c index 6737ddb0..e108857d 100644 --- a/tc/q_htb.c +++ b/tc/q_htb.c @@ -346,13 +346,3 @@ struct qdisc_util htb_qdisc_util = { .parse_copt = htb_parse_class_opt, .print_copt = htb_print_opt, }; - -/* for testing of old one */ -struct qdisc_util htb2_qdisc_util = { - .id = "htb2", - .parse_qopt = htb_parse_opt, - .print_qopt = htb_print_opt, - .print_xstats = htb_print_xstats, - .parse_copt = htb_parse_class_opt, - .print_copt = htb_print_opt, -}; From 03ddbbd5ad9b60ff51d04fcd838375cc98173687 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Wed, 30 Oct 2013 16:36:47 -0700 Subject: [PATCH 23/25] update kernel headers --- include/linux/tc_act/tc_defact.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/tc_act/tc_defact.h b/include/linux/tc_act/tc_defact.h index 6f65d07c..17dddb40 100644 --- a/include/linux/tc_act/tc_defact.h +++ b/include/linux/tc_act/tc_defact.h @@ -6,7 +6,7 @@ struct tc_defact { tc_gen; }; - + enum { TCA_DEF_UNSPEC, TCA_DEF_TM, From aa574cd60e57d6a31b4d433e0b09bfacd2cb79d1 Mon Sep 17 00:00:00 2001 From: WANG Cong Date: Wed, 16 Oct 2013 22:03:48 -0700 Subject: [PATCH 24/25] vxlan: add ipv6 support The kernel already supports it, so add the support to iproute2 as well. 
Cc: Stephen Hemminger Signed-off-by: Cong Wang --- include/utils.h | 1 + ip/iplink_vxlan.c | 59 +++++++++++++++++++++++++++++++++++++++-------- lib/utils.c | 8 +++++++ 3 files changed, 58 insertions(+), 10 deletions(-) diff --git a/include/utils.h b/include/utils.h index 24ff19f8..a3e310ee 100644 --- a/include/utils.h +++ b/include/utils.h @@ -151,6 +151,7 @@ int print_timestamp(FILE *fp); extern int cmdlineno; extern ssize_t getcmdline(char **line, size_t *len, FILE *in); extern int makeargs(char *line, char *argv[], int maxargs); +extern int inet_get_addr(const char *src, __u32 *dst, struct in6_addr *dst6); struct iplink_req; int iplink_parse(int argc, char **argv, struct iplink_req *req, diff --git a/ip/iplink_vxlan.c b/ip/iplink_vxlan.c index 4304b0d1..aa551d89 100644 --- a/ip/iplink_vxlan.c +++ b/ip/iplink_vxlan.c @@ -43,6 +43,9 @@ static int vxlan_parse_opt(struct link_util *lu, int argc, char **argv, __u32 saddr = 0; __u32 gaddr = 0; __u32 daddr = 0; + struct in6_addr saddr6 = IN6ADDR_ANY_INIT; + struct in6_addr gaddr6 = IN6ADDR_ANY_INIT; + struct in6_addr daddr6 = IN6ADDR_ANY_INIT; unsigned link = 0; __u8 tos = 0; __u8 ttl = 0; @@ -66,21 +69,30 @@ static int vxlan_parse_opt(struct link_util *lu, int argc, char **argv, vni_set = 1; } else if (!matches(*argv, "group")) { NEXT_ARG(); - gaddr = get_addr32(*argv); - - if (!IN_MULTICAST(ntohl(gaddr))) + if (!inet_get_addr(*argv, &gaddr, &gaddr6)) { + fprintf(stderr, "Invalid address \"%s\"\n", *argv); + return -1; + } + if (!IN6_IS_ADDR_MULTICAST(&gaddr6) && !IN_MULTICAST(ntohl(gaddr))) invarg("invalid group address", *argv); } else if (!matches(*argv, "remote")) { NEXT_ARG(); - daddr = get_addr32(*argv); - - if (IN_MULTICAST(ntohl(daddr))) + if (!inet_get_addr(*argv, &daddr, &daddr6)) { + fprintf(stderr, "Invalid address \"%s\"\n", *argv); + return -1; + } + if (IN6_IS_ADDR_MULTICAST(&daddr6) || IN_MULTICAST(ntohl(daddr))) invarg("invalid remote address", *argv); } else if (!matches(*argv, "local")) { 
NEXT_ARG(); - if (strcmp(*argv, "any")) - saddr = get_addr32(*argv); - if (IN_MULTICAST(ntohl(saddr))) + if (strcmp(*argv, "any")) { + if (!inet_get_addr(*argv, &saddr, &saddr6)) { + fprintf(stderr, "Invalid address \"%s\"\n", *argv); + return -1; + } + } + + if (IN_MULTICAST(ntohl(saddr)) || IN6_IS_ADDR_MULTICAST(&saddr6)) invarg("invalid local address", *argv); } else if (!matches(*argv, "dev")) { NEXT_ARG(); @@ -167,7 +179,9 @@ static int vxlan_parse_opt(struct link_util *lu, int argc, char **argv, fprintf(stderr, "vxlan: missing virtual network identifier\n"); return -1; } - if (gaddr && daddr) { + if ((gaddr && daddr) || + (memcmp(&gaddr6, &in6addr_any, sizeof(gaddr6)) && + memcmp(&daddr6, &in6addr_any, sizeof(daddr6)))) { fprintf(stderr, "vxlan: both group and remote cannot be specified\n"); return -1; } @@ -176,8 +190,16 @@ static int vxlan_parse_opt(struct link_util *lu, int argc, char **argv, addattr_l(n, 1024, IFLA_VXLAN_GROUP, &gaddr, 4); else if (daddr) addattr_l(n, 1024, IFLA_VXLAN_GROUP, &daddr, 4); + if (memcmp(&gaddr6, &in6addr_any, sizeof(gaddr6)) != 0) + addattr_l(n, 1024, IFLA_VXLAN_GROUP6, &gaddr6, sizeof(struct in6_addr)); + else if (memcmp(&daddr6, &in6addr_any, sizeof(daddr6)) != 0) + addattr_l(n, 1024, IFLA_VXLAN_GROUP6, &daddr6, sizeof(struct in6_addr)); + if (saddr) addattr_l(n, 1024, IFLA_VXLAN_LOCAL, &saddr, 4); + else if (memcmp(&saddr6, &in6addr_any, sizeof(saddr6)) != 0) + addattr_l(n, 1024, IFLA_VXLAN_LOCAL6, &saddr6, sizeof(struct in6_addr)); + if (link) addattr32(n, 1024, IFLA_VXLAN_LINK, link); addattr8(n, 1024, IFLA_VXLAN_TTL, ttl); @@ -229,6 +251,17 @@ static void vxlan_print_opt(struct link_util *lu, FILE *f, struct rtattr *tb[]) fprintf(f, "remote %s ", format_host(AF_INET, 4, &addr, s1, sizeof(s1))); } + } else if (tb[IFLA_VXLAN_GROUP6]) { + struct in6_addr addr; + memcpy(&addr, RTA_DATA(tb[IFLA_VXLAN_GROUP6]), sizeof(struct in6_addr)); + if (memcmp(&addr, &in6addr_any, sizeof(addr)) != 0) { + if 
(IN6_IS_ADDR_MULTICAST(&addr)) + fprintf(f, "group %s ", + format_host(AF_INET6, sizeof(struct in6_addr), &addr, s1, sizeof(s1))); + else + fprintf(f, "remote %s ", + format_host(AF_INET6, sizeof(struct in6_addr), &addr, s1, sizeof(s1))); + } } if (tb[IFLA_VXLAN_LOCAL]) { @@ -236,6 +269,12 @@ static void vxlan_print_opt(struct link_util *lu, FILE *f, struct rtattr *tb[]) if (addr) fprintf(f, "local %s ", format_host(AF_INET, 4, &addr, s1, sizeof(s1))); + } else if (tb[IFLA_VXLAN_LOCAL6]) { + struct in6_addr addr; + memcpy(&addr, RTA_DATA(tb[IFLA_VXLAN_LOCAL6]), sizeof(struct in6_addr)); + if (memcmp(&addr, &in6addr_any, sizeof(addr)) != 0) + fprintf(f, "local %s ", + format_host(AF_INET6, sizeof(struct in6_addr), &addr, s1, sizeof(s1))); } if (tb[IFLA_VXLAN_LINK] && diff --git a/lib/utils.c b/lib/utils.c index dae1b518..4e9c719a 100644 --- a/lib/utils.c +++ b/lib/utils.c @@ -868,3 +868,11 @@ int makeargs(char *line, char *argv[], int maxargs) return argc; } + +int inet_get_addr(const char *src, __u32 *dst, struct in6_addr *dst6) +{ + if (strchr(src, ':')) + return inet_pton(AF_INET6, src, dst6); + else + return inet_pton(AF_INET, src, dst); +} From 9bea14ff6b64f3fe56af2d93ca70ac56b355027e Mon Sep 17 00:00:00 2001 From: Nigel Kukard Date: Wed, 30 Oct 2013 18:44:58 +0000 Subject: [PATCH 25/25] Fix tc stats when using -batch mode There are two global variables in tc/tc_class.c: __u32 filter_qdisc; __u32 filter_classid; These are not re-initialized for each line received in -batch mode: class show dev eth0 parent 1: classid 1:1 class show dev eth0 parent 1: classid 1:1 Error: duplicate "classid": "1:1" is the second value. This patch fixes the issue by initializing the two globals when we enter print_class(). 
Signed-off-by: Nigel Kukard --- tc/tc_class.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tc/tc_class.c b/tc/tc_class.c index 6c0850d0..e56bf07c 100644 --- a/tc/tc_class.c +++ b/tc/tc_class.c @@ -241,6 +241,9 @@ static int tc_class_list(int argc, char **argv) t.tcm_family = AF_UNSPEC; memset(d, 0, sizeof(d)); + filter_qdisc = 0; + filter_classid = 0; + while (argc > 0) { if (strcmp(*argv, "dev") == 0) { NEXT_ARG();