From 6d64ec0237751a98f5fc0d22e7ddc8cd069bc633 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 29 Aug 2013 19:30:36 -0700 Subject: [PATCH 1/7] pkt_sched: fq: Fair Queue packet scheduler Support for FQ packet scheduler $ tc qd add dev eth0 root fq help Usage: ... fq [ limit PACKETS ] [ flow_limit PACKETS ] [ quantum BYTES ] [ initial_quantum BYTES ] [ maxrate RATE ] [ buckets NUMBER ] [ [no]pacing ] $ tc -s -d qd qdisc fq 8002: dev eth0 root refcnt 32 limit 10000p flow_limit 100p buckets 256 quantum 3028 initial_quantum 15140 Sent 216532416 bytes 148395 pkt (dropped 0, overlimits 0 requeues 14) backlog 0b 0p requeues 14 511 flows (511 inactive, 0 throttled) 110 gc, 0 highprio, 0 retrans, 1143 throttled, 0 flows_plimit limit : max number of packets on whole Qdisc (default 10000) flow_limit : max number of packets per flow (default 100) quantum : the max deficit per RR round (default is 2 MTU) initial_quantum : initial credit for new flows (default is 10 MTU) maxrate : max per flow rate (default : unlimited) buckets : number of RB trees (default : 1024) in hash table. 
(consumes 8 bytes per bucket) [no]pacing : disable/enable pacing (default is enable) Usage : tc qdisc add dev $ETH root fq tc qdisc del dev $ETH root 2>/dev/null tc qdisc add dev $ETH root handle 1: mq for i in `seq 1 4` do tc qdisc add dev $ETH parent 1:$i est 1sec 4sec fq done Signed-off-by: Eric Dumazet --- tc/Makefile | 1 + tc/q_fq.c | 279 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 280 insertions(+) create mode 100644 tc/q_fq.c diff --git a/tc/Makefile b/tc/Makefile index f26e7646..1eeabd8c 100644 --- a/tc/Makefile +++ b/tc/Makefile @@ -50,6 +50,7 @@ TCMODULES += em_meta.o TCMODULES += q_mqprio.o TCMODULES += q_codel.o TCMODULES += q_fq_codel.o +TCMODULES += q_fq.o ifeq ($(TC_CONFIG_IPSET), y) ifeq ($(TC_CONFIG_XT), y) diff --git a/tc/q_fq.c b/tc/q_fq.c new file mode 100644 index 00000000..c0bcdb99 --- /dev/null +++ b/tc/q_fq.c @@ -0,0 +1,279 @@ +/* + * Fair Queue + * + * Copyright (C) 2013 Eric Dumazet + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions, and the following disclaimer, + * without modification. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. The names of the authors may not be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * Alternatively, provided that this notice is retained in full, this + * software may be distributed under the terms of the GNU General + * Public License ("GPL") version 2, in which case the provisions of the + * GPL apply INSTEAD OF those given above. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH + * DAMAGE. + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "utils.h" +#include "tc_util.h" + +static void explain(void) +{ + fprintf(stderr, "Usage: ... 
fq [ limit PACKETS ] [ flow_limit PACKETS ]\n"); + fprintf(stderr, " [ quantum BYTES ] [ initial_quantum BYTES ]\n"); + fprintf(stderr, " [ maxrate RATE ] [ buckets NUMBER ]\n"); + fprintf(stderr, " [ [no]pacing ]\n"); +} + +static unsigned int ilog2(unsigned int val) +{ + unsigned int res = 0; + + val--; + while (val) { + res++; + val >>= 1; + } + return res; +} + +static int fq_parse_opt(struct qdisc_util *qu, int argc, char **argv, + struct nlmsghdr *n) +{ + unsigned int plimit = ~0U; + unsigned int flow_plimit = ~0U; + unsigned int quantum = ~0U; + unsigned int initial_quantum = ~0U; + unsigned int buckets = 0; + unsigned int maxrate = ~0U; + unsigned int defrate = ~0U; + int pacing = -1; + struct rtattr *tail; + + while (argc > 0) { + if (strcmp(*argv, "limit") == 0) { + NEXT_ARG(); + if (get_unsigned(&plimit, *argv, 0)) { + fprintf(stderr, "Illegal \"limit\"\n"); + return -1; + } + } else if (strcmp(*argv, "flow_limit") == 0) { + NEXT_ARG(); + if (get_unsigned(&flow_plimit, *argv, 0)) { + fprintf(stderr, "Illegal \"flow_limit\"\n"); + return -1; + } + } else if (strcmp(*argv, "buckets") == 0) { + NEXT_ARG(); + if (get_unsigned(&buckets, *argv, 0)) { + fprintf(stderr, "Illegal \"buckets\"\n"); + return -1; + } + } else if (strcmp(*argv, "maxrate") == 0) { + NEXT_ARG(); + if (get_rate(&maxrate, *argv)) { + fprintf(stderr, "Illegal \"maxrate\"\n"); + return -1; + } + } else if (strcmp(*argv, "defrate") == 0) { + NEXT_ARG(); + if (get_rate(&defrate, *argv)) { + fprintf(stderr, "Illegal \"defrate\"\n"); + return -1; + } + } else if (strcmp(*argv, "quantum") == 0) { + NEXT_ARG(); + if (get_unsigned(&quantum, *argv, 0)) { + fprintf(stderr, "Illegal \"quantum\"\n"); + return -1; + } + } else if (strcmp(*argv, "initial_quantum") == 0) { + NEXT_ARG(); + if (get_unsigned(&initial_quantum, *argv, 0)) { + fprintf(stderr, "Illegal \"initial_quantum\"\n"); + return -1; + } + } else if (strcmp(*argv, "pacing") == 0) { + pacing = 1; + } else if (strcmp(*argv, "nopacing") == 
0) { + pacing = 0; + } else if (strcmp(*argv, "help") == 0) { + explain(); + return -1; + } else { + fprintf(stderr, "What is \"%s\"?\n", *argv); + explain(); + return -1; + } + argc--; argv++; + } + + tail = NLMSG_TAIL(n); + addattr_l(n, 1024, TCA_OPTIONS, NULL, 0); + if (buckets) { + unsigned int log = ilog2(buckets); + + addattr_l(n, 1024, TCA_FQ_BUCKETS_LOG, + &log, sizeof(log)); + } + if (plimit != ~0U) + addattr_l(n, 1024, TCA_FQ_PLIMIT, + &plimit, sizeof(plimit)); + if (flow_plimit != ~0U) + addattr_l(n, 1024, TCA_FQ_FLOW_PLIMIT, + &flow_plimit, sizeof(flow_plimit)); + if (quantum != ~0U) + addattr_l(n, 1024, TCA_FQ_QUANTUM, &quantum, sizeof(quantum)); + if (initial_quantum != ~0U) + addattr_l(n, 1024, TCA_FQ_INITIAL_QUANTUM, + &initial_quantum, sizeof(initial_quantum)); + if (pacing != -1) + addattr_l(n, 1024, TCA_FQ_RATE_ENABLE, + &pacing, sizeof(pacing)); + if (maxrate != ~0U) + addattr_l(n, 1024, TCA_FQ_FLOW_MAX_RATE, + &maxrate, sizeof(maxrate)); + if (defrate != ~0U) + addattr_l(n, 1024, TCA_FQ_FLOW_DEFAULT_RATE, + &defrate, sizeof(defrate)); + tail->rta_len = (void *) NLMSG_TAIL(n) - (void *) tail; + return 0; +} + +static int fq_print_opt(struct qdisc_util *qu, FILE *f, struct rtattr *opt) +{ + struct rtattr *tb[TCA_FQ_MAX + 1]; + unsigned int plimit, flow_plimit; + unsigned int buckets_log; + int pacing; + unsigned int rate, quantum; + SPRINT_BUF(b1); + + if (opt == NULL) + return 0; + + parse_rtattr_nested(tb, TCA_FQ_MAX, opt); + + if (tb[TCA_FQ_PLIMIT] && + RTA_PAYLOAD(tb[TCA_FQ_PLIMIT]) >= sizeof(__u32)) { + plimit = rta_getattr_u32(tb[TCA_FQ_PLIMIT]); + fprintf(f, "limit %up ", plimit); + } + if (tb[TCA_FQ_FLOW_PLIMIT] && + RTA_PAYLOAD(tb[TCA_FQ_FLOW_PLIMIT]) >= sizeof(__u32)) { + flow_plimit = rta_getattr_u32(tb[TCA_FQ_FLOW_PLIMIT]); + fprintf(f, "flow_limit %up ", flow_plimit); + } + if (tb[TCA_FQ_BUCKETS_LOG] && + RTA_PAYLOAD(tb[TCA_FQ_BUCKETS_LOG]) >= sizeof(__u32)) { + buckets_log = rta_getattr_u32(tb[TCA_FQ_BUCKETS_LOG]); + fprintf(f, 
"buckets %u ", 1U << buckets_log); + } + if (tb[TCA_FQ_RATE_ENABLE] && + RTA_PAYLOAD(tb[TCA_FQ_RATE_ENABLE]) >= sizeof(int)) { + pacing = rta_getattr_u32(tb[TCA_FQ_RATE_ENABLE]); + if (pacing == 0) + fprintf(f, "nopacing "); + } + if (tb[TCA_FQ_QUANTUM] && + RTA_PAYLOAD(tb[TCA_FQ_QUANTUM]) >= sizeof(__u32)) { + quantum = rta_getattr_u32(tb[TCA_FQ_QUANTUM]); + fprintf(f, "quantum %u ", quantum); + } + if (tb[TCA_FQ_INITIAL_QUANTUM] && + RTA_PAYLOAD(tb[TCA_FQ_INITIAL_QUANTUM]) >= sizeof(__u32)) { + quantum = rta_getattr_u32(tb[TCA_FQ_INITIAL_QUANTUM]); + fprintf(f, "initial_quantum %u ", quantum); + } + if (tb[TCA_FQ_FLOW_MAX_RATE] && + RTA_PAYLOAD(tb[TCA_FQ_FLOW_MAX_RATE]) >= sizeof(__u32)) { + rate = rta_getattr_u32(tb[TCA_FQ_FLOW_MAX_RATE]); + + if (rate != ~0U) + fprintf(f, "maxrate %s ", sprint_rate(rate, b1)); + } + if (tb[TCA_FQ_FLOW_DEFAULT_RATE] && + RTA_PAYLOAD(tb[TCA_FQ_FLOW_DEFAULT_RATE]) >= sizeof(__u32)) { + rate = rta_getattr_u32(tb[TCA_FQ_FLOW_DEFAULT_RATE]); + + if (rate != 0) + fprintf(f, "defrate %s ", sprint_rate(rate, b1)); + } + + return 0; +} + +static int fq_print_xstats(struct qdisc_util *qu, FILE *f, + struct rtattr *xstats) +{ + struct tc_fq_qd_stats *st; + + if (xstats == NULL) + return 0; + + if (RTA_PAYLOAD(xstats) < sizeof(*st)) + return -1; + + st = RTA_DATA(xstats); + + fprintf(f, " %u flows (%u inactive, %u throttled)", + st->flows, st->inactive_flows, st->throttled_flows); + + if (st->time_next_delayed_flow > 0) + fprintf(f, ", next packet delay %llu ns", st->time_next_delayed_flow); + + fprintf(f, "\n %llu gc, %llu highprio", + st->gc_flows, st->highprio_packets); + + if (st->tcp_retrans) + fprintf(f, ", %llu retrans", st->tcp_retrans); + + fprintf(f, ", %llu throttled", st->throttled); + + if (st->flows_plimit) + fprintf(f, ", %llu flows_plimit", st->flows_plimit); + + if (st->pkts_too_long || st->allocation_errors) + fprintf(f, "\n %llu too long pkts, %llu alloc errors\n", + st->pkts_too_long, st->allocation_errors); + + return 
0; +} + +struct qdisc_util fq_qdisc_util = { + .id = "fq", + .parse_qopt = fq_parse_opt, + .print_qopt = fq_print_opt, + .print_xstats = fq_print_xstats, +}; From 793da0e7023ce29e32914af477fdbcc4bdbb022f Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Wed, 30 Oct 2013 16:42:03 -0700 Subject: [PATCH 2/7] Update kernel headers Latest from net-next --- include/linux/can/netlink.h | 8 ++++++++ include/linux/if_link.h | 11 +++++++++++ include/linux/pkt_cls.h | 14 ++++++++++++++ include/linux/pkt_sched.h | 2 ++ 4 files changed, 35 insertions(+) diff --git a/include/linux/can/netlink.h b/include/linux/can/netlink.h index 14966ddb..df944ed2 100644 --- a/include/linux/can/netlink.h +++ b/include/linux/can/netlink.h @@ -5,6 +5,14 @@ * * Copyright (c) 2009 Wolfgang Grandegger * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the version 2 of the GNU General Public License + * as published by the Free Software Foundation + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
*/ #ifndef CAN_NETLINK_H diff --git a/include/linux/if_link.h b/include/linux/if_link.h index ee4f2ba9..a485920f 100644 --- a/include/linux/if_link.h +++ b/include/linux/if_link.h @@ -323,6 +323,17 @@ struct ifla_vxlan_port_range { __be16 high; }; +/* Bonding section */ + +enum { + IFLA_BOND_UNSPEC, + IFLA_BOND_MODE, + IFLA_BOND_ACTIVE_SLAVE, + __IFLA_BOND_MAX, +}; + +#define IFLA_BOND_MAX (__IFLA_BOND_MAX - 1) + /* SR-IOV virtual function management section */ enum { diff --git a/include/linux/pkt_cls.h b/include/linux/pkt_cls.h index 082eafaf..25731dfb 100644 --- a/include/linux/pkt_cls.h +++ b/include/linux/pkt_cls.h @@ -388,6 +388,20 @@ enum { #define TCA_CGROUP_MAX (__TCA_CGROUP_MAX - 1) +/* BPF classifier */ + +enum { + TCA_BPF_UNSPEC, + TCA_BPF_ACT, + TCA_BPF_POLICE, + TCA_BPF_CLASSID, + TCA_BPF_OPS_LEN, + TCA_BPF_OPS, + __TCA_BPF_MAX, +}; + +#define TCA_BPF_MAX (__TCA_BPF_MAX - 1) + /* Extended Matches */ struct tcf_ematch_tree_hdr { diff --git a/include/linux/pkt_sched.h b/include/linux/pkt_sched.h index 9b829134..f2624b54 100644 --- a/include/linux/pkt_sched.h +++ b/include/linux/pkt_sched.h @@ -357,6 +357,8 @@ enum { TCA_HTB_CTAB, TCA_HTB_RTAB, TCA_HTB_DIRECT_QLEN, + TCA_HTB_RATE64, + TCA_HTB_CEIL64, __TCA_HTB_MAX, }; From cc26a8909ff31388ca0b3703b08605e116e490bb Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Fri, 18 Oct 2013 17:50:01 +0200 Subject: [PATCH 3/7] iplink: add support for bonding netlink Signed-off-by: Jiri Pirko --- ip/Makefile | 2 +- ip/iplink.c | 4 +- ip/iplink_bond.c | 92 +++++++++++++++++++++++++++++++++++++++++++ man/man8/ip-link.8.in | 4 ++ 4 files changed, 99 insertions(+), 3 deletions(-) create mode 100644 ip/iplink_bond.c diff --git a/ip/Makefile b/ip/Makefile index f10d22fc..89a7a36a 100644 --- a/ip/Makefile +++ b/ip/Makefile @@ -5,7 +5,7 @@ IPOBJ=ip.o ipaddress.o ipaddrlabel.o iproute.o iprule.o ipnetns.o \ iplink_vlan.o link_veth.o link_gre.o iplink_can.o \ iplink_macvlan.o iplink_macvtap.o ipl2tp.o link_vti.o \ iplink_vxlan.o 
tcp_metrics.o iplink_ipoib.o ipnetconf.o link_ip6tnl.o \ - link_iptnl.o link_gre6.o + link_iptnl.o link_gre6.o iplink_bond.o RTMONOBJ=rtmon.o diff --git a/ip/iplink.c b/ip/iplink.c index 6cde731a..58b6c203 100644 --- a/ip/iplink.c +++ b/ip/iplink.c @@ -85,8 +85,8 @@ void iplink_usage(void) if (iplink_have_newlink()) { fprintf(stderr, "\n"); fprintf(stderr, "TYPE := { vlan | veth | vcan | dummy | ifb | macvlan | macvtap |\n"); - fprintf(stderr, " can | bridge | ipoib | ip6tnl | ipip | sit | vxlan |\n"); - fprintf(stderr, " gre | gretap | ip6gre | ip6gretap | vti }\n"); + fprintf(stderr, " can | bridge | bond | ipoib | ip6tnl | ipip | sit |\n"); + fprintf(stderr, " vxlan | gre | gretap | ip6gre | ip6gretap | vti }\n"); } exit(-1); } diff --git a/ip/iplink_bond.c b/ip/iplink_bond.c new file mode 100644 index 00000000..3fb7f4f4 --- /dev/null +++ b/ip/iplink_bond.c @@ -0,0 +1,92 @@ +/* + * iplink_bond.c Bonding device support + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * Authors: Jiri Pirko + */ + +#include +#include +#include +#include +#include + +#include "rt_names.h" +#include "utils.h" +#include "ip_common.h" + +static void explain(void) +{ + fprintf(stderr, + "Usage: ... 
bond [ mode BONDMODE ] [ active_slave SLAVE_DEV ]\n" + " [ clear_active_slave ]\n" + "\n" + "BONDMODE := 0-6\n" + ); +} + +static int bond_parse_opt(struct link_util *lu, int argc, char **argv, + struct nlmsghdr *n) +{ + __u8 mode; + unsigned ifindex; + + while (argc > 0) { + if (matches(*argv, "mode") == 0) { + NEXT_ARG(); + if (get_u8(&mode, *argv, 0)) { + invarg("mode %s is invalid", *argv); + return -1; + } + addattr8(n, 1024, IFLA_BOND_MODE, mode); + } else if (matches(*argv, "active_slave") == 0) { + NEXT_ARG(); + ifindex = if_nametoindex(*argv); + if (!ifindex) + return -1; + addattr32(n, 1024, IFLA_BOND_ACTIVE_SLAVE, ifindex); + } else if (matches(*argv, "clear_active_slave") == 0) { + addattr32(n, 1024, IFLA_BOND_ACTIVE_SLAVE, 0); + } else { + fprintf(stderr, "bond: unknown command \"%s\"?\n", *argv); + explain(); + return -1; + } + argc--, argv++; + } + + return 0; +} + +static void bond_print_opt(struct link_util *lu, FILE *f, struct rtattr *tb[]) +{ + unsigned ifindex; + + if (!tb) + return; + + if (tb[IFLA_BOND_MODE]) + fprintf(f, "mode %u ", rta_getattr_u8(tb[IFLA_BOND_MODE])); + + if (tb[IFLA_BOND_ACTIVE_SLAVE] && + (ifindex = rta_getattr_u32(tb[IFLA_BOND_ACTIVE_SLAVE]))) { + char buf[IFNAMSIZ]; + const char *n = if_indextoname(ifindex, buf); + + if (n) + fprintf(f, "active_slave %s ", n); + else + fprintf(f, "active_slave %u ", ifindex); + } +} + +struct link_util bond_link_util = { + .id = "bond", + .maxattr = IFLA_BOND_MAX, + .parse_opt = bond_parse_opt, + .print_opt = bond_print_opt, +}; diff --git a/man/man8/ip-link.8.in b/man/man8/ip-link.8.in index 8b68c78d..1825dc54 100644 --- a/man/man8/ip-link.8.in +++ b/man/man8/ip-link.8.in @@ -51,6 +51,7 @@ ip-link \- network device configuration .ti -8 .IR TYPE " := [ " .BR bridge " | " +.BR bond " | " .BR can " | " .BR dummy " | " .BR ifb " | " @@ -155,6 +156,9 @@ Link types: .B bridge - Ethernet Bridge device .sp +.B bond +- Bonding device +.sp .B can - Controller Area Network interface .sp From 
d05df6861f99f251282e59f36dd94d7ff28d1bb8 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Mon, 28 Oct 2013 12:35:33 +0100 Subject: [PATCH 4/7] tc: add cls_bpf frontend This is the iproute2 part of the kernel patch "net: sched: add BPF-based traffic classifier". [Will re-submit later again for iproute2 when window for -next submissions opens.] Signed-off-by: Daniel Borkmann Cc: Thomas Graf --- tc/Makefile | 1 + tc/f_bpf.c | 288 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 289 insertions(+) create mode 100644 tc/f_bpf.c diff --git a/tc/Makefile b/tc/Makefile index f54a955b..84215c06 100644 --- a/tc/Makefile +++ b/tc/Makefile @@ -22,6 +22,7 @@ TCMODULES += f_u32.o TCMODULES += f_route.o TCMODULES += f_fw.o TCMODULES += f_basic.o +TCMODULES += f_bpf.o TCMODULES += f_flow.o TCMODULES += f_cgroup.o TCMODULES += q_dsmark.o diff --git a/tc/f_bpf.c b/tc/f_bpf.c new file mode 100644 index 00000000..d52d7d8e --- /dev/null +++ b/tc/f_bpf.c @@ -0,0 +1,288 @@ +/* + * f_bpf.c BPF-based Classifier + * + * This program is free software; you can distribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * Authors: Daniel Borkmann + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "utils.h" +#include "tc_util.h" + +static void explain(void) +{ + fprintf(stderr, "Usage: ... 
bpf ...\n"); + fprintf(stderr, "\n"); + fprintf(stderr, " [inline]: run bytecode BPF_BYTECODE\n"); + fprintf(stderr, " [from file]: run bytecode-file FILE\n"); + fprintf(stderr, "\n"); + fprintf(stderr, " [ police POLICE_SPEC ] [ action ACTION_SPEC ]\n"); + fprintf(stderr, " [ classid CLASSID ]\n"); + fprintf(stderr, "\n"); + fprintf(stderr, "Where BPF_BYTECODE := \'s,c t f k,c t f k,c t f k,...\'\n"); + fprintf(stderr, " c,t,f,k and s are decimals; s denotes number of 4-tuples\n"); + fprintf(stderr, "Where FILE points to a file containing the BPF_BYTECODE string\n"); + fprintf(stderr, "\nNOTE: CLASSID is parsed as hexadecimal input.\n"); +} + +static int bpf_parse_string(char *arg, bool from_file, __u16 *bpf_len, + char **bpf_string, bool *need_release, + const char separator) +{ + char sp; + + if (from_file) { + size_t tmp_len, op_len = sizeof("65535 255 255 4294967295,"); + char *tmp_string; + FILE *fp; + + tmp_len = sizeof("4096,") + BPF_MAXINSNS * op_len; + tmp_string = malloc(tmp_len); + if (tmp_string == NULL) + return -ENOMEM; + + memset(tmp_string, 0, tmp_len); + + fp = fopen(arg, "r"); + if (fp == NULL) { + perror("Cannot fopen"); + free(tmp_string); + return -ENOENT; + } + + if (!fgets(tmp_string, tmp_len, fp)) { + free(tmp_string); + fclose(fp); + return -EIO; + } + + fclose(fp); + + *need_release = true; + *bpf_string = tmp_string; + } else { + *need_release = false; + *bpf_string = arg; + } + + if (sscanf(*bpf_string, "%hu%c", bpf_len, &sp) != 2 || + sp != separator) { + if (*need_release) + free(*bpf_string); + return -EINVAL; + } + + return 0; +} + +static int bpf_parse_ops(int argc, char **argv, struct nlmsghdr *n, + bool from_file) +{ + char *bpf_string, *token, separator = ','; + struct sock_filter bpf_ops[BPF_MAXINSNS]; + int ret = 0, i = 0; + bool need_release; + __u16 bpf_len = 0; + + if (argc < 1) + return -EINVAL; + if (bpf_parse_string(argv[0], from_file, &bpf_len, &bpf_string, + &need_release, separator)) + return -EINVAL; + if (bpf_len == 
0 || bpf_len > BPF_MAXINSNS) { + ret = -EINVAL; + goto out; + } + + token = bpf_string; + while ((token = strchr(token, separator)) && (++token)[0]) { + if (i >= bpf_len) { + fprintf(stderr, "Real program length exceeds encoded " + "length parameter!\n"); + ret = -EINVAL; + goto out; + } + + if (sscanf(token, "%hu %hhu %hhu %u,", + &bpf_ops[i].code, &bpf_ops[i].jt, + &bpf_ops[i].jf, &bpf_ops[i].k) != 4) { + fprintf(stderr, "Error at instruction %d!\n", i); + ret = -EINVAL; + goto out; + } + + i++; + } + + if (i != bpf_len) { + fprintf(stderr, "Parsed program length is less than encoded" + "length parameter!\n"); + ret = -EINVAL; + goto out; + } + + addattr_l(n, MAX_MSG, TCA_BPF_OPS_LEN, &bpf_len, sizeof(bpf_len)); + addattr_l(n, MAX_MSG, TCA_BPF_OPS, &bpf_ops, + bpf_len * sizeof(struct sock_filter)); +out: + if (need_release) + free(bpf_string); + + return ret; +} + +static void bpf_print_ops(FILE *f, struct rtattr *bpf_ops, __u16 len) +{ + struct sock_filter *ops = (struct sock_filter *) RTA_DATA(bpf_ops); + int i; + + if (len == 0) + return; + + fprintf(f, "bytecode \'%u,", len); + + for (i = 0; i < len - 1; i++) + fprintf(f, "%hu %hhu %hhu %u,", ops[i].code, ops[i].jt, + ops[i].jf, ops[i].k); + + fprintf(f, "%hu %hhu %hhu %u\'\n", ops[i].code, ops[i].jt, + ops[i].jf, ops[i].k); +} + +static int bpf_parse_opt(struct filter_util *qu, char *handle, + int argc, char **argv, struct nlmsghdr *n) +{ + struct tcmsg *t = NLMSG_DATA(n); + struct rtattr *tail; + long h = 0; + + if (argc == 0) + return 0; + + if (handle) { + h = strtol(handle, NULL, 0); + if (h == LONG_MIN || h == LONG_MAX) { + fprintf(stderr, "Illegal handle \"%s\", must be " + "numeric.\n", handle); + return -1; + } + } + + t->tcm_handle = h; + + tail = (struct rtattr*)(((void*)n)+NLMSG_ALIGN(n->nlmsg_len)); + addattr_l(n, MAX_MSG, TCA_OPTIONS, NULL, 0); + + while (argc > 0) { + if (matches(*argv, "run") == 0) { + bool from_file; + NEXT_ARG(); + if (strcmp(*argv, "bytecode-file") == 0) { + from_file = 
true; + } else if (strcmp(*argv, "bytecode") == 0) { + from_file = false; + } else { + fprintf(stderr, "What is \"%s\"?\n", *argv); + explain(); + return -1; + } + NEXT_ARG(); + if (bpf_parse_ops(argc, argv, n, from_file)) { + fprintf(stderr, "Illegal \"bytecode\"\n"); + return -1; + } + } else if (matches(*argv, "classid") == 0 || + strcmp(*argv, "flowid") == 0) { + unsigned handle; + NEXT_ARG(); + if (get_tc_classid(&handle, *argv)) { + fprintf(stderr, "Illegal \"classid\"\n"); + return -1; + } + addattr_l(n, MAX_MSG, TCA_BPF_CLASSID, &handle, 4); + } else if (matches(*argv, "action") == 0) { + NEXT_ARG(); + if (parse_action(&argc, &argv, TCA_BPF_ACT, n)) { + fprintf(stderr, "Illegal \"action\"\n"); + return -1; + } + continue; + } else if (matches(*argv, "police") == 0) { + NEXT_ARG(); + if (parse_police(&argc, &argv, TCA_BPF_POLICE, n)) { + fprintf(stderr, "Illegal \"police\"\n"); + return -1; + } + continue; + } else if (strcmp(*argv, "help") == 0) { + explain(); + return -1; + } else { + fprintf(stderr, "What is \"%s\"?\n", *argv); + explain(); + return -1; + } + argc--; argv++; + } + + tail->rta_len = (((void*)n)+n->nlmsg_len) - (void*)tail; + return 0; +} + +static int bpf_print_opt(struct filter_util *qu, FILE *f, + struct rtattr *opt, __u32 handle) +{ + struct rtattr *tb[TCA_BPF_MAX + 1]; + + if (opt == NULL) + return 0; + + parse_rtattr_nested(tb, TCA_BPF_MAX, opt); + + if (handle) + fprintf(f, "handle 0x%x ", handle); + + if (tb[TCA_BPF_CLASSID]) { + SPRINT_BUF(b1); + fprintf(f, "flowid %s ", + sprint_tc_classid(rta_getattr_u32(tb[TCA_BPF_CLASSID]), b1)); + } + + if (tb[TCA_BPF_OPS] && tb[TCA_BPF_OPS_LEN]) + bpf_print_ops(f, tb[TCA_BPF_OPS], + rta_getattr_u16(tb[TCA_BPF_OPS_LEN])); + + if (tb[TCA_BPF_POLICE]) { + fprintf(f, "\n"); + tc_print_police(f, tb[TCA_BPF_POLICE]); + } + + if (tb[TCA_BPF_ACT]) { + tc_print_action(f, tb[TCA_BPF_ACT]); + } + + return 0; +} + +struct filter_util bpf_filter_util = { + .id = "bpf", + .parse_fopt = bpf_parse_opt, + 
.print_fopt = bpf_print_opt, +}; From bc7635a8b377dafd7b074661294ccd217a6aaf9b Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Tue, 19 Nov 2013 23:46:20 +0800 Subject: [PATCH 5/7] ipaddrlabel: use uint32_t instead of int32_t As both the Linux kernel and the function ipaddrlabel_modify use unsigned int for the label, we should also use an unsigned int value when printing addrlabel to avoid misunderstanding. Signed-off-by: Hangbin Liu --- ip/ipaddrlabel.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/ip/ipaddrlabel.c b/ip/ipaddrlabel.c index 1789d9c1..68a94ddf 100644 --- a/ip/ipaddrlabel.c +++ b/ip/ipaddrlabel.c @@ -86,10 +86,10 @@ int print_addrlabel(const struct sockaddr_nl *who, struct nlmsghdr *n, void *arg if (ifal->ifal_index) fprintf(fp, "dev %s ", ll_index_to_name(ifal->ifal_index)); - if (tb[IFAL_LABEL] && RTA_PAYLOAD(tb[IFAL_LABEL]) == sizeof(int32_t)) { - int32_t label; + if (tb[IFAL_LABEL] && RTA_PAYLOAD(tb[IFAL_LABEL]) == sizeof(uint32_t)) { + uint32_t label; memcpy(&label, RTA_DATA(tb[IFAL_LABEL]), sizeof(label)); - fprintf(fp, "label %d ", label); + fprintf(fp, "label %u ", label); } fprintf(fp, "\n"); From f26ef6ec09ea3b7a57916885ae74d9b3e5afadcc Mon Sep 17 00:00:00 2001 From: Andreas Henriksson Date: Wed, 13 Nov 2013 09:46:42 +0100 Subject: [PATCH 6/7] ss: avoid passing negative numbers to malloc Example: $ ss state established \( sport = :4060 or sport = :4061 or sport = :4062 or sport = :4063 or sport = :4064 or sport = :4065 or sport = :4066 or sport = :4067 \) > /dev/null Aborted In the example above ssfilter_bytecompile(...) will return (int)136. char l1 = 136; means -120 which will result in a negative number being passed to malloc at misc/ss.c:913. Simply declare l1 and l2 as integers to avoid the char overflow. This is one of the issues originally reported in http://bugs.debian.org/511720 Fix the same problem in other code paths as well (thanks to Eric Dumazet). 
Reported-by: Andreas Schuldei Signed-off-by: Andreas Henriksson Reviewed-by: Eric Dumazet --- misc/ss.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/misc/ss.c b/misc/ss.c index c0369f11..6f38ae7e 100644 --- a/misc/ss.c +++ b/misc/ss.c @@ -894,7 +894,8 @@ static int ssfilter_bytecompile(struct ssfilter *f, char **bytecode) case SSF_AND: { - char *a1, *a2, *a, l1, l2; + char *a1, *a2, *a; + int l1, l2; l1 = ssfilter_bytecompile(f->pred, &a1); l2 = ssfilter_bytecompile(f->post, &a2); if (!(a = malloc(l1+l2))) abort(); @@ -907,7 +908,8 @@ static int ssfilter_bytecompile(struct ssfilter *f, char **bytecode) } case SSF_OR: { - char *a1, *a2, *a, l1, l2; + char *a1, *a2, *a; + int l1, l2; l1 = ssfilter_bytecompile(f->pred, &a1); l2 = ssfilter_bytecompile(f->post, &a2); if (!(a = malloc(l1+l2+4))) abort(); @@ -920,7 +922,8 @@ static int ssfilter_bytecompile(struct ssfilter *f, char **bytecode) } case SSF_NOT: { - char *a1, *a, l1; + char *a1, *a; + int l1; l1 = ssfilter_bytecompile(f->pred, &a1); if (!(a = malloc(l1+4))) abort(); memcpy(a, a1, l1); From fa10855a7e7c1cca7940001c133f09df094551dc Mon Sep 17 00:00:00 2001 From: Sami Kerola Date: Mon, 30 Sep 2013 22:01:48 +0100 Subject: [PATCH 7/7] ip: make -resolve addr to print names rather than addresses As a system admin I occasionally want to be able to check that all interfaces have a name in DNS or /etc/hosts file. 
Signed-off-by: Sami Kerola --- ip/ipaddress.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/ip/ipaddress.c b/ip/ipaddress.c index 1c3e4da0..d02eaaf8 100644 --- a/ip/ipaddress.c +++ b/ip/ipaddress.c @@ -636,7 +636,7 @@ int print_addrinfo(const struct sockaddr_nl *who, struct nlmsghdr *n, fprintf(fp, " family %d ", ifa->ifa_family); if (rta_tb[IFA_LOCAL]) { - fprintf(fp, "%s", rt_addr_n2a(ifa->ifa_family, + fprintf(fp, "%s", format_host(ifa->ifa_family, RTA_PAYLOAD(rta_tb[IFA_LOCAL]), RTA_DATA(rta_tb[IFA_LOCAL]), abuf, sizeof(abuf))); @@ -647,7 +647,7 @@ int print_addrinfo(const struct sockaddr_nl *who, struct nlmsghdr *n, fprintf(fp, "/%d ", ifa->ifa_prefixlen); } else { fprintf(fp, " peer %s/%d ", - rt_addr_n2a(ifa->ifa_family, + format_host(ifa->ifa_family, RTA_PAYLOAD(rta_tb[IFA_ADDRESS]), RTA_DATA(rta_tb[IFA_ADDRESS]), abuf, sizeof(abuf)), @@ -657,14 +657,14 @@ int print_addrinfo(const struct sockaddr_nl *who, struct nlmsghdr *n, if (rta_tb[IFA_BROADCAST]) { fprintf(fp, "brd %s ", - rt_addr_n2a(ifa->ifa_family, + format_host(ifa->ifa_family, RTA_PAYLOAD(rta_tb[IFA_BROADCAST]), RTA_DATA(rta_tb[IFA_BROADCAST]), abuf, sizeof(abuf))); } if (rta_tb[IFA_ANYCAST]) { fprintf(fp, "any %s ", - rt_addr_n2a(ifa->ifa_family, + format_host(ifa->ifa_family, RTA_PAYLOAD(rta_tb[IFA_ANYCAST]), RTA_DATA(rta_tb[IFA_ANYCAST]), abuf, sizeof(abuf)));