From dc0e9c7f2291daecf474f8805f402974e8f3e975 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Fri, 22 Nov 2013 17:29:02 -0800 Subject: [PATCH 01/16] update to net-next headers --- include/linux/genetlink.h | 1 - include/linux/pkt_sched.h | 7 +++---- 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/include/linux/genetlink.h b/include/linux/genetlink.h index d48123a1..1f85a276 100644 --- a/include/linux/genetlink.h +++ b/include/linux/genetlink.h @@ -27,7 +27,6 @@ struct genlmsghdr { */ #define GENL_ID_GENERATE 0 #define GENL_ID_CTRL NLMSG_MIN_TYPE -#define GENL_ID_VFS_DQUOT (NLMSG_MIN_TYPE + 1) /************************************************************************** * Controller diff --git a/include/linux/pkt_sched.h b/include/linux/pkt_sched.h index a806687a..307f2934 100644 --- a/include/linux/pkt_sched.h +++ b/include/linux/pkt_sched.h @@ -763,14 +763,13 @@ enum { TCA_FQ_RATE_ENABLE, /* enable/disable rate limiting */ - TCA_FQ_FLOW_DEFAULT_RATE,/* obsolete, do not use */ + TCA_FQ_FLOW_DEFAULT_RATE,/* for sockets with unspecified sk_rate, + * use the following rate + */ TCA_FQ_FLOW_MAX_RATE, /* per flow max rate */ TCA_FQ_BUCKETS_LOG, /* log2(number of buckets) */ - - TCA_FQ_FLOW_REFILL_DELAY, /* flow credit refill delay in usec */ - __TCA_FQ_MAX }; From 8334bb325d5178483a3063c5f06858b46d993dc7 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 12 Nov 2013 14:34:07 -0800 Subject: [PATCH 02/16] htb: support 64bit rates Starting from linux-3.13, we can break the 32bit limitation of rates on HTB qdisc/classes. Prior limit was 34.359.738.360 bits per second. 
lpq83:~# tc -s qdisc show dev lo ; tc -s class show dev lo qdisc htb 1: root refcnt 2 r2q 2000 default 1 direct_packets_stat 0 direct_qlen 6000 Sent 6591936144493 bytes 149549182 pkt (dropped 0, overlimits 213757419 requeues 0) rate 39464Mbit 114938pps backlog 0b 15p requeues 0 class htb 1:1 root prio 0 rate 50000Mbit ceil 50000Mbit burst 200000b cburst 0b Sent 6591942184547 bytes 149549310 pkt (dropped 0, overlimits 0 requeues 0) rate 39464Mbit 114938pps backlog 0b 15p requeues 0 lended: 149549310 borrowed: 0 giants: 0 tokens: 336 ctokens: -164 Signed-off-by: Eric Dumazet --- tc/q_htb.c | 56 +++++++++++++++++++++++++++++++++++++++------------- tc/tc_core.c | 6 +++--- tc/tc_core.h | 4 ++-- tc/tc_util.c | 25 +++++++++++++++++++++++ tc/tc_util.h | 1 + 5 files changed, 73 insertions(+), 19 deletions(-) diff --git a/tc/q_htb.c b/tc/q_htb.c index e108857d..1d8c56f2 100644 --- a/tc/q_htb.c +++ b/tc/q_htb.c @@ -113,6 +113,7 @@ static int htb_parse_class_opt(struct qdisc_util *qu, int argc, char **argv, str unsigned int direct_qlen = ~0U; unsigned int linklayer = LINKLAYER_ETHERNET; /* Assume ethernet */ struct rtattr *tail; + __u64 ceil64 = 0, rate64 = 0; memset(&opt, 0, sizeof(opt)); mtu = 1600; /* eth packet len */ @@ -173,22 +174,22 @@ static int htb_parse_class_opt(struct qdisc_util *qu, int argc, char **argv, str ok++; } else if (strcmp(*argv, "ceil") == 0) { NEXT_ARG(); - if (opt.ceil.rate) { + if (ceil64) { fprintf(stderr, "Double \"ceil\" spec\n"); return -1; } - if (get_rate(&opt.ceil.rate, *argv)) { + if (get_rate64(&ceil64, *argv)) { explain1("ceil"); return -1; } ok++; } else if (strcmp(*argv, "rate") == 0) { NEXT_ARG(); - if (opt.rate.rate) { + if (rate64) { fprintf(stderr, "Double \"rate\" spec\n"); return -1; } - if (get_rate(&opt.rate.rate, *argv)) { + if (get_rate64(&rate64, *argv)) { explain1("rate"); return -1; } @@ -207,17 +208,23 @@ static int htb_parse_class_opt(struct qdisc_util *qu, int argc, char **argv, str /* if (!ok) return 0;*/ - if 
(opt.rate.rate == 0) { + if (!rate64) { fprintf(stderr, "\"rate\" is required.\n"); return -1; } /* if ceil params are missing, use the same as rate */ - if (!opt.ceil.rate) opt.ceil = opt.rate; + if (!ceil64) + ceil64 = rate64; + + opt.rate.rate = (rate64 >= (1ULL << 32)) ? ~0U : rate64; + opt.ceil.rate = (ceil64 >= (1ULL << 32)) ? ~0U : ceil64; /* compute minimal allowed burst from rate; mtu is added here to make sute that buffer is larger than mtu and to have some safeguard space */ - if (!buffer) buffer = opt.rate.rate / get_hz() + mtu; - if (!cbuffer) cbuffer = opt.ceil.rate / get_hz() + mtu; + if (!buffer) + buffer = rate64 / get_hz() + mtu; + if (!cbuffer) + cbuffer = ceil64 / get_hz() + mtu; opt.ceil.overhead = overhead; opt.rate.overhead = overhead; @@ -229,19 +236,26 @@ static int htb_parse_class_opt(struct qdisc_util *qu, int argc, char **argv, str fprintf(stderr, "htb: failed to calculate rate table.\n"); return -1; } - opt.buffer = tc_calc_xmittime(opt.rate.rate, buffer); + opt.buffer = tc_calc_xmittime(rate64, buffer); if (tc_calc_rtable(&opt.ceil, ctab, ccell_log, mtu, linklayer) < 0) { fprintf(stderr, "htb: failed to calculate ceil rate table.\n"); return -1; } - opt.cbuffer = tc_calc_xmittime(opt.ceil.rate, cbuffer); + opt.cbuffer = tc_calc_xmittime(ceil64, cbuffer); tail = NLMSG_TAIL(n); if (direct_qlen != ~0U) addattr_l(n, 1024, TCA_HTB_DIRECT_QLEN, &direct_qlen, sizeof(direct_qlen)); addattr_l(n, 1024, TCA_OPTIONS, NULL, 0); + + if (rate64 >= (1ULL << 32)) + addattr_l(n, 1124, TCA_HTB_RATE64, &rate64, sizeof(rate64)); + + if (ceil64 >= (1ULL << 32)) + addattr_l(n, 1224, TCA_HTB_CEIL64, &ceil64, sizeof(ceil64)); + addattr_l(n, 2024, TCA_HTB_PARMS, &opt, sizeof(opt)); addattr_l(n, 3024, TCA_HTB_RTAB, rtab, 1024); addattr_l(n, 4024, TCA_HTB_CTAB, ctab, 1024); @@ -256,6 +270,7 @@ static int htb_print_opt(struct qdisc_util *qu, FILE *f, struct rtattr *opt) struct tc_htb_glob *gopt; double buffer,cbuffer; unsigned int linklayer; + __u64 rate64, 
ceil64; SPRINT_BUF(b1); SPRINT_BUF(b2); SPRINT_BUF(b3); @@ -275,12 +290,25 @@ static int htb_print_opt(struct qdisc_util *qu, FILE *f, struct rtattr *opt) if (show_details) fprintf(f, "quantum %d ", (int)hopt->quantum); } - fprintf(f, "rate %s ", sprint_rate(hopt->rate.rate, b1)); + + rate64 = hopt->rate.rate; + if (tb[TCA_HTB_RATE64] && + RTA_PAYLOAD(tb[TCA_HTB_RATE64]) >= sizeof(rate64)) { + rate64 = rta_getattr_u64(tb[TCA_HTB_RATE64]); + } + + ceil64 = hopt->ceil.rate; + if (tb[TCA_HTB_CEIL64] && + RTA_PAYLOAD(tb[TCA_HTB_CEIL64]) >= sizeof(ceil64)) + ceil64 = rta_getattr_u64(tb[TCA_HTB_CEIL64]); + + fprintf(f, "rate %s ", sprint_rate(rate64, b1)); if (hopt->rate.overhead) fprintf(f, "overhead %u ", hopt->rate.overhead); - buffer = tc_calc_xmitsize(hopt->rate.rate, hopt->buffer); - fprintf(f, "ceil %s ", sprint_rate(hopt->ceil.rate, b1)); - cbuffer = tc_calc_xmitsize(hopt->ceil.rate, hopt->cbuffer); + buffer = tc_calc_xmitsize(rate64, hopt->buffer); + + fprintf(f, "ceil %s ", sprint_rate(ceil64, b1)); + cbuffer = tc_calc_xmitsize(ceil64, hopt->cbuffer); linklayer = (hopt->rate.linklayer & TC_LINKLAYER_MASK); if (linklayer > TC_LINKLAYER_ETHERNET || show_details) fprintf(f, "linklayer %s ", sprint_linklayer(linklayer, b4)); diff --git a/tc/tc_core.c b/tc/tc_core.c index a5243370..46eaefb5 100644 --- a/tc/tc_core.c +++ b/tc/tc_core.c @@ -56,12 +56,12 @@ unsigned tc_core_ktime2time(unsigned ktime) return ktime / clock_factor; } -unsigned tc_calc_xmittime(unsigned rate, unsigned size) +unsigned tc_calc_xmittime(__u64 rate, unsigned size) { - return tc_core_time2tick(TIME_UNITS_PER_SEC*((double)size/rate)); + return tc_core_time2tick(TIME_UNITS_PER_SEC*((double)size/(double)rate)); } -unsigned tc_calc_xmitsize(unsigned rate, unsigned ticks) +unsigned tc_calc_xmitsize(__u64 rate, unsigned ticks) { return ((double)rate*tc_core_tick2time(ticks))/TIME_UNITS_PER_SEC; } diff --git a/tc/tc_core.h b/tc/tc_core.h index 5a693bad..8a63b79c 100644 --- a/tc/tc_core.h +++ 
b/tc/tc_core.h @@ -18,8 +18,8 @@ unsigned tc_core_time2tick(unsigned time); unsigned tc_core_tick2time(unsigned tick); unsigned tc_core_time2ktime(unsigned time); unsigned tc_core_ktime2time(unsigned ktime); -unsigned tc_calc_xmittime(unsigned rate, unsigned size); -unsigned tc_calc_xmitsize(unsigned rate, unsigned ticks); +unsigned tc_calc_xmittime(__u64 rate, unsigned size); +unsigned tc_calc_xmitsize(__u64 rate, unsigned ticks); int tc_calc_rtable(struct tc_ratespec *r, __u32 *rtab, int cell_log, unsigned mtu, enum link_layer link_layer); int tc_calc_size_table(struct tc_sizespec *s, __u16 **stab); diff --git a/tc/tc_util.c b/tc/tc_util.c index be3ed071..2f979dfa 100644 --- a/tc/tc_util.c +++ b/tc/tc_util.c @@ -171,6 +171,31 @@ int get_rate(unsigned *rate, const char *str) return 0; } +int get_rate64(__u64 *rate, const char *str) +{ + char *p; + double bps = strtod(str, &p); + const struct rate_suffix *s; + + if (p == str) + return -1; + + for (s = suffixes; s->name; ++s) { + if (strcasecmp(s->name, p) == 0) { + bps *= s->scale; + p += strlen(p); + break; + } + } + + if (*p) + return -1; /* unknown suffix */ + + bps /= 8; /* -> bytes per second */ + *rate = bps; + return 0; +} + void print_rate(char *buf, int len, __u64 rate) { double tmp = (double)rate*8; diff --git a/tc/tc_util.h b/tc/tc_util.h index 7c3709f5..d4183679 100644 --- a/tc/tc_util.h +++ b/tc/tc_util.h @@ -58,6 +58,7 @@ extern struct filter_util *get_filter_kind(const char *str); extern int get_qdisc_handle(__u32 *h, const char *str); extern int get_rate(unsigned *rate, const char *str); +extern int get_rate64(__u64 *rate, const char *str); extern int get_size(unsigned *size, const char *str); extern int get_size_and_cell(unsigned *size, int *cell_log, char *str); extern int get_time(unsigned *time, const char *str); From ddc6243e9aafa54ed120e5868e6c0f9b27475fec Mon Sep 17 00:00:00 2001 From: Yang Yingliang Date: Tue, 26 Nov 2013 09:55:35 +0800 Subject: [PATCH 03/16] tbf: add 64bit rates support tbf 
support 64bit rates start from linux-3.13. Add 64bit rates support in tc tools. tc qdisc show dev eth0 qdisc tbf 1: root refcnt 2 rate 40000Mbit burst 230000b peakrate 50000Mbit minburst 87500b lat 50.0ms This is a followup to ("htb: support 64bit rates"). Signed-off-by: Yang Yingliang Cc: Eric Dumazet --- tc/q_tbf.c | 58 +++++++++++++++++++++++++++++++++++------------------- 1 file changed, 38 insertions(+), 20 deletions(-) diff --git a/tc/q_tbf.c b/tc/q_tbf.c index 34784a41..2fbfd3b5 100644 --- a/tc/q_tbf.c +++ b/tc/q_tbf.c @@ -47,6 +47,7 @@ static int tbf_parse_opt(struct qdisc_util *qu, int argc, char **argv, struct nl unsigned short overhead=0; unsigned int linklayer = LINKLAYER_ETHERNET; /* Assume ethernet */ struct rtattr *tail; + __u64 rate64 = 0, prate64 = 0; memset(&opt, 0, sizeof(opt)); @@ -121,22 +122,22 @@ static int tbf_parse_opt(struct qdisc_util *qu, int argc, char **argv, struct nl ok++; } else if (strcmp(*argv, "rate") == 0) { NEXT_ARG(); - if (opt.rate.rate) { + if (rate64) { fprintf(stderr, "tbf: duplicate \"rate\" specification\n"); return -1; } - if (get_rate(&opt.rate.rate, *argv)) { + if (get_rate64(&rate64, *argv)) { explain1("rate", *argv); return -1; } ok++; } else if (matches(*argv, "peakrate") == 0) { NEXT_ARG(); - if (opt.peakrate.rate) { + if (prate64) { fprintf(stderr, "tbf: duplicate \"peakrate\" specification\n"); return -1; } - if (get_rate(&opt.peakrate.rate, *argv)) { + if (get_rate64(&prate64, *argv)) { explain1("peakrate", *argv); return -1; } @@ -172,7 +173,7 @@ static int tbf_parse_opt(struct qdisc_util *qu, int argc, char **argv, struct nl * one go rather than reveal one more problem when a * previous one has been fixed. 
*/ - if (opt.rate.rate == 0) { + if (rate64 == 0) { fprintf(stderr, "tbf: the \"rate\" parameter is mandatory.\n"); verdict = -1; } @@ -180,7 +181,7 @@ static int tbf_parse_opt(struct qdisc_util *qu, int argc, char **argv, struct nl fprintf(stderr, "tbf: the \"burst\" parameter is mandatory.\n"); verdict = -1; } - if (opt.peakrate.rate) { + if (prate64) { if (!mtu) { fprintf(stderr, "tbf: when \"peakrate\" is specified, \"mtu\" must also be specified.\n"); verdict = -1; @@ -197,10 +198,13 @@ static int tbf_parse_opt(struct qdisc_util *qu, int argc, char **argv, struct nl return verdict; } + opt.rate.rate = (rate64 >= (1ULL << 32)) ? ~0U : rate64; + opt.peakrate.rate = (prate64 >= (1ULL << 32)) ? ~0U : prate64; + if (opt.limit == 0) { - double lim = opt.rate.rate*(double)latency/TIME_UNITS_PER_SEC + buffer; - if (opt.peakrate.rate) { - double lim2 = opt.peakrate.rate*(double)latency/TIME_UNITS_PER_SEC + mtu; + double lim = rate64*(double)latency/TIME_UNITS_PER_SEC + buffer; + if (prate64) { + double lim2 = prate64*(double)latency/TIME_UNITS_PER_SEC + mtu; if (lim2 < lim) lim = lim2; } @@ -228,20 +232,26 @@ static int tbf_parse_opt(struct qdisc_util *qu, int argc, char **argv, struct nl tail = NLMSG_TAIL(n); addattr_l(n, 1024, TCA_OPTIONS, NULL, 0); addattr_l(n, 2024, TCA_TBF_PARMS, &opt, sizeof(opt)); + if (rate64 >= (1ULL << 32)) + addattr_l(n, 2124, TCA_TBF_RATE64, &rate64, sizeof(rate64)); addattr_l(n, 3024, TCA_TBF_RTAB, rtab, 1024); - if (opt.peakrate.rate) + if (opt.peakrate.rate) { + if (prate64 >= (1ULL << 32)) + addattr_l(n, 3124, TCA_TBF_PRATE64, &prate64, sizeof(prate64)); addattr_l(n, 4096, TCA_TBF_PTAB, ptab, 1024); + } tail->rta_len = (void *) NLMSG_TAIL(n) - (void *) tail; return 0; } static int tbf_print_opt(struct qdisc_util *qu, FILE *f, struct rtattr *opt) { - struct rtattr *tb[TCA_TBF_PTAB+1]; + struct rtattr *tb[TCA_TBF_MAX+1]; struct tc_tbf_qopt *qopt; unsigned int linklayer; double buffer, mtu; double latency; + __u64 rate64 = 0, prate64 = 0; 
SPRINT_BUF(b1); SPRINT_BUF(b2); SPRINT_BUF(b3); @@ -249,7 +259,7 @@ static int tbf_print_opt(struct qdisc_util *qu, FILE *f, struct rtattr *opt) if (opt == NULL) return 0; - parse_rtattr_nested(tb, TCA_TBF_PTAB, opt); + parse_rtattr_nested(tb, TCA_TBF_MAX, opt); if (tb[TCA_TBF_PARMS] == NULL) return -1; @@ -257,8 +267,12 @@ static int tbf_print_opt(struct qdisc_util *qu, FILE *f, struct rtattr *opt) qopt = RTA_DATA(tb[TCA_TBF_PARMS]); if (RTA_PAYLOAD(tb[TCA_TBF_PARMS]) < sizeof(*qopt)) return -1; - fprintf(f, "rate %s ", sprint_rate(qopt->rate.rate, b1)); - buffer = tc_calc_xmitsize(qopt->rate.rate, qopt->buffer); + rate64 = qopt->rate.rate; + if (tb[TCA_TBF_RATE64] && + RTA_PAYLOAD(tb[TCA_TBF_RATE64]) >= sizeof(rate64)) + rate64 = rta_getattr_u64(tb[TCA_TBF_RATE64]); + fprintf(f, "rate %s ", sprint_rate(rate64, b1)); + buffer = tc_calc_xmitsize(rate64, qopt->buffer); if (show_details) { fprintf(f, "burst %s/%u mpu %s ", sprint_size(buffer, b1), 1<rate.cell_log, sprint_size(qopt->rate.mpu, b2)); @@ -267,10 +281,14 @@ static int tbf_print_opt(struct qdisc_util *qu, FILE *f, struct rtattr *opt) } if (show_raw) fprintf(f, "[%08x] ", qopt->buffer); - if (qopt->peakrate.rate) { - fprintf(f, "peakrate %s ", sprint_rate(qopt->peakrate.rate, b1)); + prate64 = qopt->peakrate.rate; + if (tb[TCA_TBF_PRATE64] && + RTA_PAYLOAD(tb[TCA_TBF_PRATE64]) >= sizeof(prate64)) + prate64 = rta_getattr_u64(tb[TCA_TBF_PRATE64]); + if (prate64) { + fprintf(f, "peakrate %s ", sprint_rate(prate64, b1)); if (qopt->mtu || qopt->peakrate.mpu) { - mtu = tc_calc_xmitsize(qopt->peakrate.rate, qopt->mtu); + mtu = tc_calc_xmitsize(prate64, qopt->mtu); if (show_details) { fprintf(f, "mtu %s/%u mpu %s ", sprint_size(mtu, b1), 1<peakrate.cell_log, sprint_size(qopt->peakrate.mpu, b2)); @@ -285,9 +303,9 @@ static int tbf_print_opt(struct qdisc_util *qu, FILE *f, struct rtattr *opt) if (show_raw) fprintf(f, "limit %s ", sprint_size(qopt->limit, b1)); - latency = 
TIME_UNITS_PER_SEC*(qopt->limit/(double)qopt->rate.rate) - tc_core_tick2time(qopt->buffer); - if (qopt->peakrate.rate) { - double lat2 = TIME_UNITS_PER_SEC*(qopt->limit/(double)qopt->peakrate.rate) - tc_core_tick2time(qopt->mtu); + latency = TIME_UNITS_PER_SEC*(qopt->limit/(double)rate64) - tc_core_tick2time(qopt->buffer); + if (prate64) { + double lat2 = TIME_UNITS_PER_SEC*(qopt->limit/(double)prate64) - tc_core_tick2time(qopt->mtu); if (lat2 > latency) latency = lat2; } From ec69a50cc8e41b3b7386cd4faadb0fb4c30dedb1 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Tue, 17 Dec 2013 22:22:01 -0800 Subject: [PATCH 04/16] Update header files to 3.13-rc2 net-next --- include/linux/genetlink.h | 2 ++ include/linux/if_addr.h | 5 +++++ include/linux/if_link.h | 17 ++++++++++++++++- include/linux/netconf.h | 1 + include/linux/netlink_diag.h | 1 + include/linux/packet_diag.h | 1 + include/linux/pkt_sched.h | 7 ++++--- include/linux/unix_diag.h | 1 + 8 files changed, 31 insertions(+), 4 deletions(-) diff --git a/include/linux/genetlink.h b/include/linux/genetlink.h index 1f85a276..8a1d5006 100644 --- a/include/linux/genetlink.h +++ b/include/linux/genetlink.h @@ -27,6 +27,8 @@ struct genlmsghdr { */ #define GENL_ID_GENERATE 0 #define GENL_ID_CTRL NLMSG_MIN_TYPE +#define GENL_ID_VFS_DQUOT (NLMSG_MIN_TYPE + 1) +#define GENL_ID_PMCRAID (NLMSG_MIN_TYPE + 2) /************************************************************************** * Controller diff --git a/include/linux/if_addr.h b/include/linux/if_addr.h index 58b39f47..9ce823d3 100644 --- a/include/linux/if_addr.h +++ b/include/linux/if_addr.h @@ -18,6 +18,9 @@ struct ifaddrmsg { * It makes no difference for normally configured broadcast interfaces, * but for point-to-point IFA_ADDRESS is DESTINATION address, * local address is supplied in IFA_LOCAL attribute. + * + * IFA_FLAGS is a u32 attribute that extends the u8 field ifa_flags. + * If present, the value from struct ifaddrmsg will be ignored. 
*/ enum { IFA_UNSPEC, @@ -28,6 +31,7 @@ enum { IFA_ANYCAST, IFA_CACHEINFO, IFA_MULTICAST, + IFA_FLAGS, __IFA_MAX, }; @@ -44,6 +48,7 @@ enum { #define IFA_F_DEPRECATED 0x20 #define IFA_F_TENTATIVE 0x40 #define IFA_F_PERMANENT 0x80 +#define IFA_F_MANAGETEMPADDR 0x100 struct ifa_cacheinfo { __u32 ifa_prefered; diff --git a/include/linux/if_link.h b/include/linux/if_link.h index 93654e7a..62c3dd1b 100644 --- a/include/linux/if_link.h +++ b/include/linux/if_link.h @@ -329,6 +329,19 @@ enum { IFLA_BOND_UNSPEC, IFLA_BOND_MODE, IFLA_BOND_ACTIVE_SLAVE, + IFLA_BOND_MIIMON, + IFLA_BOND_UPDELAY, + IFLA_BOND_DOWNDELAY, + IFLA_BOND_USE_CARRIER, + IFLA_BOND_ARP_INTERVAL, + IFLA_BOND_ARP_IP_TARGET, + IFLA_BOND_ARP_VALIDATE, + IFLA_BOND_ARP_ALL_TARGETS, + IFLA_BOND_PRIMARY, + IFLA_BOND_PRIMARY_RESELECT, + IFLA_BOND_FAIL_OVER_MAC, + IFLA_BOND_XMIT_HASH_POLICY, + IFLA_BOND_RESEND_IGMP, __IFLA_BOND_MAX, }; @@ -486,7 +499,9 @@ enum { IFLA_HSR_UNSPEC, IFLA_HSR_SLAVE1, IFLA_HSR_SLAVE2, - IFLA_HSR_MULTICAST_SPEC, + IFLA_HSR_MULTICAST_SPEC, /* Last byte of supervision addr */ + IFLA_HSR_SUPERVISION_ADDR, /* Supervision frame multicast addr */ + IFLA_HSR_SEQ_NR, __IFLA_HSR_MAX, }; diff --git a/include/linux/netconf.h b/include/linux/netconf.h index 52c44244..6ceb170d 100644 --- a/include/linux/netconf.h +++ b/include/linux/netconf.h @@ -14,6 +14,7 @@ enum { NETCONFA_FORWARDING, NETCONFA_RP_FILTER, NETCONFA_MC_FORWARDING, + NETCONFA_PROXY_NEIGH, __NETCONFA_MAX }; #define NETCONFA_MAX (__NETCONFA_MAX - 1) diff --git a/include/linux/netlink_diag.h b/include/linux/netlink_diag.h index 4e31db4e..f2159d30 100644 --- a/include/linux/netlink_diag.h +++ b/include/linux/netlink_diag.h @@ -33,6 +33,7 @@ struct netlink_diag_ring { }; enum { + /* NETLINK_DIAG_NONE, standard nl API requires this attribute! 
*/ NETLINK_DIAG_MEMINFO, NETLINK_DIAG_GROUPS, NETLINK_DIAG_RX_RING, diff --git a/include/linux/packet_diag.h b/include/linux/packet_diag.h index b2cc0cd9..d08c63f3 100644 --- a/include/linux/packet_diag.h +++ b/include/linux/packet_diag.h @@ -29,6 +29,7 @@ struct packet_diag_msg { }; enum { + /* PACKET_DIAG_NONE, standard nl API requires this attribute! */ PACKET_DIAG_INFO, PACKET_DIAG_MCLIST, PACKET_DIAG_RX_RING, diff --git a/include/linux/pkt_sched.h b/include/linux/pkt_sched.h index 307f2934..a806687a 100644 --- a/include/linux/pkt_sched.h +++ b/include/linux/pkt_sched.h @@ -763,13 +763,14 @@ enum { TCA_FQ_RATE_ENABLE, /* enable/disable rate limiting */ - TCA_FQ_FLOW_DEFAULT_RATE,/* for sockets with unspecified sk_rate, - * use the following rate - */ + TCA_FQ_FLOW_DEFAULT_RATE,/* obsolete, do not use */ TCA_FQ_FLOW_MAX_RATE, /* per flow max rate */ TCA_FQ_BUCKETS_LOG, /* log2(number of buckets) */ + + TCA_FQ_FLOW_REFILL_DELAY, /* flow credit refill delay in usec */ + __TCA_FQ_MAX }; diff --git a/include/linux/unix_diag.h b/include/linux/unix_diag.h index b9e2a6a7..1eb0b8dd 100644 --- a/include/linux/unix_diag.h +++ b/include/linux/unix_diag.h @@ -31,6 +31,7 @@ struct unix_diag_msg { }; enum { + /* UNIX_DIAG_NONE, standard nl API requires this attribute! */ UNIX_DIAG_NAME, UNIX_DIAG_VFS, UNIX_DIAG_PEER, From 29cc864089ac311d1819d609e372b8f2083b3ec7 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Tue, 17 Dec 2013 22:26:56 -0800 Subject: [PATCH 05/16] netconf: add support for neighbor proxy attribute Report changes to proxy_arp/proxy_ndp attribute. 
--- ip/ipnetconf.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/ip/ipnetconf.c b/ip/ipnetconf.c index 9a77ecbf..7353f595 100644 --- a/ip/ipnetconf.c +++ b/ip/ipnetconf.c @@ -114,6 +114,10 @@ int print_netconf(const struct sockaddr_nl *who, struct nlmsghdr *n, void *arg) fprintf(fp, "mc_forwarding %d ", *(int *)RTA_DATA(tb[NETCONFA_MC_FORWARDING])); + if (tb[NETCONFA_PROXY_NEIGH]) + fprintf(fp, "proxy_neigh %s ", + *(int *)RTA_DATA(tb[NETCONFA_PROXY_NEIGH])?"on":"off"); + fprintf(fp, "\n"); fflush(fp); return 0; From a4c51eb3483b48c4f4117295900761121cff9823 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Sat, 28 Dec 2013 11:15:10 -0800 Subject: [PATCH 06/16] update to latest net-next headers --- include/linux/if_link.h | 5 +++++ include/linux/pkt_sched.h | 27 +++++++++++++++++++++++++++ 2 files changed, 32 insertions(+) diff --git a/include/linux/if_link.h b/include/linux/if_link.h index 62c3dd1b..c6c93b95 100644 --- a/include/linux/if_link.h +++ b/include/linux/if_link.h @@ -342,6 +342,11 @@ enum { IFLA_BOND_FAIL_OVER_MAC, IFLA_BOND_XMIT_HASH_POLICY, IFLA_BOND_RESEND_IGMP, + IFLA_BOND_NUM_PEER_NOTIF, + IFLA_BOND_ALL_SLAVES_ACTIVE, + IFLA_BOND_MIN_LINKS, + IFLA_BOND_LP_INTERVAL, + IFLA_BOND_PACKETS_PER_SLAVE, __IFLA_BOND_MAX, }; diff --git a/include/linux/pkt_sched.h b/include/linux/pkt_sched.h index a806687a..fe119205 100644 --- a/include/linux/pkt_sched.h +++ b/include/linux/pkt_sched.h @@ -173,6 +173,8 @@ enum { TCA_TBF_PTAB, TCA_TBF_RATE64, TCA_TBF_PRATE64, + TCA_TBF_BURST, + TCA_TBF_PBURST, __TCA_TBF_MAX, }; @@ -790,4 +792,29 @@ struct tc_fq_qd_stats { __u32 throttled_flows; __u32 pad; }; + +/* Heavy-Hitter Filter */ + +enum { + TCA_HHF_UNSPEC, + TCA_HHF_BACKLOG_LIMIT, + TCA_HHF_QUANTUM, + TCA_HHF_HH_FLOWS_LIMIT, + TCA_HHF_RESET_TIMEOUT, + TCA_HHF_ADMIT_BYTES, + TCA_HHF_EVICT_TIMEOUT, + TCA_HHF_NON_HH_WEIGHT, + __TCA_HHF_MAX +}; + +#define TCA_HHF_MAX (__TCA_HHF_MAX - 1) + +struct tc_hhf_xstats { + __u32 drop_overlimit; /* number of times max 
qdisc packet limit + * was hit + */ + __u32 hh_overlimit; /* number of times max heavy-hitters was hit */ + __u32 hh_tot_count; /* number of captured heavy-hitters so far */ + __u32 hh_cur_count; /* number of current heavy-hitters */ +}; #endif From af9cd91228379b637c8b18640608201ccf5e557e Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Thu, 9 Jan 2014 22:45:49 -0800 Subject: [PATCH 07/16] Update to 3.13-rc6 + net-next headers --- include/linux/if_link.h | 14 ++++++++++++++ include/linux/netfilter.h | 1 + include/linux/pkt_sched.h | 26 ++++++++++++++++++++++++++ 3 files changed, 41 insertions(+) diff --git a/include/linux/if_link.h b/include/linux/if_link.h index c6c93b95..7d8f543c 100644 --- a/include/linux/if_link.h +++ b/include/linux/if_link.h @@ -347,11 +347,25 @@ enum { IFLA_BOND_MIN_LINKS, IFLA_BOND_LP_INTERVAL, IFLA_BOND_PACKETS_PER_SLAVE, + IFLA_BOND_AD_LACP_RATE, + IFLA_BOND_AD_SELECT, + IFLA_BOND_AD_INFO, __IFLA_BOND_MAX, }; #define IFLA_BOND_MAX (__IFLA_BOND_MAX - 1) +enum { + IFLA_BOND_AD_INFO_AGGREGATOR, + IFLA_BOND_AD_INFO_NUM_PORTS, + IFLA_BOND_AD_INFO_ACTOR_KEY, + IFLA_BOND_AD_INFO_PARTNER_KEY, + IFLA_BOND_AD_INFO_PARTNER_MAC, + __IFLA_BOND_AD_INFO_MAX, +}; + +#define IFLA_BOND_AD_INFO_MAX (__IFLA_BOND_AD_INFO_MAX - 1) + /* SR-IOV virtual function management section */ enum { diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h index f05c3d92..be0bc182 100644 --- a/include/linux/netfilter.h +++ b/include/linux/netfilter.h @@ -51,6 +51,7 @@ enum nf_inet_hooks { enum { NFPROTO_UNSPEC = 0, + NFPROTO_INET = 1, NFPROTO_IPV4 = 2, NFPROTO_ARP = 3, NFPROTO_BRIDGE = 7, diff --git a/include/linux/pkt_sched.h b/include/linux/pkt_sched.h index fe119205..d62316ba 100644 --- a/include/linux/pkt_sched.h +++ b/include/linux/pkt_sched.h @@ -525,6 +525,7 @@ enum { TCA_NETEM_LOSS, TCA_NETEM_RATE, TCA_NETEM_ECN, + TCA_NETEM_RATE64, __TCA_NETEM_MAX, }; @@ -817,4 +818,29 @@ struct tc_hhf_xstats { __u32 hh_tot_count; /* number of captured 
heavy-hitters so far */ __u32 hh_cur_count; /* number of current heavy-hitters */ }; + +/* PIE */ +enum { + TCA_PIE_UNSPEC, + TCA_PIE_TARGET, + TCA_PIE_LIMIT, + TCA_PIE_TUPDATE, + TCA_PIE_ALPHA, + TCA_PIE_BETA, + TCA_PIE_ECN, + TCA_PIE_BYTEMODE, + __TCA_PIE_MAX +}; +#define TCA_PIE_MAX (__TCA_PIE_MAX - 1) + +struct tc_pie_xstats { + __u32 prob; /* current probability */ + __u32 delay; /* current delay in ms */ + __u32 avg_dq_rate; /* current average dq_rate in bits/pie_time */ + __u32 packets_in; /* total number of packets enqueued */ + __u32 dropped; /* packets dropped due to pie_action */ + __u32 overlimit; /* dropped due to lack of space in queue */ + __u32 maxq; /* maximum queue size */ + __u32 ecn_mark; /* packets marked with ecn*/ +}; #endif From 37c9b94ed21d5779acc23d89a46d86b4df400ff3 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Mon, 6 Jan 2014 10:17:09 +0100 Subject: [PATCH 08/16] add support for extended ifa_flags Signed-off-by: Jiri Pirko --- ip/ipaddress.c | 44 ++++++++++++++++++++++++++++++-------------- 1 file changed, 30 insertions(+), 14 deletions(-) diff --git a/ip/ipaddress.c b/ip/ipaddress.c index d02eaaf8..1e3f22c5 100644 --- a/ip/ipaddress.c +++ b/ip/ipaddress.c @@ -541,6 +541,13 @@ static int set_lifetime(unsigned int *lifetime, char *argv) return 0; } +static unsigned int get_ifa_flags(struct ifaddrmsg *ifa, + struct rtattr *ifa_flags_attr) +{ + return ifa_flags_attr ? 
rta_getattr_u32(ifa_flags_attr) : + ifa->ifa_flags; +} + int print_addrinfo(const struct sockaddr_nl *who, struct nlmsghdr *n, void *arg) { @@ -567,6 +574,8 @@ int print_addrinfo(const struct sockaddr_nl *who, struct nlmsghdr *n, parse_rtattr(rta_tb, IFA_MAX, IFA_RTA(ifa), n->nlmsg_len - NLMSG_LENGTH(sizeof(*ifa))); + ifa_flags = get_ifa_flags(ifa, rta_tb[IFA_FLAGS]); + if (!rta_tb[IFA_LOCAL]) rta_tb[IFA_LOCAL] = rta_tb[IFA_ADDRESS]; if (!rta_tb[IFA_ADDRESS]) @@ -576,7 +585,7 @@ int print_addrinfo(const struct sockaddr_nl *who, struct nlmsghdr *n, return 0; if ((filter.scope^ifa->ifa_scope)&filter.scopemask) return 0; - if ((filter.flags^ifa->ifa_flags)&filter.flagmask) + if ((filter.flags ^ ifa_flags) & filter.flagmask) return 0; if (filter.label) { SPRINT_BUF(b1); @@ -670,36 +679,35 @@ int print_addrinfo(const struct sockaddr_nl *who, struct nlmsghdr *n, abuf, sizeof(abuf))); } fprintf(fp, "scope %s ", rtnl_rtscope_n2a(ifa->ifa_scope, b1, sizeof(b1))); - ifa_flags = ifa->ifa_flags; - if (ifa->ifa_flags&IFA_F_SECONDARY) { + if (ifa_flags & IFA_F_SECONDARY) { ifa_flags &= ~IFA_F_SECONDARY; if (ifa->ifa_family == AF_INET6) fprintf(fp, "temporary "); else fprintf(fp, "secondary "); } - if (ifa->ifa_flags&IFA_F_TENTATIVE) { + if (ifa_flags & IFA_F_TENTATIVE) { ifa_flags &= ~IFA_F_TENTATIVE; fprintf(fp, "tentative "); } - if (ifa->ifa_flags&IFA_F_DEPRECATED) { + if (ifa_flags & IFA_F_DEPRECATED) { ifa_flags &= ~IFA_F_DEPRECATED; deprecated = 1; fprintf(fp, "deprecated "); } - if (ifa->ifa_flags&IFA_F_HOMEADDRESS) { + if (ifa_flags & IFA_F_HOMEADDRESS) { ifa_flags &= ~IFA_F_HOMEADDRESS; fprintf(fp, "home "); } - if (ifa->ifa_flags&IFA_F_NODAD) { + if (ifa_flags & IFA_F_NODAD) { ifa_flags &= ~IFA_F_NODAD; fprintf(fp, "nodad "); } - if (!(ifa->ifa_flags&IFA_F_PERMANENT)) { + if (!(ifa_flags & IFA_F_PERMANENT)) { fprintf(fp, "dynamic "); } else ifa_flags &= ~IFA_F_PERMANENT; - if (ifa->ifa_flags&IFA_F_DADFAILED) { + if (ifa_flags & IFA_F_DADFAILED) { ifa_flags &= 
~IFA_F_DADFAILED; fprintf(fp, "dadfailed "); } @@ -926,6 +934,8 @@ static void ipaddr_filter(struct nlmsg_chain *linfo, struct nlmsg_chain *ainfo) for (a = ainfo->head; a; a = a->next) { struct nlmsghdr *n = &a->h; struct ifaddrmsg *ifa = NLMSG_DATA(n); + struct rtattr *tb[IFA_MAX + 1]; + unsigned int ifa_flags; if (ifa->ifa_index != ifi->ifi_index) continue; @@ -934,11 +944,13 @@ static void ipaddr_filter(struct nlmsg_chain *linfo, struct nlmsg_chain *ainfo) continue; if ((filter.scope^ifa->ifa_scope)&filter.scopemask) continue; - if ((filter.flags^ifa->ifa_flags)&filter.flagmask) + + parse_rtattr(tb, IFA_MAX, IFA_RTA(ifa), IFA_PAYLOAD(n)); + ifa_flags = get_ifa_flags(ifa, tb[IFA_FLAGS]); + + if ((filter.flags ^ ifa_flags) & filter.flagmask) continue; if (filter.pfx.family || filter.label) { - struct rtattr *tb[IFA_MAX+1]; - parse_rtattr(tb, IFA_MAX, IFA_RTA(ifa), IFA_PAYLOAD(n)); if (!tb[IFA_LOCAL]) tb[IFA_LOCAL] = tb[IFA_ADDRESS]; @@ -1252,6 +1264,7 @@ static int ipaddr_modify(int cmd, int flags, int argc, char **argv) __u32 preferred_lft = INFINITY_LIFE_TIME; __u32 valid_lft = INFINITY_LIFE_TIME; struct ifa_cacheinfo cinfo; + unsigned int ifa_flags = 0; memset(&req, 0, sizeof(req)); @@ -1329,9 +1342,9 @@ static int ipaddr_modify(int cmd, int flags, int argc, char **argv) if (set_lifetime(&preferred_lft, *argv)) invarg("preferred_lft value", *argv); } else if (strcmp(*argv, "home") == 0) { - req.ifa.ifa_flags |= IFA_F_HOMEADDRESS; + ifa_flags |= IFA_F_HOMEADDRESS; } else if (strcmp(*argv, "nodad") == 0) { - req.ifa.ifa_flags |= IFA_F_NODAD; + ifa_flags |= IFA_F_NODAD; } else { if (strcmp(*argv, "local") == 0) { NEXT_ARG(); @@ -1349,6 +1362,9 @@ static int ipaddr_modify(int cmd, int flags, int argc, char **argv) } argc--; argv++; } + req.ifa.ifa_flags = ifa_flags; + addattr32(&req.n, sizeof(req), IFA_FLAGS, ifa_flags); + if (d == NULL) { fprintf(stderr, "Not enough information: \"dev\" argument is required.\n"); return -1; From 
80dd880dd099de8bde08cb07505e2c07a2e9c065 Mon Sep 17 00:00:00 2001 From: Vijay Subramanian Date: Tue, 7 Jan 2014 22:00:39 -0800 Subject: [PATCH 09/16] PIE: Proportional Integral controller Enhanced Proportional Integral controller Enhanced (PIE) is a scheduler to address the bufferbloat problem. We present here a lightweight design, PIE (Proportional Integral controller Enhanced) that can effectively control the average queueing latency to a target value. Simulation results, theoretical analysis and Linux testbed results have shown that PIE can ensure low latency and achieve high link utilization under various congestion situations. The design does not require per-packet timestamp, so it incurs very small overhead and is simple enough to implement in both hardware and software. For more information, please see the technical paper about PIE in the IEEE Conference on High Performance Switching and Routing 2013. A copy of the paper can be found at ftp://ftpeng.cisco.com/pie/. Please also refer to the IETF draft submission at http://tools.ietf.org/html/draft-pan-tsvwg-pie-00 All relevant code, documents and test scripts and results can be found at ftp://ftpeng.cisco.com/pie/. 
For problems with the iproute2/tc or Linux kernel code, please contact Vijay Subramanian (vijaynsu@cisco.com or subramanian.vijay@gmail.com) Mythili Prabhu (mysuryan@cisco.com) Signed-off-by: Vijay Subramanian Signed-off-by: Mythili Prabhu CC: Dave Taht --- tc/Makefile | 1 + tc/q_pie.c | 218 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 219 insertions(+) create mode 100644 tc/q_pie.c diff --git a/tc/Makefile b/tc/Makefile index 84215c06..b6337714 100644 --- a/tc/Makefile +++ b/tc/Makefile @@ -53,6 +53,7 @@ TCMODULES += q_mqprio.o TCMODULES += q_codel.o TCMODULES += q_fq_codel.o TCMODULES += q_fq.o +TCMODULES += q_pie.o ifeq ($(TC_CONFIG_IPSET), y) ifeq ($(TC_CONFIG_XT), y) diff --git a/tc/q_pie.c b/tc/q_pie.c new file mode 100644 index 00000000..193b05de --- /dev/null +++ b/tc/q_pie.c @@ -0,0 +1,218 @@ +/* Copyright (C) 2013 Cisco Systems, Inc, 2013. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * Author: Vijay Subramanian + * Author: Mythili Prabhu + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "utils.h" +#include "tc_util.h" + +static void explain(void) +{ + fprintf(stderr, "Usage: ... 
pie [ limit PACKETS ][ target TIME us]\n"); + fprintf(stderr, " [ tupdate TIME us][ alpha ALPHA ]"); + fprintf(stderr, "[beta BETA ][bytemode | nobytemode][ecn | noecn ]\n"); +} + +#define ALPHA_MAX 32 +#define ALPHA_MIN 0 +#define BETA_MAX 32 +#define BETA_MIN 0 + +static int pie_parse_opt(struct qdisc_util *qu, int argc, char **argv, + struct nlmsghdr *n) +{ + unsigned int limit = 0; + unsigned int target = 0; + unsigned int tupdate = 0; + unsigned int alpha = 0; + unsigned int beta = 0; + int ecn = -1; + int bytemode = -1; + struct rtattr *tail; + + while (argc > 0) { + if (strcmp(*argv, "limit") == 0) { + NEXT_ARG(); + if (get_unsigned(&limit, *argv, 0)) { + fprintf(stderr, "Illegal \"limit\"\n"); + return -1; + } + } else if (strcmp(*argv, "target") == 0) { + NEXT_ARG(); + if (get_time(&target, *argv)) { + fprintf(stderr, "Illegal \"target\"\n"); + return -1; + } + } else if (strcmp(*argv, "tupdate") == 0) { + NEXT_ARG(); + if (get_time(&tupdate, *argv)) { + fprintf(stderr, "Illegal \"tupdate\"\n"); + return -1; + } + } else if (strcmp(*argv, "alpha") == 0) { + NEXT_ARG(); + if (get_unsigned(&alpha, *argv, 0) || + (alpha > ALPHA_MAX) || (alpha < ALPHA_MIN)) { + fprintf(stderr, "Illegal \"alpha\"\n"); + return -1; + } + } else if (strcmp(*argv, "beta") == 0) { + NEXT_ARG(); + if (get_unsigned(&beta, *argv, 0) || + (beta > BETA_MAX) || (beta < BETA_MIN)) { + fprintf(stderr, "Illegal \"beta\"\n"); + return -1; + } + } else if (strcmp(*argv, "ecn") == 0) { + ecn = 1; + } else if (strcmp(*argv, "noecn") == 0) { + ecn = 0; + } else if (strcmp(*argv, "bytemode") == 0) { + bytemode = 1; + } else if (strcmp(*argv, "nobytemode") == 0) { + bytemode = 0; + } else if (strcmp(*argv, "help") == 0) { + explain(); + return -1; + } else { + fprintf(stderr, "What is \"%s\"?\n", *argv); + explain(); + return -1; + } + argc--; + argv++; + } + + tail = NLMSG_TAIL(n); + addattr_l(n, 1024, TCA_OPTIONS, NULL, 0); + if (limit) + addattr_l(n, 1024, TCA_PIE_LIMIT, &limit, sizeof(limit)); 
+ if (tupdate) + addattr_l(n, 1024, TCA_PIE_TUPDATE, &tupdate, sizeof(tupdate)); + if (target) + addattr_l(n, 1024, TCA_PIE_TARGET, &target, sizeof(target)); + if (alpha) + addattr_l(n, 1024, TCA_PIE_ALPHA, &alpha, sizeof(alpha)); + if (beta) + addattr_l(n, 1024, TCA_PIE_BETA, &beta, sizeof(beta)); + if (ecn != -1) + addattr_l(n, 1024, TCA_PIE_ECN, &ecn, sizeof(ecn)); + if (bytemode != -1) + addattr_l(n, 1024, TCA_PIE_BYTEMODE, &bytemode, + sizeof(bytemode)); + + tail->rta_len = (void *)NLMSG_TAIL(n) - (void *)tail; + return 0; +} + +static int pie_print_opt(struct qdisc_util *qu, FILE *f, struct rtattr *opt) +{ + struct rtattr *tb[TCA_PIE_MAX + 1]; + unsigned int limit; + unsigned int tupdate; + unsigned int target; + unsigned int alpha; + unsigned int beta; + unsigned ecn; + unsigned bytemode; + SPRINT_BUF(b1); + + if (opt == NULL) + return 0; + + parse_rtattr_nested(tb, TCA_PIE_MAX, opt); + + if (tb[TCA_PIE_LIMIT] && + RTA_PAYLOAD(tb[TCA_PIE_LIMIT]) >= sizeof(__u32)) { + limit = rta_getattr_u32(tb[TCA_PIE_LIMIT]); + fprintf(f, "limit %up ", limit); + } + if (tb[TCA_PIE_TARGET] && + RTA_PAYLOAD(tb[TCA_PIE_TARGET]) >= sizeof(__u32)) { + target = rta_getattr_u32(tb[TCA_PIE_TARGET]); + fprintf(f, "target %s ", sprint_time(target, b1)); + } + if (tb[TCA_PIE_TUPDATE] && + RTA_PAYLOAD(tb[TCA_PIE_TUPDATE]) >= sizeof(__u32)) { + tupdate = rta_getattr_u32(tb[TCA_PIE_TUPDATE]); + fprintf(f, "tupdate %s ", sprint_time(tupdate, b1)); + } + if (tb[TCA_PIE_ALPHA] && + RTA_PAYLOAD(tb[TCA_PIE_ALPHA]) >= sizeof(__u32)) { + alpha = rta_getattr_u32(tb[TCA_PIE_ALPHA]); + fprintf(f, "alpha %u ", alpha); + } + if (tb[TCA_PIE_BETA] && + RTA_PAYLOAD(tb[TCA_PIE_BETA]) >= sizeof(__u32)) { + beta = rta_getattr_u32(tb[TCA_PIE_BETA]); + fprintf(f, "beta %u ", beta); + } + + if (tb[TCA_PIE_ECN] && RTA_PAYLOAD(tb[TCA_PIE_ECN]) >= sizeof(__u32)) { + ecn = rta_getattr_u32(tb[TCA_PIE_ECN]); + if (ecn) + fprintf(f, "ecn "); + } + + if (tb[TCA_PIE_BYTEMODE] && + RTA_PAYLOAD(tb[TCA_PIE_BYTEMODE]) >= 
sizeof(__u32)) { + bytemode = rta_getattr_u32(tb[TCA_PIE_BYTEMODE]); + if (bytemode) + fprintf(f, "bytemode "); + } + + return 0; +} + +static int pie_print_xstats(struct qdisc_util *qu, FILE *f, + struct rtattr *xstats) +{ + struct tc_pie_xstats *st; + + if (xstats == NULL) + return 0; + + if (RTA_PAYLOAD(xstats) < sizeof(*st)) + return -1; + + st = RTA_DATA(xstats); + /*prob is returned as a fracion of maximum integer value */ + fprintf(f, "prob %f delay %uus avg_dq_rate %u\n", + (double)st->prob / (double)0xffffffff, st->delay, + st->avg_dq_rate); + fprintf(f, "pkts_in %u overlimit %u dropped %u maxq %u ecn_mark %u\n", + st->packets_in, st->overlimit, st->dropped, st->maxq, + st->ecn_mark); + return 0; + +} + +struct qdisc_util pie_qdisc_util = { + .id = "pie", + .parse_qopt = pie_parse_opt, + .print_qopt = pie_print_opt, + .print_xstats = pie_print_xstats, +}; From 63d127b05dff4d21e8748b2ba9e0e6372897c383 Mon Sep 17 00:00:00 2001 From: "sfeldma@cumulusnetworks.com" Date: Fri, 3 Jan 2014 18:45:38 -0800 Subject: [PATCH 10/16] iproute2: finish support for bonding attributes Add support for bonding attributes just added to net-next. On set, allow string or number value for enumerated attributes. On show, use always use string value for attribute. Signed-off-by: Scott Feldman --- ip/iplink_bond.c | 458 ++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 451 insertions(+), 7 deletions(-) diff --git a/ip/iplink_bond.c b/ip/iplink_bond.c index 3fb7f4f4..f0e5ab11 100644 --- a/ip/iplink_bond.c +++ b/ip/iplink_bond.c @@ -7,41 +7,165 @@ * 2 of the License, or (at your option) any later version. 
* * Authors: Jiri Pirko + * Scott Feldman */ #include #include #include #include +#include #include #include "rt_names.h" #include "utils.h" #include "ip_common.h" +#define BOND_MAX_ARP_TARGETS 16 + +static const char *mode_tbl[] = { + "balance-rr", + "active-backup", + "balance-xor", + "broadcast", + "802.3ad", + "balance-tlb", + "balance-alb", + NULL, +}; + +static const char *arp_validate_tbl[] = { + "none", + "active", + "backup", + "all", + NULL, +}; + +static const char *arp_all_targets_tbl[] = { + "any", + "all", + NULL, +}; + +static const char *primary_reselect_tbl[] = { + "always", + "better", + "failure", + NULL, +}; + +static const char *fail_over_mac_tbl[] = { + "none", + "active", + "follow", + NULL, +}; + +static const char *xmit_hash_policy_tbl[] = { + "layer2", + "layer3+4", + "layer2+3", + "encap2+3", + "encap3+4", + NULL, +}; + +static const char *lacp_rate_tbl[] = { + "slow", + "fast", + NULL, +}; + +static const char *ad_select_tbl[] = { + "stable", + "bandwidth", + "count", + NULL, +}; + +static const char *get_name(const char **tbl, int index) +{ + int i; + + for (i = 0; tbl[i]; i++) + if (i == index) + return tbl[i]; + + return "UNKNOWN"; +} + +static int get_index(const char **tbl, char *name) +{ + int i, index; + + /* check for integer index passed in instead of name */ + if (get_integer(&index, name, 10) == 0) + for (i = 0; tbl[i]; i++) + if (i == index) + return i; + + for (i = 0; tbl[i]; i++) + if (strncmp(tbl[i], name, strlen(tbl[i])) == 0) + return i; + + return -1; +} + static void explain(void) { fprintf(stderr, "Usage: ... bond [ mode BONDMODE ] [ active_slave SLAVE_DEV ]\n" - " [ clear_active_slave ]\n" + " [ clear_active_slave ] [ miimon MIIMON ]\n" + " [ updelay UPDELAY ] [ downdelay DOWNDELAY ]\n" + " [ use_carrier USE_CARRIER ]\n" + " [ arp_interval ARP_INTERVAL ]\n" + " [ arp_validate ARP_VALIDATE ]\n" + " [ arp_all_targets ARP_ALL_TARGETS ]\n" + " [ arp_ip_target [ ARP_IP_TARGET, ... 
] ]\n" + " [ primary SLAVE_DEV ]\n" + " [ primary_reselect PRIMARY_RESELECT ]\n" + " [ fail_over_mac FAIL_OVER_MAC ]\n" + " [ xmit_hash_policy XMIT_HASH_POLICY ]\n" + " [ resend_igmp RESEND_IGMP ]\n" + " [ num_grat_arp|num_unsol_na NUM_GRAT_ARP|NUM_UNSOL_NA ]\n" + " [ all_slaves_active ALL_SLAVES_ACTIVE ]\n" + " [ min_links MIN_LINKS ]\n" + " [ lp_interval LP_INTERVAL ]\n" + " [ packets_per_slave PACKETS_PER_SLAVE ]\n" + " [ lacp_rate LACP_RATE ]\n" + " [ ad_select AD_SELECT ]\n" "\n" - "BONDMODE := 0-6\n" + "BONDMODE := balance-rr|active-backup|balance-xor|broadcast|802.3ad|balance-tlb|balance-alb\n" + "ARP_VALIDATE := none|active|backup|all\n" + "ARP_ALL_TARGETS := any|all\n" + "PRIMARY_RESELECT := always|better|failure\n" + "FAIL_OVER_MAC := none|active|follow\n" + "XMIT_HASH_POLICY := layer2|layer2+3|layer3+4\n" + "LACP_RATE := slow|fast\n" + "AD_SELECT := stable|bandwidth|count\n" ); } static int bond_parse_opt(struct link_util *lu, int argc, char **argv, struct nlmsghdr *n) { - __u8 mode; + __u8 mode, use_carrier, primary_reselect, fail_over_mac; + __u8 xmit_hash_policy, num_peer_notif, all_slaves_active; + __u8 lacp_rate, ad_select; + __u32 miimon, updelay, downdelay, arp_interval, arp_validate; + __u32 arp_all_targets, resend_igmp, min_links, lp_interval; + __u32 packets_per_slave; unsigned ifindex; while (argc > 0) { if (matches(*argv, "mode") == 0) { NEXT_ARG(); - if (get_u8(&mode, *argv, 0)) { - invarg("mode %s is invalid", *argv); + if (get_index(mode_tbl, *argv) < 0) { + invarg("invalid mode", *argv); return -1; } + mode = get_index(mode_tbl, *argv); addattr8(n, 1024, IFLA_BOND_MODE, mode); } else if (matches(*argv, "active_slave") == 0) { NEXT_ARG(); @@ -51,6 +175,170 @@ static int bond_parse_opt(struct link_util *lu, int argc, char **argv, addattr32(n, 1024, IFLA_BOND_ACTIVE_SLAVE, ifindex); } else if (matches(*argv, "clear_active_slave") == 0) { addattr32(n, 1024, IFLA_BOND_ACTIVE_SLAVE, 0); + } else if (matches(*argv, "miimon") == 0) { + 
NEXT_ARG(); + if (get_u32(&miimon, *argv, 0)) { + invarg("invalid miimon", *argv); + return -1; + } + addattr32(n, 1024, IFLA_BOND_MIIMON, miimon); + } else if (matches(*argv, "updelay") == 0) { + NEXT_ARG(); + if (get_u32(&updelay, *argv, 0)) { + invarg("invalid updelay", *argv); + return -1; + } + addattr32(n, 1024, IFLA_BOND_UPDELAY, updelay); + } else if (matches(*argv, "downdelay") == 0) { + NEXT_ARG(); + if (get_u32(&downdelay, *argv, 0)) { + invarg("invalid downdelay", *argv); + return -1; + } + addattr32(n, 1024, IFLA_BOND_DOWNDELAY, downdelay); + } else if (matches(*argv, "use_carrier") == 0) { + NEXT_ARG(); + if (get_u8(&use_carrier, *argv, 0)) { + invarg("invalid use_carrier", *argv); + return -1; + } + addattr8(n, 1024, IFLA_BOND_USE_CARRIER, use_carrier); + } else if (matches(*argv, "arp_interval") == 0) { + NEXT_ARG(); + if (get_u32(&arp_interval, *argv, 0)) { + invarg("invalid arp_interval", *argv); + return -1; + } + addattr32(n, 1024, IFLA_BOND_ARP_INTERVAL, arp_interval); + } else if (matches(*argv, "arp_ip_target") == 0) { + struct rtattr * nest = addattr_nest(n, 1024, + IFLA_BOND_ARP_IP_TARGET); + if (NEXT_ARG_OK()) { + NEXT_ARG(); + char *targets = strdupa(*argv); + char *target = strtok(targets, ","); + int i; + + for(i = 0; target && i < BOND_MAX_ARP_TARGETS; i++) { + __u32 addr = get_addr32(target); + addattr32(n, 1024, i, addr); + target = strtok(NULL, ","); + } + addattr_nest_end(n, nest); + } + addattr_nest_end(n, nest); + } else if (matches(*argv, "arp_validate") == 0) { + NEXT_ARG(); + if (get_index(arp_validate_tbl, *argv) < 0) { + invarg("invalid arp_validate", *argv); + return -1; + } + arp_validate = get_index(arp_validate_tbl, *argv); + addattr32(n, 1024, IFLA_BOND_ARP_VALIDATE, arp_validate); + } else if (matches(*argv, "arp_all_targets") == 0) { + NEXT_ARG(); + if (get_index(arp_all_targets_tbl, *argv) < 0) { + invarg("invalid arp_all_targets", *argv); + return -1; + } + arp_all_targets = get_index(arp_all_targets_tbl, *argv); + 
addattr32(n, 1024, IFLA_BOND_ARP_ALL_TARGETS, arp_all_targets); + } else if (matches(*argv, "primary") == 0) { + NEXT_ARG(); + ifindex = if_nametoindex(*argv); + if (!ifindex) + return -1; + addattr32(n, 1024, IFLA_BOND_PRIMARY, ifindex); + } else if (matches(*argv, "primary_reselect") == 0) { + NEXT_ARG(); + if (get_index(primary_reselect_tbl, *argv) < 0) { + invarg("invalid primary_reselect", *argv); + return -1; + } + primary_reselect = get_index(primary_reselect_tbl, *argv); + addattr8(n, 1024, IFLA_BOND_PRIMARY_RESELECT, + primary_reselect); + } else if (matches(*argv, "fail_over_mac") == 0) { + NEXT_ARG(); + if (get_index(fail_over_mac_tbl, *argv) < 0) { + invarg("invalid fail_over_mac", *argv); + return -1; + } + fail_over_mac = get_index(fail_over_mac_tbl, *argv); + addattr8(n, 1024, IFLA_BOND_FAIL_OVER_MAC, + fail_over_mac); + } else if (matches(*argv, "xmit_hash_policy") == 0) { + NEXT_ARG(); + if (get_index(xmit_hash_policy_tbl, *argv) < 0) { + invarg("invalid xmit_hash_policy", *argv); + return -1; + } + xmit_hash_policy = get_index(xmit_hash_policy_tbl, *argv); + addattr8(n, 1024, IFLA_BOND_XMIT_HASH_POLICY, + xmit_hash_policy); + } else if (matches(*argv, "resend_igmp") == 0) { + NEXT_ARG(); + if (get_u32(&resend_igmp, *argv, 0)) { + invarg("invalid resend_igmp", *argv); + return -1; + } + addattr32(n, 1024, IFLA_BOND_RESEND_IGMP, resend_igmp); + } else if (matches(*argv, "num_grat_arp") == 0 || + matches(*argv, "num_unsol_na") == 0) { + NEXT_ARG(); + if (get_u8(&num_peer_notif, *argv, 0)) { + invarg("invalid num_grat_arp|num_unsol_na", + *argv); + return -1; + } + addattr8(n, 1024, IFLA_BOND_NUM_PEER_NOTIF, + num_peer_notif); + } else if (matches(*argv, "all_slaves_active") == 0) { + NEXT_ARG(); + if (get_u8(&all_slaves_active, *argv, 0)) { + invarg("invalid all_slaves_active", *argv); + return -1; + } + addattr8(n, 1024, IFLA_BOND_ALL_SLAVES_ACTIVE, + all_slaves_active); + } else if (matches(*argv, "min_links") == 0) { + NEXT_ARG(); + if 
(get_u32(&min_links, *argv, 0)) { + invarg("invalid min_links", *argv); + return -1; + } + addattr32(n, 1024, IFLA_BOND_MIN_LINKS, min_links); + } else if (matches(*argv, "lp_interval") == 0) { + NEXT_ARG(); + if (get_u32(&lp_interval, *argv, 0)) { + invarg("invalid lp_interval", *argv); + return -1; + } + addattr32(n, 1024, IFLA_BOND_LP_INTERVAL, lp_interval); + } else if (matches(*argv, "packets_per_slave") == 0) { + NEXT_ARG(); + if (get_u32(&packets_per_slave, *argv, 0)) { + invarg("invalid packets_per_slave", *argv); + return -1; + } + addattr32(n, 1024, IFLA_BOND_PACKETS_PER_SLAVE, + packets_per_slave); + } else if (matches(*argv, "lacp_rate") == 0) { + NEXT_ARG(); + if (get_index(lacp_rate_tbl, *argv) < 0) { + invarg("invalid lacp_rate", *argv); + return -1; + } + lacp_rate = get_index(lacp_rate_tbl, *argv); + addattr8(n, 1024, IFLA_BOND_AD_LACP_RATE, lacp_rate); + } else if (matches(*argv, "ad_select") == 0) { + NEXT_ARG(); + if (get_index(ad_select_tbl, *argv) < 0) { + invarg("invalid ad_select", *argv); + return -1; + } + ad_select = get_index(ad_select_tbl, *argv); + addattr8(n, 1024, IFLA_BOND_AD_SELECT, ad_select); } else { fprintf(stderr, "bond: unknown command \"%s\"?\n", *argv); explain(); @@ -69,8 +357,11 @@ static void bond_print_opt(struct link_util *lu, FILE *f, struct rtattr *tb[]) if (!tb) return; - if (tb[IFLA_BOND_MODE]) - fprintf(f, "mode %u ", rta_getattr_u8(tb[IFLA_BOND_MODE])); + if (tb[IFLA_BOND_MODE]) { + const char *mode = get_name(mode_tbl, + rta_getattr_u8(tb[IFLA_BOND_MODE])); + fprintf(f, "mode %s ", mode); + } if (tb[IFLA_BOND_ACTIVE_SLAVE] && (ifindex = rta_getattr_u32(tb[IFLA_BOND_ACTIVE_SLAVE]))) { @@ -82,6 +373,159 @@ static void bond_print_opt(struct link_util *lu, FILE *f, struct rtattr *tb[]) else fprintf(f, "active_slave %u ", ifindex); } + + if (tb[IFLA_BOND_MIIMON]) + fprintf(f, "miimon %u ", rta_getattr_u32(tb[IFLA_BOND_MIIMON])); + + if (tb[IFLA_BOND_UPDELAY]) + fprintf(f, "updelay %u ", 
rta_getattr_u32(tb[IFLA_BOND_UPDELAY])); + + if (tb[IFLA_BOND_DOWNDELAY]) + fprintf(f, "downdelay %u ", + rta_getattr_u32(tb[IFLA_BOND_DOWNDELAY])); + + if (tb[IFLA_BOND_USE_CARRIER]) + fprintf(f, "use_carrier %u ", + rta_getattr_u8(tb[IFLA_BOND_USE_CARRIER])); + + if (tb[IFLA_BOND_ARP_INTERVAL]) + fprintf(f, "arp_interval %u ", + rta_getattr_u32(tb[IFLA_BOND_ARP_INTERVAL])); + + if (tb[IFLA_BOND_ARP_IP_TARGET]) { + struct rtattr *iptb[BOND_MAX_ARP_TARGETS + 1]; + char buf[INET_ADDRSTRLEN]; + int i; + + parse_rtattr_nested(iptb, BOND_MAX_ARP_TARGETS, + tb[IFLA_BOND_ARP_IP_TARGET]); + + if (iptb[0]) + fprintf(f, "arp_ip_target "); + + for (i = 0; i < BOND_MAX_ARP_TARGETS; i++) { + if (iptb[i]) + fprintf(f, "%s", + rt_addr_n2a(AF_INET, + RTA_PAYLOAD(iptb[i]), + RTA_DATA(iptb[i]), + buf, + INET_ADDRSTRLEN)); + if (i < BOND_MAX_ARP_TARGETS-1 && iptb[i+1]) + fprintf(f, ","); + } + + if (iptb[0]) + fprintf(f, " "); + } + + if (tb[IFLA_BOND_ARP_VALIDATE]) { + const char *arp_validate = get_name(arp_validate_tbl, + rta_getattr_u32(tb[IFLA_BOND_ARP_VALIDATE])); + fprintf(f, "arp_validate %s ", arp_validate); + } + + if (tb[IFLA_BOND_ARP_ALL_TARGETS]) { + const char *arp_all_targets = get_name(arp_all_targets_tbl, + rta_getattr_u32(tb[IFLA_BOND_ARP_ALL_TARGETS])); + fprintf(f, "arp_all_target %s ", arp_all_targets); + } + + if (tb[IFLA_BOND_PRIMARY] && + (ifindex = rta_getattr_u32(tb[IFLA_BOND_PRIMARY]))) { + char buf[IFNAMSIZ]; + const char *n = if_indextoname(ifindex, buf); + + if (n) + fprintf(f, "primary %s ", n); + else + fprintf(f, "primary %u ", ifindex); + } + + if (tb[IFLA_BOND_PRIMARY_RESELECT]) { + const char *primary_reselect = get_name(primary_reselect_tbl, + rta_getattr_u8(tb[IFLA_BOND_PRIMARY_RESELECT])); + fprintf(f, "primary_reselect %s ", primary_reselect); + } + + if (tb[IFLA_BOND_FAIL_OVER_MAC]) { + const char *fail_over_mac = get_name(fail_over_mac_tbl, + rta_getattr_u8(tb[IFLA_BOND_FAIL_OVER_MAC])); + fprintf(f, "fail_over_mac %s ", fail_over_mac); + } 
+ + if (tb[IFLA_BOND_XMIT_HASH_POLICY]) { + const char *xmit_hash_policy = get_name(xmit_hash_policy_tbl, + rta_getattr_u8(tb[IFLA_BOND_XMIT_HASH_POLICY])); + fprintf(f, "xmit_hash_policy %s ", xmit_hash_policy); + } + + if (tb[IFLA_BOND_RESEND_IGMP]) + fprintf(f, "resend_igmp %u ", + rta_getattr_u32(tb[IFLA_BOND_RESEND_IGMP])); + + if (tb[IFLA_BOND_NUM_PEER_NOTIF]) + fprintf(f, "num_grat_arp %u ", + rta_getattr_u8(tb[IFLA_BOND_NUM_PEER_NOTIF])); + + if (tb[IFLA_BOND_ALL_SLAVES_ACTIVE]) + fprintf(f, "all_slaves_active %u ", + rta_getattr_u8(tb[IFLA_BOND_ALL_SLAVES_ACTIVE])); + + if (tb[IFLA_BOND_MIN_LINKS]) + fprintf(f, "min_links %u ", + rta_getattr_u32(tb[IFLA_BOND_MIN_LINKS])); + + if (tb[IFLA_BOND_LP_INTERVAL]) + fprintf(f, "lp_interval %u ", + rta_getattr_u32(tb[IFLA_BOND_LP_INTERVAL])); + + if (tb[IFLA_BOND_PACKETS_PER_SLAVE]) + fprintf(f, "packets_per_slave %u ", + rta_getattr_u32(tb[IFLA_BOND_PACKETS_PER_SLAVE])); + + if (tb[IFLA_BOND_AD_LACP_RATE]) { + const char *lacp_rate = get_name(lacp_rate_tbl, + rta_getattr_u8(tb[IFLA_BOND_AD_LACP_RATE])); + fprintf(f, "lacp_rate %s ", lacp_rate); + } + + if (tb[IFLA_BOND_AD_SELECT]) { + const char *ad_select = get_name(ad_select_tbl, + rta_getattr_u8(tb[IFLA_BOND_AD_SELECT])); + fprintf(f, "ad_select %s ", ad_select); + } + + if (tb[IFLA_BOND_AD_INFO]) { + struct rtattr *adtb[IFLA_BOND_AD_INFO_MAX + 1]; + + parse_rtattr_nested(adtb, IFLA_BOND_AD_INFO_MAX, + tb[IFLA_BOND_AD_INFO]); + + if (adtb[IFLA_BOND_AD_INFO_AGGREGATOR]) + fprintf(f, "ad_aggregator %d ", + rta_getattr_u16(adtb[IFLA_BOND_AD_INFO_AGGREGATOR])); + + if (adtb[IFLA_BOND_AD_INFO_NUM_PORTS]) + fprintf(f, "ad_num_ports %d ", + rta_getattr_u16(adtb[IFLA_BOND_AD_INFO_NUM_PORTS])); + + if (adtb[IFLA_BOND_AD_INFO_ACTOR_KEY]) + fprintf(f, "ad_actor_key %d ", + rta_getattr_u16(adtb[IFLA_BOND_AD_INFO_ACTOR_KEY])); + + if (adtb[IFLA_BOND_AD_INFO_PARTNER_KEY]) + fprintf(f, "ad_partner_key %d ", + rta_getattr_u16(adtb[IFLA_BOND_AD_INFO_PARTNER_KEY])); + + if 
(adtb[IFLA_BOND_AD_INFO_PARTNER_MAC]) { + unsigned char *p = + RTA_DATA(adtb[IFLA_BOND_AD_INFO_PARTNER_MAC]); + SPRINT_BUF(b); + fprintf(f, "ad_partner_mac %s ", + ll_addr_n2a(p, ETH_ALEN, 0, b, sizeof(b))); + } + } } struct link_util bond_link_util = { From 3ba9ccda87a7e17815e531b9205cdf03162f27dc Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Mon, 20 Jan 2014 12:28:42 -0800 Subject: [PATCH 11/16] Update headers files from net-next --- include/linux/if_addr.h | 1 + include/linux/if_arp.h | 1 + include/linux/if_link.h | 13 +++++++++++++ include/linux/tc_act/tc_ipt.h | 1 + include/linux/tcp_metrics.h | 2 ++ 5 files changed, 18 insertions(+) diff --git a/include/linux/if_addr.h b/include/linux/if_addr.h index 9ce823d3..cc375e42 100644 --- a/include/linux/if_addr.h +++ b/include/linux/if_addr.h @@ -49,6 +49,7 @@ enum { #define IFA_F_TENTATIVE 0x40 #define IFA_F_PERMANENT 0x80 #define IFA_F_MANAGETEMPADDR 0x100 +#define IFA_F_NOPREFIXROUTE 0x200 struct ifa_cacheinfo { __u32 ifa_prefered; diff --git a/include/linux/if_arp.h b/include/linux/if_arp.h index 0da6f5e7..d001bdb2 100644 --- a/include/linux/if_arp.h +++ b/include/linux/if_arp.h @@ -94,6 +94,7 @@ #define ARPHRD_CAIF 822 /* CAIF media type */ #define ARPHRD_IP6GRE 823 /* GRE over IPv6 */ #define ARPHRD_NETLINK 824 /* Netlink header */ +#define ARPHRD_6LOWPAN 825 /* IPv6 over LoWPAN */ #define ARPHRD_VOID 0xFFFF /* Void type, nothing is known */ #define ARPHRD_NONE 0xFFFE /* zero header length */ diff --git a/include/linux/if_link.h b/include/linux/if_link.h index 7d8f543c..9cb5909c 100644 --- a/include/linux/if_link.h +++ b/include/linux/if_link.h @@ -144,6 +144,7 @@ enum { IFLA_NUM_RX_QUEUES, IFLA_CARRIER, IFLA_PHYS_PORT_ID, + IFLA_SLAVE, __IFLA_MAX }; @@ -366,6 +367,18 @@ enum { #define IFLA_BOND_AD_INFO_MAX (__IFLA_BOND_AD_INFO_MAX - 1) +enum { + IFLA_SLAVE_STATE, + IFLA_SLAVE_MII_STATUS, + IFLA_SLAVE_LINK_FAILURE_COUNT, + IFLA_SLAVE_PERM_HWADDR, + IFLA_SLAVE_QUEUE_ID, + IFLA_SLAVE_AD_AGGREGATOR_ID, + 
__IFLA_SLAVE_MAX, +}; + +#define IFLA_SLAVE_MAX (__IFLA_SLAVE_MAX - 1) + /* SR-IOV virtual function management section */ enum { diff --git a/include/linux/tc_act/tc_ipt.h b/include/linux/tc_act/tc_ipt.h index a2335563..130aaadf 100644 --- a/include/linux/tc_act/tc_ipt.h +++ b/include/linux/tc_act/tc_ipt.h @@ -4,6 +4,7 @@ #include #define TCA_ACT_IPT 6 +#define TCA_ACT_XT 10 enum { TCA_IPT_UNSPEC, diff --git a/include/linux/tcp_metrics.h b/include/linux/tcp_metrics.h index cb5157b5..54a37b13 100644 --- a/include/linux/tcp_metrics.h +++ b/include/linux/tcp_metrics.h @@ -35,6 +35,8 @@ enum { TCP_METRICS_ATTR_FOPEN_SYN_DROPS, /* u16, count of drops */ TCP_METRICS_ATTR_FOPEN_SYN_DROP_TS, /* msecs age */ TCP_METRICS_ATTR_FOPEN_COOKIE, /* binary */ + TCP_METRICS_ATTR_SADDR_IPV4, /* u32 */ + TCP_METRICS_ATTR_SADDR_IPV6, /* binary */ __TCP_METRICS_ATTR_MAX, }; From 5b7e21c417beae648312c27ee2043e7c04134c92 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Mon, 6 Jan 2014 10:17:10 +0100 Subject: [PATCH 12/16] add support for IFA_F_MANAGETEMPADDR Signed-off-by: Jiri Pirko --- ip/ipaddress.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/ip/ipaddress.c b/ip/ipaddress.c index 1e3f22c5..b0d54fef 100644 --- a/ip/ipaddress.c +++ b/ip/ipaddress.c @@ -82,7 +82,7 @@ static void usage(void) fprintf(stderr, " tentative | deprecated | dadfailed | temporary |\n"); fprintf(stderr, " CONFFLAG-LIST ]\n"); fprintf(stderr, "CONFFLAG-LIST := [ CONFFLAG-LIST ] CONFFLAG\n"); - fprintf(stderr, "CONFFLAG := [ home | nodad ]\n"); + fprintf(stderr, "CONFFLAG := [ home | nodad | mngtmpaddr ]\n"); fprintf(stderr, "LIFETIME := [ valid_lft LFT ] [ preferred_lft LFT ]\n"); fprintf(stderr, "LFT := forever | SECONDS\n"); @@ -703,6 +703,10 @@ int print_addrinfo(const struct sockaddr_nl *who, struct nlmsghdr *n, ifa_flags &= ~IFA_F_NODAD; fprintf(fp, "nodad "); } + if (ifa_flags & IFA_F_MANAGETEMPADDR) { + ifa_flags &= ~IFA_F_MANAGETEMPADDR; + fprintf(fp, "mngtmpaddr "); + } if 
(!(ifa_flags & IFA_F_PERMANENT)) { fprintf(fp, "dynamic "); } else @@ -1126,6 +1130,9 @@ static int ipaddr_list_flush_or_save(int argc, char **argv, int action) } else if (strcmp(*argv, "nodad") == 0) { filter.flags |= IFA_F_NODAD; filter.flagmask |= IFA_F_NODAD; + } else if (strcmp(*argv, "mngtmpaddr") == 0) { + filter.flags |= IFA_F_MANAGETEMPADDR; + filter.flagmask |= IFA_F_MANAGETEMPADDR; } else if (strcmp(*argv, "dadfailed") == 0) { filter.flags |= IFA_F_DADFAILED; filter.flagmask |= IFA_F_DADFAILED; @@ -1345,6 +1352,8 @@ static int ipaddr_modify(int cmd, int flags, int argc, char **argv) ifa_flags |= IFA_F_HOMEADDRESS; } else if (strcmp(*argv, "nodad") == 0) { ifa_flags |= IFA_F_NODAD; + } else if (strcmp(*argv, "mngtmpaddr") == 0) { + ifa_flags |= IFA_F_MANAGETEMPADDR; } else { if (strcmp(*argv, "local") == 0) { NEXT_ARG(); From 58c69b226fb3adff1644914654d4fad99c2ee463 Mon Sep 17 00:00:00 2001 From: Thomas Haller Date: Tue, 7 Jan 2014 21:46:28 +0100 Subject: [PATCH 13/16] add support for IFA_F_NOPREFIXROUTE Signed-off-by: Thomas Haller --- ip/ipaddress.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/ip/ipaddress.c b/ip/ipaddress.c index b0d54fef..f794fa13 100644 --- a/ip/ipaddress.c +++ b/ip/ipaddress.c @@ -82,7 +82,7 @@ static void usage(void) fprintf(stderr, " tentative | deprecated | dadfailed | temporary |\n"); fprintf(stderr, " CONFFLAG-LIST ]\n"); fprintf(stderr, "CONFFLAG-LIST := [ CONFFLAG-LIST ] CONFFLAG\n"); - fprintf(stderr, "CONFFLAG := [ home | nodad | mngtmpaddr ]\n"); + fprintf(stderr, "CONFFLAG := [ home | nodad | mngtmpaddr | noprefixroute ]\n"); fprintf(stderr, "LIFETIME := [ valid_lft LFT ] [ preferred_lft LFT ]\n"); fprintf(stderr, "LFT := forever | SECONDS\n"); @@ -707,6 +707,10 @@ int print_addrinfo(const struct sockaddr_nl *who, struct nlmsghdr *n, ifa_flags &= ~IFA_F_MANAGETEMPADDR; fprintf(fp, "mngtmpaddr "); } + if (ifa_flags & IFA_F_NOPREFIXROUTE) { + ifa_flags &= ~IFA_F_NOPREFIXROUTE; + fprintf(fp, 
"noprefixroute "); + } if (!(ifa_flags & IFA_F_PERMANENT)) { fprintf(fp, "dynamic "); } else @@ -1133,6 +1137,9 @@ static int ipaddr_list_flush_or_save(int argc, char **argv, int action) } else if (strcmp(*argv, "mngtmpaddr") == 0) { filter.flags |= IFA_F_MANAGETEMPADDR; filter.flagmask |= IFA_F_MANAGETEMPADDR; + } else if (strcmp(*argv, "noprefixroute") == 0) { + filter.flags |= IFA_F_NOPREFIXROUTE; + filter.flagmask |= IFA_F_NOPREFIXROUTE; } else if (strcmp(*argv, "dadfailed") == 0) { filter.flags |= IFA_F_DADFAILED; filter.flagmask |= IFA_F_DADFAILED; @@ -1354,6 +1361,8 @@ static int ipaddr_modify(int cmd, int flags, int argc, char **argv) ifa_flags |= IFA_F_NODAD; } else if (strcmp(*argv, "mngtmpaddr") == 0) { ifa_flags |= IFA_F_MANAGETEMPADDR; + } else if (strcmp(*argv, "noprefixroute") == 0) { + ifa_flags |= IFA_F_NOPREFIXROUTE; } else { if (strcmp(*argv, "local") == 0) { NEXT_ARG(); From a01de0a336d96b74810f0e544a40983012aaee81 Mon Sep 17 00:00:00 2001 From: Yang Yingliang Date: Thu, 16 Jan 2014 11:09:13 +0800 Subject: [PATCH 14/16] tbf: support sending burst/mtu to kernel directly To avoid loss when transforming burst to buffer in userspace, send burst/mtu to kernel directly. Kernel commit 2e04ad424b("sch_tbf: add TBF_BURST/TBF_PBURST attribute") make it can handle burst/mtu. 
Cc: Eric Dumazet Signed-off-by: Yang Yingliang --- tc/q_tbf.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tc/q_tbf.c b/tc/q_tbf.c index 2fbfd3b5..f3022b65 100644 --- a/tc/q_tbf.c +++ b/tc/q_tbf.c @@ -232,12 +232,14 @@ static int tbf_parse_opt(struct qdisc_util *qu, int argc, char **argv, struct nl tail = NLMSG_TAIL(n); addattr_l(n, 1024, TCA_OPTIONS, NULL, 0); addattr_l(n, 2024, TCA_TBF_PARMS, &opt, sizeof(opt)); + addattr_l(n, 2124, TCA_TBF_BURST, &buffer, sizeof(buffer)); if (rate64 >= (1ULL << 32)) addattr_l(n, 2124, TCA_TBF_RATE64, &rate64, sizeof(rate64)); addattr_l(n, 3024, TCA_TBF_RTAB, rtab, 1024); if (opt.peakrate.rate) { if (prate64 >= (1ULL << 32)) addattr_l(n, 3124, TCA_TBF_PRATE64, &prate64, sizeof(prate64)); + addattr_l(n, 3224, TCA_TBF_PBURST, &mtu, sizeof(mtu)); addattr_l(n, 4096, TCA_TBF_PTAB, ptab, 1024); } tail->rta_len = (void *) NLMSG_TAIL(n) - (void *) tail; From dad2f72befc064d3ee8dea95b75b85c350cd2758 Mon Sep 17 00:00:00 2001 From: Yang Yingliang Date: Thu, 16 Jan 2014 11:09:14 +0800 Subject: [PATCH 15/16] netem: add 64bit rates support netem support 64bit rates start from linux-3.13. Add 64bit rates support in tc tools. 
tc qdisc show dev eth0 qdisc netem 1: dev eth4 root refcnt 2 limit 1000 rate 35Gbit Cc: Eric Dumazet Signed-off-by: Yang Yingliang Acked-by: Eric Dumazet --- tc/q_netem.c | 29 ++++++++++++++++++++++++----- 1 file changed, 24 insertions(+), 5 deletions(-) diff --git a/tc/q_netem.c b/tc/q_netem.c index 9dd8712f..946007c5 100644 --- a/tc/q_netem.c +++ b/tc/q_netem.c @@ -183,6 +183,7 @@ static int netem_parse_opt(struct qdisc_util *qu, int argc, char **argv, __s16 *dist_data = NULL; __u16 loss_type = NETEM_LOSS_UNSPEC; int present[__TCA_NETEM_MAX]; + __u64 rate64 = 0; memset(&cor, 0, sizeof(cor)); memset(&reorder, 0, sizeof(reorder)); @@ -391,7 +392,7 @@ static int netem_parse_opt(struct qdisc_util *qu, int argc, char **argv, } else if (matches(*argv, "rate") == 0) { ++present[TCA_NETEM_RATE]; NEXT_ARG(); - if (get_rate(&rate.rate, *argv)) { + if (get_rate64(&rate64, *argv)) { explain1("rate"); return -1; } @@ -496,9 +497,18 @@ static int netem_parse_opt(struct qdisc_util *qu, int argc, char **argv, addattr_nest_end(n, start); } - if (present[TCA_NETEM_RATE] && - addattr_l(n, 1024, TCA_NETEM_RATE, &rate, sizeof(rate)) < 0) - return -1; + if (present[TCA_NETEM_RATE]) { + if (rate64 >= (1ULL << 32)) { + if (addattr_l(n, 1024, + TCA_NETEM_RATE64, &rate64, sizeof(rate64)) < 0) + return -1; + rate.rate = ~0U; + } else { + rate.rate = rate64; + } + if (addattr_l(n, 1024, TCA_NETEM_RATE, &rate, sizeof(rate)) < 0) + return -1; + } if (dist_data) { if (addattr_l(n, MAX_DIST * sizeof(dist_data[0]), @@ -522,6 +532,7 @@ static int netem_print_opt(struct qdisc_util *qu, FILE *f, struct rtattr *opt) struct tc_netem_qopt qopt; const struct tc_netem_rate *rate = NULL; int len = RTA_PAYLOAD(opt) - sizeof(qopt); + __u64 rate64 = 0; SPRINT_BUF(b1); if (opt == NULL) @@ -572,6 +583,11 @@ static int netem_print_opt(struct qdisc_util *qu, FILE *f, struct rtattr *opt) return -1; ecn = RTA_DATA(tb[TCA_NETEM_ECN]); } + if (tb[TCA_NETEM_RATE64]) { + if (RTA_PAYLOAD(tb[TCA_NETEM_RATE64]) < 
sizeof(rate64)) + return -1; + rate64 = rta_getattr_u64(tb[TCA_NETEM_RATE64]); + } } fprintf(f, "limit %d", qopt.limit); @@ -632,7 +648,10 @@ static int netem_print_opt(struct qdisc_util *qu, FILE *f, struct rtattr *opt) } if (rate && rate->rate) { - fprintf(f, " rate %s", sprint_rate(rate->rate, b1)); + if (rate64) + fprintf(f, " rate %s", sprint_rate(rate64, b1)); + else + fprintf(f, " rate %s", sprint_rate(rate->rate, b1)); if (rate->packet_overhead) fprintf(f, " packetoverhead %d", rate->packet_overhead); if (rate->cell_size) From 8c45275594af49516dbdeb73850120d47d5a5cd9 Mon Sep 17 00:00:00 2001 From: Mythili Prabhu Date: Thu, 16 Jan 2014 19:39:52 -0800 Subject: [PATCH 16/16] PIE: Add man page This adds the manpage for PIE: Proportional Integral controller Enhanced AQM scheme. Signed-off-by: Vijay Subramanian Signed-off-by: Vijay Subramanian CC: Dave Taht --- man/man8/Makefile | 2 +- man/man8/tc-pie.8 | 131 ++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 132 insertions(+), 1 deletion(-) create mode 100644 man/man8/tc-pie.8 diff --git a/man/man8/Makefile b/man/man8/Makefile index ff80c988..cef09121 100644 --- a/man/man8/Makefile +++ b/man/man8/Makefile @@ -2,7 +2,7 @@ TARGETS = ip-address.8 ip-link.8 ip-route.8 MAN8PAGES = $(TARGETS) ip.8 arpd.8 lnstat.8 routel.8 rtacct.8 rtmon.8 ss.8 \ tc.8 tc-bfifo.8 tc-cbq.8 tc-cbq-details.8 tc-choke.8 tc-codel.8 \ - tc-drr.8 tc-ematch.8 tc-fq_codel.8 tc-hfsc.8 tc-htb.8 \ + tc-drr.8 tc-ematch.8 tc-fq_codel.8 tc-hfsc.8 tc-htb.8 tc-pie.8 \ tc-netem.8 tc-pfifo.8 tc-pfifo_fast.8 tc-prio.8 tc-red.8 \ tc-sfb.8 tc-sfq.8 tc-stab.8 tc-tbf.8 \ bridge.8 rtstat.8 ctstat.8 nstat.8 routef.8 \ diff --git a/man/man8/tc-pie.8 b/man/man8/tc-pie.8 new file mode 100644 index 00000000..536c381c --- /dev/null +++ b/man/man8/tc-pie.8 @@ -0,0 +1,131 @@ +.TH PIE 8 "16 January 2014" "iproute2" "Linux" +.SH NAME +PIE \- Proportional Integral controller-Enhanced AQM algorithm +.SH SYNOPSIS +.B tc qdisc ... 
pie +[ +.B limit +PACKETS ] [ +.B target +TIME ] [ +.B tupdate +TIME ] [ +.B alpha +int ] [ +.B beta +int ] [ +.B ecn +| +.B noecn +] [ +.B bytemode +| +.B nobytemode +] + +.SH DESCRIPTION +Proportional Integral controller-Enhanced (PIE) is a control theoretic active +queue management scheme. It is based on the proportional integral controller but +aims to control delay. The main design goals are + o Low latency control + o High link utilization + o Simple implementation + o Guaranteed stability and fast responsiveness + +.SH ALGORITHM +PIE is designed to control delay effectively. First, an average dequeue rate is +estimated based on the standing queue. The rate is used to calculate the current +delay. Then, on a periodic basis, the delay is used to calculate the dropping +probability. Finally, on arrival, a packet is dropped (or marked) based on this +probability. + +PIE makes adjustments to the probability based on the trend of the delay i.e. +whether it is going up or down. The delay converges quickly to the target value +specified. + +alpha and beta are statically chosen parameters that control the drop probability +growth and are determined through control theoretic approaches. alpha determines how +the deviation between the current and target latency changes probability. beta exerts +additional adjustments depending on the latency trend. + +The drop probability is used to mark packets in ecn mode. However, as in RED, +beyond 10% packets are dropped based on this probability. The bytemode is used +to drop packets proportional to the packet size. + +Additional details can be found in the paper cited below. + +.SH PARAMETERS +.SS limit +limit on the queue size in packets. Incoming packets are dropped when this limit +is reached. Default is 1000 packets. + +.SS target +is the expected queue delay. The default target delay is 20ms. + +.SS tupdate +is the frequency at which the system drop probability is calculated. The default is 30ms.
+ +.SS alpha +.SS beta +alpha and beta are parameters chosen to control the drop probability. These +should be in the range between 0 and 32. + +.SS ecn | noecn +is used to mark packets instead of dropping them: +.B ecn +to turn on ecn mode, +.B noecn +to turn off ecn mode. By default, +.B ecn +is turned off. + +.SS bytemode | nobytemode +is used to scale drop probability proportional to packet size: +.B bytemode +to turn on bytemode, +.B nobytemode +to turn off bytemode. By default, +.B bytemode +is turned off. + +.SH EXAMPLES + # tc qdisc add dev eth0 root pie + # tc -s qdisc show + qdisc pie 8034: dev eth0 root refcnt 2 limit 200p target 19000us tupdate 29000us alpha 2 beta 20 + Sent 7443524 bytes 7204 pkt (dropped 900, overlimits 0 requeues 0) + backlog 38998b 37p requeues 0 + prob 0.123384 delay 25000us avg_dq_rate 1464840 + pkts_in 7241 overlimit 900 dropped 0 maxq 186 ecn_mark 0 + + # tc qdisc add dev eth0 root pie limit 100 target 20ms tupdate 30ms ecn + # tc -s qdisc show + qdisc pie 8036: dev eth0 root refcnt 2 limit 200p target 19000 tupdate 29000 alpha 2 beta 20 ecn + Sent 2491922 bytes 2507 pkt (dropped 214, overlimits 0 requeues 0) + backlog 33728b 32p requeues 0 + prob 0.102262 delay 24000us avg_dq_rate 1464840 + pkts_in 2468 overlimit 214 dropped 0 maxq 192 ecn_mark 71 + + + # tc qdisc add dev eth0 root pie limit 100 target 50ms tupdate 30ms bytemode + # tc -s qdisc show + qdisc pie 8036: dev eth0 root refcnt 2 limit 200p target 19000 tupdate 29000 alpha 2 beta 20 ecn + Sent 2491922 bytes 2507 pkt (dropped 214, overlimits 0 requeues 0) + backlog 33728b 32p requeues 0 + prob 0.102262 delay 24000us avg_dq_rate 1464840 + pkts_in 2468 overlimit 214 dropped 0 maxq 192 ecn_mark 71 + + +.SH SEE ALSO +.BR tc (8), +.BR tc-codel (8), +.BR tc-red (8) + +.SH SOURCES + o IETF draft submission is at http://tools.ietf.org/html/draft-pan-tsvwg-pie-00 + o IEEE Conference on High Performance Switching and Routing 2013: "PIE: A +Lightweight Control Scheme to Address the
Bufferbloat Problem" + +.SH AUTHORS +PIE was implemented by Vijay Subramanian and Mythili Prabhu, also the authors of +this man page. Please report bugs and corrections to the Linux networking +development mailing list at <netdev@vger.kernel.org>.