diff --git a/include/linux/if_addr.h b/include/linux/if_addr.h index 58b39f47..cc375e42 100644 --- a/include/linux/if_addr.h +++ b/include/linux/if_addr.h @@ -18,6 +18,9 @@ struct ifaddrmsg { * It makes no difference for normally configured broadcast interfaces, * but for point-to-point IFA_ADDRESS is DESTINATION address, * local address is supplied in IFA_LOCAL attribute. + * + * IFA_FLAGS is a u32 attribute that extends the u8 field ifa_flags. + * If present, the value from struct ifaddrmsg will be ignored. */ enum { IFA_UNSPEC, @@ -28,6 +31,7 @@ enum { IFA_ANYCAST, IFA_CACHEINFO, IFA_MULTICAST, + IFA_FLAGS, __IFA_MAX, }; @@ -44,6 +48,8 @@ enum { #define IFA_F_DEPRECATED 0x20 #define IFA_F_TENTATIVE 0x40 #define IFA_F_PERMANENT 0x80 +#define IFA_F_MANAGETEMPADDR 0x100 +#define IFA_F_NOPREFIXROUTE 0x200 struct ifa_cacheinfo { __u32 ifa_prefered; diff --git a/include/linux/if_arp.h b/include/linux/if_arp.h index 0da6f5e7..d001bdb2 100644 --- a/include/linux/if_arp.h +++ b/include/linux/if_arp.h @@ -94,6 +94,7 @@ #define ARPHRD_CAIF 822 /* CAIF media type */ #define ARPHRD_IP6GRE 823 /* GRE over IPv6 */ #define ARPHRD_NETLINK 824 /* Netlink header */ +#define ARPHRD_6LOWPAN 825 /* IPv6 over LoWPAN */ #define ARPHRD_VOID 0xFFFF /* Void type, nothing is known */ #define ARPHRD_NONE 0xFFFE /* zero header length */ diff --git a/include/linux/if_link.h b/include/linux/if_link.h index 4b727a55..9cb5909c 100644 --- a/include/linux/if_link.h +++ b/include/linux/if_link.h @@ -144,6 +144,7 @@ enum { IFLA_NUM_RX_QUEUES, IFLA_CARRIER, IFLA_PHYS_PORT_ID, + IFLA_SLAVE, __IFLA_MAX }; @@ -329,11 +330,55 @@ enum { IFLA_BOND_UNSPEC, IFLA_BOND_MODE, IFLA_BOND_ACTIVE_SLAVE, + IFLA_BOND_MIIMON, + IFLA_BOND_UPDELAY, + IFLA_BOND_DOWNDELAY, + IFLA_BOND_USE_CARRIER, + IFLA_BOND_ARP_INTERVAL, + IFLA_BOND_ARP_IP_TARGET, + IFLA_BOND_ARP_VALIDATE, + IFLA_BOND_ARP_ALL_TARGETS, + IFLA_BOND_PRIMARY, + IFLA_BOND_PRIMARY_RESELECT, + IFLA_BOND_FAIL_OVER_MAC, + IFLA_BOND_XMIT_HASH_POLICY, + 
IFLA_BOND_RESEND_IGMP, + IFLA_BOND_NUM_PEER_NOTIF, + IFLA_BOND_ALL_SLAVES_ACTIVE, + IFLA_BOND_MIN_LINKS, + IFLA_BOND_LP_INTERVAL, + IFLA_BOND_PACKETS_PER_SLAVE, + IFLA_BOND_AD_LACP_RATE, + IFLA_BOND_AD_SELECT, + IFLA_BOND_AD_INFO, __IFLA_BOND_MAX, }; #define IFLA_BOND_MAX (__IFLA_BOND_MAX - 1) +enum { + IFLA_BOND_AD_INFO_AGGREGATOR, + IFLA_BOND_AD_INFO_NUM_PORTS, + IFLA_BOND_AD_INFO_ACTOR_KEY, + IFLA_BOND_AD_INFO_PARTNER_KEY, + IFLA_BOND_AD_INFO_PARTNER_MAC, + __IFLA_BOND_AD_INFO_MAX, +}; + +#define IFLA_BOND_AD_INFO_MAX (__IFLA_BOND_AD_INFO_MAX - 1) + +enum { + IFLA_SLAVE_STATE, + IFLA_SLAVE_MII_STATUS, + IFLA_SLAVE_LINK_FAILURE_COUNT, + IFLA_SLAVE_PERM_HWADDR, + IFLA_SLAVE_QUEUE_ID, + IFLA_SLAVE_AD_AGGREGATOR_ID, + __IFLA_SLAVE_MAX, +}; + +#define IFLA_SLAVE_MAX (__IFLA_SLAVE_MAX - 1) + /* SR-IOV virtual function management section */ enum { diff --git a/include/linux/netconf.h b/include/linux/netconf.h index 52c44244..6ceb170d 100644 --- a/include/linux/netconf.h +++ b/include/linux/netconf.h @@ -14,6 +14,7 @@ enum { NETCONFA_FORWARDING, NETCONFA_RP_FILTER, NETCONFA_MC_FORWARDING, + NETCONFA_PROXY_NEIGH, __NETCONFA_MAX }; #define NETCONFA_MAX (__NETCONFA_MAX - 1) diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h index f05c3d92..be0bc182 100644 --- a/include/linux/netfilter.h +++ b/include/linux/netfilter.h @@ -51,6 +51,7 @@ enum nf_inet_hooks { enum { NFPROTO_UNSPEC = 0, + NFPROTO_INET = 1, NFPROTO_IPV4 = 2, NFPROTO_ARP = 3, NFPROTO_BRIDGE = 7, diff --git a/include/linux/pkt_sched.h b/include/linux/pkt_sched.h index a806687a..d62316ba 100644 --- a/include/linux/pkt_sched.h +++ b/include/linux/pkt_sched.h @@ -173,6 +173,8 @@ enum { TCA_TBF_PTAB, TCA_TBF_RATE64, TCA_TBF_PRATE64, + TCA_TBF_BURST, + TCA_TBF_PBURST, __TCA_TBF_MAX, }; @@ -523,6 +525,7 @@ enum { TCA_NETEM_LOSS, TCA_NETEM_RATE, TCA_NETEM_ECN, + TCA_NETEM_RATE64, __TCA_NETEM_MAX, }; @@ -790,4 +793,54 @@ struct tc_fq_qd_stats { __u32 throttled_flows; __u32 pad; }; + +/* Heavy-Hitter 
Filter */ + +enum { + TCA_HHF_UNSPEC, + TCA_HHF_BACKLOG_LIMIT, + TCA_HHF_QUANTUM, + TCA_HHF_HH_FLOWS_LIMIT, + TCA_HHF_RESET_TIMEOUT, + TCA_HHF_ADMIT_BYTES, + TCA_HHF_EVICT_TIMEOUT, + TCA_HHF_NON_HH_WEIGHT, + __TCA_HHF_MAX +}; + +#define TCA_HHF_MAX (__TCA_HHF_MAX - 1) + +struct tc_hhf_xstats { + __u32 drop_overlimit; /* number of times max qdisc packet limit + * was hit + */ + __u32 hh_overlimit; /* number of times max heavy-hitters was hit */ + __u32 hh_tot_count; /* number of captured heavy-hitters so far */ + __u32 hh_cur_count; /* number of current heavy-hitters */ +}; + +/* PIE */ +enum { + TCA_PIE_UNSPEC, + TCA_PIE_TARGET, + TCA_PIE_LIMIT, + TCA_PIE_TUPDATE, + TCA_PIE_ALPHA, + TCA_PIE_BETA, + TCA_PIE_ECN, + TCA_PIE_BYTEMODE, + __TCA_PIE_MAX +}; +#define TCA_PIE_MAX (__TCA_PIE_MAX - 1) + +struct tc_pie_xstats { + __u32 prob; /* current probability */ + __u32 delay; /* current delay in ms */ + __u32 avg_dq_rate; /* current average dq_rate in bits/pie_time */ + __u32 packets_in; /* total number of packets enqueued */ + __u32 dropped; /* packets dropped due to pie_action */ + __u32 overlimit; /* dropped due to lack of space in queue */ + __u32 maxq; /* maximum queue size */ + __u32 ecn_mark; /* packets marked with ecn*/ +}; #endif diff --git a/include/linux/tc_act/tc_ipt.h b/include/linux/tc_act/tc_ipt.h index a2335563..130aaadf 100644 --- a/include/linux/tc_act/tc_ipt.h +++ b/include/linux/tc_act/tc_ipt.h @@ -4,6 +4,7 @@ #include #define TCA_ACT_IPT 6 +#define TCA_ACT_XT 10 enum { TCA_IPT_UNSPEC, diff --git a/include/linux/tcp_metrics.h b/include/linux/tcp_metrics.h index cb5157b5..54a37b13 100644 --- a/include/linux/tcp_metrics.h +++ b/include/linux/tcp_metrics.h @@ -35,6 +35,8 @@ enum { TCP_METRICS_ATTR_FOPEN_SYN_DROPS, /* u16, count of drops */ TCP_METRICS_ATTR_FOPEN_SYN_DROP_TS, /* msecs age */ TCP_METRICS_ATTR_FOPEN_COOKIE, /* binary */ + TCP_METRICS_ATTR_SADDR_IPV4, /* u32 */ + TCP_METRICS_ATTR_SADDR_IPV6, /* binary */ __TCP_METRICS_ATTR_MAX, }; diff --git 
a/ip/ipaddress.c b/ip/ipaddress.c index d02eaaf8..f794fa13 100644 --- a/ip/ipaddress.c +++ b/ip/ipaddress.c @@ -82,7 +82,7 @@ static void usage(void) fprintf(stderr, " tentative | deprecated | dadfailed | temporary |\n"); fprintf(stderr, " CONFFLAG-LIST ]\n"); fprintf(stderr, "CONFFLAG-LIST := [ CONFFLAG-LIST ] CONFFLAG\n"); - fprintf(stderr, "CONFFLAG := [ home | nodad ]\n"); + fprintf(stderr, "CONFFLAG := [ home | nodad | mngtmpaddr | noprefixroute ]\n"); fprintf(stderr, "LIFETIME := [ valid_lft LFT ] [ preferred_lft LFT ]\n"); fprintf(stderr, "LFT := forever | SECONDS\n"); @@ -541,6 +541,13 @@ static int set_lifetime(unsigned int *lifetime, char *argv) return 0; } +static unsigned int get_ifa_flags(struct ifaddrmsg *ifa, + struct rtattr *ifa_flags_attr) +{ + return ifa_flags_attr ? rta_getattr_u32(ifa_flags_attr) : + ifa->ifa_flags; +} + int print_addrinfo(const struct sockaddr_nl *who, struct nlmsghdr *n, void *arg) { @@ -567,6 +574,8 @@ int print_addrinfo(const struct sockaddr_nl *who, struct nlmsghdr *n, parse_rtattr(rta_tb, IFA_MAX, IFA_RTA(ifa), n->nlmsg_len - NLMSG_LENGTH(sizeof(*ifa))); + ifa_flags = get_ifa_flags(ifa, rta_tb[IFA_FLAGS]); + if (!rta_tb[IFA_LOCAL]) rta_tb[IFA_LOCAL] = rta_tb[IFA_ADDRESS]; if (!rta_tb[IFA_ADDRESS]) @@ -576,7 +585,7 @@ int print_addrinfo(const struct sockaddr_nl *who, struct nlmsghdr *n, return 0; if ((filter.scope^ifa->ifa_scope)&filter.scopemask) return 0; - if ((filter.flags^ifa->ifa_flags)&filter.flagmask) + if ((filter.flags ^ ifa_flags) & filter.flagmask) return 0; if (filter.label) { SPRINT_BUF(b1); @@ -670,36 +679,43 @@ int print_addrinfo(const struct sockaddr_nl *who, struct nlmsghdr *n, abuf, sizeof(abuf))); } fprintf(fp, "scope %s ", rtnl_rtscope_n2a(ifa->ifa_scope, b1, sizeof(b1))); - ifa_flags = ifa->ifa_flags; - if (ifa->ifa_flags&IFA_F_SECONDARY) { + if (ifa_flags & IFA_F_SECONDARY) { ifa_flags &= ~IFA_F_SECONDARY; if (ifa->ifa_family == AF_INET6) fprintf(fp, "temporary "); else fprintf(fp, "secondary "); } - 
if (ifa->ifa_flags&IFA_F_TENTATIVE) { + if (ifa_flags & IFA_F_TENTATIVE) { ifa_flags &= ~IFA_F_TENTATIVE; fprintf(fp, "tentative "); } - if (ifa->ifa_flags&IFA_F_DEPRECATED) { + if (ifa_flags & IFA_F_DEPRECATED) { ifa_flags &= ~IFA_F_DEPRECATED; deprecated = 1; fprintf(fp, "deprecated "); } - if (ifa->ifa_flags&IFA_F_HOMEADDRESS) { + if (ifa_flags & IFA_F_HOMEADDRESS) { ifa_flags &= ~IFA_F_HOMEADDRESS; fprintf(fp, "home "); } - if (ifa->ifa_flags&IFA_F_NODAD) { + if (ifa_flags & IFA_F_NODAD) { ifa_flags &= ~IFA_F_NODAD; fprintf(fp, "nodad "); } - if (!(ifa->ifa_flags&IFA_F_PERMANENT)) { + if (ifa_flags & IFA_F_MANAGETEMPADDR) { + ifa_flags &= ~IFA_F_MANAGETEMPADDR; + fprintf(fp, "mngtmpaddr "); + } + if (ifa_flags & IFA_F_NOPREFIXROUTE) { + ifa_flags &= ~IFA_F_NOPREFIXROUTE; + fprintf(fp, "noprefixroute "); + } + if (!(ifa_flags & IFA_F_PERMANENT)) { fprintf(fp, "dynamic "); } else ifa_flags &= ~IFA_F_PERMANENT; - if (ifa->ifa_flags&IFA_F_DADFAILED) { + if (ifa_flags & IFA_F_DADFAILED) { ifa_flags &= ~IFA_F_DADFAILED; fprintf(fp, "dadfailed "); } @@ -926,6 +942,8 @@ static void ipaddr_filter(struct nlmsg_chain *linfo, struct nlmsg_chain *ainfo) for (a = ainfo->head; a; a = a->next) { struct nlmsghdr *n = &a->h; struct ifaddrmsg *ifa = NLMSG_DATA(n); + struct rtattr *tb[IFA_MAX + 1]; + unsigned int ifa_flags; if (ifa->ifa_index != ifi->ifi_index) continue; @@ -934,11 +952,13 @@ static void ipaddr_filter(struct nlmsg_chain *linfo, struct nlmsg_chain *ainfo) continue; if ((filter.scope^ifa->ifa_scope)&filter.scopemask) continue; - if ((filter.flags^ifa->ifa_flags)&filter.flagmask) + + parse_rtattr(tb, IFA_MAX, IFA_RTA(ifa), IFA_PAYLOAD(n)); + ifa_flags = get_ifa_flags(ifa, tb[IFA_FLAGS]); + + if ((filter.flags ^ ifa_flags) & filter.flagmask) continue; if (filter.pfx.family || filter.label) { - struct rtattr *tb[IFA_MAX+1]; - parse_rtattr(tb, IFA_MAX, IFA_RTA(ifa), IFA_PAYLOAD(n)); if (!tb[IFA_LOCAL]) tb[IFA_LOCAL] = tb[IFA_ADDRESS]; @@ -1114,6 +1134,12 @@ static int 
ipaddr_list_flush_or_save(int argc, char **argv, int action) } else if (strcmp(*argv, "nodad") == 0) { filter.flags |= IFA_F_NODAD; filter.flagmask |= IFA_F_NODAD; + } else if (strcmp(*argv, "mngtmpaddr") == 0) { + filter.flags |= IFA_F_MANAGETEMPADDR; + filter.flagmask |= IFA_F_MANAGETEMPADDR; + } else if (strcmp(*argv, "noprefixroute") == 0) { + filter.flags |= IFA_F_NOPREFIXROUTE; + filter.flagmask |= IFA_F_NOPREFIXROUTE; } else if (strcmp(*argv, "dadfailed") == 0) { filter.flags |= IFA_F_DADFAILED; filter.flagmask |= IFA_F_DADFAILED; @@ -1252,6 +1278,7 @@ static int ipaddr_modify(int cmd, int flags, int argc, char **argv) __u32 preferred_lft = INFINITY_LIFE_TIME; __u32 valid_lft = INFINITY_LIFE_TIME; struct ifa_cacheinfo cinfo; + unsigned int ifa_flags = 0; memset(&req, 0, sizeof(req)); @@ -1329,9 +1356,13 @@ static int ipaddr_modify(int cmd, int flags, int argc, char **argv) if (set_lifetime(&preferred_lft, *argv)) invarg("preferred_lft value", *argv); } else if (strcmp(*argv, "home") == 0) { - req.ifa.ifa_flags |= IFA_F_HOMEADDRESS; + ifa_flags |= IFA_F_HOMEADDRESS; } else if (strcmp(*argv, "nodad") == 0) { - req.ifa.ifa_flags |= IFA_F_NODAD; + ifa_flags |= IFA_F_NODAD; + } else if (strcmp(*argv, "mngtmpaddr") == 0) { + ifa_flags |= IFA_F_MANAGETEMPADDR; + } else if (strcmp(*argv, "noprefixroute") == 0) { + ifa_flags |= IFA_F_NOPREFIXROUTE; } else { if (strcmp(*argv, "local") == 0) { NEXT_ARG(); @@ -1349,6 +1380,9 @@ static int ipaddr_modify(int cmd, int flags, int argc, char **argv) } argc--; argv++; } + req.ifa.ifa_flags = ifa_flags; + addattr32(&req.n, sizeof(req), IFA_FLAGS, ifa_flags); + if (d == NULL) { fprintf(stderr, "Not enough information: \"dev\" argument is required.\n"); return -1; diff --git a/ip/iplink_bond.c b/ip/iplink_bond.c index 3fb7f4f4..f0e5ab11 100644 --- a/ip/iplink_bond.c +++ b/ip/iplink_bond.c @@ -7,41 +7,165 @@ * 2 of the License, or (at your option) any later version. 
* * Authors: Jiri Pirko + * Scott Feldman */ #include #include #include #include +#include #include #include "rt_names.h" #include "utils.h" #include "ip_common.h" +#define BOND_MAX_ARP_TARGETS 16 + +static const char *mode_tbl[] = { + "balance-rr", + "active-backup", + "balance-xor", + "broadcast", + "802.3ad", + "balance-tlb", + "balance-alb", + NULL, +}; + +static const char *arp_validate_tbl[] = { + "none", + "active", + "backup", + "all", + NULL, +}; + +static const char *arp_all_targets_tbl[] = { + "any", + "all", + NULL, +}; + +static const char *primary_reselect_tbl[] = { + "always", + "better", + "failure", + NULL, +}; + +static const char *fail_over_mac_tbl[] = { + "none", + "active", + "follow", + NULL, +}; + +static const char *xmit_hash_policy_tbl[] = { + "layer2", + "layer3+4", + "layer2+3", + "encap2+3", + "encap3+4", + NULL, +}; + +static const char *lacp_rate_tbl[] = { + "slow", + "fast", + NULL, +}; + +static const char *ad_select_tbl[] = { + "stable", + "bandwidth", + "count", + NULL, +}; + +static const char *get_name(const char **tbl, int index) +{ + int i; + + for (i = 0; tbl[i]; i++) + if (i == index) + return tbl[i]; + + return "UNKNOWN"; +} + +static int get_index(const char **tbl, char *name) +{ + int i, index; + + /* check for integer index passed in instead of name */ + if (get_integer(&index, name, 10) == 0) + for (i = 0; tbl[i]; i++) + if (i == index) + return i; + + for (i = 0; tbl[i]; i++) + if (strncmp(tbl[i], name, strlen(tbl[i])) == 0) + return i; + + return -1; +} + static void explain(void) { fprintf(stderr, "Usage: ... bond [ mode BONDMODE ] [ active_slave SLAVE_DEV ]\n" - " [ clear_active_slave ]\n" + " [ clear_active_slave ] [ miimon MIIMON ]\n" + " [ updelay UPDELAY ] [ downdelay DOWNDELAY ]\n" + " [ use_carrier USE_CARRIER ]\n" + " [ arp_interval ARP_INTERVAL ]\n" + " [ arp_validate ARP_VALIDATE ]\n" + " [ arp_all_targets ARP_ALL_TARGETS ]\n" + " [ arp_ip_target [ ARP_IP_TARGET, ... 
] ]\n" + " [ primary SLAVE_DEV ]\n" + " [ primary_reselect PRIMARY_RESELECT ]\n" + " [ fail_over_mac FAIL_OVER_MAC ]\n" + " [ xmit_hash_policy XMIT_HASH_POLICY ]\n" + " [ resend_igmp RESEND_IGMP ]\n" + " [ num_grat_arp|num_unsol_na NUM_GRAT_ARP|NUM_UNSOL_NA ]\n" + " [ all_slaves_active ALL_SLAVES_ACTIVE ]\n" + " [ min_links MIN_LINKS ]\n" + " [ lp_interval LP_INTERVAL ]\n" + " [ packets_per_slave PACKETS_PER_SLAVE ]\n" + " [ lacp_rate LACP_RATE ]\n" + " [ ad_select AD_SELECT ]\n" "\n" - "BONDMODE := 0-6\n" + "BONDMODE := balance-rr|active-backup|balance-xor|broadcast|802.3ad|balance-tlb|balance-alb\n" + "ARP_VALIDATE := none|active|backup|all\n" + "ARP_ALL_TARGETS := any|all\n" + "PRIMARY_RESELECT := always|better|failure\n" + "FAIL_OVER_MAC := none|active|follow\n" + "XMIT_HASH_POLICY := layer2|layer2+3|layer3+4\n" + "LACP_RATE := slow|fast\n" + "AD_SELECT := stable|bandwidth|count\n" ); } static int bond_parse_opt(struct link_util *lu, int argc, char **argv, struct nlmsghdr *n) { - __u8 mode; + __u8 mode, use_carrier, primary_reselect, fail_over_mac; + __u8 xmit_hash_policy, num_peer_notif, all_slaves_active; + __u8 lacp_rate, ad_select; + __u32 miimon, updelay, downdelay, arp_interval, arp_validate; + __u32 arp_all_targets, resend_igmp, min_links, lp_interval; + __u32 packets_per_slave; unsigned ifindex; while (argc > 0) { if (matches(*argv, "mode") == 0) { NEXT_ARG(); - if (get_u8(&mode, *argv, 0)) { - invarg("mode %s is invalid", *argv); + if (get_index(mode_tbl, *argv) < 0) { + invarg("invalid mode", *argv); return -1; } + mode = get_index(mode_tbl, *argv); addattr8(n, 1024, IFLA_BOND_MODE, mode); } else if (matches(*argv, "active_slave") == 0) { NEXT_ARG(); @@ -51,6 +175,170 @@ static int bond_parse_opt(struct link_util *lu, int argc, char **argv, addattr32(n, 1024, IFLA_BOND_ACTIVE_SLAVE, ifindex); } else if (matches(*argv, "clear_active_slave") == 0) { addattr32(n, 1024, IFLA_BOND_ACTIVE_SLAVE, 0); + } else if (matches(*argv, "miimon") == 0) { + 
NEXT_ARG(); + if (get_u32(&miimon, *argv, 0)) { + invarg("invalid miimon", *argv); + return -1; + } + addattr32(n, 1024, IFLA_BOND_MIIMON, miimon); + } else if (matches(*argv, "updelay") == 0) { + NEXT_ARG(); + if (get_u32(&updelay, *argv, 0)) { + invarg("invalid updelay", *argv); + return -1; + } + addattr32(n, 1024, IFLA_BOND_UPDELAY, updelay); + } else if (matches(*argv, "downdelay") == 0) { + NEXT_ARG(); + if (get_u32(&downdelay, *argv, 0)) { + invarg("invalid downdelay", *argv); + return -1; + } + addattr32(n, 1024, IFLA_BOND_DOWNDELAY, downdelay); + } else if (matches(*argv, "use_carrier") == 0) { + NEXT_ARG(); + if (get_u8(&use_carrier, *argv, 0)) { + invarg("invalid use_carrier", *argv); + return -1; + } + addattr8(n, 1024, IFLA_BOND_USE_CARRIER, use_carrier); + } else if (matches(*argv, "arp_interval") == 0) { + NEXT_ARG(); + if (get_u32(&arp_interval, *argv, 0)) { + invarg("invalid arp_interval", *argv); + return -1; + } + addattr32(n, 1024, IFLA_BOND_ARP_INTERVAL, arp_interval); + } else if (matches(*argv, "arp_ip_target") == 0) { + struct rtattr * nest = addattr_nest(n, 1024, + IFLA_BOND_ARP_IP_TARGET); + if (NEXT_ARG_OK()) { + NEXT_ARG(); + char *targets = strdupa(*argv); + char *target = strtok(targets, ","); + int i; + + for(i = 0; target && i < BOND_MAX_ARP_TARGETS; i++) { + __u32 addr = get_addr32(target); + addattr32(n, 1024, i, addr); + target = strtok(NULL, ","); + } + addattr_nest_end(n, nest); + } + addattr_nest_end(n, nest); + } else if (matches(*argv, "arp_validate") == 0) { + NEXT_ARG(); + if (get_index(arp_validate_tbl, *argv) < 0) { + invarg("invalid arp_validate", *argv); + return -1; + } + arp_validate = get_index(arp_validate_tbl, *argv); + addattr32(n, 1024, IFLA_BOND_ARP_VALIDATE, arp_validate); + } else if (matches(*argv, "arp_all_targets") == 0) { + NEXT_ARG(); + if (get_index(arp_all_targets_tbl, *argv) < 0) { + invarg("invalid arp_all_targets", *argv); + return -1; + } + arp_all_targets = get_index(arp_all_targets_tbl, *argv); + 
addattr32(n, 1024, IFLA_BOND_ARP_ALL_TARGETS, arp_all_targets); + } else if (matches(*argv, "primary") == 0) { + NEXT_ARG(); + ifindex = if_nametoindex(*argv); + if (!ifindex) + return -1; + addattr32(n, 1024, IFLA_BOND_PRIMARY, ifindex); + } else if (matches(*argv, "primary_reselect") == 0) { + NEXT_ARG(); + if (get_index(primary_reselect_tbl, *argv) < 0) { + invarg("invalid primary_reselect", *argv); + return -1; + } + primary_reselect = get_index(primary_reselect_tbl, *argv); + addattr8(n, 1024, IFLA_BOND_PRIMARY_RESELECT, + primary_reselect); + } else if (matches(*argv, "fail_over_mac") == 0) { + NEXT_ARG(); + if (get_index(fail_over_mac_tbl, *argv) < 0) { + invarg("invalid fail_over_mac", *argv); + return -1; + } + fail_over_mac = get_index(fail_over_mac_tbl, *argv); + addattr8(n, 1024, IFLA_BOND_FAIL_OVER_MAC, + fail_over_mac); + } else if (matches(*argv, "xmit_hash_policy") == 0) { + NEXT_ARG(); + if (get_index(xmit_hash_policy_tbl, *argv) < 0) { + invarg("invalid xmit_hash_policy", *argv); + return -1; + } + xmit_hash_policy = get_index(xmit_hash_policy_tbl, *argv); + addattr8(n, 1024, IFLA_BOND_XMIT_HASH_POLICY, + xmit_hash_policy); + } else if (matches(*argv, "resend_igmp") == 0) { + NEXT_ARG(); + if (get_u32(&resend_igmp, *argv, 0)) { + invarg("invalid resend_igmp", *argv); + return -1; + } + addattr32(n, 1024, IFLA_BOND_RESEND_IGMP, resend_igmp); + } else if (matches(*argv, "num_grat_arp") == 0 || + matches(*argv, "num_unsol_na") == 0) { + NEXT_ARG(); + if (get_u8(&num_peer_notif, *argv, 0)) { + invarg("invalid num_grat_arp|num_unsol_na", + *argv); + return -1; + } + addattr8(n, 1024, IFLA_BOND_NUM_PEER_NOTIF, + num_peer_notif); + } else if (matches(*argv, "all_slaves_active") == 0) { + NEXT_ARG(); + if (get_u8(&all_slaves_active, *argv, 0)) { + invarg("invalid all_slaves_active", *argv); + return -1; + } + addattr8(n, 1024, IFLA_BOND_ALL_SLAVES_ACTIVE, + all_slaves_active); + } else if (matches(*argv, "min_links") == 0) { + NEXT_ARG(); + if 
(get_u32(&min_links, *argv, 0)) { + invarg("invalid min_links", *argv); + return -1; + } + addattr32(n, 1024, IFLA_BOND_MIN_LINKS, min_links); + } else if (matches(*argv, "lp_interval") == 0) { + NEXT_ARG(); + if (get_u32(&lp_interval, *argv, 0)) { + invarg("invalid lp_interval", *argv); + return -1; + } + addattr32(n, 1024, IFLA_BOND_LP_INTERVAL, lp_interval); + } else if (matches(*argv, "packets_per_slave") == 0) { + NEXT_ARG(); + if (get_u32(&packets_per_slave, *argv, 0)) { + invarg("invalid packets_per_slave", *argv); + return -1; + } + addattr32(n, 1024, IFLA_BOND_PACKETS_PER_SLAVE, + packets_per_slave); + } else if (matches(*argv, "lacp_rate") == 0) { + NEXT_ARG(); + if (get_index(lacp_rate_tbl, *argv) < 0) { + invarg("invalid lacp_rate", *argv); + return -1; + } + lacp_rate = get_index(lacp_rate_tbl, *argv); + addattr8(n, 1024, IFLA_BOND_AD_LACP_RATE, lacp_rate); + } else if (matches(*argv, "ad_select") == 0) { + NEXT_ARG(); + if (get_index(ad_select_tbl, *argv) < 0) { + invarg("invalid ad_select", *argv); + return -1; + } + ad_select = get_index(ad_select_tbl, *argv); + addattr8(n, 1024, IFLA_BOND_AD_SELECT, ad_select); } else { fprintf(stderr, "bond: unknown command \"%s\"?\n", *argv); explain(); @@ -69,8 +357,11 @@ static void bond_print_opt(struct link_util *lu, FILE *f, struct rtattr *tb[]) if (!tb) return; - if (tb[IFLA_BOND_MODE]) - fprintf(f, "mode %u ", rta_getattr_u8(tb[IFLA_BOND_MODE])); + if (tb[IFLA_BOND_MODE]) { + const char *mode = get_name(mode_tbl, + rta_getattr_u8(tb[IFLA_BOND_MODE])); + fprintf(f, "mode %s ", mode); + } if (tb[IFLA_BOND_ACTIVE_SLAVE] && (ifindex = rta_getattr_u32(tb[IFLA_BOND_ACTIVE_SLAVE]))) { @@ -82,6 +373,159 @@ static void bond_print_opt(struct link_util *lu, FILE *f, struct rtattr *tb[]) else fprintf(f, "active_slave %u ", ifindex); } + + if (tb[IFLA_BOND_MIIMON]) + fprintf(f, "miimon %u ", rta_getattr_u32(tb[IFLA_BOND_MIIMON])); + + if (tb[IFLA_BOND_UPDELAY]) + fprintf(f, "updelay %u ", 
rta_getattr_u32(tb[IFLA_BOND_UPDELAY])); + + if (tb[IFLA_BOND_DOWNDELAY]) + fprintf(f, "downdelay %u ", + rta_getattr_u32(tb[IFLA_BOND_DOWNDELAY])); + + if (tb[IFLA_BOND_USE_CARRIER]) + fprintf(f, "use_carrier %u ", + rta_getattr_u8(tb[IFLA_BOND_USE_CARRIER])); + + if (tb[IFLA_BOND_ARP_INTERVAL]) + fprintf(f, "arp_interval %u ", + rta_getattr_u32(tb[IFLA_BOND_ARP_INTERVAL])); + + if (tb[IFLA_BOND_ARP_IP_TARGET]) { + struct rtattr *iptb[BOND_MAX_ARP_TARGETS + 1]; + char buf[INET_ADDRSTRLEN]; + int i; + + parse_rtattr_nested(iptb, BOND_MAX_ARP_TARGETS, + tb[IFLA_BOND_ARP_IP_TARGET]); + + if (iptb[0]) + fprintf(f, "arp_ip_target "); + + for (i = 0; i < BOND_MAX_ARP_TARGETS; i++) { + if (iptb[i]) + fprintf(f, "%s", + rt_addr_n2a(AF_INET, + RTA_PAYLOAD(iptb[i]), + RTA_DATA(iptb[i]), + buf, + INET_ADDRSTRLEN)); + if (i < BOND_MAX_ARP_TARGETS-1 && iptb[i+1]) + fprintf(f, ","); + } + + if (iptb[0]) + fprintf(f, " "); + } + + if (tb[IFLA_BOND_ARP_VALIDATE]) { + const char *arp_validate = get_name(arp_validate_tbl, + rta_getattr_u32(tb[IFLA_BOND_ARP_VALIDATE])); + fprintf(f, "arp_validate %s ", arp_validate); + } + + if (tb[IFLA_BOND_ARP_ALL_TARGETS]) { + const char *arp_all_targets = get_name(arp_all_targets_tbl, + rta_getattr_u32(tb[IFLA_BOND_ARP_ALL_TARGETS])); + fprintf(f, "arp_all_target %s ", arp_all_targets); + } + + if (tb[IFLA_BOND_PRIMARY] && + (ifindex = rta_getattr_u32(tb[IFLA_BOND_PRIMARY]))) { + char buf[IFNAMSIZ]; + const char *n = if_indextoname(ifindex, buf); + + if (n) + fprintf(f, "primary %s ", n); + else + fprintf(f, "primary %u ", ifindex); + } + + if (tb[IFLA_BOND_PRIMARY_RESELECT]) { + const char *primary_reselect = get_name(primary_reselect_tbl, + rta_getattr_u8(tb[IFLA_BOND_PRIMARY_RESELECT])); + fprintf(f, "primary_reselect %s ", primary_reselect); + } + + if (tb[IFLA_BOND_FAIL_OVER_MAC]) { + const char *fail_over_mac = get_name(fail_over_mac_tbl, + rta_getattr_u8(tb[IFLA_BOND_FAIL_OVER_MAC])); + fprintf(f, "fail_over_mac %s ", fail_over_mac); + } 
+ + if (tb[IFLA_BOND_XMIT_HASH_POLICY]) { + const char *xmit_hash_policy = get_name(xmit_hash_policy_tbl, + rta_getattr_u8(tb[IFLA_BOND_XMIT_HASH_POLICY])); + fprintf(f, "xmit_hash_policy %s ", xmit_hash_policy); + } + + if (tb[IFLA_BOND_RESEND_IGMP]) + fprintf(f, "resend_igmp %u ", + rta_getattr_u32(tb[IFLA_BOND_RESEND_IGMP])); + + if (tb[IFLA_BOND_NUM_PEER_NOTIF]) + fprintf(f, "num_grat_arp %u ", + rta_getattr_u8(tb[IFLA_BOND_NUM_PEER_NOTIF])); + + if (tb[IFLA_BOND_ALL_SLAVES_ACTIVE]) + fprintf(f, "all_slaves_active %u ", + rta_getattr_u8(tb[IFLA_BOND_ALL_SLAVES_ACTIVE])); + + if (tb[IFLA_BOND_MIN_LINKS]) + fprintf(f, "min_links %u ", + rta_getattr_u32(tb[IFLA_BOND_MIN_LINKS])); + + if (tb[IFLA_BOND_LP_INTERVAL]) + fprintf(f, "lp_interval %u ", + rta_getattr_u32(tb[IFLA_BOND_LP_INTERVAL])); + + if (tb[IFLA_BOND_PACKETS_PER_SLAVE]) + fprintf(f, "packets_per_slave %u ", + rta_getattr_u32(tb[IFLA_BOND_PACKETS_PER_SLAVE])); + + if (tb[IFLA_BOND_AD_LACP_RATE]) { + const char *lacp_rate = get_name(lacp_rate_tbl, + rta_getattr_u8(tb[IFLA_BOND_AD_LACP_RATE])); + fprintf(f, "lacp_rate %s ", lacp_rate); + } + + if (tb[IFLA_BOND_AD_SELECT]) { + const char *ad_select = get_name(ad_select_tbl, + rta_getattr_u8(tb[IFLA_BOND_AD_SELECT])); + fprintf(f, "ad_select %s ", ad_select); + } + + if (tb[IFLA_BOND_AD_INFO]) { + struct rtattr *adtb[IFLA_BOND_AD_INFO_MAX + 1]; + + parse_rtattr_nested(adtb, IFLA_BOND_AD_INFO_MAX, + tb[IFLA_BOND_AD_INFO]); + + if (adtb[IFLA_BOND_AD_INFO_AGGREGATOR]) + fprintf(f, "ad_aggregator %d ", + rta_getattr_u16(adtb[IFLA_BOND_AD_INFO_AGGREGATOR])); + + if (adtb[IFLA_BOND_AD_INFO_NUM_PORTS]) + fprintf(f, "ad_num_ports %d ", + rta_getattr_u16(adtb[IFLA_BOND_AD_INFO_NUM_PORTS])); + + if (adtb[IFLA_BOND_AD_INFO_ACTOR_KEY]) + fprintf(f, "ad_actor_key %d ", + rta_getattr_u16(adtb[IFLA_BOND_AD_INFO_ACTOR_KEY])); + + if (adtb[IFLA_BOND_AD_INFO_PARTNER_KEY]) + fprintf(f, "ad_partner_key %d ", + rta_getattr_u16(adtb[IFLA_BOND_AD_INFO_PARTNER_KEY])); + + if 
(adtb[IFLA_BOND_AD_INFO_PARTNER_MAC]) { + unsigned char *p = + RTA_DATA(adtb[IFLA_BOND_AD_INFO_PARTNER_MAC]); + SPRINT_BUF(b); + fprintf(f, "ad_partner_mac %s ", + ll_addr_n2a(p, ETH_ALEN, 0, b, sizeof(b))); + } + } } struct link_util bond_link_util = { diff --git a/ip/ipnetconf.c b/ip/ipnetconf.c index 37aaf450..0e44cc8c 100644 --- a/ip/ipnetconf.c +++ b/ip/ipnetconf.c @@ -114,6 +114,10 @@ int print_netconf(const struct sockaddr_nl *who, struct nlmsghdr *n, void *arg) fprintf(fp, "mc_forwarding %d ", *(int *)RTA_DATA(tb[NETCONFA_MC_FORWARDING])); + if (tb[NETCONFA_PROXY_NEIGH]) + fprintf(fp, "proxy_neigh %s ", + *(int *)RTA_DATA(tb[NETCONFA_PROXY_NEIGH])?"on":"off"); + fprintf(fp, "\n"); fflush(fp); return 0; diff --git a/man/man8/Makefile b/man/man8/Makefile index ff80c988..cef09121 100644 --- a/man/man8/Makefile +++ b/man/man8/Makefile @@ -2,7 +2,7 @@ TARGETS = ip-address.8 ip-link.8 ip-route.8 MAN8PAGES = $(TARGETS) ip.8 arpd.8 lnstat.8 routel.8 rtacct.8 rtmon.8 ss.8 \ tc.8 tc-bfifo.8 tc-cbq.8 tc-cbq-details.8 tc-choke.8 tc-codel.8 \ - tc-drr.8 tc-ematch.8 tc-fq_codel.8 tc-hfsc.8 tc-htb.8 \ + tc-drr.8 tc-ematch.8 tc-fq_codel.8 tc-hfsc.8 tc-htb.8 tc-pie.8 \ tc-netem.8 tc-pfifo.8 tc-pfifo_fast.8 tc-prio.8 tc-red.8 \ tc-sfb.8 tc-sfq.8 tc-stab.8 tc-tbf.8 \ bridge.8 rtstat.8 ctstat.8 nstat.8 routef.8 \ diff --git a/man/man8/tc-pie.8 b/man/man8/tc-pie.8 new file mode 100644 index 00000000..536c381c --- /dev/null +++ b/man/man8/tc-pie.8 @@ -0,0 +1,131 @@ +.TH PIE 8 "16 January 2014" "iproute2" "Linux" +.SH NAME +PIE \- Proportional Integral controller-Enhanced AQM algorithm +.SH SYNOPSIS +.B tc qdisc ... pie +[ +.B limit +PACKETS ] [ +.B target +TIME ] [ +.B tupdate +TIME ] [ +.B alpha +int ] [ +.B beta +int ] [ +.B ecn +| +.B noecn +] [ +.B bytemode +| +.B nobytemode +] + +.SH DESCRIPTION +Proportional Integral controller-Enhanced (PIE) is a control theoretic active +queue management scheme. 
It is based on the proportional integral controller but +aims to control delay. The main design goals are + o Low latency control + o High link utilization + o Simple implementation + o Guaranteed stability and fast responsiveness + +.SH ALGORITHM +PIE is designed to control delay effectively. First, an average dequeue rate is +estimated based on the standing queue. The rate is used to calculate the current +delay. Then, on a periodic basis, the delay is used to calculate the dropping +probability. Finally, on arrival, a packet is dropped (or marked) based on this +probability. + +PIE makes adjustments to the probability based on the trend of the delay i.e. +whether it is going up or down. The delay converges quickly to the target value +specified. + +alpha and beta are statically chosen parameters chosen to control the drop probability +growth and are determined through control theoretic approaches. alpha determines how +the deviation between the current and target latency changes probability. beta exerts +additional adjustments depending on the latency trend. + +The drop probability is used to mark packets in ecn mode. However, as in RED, +beyond 10% packets are dropped based on this probability. The bytemode is used +to drop packets proportional to the packet size. + +Additional details can be found in the paper cited below. + +.SH PARAMETERS +.SS limit +limit on the queue size in packets. Incoming packets are dropped when this limit +is reached. Default is 1000 packets. + +.SS target +is the expected queue delay. The default target delay is 20ms. + +.SS tupdate +is the frequency at which the system drop probability is calculated. The default is 30ms. + +.SS alpha +.SS beta +alpha and beta are parameters chosen to control the drop probability. These +should be in the range between 0 and 32. + +.SS ecn | noecn +is used to mark packets instead of dropping +.B ecn +to turn on ecn mode, +.B noecn +to turn off ecn mode. By default, +.B ecn +is turned off. 
+ +.SS bytemode | nobytemode +is used to scale drop probability proportional to packet size +.B bytemode +to turn on bytemode, +.B nobytemode +to turn off bytemode. By default, +.B bytemode +is turned off. + +.SH EXAMPLES + # tc qdisc add dev eth0 root pie + # tc -s qdisc show + qdisc pie 8034: dev eth0 root refcnt 2 limit 200p target 19000us tupdate 29000us alpha 2 beta 20 + Sent 7443524 bytes 7204 pkt (dropped 900, overlimits 0 requeues 0) + backlog 38998b 37p requeues 0 + prob 0.123384 delay 25000us avg_dq_rate 1464840 + pkts_in 7241 overlimit 900 dropped 0 maxq 186 ecn_mark 0 + + # tc qdisc add dev eth0 root pie limit 100 target 20ms tupdate 30ms ecn + # tc -s qdisc show + qdisc pie 8036: dev eth0 root refcnt 2 limit 200p target 19000 tupdate 29000 alpha 2 beta 20 ecn + Sent 2491922 bytes 2507 pkt (dropped 214, overlimits 0 requeues 0) + backlog 33728b 32p requeues 0 + prob 0.102262 delay 24000us avg_dq_rate 1464840 + pkts_in 2468 overlimit 214 dropped 0 maxq 192 ecn_mark 71 + + + # tc qdisc add dev eth0 root pie limit 100 target 50ms tupdate 30ms bytemode + # tc -s qdisc show + qdisc pie 8036: dev eth0 root refcnt 2 limit 200p target 19000 tupdate 29000 alpha 2 beta 20 ecn + Sent 2491922 bytes 2507 pkt (dropped 214, overlimits 0 requeues 0) + backlog 33728b 32p requeues 0 + prob 0.102262 delay 24000us avg_dq_rate 1464840 + pkts_in 2468 overlimit 214 dropped 0 maxq 192 ecn_mark 71 + + +.SH SEE ALSO +.BR tc (8), +.BR tc-codel (8), +.BR tc-red (8) + +.SH SOURCES + o IETF draft submission is at http://tools.ietf.org/html/draft-pan-tsvwg-pie-00 + o IEEE Conference on High Performance Switching and Routing 2013 : "PIE: A +Lightweight Control Scheme to Address the Bufferbloat Problem" + +.SH AUTHORS +PIE was implemented by Vijay Subramanian and Mythili Prabhu, also the authors of +this man page. Please report bugs and corrections to the Linux networking +development mailing list at <netdev@vger.kernel.org>. 
diff --git a/tc/Makefile b/tc/Makefile index 84215c06..b6337714 100644 --- a/tc/Makefile +++ b/tc/Makefile @@ -53,6 +53,7 @@ TCMODULES += q_mqprio.o TCMODULES += q_codel.o TCMODULES += q_fq_codel.o TCMODULES += q_fq.o +TCMODULES += q_pie.o ifeq ($(TC_CONFIG_IPSET), y) ifeq ($(TC_CONFIG_XT), y) diff --git a/tc/q_htb.c b/tc/q_htb.c index e108857d..1d8c56f2 100644 --- a/tc/q_htb.c +++ b/tc/q_htb.c @@ -113,6 +113,7 @@ static int htb_parse_class_opt(struct qdisc_util *qu, int argc, char **argv, str unsigned int direct_qlen = ~0U; unsigned int linklayer = LINKLAYER_ETHERNET; /* Assume ethernet */ struct rtattr *tail; + __u64 ceil64 = 0, rate64 = 0; memset(&opt, 0, sizeof(opt)); mtu = 1600; /* eth packet len */ @@ -173,22 +174,22 @@ static int htb_parse_class_opt(struct qdisc_util *qu, int argc, char **argv, str ok++; } else if (strcmp(*argv, "ceil") == 0) { NEXT_ARG(); - if (opt.ceil.rate) { + if (ceil64) { fprintf(stderr, "Double \"ceil\" spec\n"); return -1; } - if (get_rate(&opt.ceil.rate, *argv)) { + if (get_rate64(&ceil64, *argv)) { explain1("ceil"); return -1; } ok++; } else if (strcmp(*argv, "rate") == 0) { NEXT_ARG(); - if (opt.rate.rate) { + if (rate64) { fprintf(stderr, "Double \"rate\" spec\n"); return -1; } - if (get_rate(&opt.rate.rate, *argv)) { + if (get_rate64(&rate64, *argv)) { explain1("rate"); return -1; } @@ -207,17 +208,23 @@ static int htb_parse_class_opt(struct qdisc_util *qu, int argc, char **argv, str /* if (!ok) return 0;*/ - if (opt.rate.rate == 0) { + if (!rate64) { fprintf(stderr, "\"rate\" is required.\n"); return -1; } /* if ceil params are missing, use the same as rate */ - if (!opt.ceil.rate) opt.ceil = opt.rate; + if (!ceil64) + ceil64 = rate64; + + opt.rate.rate = (rate64 >= (1ULL << 32)) ? ~0U : rate64; + opt.ceil.rate = (ceil64 >= (1ULL << 32)) ? 
~0U : ceil64; /* compute minimal allowed burst from rate; mtu is added here to make sute that buffer is larger than mtu and to have some safeguard space */ - if (!buffer) buffer = opt.rate.rate / get_hz() + mtu; - if (!cbuffer) cbuffer = opt.ceil.rate / get_hz() + mtu; + if (!buffer) + buffer = rate64 / get_hz() + mtu; + if (!cbuffer) + cbuffer = ceil64 / get_hz() + mtu; opt.ceil.overhead = overhead; opt.rate.overhead = overhead; @@ -229,19 +236,26 @@ static int htb_parse_class_opt(struct qdisc_util *qu, int argc, char **argv, str fprintf(stderr, "htb: failed to calculate rate table.\n"); return -1; } - opt.buffer = tc_calc_xmittime(opt.rate.rate, buffer); + opt.buffer = tc_calc_xmittime(rate64, buffer); if (tc_calc_rtable(&opt.ceil, ctab, ccell_log, mtu, linklayer) < 0) { fprintf(stderr, "htb: failed to calculate ceil rate table.\n"); return -1; } - opt.cbuffer = tc_calc_xmittime(opt.ceil.rate, cbuffer); + opt.cbuffer = tc_calc_xmittime(ceil64, cbuffer); tail = NLMSG_TAIL(n); if (direct_qlen != ~0U) addattr_l(n, 1024, TCA_HTB_DIRECT_QLEN, &direct_qlen, sizeof(direct_qlen)); addattr_l(n, 1024, TCA_OPTIONS, NULL, 0); + + if (rate64 >= (1ULL << 32)) + addattr_l(n, 1124, TCA_HTB_RATE64, &rate64, sizeof(rate64)); + + if (ceil64 >= (1ULL << 32)) + addattr_l(n, 1224, TCA_HTB_CEIL64, &ceil64, sizeof(ceil64)); + addattr_l(n, 2024, TCA_HTB_PARMS, &opt, sizeof(opt)); addattr_l(n, 3024, TCA_HTB_RTAB, rtab, 1024); addattr_l(n, 4024, TCA_HTB_CTAB, ctab, 1024); @@ -256,6 +270,7 @@ static int htb_print_opt(struct qdisc_util *qu, FILE *f, struct rtattr *opt) struct tc_htb_glob *gopt; double buffer,cbuffer; unsigned int linklayer; + __u64 rate64, ceil64; SPRINT_BUF(b1); SPRINT_BUF(b2); SPRINT_BUF(b3); @@ -275,12 +290,25 @@ static int htb_print_opt(struct qdisc_util *qu, FILE *f, struct rtattr *opt) if (show_details) fprintf(f, "quantum %d ", (int)hopt->quantum); } - fprintf(f, "rate %s ", sprint_rate(hopt->rate.rate, b1)); + + rate64 = hopt->rate.rate; + if (tb[TCA_HTB_RATE64] && + 
RTA_PAYLOAD(tb[TCA_HTB_RATE64]) >= sizeof(rate64)) { + rate64 = rta_getattr_u64(tb[TCA_HTB_RATE64]); + } + + ceil64 = hopt->ceil.rate; + if (tb[TCA_HTB_CEIL64] && + RTA_PAYLOAD(tb[TCA_HTB_CEIL64]) >= sizeof(ceil64)) + ceil64 = rta_getattr_u64(tb[TCA_HTB_CEIL64]); + + fprintf(f, "rate %s ", sprint_rate(rate64, b1)); if (hopt->rate.overhead) fprintf(f, "overhead %u ", hopt->rate.overhead); - buffer = tc_calc_xmitsize(hopt->rate.rate, hopt->buffer); - fprintf(f, "ceil %s ", sprint_rate(hopt->ceil.rate, b1)); - cbuffer = tc_calc_xmitsize(hopt->ceil.rate, hopt->cbuffer); + buffer = tc_calc_xmitsize(rate64, hopt->buffer); + + fprintf(f, "ceil %s ", sprint_rate(ceil64, b1)); + cbuffer = tc_calc_xmitsize(ceil64, hopt->cbuffer); linklayer = (hopt->rate.linklayer & TC_LINKLAYER_MASK); if (linklayer > TC_LINKLAYER_ETHERNET || show_details) fprintf(f, "linklayer %s ", sprint_linklayer(linklayer, b4)); diff --git a/tc/q_netem.c b/tc/q_netem.c index 9dd8712f..946007c5 100644 --- a/tc/q_netem.c +++ b/tc/q_netem.c @@ -183,6 +183,7 @@ static int netem_parse_opt(struct qdisc_util *qu, int argc, char **argv, __s16 *dist_data = NULL; __u16 loss_type = NETEM_LOSS_UNSPEC; int present[__TCA_NETEM_MAX]; + __u64 rate64 = 0; memset(&cor, 0, sizeof(cor)); memset(&reorder, 0, sizeof(reorder)); @@ -391,7 +392,7 @@ static int netem_parse_opt(struct qdisc_util *qu, int argc, char **argv, } else if (matches(*argv, "rate") == 0) { ++present[TCA_NETEM_RATE]; NEXT_ARG(); - if (get_rate(&rate.rate, *argv)) { + if (get_rate64(&rate64, *argv)) { explain1("rate"); return -1; } @@ -496,9 +497,18 @@ static int netem_parse_opt(struct qdisc_util *qu, int argc, char **argv, addattr_nest_end(n, start); } - if (present[TCA_NETEM_RATE] && - addattr_l(n, 1024, TCA_NETEM_RATE, &rate, sizeof(rate)) < 0) - return -1; + if (present[TCA_NETEM_RATE]) { + if (rate64 >= (1ULL << 32)) { + if (addattr_l(n, 1024, + TCA_NETEM_RATE64, &rate64, sizeof(rate64)) < 0) + return -1; + rate.rate = ~0U; + } else { + rate.rate = 
rate64; + } + if (addattr_l(n, 1024, TCA_NETEM_RATE, &rate, sizeof(rate)) < 0) + return -1; + } if (dist_data) { if (addattr_l(n, MAX_DIST * sizeof(dist_data[0]), @@ -522,6 +532,7 @@ static int netem_print_opt(struct qdisc_util *qu, FILE *f, struct rtattr *opt) struct tc_netem_qopt qopt; const struct tc_netem_rate *rate = NULL; int len = RTA_PAYLOAD(opt) - sizeof(qopt); + __u64 rate64 = 0; SPRINT_BUF(b1); if (opt == NULL) @@ -572,6 +583,11 @@ static int netem_print_opt(struct qdisc_util *qu, FILE *f, struct rtattr *opt) return -1; ecn = RTA_DATA(tb[TCA_NETEM_ECN]); } + if (tb[TCA_NETEM_RATE64]) { + if (RTA_PAYLOAD(tb[TCA_NETEM_RATE64]) < sizeof(rate64)) + return -1; + rate64 = rta_getattr_u64(tb[TCA_NETEM_RATE64]); + } } fprintf(f, "limit %d", qopt.limit); @@ -632,7 +648,10 @@ static int netem_print_opt(struct qdisc_util *qu, FILE *f, struct rtattr *opt) } if (rate && rate->rate) { - fprintf(f, " rate %s", sprint_rate(rate->rate, b1)); + if (rate64) + fprintf(f, " rate %s", sprint_rate(rate64, b1)); + else + fprintf(f, " rate %s", sprint_rate(rate->rate, b1)); if (rate->packet_overhead) fprintf(f, " packetoverhead %d", rate->packet_overhead); if (rate->cell_size) diff --git a/tc/q_pie.c b/tc/q_pie.c new file mode 100644 index 00000000..193b05de --- /dev/null +++ b/tc/q_pie.c @@ -0,0 +1,218 @@ +/* Copyright (C) 2013 Cisco Systems, Inc, 2013. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * Author: Vijay Subramanian + * Author: Mythili Prabhu + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "utils.h" +#include "tc_util.h" + +static void explain(void) +{ + fprintf(stderr, "Usage: ... pie [ limit PACKETS ][ target TIME us]\n"); + fprintf(stderr, " [ tupdate TIME us][ alpha ALPHA ]"); + fprintf(stderr, "[beta BETA ][bytemode | nobytemode][ecn | noecn ]\n"); +} + +#define ALPHA_MAX 32 +#define ALPHA_MIN 0 +#define BETA_MAX 32 +#define BETA_MIN 0 + +static int pie_parse_opt(struct qdisc_util *qu, int argc, char **argv, + struct nlmsghdr *n) +{ + unsigned int limit = 0; + unsigned int target = 0; + unsigned int tupdate = 0; + unsigned int alpha = 0; + unsigned int beta = 0; + int ecn = -1; + int bytemode = -1; + struct rtattr *tail; + + while (argc > 0) { + if (strcmp(*argv, "limit") == 0) { + NEXT_ARG(); + if (get_unsigned(&limit, *argv, 0)) { + fprintf(stderr, "Illegal \"limit\"\n"); + return -1; + } + } else if (strcmp(*argv, "target") == 0) { + NEXT_ARG(); + if (get_time(&target, *argv)) { + fprintf(stderr, "Illegal \"target\"\n"); + return -1; + } + } else if (strcmp(*argv, "tupdate") == 0) { + NEXT_ARG(); + if (get_time(&tupdate, *argv)) { + fprintf(stderr, "Illegal \"tupdate\"\n"); + return -1; + } + } else if (strcmp(*argv, "alpha") == 0) { + NEXT_ARG(); + if (get_unsigned(&alpha, *argv, 0) || + (alpha > ALPHA_MAX) || (alpha < ALPHA_MIN)) { + fprintf(stderr, "Illegal \"alpha\"\n"); + return -1; + } + } else if (strcmp(*argv, "beta") == 0) { + NEXT_ARG(); + if (get_unsigned(&beta, *argv, 0) || + (beta > BETA_MAX) || (beta < BETA_MIN)) { + fprintf(stderr, "Illegal \"beta\"\n"); + return -1; + } + } else if (strcmp(*argv, "ecn") == 0) { + ecn = 1; + } else if (strcmp(*argv, "noecn") == 0) { + ecn = 0; + } else if (strcmp(*argv, "bytemode") == 0) { + bytemode = 1; + } else if (strcmp(*argv, "nobytemode") == 0) { + bytemode = 0; + } else if (strcmp(*argv, "help") == 0) { + 
explain(); + return -1; + } else { + fprintf(stderr, "What is \"%s\"?\n", *argv); + explain(); + return -1; + } + argc--; + argv++; + } + + tail = NLMSG_TAIL(n); + addattr_l(n, 1024, TCA_OPTIONS, NULL, 0); + if (limit) + addattr_l(n, 1024, TCA_PIE_LIMIT, &limit, sizeof(limit)); + if (tupdate) + addattr_l(n, 1024, TCA_PIE_TUPDATE, &tupdate, sizeof(tupdate)); + if (target) + addattr_l(n, 1024, TCA_PIE_TARGET, &target, sizeof(target)); + if (alpha) + addattr_l(n, 1024, TCA_PIE_ALPHA, &alpha, sizeof(alpha)); + if (beta) + addattr_l(n, 1024, TCA_PIE_BETA, &beta, sizeof(beta)); + if (ecn != -1) + addattr_l(n, 1024, TCA_PIE_ECN, &ecn, sizeof(ecn)); + if (bytemode != -1) + addattr_l(n, 1024, TCA_PIE_BYTEMODE, &bytemode, + sizeof(bytemode)); + + tail->rta_len = (void *)NLMSG_TAIL(n) - (void *)tail; + return 0; +} + +static int pie_print_opt(struct qdisc_util *qu, FILE *f, struct rtattr *opt) +{ + struct rtattr *tb[TCA_PIE_MAX + 1]; + unsigned int limit; + unsigned int tupdate; + unsigned int target; + unsigned int alpha; + unsigned int beta; + unsigned ecn; + unsigned bytemode; + SPRINT_BUF(b1); + + if (opt == NULL) + return 0; + + parse_rtattr_nested(tb, TCA_PIE_MAX, opt); + + if (tb[TCA_PIE_LIMIT] && + RTA_PAYLOAD(tb[TCA_PIE_LIMIT]) >= sizeof(__u32)) { + limit = rta_getattr_u32(tb[TCA_PIE_LIMIT]); + fprintf(f, "limit %up ", limit); + } + if (tb[TCA_PIE_TARGET] && + RTA_PAYLOAD(tb[TCA_PIE_TARGET]) >= sizeof(__u32)) { + target = rta_getattr_u32(tb[TCA_PIE_TARGET]); + fprintf(f, "target %s ", sprint_time(target, b1)); + } + if (tb[TCA_PIE_TUPDATE] && + RTA_PAYLOAD(tb[TCA_PIE_TUPDATE]) >= sizeof(__u32)) { + tupdate = rta_getattr_u32(tb[TCA_PIE_TUPDATE]); + fprintf(f, "tupdate %s ", sprint_time(tupdate, b1)); + } + if (tb[TCA_PIE_ALPHA] && + RTA_PAYLOAD(tb[TCA_PIE_ALPHA]) >= sizeof(__u32)) { + alpha = rta_getattr_u32(tb[TCA_PIE_ALPHA]); + fprintf(f, "alpha %u ", alpha); + } + if (tb[TCA_PIE_BETA] && + RTA_PAYLOAD(tb[TCA_PIE_BETA]) >= sizeof(__u32)) { + beta = 
rta_getattr_u32(tb[TCA_PIE_BETA]); + fprintf(f, "beta %u ", beta); + } + + if (tb[TCA_PIE_ECN] && RTA_PAYLOAD(tb[TCA_PIE_ECN]) >= sizeof(__u32)) { + ecn = rta_getattr_u32(tb[TCA_PIE_ECN]); + if (ecn) + fprintf(f, "ecn "); + } + + if (tb[TCA_PIE_BYTEMODE] && + RTA_PAYLOAD(tb[TCA_PIE_BYTEMODE]) >= sizeof(__u32)) { + bytemode = rta_getattr_u32(tb[TCA_PIE_BYTEMODE]); + if (bytemode) + fprintf(f, "bytemode "); + } + + return 0; +} + +static int pie_print_xstats(struct qdisc_util *qu, FILE *f, + struct rtattr *xstats) +{ + struct tc_pie_xstats *st; + + if (xstats == NULL) + return 0; + + if (RTA_PAYLOAD(xstats) < sizeof(*st)) + return -1; + + st = RTA_DATA(xstats); + /* prob is returned as a fraction of maximum integer value */ + fprintf(f, "prob %f delay %uus avg_dq_rate %u\n", + (double)st->prob / (double)0xffffffff, st->delay, + st->avg_dq_rate); + fprintf(f, "pkts_in %u overlimit %u dropped %u maxq %u ecn_mark %u\n", + st->packets_in, st->overlimit, st->dropped, st->maxq, + st->ecn_mark); + return 0; + +} + +struct qdisc_util pie_qdisc_util = { + .id = "pie", + .parse_qopt = pie_parse_opt, + .print_qopt = pie_print_opt, + .print_xstats = pie_print_xstats, +}; diff --git a/tc/q_tbf.c b/tc/q_tbf.c index 34784a41..f3022b65 100644 --- a/tc/q_tbf.c +++ b/tc/q_tbf.c @@ -47,6 +47,7 @@ static int tbf_parse_opt(struct qdisc_util *qu, int argc, char **argv, struct nl unsigned short overhead=0; unsigned int linklayer = LINKLAYER_ETHERNET; /* Assume ethernet */ struct rtattr *tail; + __u64 rate64 = 0, prate64 = 0; memset(&opt, 0, sizeof(opt)); @@ -121,22 +122,22 @@ static int tbf_parse_opt(struct qdisc_util *qu, int argc, char **argv, struct nl ok++; } else if (strcmp(*argv, "rate") == 0) { NEXT_ARG(); - if (opt.rate.rate) { + if (rate64) { fprintf(stderr, "tbf: duplicate \"rate\" specification\n"); return -1; } - if (get_rate(&opt.rate.rate, *argv)) { + if (get_rate64(&rate64, *argv)) { explain1("rate", *argv); return -1; } ok++; } else if (matches(*argv, "peakrate") == 0) {
NEXT_ARG(); - if (opt.peakrate.rate) { + if (prate64) { fprintf(stderr, "tbf: duplicate \"peakrate\" specification\n"); return -1; } - if (get_rate(&opt.peakrate.rate, *argv)) { + if (get_rate64(&prate64, *argv)) { explain1("peakrate", *argv); return -1; } @@ -172,7 +173,7 @@ static int tbf_parse_opt(struct qdisc_util *qu, int argc, char **argv, struct nl * one go rather than reveal one more problem when a * previous one has been fixed. */ - if (opt.rate.rate == 0) { + if (rate64 == 0) { fprintf(stderr, "tbf: the \"rate\" parameter is mandatory.\n"); verdict = -1; } @@ -180,7 +181,7 @@ static int tbf_parse_opt(struct qdisc_util *qu, int argc, char **argv, struct nl fprintf(stderr, "tbf: the \"burst\" parameter is mandatory.\n"); verdict = -1; } - if (opt.peakrate.rate) { + if (prate64) { if (!mtu) { fprintf(stderr, "tbf: when \"peakrate\" is specified, \"mtu\" must also be specified.\n"); verdict = -1; @@ -197,10 +198,13 @@ static int tbf_parse_opt(struct qdisc_util *qu, int argc, char **argv, struct nl return verdict; } + opt.rate.rate = (rate64 >= (1ULL << 32)) ? ~0U : rate64; + opt.peakrate.rate = (prate64 >= (1ULL << 32)) ? 
~0U : prate64; + if (opt.limit == 0) { - double lim = opt.rate.rate*(double)latency/TIME_UNITS_PER_SEC + buffer; - if (opt.peakrate.rate) { - double lim2 = opt.peakrate.rate*(double)latency/TIME_UNITS_PER_SEC + mtu; + double lim = rate64*(double)latency/TIME_UNITS_PER_SEC + buffer; + if (prate64) { + double lim2 = prate64*(double)latency/TIME_UNITS_PER_SEC + mtu; if (lim2 < lim) lim = lim2; } @@ -228,20 +232,28 @@ static int tbf_parse_opt(struct qdisc_util *qu, int argc, char **argv, struct nl tail = NLMSG_TAIL(n); addattr_l(n, 1024, TCA_OPTIONS, NULL, 0); addattr_l(n, 2024, TCA_TBF_PARMS, &opt, sizeof(opt)); + addattr_l(n, 2124, TCA_TBF_BURST, &buffer, sizeof(buffer)); + if (rate64 >= (1ULL << 32)) + addattr_l(n, 2124, TCA_TBF_RATE64, &rate64, sizeof(rate64)); addattr_l(n, 3024, TCA_TBF_RTAB, rtab, 1024); - if (opt.peakrate.rate) + if (opt.peakrate.rate) { + if (prate64 >= (1ULL << 32)) + addattr_l(n, 3124, TCA_TBF_PRATE64, &prate64, sizeof(prate64)); + addattr_l(n, 3224, TCA_TBF_PBURST, &mtu, sizeof(mtu)); addattr_l(n, 4096, TCA_TBF_PTAB, ptab, 1024); + } tail->rta_len = (void *) NLMSG_TAIL(n) - (void *) tail; return 0; } static int tbf_print_opt(struct qdisc_util *qu, FILE *f, struct rtattr *opt) { - struct rtattr *tb[TCA_TBF_PTAB+1]; + struct rtattr *tb[TCA_TBF_MAX+1]; struct tc_tbf_qopt *qopt; unsigned int linklayer; double buffer, mtu; double latency; + __u64 rate64 = 0, prate64 = 0; SPRINT_BUF(b1); SPRINT_BUF(b2); SPRINT_BUF(b3); @@ -249,7 +261,7 @@ static int tbf_print_opt(struct qdisc_util *qu, FILE *f, struct rtattr *opt) if (opt == NULL) return 0; - parse_rtattr_nested(tb, TCA_TBF_PTAB, opt); + parse_rtattr_nested(tb, TCA_TBF_MAX, opt); if (tb[TCA_TBF_PARMS] == NULL) return -1; @@ -257,8 +269,12 @@ static int tbf_print_opt(struct qdisc_util *qu, FILE *f, struct rtattr *opt) qopt = RTA_DATA(tb[TCA_TBF_PARMS]); if (RTA_PAYLOAD(tb[TCA_TBF_PARMS]) < sizeof(*qopt)) return -1; - fprintf(f, "rate %s ", sprint_rate(qopt->rate.rate, b1)); - buffer = 
tc_calc_xmitsize(qopt->rate.rate, qopt->buffer); + rate64 = qopt->rate.rate; + if (tb[TCA_TBF_RATE64] && + RTA_PAYLOAD(tb[TCA_TBF_RATE64]) >= sizeof(rate64)) + rate64 = rta_getattr_u64(tb[TCA_TBF_RATE64]); + fprintf(f, "rate %s ", sprint_rate(rate64, b1)); + buffer = tc_calc_xmitsize(rate64, qopt->buffer); if (show_details) { fprintf(f, "burst %s/%u mpu %s ", sprint_size(buffer, b1), 1<<qopt->rate.cell_log, sprint_size(qopt->rate.mpu, b2)); @@ -267,10 +283,14 @@ static int tbf_print_opt(struct qdisc_util *qu, FILE *f, struct rtattr *opt) } if (show_raw) fprintf(f, "[%08x] ", qopt->buffer); - if (qopt->peakrate.rate) { - fprintf(f, "peakrate %s ", sprint_rate(qopt->peakrate.rate, b1)); + prate64 = qopt->peakrate.rate; + if (tb[TCA_TBF_PRATE64] && + RTA_PAYLOAD(tb[TCA_TBF_PRATE64]) >= sizeof(prate64)) + prate64 = rta_getattr_u64(tb[TCA_TBF_PRATE64]); + if (prate64) { + fprintf(f, "peakrate %s ", sprint_rate(prate64, b1)); if (qopt->mtu || qopt->peakrate.mpu) { - mtu = tc_calc_xmitsize(qopt->peakrate.rate, qopt->mtu); + mtu = tc_calc_xmitsize(prate64, qopt->mtu); if (show_details) { fprintf(f, "mtu %s/%u mpu %s ", sprint_size(mtu, b1), 1<<qopt->peakrate.cell_log, sprint_size(qopt->peakrate.mpu, b2)); @@ -285,9 +305,9 @@ static int tbf_print_opt(struct qdisc_util *qu, FILE *f, struct rtattr *opt) if (show_raw) fprintf(f, "limit %s ", sprint_size(qopt->limit, b1)); - latency = TIME_UNITS_PER_SEC*(qopt->limit/(double)qopt->rate.rate) - tc_core_tick2time(qopt->buffer); - if (qopt->peakrate.rate) { - double lat2 = TIME_UNITS_PER_SEC*(qopt->limit/(double)qopt->peakrate.rate) - tc_core_tick2time(qopt->mtu); + latency = TIME_UNITS_PER_SEC*(qopt->limit/(double)rate64) - tc_core_tick2time(qopt->buffer); + if (prate64) { + double lat2 = TIME_UNITS_PER_SEC*(qopt->limit/(double)prate64) - tc_core_tick2time(qopt->mtu); if (lat2 > latency) latency = lat2; } diff --git a/tc/tc_core.c b/tc/tc_core.c index a5243370..46eaefb5 100644 --- a/tc/tc_core.c +++ b/tc/tc_core.c @@ -56,12 +56,12 @@ unsigned
tc_core_ktime2time(unsigned ktime) return ktime / clock_factor; } -unsigned tc_calc_xmittime(unsigned rate, unsigned size) +unsigned tc_calc_xmittime(__u64 rate, unsigned size) { - return tc_core_time2tick(TIME_UNITS_PER_SEC*((double)size/rate)); + return tc_core_time2tick(TIME_UNITS_PER_SEC*((double)size/(double)rate)); } -unsigned tc_calc_xmitsize(unsigned rate, unsigned ticks) +unsigned tc_calc_xmitsize(__u64 rate, unsigned ticks) { return ((double)rate*tc_core_tick2time(ticks))/TIME_UNITS_PER_SEC; } diff --git a/tc/tc_core.h b/tc/tc_core.h index 5a693bad..8a63b79c 100644 --- a/tc/tc_core.h +++ b/tc/tc_core.h @@ -18,8 +18,8 @@ unsigned tc_core_time2tick(unsigned time); unsigned tc_core_tick2time(unsigned tick); unsigned tc_core_time2ktime(unsigned time); unsigned tc_core_ktime2time(unsigned ktime); -unsigned tc_calc_xmittime(unsigned rate, unsigned size); -unsigned tc_calc_xmitsize(unsigned rate, unsigned ticks); +unsigned tc_calc_xmittime(__u64 rate, unsigned size); +unsigned tc_calc_xmitsize(__u64 rate, unsigned ticks); int tc_calc_rtable(struct tc_ratespec *r, __u32 *rtab, int cell_log, unsigned mtu, enum link_layer link_layer); int tc_calc_size_table(struct tc_sizespec *s, __u16 **stab); diff --git a/tc/tc_util.c b/tc/tc_util.c index 67f69486..15fb0534 100644 --- a/tc/tc_util.c +++ b/tc/tc_util.c @@ -171,6 +171,31 @@ int get_rate(unsigned *rate, const char *str) return 0; } +int get_rate64(__u64 *rate, const char *str) +{ + char *p; + double bps = strtod(str, &p); + const struct rate_suffix *s; + + if (p == str) + return -1; + + for (s = suffixes; s->name; ++s) { + if (strcasecmp(s->name, p) == 0) { + bps *= s->scale; + p += strlen(p); + break; + } + } + + if (*p) + return -1; /* unknown suffix */ + + bps /= 8; /* -> bytes per second */ + *rate = bps; + return 0; +} + void print_rate(char *buf, int len, __u64 rate) { extern int use_iec; diff --git a/tc/tc_util.h b/tc/tc_util.h index 7c3709f5..d4183679 100644 --- a/tc/tc_util.h +++ b/tc/tc_util.h @@ -58,6 
+58,7 @@ extern struct filter_util *get_filter_kind(const char *str); extern int get_qdisc_handle(__u32 *h, const char *str); extern int get_rate(unsigned *rate, const char *str); +extern int get_rate64(__u64 *rate, const char *str); extern int get_size(unsigned *size, const char *str); extern int get_size_and_cell(unsigned *size, int *cell_log, char *str); extern int get_time(unsigned *time, const char *str);