diff --git a/bridge/br_common.h b/bridge/br_common.h index 12fce3ef..169a162d 100644 --- a/bridge/br_common.h +++ b/bridge/br_common.h @@ -16,4 +16,5 @@ extern int preferred_family; extern int show_stats; extern int show_details; extern int timestamp; +extern int compress_vlans; extern struct rtnl_handle rth; diff --git a/bridge/bridge.c b/bridge/bridge.c index 5fcc552b..88469ca2 100644 --- a/bridge/bridge.c +++ b/bridge/bridge.c @@ -21,6 +21,7 @@ int resolve_hosts; int oneline = 0; int show_stats; int show_details; +int compress_vlans; int timestamp; char * _SL_ = NULL; @@ -32,7 +33,8 @@ static void usage(void) "Usage: bridge [ OPTIONS ] OBJECT { COMMAND | help }\n" "where OBJECT := { link | fdb | mdb | vlan | monitor }\n" " OPTIONS := { -V[ersion] | -s[tatistics] | -d[etails] |\n" -" -o[neline] | -t[imestamp] | -n[etns] name }\n"); +" -o[neline] | -t[imestamp] | -n[etns] name |\n" +" -c[ompressvlans] }\n"); exit(-1); } @@ -117,6 +119,8 @@ main(int argc, char **argv) NEXT_ARG(); if (netns_switch(argv[1])) exit(-1); + } else if (matches(opt, "-compressvlans") == 0) { + ++compress_vlans; } else { fprintf(stderr, "Option \"%s\" is unknown, try \"bridge help\".\n", opt); exit(-1); diff --git a/bridge/vlan.c b/bridge/vlan.c index 3bd7b0db..9f6c84ee 100644 --- a/bridge/vlan.c +++ b/bridge/vlan.c @@ -32,6 +32,7 @@ static int vlan_modify(int cmd, int argc, char **argv) } req; char *d = NULL; short vid = -1; + short vid_end = -1; struct rtattr *afspec; struct bridge_vlan_info vinfo; unsigned short flags = 0; @@ -49,8 +50,18 @@ static int vlan_modify(int cmd, int argc, char **argv) NEXT_ARG(); d = *argv; } else if (strcmp(*argv, "vid") == 0) { + char *p; NEXT_ARG(); - vid = atoi(*argv); + p = strchr(*argv, '-'); + if (p) { + *p = '\0'; + p++; + vid = atoi(*argv); + vid_end = atoi(p); + vinfo.flags |= BRIDGE_VLAN_INFO_RANGE_BEGIN; + } else { + vid = atoi(*argv); + } } else if (strcmp(*argv, "self") == 0) { flags |= BRIDGE_FLAGS_SELF; } else if (strcmp(*argv, "master") == 0) 
{ @@ -83,15 +94,40 @@ static int vlan_modify(int cmd, int argc, char **argv) return -1; } - vinfo.vid = vid; + if (vinfo.flags & BRIDGE_VLAN_INFO_RANGE_BEGIN) { + if (vid_end == -1 || vid_end >= 4096 || vid >= vid_end) { + fprintf(stderr, "Invalid VLAN range \"%hu-%hu\"\n", + vid, vid_end); + return -1; + } + if (vinfo.flags & BRIDGE_VLAN_INFO_PVID) { + fprintf(stderr, + "pvid cannot be configured for a vlan range\n"); + return -1; + } + } afspec = addattr_nest(&req.n, sizeof(req), IFLA_AF_SPEC); if (flags) addattr16(&req.n, sizeof(req), IFLA_BRIDGE_FLAGS, flags); - addattr_l(&req.n, sizeof(req), IFLA_BRIDGE_VLAN_INFO, &vinfo, - sizeof(vinfo)); + vinfo.vid = vid; + if (vid_end != -1) { + /* send vlan range start */ + addattr_l(&req.n, sizeof(req), IFLA_BRIDGE_VLAN_INFO, &vinfo, + sizeof(vinfo)); + vinfo.flags &= ~BRIDGE_VLAN_INFO_RANGE_BEGIN; + + /* Now send the vlan range end */ + vinfo.flags |= BRIDGE_VLAN_INFO_RANGE_END; + vinfo.vid = vid_end; + addattr_l(&req.n, sizeof(req), IFLA_BRIDGE_VLAN_INFO, &vinfo, + sizeof(vinfo)); + } else { + addattr_l(&req.n, sizeof(req), IFLA_BRIDGE_VLAN_INFO, &vinfo, + sizeof(vinfo)); + } addattr_nest_end(&req.n, afspec); @@ -146,7 +182,12 @@ static int print_vlan(const struct sockaddr_nl *who, continue; vinfo = RTA_DATA(i); - fprintf(fp, "\t %hu", vinfo->vid); + if (vinfo->flags & BRIDGE_VLAN_INFO_RANGE_END) + fprintf(fp, "-%hu", vinfo->vid); + else + fprintf(fp, "\t %hu", vinfo->vid); + if (vinfo->flags & BRIDGE_VLAN_INFO_RANGE_BEGIN) + continue; if (vinfo->flags & BRIDGE_VLAN_INFO_PVID) fprintf(fp, " PVID"); if (vinfo->flags & BRIDGE_VLAN_INFO_UNTAGGED) @@ -182,7 +223,9 @@ static int vlan_show(int argc, char **argv) } if (rtnl_wilddump_req_filter(&rth, PF_BRIDGE, RTM_GETLINK, - RTEXT_FILTER_BRVLAN) < 0) { + (compress_vlans ? 
+ RTEXT_FILTER_BRVLAN_COMPRESSED : + RTEXT_FILTER_BRVLAN)) < 0) { perror("Cannont send dump request"); exit(1); } diff --git a/include/SNAPSHOT.h b/include/SNAPSHOT.h index 46e6d4ae..35bda66a 100644 --- a/include/SNAPSHOT.h +++ b/include/SNAPSHOT.h @@ -1 +1 @@ -static const char SNAPSHOT[] = "141224"; +static const char SNAPSHOT[] = "150210"; diff --git a/include/linux/tc_act/tc_bpf.h b/include/linux/tc_act/tc_bpf.h new file mode 100644 index 00000000..5288bd77 --- /dev/null +++ b/include/linux/tc_act/tc_bpf.h @@ -0,0 +1,31 @@ +/* + * Copyright (c) 2015 Jiri Pirko + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + */ + +#ifndef __LINUX_TC_BPF_H +#define __LINUX_TC_BPF_H + +#include + +#define TCA_ACT_BPF 13 + +struct tc_act_bpf { + tc_gen; +}; + +enum { + TCA_ACT_BPF_UNSPEC, + TCA_ACT_BPF_TM, + TCA_ACT_BPF_PARMS, + TCA_ACT_BPF_OPS_LEN, + TCA_ACT_BPF_OPS, + __TCA_ACT_BPF_MAX, +}; +#define TCA_ACT_BPF_MAX (__TCA_ACT_BPF_MAX - 1) + +#endif diff --git a/include/namespace.h b/include/namespace.h index b8c5cad6..a2ac7dcc 100644 --- a/include/namespace.h +++ b/include/namespace.h @@ -3,6 +3,7 @@ #include #include +#include #include #define NETNS_RUN_DIR "/var/run/netns" @@ -30,7 +31,7 @@ #endif #ifndef HAVE_SETNS -static int setns(int fd, int nstype) +static inline int setns(int fd, int nstype) { #ifdef __NR_setns return syscall(__NR_setns, fd, nstype); @@ -43,5 +44,11 @@ static int setns(int fd, int nstype) extern int netns_switch(char *netns); extern int netns_get_fd(const char *netns); +extern int netns_foreach(int (*func)(char *nsname, void *arg), void *arg); + +struct netns_func { + int (*func)(char *nsname, void *arg); + void *arg; +}; #endif /* __NAMESPACE_H__ */ diff --git a/include/utils.h b/include/utils.h index e1fe7cfc..3da22837 100644 --- 
a/include/utils.h +++ b/include/utils.h @@ -5,6 +5,7 @@ #include #include #include +#include #include "libnetlink.h" #include "ll_map.h" @@ -23,6 +24,7 @@ extern int timestamp_short; extern char * _SL_; extern int max_flush_loops; extern int batch_mode; +extern bool do_all; #ifndef IPPROTO_ESP #define IPPROTO_ESP 50 @@ -162,4 +164,7 @@ int iplink_parse(int argc, char **argv, struct iplink_req *req, char **name, char **type, char **link, char **dev, int *group, int *index); +extern int do_each_netns(int (*func)(char *nsname, void *arg), void *arg, + bool show_label); + #endif /* __UTILS_H__ */ diff --git a/ip/ip.c b/ip/ip.c index 850a0017..da16b15f 100644 --- a/ip/ip.c +++ b/ip/ip.c @@ -36,6 +36,7 @@ char * _SL_ = NULL; int force = 0; int max_flush_loops = 10; int batch_mode = 0; +bool do_all = false; struct rtnl_handle rth = { .fd = -1 }; @@ -55,7 +56,7 @@ static void usage(void) " -4 | -6 | -I | -D | -B | -0 |\n" " -l[oops] { maximum-addr-flush-attempts } |\n" " -o[neline] | -t[imestamp] | -ts[hort] | -b[atch] [filename] |\n" -" -rc[vbuf] [size] | -n[etns] name }\n"); +" -rc[vbuf] [size] | -n[etns] name | -a[ll] }\n"); exit(-1); } @@ -270,6 +271,8 @@ int main(int argc, char **argv) NEXT_ARG(); if (netns_switch(argv[1])) exit(-1); + } else if (matches(opt, "-all") == 0) { + do_all = true; } else { fprintf(stderr, "Option \"%s\" is unknown, try \"ip -help\".\n", opt); exit(-1); diff --git a/ip/ipaddress.c b/ip/ipaddress.c index d5e863dd..3730424a 100644 --- a/ip/ipaddress.c +++ b/ip/ipaddress.c @@ -970,7 +970,8 @@ struct nlmsg_chain struct nlmsg_list *tail; }; -static int print_selected_addrinfo(int ifindex, struct nlmsg_list *ainfo, FILE *fp) +static int print_selected_addrinfo(struct ifinfomsg *ifi, + struct nlmsg_list *ainfo, FILE *fp) { for ( ;ainfo ; ainfo = ainfo->next) { struct nlmsghdr *n = &ainfo->h; @@ -982,10 +983,13 @@ static int print_selected_addrinfo(int ifindex, struct nlmsg_list *ainfo, FILE * if (n->nlmsg_len < NLMSG_LENGTH(sizeof(ifa))) return -1; 
- if (ifa->ifa_index != ifindex || + if (ifa->ifa_index != ifi->ifi_index || (filter.family && filter.family != ifa->ifa_family)) continue; + if (filter.up && !(ifi->ifi_flags&IFF_UP)) + continue; + print_addrinfo(NULL, n, fp); } return 0; @@ -1446,7 +1450,7 @@ static int ipaddr_list_flush_or_save(int argc, char **argv, int action) if (no_link || (res = print_linkinfo(NULL, &l->h, stdout)) >= 0) { struct ifinfomsg *ifi = NLMSG_DATA(&l->h); if (filter.family != AF_PACKET) - print_selected_addrinfo(ifi->ifi_index, + print_selected_addrinfo(ifi, ainfo.head, stdout); if (res > 0 && !do_link && show_stats) print_link_stats(stdout, &l->h); diff --git a/ip/iplink_can.c b/ip/iplink_can.c index fb503321..f1b089df 100644 --- a/ip/iplink_can.c +++ b/ip/iplink_can.c @@ -37,6 +37,7 @@ static void print_usage(FILE *f) "\t[ one-shot { on | off } ]\n" "\t[ berr-reporting { on | off } ]\n" "\t[ fd { on | off } ]\n" + "\t[ fd-non-iso { on | off } ]\n" "\t[ presume-ack { on | off } ]\n" "\n" "\t[ restart-ms TIME-MS ]\n" @@ -100,6 +101,7 @@ static void print_ctrlmode(FILE *f, __u32 cm) _PF(CAN_CTRLMODE_ONE_SHOT, "ONE-SHOT"); _PF(CAN_CTRLMODE_BERR_REPORTING, "BERR-REPORTING"); _PF(CAN_CTRLMODE_FD, "FD"); + _PF(CAN_CTRLMODE_FD_NON_ISO, "FD-NON-ISO"); _PF(CAN_CTRLMODE_PRESUME_ACK, "PRESUME-ACK"); #undef _PF if (cm) @@ -203,6 +205,10 @@ static int can_parse_opt(struct link_util *lu, int argc, char **argv, NEXT_ARG(); set_ctrlmode("fd", *argv, &cm, CAN_CTRLMODE_FD); + } else if (matches(*argv, "fd-non-iso") == 0) { + NEXT_ARG(); + set_ctrlmode("fd-non-iso", *argv, &cm, + CAN_CTRLMODE_FD_NON_ISO); } else if (matches(*argv, "presume-ack") == 0) { NEXT_ARG(); set_ctrlmode("presume-ack", *argv, &cm, diff --git a/ip/iplink_vxlan.c b/ip/iplink_vxlan.c index 9cc3ec38..473ff97a 100644 --- a/ip/iplink_vxlan.c +++ b/ip/iplink_vxlan.c @@ -30,6 +30,7 @@ static void print_explain(FILE *f) fprintf(f, " [ [no]l2miss ] [ [no]l3miss ]\n"); fprintf(f, " [ ageing SECONDS ] [ maxaddress NUMBER ]\n"); 
fprintf(f, " [ [no]udpcsum ] [ [no]udp6zerocsumtx ] [ [no]udp6zerocsumrx ]\n"); + fprintf(f, " [ gbp ]\n"); fprintf(f, "\n"); fprintf(f, "Where: VNI := 0-16777215\n"); fprintf(f, " ADDR := { IP_ADDRESS | any }\n"); @@ -68,6 +69,7 @@ static int vxlan_parse_opt(struct link_util *lu, int argc, char **argv, __u8 udpcsum = 0; __u8 udp6zerocsumtx = 0; __u8 udp6zerocsumrx = 0; + __u8 gbp = 0; int dst_port_set = 0; struct ifla_vxlan_port_range range = { 0, 0 }; @@ -197,6 +199,8 @@ static int vxlan_parse_opt(struct link_util *lu, int argc, char **argv, udp6zerocsumrx = 1; } else if (!matches(*argv, "noudp6zerocsumrx")) { udp6zerocsumrx = 0; + } else if (!matches(*argv, "gbp")) { + gbp = 1; } else if (matches(*argv, "help") == 0) { explain(); return -1; @@ -268,6 +272,10 @@ static int vxlan_parse_opt(struct link_util *lu, int argc, char **argv, if (dstport) addattr16(n, 1024, IFLA_VXLAN_PORT, htons(dstport)); + if (gbp) + addattr_l(n, 1024, IFLA_VXLAN_GBP, NULL, 0); + + return 0; } @@ -398,6 +406,9 @@ static void vxlan_print_opt(struct link_util *lu, FILE *f, struct rtattr *tb[]) if (tb[IFLA_VXLAN_UDP_ZERO_CSUM6_RX] && rta_getattr_u8(tb[IFLA_VXLAN_UDP_ZERO_CSUM6_RX])) fputs("udp6zerocsumrx ", f); + + if (tb[IFLA_VXLAN_GBP]) + fputs("gbp ", f); } static void vxlan_print_help(struct link_util *lu, int argc, char **argv, diff --git a/ip/ipnetns.c b/ip/ipnetns.c index 123318eb..e4038ea7 100644 --- a/ip/ipnetns.c +++ b/ip/ipnetns.c @@ -23,10 +23,10 @@ static int usage(void) { fprintf(stderr, "Usage: ip netns list\n"); fprintf(stderr, " ip netns add NAME\n"); - fprintf(stderr, " ip netns delete NAME\n"); + fprintf(stderr, " ip [-all] netns delete [NAME]\n"); fprintf(stderr, " ip netns identify [PID]\n"); fprintf(stderr, " ip netns pids NAME\n"); - fprintf(stderr, " ip netns exec NAME cmd ...\n"); + fprintf(stderr, " ip [-all] netns exec [NAME] cmd ...\n"); fprintf(stderr, " ip netns monitor\n"); exit(-1); } @@ -51,29 +51,10 @@ static int netns_list(int argc, char **argv) return 0; 
} -static int netns_exec(int argc, char **argv) +static int cmd_exec(const char *cmd, char **argv, bool do_fork) { - /* Setup the proper environment for apps that are not netns - * aware, and execute a program in that environment. - */ - const char *cmd; - - if (argc < 1) { - fprintf(stderr, "No netns name specified\n"); - return -1; - } - if (argc < 2) { - fprintf(stderr, "No command specified\n"); - return -1; - } - cmd = argv[1]; - - if (netns_switch(argv[0])) - return -1; - fflush(stdout); - - if (batch_mode) { + if (do_fork) { int status; pid_t pid; @@ -91,23 +72,56 @@ static int netns_exec(int argc, char **argv) } if (WIFEXITED(status)) { - /* ip must return the status of the child, - * but do_cmd() will add a minus to this, - * so let's add another one here to cancel it. - */ - return -WEXITSTATUS(status); + return WEXITSTATUS(status); } exit(1); } } - if (execvp(cmd, argv + 1) < 0) + if (execvp(cmd, argv) < 0) fprintf(stderr, "exec of \"%s\" failed: %s\n", - cmd, strerror(errno)); + cmd, strerror(errno)); _exit(1); } +static int on_netns_exec(char *nsname, void *arg) +{ + char **argv = arg; + cmd_exec(argv[1], argv + 1, true); + return 0; +} + +static int netns_exec(int argc, char **argv) +{ + /* Setup the proper environment for apps that are not netns + * aware, and execute a program in that environment. + */ + const char *cmd; + + if (argc < 1 && !do_all) { + fprintf(stderr, "No netns name specified\n"); + return -1; + } + if ((argc < 2 && !do_all) || (argc < 1 && do_all)) { + fprintf(stderr, "No command specified\n"); + return -1; + } + + if (do_all) + return do_each_netns(on_netns_exec, --argv, 1); + + if (netns_switch(argv[0])) + return -1; + + /* ip must return the status of the child, + * but do_cmd() will add a minus to this, + * so let's add another one here to cancel it. 
+ */ + cmd = argv[1]; + return -cmd_exec(cmd, argv + 1, !!batch_mode); +} + static int is_pid(const char *str) { int ch; @@ -245,18 +259,11 @@ static int netns_identify(int argc, char **argv) } -static int netns_delete(int argc, char **argv) +static int on_netns_del(char *nsname, void *arg) { - const char *name; char netns_path[MAXPATHLEN]; - if (argc < 1) { - fprintf(stderr, "No netns name specified\n"); - return -1; - } - - name = argv[0]; - snprintf(netns_path, sizeof(netns_path), "%s/%s", NETNS_RUN_DIR, name); + snprintf(netns_path, sizeof(netns_path), "%s/%s", NETNS_RUN_DIR, nsname); umount2(netns_path, MNT_DETACH); if (unlink(netns_path) < 0) { fprintf(stderr, "Cannot remove namespace file \"%s\": %s\n", @@ -266,6 +273,19 @@ static int netns_delete(int argc, char **argv) return 0; } +static int netns_delete(int argc, char **argv) +{ + if (argc < 1 && !do_all) { + fprintf(stderr, "No netns name specified\n"); + return -1; + } + + if (do_all) + return netns_foreach(on_netns_del, NULL); + + return on_netns_del(argv[0], NULL); +} + static int create_netns_dir(void) { /* Create the base netns directory if it doesn't exist */ diff --git a/ip/link_gre.c b/ip/link_gre.c index 47b64cb8..1d783876 100644 --- a/ip/link_gre.c +++ b/ip/link_gre.c @@ -31,7 +31,7 @@ static void print_usage(FILE *f) fprintf(f, " [ ttl TTL ] [ tos TOS ] [ [no]pmtudisc ] [ dev PHYS_DEV ]\n"); fprintf(f, " [ noencap ] [ encap { fou | gue | none } ]\n"); fprintf(f, " [ encap-sport PORT ] [ encap-dport PORT ]\n"); - fprintf(f, " [ [no]encap-csum ] [ [no]encap-csum6 ]\n"); + fprintf(f, " [ [no]encap-csum ] [ [no]encap-csum6 ] [ [no]encap-remcsum ]\n"); fprintf(f, "\n"); fprintf(f, "Where: NAME := STRING\n"); fprintf(f, " ADDR := { IP_ADDRESS | any }\n"); @@ -287,6 +287,10 @@ get_failed: encapflags |= TUNNEL_ENCAP_FLAG_CSUM6; } else if (strcmp(*argv, "noencap-udp6-csum") == 0) { encapflags |= ~TUNNEL_ENCAP_FLAG_CSUM6; + } else if (strcmp(*argv, "encap-remcsum") == 0) { + encapflags |= 
TUNNEL_ENCAP_FLAG_REMCSUM; + } else if (strcmp(*argv, "noencap-remcsum") == 0) { + encapflags &= ~TUNNEL_ENCAP_FLAG_REMCSUM; } else usage(); argc--; argv++; @@ -445,6 +449,11 @@ static void gre_print_opt(struct link_util *lu, FILE *f, struct rtattr *tb[]) fputs("encap-csum6 ", f); else fputs("noencap-csum6 ", f); + + if (flags & TUNNEL_ENCAP_FLAG_REMCSUM) + fputs("encap-remcsum ", f); + else + fputs("noencap-remcsum ", f); } } diff --git a/ip/link_iptnl.c b/ip/link_iptnl.c index 94871178..cab174f9 100644 --- a/ip/link_iptnl.c +++ b/ip/link_iptnl.c @@ -31,7 +31,7 @@ static void print_usage(FILE *f, int sit) fprintf(f, " [ 6rd-prefix ADDR ] [ 6rd-relay_prefix ADDR ] [ 6rd-reset ]\n"); fprintf(f, " [ noencap ] [ encap { fou | gue | none } ]\n"); fprintf(f, " [ encap-sport PORT ] [ encap-dport PORT ]\n"); - fprintf(f, " [ [no]encap-csum ] [ [no]encap-csum6 ]\n"); + fprintf(f, " [ [no]encap-csum ] [ [no]encap-csum6 ] [ [no]encap-remcsum ]\n"); if (sit) { fprintf(f, " [ mode { ip6ip | ipip | any } ]\n"); fprintf(f, " [ isatap ]\n"); @@ -256,6 +256,10 @@ get_failed: encapflags |= TUNNEL_ENCAP_FLAG_CSUM6; } else if (strcmp(*argv, "noencap-udp6-csum") == 0) { encapflags &= ~TUNNEL_ENCAP_FLAG_CSUM6; + } else if (strcmp(*argv, "encap-remcsum") == 0) { + encapflags |= TUNNEL_ENCAP_FLAG_REMCSUM; + } else if (strcmp(*argv, "noencap-remcsum") == 0) { + encapflags &= ~TUNNEL_ENCAP_FLAG_REMCSUM; } else if (strcmp(*argv, "6rd-prefix") == 0) { inet_prefix prefix; NEXT_ARG(); @@ -438,6 +442,11 @@ static void iptunnel_print_opt(struct link_util *lu, FILE *f, struct rtattr *tb[ fputs("encap-csum6 ", f); else fputs("noencap-csum6 ", f); + + if (flags & TUNNEL_ENCAP_FLAG_REMCSUM) + fputs("encap-remcsum ", f); + else + fputs("noencap-remcsum ", f); } } diff --git a/lib/namespace.c b/lib/namespace.c index 65c1e3d7..c03a103a 100644 --- a/lib/namespace.c +++ b/lib/namespace.c @@ -99,3 +99,25 @@ int netns_get_fd(const char *name) } return open(path, O_RDONLY); } + +int netns_foreach(int 
(*func)(char *nsname, void *arg), void *arg) +{ + DIR *dir; + struct dirent *entry; + + dir = opendir(NETNS_RUN_DIR); + if (!dir) + return -1; + + while ((entry = readdir(dir)) != NULL) { + if (strcmp(entry->d_name, ".") == 0) + continue; + if (strcmp(entry->d_name, "..") == 0) + continue; + if (func(entry->d_name, arg)) + break; + } + + closedir(dir); + return 0; +} diff --git a/lib/utils.c b/lib/utils.c index f65ceaaf..efebe189 100644 --- a/lib/utils.c +++ b/lib/utils.c @@ -31,6 +31,7 @@ #include "utils.h" +#include "namespace.h" int timestamp_short = 0; @@ -878,3 +879,30 @@ void print_nlmsg_timestamp(FILE *fp, const struct nlmsghdr *n) tstr[strlen(tstr)-1] = 0; fprintf(fp, "Timestamp: %s %lu us\n", tstr, usecs); } + +static int on_netns(char *nsname, void *arg) +{ + struct netns_func *f = arg; + + if (netns_switch(nsname)) + return -1; + + return f->func(nsname, f->arg); +} + +static int on_netns_label(char *nsname, void *arg) +{ + printf("\nnetns: %s\n", nsname); + return on_netns(nsname, arg); +} + +int do_each_netns(int (*func)(char *nsname, void *arg), void *arg, + bool show_label) +{ + struct netns_func nsf = { .func = func, .arg = arg }; + + if (show_label) + return netns_foreach(on_netns_label, &nsf); + + return netns_foreach(on_netns, &nsf); +} diff --git a/man/man8/ip-link.8.in b/man/man8/ip-link.8.in index 1209b55d..057125b5 100644 --- a/man/man8/ip-link.8.in +++ b/man/man8/ip-link.8.in @@ -221,13 +221,13 @@ Link types: - Virtual tunnel interface GRE over IPv4 .sp .BR gretap -- Virtual L2 tuunel interface GRE over IPv4 +- Virtual L2 tunnel interface GRE over IPv4 .sp .BR ip6gre -- Virtual tuunel interface GRE over IPv6 +- Virtual tunnel interface GRE over IPv6 .sp .BR ip6gretap -- Virtual L2 tuunel interface GRE over IPv6 +- Virtual L2 tunnel interface GRE over IPv6 .in -8 .TP @@ -276,6 +276,8 @@ the following additional arguments are supported: .BI ageing " SECONDS " .R " ] [ " .BI maxaddress " NUMBER " +.R " ] [ " +.B gbp .R " ]" .in +8 @@ -348,6 
+350,106 @@ are entered into the VXLAN device forwarding database. .BI maxaddress " NUMBER" - specifies the maximum number of FDB entries. +.sp +.B gbp +- enables the Group Policy extension (VXLAN-GBP). + +.in +4 +Allows to transport group policy context across VXLAN network peers. +If enabled, includes the mark of a packet in the VXLAN header for outgoing +packets and fills the packet mark based on the information found in the +VXLAN header for incoming packets. + +Format of upper 16 bits of packet mark (flags): + +.in +2 ++-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +.br +|-|-|-|-|-|-|-|-|-|D|-|-|A|-|-|-| +.br ++-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + +.B D := +Don't Learn bit. When set, this bit indicates that the egress +VTEP MUST NOT learn the source address of the encapsulated frame. + +.B A := +Indicates that the group policy has already been applied to +this packet. Policies MUST NOT be applied by devices when the A bit is set. +.in -2 + +Format of lower 16 bits of packet mark (policy ID): + +.in +2 ++-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +.br +| Group Policy ID | +.br ++-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +.in -2 + +Example: + iptables -A OUTPUT [...] -j MARK --set-mark 0x800FF + +.in -4 + +.in -8 + +.TP +GRE, IPIP, SIT Type Support +For a link of types +.I GRE/IPIP/SIT +the following additional arguments are supported: + +.BI "ip link add " DEVICE +.BR type " { gre | ipip | sit } " +.BI " remote " ADDR " local " ADDR +.R " [ " +.BR encap " { fou | gue | none } " +.R " ] [ " +.BI "encap-sport { " PORT " | auto } " +.R " ] [ " +.BI "encap-dport " PORT +.R " ] [ " +.I " [no]encap-csum " +.R " ] [ " +.I " [no]encap-remcsum " +.R " ]" + +.in +8 +.sp +.BI remote " ADDR " +- specifies the remote address of the tunnel. + +.sp +.BI local " ADDR " +- specifies the fixed local address for tunneled packets. +It must be an address on another interface on this host. + +.sp +.BR encap " { fou | gue | none } " +- specifies type of secondary UDP encapsulation. 
"fou" indicates +Foo-Over-UDP, "gue" indicates Generic UDP Encapsulation. + +.sp +.BI "encap-sport { " PORT " | auto } " +- specifies the source port in UDP encapsulation. +.IR PORT +indicates the port by number, "auto" +indicates that the port number should be chosen automatically +(the kernel picks a flow based on the flow hash of the +encapsulated packet). + +.sp +.I [no]encap-csum +- specifies if UDP checksums are enabled in the secondary +encapsulation. + +.sp +.I [no]encap-remcsum +- specifies if Remote Checksum Offload is enabled. This is only +applicable for Generic UDP Encapsulation. + .in -8 .TP @@ -386,7 +488,7 @@ the following additional arguments are supported: .sp .BI local " ADDR " - specifies the fixed local IPv6 address for tunneled packets. -It must be and address on another interface on this host. +It must be an address on another interface on this host. .sp .BI [i|o]seq @@ -762,6 +864,15 @@ Removes vlan device. ip link help gre .RS 4 Display help for the gre link type. +.RE +.PP +ip link add name tun1 type ipip remote 192.168.1.1 +local 192.168.1.2 ttl 225 encap gue encap-sport auto +encap-dport 5555 encap-csum encap-remcsum +.RS 4 +Creates an IPIP that is encapsulated with Generic UDP Encapsulation, +and the outer UDP checksum and remote checksum offload are enabled. + .RE .SH SEE ALSO diff --git a/man/man8/ip-netns.8 b/man/man8/ip-netns.8 index 74343ed6..8e6999c0 100644 --- a/man/man8/ip-netns.8 +++ b/man/man8/ip-netns.8 @@ -16,9 +16,13 @@ ip-netns \- process network namespace management .BR "ip netns" " { " list " } " .ti -8 -.BR "ip netns" " { " add " | " delete " } " +.B ip netns add .I NETNSNAME +.ti -8 +.B ip [-all] netns del +.RI "[ " NETNSNAME " ]" + .ti -8 .BR "ip netns identify" .RI "[ " PID " ]" @@ -28,8 +32,8 @@ ip-netns \- process network namespace management .I NETNSNAME .ti -8 -.BR "ip netns exec " -.I NETNSNAME command ... +.BR "ip [-all] netns exec " +.RI "[ " NETNSNAME " ] " command ... 
.ti -8 .BR "ip netns monitor" @@ -76,7 +80,7 @@ If NAME is available in /var/run/netns/ this command creates a new network namespace and assigns NAME. .TP -.B ip netns delete NAME - delete the name of a network namespace +.B ip [-all] netns delete [ NAME ] - delete the name of network namespace(s) .sp If NAME is present in /var/run/netns it is umounted and the mount point is removed. If this is the last user of the network namespace the @@ -84,6 +88,10 @@ network namespace will be freed, otherwise the network namespace persists until it has no more users. ip netns delete may fail if the mount point is in use in another mount namespace. +If +.B -all +option was specified then all the network namespace names will be removed. + .TP .B ip netns identify [PID] - Report network namespaces names for process .sp @@ -98,7 +106,7 @@ This command walks through proc and finds all of the process who have the named network namespace as their primary network namespace. .TP -.B ip netns exec NAME cmd ... - Run cmd in the named network namespace +.B ip [-all] netns exec [ NAME ] cmd ... - Run cmd in the named network namespace .sp This command allows applications that are network namespace unaware to be run in something other than the default network namespace with @@ -107,6 +115,16 @@ in the customary global locations. A network namespace and bind mounts are used to move files from their network namespace specific location to their default locations without affecting other processes. +If +.B -all +option was specified then +.B cmd +will be executed synchronously in each named network namespace even if +.B cmd +fails on some of them. The network namespace name is printed before each +.B cmd +execution. 
+ .TP .B ip netns monitor - Report as network namespace names are added and deleted .sp diff --git a/man/man8/ip.8 b/man/man8/ip.8 index 0bae59e7..016e8c66 100644 --- a/man/man8/ip.8 +++ b/man/man8/ip.8 @@ -32,7 +32,8 @@ ip \- show / manipulate routing, devices, policy routing and tunnels \fB\-f\fR[\fIamily\fR] { .BR inet " | " inet6 " | " ipx " | " dnet " | " link " } | " \fB\-o\fR[\fIneline\fR] | -\fB\-n\fR[\fIetns\fR] name } +\fB\-n\fR[\fIetns\fR] name | +\fB\-a\fR[\fIll\fR] } .SH OPTIONS @@ -155,6 +156,10 @@ to .RI "-n[etns] " NETNS " [ " OPTIONS " ] " OBJECT " { " COMMAND " | " .BR help " }" +.TP +.BR "\-a" , " \-all" +executes the specified command over all objects, provided the command supports this option. + .SH IP - COMMAND SYNTAX .SS diff --git a/man/man8/ss.8 b/man/man8/ss.8 index 450649ab..b7fbaefa 100644 --- a/man/man8/ss.8 +++ b/man/man8/ss.8 @@ -84,6 +84,9 @@ context of the creating process, however the context shown will reflect any policy role, type and/or range transition rules applied, and is therefore a useful reference. .TP +.B \-N NSNAME, \-\-net=NSNAME +Switch to the specified network namespace name. +.TP .B \-b, \-\-bpf Show socket BPF filters (only administrators are allowed to get these information). 
.TP diff --git a/misc/Makefile b/misc/Makefile index 004bcc3a..b7ecba90 100644 --- a/misc/Makefile +++ b/misc/Makefile @@ -10,6 +10,10 @@ ifeq ($(HAVE_SELINUX),y) CFLAGS += $(shell pkg-config --cflags libselinux) -DHAVE_SELINUX endif +ifeq ($(IP_CONFIG_SETNS),y) + CFLAGS += -DHAVE_SETNS +endif + all: $(TARGETS) ss: $(SSOBJ) diff --git a/misc/ss.c b/misc/ss.c index f434f57f..0a6a65ee 100644 --- a/misc/ss.c +++ b/misc/ss.c @@ -31,6 +31,7 @@ #include "rt_names.h" #include "ll_map.h" #include "libnetlink.h" +#include "namespace.h" #include "SNAPSHOT.h" #include @@ -170,11 +171,11 @@ static const struct filter default_dbs[MAX_DB] = { .families = (1 << AF_INET) | (1 << AF_INET6), }, [UDP_DB] = { - .states = (1 << SS_CLOSE), + .states = (1 << SS_ESTABLISHED), .families = (1 << AF_INET) | (1 << AF_INET6), }, [RAW_DB] = { - .states = (1 << SS_CLOSE), + .states = (1 << SS_ESTABLISHED), .families = (1 << AF_INET) | (1 << AF_INET6), }, [UNIX_DG_DB] = { @@ -689,23 +690,59 @@ static const char *sstate_namel[] = { [SS_CLOSING] = "closing", }; +struct dctcpstat +{ + unsigned int ce_state; + unsigned int alpha; + unsigned int ab_ecn; + unsigned int ab_tot; + bool enabled; +}; + struct tcpstat { - inet_prefix local; - inet_prefix remote; - int lport; - int rport; - int state; - int rq, wq; - int timer; - int timeout; - int retrs; - unsigned ino; - int probes; - unsigned uid; - int refcnt; - unsigned long long sk; - int rto, ato, qack, cwnd, ssthresh; + inet_prefix local; + inet_prefix remote; + int lport; + int rport; + int state; + int rq, wq; + unsigned ino; + unsigned uid; + int refcnt; + unsigned int iface; + unsigned long long sk; + int timer; + int timeout; + int probes; + char *cong_alg; + double rto, ato, rtt, rttvar; + int qack, cwnd, ssthresh, backoff; + double send_bps; + int snd_wscale; + int rcv_wscale; + int mss; + unsigned int lastsnd; + unsigned int lastrcv; + unsigned int lastack; + double pacing_rate; + double pacing_rate_max; + unsigned int unacked; + unsigned int 
retrans; + unsigned int retrans_total; + unsigned int lost; + unsigned int sacked; + unsigned int fackets; + unsigned int reordering; + double rcv_rtt; + int rcv_space; + bool has_ts_opt; + bool has_sack_opt; + bool has_ecn_opt; + bool has_ecnseen_opt; + bool has_fastopen_opt; + bool has_wscale_opt; + struct dctcpstat *dctcp; }; static const char *tmr_name[] = { @@ -744,12 +781,6 @@ static const char *print_ms_timer(int timeout) return buf; } -static const char *print_hz_timer(int timeout) -{ - int hz = get_user_hz(); - return print_ms_timer(((timeout*1000) + hz-1)/hz); -} - struct scache { struct scache *next; @@ -1439,125 +1470,250 @@ out: return res; } -static int tcp_show_line(char *line, const struct filter *f, int family) +static char *proto_name(int protocol) +{ + switch (protocol) { + case IPPROTO_UDP: + return "udp"; + case IPPROTO_TCP: + return "tcp"; + case IPPROTO_DCCP: + return "dccp"; + } + + return "???"; +} + +static void inet_stats_print(struct tcpstat *s, int protocol) +{ + char *buf = NULL; + + if (netid_width) + printf("%-*s ", netid_width, proto_name(protocol)); + if (state_width) + printf("%-*s ", state_width, sstate_name[s->state]); + + printf("%-6d %-6d ", s->rq, s->wq); + + formatted_print(&s->local, s->lport, s->iface); + formatted_print(&s->remote, s->rport, 0); + + if (show_options) { + if (s->timer) { + if (s->timer > 4) + s->timer = 5; + printf(" timer:(%s,%s,%d)", + tmr_name[s->timer], + print_ms_timer(s->timeout), + s->retrans); + } + } + + if (show_proc_ctx || show_sock_ctx) { + if (find_entry(s->ino, &buf, + (show_proc_ctx & show_sock_ctx) ? 
+ PROC_SOCK_CTX : PROC_CTX) > 0) { + printf(" users:(%s)", buf); + free(buf); + } + } else if (show_users) { + if (find_entry(s->ino, &buf, USERS) > 0) { + printf(" users:(%s)", buf); + free(buf); + } + } +} + +static int proc_parse_inet_addr(char *loc, char *rem, int family, struct tcpstat *s) +{ + s->local.family = s->remote.family = family; + if (family == AF_INET) { + sscanf(loc, "%x:%x", s->local.data, (unsigned*)&s->lport); + sscanf(rem, "%x:%x", s->remote.data, (unsigned*)&s->rport); + s->local.bytelen = s->remote.bytelen = 4; + return 0; + } else { + sscanf(loc, "%08x%08x%08x%08x:%x", + s->local.data, + s->local.data + 1, + s->local.data + 2, + s->local.data + 3, + &s->lport); + sscanf(rem, "%08x%08x%08x%08x:%x", + s->remote.data, + s->remote.data + 1, + s->remote.data + 2, + s->remote.data + 3, + &s->rport); + s->local.bytelen = s->remote.bytelen = 16; + return 0; + } + return -1; +} + +static int proc_inet_split_line(char *line, char **loc, char **rem, char **data) { - struct tcpstat s; - char *loc, *rem, *data; - char opt[256]; - int n; char *p; if ((p = strchr(line, ':')) == NULL) return -1; - loc = p+2; - if ((p = strchr(loc, ':')) == NULL) + *loc = p+2; + if ((p = strchr(*loc, ':')) == NULL) return -1; - p[5] = 0; - rem = p+6; - if ((p = strchr(rem, ':')) == NULL) + p[5] = 0; + *rem = p+6; + if ((p = strchr(*rem, ':')) == NULL) return -1; + p[5] = 0; - data = p+6; + *data = p+6; + return 0; +} - do { - int state = (data[1] >= 'A') ? (data[1] - 'A' + 10) : (data[1] - '0'); +static char *sprint_bw(char *buf, double bw) +{ + if (bw > 1000000.) + sprintf(buf,"%.1fM", bw / 1000000.); + else if (bw > 1000.) 
+ sprintf(buf,"%.1fK", bw / 1000.); + else + sprintf(buf, "%g", bw); - if (!(f->states & (1<has_ts_opt) + printf(" ts"); + if (s->has_sack_opt) + printf(" sack"); + if (s->has_ecn_opt) + printf(" ecn"); + if (s->has_ecnseen_opt) + printf(" ecnseen"); + if (s->has_fastopen_opt) + printf(" fastopen"); + if (s->cong_alg) + printf(" %s", s->cong_alg); + if (s->has_wscale_opt) + printf(" wscale:%d,%d", s->snd_wscale, s->rcv_wscale); + if (s->rto) + printf(" rto:%g", s->rto); + if (s->backoff) + printf(" backoff:%u", s->backoff); + if (s->rtt) + printf(" rtt:%g/%g", s->rtt, s->rttvar); + if (s->ato) + printf(" ato:%g", s->ato); + + if (s->qack) + printf(" qack:%d", s->qack); + if (s->qack & 1) + printf(" bidir"); + + if (s->mss) + printf(" mss:%d", s->mss); + if (s->cwnd && s->cwnd != 2) + printf(" cwnd:%d", s->cwnd); + if (s->ssthresh) + printf(" ssthresh:%d", s->ssthresh); + + if (s->dctcp && s->dctcp->enabled) { + struct dctcpstat *dctcp = s->dctcp; + + printf(" ce_state %u alpha %u ab_ecn %u ab_tot %u", + dctcp->ce_state, dctcp->alpha, dctcp->ab_ecn, + dctcp->ab_tot); + } else if (s->dctcp) { + printf(" fallback_mode"); } + if (s->send_bps) + printf(" send %sbps", sprint_bw(b1, s->send_bps)); + if (s->lastsnd) + printf(" lastsnd:%u", s->lastsnd); + if (s->lastrcv) + printf(" lastrcv:%u", s->lastrcv); + if (s->lastack) + printf(" lastack:%u", s->lastack); + + if (s->pacing_rate) { + printf(" pacing_rate %sbps", sprint_bw(b1, s->pacing_rate)); + if (s->pacing_rate_max) + printf("/%sbps", sprint_bw(b1, + s->pacing_rate_max)); + } + + if (s->unacked) + printf(" unacked:%u", s->unacked); + if (s->retrans || s->retrans_total) + printf(" retrans:%u/%u", s->retrans, s->retrans_total); + if (s->lost) + printf(" lost:%u", s->lost); + if (s->sacked && s->state != SS_LISTEN) + printf(" sacked:%u", s->sacked); + if (s->fackets) + printf(" fackets:%u", s->fackets); + if (s->reordering != 3) + printf(" reordering:%d", s->reordering); + if (s->rcv_rtt) + printf(" rcv_rtt:%g", 
s->rcv_rtt); + if (s->rcv_space) + printf(" rcv_space:%d", s->rcv_space); +} + +static int tcp_show_line(char *line, const struct filter *f, int family) +{ + int rto = 0, ato = 0; + struct tcpstat s = {}; + char *loc, *rem, *data; + char opt[256]; + int n; + int hz = get_user_hz(); + + if (proc_inet_split_line(line, &loc, &rem, &data)) + return -1; + + int state = (data[1] >= 'A') ? (data[1] - 'A' + 10) : (data[1] - '0'); + if (!(f->states & (1 << state))) + return 0; + + proc_parse_inet_addr(loc, rem, family, &s); + if (f->f && run_ssfilter(f->f, &s) == 0) return 0; opt[0] = 0; n = sscanf(data, "%x %x:%x %x:%x %x %d %d %u %d %llx %d %d %d %d %d %[^\n]\n", &s.state, &s.wq, &s.rq, - &s.timer, &s.timeout, &s.retrs, &s.uid, &s.probes, &s.ino, - &s.refcnt, &s.sk, &s.rto, &s.ato, &s.qack, + &s.timer, &s.timeout, &s.retrans, &s.uid, &s.probes, &s.ino, + &s.refcnt, &s.sk, &rto, &ato, &s.qack, &s.cwnd, &s.ssthresh, opt); if (n < 17) opt[0] = 0; if (n < 12) { - s.rto = 0; + rto = 0; s.cwnd = 2; s.ssthresh = -1; - s.ato = s.qack = 0; + ato = s.qack = 0; } - if (netid_width) - printf("%-*s ", netid_width, "tcp"); - if (state_width) - printf("%-*s ", state_width, sstate_name[s.state]); + s.retrans = s.timer != 1 ? s.probes : s.retrans; + s.timeout = (s.timeout * 1000 + hz - 1) / hz; + s.ato = (double)ato / hz; + s.qack /= 2; + s.rto = (double)rto; + s.ssthresh = s.ssthresh == -1 ? 0 : s.ssthresh; + s.rto = s.rto != 3 * hz ? s.rto / hz : 0; - printf("%-6d %-6d ", s.rq, s.wq); - - formatted_print(&s.local, s.lport, 0); - formatted_print(&s.remote, s.rport, 0); - - if (show_options) { - if (s.timer) { - if (s.timer > 4) - s.timer = 5; - printf(" timer:(%s,%s,%d)", - tmr_name[s.timer], - print_hz_timer(s.timeout), - s.timer != 1 ? 
s.probes : s.retrs); - } - } - if (show_tcpinfo) { - int hz = get_user_hz(); - if (s.rto && s.rto != 3*hz) - printf(" rto:%g", (double)s.rto/hz); - if (s.ato) - printf(" ato:%g", (double)s.ato/hz); - if (s.cwnd != 2) - printf(" cwnd:%d", s.cwnd); - if (s.ssthresh != -1) - printf(" ssthresh:%d", s.ssthresh); - if (s.qack/2) - printf(" qack:%d", s.qack/2); - if (s.qack&1) - printf(" bidir"); - } - char *buf = NULL; - if (show_proc_ctx || show_sock_ctx) { - if (find_entry(s.ino, &buf, - (show_proc_ctx & show_sock_ctx) ? - PROC_SOCK_CTX : PROC_CTX) > 0) { - printf(" users:(%s)", buf); - free(buf); - } - } else if (show_users) { - if (find_entry(s.ino, &buf, USERS) > 0) { - printf(" users:(%s)", buf); - free(buf); - } - } + inet_stats_print(&s, IPPROTO_TCP); if (show_details) { if (s.uid) @@ -1567,8 +1723,11 @@ static int tcp_show_line(char *line, const struct filter *f, int family) if (opt[0]) printf(" opt:\"%s\"", opt); } - printf("\n"); + if (show_tcpinfo) + tcp_stats_print(&s); + + printf("\n"); return 0; } @@ -1598,23 +1757,27 @@ outerr: return ferror(fp) ? -1 : 0; } -static char *sprint_bw(char *buf, double bw) -{ - if (bw > 1000000.) - sprintf(buf,"%.1fM", bw / 1000000.); - else if (bw > 1000.) 
- sprintf(buf,"%.1fK", bw / 1000.); - else - sprintf(buf, "%g", bw); - - return buf; -} - static void print_skmeminfo(struct rtattr *tb[], int attrtype) { const __u32 *skmeminfo; - if (!tb[attrtype]) + + if (!tb[attrtype]) { + if (attrtype == INET_DIAG_SKMEMINFO) { + if (!tb[INET_DIAG_MEMINFO]) + return; + + const struct inet_diag_meminfo *minfo = + RTA_DATA(tb[INET_DIAG_MEMINFO]); + + printf(" mem:(r%u,w%u,f%u,t%u)", + minfo->idiag_rmem, + minfo->idiag_wmem, + minfo->idiag_fmem, + minfo->idiag_tmem); + } return; + } + skmeminfo = RTA_DATA(tb[attrtype]); printf(" skmem:(r%u,rb%u,t%u,tb%u,f%u,w%u,o%u", @@ -1633,23 +1796,15 @@ static void print_skmeminfo(struct rtattr *tb[], int attrtype) printf(")"); } +#define TCPI_HAS_OPT(info, opt) !!(info->tcpi_options & (opt)) + static void tcp_show_info(const struct nlmsghdr *nlh, struct inet_diag_msg *r, struct rtattr *tb[]) { - char b1[64]; double rtt = 0; + struct tcpstat s = {}; - if (tb[INET_DIAG_SKMEMINFO]) { - print_skmeminfo(tb, INET_DIAG_SKMEMINFO); - } else if (tb[INET_DIAG_MEMINFO]) { - const struct inet_diag_meminfo *minfo - = RTA_DATA(tb[INET_DIAG_MEMINFO]); - printf(" mem:(r%u,w%u,f%u,t%u)", - minfo->idiag_rmem, - minfo->idiag_wmem, - minfo->idiag_fmem, - minfo->idiag_tmem); - } + print_skmeminfo(tb, INET_DIAG_SKMEMINFO); if (tb[INET_DIAG_INFO]) { struct tcp_info *info; @@ -1664,39 +1819,49 @@ static void tcp_show_info(const struct nlmsghdr *nlh, struct inet_diag_msg *r, info = RTA_DATA(tb[INET_DIAG_INFO]); if (show_options) { - if (info->tcpi_options & TCPI_OPT_TIMESTAMPS) - printf(" ts"); - if (info->tcpi_options & TCPI_OPT_SACK) - printf(" sack"); - if (info->tcpi_options & TCPI_OPT_ECN) - printf(" ecn"); - if (info->tcpi_options & TCPI_OPT_ECN_SEEN) - printf(" ecnseen"); - if (info->tcpi_options & TCPI_OPT_SYN_DATA) - printf(" fastopen"); + s.has_ts_opt = TCPI_HAS_OPT(info, TCPI_OPT_TIMESTAMPS); + s.has_sack_opt = TCPI_HAS_OPT(info, TCPI_OPT_SACK); + s.has_ecn_opt = TCPI_HAS_OPT(info, TCPI_OPT_ECN); + 
s.has_ecnseen_opt = TCPI_HAS_OPT(info, TCPI_OPT_ECN_SEEN); + s.has_fastopen_opt = TCPI_HAS_OPT(info, TCPI_OPT_SYN_DATA); } - if (tb[INET_DIAG_CONG]) - printf(" %s", rta_getattr_str(tb[INET_DIAG_CONG])); + if (tb[INET_DIAG_CONG]) { + const char *cong_attr = rta_getattr_str(tb[INET_DIAG_CONG]); + s.cong_alg = malloc(strlen(cong_attr) + 1); + strcpy(s.cong_alg, cong_attr); + } + + if (TCPI_HAS_OPT(info, TCPI_OPT_WSCALE)) { + s.has_wscale_opt = true; + s.snd_wscale = info->tcpi_snd_wscale; + s.rcv_wscale = info->tcpi_rcv_wscale; + } - if (info->tcpi_options & TCPI_OPT_WSCALE) - printf(" wscale:%d,%d", info->tcpi_snd_wscale, - info->tcpi_rcv_wscale); if (info->tcpi_rto && info->tcpi_rto != 3000000) - printf(" rto:%g", (double)info->tcpi_rto/1000); - if (info->tcpi_backoff) - printf(" backoff:%u", info->tcpi_backoff); - if (info->tcpi_rtt) - printf(" rtt:%g/%g", (double)info->tcpi_rtt/1000, - (double)info->tcpi_rttvar/1000); - if (info->tcpi_ato) - printf(" ato:%g", (double)info->tcpi_ato/1000); - if (info->tcpi_snd_mss) - printf(" mss:%d", info->tcpi_snd_mss); - if (info->tcpi_snd_cwnd != 2) - printf(" cwnd:%d", info->tcpi_snd_cwnd); + s.rto = (double)info->tcpi_rto / 1000; + + s.backoff = info->tcpi_backoff; + s.rtt = (double)info->tcpi_rtt / 1000; + s.rttvar = (double)info->tcpi_rttvar / 1000; + s.ato = (double)info->tcpi_ato / 1000; + s.mss = info->tcpi_snd_mss; + s.rcv_space = info->tcpi_rcv_space; + s.rcv_rtt = (double)info->tcpi_rcv_rtt / 1000; + s.lastsnd = info->tcpi_last_data_sent; + s.lastrcv = info->tcpi_last_data_recv; + s.lastack = info->tcpi_last_ack_recv; + s.unacked = info->tcpi_unacked; + s.retrans = info->tcpi_retrans; + s.retrans_total = info->tcpi_total_retrans; + s.lost = info->tcpi_lost; + s.sacked = info->tcpi_sacked; + s.reordering = info->tcpi_reordering; + s.fackets = info->tcpi_fackets; + s.cwnd = info->tcpi_snd_cwnd; + if (info->tcpi_snd_ssthresh < 0xFFFF) - printf(" ssthresh:%d", info->tcpi_snd_ssthresh); + s.ssthresh = 
info->tcpi_snd_ssthresh; rtt = (double) info->tcpi_rtt; if (tb[INET_DIAG_VEGASINFO]) { @@ -1704,89 +1869,51 @@ static void tcp_show_info(const struct nlmsghdr *nlh, struct inet_diag_msg *r, = RTA_DATA(tb[INET_DIAG_VEGASINFO]); if (vinfo->tcpv_enabled && - vinfo->tcpv_rtt && vinfo->tcpv_rtt != 0x7fffffff) + vinfo->tcpv_rtt && vinfo->tcpv_rtt != 0x7fffffff) rtt = vinfo->tcpv_rtt; } if (tb[INET_DIAG_DCTCPINFO]) { + struct dctcpstat *dctcp = malloc(sizeof(struct + dctcpstat)); + const struct tcp_dctcp_info *dinfo = RTA_DATA(tb[INET_DIAG_DCTCPINFO]); - if (dinfo->dctcp_enabled) { - printf(" ce_state %u alpha %u ab_ecn %u ab_tot %u", - dinfo->dctcp_ce_state, dinfo->dctcp_alpha, - dinfo->dctcp_ab_ecn, dinfo->dctcp_ab_tot); - } else { - printf(" fallback_mode"); - } + dctcp->enabled = !!dinfo->dctcp_enabled; + dctcp->ce_state = dinfo->dctcp_ce_state; + dctcp->alpha = dinfo->dctcp_alpha; + dctcp->ab_ecn = dinfo->dctcp_ab_ecn; + dctcp->ab_tot = dinfo->dctcp_ab_tot; + s.dctcp = dctcp; } if (rtt > 0 && info->tcpi_snd_mss && info->tcpi_snd_cwnd) { - printf(" send %sbps", - sprint_bw(b1, (double) info->tcpi_snd_cwnd * - (double) info->tcpi_snd_mss * 8000000. - / rtt)); + s.send_bps = (double) info->tcpi_snd_cwnd * + (double)info->tcpi_snd_mss * 8000000. 
/ rtt; } - if (info->tcpi_last_data_sent) - printf(" lastsnd:%u", info->tcpi_last_data_sent); - - if (info->tcpi_last_data_recv) - printf(" lastrcv:%u", info->tcpi_last_data_recv); - - if (info->tcpi_last_ack_recv) - printf(" lastack:%u", info->tcpi_last_ack_recv); - if (info->tcpi_pacing_rate && - info->tcpi_pacing_rate != ~0ULL) { - printf(" pacing_rate %sbps", - sprint_bw(b1, info->tcpi_pacing_rate * 8.)); + info->tcpi_pacing_rate != ~0ULL) { + s.pacing_rate = info->tcpi_pacing_rate * 8.; if (info->tcpi_max_pacing_rate && - info->tcpi_max_pacing_rate != ~0ULL) - printf("/%sbps", - sprint_bw(b1, info->tcpi_max_pacing_rate * 8.)); + info->tcpi_max_pacing_rate != ~0ULL) + s.pacing_rate_max = info->tcpi_max_pacing_rate * 8.; } - if (info->tcpi_unacked) - printf(" unacked:%u", info->tcpi_unacked); - if (info->tcpi_retrans || info->tcpi_total_retrans) - printf(" retrans:%u/%u", info->tcpi_retrans, - info->tcpi_total_retrans); - if (info->tcpi_lost) - printf(" lost:%u", info->tcpi_lost); - if (info->tcpi_sacked && r->idiag_state != SS_LISTEN) - printf(" sacked:%u", info->tcpi_sacked); - if (info->tcpi_fackets) - printf(" fackets:%u", info->tcpi_fackets); - if (info->tcpi_reordering != 3) - printf(" reordering:%d", info->tcpi_reordering); - if (info->tcpi_rcv_rtt) - printf(" rcv_rtt:%g", (double) info->tcpi_rcv_rtt/1000); - if (info->tcpi_rcv_space) - printf(" rcv_space:%d", info->tcpi_rcv_space); - + tcp_stats_print(&s); + if (s.dctcp) + free(s.dctcp); + if (s.cong_alg) + free(s.cong_alg); } } -static char *proto_name(int protocol) -{ - switch (protocol) { - case IPPROTO_UDP: - return "udp"; - case IPPROTO_TCP: - return "tcp"; - case IPPROTO_DCCP: - return "dccp"; - } - - return "???"; -} - static int inet_show_sock(struct nlmsghdr *nlh, struct filter *f, int protocol) { struct rtattr * tb[INET_DIAG_MAX+1]; struct inet_diag_msg *r = NLMSG_DATA(nlh); - struct tcpstat s; + struct tcpstat s = {}; parse_rtattr(tb, INET_DIAG_MAX, (struct rtattr*)(r+1), nlh->nlmsg_len - 
NLMSG_LENGTH(sizeof(*r))); @@ -1795,52 +1922,28 @@ static int inet_show_sock(struct nlmsghdr *nlh, struct filter *f, int protocol) s.local.family = s.remote.family = r->idiag_family; s.lport = ntohs(r->id.idiag_sport); s.rport = ntohs(r->id.idiag_dport); + s.wq = r->idiag_wqueue; + s.rq = r->idiag_rqueue; + s.timer = r->idiag_timer; + s.timeout = r->idiag_expires; + s.retrans = r->idiag_retrans; + s.ino = r->idiag_inode; + s.uid = r->idiag_uid; + s.iface = r->id.idiag_if; + if (s.local.family == AF_INET) { s.local.bytelen = s.remote.bytelen = 4; } else { s.local.bytelen = s.remote.bytelen = 16; } + memcpy(s.local.data, r->id.idiag_src, s.local.bytelen); memcpy(s.remote.data, r->id.idiag_dst, s.local.bytelen); if (f && f->f && run_ssfilter(f->f, &s) == 0) return 0; - if (netid_width) - printf("%-*s ", netid_width, proto_name(protocol)); - if (state_width) - printf("%-*s ", state_width, sstate_name[s.state]); - - printf("%-6d %-6d ", r->idiag_rqueue, r->idiag_wqueue); - - formatted_print(&s.local, s.lport, r->id.idiag_if); - formatted_print(&s.remote, s.rport, 0); - - if (show_options) { - if (r->idiag_timer) { - if (r->idiag_timer > 4) - r->idiag_timer = 5; - printf(" timer:(%s,%s,%d)", - tmr_name[r->idiag_timer], - print_ms_timer(r->idiag_expires), - r->idiag_retrans); - } - } - char *buf = NULL; - - if (show_proc_ctx || show_sock_ctx) { - if (find_entry(r->idiag_inode, &buf, - (show_proc_ctx & show_sock_ctx) ? - PROC_SOCK_CTX : PROC_CTX) > 0) { - printf(" users:(%s)", buf); - free(buf); - } - } else if (show_users) { - if (find_entry(r->idiag_inode, &buf, USERS) > 0) { - printf(" users:(%s)", buf); - free(buf); - } - } + inet_stats_print(&s, protocol); if (show_details) { if (r->idiag_uid) @@ -1856,13 +1959,13 @@ static int inet_show_sock(struct nlmsghdr *nlh, struct filter *f, int protocol) printf(" %c-%c", mask & 1 ? '-' : '<', mask & 2 ? 
'-' : '>'); } } + if (show_mem || show_tcpinfo) { printf("\n\t"); tcp_show_info(nlh, r, tb); } printf("\n"); - return 0; } @@ -2183,53 +2286,19 @@ outerr: static int dgram_show_line(char *line, const struct filter *f, int family) { - struct tcpstat s; + struct tcpstat s = {}; char *loc, *rem, *data; char opt[256]; int n; - char *p; - if ((p = strchr(line, ':')) == NULL) + if (proc_inet_split_line(line, &loc, &rem, &data)) return -1; - loc = p+2; - if ((p = strchr(loc, ':')) == NULL) - return -1; - p[5] = 0; - rem = p+6; + int state = (data[1] >= 'A') ? (data[1] - 'A' + 10) : (data[1] - '0'); + if (!(f->states & (1 << state))) + return 0; - if ((p = strchr(rem, ':')) == NULL) - return -1; - p[5] = 0; - data = p+6; - - do { - int state = (data[1] >= 'A') ? (data[1] - 'A' + 10) : (data[1] - '0'); - - if (!(f->states & (1<f && run_ssfilter(f->f, &s) == 0) return 0; @@ -2243,31 +2312,7 @@ static int dgram_show_line(char *line, const struct filter *f, int family) if (n < 9) opt[0] = 0; - if (netid_width) - printf("%-*s ", netid_width, dg_proto); - if (state_width) - printf("%-*s ", state_width, sstate_name[s.state]); - - printf("%-6d %-6d ", s.rq, s.wq); - - formatted_print(&s.local, s.lport, 0); - formatted_print(&s.remote, s.rport, 0); - - char *buf = NULL; - - if (show_proc_ctx || show_sock_ctx) { - if (find_entry(s.ino, &buf, - (show_proc_ctx & show_sock_ctx) ? 
- PROC_SOCK_CTX : PROC_CTX) > 0) { - printf(" users:(%s)", buf); - free(buf); - } - } else if (show_users) { - if (find_entry(s.ino, &buf, USERS) > 0) { - printf(" users:(%s)", buf); - free(buf); - } - } + inet_stats_print(&s, IPPROTO_UDP); if (show_details) { if (s.uid) @@ -2277,12 +2322,11 @@ static int dgram_show_line(char *line, const struct filter *f, int family) if (opt[0]) printf(" opt:\"%s\"", opt); } - printf("\n"); + printf("\n"); return 0; } - static int udp_show(struct filter *f) { FILE *fp = NULL; @@ -2351,7 +2395,6 @@ outerr: } while (0); } - struct unixstat { struct unixstat *next; @@ -2365,12 +2408,9 @@ struct unixstat char *name; }; - - int unix_state_map[] = { SS_CLOSE, SS_SYN_SENT, SS_ESTABLISHED, SS_CLOSING }; - #define MAX_UNIX_REMEMBER (1024*1024/sizeof(struct unixstat)) static void unix_list_free(struct unixstat *list) @@ -3207,6 +3247,7 @@ static void _usage(FILE *dest) " -b, --bpf show bpf filter socket information\n" " -Z, --context display process SELinux security contexts\n" " -z, --contexts display process and socket SELinux security contexts\n" +" -N, --net switch to the specified network namespace name\n" "\n" " -4, --ipv4 display only IP version 4 sockets\n" " -6, --ipv6 display only IP version 6 sockets\n" @@ -3306,6 +3347,7 @@ static const struct option long_opts[] = { { "help", 0, 0, 'h' }, { "context", 0, 0, 'Z' }, { "contexts", 0, 0, 'z' }, + { "net", 1, 0, 'N' }, { 0 } }; @@ -3321,7 +3363,7 @@ int main(int argc, char *argv[]) struct filter dbs_filter = {}; int state_filter = 0; - while ((ch = getopt_long(argc, argv, "dhaletuwxnro460spbf:miA:D:F:vVzZ", + while ((ch = getopt_long(argc, argv, "dhaletuwxnro460spbf:miA:D:F:vVzZN:", long_opts, NULL)) != EOF) { switch(ch) { case 'n': @@ -3493,6 +3535,10 @@ int main(int argc, char *argv[]) show_proc_ctx++; user_ent_hash_build(); break; + case 'N': + if (netns_switch(optarg)) + exit(1); + break; case 'h': case '?': help(); diff --git a/tc/Makefile b/tc/Makefile index 9412094f..d831a153 
100644 --- a/tc/Makefile +++ b/tc/Makefile @@ -1,5 +1,5 @@ TCOBJ= tc.o tc_qdisc.o tc_class.o tc_filter.o tc_util.o \ - tc_monitor.o m_police.o m_estimator.o m_action.o \ + tc_monitor.o tc_bpf.o m_police.o m_estimator.o m_action.o \ m_ematch.o emp_ematch.yacc.o emp_ematch.lex.o include ../Config @@ -46,6 +46,7 @@ TCMODULES += m_skbedit.o TCMODULES += m_csum.o TCMODULES += m_simple.o TCMODULES += m_vlan.o +TCMODULES += m_bpf.o TCMODULES += p_ip.o TCMODULES += p_icmp.o TCMODULES += p_tcp.o diff --git a/tc/f_bpf.c b/tc/f_bpf.c index 48635a70..e2af94e3 100644 --- a/tc/f_bpf.c +++ b/tc/f_bpf.c @@ -26,6 +26,7 @@ #include "utils.h" #include "tc_util.h" +#include "tc_bpf.h" static void explain(void) { @@ -44,130 +45,6 @@ static void explain(void) fprintf(stderr, "NOTE: CLASSID is parsed as hexadecimal input.\n"); } -static int bpf_parse_string(char *arg, bool from_file, __u16 *bpf_len, - char **bpf_string, bool *need_release, - const char separator) -{ - char sp; - - if (from_file) { - size_t tmp_len, op_len = sizeof("65535 255 255 4294967295,"); - char *tmp_string; - FILE *fp; - - tmp_len = sizeof("4096,") + BPF_MAXINSNS * op_len; - tmp_string = malloc(tmp_len); - if (tmp_string == NULL) - return -ENOMEM; - - memset(tmp_string, 0, tmp_len); - - fp = fopen(arg, "r"); - if (fp == NULL) { - perror("Cannot fopen"); - free(tmp_string); - return -ENOENT; - } - - if (!fgets(tmp_string, tmp_len, fp)) { - free(tmp_string); - fclose(fp); - return -EIO; - } - - fclose(fp); - - *need_release = true; - *bpf_string = tmp_string; - } else { - *need_release = false; - *bpf_string = arg; - } - - if (sscanf(*bpf_string, "%hu%c", bpf_len, &sp) != 2 || - sp != separator) { - if (*need_release) - free(*bpf_string); - return -EINVAL; - } - - return 0; -} - -static int bpf_parse_ops(int argc, char **argv, struct nlmsghdr *n, - bool from_file) -{ - char *bpf_string, *token, separator = ','; - struct sock_filter bpf_ops[BPF_MAXINSNS]; - int ret = 0, i = 0; - bool need_release; - __u16 bpf_len = 0; 
- - if (argc < 1) - return -EINVAL; - if (bpf_parse_string(argv[0], from_file, &bpf_len, &bpf_string, - &need_release, separator)) - return -EINVAL; - if (bpf_len == 0 || bpf_len > BPF_MAXINSNS) { - ret = -EINVAL; - goto out; - } - - token = bpf_string; - while ((token = strchr(token, separator)) && (++token)[0]) { - if (i >= bpf_len) { - fprintf(stderr, "Real program length exceeds encoded " - "length parameter!\n"); - ret = -EINVAL; - goto out; - } - - if (sscanf(token, "%hu %hhu %hhu %u,", - &bpf_ops[i].code, &bpf_ops[i].jt, - &bpf_ops[i].jf, &bpf_ops[i].k) != 4) { - fprintf(stderr, "Error at instruction %d!\n", i); - ret = -EINVAL; - goto out; - } - - i++; - } - - if (i != bpf_len) { - fprintf(stderr, "Parsed program length is less than encoded" - "length parameter!\n"); - ret = -EINVAL; - goto out; - } - - addattr_l(n, MAX_MSG, TCA_BPF_OPS_LEN, &bpf_len, sizeof(bpf_len)); - addattr_l(n, MAX_MSG, TCA_BPF_OPS, &bpf_ops, - bpf_len * sizeof(struct sock_filter)); -out: - if (need_release) - free(bpf_string); - - return ret; -} - -static void bpf_print_ops(FILE *f, struct rtattr *bpf_ops, __u16 len) -{ - struct sock_filter *ops = (struct sock_filter *) RTA_DATA(bpf_ops); - int i; - - if (len == 0) - return; - - fprintf(f, "bytecode \'%u,", len); - - for (i = 0; i < len - 1; i++) - fprintf(f, "%hu %hhu %hhu %u,", ops[i].code, ops[i].jt, - ops[i].jf, ops[i].k); - - fprintf(f, "%hu %hhu %hhu %u\'\n", ops[i].code, ops[i].jt, - ops[i].jf, ops[i].k); -} - static int bpf_parse_opt(struct filter_util *qu, char *handle, int argc, char **argv, struct nlmsghdr *n) { @@ -195,6 +72,10 @@ static int bpf_parse_opt(struct filter_util *qu, char *handle, while (argc > 0) { if (matches(*argv, "run") == 0) { bool from_file; + struct sock_filter bpf_ops[BPF_MAXINSNS]; + __u16 bpf_len; + int ret; + NEXT_ARG(); if (strcmp(*argv, "bytecode-file") == 0) { from_file = true; @@ -206,10 +87,15 @@ static int bpf_parse_opt(struct filter_util *qu, char *handle, return -1; } NEXT_ARG(); - if 
(bpf_parse_ops(argc, argv, n, from_file)) { + ret = bpf_parse_ops(argc, argv, bpf_ops, from_file); + if (ret < 0) { fprintf(stderr, "Illegal \"bytecode\"\n"); return -1; } + bpf_len = ret; + addattr16(n, MAX_MSG, TCA_BPF_OPS_LEN, bpf_len); + addattr_l(n, MAX_MSG, TCA_BPF_OPS, &bpf_ops, + bpf_len * sizeof(struct sock_filter)); } else if (matches(*argv, "classid") == 0 || strcmp(*argv, "flowid") == 0) { unsigned handle; diff --git a/tc/m_bpf.c b/tc/m_bpf.c new file mode 100644 index 00000000..611135ea --- /dev/null +++ b/tc/m_bpf.c @@ -0,0 +1,183 @@ +/* + * m_bpf.c BPF based action module + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * Authors: Jiri Pirko + */ + +#include +#include +#include +#include +#include +#include + +#include "utils.h" +#include "rt_names.h" +#include "tc_util.h" +#include "tc_bpf.h" + +static void explain(void) +{ + fprintf(stderr, "Usage: ... bpf ...\n"); + fprintf(stderr, "\n"); + fprintf(stderr, " [inline]: run bytecode BPF_BYTECODE\n"); + fprintf(stderr, " [from file]: run bytecode-file FILE\n"); + fprintf(stderr, "\n"); + fprintf(stderr, "Where BPF_BYTECODE := \'s,c t f k,c t f k,c t f k,...\'\n"); + fprintf(stderr, " c,t,f,k and s are decimals; s denotes number of 4-tuples\n"); + fprintf(stderr, "Where FILE points to a file containing the BPF_BYTECODE string\n"); + fprintf(stderr, "\nACTION_SPEC := ... 
look at individual actions\n"); + fprintf(stderr, "NOTE: CLASSID is parsed as hexadecimal input.\n"); +} + +static void usage(void) +{ + explain(); + exit(-1); +} + +static int parse_bpf(struct action_util *a, int *argc_p, char ***argv_p, + int tca_id, struct nlmsghdr *n) +{ + int argc = *argc_p; + char **argv = *argv_p; + struct rtattr *tail; + struct tc_act_bpf parm = { 0 }; + struct sock_filter bpf_ops[BPF_MAXINSNS]; + __u16 bpf_len = 0; + + if (matches(*argv, "bpf") != 0) + return -1; + + NEXT_ARG(); + + while (argc > 0) { + if (matches(*argv, "run") == 0) { + bool from_file; + int ret; + + NEXT_ARG(); + if (strcmp(*argv, "bytecode-file") == 0) { + from_file = true; + } else if (strcmp(*argv, "bytecode") == 0) { + from_file = false; + } else { + fprintf(stderr, "unexpected \"%s\"\n", *argv); + explain(); + return -1; + } + NEXT_ARG(); + ret = bpf_parse_ops(argc, argv, bpf_ops, from_file); + if (ret < 0) { + fprintf(stderr, "Illegal \"bytecode\"\n"); + return -1; + } + bpf_len = ret; + } else if (matches(*argv, "help") == 0) { + usage(); + } else { + break; + } + argc--; + argv++; + } + + parm.action = TC_ACT_PIPE; + if (argc) { + if (matches(*argv, "reclassify") == 0) { + parm.action = TC_ACT_RECLASSIFY; + NEXT_ARG(); + } else if (matches(*argv, "pipe") == 0) { + parm.action = TC_ACT_PIPE; + NEXT_ARG(); + } else if (matches(*argv, "drop") == 0 || + matches(*argv, "shot") == 0) { + parm.action = TC_ACT_SHOT; + NEXT_ARG(); + } else if (matches(*argv, "continue") == 0) { + parm.action = TC_ACT_UNSPEC; + NEXT_ARG(); + } else if (matches(*argv, "pass") == 0) { + parm.action = TC_ACT_OK; + NEXT_ARG(); + } + } + + if (argc) { + if (matches(*argv, "index") == 0) { + NEXT_ARG(); + if (get_u32(&parm.index, *argv, 10)) { + fprintf(stderr, "bpf: Illegal \"index\"\n"); + return -1; + } + argc--; + argv++; + } + } + + if (!bpf_len) { + fprintf(stderr, "bpf: Bytecode needs to be passed\n"); + explain(); + return -1; + } + + tail = NLMSG_TAIL(n); + addattr_l(n, MAX_MSG, 
tca_id, NULL, 0); + addattr_l(n, MAX_MSG, TCA_ACT_BPF_PARMS, &parm, sizeof(parm)); + addattr16(n, MAX_MSG, TCA_ACT_BPF_OPS_LEN, bpf_len); + addattr_l(n, MAX_MSG, TCA_ACT_BPF_OPS, &bpf_ops, + bpf_len * sizeof(struct sock_filter)); + tail->rta_len = (char *)NLMSG_TAIL(n) - (char *)tail; + + *argc_p = argc; + *argv_p = argv; + return 0; +} + +static int print_bpf(struct action_util *au, FILE *f, struct rtattr *arg) +{ + struct rtattr *tb[TCA_ACT_BPF_MAX + 1]; + struct tc_act_bpf *parm; + + if (arg == NULL) + return -1; + + parse_rtattr_nested(tb, TCA_ACT_BPF_MAX, arg); + + if (!tb[TCA_ACT_BPF_PARMS]) { + fprintf(f, "[NULL bpf parameters]"); + return -1; + } + parm = RTA_DATA(tb[TCA_ACT_BPF_PARMS]); + + fprintf(f, " bpf "); + + if (tb[TCA_ACT_BPF_OPS] && tb[TCA_ACT_BPF_OPS_LEN]) + bpf_print_ops(f, tb[TCA_ACT_BPF_OPS], + rta_getattr_u16(tb[TCA_ACT_BPF_OPS_LEN])); + + fprintf(f, "\n\tindex %d ref %d bind %d", parm->index, parm->refcnt, + parm->bindcnt); + + if (show_stats) { + if (tb[TCA_ACT_BPF_TM]) { + struct tcf_t *tm = RTA_DATA(tb[TCA_ACT_BPF_TM]); + print_tm(f, tm); + } + } + + fprintf(f, "\n "); + + return 0; +} + +struct action_util bpf_action_util = { + .id = "bpf", + .parse_aopt = parse_bpf, + .print_aopt = print_bpf, +}; diff --git a/tc/tc_bpf.c b/tc/tc_bpf.c new file mode 100644 index 00000000..c6901d6c --- /dev/null +++ b/tc/tc_bpf.c @@ -0,0 +1,146 @@ +/* + * tc_bpf.c BPF common code + * + * This program is free software; you can distribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. 
+ * + * Authors: Daniel Borkmann + * Jiri Pirko + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "utils.h" +#include "tc_util.h" +#include "tc_bpf.h" + +int bpf_parse_string(char *arg, bool from_file, __u16 *bpf_len, + char **bpf_string, bool *need_release, + const char separator) +{ + char sp; + + if (from_file) { + size_t tmp_len, op_len = sizeof("65535 255 255 4294967295,"); + char *tmp_string; + FILE *fp; + + tmp_len = sizeof("4096,") + BPF_MAXINSNS * op_len; + tmp_string = malloc(tmp_len); + if (tmp_string == NULL) + return -ENOMEM; + + memset(tmp_string, 0, tmp_len); + + fp = fopen(arg, "r"); + if (fp == NULL) { + perror("Cannot fopen"); + free(tmp_string); + return -ENOENT; + } + + if (!fgets(tmp_string, tmp_len, fp)) { + free(tmp_string); + fclose(fp); + return -EIO; + } + + fclose(fp); + + *need_release = true; + *bpf_string = tmp_string; + } else { + *need_release = false; + *bpf_string = arg; + } + + if (sscanf(*bpf_string, "%hu%c", bpf_len, &sp) != 2 || + sp != separator) { + if (*need_release) + free(*bpf_string); + return -EINVAL; + } + + return 0; +} + +int bpf_parse_ops(int argc, char **argv, struct sock_filter *bpf_ops, + bool from_file) +{ + char *bpf_string, *token, separator = ','; + int ret = 0, i = 0; + bool need_release; + __u16 bpf_len = 0; + + if (argc < 1) + return -EINVAL; + if (bpf_parse_string(argv[0], from_file, &bpf_len, &bpf_string, + &need_release, separator)) + return -EINVAL; + if (bpf_len == 0 || bpf_len > BPF_MAXINSNS) { + ret = -EINVAL; + goto out; + } + + token = bpf_string; + while ((token = strchr(token, separator)) && (++token)[0]) { + if (i >= bpf_len) { + fprintf(stderr, "Real program length exceeds encoded " + "length parameter!\n"); + ret = -EINVAL; + goto out; + } + + if (sscanf(token, "%hu %hhu %hhu %u,", + &bpf_ops[i].code, &bpf_ops[i].jt, + &bpf_ops[i].jf, &bpf_ops[i].k) != 4) { + fprintf(stderr, "Error at instruction %d!\n", i); + ret = -EINVAL; + goto 
out; + } + + i++; + } + + if (i != bpf_len) { + fprintf(stderr, "Parsed program length is less than encoded " + "length parameter!\n"); + ret = -EINVAL; + goto out; + } + ret = bpf_len; + +out: + if (need_release) + free(bpf_string); + + return ret; +} + +void bpf_print_ops(FILE *f, struct rtattr *bpf_ops, __u16 len) +{ + struct sock_filter *ops = (struct sock_filter *) RTA_DATA(bpf_ops); + int i; + + if (len == 0) + return; + + fprintf(f, "bytecode \'%u,", len); + + for (i = 0; i < len - 1; i++) + fprintf(f, "%hu %hhu %hhu %u,", ops[i].code, ops[i].jt, + ops[i].jf, ops[i].k); + + fprintf(f, "%hu %hhu %hhu %u\'\n", ops[i].code, ops[i].jt, + ops[i].jf, ops[i].k); +} diff --git a/tc/tc_bpf.h b/tc/tc_bpf.h new file mode 100644 index 00000000..08cca927 --- /dev/null +++ b/tc/tc_bpf.h @@ -0,0 +1,28 @@ +/* + * tc_bpf.h BPF common code + * + * This program is free software; you can distribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * Authors: Daniel Borkmann + * Jiri Pirko + */ + +#ifndef _TC_BPF_H_ +#define _TC_BPF_H_ 1 + +#include +#include +#include +#include + +int bpf_parse_string(char *arg, bool from_file, __u16 *bpf_len, + char **bpf_string, bool *need_release, + const char separator); +int bpf_parse_ops(int argc, char **argv, struct sock_filter *bpf_ops, + bool from_file); +void bpf_print_ops(FILE *f, struct rtattr *bpf_ops, __u16 len); + +#endif diff --git a/testsuite/Makefile b/testsuite/Makefile index 2ba95473..a2c8a2d9 100644 --- a/testsuite/Makefile +++ b/testsuite/Makefile @@ -31,12 +31,13 @@ listtests: alltests: $(TESTS) clean: + @echo "Removing $(RESULTS_DIR) dir ..." 
@rm -rf $(RESULTS_DIR) distclean: clean echo "Entering iproute2" && cd iproute2 && $(MAKE) distclean && cd ..; -$(TESTS): +$(TESTS): clean @mkdir -p $(RESULTS_DIR) @for d in $(TESTS_DIR); do \ diff --git a/testsuite/lib/generic.sh b/testsuite/lib/generic.sh index 8f76e492..3473cc13 100644 --- a/testsuite/lib/generic.sh +++ b/testsuite/lib/generic.sh @@ -62,8 +62,9 @@ ts_ip() TMP_OUT=`mktemp /tmp/tc_testsuite.XXXXXX` || exit $IP $@ 2> $TMP_ERR > $TMP_OUT + RET=$? - if [ -s $TMP_ERR ]; then + if [ -s $TMP_ERR ] || [ "$RET" != "0" ]; then ts_err "${SCRIPT}: ${DESC} failed:" ts_err "command: $IP $@" ts_err "stderr output:" @@ -91,3 +92,8 @@ ts_qdisc_available() return 1; fi } + +rand_dev() +{ + echo "dev-$(tr -dc "[:alpha:]" < /dev/urandom | head -c 6)" +} diff --git a/testsuite/tests/ip/link/dev_wo_vf_rate.nl b/testsuite/tests/ip/link/dev_wo_vf_rate.nl new file mode 100644 index 00000000..40fa87ff Binary files /dev/null and b/testsuite/tests/ip/link/dev_wo_vf_rate.nl differ diff --git a/testsuite/tests/ip/link/new_link.t b/testsuite/tests/ip/link/new_link.t new file mode 100755 index 00000000..549ff256 --- /dev/null +++ b/testsuite/tests/ip/link/new_link.t @@ -0,0 +1,11 @@ +#!/bin/sh + +source lib/generic.sh + +ts_log "[Testing add/del virtual links]" + +NEW_DEV="$(rand_dev)" + +ts_ip "$0" "Add $NEW_DEV dummy interface" link add dev $NEW_DEV type dummy +ts_ip "$0" "Show $NEW_DEV dummy interface" link show dev $NEW_DEV +ts_ip "$0" "Del $NEW_DEV dummy interface" link del dev $NEW_DEV diff --git a/testsuite/tests/ip/link/show_dev_wo_vf_rate.t b/testsuite/tests/ip/link/show_dev_wo_vf_rate.t new file mode 100755 index 00000000..a600ba65 --- /dev/null +++ b/testsuite/tests/ip/link/show_dev_wo_vf_rate.t @@ -0,0 +1,6 @@ +#!/bin/sh + +source lib/generic.sh + +NL_FILE="tests/ip/link/dev_wo_vf_rate.nl" +ts_ip "$0" "Show VF devices w/o VF rate info" -d monitor file $NL_FILE