From f1c656e5c0f0bed919a87856e3916116e6e4c2a4 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 7 Apr 2016 16:11:39 -0700 Subject: [PATCH 01/12] iplink: display number of rx/tx queues We can set the attributes, so it would be nice to display them when provided by the kernel. Signed-off-by: Eric Dumazet --- ip/ipaddress.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/ip/ipaddress.c b/ip/ipaddress.c index 3998d8ce..f7bd1c76 100644 --- a/ip/ipaddress.c +++ b/ip/ipaddress.c @@ -894,6 +894,12 @@ int print_linkinfo(const struct sockaddr_nl *who, if (do_link && tb[IFLA_AF_SPEC] && show_details) print_af_spec(fp, tb[IFLA_AF_SPEC]); + if (tb[IFLA_NUM_TX_QUEUES] && show_details) + fprintf(fp, "numtxqueues %u ", rta_getattr_u32(tb[IFLA_NUM_TX_QUEUES])); + + if (tb[IFLA_NUM_RX_QUEUES] && show_details) + fprintf(fp, "numrxqueues %u ", rta_getattr_u32(tb[IFLA_NUM_RX_QUEUES])); + if ((do_link || show_details) && tb[IFLA_IFALIAS]) { fprintf(fp, "%s alias %s", _SL_, rta_getattr_str(tb[IFLA_IFALIAS])); From ae6eb9075fdfe1bf0804c9a9315a37e111e8e293 Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Mon, 11 Apr 2016 17:45:14 +0200 Subject: [PATCH 02/12] bridge: fdb: add support to filter by vlan id Add the optional keyword "vlan" to bridge fdb show so the user can request filtering by a specific vlan id. Currently the filtering is implemented only in user-space. The argument name has been chosen to match the add/del one - "vlan". 
Example: $ bridge fdb show vlan 400 52:54:00:bf:57:16 dev eth2 vlan 400 master br0 permanent Signed-off-by: Nikolay Aleksandrov --- bridge/fdb.c | 21 +++++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) diff --git a/bridge/fdb.c b/bridge/fdb.c index df55e86d..be849f98 100644 --- a/bridge/fdb.c +++ b/bridge/fdb.c @@ -27,7 +27,7 @@ #include "rt_names.h" #include "utils.h" -static unsigned int filter_index; +static unsigned int filter_index, filter_vlan; static void usage(void) { @@ -35,7 +35,7 @@ static void usage(void) " [ self ] [ master ] [ use ] [ router ]\n" " [ local | static | dynamic ] [ dst IPADDR ] [ vlan VID ]\n" " [ port PORT] [ vni VNI ] [ via DEV ]\n"); - fprintf(stderr, " bridge fdb [ show [ br BRDEV ] [ brport DEV ] ]\n"); + fprintf(stderr, " bridge fdb [ show [ br BRDEV ] [ brport DEV ] [ vlan VID ] ]\n"); exit(-1); } @@ -65,6 +65,7 @@ int print_fdb(const struct sockaddr_nl *who, struct nlmsghdr *n, void *arg) struct ndmsg *r = NLMSG_DATA(n); int len = n->nlmsg_len; struct rtattr *tb[NDA_MAX+1]; + __u16 vid = 0; if (n->nlmsg_type != RTM_NEWNEIGH && n->nlmsg_type != RTM_DELNEIGH) { fprintf(stderr, "Not RTM_NEWNEIGH: %08x %08x %08x\n", @@ -88,6 +89,12 @@ int print_fdb(const struct sockaddr_nl *who, struct nlmsghdr *n, void *arg) parse_rtattr(tb, NDA_MAX, NDA_RTA(r), n->nlmsg_len - NLMSG_LENGTH(sizeof(*r))); + if (tb[NDA_VLAN]) + vid = rta_getattr_u16(tb[NDA_VLAN]); + + if (filter_vlan && filter_vlan != vid) + return 0; + if (n->nlmsg_type == RTM_DELNEIGH) fprintf(fp, "Deleted "); @@ -115,11 +122,8 @@ int print_fdb(const struct sockaddr_nl *who, struct nlmsghdr *n, void *arg) RTA_DATA(tb[NDA_DST]))); } - if (tb[NDA_VLAN]) { - __u16 vid = rta_getattr_u16(tb[NDA_VLAN]); - + if (vid) fprintf(fp, "vlan %hu ", vid); - } if (tb[NDA_PORT]) fprintf(fp, "port %d ", ntohs(rta_getattr_u16(tb[NDA_PORT]))); @@ -190,6 +194,11 @@ static int fdb_show(int argc, char **argv) } else if (strcmp(*argv, "br") == 0) { NEXT_ARG(); br = *argv; + } else if 
(strcmp(*argv, "vlan") == 0) { + NEXT_ARG(); + if (filter_vlan) + duparg("vlan", *argv); + filter_vlan = atoi(*argv); } else { if (matches(*argv, "help") == 0) usage(); From 24687d678fd540c554f76dd43a06933ba1ca5c7d Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Mon, 11 Apr 2016 17:45:15 +0200 Subject: [PATCH 03/12] bridge: mdb: add support to filter by vlan id Add the optional keyword "vid" to bridge mdb show so the user can request filtering by a specific vlan id. Currently the filtering is implemented only in user-space. The argument name has been chosen to match the add/del one - "vid". Example: $ bridge mdb show vid 200 dev br0 port eth2 grp 239.0.0.1 permanent vid 200 Signed-off-by: Nikolay Aleksandrov --- bridge/mdb.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/bridge/mdb.c b/bridge/mdb.c index 842536ec..6c904f8e 100644 --- a/bridge/mdb.c +++ b/bridge/mdb.c @@ -24,12 +24,12 @@ ((struct rtattr *)(((char *)(r)) + NLMSG_ALIGN(sizeof(struct br_port_msg)))) #endif -static unsigned int filter_index; +static unsigned int filter_index, filter_vlan; static void usage(void) { fprintf(stderr, "Usage: bridge mdb { add | del } dev DEV port PORT grp GROUP [permanent | temp] [vid VID]\n"); - fprintf(stderr, " bridge mdb {show} [ dev DEV ]\n"); + fprintf(stderr, " bridge mdb {show} [ dev DEV ] [ vid VID ]\n"); exit(-1); } @@ -92,6 +92,8 @@ static void print_mdb_entry(FILE *f, int ifindex, struct br_mdb_entry *e, const void *src; int af; + if (filter_vlan && e->vid != filter_vlan) + return; af = e->addr.proto == htons(ETH_P_IP) ? AF_INET : AF_INET6; src = af == AF_INET ? 
(const void *)&e->addr.u.ip4 : (const void *)&e->addr.u.ip6; @@ -195,6 +197,11 @@ static int mdb_show(int argc, char **argv) if (filter_dev) duparg("dev", *argv); filter_dev = *argv; + } else if (strcmp(*argv, "vid") == 0) { + NEXT_ARG(); + if (filter_vlan) + duparg("vid", *argv); + filter_vlan = atoi(*argv); } argc--; argv++; } From 5a2d0201cce161617b30102d10b709fa24c6e833 Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Mon, 11 Apr 2016 17:45:16 +0200 Subject: [PATCH 04/12] bridge: vlan: add support to filter by vlan id Add the optional keyword "vid" to bridge vlan show so the user can request filtering by a specific vlan id. Currently the filtering is implemented only in user-space. The argument name has been chosen to match the add/del one - "vid". This filtering can be used also with the "-compressvlans" option to see in which range is a vlan (if in any). Also this will be used to show only specific per-vlan statistics later when support is added to the kernel for it. Examples: $ bridge vlan show vid 450 port vlan ids eth2 450 $ bridge -c vlan show vid 450 port vlan ids eth2 400-500 $ bridge vlan show vid 1 port vlan ids eth1 1 PVID Egress Untagged eth2 1 PVID br0 1 PVID Egress Untagged Signed-off-by: Nikolay Aleksandrov --- bridge/vlan.c | 60 ++++++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 50 insertions(+), 10 deletions(-) diff --git a/bridge/vlan.c b/bridge/vlan.c index ae588323..717025ae 100644 --- a/bridge/vlan.c +++ b/bridge/vlan.c @@ -13,13 +13,13 @@ #include "br_common.h" #include "utils.h" -static unsigned int filter_index; +static unsigned int filter_index, filter_vlan; static void usage(void) { fprintf(stderr, "Usage: bridge vlan { add | del } vid VLAN_ID dev DEV [ pvid] [ untagged ]\n"); fprintf(stderr, " [ self ] [ master ]\n"); - fprintf(stderr, " bridge vlan { show } [ dev DEV ]\n"); + fprintf(stderr, " bridge vlan { show } [ dev DEV ] [ vid VLAN_ID ]\n"); exit(-1); } @@ -138,6 +138,26 @@ static int vlan_modify(int 
cmd, int argc, char **argv) return 0; } +/* In order to use this function for both filtering and non-filtering cases + * we need to make it a tristate: + * return -1 - if filtering we've gone over so don't continue + * return 0 - skip entry and continue (applies to range start or to entries + * which are less than filter_vlan) + * return 1 - print the entry and continue + */ +static int filter_vlan_check(struct bridge_vlan_info *vinfo) +{ + /* if we're filtering we should stop on the first greater entry */ + if (filter_vlan && vinfo->vid > filter_vlan && + !(vinfo->flags & BRIDGE_VLAN_INFO_RANGE_END)) + return -1; + if ((vinfo->flags & BRIDGE_VLAN_INFO_RANGE_BEGIN) || + vinfo->vid < filter_vlan) + return 0; + + return 1; +} + static int print_vlan(const struct sockaddr_nl *who, struct nlmsghdr *n, void *arg) @@ -169,26 +189,40 @@ static int print_vlan(const struct sockaddr_nl *who, /* if AF_SPEC isn't there, vlan table is not preset for this port */ if (!tb[IFLA_AF_SPEC]) { - fprintf(fp, "%s\tNone\n", ll_index_to_name(ifm->ifi_index)); + if (!filter_vlan) + fprintf(fp, "%s\tNone\n", + ll_index_to_name(ifm->ifi_index)); return 0; } else { struct rtattr *i, *list = tb[IFLA_AF_SPEC]; int rem = RTA_PAYLOAD(list); + __u16 last_vid_start = 0; - fprintf(fp, "%s", ll_index_to_name(ifm->ifi_index)); + if (!filter_vlan) + fprintf(fp, "%s", ll_index_to_name(ifm->ifi_index)); for (i = RTA_DATA(list); RTA_OK(i, rem); i = RTA_NEXT(i, rem)) { struct bridge_vlan_info *vinfo; + int vcheck_ret; if (i->rta_type != IFLA_BRIDGE_VLAN_INFO) continue; vinfo = RTA_DATA(i); - if (vinfo->flags & BRIDGE_VLAN_INFO_RANGE_END) - fprintf(fp, "-%hu", vinfo->vid); - else - fprintf(fp, "\t %hu", vinfo->vid); - if (vinfo->flags & BRIDGE_VLAN_INFO_RANGE_BEGIN) + + if (!(vinfo->flags & BRIDGE_VLAN_INFO_RANGE_END)) + last_vid_start = vinfo->vid; + vcheck_ret = filter_vlan_check(vinfo); + if (vcheck_ret == -1) + break; + else if (vcheck_ret == 0) continue; + + if (filter_vlan) + fprintf(fp, "%s", + 
ll_index_to_name(ifm->ifi_index)); + fprintf(fp, "\t %hu", last_vid_start); + if (last_vid_start != vinfo->vid) + fprintf(fp, "-%hu", vinfo->vid); if (vinfo->flags & BRIDGE_VLAN_INFO_PVID) fprintf(fp, " PVID"); if (vinfo->flags & BRIDGE_VLAN_INFO_UNTAGGED) @@ -196,7 +230,8 @@ static int print_vlan(const struct sockaddr_nl *who, fprintf(fp, "\n"); } } - fprintf(fp, "\n"); + if (!filter_vlan) + fprintf(fp, "\n"); fflush(fp); return 0; } @@ -211,6 +246,11 @@ static int vlan_show(int argc, char **argv) if (filter_dev) duparg("dev", *argv); filter_dev = *argv; + } else if (strcmp(*argv, "vid") == 0) { + NEXT_ARG(); + if (filter_vlan) + duparg("vid", *argv); + filter_vlan = atoi(*argv); } argc--; argv++; } From 0395711c52260c3a1cb0984948c3db56c69a073b Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Sat, 9 Apr 2016 00:32:03 +0200 Subject: [PATCH 05/12] tc, bpf: add new csum and tunnel signatures Add new signatures for BPF_FUNC_csum_diff, BPF_FUNC_skb_get_tunnel_opt and BPF_FUNC_skb_set_tunnel_opt. 
Signed-off-by: Daniel Borkmann --- include/bpf_api.h | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/include/bpf_api.h b/include/bpf_api.h index 4b16d25c..0f278f0c 100644 --- a/include/bpf_api.h +++ b/include/bpf_api.h @@ -212,6 +212,8 @@ static int BPF_FUNC(l3_csum_replace, struct __sk_buff *skb, uint32_t off, uint32_t from, uint32_t to, uint32_t flags); static int BPF_FUNC(l4_csum_replace, struct __sk_buff *skb, uint32_t off, uint32_t from, uint32_t to, uint32_t flags); +static int BPF_FUNC(csum_diff, const void *from, uint32_t from_size, + const void *to, uint32_t to_size, uint32_t seed); /* Packet vlan encap/decap */ static int BPF_FUNC(skb_vlan_push, struct __sk_buff *skb, uint16_t proto, @@ -225,6 +227,11 @@ static int BPF_FUNC(skb_set_tunnel_key, struct __sk_buff *skb, const struct bpf_tunnel_key *from, uint32_t size, uint32_t flags); +static int BPF_FUNC(skb_get_tunnel_opt, struct __sk_buff *skb, + void *to, uint32_t size); +static int BPF_FUNC(skb_set_tunnel_opt, struct __sk_buff *skb, + const void *from, uint32_t size); + /** LLVM built-ins, mem*() routines work for constant size */ #ifndef lock_xadd From afc1a2000b6f991587c815076941385c259e21ed Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Sat, 9 Apr 2016 00:32:04 +0200 Subject: [PATCH 06/12] tc, bpf: further improve error reporting Make it easier to spot issues when loading the object file fails. This includes reporting in what pinned object specs differ, better indication when we've reached instruction limits. Don't retry to load a non relo program once we failed with bpf(2), and report out of bounds tail call key. Also, add truncation of huge log outputs by default. Sometimes errors are quite easy to spot by only looking at the tail of the verifier log, but logs can get huge in size e.g. up to few MB (due to verifier checking all possible program paths). Thus, by default limit output to the last 4096 bytes and indicate that it's truncated. 
For the full log, the verbose option can be used. Signed-off-by: Daniel Borkmann --- tc/tc_bpf.c | 82 ++++++++++++++++++++++++++++++++++++++++++----------- tc/tc_bpf.h | 4 +++ 2 files changed, 69 insertions(+), 17 deletions(-) diff --git a/tc/tc_bpf.c b/tc/tc_bpf.c index d94af828..0c59427e 100644 --- a/tc/tc_bpf.c +++ b/tc/tc_bpf.c @@ -184,7 +184,7 @@ static int bpf_ops_parse(int argc, char **argv, struct sock_filter *bpf_ops, } if (i != bpf_len) { - fprintf(stderr, "Parsed program length is less than encodedlength parameter!\n"); + fprintf(stderr, "Parsed program length is less than encoded length parameter!\n"); ret = -EINVAL; goto out; } @@ -214,6 +214,27 @@ void bpf_print_ops(FILE *f, struct rtattr *bpf_ops, __u16 len) ops[i].jf, ops[i].k); } +static void bpf_map_pin_report(const struct bpf_elf_map *pin, + const struct bpf_elf_map *obj) +{ + fprintf(stderr, "Map specification differs from pinned file!\n"); + + if (obj->type != pin->type) + fprintf(stderr, " - Type: %u (obj) != %u (pin)\n", + obj->type, pin->type); + if (obj->size_key != pin->size_key) + fprintf(stderr, " - Size key: %u (obj) != %u (pin)\n", + obj->size_key, pin->size_key); + if (obj->size_value != pin->size_value) + fprintf(stderr, " - Size value: %u (obj) != %u (pin)\n", + obj->size_value, pin->size_value); + if (obj->max_elem != pin->max_elem) + fprintf(stderr, " - Max elems: %u (obj) != %u (pin)\n", + obj->max_elem, pin->max_elem); + + fprintf(stderr, "\n"); +} + static int bpf_map_selfcheck_pinned(int fd, const struct bpf_elf_map *map, int length) { @@ -256,7 +277,7 @@ static int bpf_map_selfcheck_pinned(int fd, const struct bpf_elf_map *map, if (!memcmp(&tmp, &zero, length)) return 0; - fprintf(stderr, "Map specs from pinned file differ!\n"); + bpf_map_pin_report(&tmp, map); return -EINVAL; } } @@ -735,7 +756,19 @@ bpf_dump_error(struct bpf_elf_ctx *ctx, const char *format, ...) 
va_end(vl); if (ctx->log && ctx->log[0]) { - fprintf(stderr, "%s\n", ctx->log); + if (ctx->verbose) { + fprintf(stderr, "%s\n", ctx->log); + } else { + unsigned int off = 0, len = strlen(ctx->log); + + if (len > BPF_MAX_LOG) { + off = len - BPF_MAX_LOG; + fprintf(stderr, "Skipped %u bytes, use \'verb\' option for the full verbose log.\n[...]\n", + off); + } + fprintf(stderr, "%s\n", ctx->log + off); + } + memset(ctx->log, 0, ctx->log_size); } } @@ -1055,14 +1088,16 @@ static void bpf_prog_report(int fd, const char *section, const struct bpf_elf_prog *prog, struct bpf_elf_ctx *ctx) { - fprintf(stderr, "Prog section \'%s\' %s%s (%d)!\n", section, + unsigned int insns = prog->size / sizeof(struct bpf_insn); + + fprintf(stderr, "\nProg section \'%s\' %s%s (%d)!\n", section, fd < 0 ? "rejected: " : "loaded", fd < 0 ? strerror(errno) : "", fd < 0 ? errno : fd); fprintf(stderr, " - Type: %u\n", prog->type); - fprintf(stderr, " - Instructions: %zu\n", - prog->size / sizeof(struct bpf_insn)); + fprintf(stderr, " - Instructions: %u (%u over limit)\n", + insns, insns > BPF_MAXINSNS ? insns - BPF_MAXINSNS : 0); fprintf(stderr, " - License: %s\n\n", prog->license); bpf_dump_error(ctx, "Verifier analysis:\n\n"); @@ -1283,6 +1318,11 @@ static int bpf_fetch_strtab(struct bpf_elf_ctx *ctx, int section, return 0; } +static bool bpf_has_map_data(const struct bpf_elf_ctx *ctx) +{ + return ctx->sym_tab && ctx->str_tab && ctx->sec_maps; +} + static int bpf_fetch_ancillary(struct bpf_elf_ctx *ctx) { struct bpf_elf_sec_data data; @@ -1306,13 +1346,13 @@ static int bpf_fetch_ancillary(struct bpf_elf_ctx *ctx) !strcmp(data.sec_name, ".strtab")) ret = bpf_fetch_strtab(ctx, i, &data); if (ret < 0) { - fprintf(stderr, "Error parsing section %d! Perhapscheck with readelf -a?\n", + fprintf(stderr, "Error parsing section %d! 
Perhaps check with readelf -a?\n", i); break; } } - if (ctx->sym_tab && ctx->str_tab && ctx->sec_maps) { + if (bpf_has_map_data(ctx)) { ret = bpf_maps_attach_all(ctx); if (ret < 0) { fprintf(stderr, "Error loading maps into kernel!\n"); @@ -1348,7 +1388,7 @@ static int bpf_fetch_prog(struct bpf_elf_ctx *ctx, const char *section) fd = bpf_prog_attach(section, &prog, ctx); if (fd < 0) - continue; + break; ctx->sec_done[i] = true; break; @@ -1412,7 +1452,8 @@ static int bpf_apply_relo_data(struct bpf_elf_ctx *ctx, return 0; } -static int bpf_fetch_prog_relo(struct bpf_elf_ctx *ctx, const char *section) +static int bpf_fetch_prog_relo(struct bpf_elf_ctx *ctx, const char *section, + bool *lderr) { struct bpf_elf_sec_data data_relo, data_insn; struct bpf_elf_prog prog; @@ -1442,8 +1483,10 @@ static int bpf_fetch_prog_relo(struct bpf_elf_ctx *ctx, const char *section) prog.license = ctx->license; fd = bpf_prog_attach(section, &prog, ctx); - if (fd < 0) - continue; + if (fd < 0) { + *lderr = true; + break; + } ctx->sec_done[i] = true; ctx->sec_done[idx] = true; @@ -1455,11 +1498,12 @@ static int bpf_fetch_prog_relo(struct bpf_elf_ctx *ctx, const char *section) static int bpf_fetch_prog_sec(struct bpf_elf_ctx *ctx, const char *section) { + bool lderr = false; int ret = -1; - if (ctx->sym_tab) - ret = bpf_fetch_prog_relo(ctx, section); - if (ret < 0) + if (bpf_has_map_data(ctx)) + ret = bpf_fetch_prog_relo(ctx, section, &lderr); + if (ret < 0 && !lderr) ret = bpf_fetch_prog(ctx, section); return ret; @@ -1504,8 +1548,12 @@ static int bpf_fill_prog_arrays(struct bpf_elf_ctx *ctx) ret = bpf_map_update(ctx->map_fds[idx], &key_id, &fd, BPF_ANY); - if (ret < 0) - return -ENOENT; + if (ret < 0) { + if (errno == E2BIG) + fprintf(stderr, "Tail call key %u for map %u out of bounds?\n", + key_id, map_id); + return -errno; + } ctx->sec_done[i] = true; } diff --git a/tc/tc_bpf.h b/tc/tc_bpf.h index 93f7f0e2..30306dea 100644 --- a/tc/tc_bpf.h +++ b/tc/tc_bpf.h @@ -33,6 +33,10 @@ enum { 
#define BPF_ENV_UDS "TC_BPF_UDS" #define BPF_ENV_MNT "TC_BPF_MNT" +#ifndef BPF_MAX_LOG +# define BPF_MAX_LOG 4096 +#endif + #ifndef BPF_FS_MAGIC # define BPF_FS_MAGIC 0xcafe4a11 #endif From 4dd3f50af4b82a6a29ede951bde97197e88f9c5d Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Sat, 9 Apr 2016 00:32:05 +0200 Subject: [PATCH 07/12] tc, bpf: add support for map pre/allocation Follow-up to kernel commit 6c9059817432 ("bpf: pre-allocate hash map elements"). Add flags support, so that we can pass in BPF_F_NO_PREALLOC flag for disallowing preallocation. Update examples accordingly and also remove the BPF_* map helper macros from them as they were not very useful. Signed-off-by: Daniel Borkmann --- examples/bpf/bpf_cyclic.c | 9 +++++++- examples/bpf/bpf_graft.c | 8 ++++++- examples/bpf/bpf_prog.c | 2 ++ examples/bpf/bpf_shared.c | 8 ++++++- examples/bpf/bpf_tailcall.c | 27 +++++++++++++++++++--- include/bpf_api.h | 45 ------------------------------------- include/bpf_elf.h | 1 + tc/tc_bpf.c | 16 +++++++++---- 8 files changed, 61 insertions(+), 55 deletions(-) diff --git a/examples/bpf/bpf_cyclic.c b/examples/bpf/bpf_cyclic.c index 36745a3c..11d1c061 100644 --- a/examples/bpf/bpf_cyclic.c +++ b/examples/bpf/bpf_cyclic.c @@ -6,7 +6,14 @@ */ #define JMP_MAP_ID 0xabccba -BPF_PROG_ARRAY(jmp_tc, JMP_MAP_ID, PIN_OBJECT_NS, 1); +struct bpf_elf_map __section_maps jmp_tc = { + .type = BPF_MAP_TYPE_PROG_ARRAY, + .id = JMP_MAP_ID, + .size_key = sizeof(uint32_t), + .size_value = sizeof(uint32_t), + .pinning = PIN_OBJECT_NS, + .max_elem = 1, +}; __section_tail(JMP_MAP_ID, 0) int cls_loop(struct __sk_buff *skb) diff --git a/examples/bpf/bpf_graft.c b/examples/bpf/bpf_graft.c index 20784ff4..07113d4a 100644 --- a/examples/bpf/bpf_graft.c +++ b/examples/bpf/bpf_graft.c @@ -33,7 +33,13 @@ * [...] 
*/ -BPF_PROG_ARRAY(jmp_tc, 0, PIN_GLOBAL_NS, 1); +struct bpf_elf_map __section_maps jmp_tc = { + .type = BPF_MAP_TYPE_PROG_ARRAY, + .size_key = sizeof(uint32_t), + .size_value = sizeof(uint32_t), + .pinning = PIN_GLOBAL_NS, + .max_elem = 1, +}; __section("aaa") int cls_aaa(struct __sk_buff *skb) diff --git a/examples/bpf/bpf_prog.c b/examples/bpf/bpf_prog.c index f15e876c..d6caf374 100644 --- a/examples/bpf/bpf_prog.c +++ b/examples/bpf/bpf_prog.c @@ -192,6 +192,7 @@ struct bpf_elf_map __section("maps") map_proto = { .size_key = sizeof(uint8_t), .size_value = sizeof(struct count_tuple), .max_elem = 256, + .flags = BPF_F_NO_PREALLOC, }; struct bpf_elf_map __section("maps") map_queue = { @@ -200,6 +201,7 @@ struct bpf_elf_map __section("maps") map_queue = { .size_key = sizeof(uint32_t), .size_value = sizeof(struct count_queue), .max_elem = 1024, + .flags = BPF_F_NO_PREALLOC, }; struct bpf_elf_map __section("maps") map_drops = { diff --git a/examples/bpf/bpf_shared.c b/examples/bpf/bpf_shared.c index 7fe9ef30..21fe6f1e 100644 --- a/examples/bpf/bpf_shared.c +++ b/examples/bpf/bpf_shared.c @@ -18,7 +18,13 @@ * instance is being created. */ -BPF_ARRAY4(map_sh, 0, PIN_OBJECT_NS, 1); /* or PIN_GLOBAL_NS, or PIN_NONE */ +struct bpf_elf_map __section_maps map_sh = { + .type = BPF_MAP_TYPE_ARRAY, + .size_key = sizeof(uint32_t), + .size_value = sizeof(uint32_t), + .pinning = PIN_OBJECT_NS, /* or PIN_GLOBAL_NS, or PIN_NONE */ + .max_elem = 1, +}; __section("egress") int emain(struct __sk_buff *skb) diff --git a/examples/bpf/bpf_tailcall.c b/examples/bpf/bpf_tailcall.c index f545430f..1a30426c 100644 --- a/examples/bpf/bpf_tailcall.c +++ b/examples/bpf/bpf_tailcall.c @@ -26,10 +26,31 @@ * classifier behaviour. 
*/ -BPF_PROG_ARRAY(jmp_tc, FOO, PIN_OBJECT_NS, MAX_JMP_SIZE); -BPF_PROG_ARRAY(jmp_ex, BAR, PIN_OBJECT_NS, 1); +struct bpf_elf_map __section_maps jmp_tc = { + .type = BPF_MAP_TYPE_PROG_ARRAY, + .id = FOO, + .size_key = sizeof(uint32_t), + .size_value = sizeof(uint32_t), + .pinning = PIN_OBJECT_NS, + .max_elem = MAX_JMP_SIZE, +}; -BPF_ARRAY4(map_sh, 0, PIN_OBJECT_NS, 1); +struct bpf_elf_map __section_maps jmp_ex = { + .type = BPF_MAP_TYPE_PROG_ARRAY, + .id = BAR, + .size_key = sizeof(uint32_t), + .size_value = sizeof(uint32_t), + .pinning = PIN_OBJECT_NS, + .max_elem = 1, +}; + +struct bpf_elf_map __section_maps map_sh = { + .type = BPF_MAP_TYPE_ARRAY, + .size_key = sizeof(uint32_t), + .size_value = sizeof(uint32_t), + .pinning = PIN_OBJECT_NS, + .max_elem = 1, +}; __section_tail(FOO, ENTRY_0) int cls_case1(struct __sk_buff *skb) diff --git a/include/bpf_api.h b/include/bpf_api.h index 0f278f0c..1b250d2e 100644 --- a/include/bpf_api.h +++ b/include/bpf_api.h @@ -99,51 +99,6 @@ char ____license[] __section_license = NAME #endif -#ifndef __BPF_MAP -# define __BPF_MAP(NAME, TYPE, ID, SIZE_KEY, SIZE_VALUE, PIN, MAX_ELEM) \ - struct bpf_elf_map __section_maps NAME = { \ - .type = (TYPE), \ - .id = (ID), \ - .size_key = (SIZE_KEY), \ - .size_value = (SIZE_VALUE), \ - .pinning = (PIN), \ - .max_elem = (MAX_ELEM), \ - } -#endif - -#ifndef BPF_HASH -# define BPF_HASH(NAME, ID, SIZE_KEY, SIZE_VALUE, PIN, MAX_ELEM) \ - __BPF_MAP(NAME, BPF_MAP_TYPE_HASH, ID, SIZE_KEY, SIZE_VALUE, \ - PIN, MAX_ELEM) -#endif - -#ifndef BPF_ARRAY -# define BPF_ARRAY(NAME, ID, SIZE_VALUE, PIN, MAX_ELEM) \ - __BPF_MAP(NAME, BPF_MAP_TYPE_ARRAY, ID, sizeof(uint32_t), \ - SIZE_VALUE, PIN, MAX_ELEM) -#endif - -#ifndef BPF_ARRAY2 -# define BPF_ARRAY2(NAME, ID, PIN, MAX_ELEM) \ - BPF_ARRAY(NAME, ID, sizeof(uint16_t), PIN, MAX_ELEM) -#endif - -#ifndef BPF_ARRAY4 -# define BPF_ARRAY4(NAME, ID, PIN, MAX_ELEM) \ - BPF_ARRAY(NAME, ID, sizeof(uint32_t), PIN, MAX_ELEM) -#endif - -#ifndef BPF_ARRAY8 -# define 
BPF_ARRAY8(NAME, ID, PIN, MAX_ELEM) \ - BPF_ARRAY(NAME, ID, sizeof(uint64_t), PIN, MAX_ELEM) -#endif - -#ifndef BPF_PROG_ARRAY -# define BPF_PROG_ARRAY(NAME, ID, PIN, MAX_ELEM) \ - __BPF_MAP(NAME, BPF_MAP_TYPE_PROG_ARRAY, ID, sizeof(uint32_t), \ - sizeof(uint32_t), PIN, MAX_ELEM) -#endif - /** Classifier helper */ #ifndef BPF_H_DEFAULT diff --git a/include/bpf_elf.h b/include/bpf_elf.h index 31a89743..36cc9882 100644 --- a/include/bpf_elf.h +++ b/include/bpf_elf.h @@ -32,6 +32,7 @@ struct bpf_elf_map { __u32 size_key; __u32 size_value; __u32 max_elem; + __u32 flags; __u32 id; __u32 pinning; }; diff --git a/tc/tc_bpf.c b/tc/tc_bpf.c index 0c59427e..fe927ac9 100644 --- a/tc/tc_bpf.c +++ b/tc/tc_bpf.c @@ -231,6 +231,9 @@ static void bpf_map_pin_report(const struct bpf_elf_map *pin, if (obj->max_elem != pin->max_elem) fprintf(stderr, " - Max elems: %u (obj) != %u (pin)\n", obj->max_elem, pin->max_elem); + if (obj->flags != pin->flags) + fprintf(stderr, " - Flags: %#x (obj) != %#x (pin)\n", + obj->flags, pin->flags); fprintf(stderr, "\n"); } @@ -261,6 +264,8 @@ static int bpf_map_selfcheck_pinned(int fd, const struct bpf_elf_map *map, tmp.size_value = val; else if (sscanf(buff, "max_entries:\t%u", &val) == 1) tmp.max_elem = val; + else if (sscanf(buff, "map_flags:\t%i", &val) == 1) + tmp.flags = val; } fclose(fp); @@ -796,8 +801,9 @@ static int bpf_log_realloc(struct bpf_elf_ctx *ctx) return 0; } -static int bpf_map_create(enum bpf_map_type type, unsigned int size_key, - unsigned int size_value, unsigned int max_elem) +static int bpf_map_create(enum bpf_map_type type, uint32_t size_key, + uint32_t size_value, uint32_t max_elem, + uint32_t flags) { union bpf_attr attr; @@ -806,6 +812,7 @@ static int bpf_map_create(enum bpf_map_type type, unsigned int size_key, attr.key_size = size_key; attr.value_size = size_value; attr.max_entries = max_elem; + attr.map_flags = flags; return bpf(BPF_MAP_CREATE, &attr, sizeof(attr)); } @@ -1147,7 +1154,8 @@ static void bpf_map_report(int 
fd, const char *name, fprintf(stderr, " - Pinning: %u\n", map->pinning); fprintf(stderr, " - Size key: %u\n", map->size_key); fprintf(stderr, " - Size value: %u\n", map->size_value); - fprintf(stderr, " - Max elems: %u\n\n", map->max_elem); + fprintf(stderr, " - Max elems: %u\n", map->max_elem); + fprintf(stderr, " - Flags: %#x\n\n", map->flags); } static int bpf_map_attach(const char *name, const struct bpf_elf_map *map, @@ -1174,7 +1182,7 @@ static int bpf_map_attach(const char *name, const struct bpf_elf_map *map, errno = 0; fd = bpf_map_create(map->type, map->size_key, map->size_value, - map->max_elem); + map->max_elem, map->flags); if (fd < 0 || ctx->verbose) { bpf_map_report(fd, name, map, ctx); if (fd < 0) From df590401d6297fd4df69f2d514ec055d59e887cc Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Fri, 1 Apr 2016 16:22:01 +0200 Subject: [PATCH 08/12] iplink: display IFLA_PHYS_PORT_NAME Signed-off-by: Nicolas Dichtel --- ip/ipaddress.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/ip/ipaddress.c b/ip/ipaddress.c index f7bd1c76..ca97907f 100644 --- a/ip/ipaddress.c +++ b/ip/ipaddress.c @@ -813,6 +813,10 @@ int print_linkinfo(const struct sockaddr_nl *who, fprintf(fp, "master %s ", ll_idx_n2a(*(int *)RTA_DATA(tb[IFLA_MASTER]), b1)); } + if (tb[IFLA_PHYS_PORT_NAME]) + fprintf(fp, "portname %s ", + rta_getattr_str(tb[IFLA_PHYS_PORT_NAME])); + if (tb[IFLA_PHYS_PORT_ID]) { SPRINT_BUF(b1); fprintf(fp, "portid %s ", From 11522e7d02758653145f68bda7a6d123bdd0654b Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Mon, 11 Apr 2016 22:07:51 +0000 Subject: [PATCH 09/12] ip: only display phys attributes with details option Since the output of ip commands is already cluttered, move the physical port details under a show_details option. 
--- ip/ipaddress.c | 85 ++++++++++++++++++++++++++------------------------ 1 file changed, 45 insertions(+), 40 deletions(-) diff --git a/ip/ipaddress.c b/ip/ipaddress.c index ca97907f..f27d423c 100644 --- a/ip/ipaddress.c +++ b/ip/ipaddress.c @@ -782,7 +782,7 @@ int print_linkinfo(const struct sockaddr_nl *who, fprintf(fp, "%d: ", ifi->ifi_index); color_fprintf(fp, COLOR_IFNAME, "%s", - tb[IFLA_IFNAME] ? rta_getattr_str(tb[IFLA_IFNAME]) : ""); + tb[IFLA_IFNAME] ? rta_getattr_str(tb[IFLA_IFNAME]) : ""); if (tb[IFLA_LINK]) { SPRINT_BUF(b1); @@ -813,26 +813,6 @@ int print_linkinfo(const struct sockaddr_nl *who, fprintf(fp, "master %s ", ll_idx_n2a(*(int *)RTA_DATA(tb[IFLA_MASTER]), b1)); } - if (tb[IFLA_PHYS_PORT_NAME]) - fprintf(fp, "portname %s ", - rta_getattr_str(tb[IFLA_PHYS_PORT_NAME])); - - if (tb[IFLA_PHYS_PORT_ID]) { - SPRINT_BUF(b1); - fprintf(fp, "portid %s ", - hexstring_n2a(RTA_DATA(tb[IFLA_PHYS_PORT_ID]), - RTA_PAYLOAD(tb[IFLA_PHYS_PORT_ID]), - b1, sizeof(b1))); - } - - if (tb[IFLA_PHYS_SWITCH_ID]) { - SPRINT_BUF(b1); - fprintf(fp, "switchid %s ", - hexstring_n2a(RTA_DATA(tb[IFLA_PHYS_SWITCH_ID]), - RTA_PAYLOAD(tb[IFLA_PHYS_SWITCH_ID]), - b1, sizeof(b1))); - } - if (tb[IFLA_OPERSTATE]) print_operstate(fp, rta_getattr_u8(tb[IFLA_OPERSTATE])); @@ -856,10 +836,10 @@ int print_linkinfo(const struct sockaddr_nl *who, if (tb[IFLA_ADDRESS]) { color_fprintf(fp, COLOR_MAC, "%s", - ll_addr_n2a(RTA_DATA(tb[IFLA_ADDRESS]), - RTA_PAYLOAD(tb[IFLA_ADDRESS]), - ifi->ifi_type, - b1, sizeof(b1))); + ll_addr_n2a(RTA_DATA(tb[IFLA_ADDRESS]), + RTA_PAYLOAD(tb[IFLA_ADDRESS]), + ifi->ifi_type, + b1, sizeof(b1))); } if (tb[IFLA_BROADCAST]) { if (ifi->ifi_flags&IFF_POINTOPOINT) @@ -867,10 +847,10 @@ int print_linkinfo(const struct sockaddr_nl *who, else fprintf(fp, " brd "); color_fprintf(fp, COLOR_MAC, "%s", - ll_addr_n2a(RTA_DATA(tb[IFLA_BROADCAST]), - RTA_PAYLOAD(tb[IFLA_BROADCAST]), - ifi->ifi_type, - b1, sizeof(b1))); + ll_addr_n2a(RTA_DATA(tb[IFLA_BROADCAST]), + 
RTA_PAYLOAD(tb[IFLA_BROADCAST]), + ifi->ifi_type, + b1, sizeof(b1))); } } @@ -888,21 +868,46 @@ int print_linkinfo(const struct sockaddr_nl *who, fprintf(fp, " protodown on "); } - if (tb[IFLA_PROMISCUITY] && show_details) - fprintf(fp, " promiscuity %u ", - *(int *)RTA_DATA(tb[IFLA_PROMISCUITY])); + if (show_details) { + if (tb[IFLA_PROMISCUITY]) + fprintf(fp, " promiscuity %u ", + *(int *)RTA_DATA(tb[IFLA_PROMISCUITY])); - if (tb[IFLA_LINKINFO] && show_details) - print_linktype(fp, tb[IFLA_LINKINFO]); + if (tb[IFLA_LINKINFO]) + print_linktype(fp, tb[IFLA_LINKINFO]); - if (do_link && tb[IFLA_AF_SPEC] && show_details) - print_af_spec(fp, tb[IFLA_AF_SPEC]); + if (do_link && tb[IFLA_AF_SPEC]) + print_af_spec(fp, tb[IFLA_AF_SPEC]); - if (tb[IFLA_NUM_TX_QUEUES] && show_details) - fprintf(fp, "numtxqueues %u ", rta_getattr_u32(tb[IFLA_NUM_TX_QUEUES])); + if (tb[IFLA_NUM_TX_QUEUES]) + fprintf(fp, "numtxqueues %u ", + rta_getattr_u32(tb[IFLA_NUM_TX_QUEUES])); + + if (tb[IFLA_NUM_RX_QUEUES]) + fprintf(fp, "numrxqueues %u ", + rta_getattr_u32(tb[IFLA_NUM_RX_QUEUES])); + + if (tb[IFLA_PHYS_PORT_NAME]) + fprintf(fp, "portname %s ", + rta_getattr_str(tb[IFLA_PHYS_PORT_NAME])); + + if (tb[IFLA_PHYS_PORT_ID]) { + SPRINT_BUF(b1); + fprintf(fp, "portid %s ", + hexstring_n2a(RTA_DATA(tb[IFLA_PHYS_PORT_ID]), + RTA_PAYLOAD(tb[IFLA_PHYS_PORT_ID]), + b1, sizeof(b1))); + } + + if (tb[IFLA_PHYS_SWITCH_ID]) { + SPRINT_BUF(b1); + fprintf(fp, "switchid %s ", + hexstring_n2a(RTA_DATA(tb[IFLA_PHYS_SWITCH_ID]), + RTA_PAYLOAD(tb[IFLA_PHYS_SWITCH_ID]), + b1, sizeof(b1))); + } + } - if (tb[IFLA_NUM_RX_QUEUES] && show_details) - fprintf(fp, "numrxqueues %u ", rta_getattr_u32(tb[IFLA_NUM_RX_QUEUES])); if ((do_link || show_details) && tb[IFLA_IFALIAS]) { fprintf(fp, "%s alias %s", _SL_, From 5c5a0f3df9261340725a65f4655a2ab50fd3db4e Mon Sep 17 00:00:00 2001 From: Gustavo Zacarias Date: Fri, 8 Apr 2016 09:59:33 -0300 Subject: [PATCH 10/12] iproute2: tc_bpf.c: fix building with musl libc MIME-Version: 
1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We need limits.h for PATH_MAX, fixes: tc_bpf.c: In function ‘bpf_map_selfcheck_pinned’: tc_bpf.c:222:12: error: ‘PATH_MAX’ undeclared (first use in this function) char file[PATH_MAX], buff[4096]; Signed-off-by: Gustavo Zacarias Acked-by: Daniel Borkmann --- tc/tc_bpf.c | 1 + 1 file changed, 1 insertion(+) diff --git a/tc/tc_bpf.c b/tc/tc_bpf.c index fe927ac9..86c6069b 100644 --- a/tc/tc_bpf.c +++ b/tc/tc_bpf.c @@ -20,6 +20,7 @@ #include #include #include +#include #ifdef HAVE_ELF #include From fe9322781e6351b5572cbaa8df4dd0e5ec96398a Mon Sep 17 00:00:00 2001 From: Phil Sutter Date: Thu, 31 Mar 2016 14:43:32 +0200 Subject: [PATCH 11/12] ip-link: Support printing VF trust setting This adds a new item to VF lines of a PF, stating whether the VF is trusted or not. Signed-off-by: Phil Sutter --- ip/ipaddress.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/ip/ipaddress.c b/ip/ipaddress.c index f27d423c..b2e294d2 100644 --- a/ip/ipaddress.c +++ b/ip/ipaddress.c @@ -380,6 +380,13 @@ static void print_vfinfo(FILE *fp, struct rtattr *vfinfo) else fprintf(fp, ", link-state disable"); } + if (vf[IFLA_VF_TRUST]) { + struct ifla_vf_trust *vf_trust = RTA_DATA(vf[IFLA_VF_TRUST]); + + if (vf_trust->setting != -1) + fprintf(fp, ", trust %s", + vf_trust->setting ? "on" : "off"); + } if (vf[IFLA_VF_STATS] && show_stats) print_vf_stats64(fp, vf[IFLA_VF_STATS]); } From bbac6c63011b6c89d79f3f6aae7d4d38962e4000 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Mon, 11 Apr 2016 22:13:55 +0000 Subject: [PATCH 12/12] ip: whitespace cleanup Fix whitespace --- ip/ipaddress.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/ip/ipaddress.c b/ip/ipaddress.c index b2e294d2..aac7970e 100644 --- a/ip/ipaddress.c +++ b/ip/ipaddress.c @@ -326,7 +326,7 @@ static void print_vfinfo(FILE *fp, struct rtattr *vfinfo) * this kernel. 
*/ tmp = (struct rtattr *)((char *)vf[IFLA_VF_TX_RATE] + - vf[IFLA_VF_TX_RATE]->rta_len); + vf[IFLA_VF_TX_RATE]->rta_len); if (tmp->rta_type != IFLA_VF_SPOOFCHK) vf_spoofchk = NULL; @@ -338,7 +338,7 @@ static void print_vfinfo(FILE *fp, struct rtattr *vfinfo) * this kernel. */ tmp = (struct rtattr *)((char *)vf[IFLA_VF_SPOOFCHK] + - vf[IFLA_VF_SPOOFCHK]->rta_len); + vf[IFLA_VF_SPOOFCHK]->rta_len); if (tmp->rta_type != IFLA_VF_LINK_STATE) vf_linkstate = NULL; @@ -349,7 +349,7 @@ static void print_vfinfo(FILE *fp, struct rtattr *vfinfo) fprintf(fp, "%s vf %d MAC %s", _SL_, vf_mac->vf, ll_addr_n2a((unsigned char *)&vf_mac->mac, - ETH_ALEN, 0, b1, sizeof(b1))); + ETH_ALEN, 0, b1, sizeof(b1))); if (vf_vlan->vlan) fprintf(fp, ", vlan %d", vf_vlan->vlan); if (vf_vlan->qos)