From be99929d60436d131aad1cf09b9216971846e32e Mon Sep 17 00:00:00 2001 From: Feng Zhou Date: Sun, 1 Aug 2021 14:07:09 +0800 Subject: [PATCH 01/30] lib/bpf: Fix btf_load error lead to enable debug log Use tc with no verbose, when bpf_btf_attach fail, the conditions: "if (fd < 0 && (errno == ENOSPC || !ctx->log_size))" will make ctx->log_size != 0. And then, bpf_prog_attach, ctx->log_size != 0. so enable debug log. The verifier log sometimes is so chatty on larger programs. bpf_prog_attach is failed. "Log buffer too small to dump verifier log 16777215 bytes (9 tries)!" BTF load failure does not affect prog load. prog still work. So when BTF/PROG load fail, enlarge log_size and re-fail with having verbose. Signed-off-by: Feng Zhou Signed-off-by: Stephen Hemminger --- lib/bpf_legacy.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/bpf_legacy.c b/lib/bpf_legacy.c index d57d2635..e8a41e6f 100644 --- a/lib/bpf_legacy.c +++ b/lib/bpf_legacy.c @@ -1531,7 +1531,7 @@ retry: * into our buffer. Still, try to give a debuggable error * log for the user, so enlarge it and re-fail. */ - if (fd < 0 && (errno == ENOSPC || !ctx->log_size)) { + if (fd < 0 && errno == ENOSPC) { if (tries++ < 10 && !bpf_log_realloc(ctx)) goto retry; @@ -2069,7 +2069,7 @@ retry: fd = bpf_btf_load(ctx->btf_data->d_buf, ctx->btf_data->d_size, ctx->log, ctx->log_size); if (fd < 0 || ctx->verbose) { - if (fd < 0 && (errno == ENOSPC || !ctx->log_size)) { + if (fd < 0 && errno == ENOSPC) { if (tries++ < 10 && !bpf_log_realloc(ctx)) goto retry; From 954a0077c83b7981271809391ac0712d24a48314 Mon Sep 17 00:00:00 2001 From: Jacob Keller Date: Thu, 5 Aug 2021 16:44:59 -0700 Subject: [PATCH 02/30] devlink: fix infinite loop on flash update for drivers without status When processing device flash update, cmd_dev_flash function waits until the flash process has completed. 
This requires the following two conditions to both be true: a) we've received an exit status from the child process b) we've received the DEVLINK_CMD_FLASH_UPDATE_END *or* we haven't received any status notifications from the driver. The original devlink flash status monitoring code in 9b13cddfe268 ("devlink: implement flash status monitoring") was written assuming that a driver will either send no status updates, or it will send at least one DEVLINK_CMD_FLASH_UPDATE_STATUS before DEVLINK_CMD_FLASH_UPDATE_END. Newer versions of the kernel since commit 52cc5f3a166a ("devlink: move flash end and begin to core devlink") in v5.10 moved handling of the DEVLINK_CMD_FLASH_UPDATE_END into the core stack, and will send this regardless of whether or not the driver sends any of its own status notifications. The handling of DEVLINK_CMD_FLASH_UPDATE_END in cmd_dev_flash_status_cb has an additional condition that it must not be the first message. Otherwise, it falls back to treating it like a DEVLINK_CMD_FLASH_UPDATE_STATUS. This is wrong because it can lead to an infinite loop if a driver does not send any status updates. In this case, the kernel will send DEVLINK_CMD_FLASH_UPDATE_END without any DEVLINK_CMD_FLASH_UPDATE_STATUS. The devlink application will see that ctx->not_first is false, and will treat this like any other status message. Thus, ctx->not_first will be set to 1. The loop condition to exit flash update will thus never be true, since we will wait forever, because ctx->not_first is true, and ctx->received_end is false. This leads to the application appearing to process the flash update, but it will never exit. Fix this by simply always treating DEVLINK_CMD_FLASH_UPDATE_END the same regardless of whether it's the first message or not. This is obviously the correct thing to do: once we've received the DEVLINK_CMD_FLASH_UPDATE_END the flash update must be finished. 
For new kernels this is always true, because we send this message in the core stack after the driver flash update routine finishes. For older kernels, some drivers may not have sent any DEVLINK_CMD_FLASH_UPDATE_STATUS or DEVLINK_CMD_FLASH_UPDATE_END. This is handled by the while loop conditional that exits if we get a return value from the child process without having received any status notifications. An argument could be made that we should exit immediately when we get either the DEVLINK_CMD_FLASH_UPDATE_END or an exit code from the child process. However, at a minimum it makes no sense to ever process DEVLINK_CMD_FLASH_UPDATE_END as if it were a DEVLINK_CMD_FLASH_UPDATE_STATUS. This is easy to test as it is triggered by the selftests for the netdevsim driver, which has a test case for both with and without status notifications. Fixes: 9b13cddfe268 ("devlink: implement flash status monitoring") Signed-off-by: Jacob Keller Signed-off-by: Stephen Hemminger --- devlink/devlink.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/devlink/devlink.c b/devlink/devlink.c index b294fcd8..9d3acc18 100644 --- a/devlink/devlink.c +++ b/devlink/devlink.c @@ -3700,7 +3700,7 @@ static int cmd_dev_flash_status_cb(const struct nlmsghdr *nlh, void *data) strcmp(dev_name, opts->dev_name)) return MNL_CB_ERROR; - if (genl->cmd == DEVLINK_CMD_FLASH_UPDATE_END && ctx->not_first) { + if (genl->cmd == DEVLINK_CMD_FLASH_UPDATE_END) { pr_out("\n"); free(ctx->last_msg); free(ctx->last_component); From 50a412702252ba29524b860df6c30d14f03cd34a Mon Sep 17 00:00:00 2001 From: Andrea Claudi Date: Sat, 7 Aug 2021 18:57:38 +0200 Subject: [PATCH 03/30] lib: bpf_legacy: fix potential NULL-pointer dereference If bpf_map_fetch_name() returns NULL, strlen() hits a NULL-pointer dereference on outer_map_name. Fix this checking outer_map_name value, and returning false when NULL, as already done for inner_map_name before. 
Fixes: 6d61a2b55799 ("lib: add libbpf support") Signed-off-by: Andrea Claudi Signed-off-by: Stephen Hemminger --- lib/bpf_legacy.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/lib/bpf_legacy.c b/lib/bpf_legacy.c index e8a41e6f..91086aa2 100644 --- a/lib/bpf_legacy.c +++ b/lib/bpf_legacy.c @@ -3298,6 +3298,9 @@ bool iproute2_is_map_in_map(const char *libbpf_map_name, struct bpf_elf_map *ima *omap = ctx->maps[j]; outer_map_name = bpf_map_fetch_name(ctx, j); + if (!outer_map_name) + return false; + memcpy(omap_name, outer_map_name, strlen(outer_map_name) + 1); return true; From d1eacf12b58eb9907dc071f32238388ef3e254c0 Mon Sep 17 00:00:00 2001 From: Andrea Claudi Date: Sat, 7 Aug 2021 18:58:02 +0200 Subject: [PATCH 04/30] lib: bpf_glue: remove useless assignment The value of s used inside the cycle is the result of strstr(), so this assignment is useless. Signed-off-by: Andrea Claudi Signed-off-by: Stephen Hemminger --- lib/bpf_glue.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/bpf_glue.c b/lib/bpf_glue.c index eaa9504f..70d00184 100644 --- a/lib/bpf_glue.c +++ b/lib/bpf_glue.c @@ -63,7 +63,7 @@ const char *get_libbpf_version(void) if (fp == NULL) goto out; - while ((s = fgets(buf, sizeof(buf), fp)) != NULL) { + while (fgets(buf, sizeof(buf), fp) != NULL) { if ((s = strstr(buf, "libbpf.so.")) != NULL) { strncpy(_libbpf_version, s+10, sizeof(_libbpf_version)-1); strtok(_libbpf_version, "\n"); From 9b7ea92b9e3feff2876f772ace01148b7406839c Mon Sep 17 00:00:00 2001 From: Phil Sutter Date: Wed, 4 Aug 2021 11:18:28 +0200 Subject: [PATCH 05/30] tc: u32: Fix key folding in sample option In between Linux kernel 2.4 and 2.6, key folding for hash tables changed in kernel space. When iproute2 dropped support for the older algorithm, the wrong code was removed and kernel 2.4 folding method remained in place. To get things functional for recent kernels again, restoring the old code alone was not sufficient - additional byteorder fixes were needed. 
While being at it, make use of ffs() and thereby align the code with how kernel determines the shift width. Fixes: 267480f55383c ("Backout the 2.4 utsname hash patch.") Signed-off-by: Phil Sutter Signed-off-by: Stephen Hemminger --- tc/f_u32.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/tc/f_u32.c b/tc/f_u32.c index 2ed5254a..a5747f67 100644 --- a/tc/f_u32.c +++ b/tc/f_u32.c @@ -978,6 +978,13 @@ show_k: goto show_k; } +static __u32 u32_hash_fold(struct tc_u32_key *key) +{ + __u8 fshift = key->mask ? ffs(ntohl(key->mask)) - 1 : 0; + + return ntohl(key->val & key->mask) >> fshift; +} + static int u32_parse_opt(struct filter_util *qu, char *handle, int argc, char **argv, struct nlmsghdr *n) { @@ -1110,9 +1117,7 @@ static int u32_parse_opt(struct filter_util *qu, char *handle, } NEXT_ARG(); } - hash = sel2.keys[0].val & sel2.keys[0].mask; - hash ^= hash >> 16; - hash ^= hash >> 8; + hash = u32_hash_fold(&sel2.keys[0]); htid = ((hash % divisor) << 12) | (htid & 0xFFF00000); sample_ok = 1; continue; From 355c49ffa5e4f3680218c3dd0dc87280c9e9ac01 Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Fri, 23 Jul 2021 17:53:59 +0300 Subject: [PATCH 06/30] devlink: Show port state values in man page and in the help command Port function state can have either of the two values - active or inactive. Update the documentation and help command for these two values to tell user about it. With the introduction of state, hw_addr and state are optional. Hence mark them as optional in man page that also aligns with the help command output. 
Fixes: bdfb9f1bd61a ("devlink: Support set of port function state") Signed-off-by: Parav Pandit Reviewed-by: Jiri Pirko Signed-off-by: Stephen Hemminger --- devlink/devlink.c | 2 +- man/man8/devlink-port.8 | 10 +++++----- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/devlink/devlink.c b/devlink/devlink.c index 9d3acc18..2f2142ed 100644 --- a/devlink/devlink.c +++ b/devlink/devlink.c @@ -3988,7 +3988,7 @@ static void cmd_port_help(void) pr_err(" devlink port set DEV/PORT_INDEX [ type { eth | ib | auto} ]\n"); pr_err(" devlink port split DEV/PORT_INDEX count COUNT\n"); pr_err(" devlink port unsplit DEV/PORT_INDEX\n"); - pr_err(" devlink port function set DEV/PORT_INDEX [ hw_addr ADDR ] [ state STATE ]\n"); + pr_err(" devlink port function set DEV/PORT_INDEX [ hw_addr ADDR ] [ state { active | inactive } ]\n"); pr_err(" devlink port function rate { help | show | add | del | set }\n"); pr_err(" devlink port param set DEV/PORT_INDEX name PARAMETER value VALUE cmode { permanent | driverinit | runtime }\n"); pr_err(" devlink port param show [DEV/PORT_INDEX name PARAMETER]\n"); diff --git a/man/man8/devlink-port.8 b/man/man8/devlink-port.8 index 053db7a1..12ccc47e 100644 --- a/man/man8/devlink-port.8 +++ b/man/man8/devlink-port.8 @@ -67,12 +67,12 @@ devlink-port \- devlink port configuration .ti -8 .BR "devlink port function set " .IR DEV/PORT_INDEX -.RI "{ " +.RI "[ " .BR "hw_addr " -.RI "ADDR }" -.RI "{ " -.BR "state" -.RI "STATE }" +.RI "ADDR ]" +.RI "[ " +.BR state " { " active " | " inactive " }" +.RI "]" .ti -8 .BR "devlink port function rate " From 3a09567f7d583a8255d3bb9c07e2444375e53494 Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Mon, 16 Aug 2021 15:49:05 +0800 Subject: [PATCH 07/30] ip/bond: add arp_validate filter support Add arp_validate filter support based on kernel commit 896149ff1b2c ("bonding: extend arp_validate to be able to receive unvalidated arp-only traffic") Signed-off-by: Hangbin Liu Signed-off-by: Stephen Hemminger --- 
ip/iplink_bond.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/ip/iplink_bond.c b/ip/iplink_bond.c index d45845bd..0b4fe445 100644 --- a/ip/iplink_bond.c +++ b/ip/iplink_bond.c @@ -41,6 +41,9 @@ static const char *arp_validate_tbl[] = { "active", "backup", "all", + "filter", + "filter_active", + "filter_backup", NULL, }; @@ -145,7 +148,7 @@ static void print_explain(FILE *f) " [ ad_actor_system LLADDR ]\n" "\n" "BONDMODE := balance-rr|active-backup|balance-xor|broadcast|802.3ad|balance-tlb|balance-alb\n" - "ARP_VALIDATE := none|active|backup|all\n" + "ARP_VALIDATE := none|active|backup|all|filter|filter_active|filter_backup\n" "ARP_ALL_TARGETS := any|all\n" "PRIMARY_RESELECT := always|better|failure\n" "FAIL_OVER_MAC := none|active|follow\n" From 82149efee93924192ce83c03375b729ee003ca78 Mon Sep 17 00:00:00 2001 From: Gokul Sivakumar Date: Tue, 17 Aug 2021 22:58:05 +0530 Subject: [PATCH 08/30] bridge: reorder cmd line arg parsing to let "-c" detected as "color" option As per the man/man8/bridge.8 page, the shorthand cmd line arg "-c" can be used to colorize the bridge cmd output. But while parsing the args in while loop, matches() detects "-c" as "-compressedvlans" instead of "-color", so fix this by doing the check for "-color" option first before checking for "-compressedvlans". 
Signed-off-by: Gokul Sivakumar Signed-off-by: Stephen Hemminger --- bridge/bridge.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bridge/bridge.c b/bridge/bridge.c index f7bfe0b5..48b0e7f8 100644 --- a/bridge/bridge.c +++ b/bridge/bridge.c @@ -149,9 +149,9 @@ main(int argc, char **argv) NEXT_ARG(); if (netns_switch(argv[1])) exit(-1); + } else if (matches_color(opt, &color)) { } else if (matches(opt, "-compressvlans") == 0) { ++compress_vlans; - } else if (matches_color(opt, &color)) { } else if (matches(opt, "-force") == 0) { ++force; } else if (matches(opt, "-json") == 0) { From 057d3c6d378b38fcb60daaeed2f09c052e7d4ba5 Mon Sep 17 00:00:00 2001 From: Gokul Sivakumar Date: Tue, 17 Aug 2021 22:58:06 +0530 Subject: [PATCH 09/30] bridge: fdb: don't colorize the "dev" & "dst" keywords in "bridge -c fdb" To be consistent with the colorized output of "ip" command and to increase readability, stop highlighting the "dev" & "dst" keywords in the colorized output of "bridge -c fdb" cmd. Example: in the following "bridge -c fdb" entry, only "00:00:00:00:00:00", "vxlan100" and "2001:db8:2::1" fields should be highlighted in color. 
00:00:00:00:00:00 dev vxlan100 dst 2001:db8:2::1 self permanent Signed-off-by: Gokul Sivakumar Signed-off-by: Stephen Hemminger --- bridge/fdb.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/bridge/fdb.c b/bridge/fdb.c index 37465e46..8912f092 100644 --- a/bridge/fdb.c +++ b/bridge/fdb.c @@ -192,10 +192,13 @@ int print_fdb(struct nlmsghdr *n, void *arg) "mac", "%s ", lladdr); } - if (!filter_index && r->ndm_ifindex) + if (!filter_index && r->ndm_ifindex) { + print_string(PRINT_FP, NULL, "dev ", NULL); + print_color_string(PRINT_ANY, COLOR_IFNAME, - "ifname", "dev %s ", + "ifname", "%s ", ll_index_to_name(r->ndm_ifindex)); + } if (tb[NDA_DST]) { int family = AF_INET; @@ -208,9 +211,11 @@ int print_fdb(struct nlmsghdr *n, void *arg) RTA_PAYLOAD(tb[NDA_DST]), RTA_DATA(tb[NDA_DST])); + print_string(PRINT_FP, NULL, "dst ", NULL); + print_color_string(PRINT_ANY, ifa_family_color(family), - "dst", "dst %s ", dst); + "dst", "%s ", dst); } if (vid) From 10ecd126900bc8e291c22bf4b11ab589009eb5de Mon Sep 17 00:00:00 2001 From: Gokul Sivakumar Date: Tue, 17 Aug 2021 22:58:07 +0530 Subject: [PATCH 10/30] man: bridge: fix the typo to change "-c[lor]" into "-c[olor]" in man page Fixes: 3a1ca9a5b ("bridge: update man page for new color and json changes") Signed-off-by: Gokul Sivakumar Signed-off-by: Stephen Hemminger --- man/man8/bridge.8 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/man/man8/bridge.8 b/man/man8/bridge.8 index eec7df43..db83a2a6 100644 --- a/man/man8/bridge.8 +++ b/man/man8/bridge.8 @@ -22,7 +22,7 @@ bridge \- show / manipulate bridge addresses and devices \fB\-s\fR[\fItatistics\fR] | \fB\-n\fR[\fIetns\fR] name | \fB\-b\fR[\fIatch\fR] filename | -\fB\-c\fR[\folor\fR] | +\fB\-c\fR[\fIolor\fR] | \fB\-p\fR[\fIretty\fR] | \fB\-j\fR[\fIson\fR] | \fB\-o\fR[\fIneline\fr] } From 1afde094988702fbc8cd553fedbed883d3be5638 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Wed, 18 Aug 2021 14:09:34 -0700 Subject: [PATCH 
11/30] uapi: update neighbour.h Signed-off-by: Stephen Hemminger --- include/uapi/linux/neighbour.h | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/include/uapi/linux/neighbour.h b/include/uapi/linux/neighbour.h index dc8b7220..00a60695 100644 --- a/include/uapi/linux/neighbour.h +++ b/include/uapi/linux/neighbour.h @@ -66,8 +66,11 @@ enum { #define NUD_NONE 0x00 /* NUD_NOARP & NUD_PERMANENT are pseudostates, they never change - and make no address resolution or NUD. - NUD_PERMANENT also cannot be deleted by garbage collectors. + * and make no address resolution or NUD. + * NUD_PERMANENT also cannot be deleted by garbage collectors. + * When NTF_EXT_LEARNED is set for a bridge fdb entry the different cache entry + * states don't make sense and thus are ignored. Such entries don't age and + * can roam. */ struct nda_cacheinfo { From 85b0e73c7714d33e94e3d289c49c4e0f35497071 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 18 Aug 2021 14:29:46 -0700 Subject: [PATCH 12/30] ss: fix fallback to procfs for raw sockets Jonas reports that ss -awp does not display any RAW sockets on a Knoppix 4.4 kernel. sockdiag_send() diverts to tcpdiag_send() to try the older netlink interface. tcpdiag_send() works for TCP and DCCP but not other protocols. Instead of rejecting unsupported protocols (and missing RAW and SCTP) match on supported ones. 
Link: https://lore.kernel.org/netdev/20210815231738.7b42bad4@mmluhan/ Reported-and-tested-by: Jonas Bechtel Fixes: 41fe6c34de50 ("ss: Add inet raw sockets information gathering via netlink diag interface") Signed-off-by: Jakub Kicinski Signed-off-by: Stephen Hemminger --- misc/ss.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/misc/ss.c b/misc/ss.c index 894ad405..b39f63fe 100644 --- a/misc/ss.c +++ b/misc/ss.c @@ -3404,13 +3404,13 @@ static int tcpdiag_send(int fd, int protocol, struct filter *f) struct iovec iov[3]; int iovlen = 1; - if (protocol == IPPROTO_UDP || protocol == IPPROTO_MPTCP) - return -1; - if (protocol == IPPROTO_TCP) req.nlh.nlmsg_type = TCPDIAG_GETSOCK; - else + else if (protocol == IPPROTO_DCCP) req.nlh.nlmsg_type = DCCPDIAG_GETSOCK; + else + return -1; + if (show_mem) { req.r.idiag_ext |= (1<<(INET_DIAG_MEMINFO-1)); req.r.idiag_ext |= (1<<(INET_DIAG_SKMEMINFO-1)); From 169f36a0c916974a53f35c80a8aed7fff27f76c4 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Tue, 31 Aug 2021 11:57:59 -0700 Subject: [PATCH 13/30] v5.14.0 --- include/version.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/version.h b/include/version.h index bbf1d1cc..d0e1a49b 100644 --- a/include/version.h +++ b/include/version.h @@ -1 +1 @@ -static const char version[] = "5.13.0"; +static const char version[] = "5.14.0"; From 508ad89c820bcdf23cb6173cfe3ed70288d18c29 Mon Sep 17 00:00:00 2001 From: Gokul Sivakumar Date: Tue, 27 Jul 2021 22:16:28 +0530 Subject: [PATCH 14/30] ipneigh: add support to print brief output of neigh cache in tabular format Make use of the already available brief flag and print the basic details of the IPv4 or IPv6 neighbour cache in a tabular format for better readability when the brief output is expected. 
$ ip -br neigh 172.16.12.100 bridge0 b0:fc:36:2f:07:43 172.16.12.174 bridge0 8c:16:45:2f:bc:1c 172.16.12.250 bridge0 04:d9:f5:c1:0c:74 fe80::267b:9f70:745e:d54d bridge0 b0:fc:36:2f:07:43 fd16:a115:6a62:0:8744:efa1:9933:2c4c bridge0 8c:16:45:2f:bc:1c fe80::6d9:f5ff:fec1:c74 bridge0 04:d9:f5:c1:0c:74 And add "ip neigh show" to the list of ip sub commands mentioned in the man page that support the brief output in tabular format. Signed-off-by: Gokul Sivakumar Signed-off-by: David Ahern --- ip/ipneigh.c | 50 +++++++++++++++++++++++++++++++++++++++++++++++++- man/man8/ip.8 | 2 +- 2 files changed, 50 insertions(+), 2 deletions(-) diff --git a/ip/ipneigh.c b/ip/ipneigh.c index 2d6b7f58..95bde520 100644 --- a/ip/ipneigh.c +++ b/ip/ipneigh.c @@ -251,6 +251,51 @@ static void print_neigh_state(unsigned int nud) close_json_array(PRINT_JSON, NULL); } +static int print_neigh_brief(FILE *fp, struct ndmsg *r, struct rtattr *tb[]) +{ + if (tb[NDA_DST]) { + const char *dst; + int family = r->ndm_family; + + if (family == AF_BRIDGE) { + if (RTA_PAYLOAD(tb[NDA_DST]) == sizeof(struct in6_addr)) + family = AF_INET6; + else + family = AF_INET; + } + + dst = format_host_rta(family, tb[NDA_DST]); + print_color_string(PRINT_ANY, ifa_family_color(family), + "dst", "%-39s ", dst); + } + + if (!filter.index && r->ndm_ifindex) { + print_color_string(PRINT_ANY, COLOR_IFNAME, + "dev", "%-16s ", + ll_index_to_name(r->ndm_ifindex)); + } + + if (tb[NDA_LLADDR]) { + const char *lladdr; + + SPRINT_BUF(b1); + + lladdr = ll_addr_n2a(RTA_DATA(tb[NDA_LLADDR]), + RTA_PAYLOAD(tb[NDA_LLADDR]), + ll_index_to_type(r->ndm_ifindex), + b1, sizeof(b1)); + + print_color_string(PRINT_ANY, COLOR_MAC, + "lladdr", "%s", lladdr); + } + + print_string(PRINT_FP, NULL, "%s", "\n"); + close_json_object(); + fflush(fp); + + return 0; +} + int print_neigh(struct nlmsghdr *n, void *arg) { FILE *fp = (FILE *)arg; @@ -337,6 +382,9 @@ int print_neigh(struct nlmsghdr *n, void *arg) else if (n->nlmsg_type == RTM_GETNEIGH) 
print_null(PRINT_ANY, "miss", "%s ", "miss"); + if (brief) + return print_neigh_brief(fp, r, tb); + if (tb[NDA_DST]) { const char *dst; int family = r->ndm_family; @@ -412,7 +460,7 @@ int print_neigh(struct nlmsghdr *n, void *arg) print_string(PRINT_FP, NULL, "\n", ""); close_json_object(); - fflush(stdout); + fflush(fp); return 0; } diff --git a/man/man8/ip.8 b/man/man8/ip.8 index c9f7671e..3f572889 100644 --- a/man/man8/ip.8 +++ b/man/man8/ip.8 @@ -227,7 +227,7 @@ print human readable rates in IEC units (e.g. 1Ki = 1024). .BR "\-br" , " \-brief" Print only basic information in a tabular format for better readability. This option is currently only supported by -.BR "ip addr show " and " ip link show " commands. +.BR "ip addr show ", " ip link show " & " ip neigh show " commands. .TP .BR "\-j", " \-json" From 2d6fa30bb8cac66d121121af9b96f6df7047993b Mon Sep 17 00:00:00 2001 From: David Ahern Date: Mon, 2 Aug 2021 10:25:09 -0600 Subject: [PATCH 15/30] Update kernel headers Update kernel headers to commit: 1187c8c4642d ("net: phy: mscc: make some arrays static const, makes object smaller") Signed-off-by: David Ahern --- include/uapi/linux/bpf.h | 85 +++++++++++++++++++++++++-- include/uapi/linux/if_arp.h | 1 + include/uapi/linux/if_bridge.h | 18 ++++++ include/uapi/linux/if_ether.h | 3 + include/uapi/linux/if_link.h | 10 ++++ include/uapi/linux/in.h | 21 +++++-- include/uapi/linux/in6.h | 1 + include/uapi/linux/lwtunnel.h | 1 + include/uapi/linux/pkt_cls.h | 1 + include/uapi/linux/tc_act/tc_skbmod.h | 1 + 10 files changed, 133 insertions(+), 9 deletions(-) diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index f4bdc0e5..dd547572 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -324,9 +324,6 @@ union bpf_iter_link_info { * **BPF_PROG_TYPE_SK_LOOKUP** * *data_in* and *data_out* must be NULL. * - * **BPF_PROG_TYPE_XDP** - * *ctx_in* and *ctx_out* must be NULL. 
- * * **BPF_PROG_TYPE_RAW_TRACEPOINT**, * **BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE** * @@ -3249,7 +3246,7 @@ union bpf_attr { * long bpf_sk_select_reuseport(struct sk_reuseport_md *reuse, struct bpf_map *map, void *key, u64 flags) * Description * Select a **SO_REUSEPORT** socket from a - * **BPF_MAP_TYPE_REUSEPORT_ARRAY** *map*. + * **BPF_MAP_TYPE_REUSEPORT_SOCKARRAY** *map*. * It checks the selected socket is matching the incoming * request in the socket buffer. * Return @@ -4780,6 +4777,76 @@ union bpf_attr { * Execute close syscall for given FD. * Return * A syscall result. + * + * long bpf_timer_init(struct bpf_timer *timer, struct bpf_map *map, u64 flags) + * Description + * Initialize the timer. + * First 4 bits of *flags* specify clockid. + * Only CLOCK_MONOTONIC, CLOCK_REALTIME, CLOCK_BOOTTIME are allowed. + * All other bits of *flags* are reserved. + * The verifier will reject the program if *timer* is not from + * the same *map*. + * Return + * 0 on success. + * **-EBUSY** if *timer* is already initialized. + * **-EINVAL** if invalid *flags* are passed. + * **-EPERM** if *timer* is in a map that doesn't have any user references. + * The user space should either hold a file descriptor to a map with timers + * or pin such map in bpffs. When map is unpinned or file descriptor is + * closed all timers in the map will be cancelled and freed. + * + * long bpf_timer_set_callback(struct bpf_timer *timer, void *callback_fn) + * Description + * Configure the timer to call *callback_fn* static function. + * Return + * 0 on success. + * **-EINVAL** if *timer* was not initialized with bpf_timer_init() earlier. + * **-EPERM** if *timer* is in a map that doesn't have any user references. + * The user space should either hold a file descriptor to a map with timers + * or pin such map in bpffs. When map is unpinned or file descriptor is + * closed all timers in the map will be cancelled and freed. 
+ * + * long bpf_timer_start(struct bpf_timer *timer, u64 nsecs, u64 flags) + * Description + * Set timer expiration N nanoseconds from the current time. The + * configured callback will be invoked in soft irq context on some cpu + * and will not repeat unless another bpf_timer_start() is made. + * In such case the next invocation can migrate to a different cpu. + * Since struct bpf_timer is a field inside map element the map + * owns the timer. The bpf_timer_set_callback() will increment refcnt + * of BPF program to make sure that callback_fn code stays valid. + * When user space reference to a map reaches zero all timers + * in a map are cancelled and corresponding program's refcnts are + * decremented. This is done to make sure that Ctrl-C of a user + * process doesn't leave any timers running. If map is pinned in + * bpffs the callback_fn can re-arm itself indefinitely. + * bpf_map_update/delete_elem() helpers and user space sys_bpf commands + * cancel and free the timer in the given map element. + * The map can contain timers that invoke callback_fn-s from different + * programs. The same callback_fn can serve different timers from + * different maps if key/value layout matches across maps. + * Every bpf_timer_set_callback() can have different callback_fn. + * + * Return + * 0 on success. + * **-EINVAL** if *timer* was not initialized with bpf_timer_init() earlier + * or invalid *flags* are passed. + * + * long bpf_timer_cancel(struct bpf_timer *timer) + * Description + * Cancel the timer and wait for callback_fn to finish if it was running. + * Return + * 0 if the timer was not active. + * 1 if the timer was active. + * **-EINVAL** if *timer* was not initialized with bpf_timer_init() earlier. + * **-EDEADLK** if callback_fn tried to call bpf_timer_cancel() on its + * own timer which would have led to a deadlock otherwise. + * + * u64 bpf_get_func_ip(void *ctx) + * Description + * Get address of the traced function (for tracing and kprobe programs). 
+ * Return + * Address of the traced function. */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -4951,6 +5018,11 @@ union bpf_attr { FN(sys_bpf), \ FN(btf_find_by_name_kind), \ FN(sys_close), \ + FN(timer_init), \ + FN(timer_set_callback), \ + FN(timer_start), \ + FN(timer_cancel), \ + FN(get_func_ip), \ /* */ /* integer value in 'imm' field of BPF_CALL instruction selects which helper @@ -6077,6 +6149,11 @@ struct bpf_spin_lock { __u32 val; }; +struct bpf_timer { + __u64 :64; + __u64 :64; +} __attribute__((aligned(8))); + struct bpf_sysctl { __u32 write; /* Sysctl is being read (= 0) or written (= 1). * Allows 1,2,4-byte read, but no write. diff --git a/include/uapi/linux/if_arp.h b/include/uapi/linux/if_arp.h index dbfbc227..12d06bb6 100644 --- a/include/uapi/linux/if_arp.h +++ b/include/uapi/linux/if_arp.h @@ -54,6 +54,7 @@ #define ARPHRD_X25 271 /* CCITT X.25 */ #define ARPHRD_HWX25 272 /* Boards with X.25 in firmware */ #define ARPHRD_CAN 280 /* Controller Area Network */ +#define ARPHRD_MCTP 290 #define ARPHRD_PPP 512 #define ARPHRD_CISCO 513 /* Cisco HDLC */ #define ARPHRD_HDLC ARPHRD_CISCO diff --git a/include/uapi/linux/if_bridge.h b/include/uapi/linux/if_bridge.h index 2298a43f..907745f4 100644 --- a/include/uapi/linux/if_bridge.h +++ b/include/uapi/linux/if_bridge.h @@ -479,16 +479,22 @@ enum { /* flags used in BRIDGE_VLANDB_DUMP_FLAGS attribute to affect dumps */ #define BRIDGE_VLANDB_DUMPF_STATS (1 << 0) /* Include stats in the dump */ +#define BRIDGE_VLANDB_DUMPF_GLOBAL (1 << 1) /* Dump global vlan options only */ /* Bridge vlan RTM attributes * [BRIDGE_VLANDB_ENTRY] = { * [BRIDGE_VLANDB_ENTRY_INFO] * ... * } + * [BRIDGE_VLANDB_GLOBAL_OPTIONS] = { + * [BRIDGE_VLANDB_GOPTS_ID] + * ... 
+ * } */ enum { BRIDGE_VLANDB_UNSPEC, BRIDGE_VLANDB_ENTRY, + BRIDGE_VLANDB_GLOBAL_OPTIONS, __BRIDGE_VLANDB_MAX, }; #define BRIDGE_VLANDB_MAX (__BRIDGE_VLANDB_MAX - 1) @@ -538,6 +544,15 @@ enum { }; #define BRIDGE_VLANDB_STATS_MAX (__BRIDGE_VLANDB_STATS_MAX - 1) +enum { + BRIDGE_VLANDB_GOPTS_UNSPEC, + BRIDGE_VLANDB_GOPTS_ID, + BRIDGE_VLANDB_GOPTS_RANGE, + BRIDGE_VLANDB_GOPTS_MCAST_SNOOPING, + __BRIDGE_VLANDB_GOPTS_MAX +}; +#define BRIDGE_VLANDB_GOPTS_MAX (__BRIDGE_VLANDB_GOPTS_MAX - 1) + /* Bridge multicast database attributes * [MDBA_MDB] = { * [MDBA_MDB_ENTRY] = { @@ -629,6 +644,7 @@ enum { MDBA_ROUTER_PATTR_TYPE, MDBA_ROUTER_PATTR_INET_TIMER, MDBA_ROUTER_PATTR_INET6_TIMER, + MDBA_ROUTER_PATTR_VID, __MDBA_ROUTER_PATTR_MAX }; #define MDBA_ROUTER_PATTR_MAX (__MDBA_ROUTER_PATTR_MAX - 1) @@ -720,12 +736,14 @@ struct br_mcast_stats { /* bridge boolean options * BR_BOOLOPT_NO_LL_LEARN - disable learning from link-local packets + * BR_BOOLOPT_MCAST_VLAN_SNOOPING - control vlan multicast snooping * * IMPORTANT: if adding a new option do not forget to handle * it in br_boolopt_toggle/get and bridge sysfs */ enum br_boolopt_id { BR_BOOLOPT_NO_LL_LEARN, + BR_BOOLOPT_MCAST_VLAN_SNOOPING, BR_BOOLOPT_MAX }; diff --git a/include/uapi/linux/if_ether.h b/include/uapi/linux/if_ether.h index 8e6f2c3f..84e01c08 100644 --- a/include/uapi/linux/if_ether.h +++ b/include/uapi/linux/if_ether.h @@ -151,6 +151,9 @@ #define ETH_P_MAP 0x00F9 /* Qualcomm multiplexing and * aggregation protocol */ +#define ETH_P_MCTP 0x00FA /* Management component transport + * protocol packets + */ /* * This is an Ethernet frame header. 
diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index 5195ed93..62512efc 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -1258,4 +1258,14 @@ struct ifla_rmnet_flags { __u32 mask; }; +/* MCTP section */ + +enum { + IFLA_MCTP_UNSPEC, + IFLA_MCTP_NET, + __IFLA_MCTP_MAX, +}; + +#define IFLA_MCTP_MAX (__IFLA_MCTP_MAX - 1) + #endif /* _LINUX_IF_LINK_H */ diff --git a/include/uapi/linux/in.h b/include/uapi/linux/in.h index 5b15419d..8e2de9fc 100644 --- a/include/uapi/linux/in.h +++ b/include/uapi/linux/in.h @@ -188,11 +188,22 @@ struct ip_mreq_source { }; struct ip_msfilter { - __be32 imsf_multiaddr; - __be32 imsf_interface; - __u32 imsf_fmode; - __u32 imsf_numsrc; - __be32 imsf_slist[1]; + union { + struct { + __be32 imsf_multiaddr_aux; + __be32 imsf_interface_aux; + __u32 imsf_fmode_aux; + __u32 imsf_numsrc_aux; + __be32 imsf_slist[1]; + }; + struct { + __be32 imsf_multiaddr; + __be32 imsf_interface; + __u32 imsf_fmode; + __u32 imsf_numsrc; + __be32 imsf_slist_flex[]; + }; + }; }; #define IP_MSFILTER_SIZE(numsrc) \ diff --git a/include/uapi/linux/in6.h b/include/uapi/linux/in6.h index 7e3a58e6..a17363e0 100644 --- a/include/uapi/linux/in6.h +++ b/include/uapi/linux/in6.h @@ -145,6 +145,7 @@ struct in6_flowlabel_req { #define IPV6_TLV_PADN 1 #define IPV6_TLV_ROUTERALERT 5 #define IPV6_TLV_CALIPSO 7 /* RFC 5570 */ +#define IPV6_TLV_IOAM 49 /* TEMPORARY IANA allocation for IOAM */ #define IPV6_TLV_JUMBO 194 #define IPV6_TLV_HAO 201 /* home address option */ diff --git a/include/uapi/linux/lwtunnel.h b/include/uapi/linux/lwtunnel.h index b7c0191f..78f0ecd1 100644 --- a/include/uapi/linux/lwtunnel.h +++ b/include/uapi/linux/lwtunnel.h @@ -14,6 +14,7 @@ enum lwtunnel_encap_types { LWTUNNEL_ENCAP_BPF, LWTUNNEL_ENCAP_SEG6_LOCAL, LWTUNNEL_ENCAP_RPL, + LWTUNNEL_ENCAP_IOAM6, __LWTUNNEL_ENCAP_MAX, }; diff --git a/include/uapi/linux/pkt_cls.h b/include/uapi/linux/pkt_cls.h index 025c40fe..6836ccb9 100644 --- 
a/include/uapi/linux/pkt_cls.h +++ b/include/uapi/linux/pkt_cls.h @@ -22,6 +22,7 @@ enum { __TCA_ACT_MAX }; +/* See other TCA_ACT_FLAGS_ * flags in include/net/act_api.h. */ #define TCA_ACT_FLAGS_NO_PERCPU_STATS 1 /* Don't use percpu allocator for * actions stats. */ diff --git a/include/uapi/linux/tc_act/tc_skbmod.h b/include/uapi/linux/tc_act/tc_skbmod.h index c525b350..af6ef2cf 100644 --- a/include/uapi/linux/tc_act/tc_skbmod.h +++ b/include/uapi/linux/tc_act/tc_skbmod.h @@ -17,6 +17,7 @@ #define SKBMOD_F_SMAC 0x2 #define SKBMOD_F_ETYPE 0x4 #define SKBMOD_F_SWAPMAC 0x8 +#define SKBMOD_F_ECN 0x10 struct tc_skbmod { tc_gen; From acbdef93869e0865243bb4746309527bbc5f25b7 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Mon, 2 Aug 2021 11:32:26 -0600 Subject: [PATCH 16/30] Import ioam6 uapi headers Import ioam6 uapi headers from kernel headers at last sync commit. Signed-off-by: David Ahern --- include/uapi/linux/ioam6.h | 133 ++++++++++++++++++++++++++++ include/uapi/linux/ioam6_genl.h | 52 +++++++++++ include/uapi/linux/ioam6_iptunnel.h | 20 +++++ 3 files changed, 205 insertions(+) create mode 100644 include/uapi/linux/ioam6.h create mode 100644 include/uapi/linux/ioam6_genl.h create mode 100644 include/uapi/linux/ioam6_iptunnel.h diff --git a/include/uapi/linux/ioam6.h b/include/uapi/linux/ioam6.h new file mode 100644 index 00000000..d1653a31 --- /dev/null +++ b/include/uapi/linux/ioam6.h @@ -0,0 +1,133 @@ +/* SPDX-License-Identifier: GPL-2.0+ WITH Linux-syscall-note */ +/* + * IPv6 IOAM implementation + * + * Author: + * Justin Iurman + */ + +#ifndef _LINUX_IOAM6_H +#define _LINUX_IOAM6_H + +#include +#include + +#define IOAM6_U16_UNAVAILABLE U16_MAX +#define IOAM6_U32_UNAVAILABLE U32_MAX +#define IOAM6_U64_UNAVAILABLE U64_MAX + +#define IOAM6_DEFAULT_ID (IOAM6_U32_UNAVAILABLE >> 8) +#define IOAM6_DEFAULT_ID_WIDE (IOAM6_U64_UNAVAILABLE >> 8) +#define IOAM6_DEFAULT_IF_ID IOAM6_U16_UNAVAILABLE +#define IOAM6_DEFAULT_IF_ID_WIDE IOAM6_U32_UNAVAILABLE + +/* + * IPv6 
IOAM Option Header + */ +struct ioam6_hdr { + __u8 opt_type; + __u8 opt_len; + __u8 :8; /* reserved */ +#define IOAM6_TYPE_PREALLOC 0 + __u8 type; +} __attribute__((packed)); + +/* + * IOAM Trace Header + */ +struct ioam6_trace_hdr { + __be16 namespace_id; + +#if defined(__LITTLE_ENDIAN_BITFIELD) + + __u8 :1, /* unused */ + :1, /* unused */ + overflow:1, + nodelen:5; + + __u8 remlen:7, + :1; /* unused */ + + union { + __be32 type_be32; + + struct { + __u32 bit7:1, + bit6:1, + bit5:1, + bit4:1, + bit3:1, + bit2:1, + bit1:1, + bit0:1, + bit15:1, /* unused */ + bit14:1, /* unused */ + bit13:1, /* unused */ + bit12:1, /* unused */ + bit11:1, + bit10:1, + bit9:1, + bit8:1, + bit23:1, /* reserved */ + bit22:1, + bit21:1, /* unused */ + bit20:1, /* unused */ + bit19:1, /* unused */ + bit18:1, /* unused */ + bit17:1, /* unused */ + bit16:1, /* unused */ + :8; /* reserved */ + } type; + }; + +#elif defined(__BIG_ENDIAN_BITFIELD) + + __u8 nodelen:5, + overflow:1, + :1, /* unused */ + :1; /* unused */ + + __u8 :1, /* unused */ + remlen:7; + + union { + __be32 type_be32; + + struct { + __u32 bit0:1, + bit1:1, + bit2:1, + bit3:1, + bit4:1, + bit5:1, + bit6:1, + bit7:1, + bit8:1, + bit9:1, + bit10:1, + bit11:1, + bit12:1, /* unused */ + bit13:1, /* unused */ + bit14:1, /* unused */ + bit15:1, /* unused */ + bit16:1, /* unused */ + bit17:1, /* unused */ + bit18:1, /* unused */ + bit19:1, /* unused */ + bit20:1, /* unused */ + bit21:1, /* unused */ + bit22:1, + bit23:1, /* reserved */ + :8; /* reserved */ + } type; + }; + +#else +#error "Please fix " +#endif + +#define IOAM6_TRACE_DATA_SIZE_MAX 244 + __u8 data[0]; +} __attribute__((packed)); + +#endif /* _LINUX_IOAM6_H */ diff --git a/include/uapi/linux/ioam6_genl.h b/include/uapi/linux/ioam6_genl.h new file mode 100644 index 00000000..6043d9f6 --- /dev/null +++ b/include/uapi/linux/ioam6_genl.h @@ -0,0 +1,52 @@ +/* SPDX-License-Identifier: GPL-2.0+ WITH Linux-syscall-note */ +/* + * IPv6 IOAM Generic Netlink API + * + * Author: + 
* Justin Iurman + */ + +#ifndef _LINUX_IOAM6_GENL_H +#define _LINUX_IOAM6_GENL_H + +#define IOAM6_GENL_NAME "IOAM6" +#define IOAM6_GENL_VERSION 0x1 + +enum { + IOAM6_ATTR_UNSPEC, + + IOAM6_ATTR_NS_ID, /* u16 */ + IOAM6_ATTR_NS_DATA, /* u32 */ + IOAM6_ATTR_NS_DATA_WIDE,/* u64 */ + +#define IOAM6_MAX_SCHEMA_DATA_LEN (255 * 4) + IOAM6_ATTR_SC_ID, /* u32 */ + IOAM6_ATTR_SC_DATA, /* Binary */ + IOAM6_ATTR_SC_NONE, /* Flag */ + + IOAM6_ATTR_PAD, + + __IOAM6_ATTR_MAX, +}; + +#define IOAM6_ATTR_MAX (__IOAM6_ATTR_MAX - 1) + +enum { + IOAM6_CMD_UNSPEC, + + IOAM6_CMD_ADD_NAMESPACE, + IOAM6_CMD_DEL_NAMESPACE, + IOAM6_CMD_DUMP_NAMESPACES, + + IOAM6_CMD_ADD_SCHEMA, + IOAM6_CMD_DEL_SCHEMA, + IOAM6_CMD_DUMP_SCHEMAS, + + IOAM6_CMD_NS_SET_SCHEMA, + + __IOAM6_CMD_MAX, +}; + +#define IOAM6_CMD_MAX (__IOAM6_CMD_MAX - 1) + +#endif /* _LINUX_IOAM6_GENL_H */ diff --git a/include/uapi/linux/ioam6_iptunnel.h b/include/uapi/linux/ioam6_iptunnel.h new file mode 100644 index 00000000..fdf52e66 --- /dev/null +++ b/include/uapi/linux/ioam6_iptunnel.h @@ -0,0 +1,20 @@ +/* SPDX-License-Identifier: GPL-2.0+ WITH Linux-syscall-note */ +/* + * IPv6 IOAM Lightweight Tunnel API + * + * Author: + * Justin Iurman + */ + +#ifndef _LINUX_IOAM6_IPTUNNEL_H +#define _LINUX_IOAM6_IPTUNNEL_H + +enum { + IOAM6_IPTUNNEL_UNSPEC, + IOAM6_IPTUNNEL_TRACE, /* struct ioam6_trace_hdr */ + __IOAM6_IPTUNNEL_MAX, +}; + +#define IOAM6_IPTUNNEL_MAX (__IOAM6_IPTUNNEL_MAX - 1) + +#endif /* _LINUX_IOAM6_IPTUNNEL_H */ From f0b3808afa756a71bf9dd2e7597da16b17892471 Mon Sep 17 00:00:00 2001 From: Justin Iurman Date: Sun, 1 Aug 2021 14:45:50 +0200 Subject: [PATCH 17/30] Add, show, link, remove IOAM namespaces and schemas This patch provides support for adding, listing and removing IOAM namespaces and schemas with iproute2. When adding an IOAM namespace, both "data" (=u32) and "wide" (=u64) are optional. Therefore, you can either have none, one of them, or both at the same time. 
When adding an IOAM schema, there is no restriction on "DATA" except its size (see IOAM6_MAX_SCHEMA_DATA_LEN). By default, an IOAM namespace has no active IOAM schema (meaning an IOAM namespace is not linked to an IOAM schema), and an IOAM schema is not considered as "active" (meaning an IOAM schema is not linked to an IOAM namespace). It is possible to link an IOAM namespace with an IOAM schema, thanks to the last command below (meaning the IOAM schema will be considered as "active" for the specific IOAM namespace). $ ip ioam Usage: ip ioam { COMMAND | help } ip ioam namespace show ip ioam namespace add ID [ data DATA32 ] [ wide DATA64 ] ip ioam namespace del ID ip ioam schema show ip ioam schema add ID DATA ip ioam schema del ID ip ioam namespace set ID schema { ID | none } Signed-off-by: Justin Iurman Signed-off-by: David Ahern --- ip/Makefile | 2 +- ip/ip.c | 3 +- ip/ip_common.h | 1 + ip/ipioam6.c | 337 +++++++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 341 insertions(+), 2 deletions(-) create mode 100644 ip/ipioam6.c diff --git a/ip/Makefile b/ip/Makefile index b03af29b..2ae9df89 100644 --- a/ip/Makefile +++ b/ip/Makefile @@ -11,7 +11,7 @@ IPOBJ=ip.o ipaddress.o ipaddrlabel.o iproute.o iprule.o ipnetns.o \ iplink_bridge.o iplink_bridge_slave.o ipfou.o iplink_ipvlan.o \ iplink_geneve.o iplink_vrf.o iproute_lwtunnel.o ipmacsec.o ipila.o \ ipvrf.o iplink_xstats.o ipseg6.o iplink_netdevsim.o iplink_rmnet.o \ - ipnexthop.o ipmptcp.o iplink_bareudp.o iplink_wwan.o + ipnexthop.o ipmptcp.o iplink_bareudp.o iplink_wwan.o ipioam6.o RTMONOBJ=rtmon.o diff --git a/ip/ip.c b/ip/ip.c index 8e4c6eb5..e7ffeaff 100644 --- a/ip/ip.c +++ b/ip/ip.c @@ -64,7 +64,7 @@ static void usage(void) fprintf(stderr, "Usage: ip [ OPTIONS ] OBJECT { COMMAND | help }\n" " ip [ -force ] -batch filename\n" - "where OBJECT := { address | addrlabel | fou | help | ila | l2tp | link |\n" + "where OBJECT := { address | addrlabel | fou | help | ila | ioam | l2tp | link |\n" " macsec | 
maddress | monitor | mptcp | mroute | mrule |\n" " neighbor | neighbour | netconf | netns | nexthop | ntable |\n" " ntbl | route | rule | sr | tap | tcpmetrics |\n" @@ -121,6 +121,7 @@ static const struct cmd { { "sr", do_seg6 }, { "nexthop", do_ipnh }, { "mptcp", do_mptcp }, + { "ioam", do_ioam6 }, { "help", do_help }, { 0 } }; diff --git a/ip/ip_common.h b/ip/ip_common.h index b5b2b082..ad018183 100644 --- a/ip/ip_common.h +++ b/ip/ip_common.h @@ -90,6 +90,7 @@ int netns_identify_pid(const char *pidstr, char *name, int len); int do_seg6(int argc, char **argv); int do_ipnh(int argc, char **argv); int do_mptcp(int argc, char **argv); +int do_ioam6(int argc, char **argv); int iplink_get(char *name, __u32 filt_mask); int iplink_ifla_xstats(int argc, char **argv); diff --git a/ip/ipioam6.c b/ip/ipioam6.c new file mode 100644 index 00000000..253d0b66 --- /dev/null +++ b/ip/ipioam6.c @@ -0,0 +1,337 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * ioam6.c "ip ioam" + * + * Author: Justin Iurman + */ + +#include +#include +#include +#include +#include +#include + +#include +#include + +#include "utils.h" +#include "ip_common.h" +#include "libgenl.h" +#include "json_print.h" + +static void usage(void) +{ + fprintf(stderr, + "Usage: ip ioam { COMMAND | help }\n" + " ip ioam namespace show\n" + " ip ioam namespace add ID [ data DATA32 ] [ wide DATA64 ]\n" + " ip ioam namespace del ID\n" + " ip ioam schema show\n" + " ip ioam schema add ID DATA\n" + " ip ioam schema del ID\n" + " ip ioam namespace set ID schema { ID | none }\n"); + exit(-1); +} + +static struct rtnl_handle grth = { .fd = -1 }; +static int genl_family = -1; + +#define IOAM6_REQUEST(_req, _bufsiz, _cmd, _flags) \ + GENL_REQUEST(_req, _bufsiz, genl_family, 0, \ + IOAM6_GENL_VERSION, _cmd, _flags) + +static struct { + unsigned int cmd; + __u32 sc_id; + __u32 ns_data; + __u64 ns_data_wide; + __u16 ns_id; + bool has_ns_data; + bool has_ns_data_wide; + bool sc_none; + __u8 sc_data[IOAM6_MAX_SCHEMA_DATA_LEN]; +} opts; 
+ +static void print_namespace(struct rtattr *attrs[]) +{ + print_uint(PRINT_ANY, "namespace", "namespace %u", + rta_getattr_u16(attrs[IOAM6_ATTR_NS_ID])); + + if (attrs[IOAM6_ATTR_SC_ID]) + print_uint(PRINT_ANY, "schema", " [schema %u]", + rta_getattr_u32(attrs[IOAM6_ATTR_SC_ID])); + + if (!attrs[IOAM6_ATTR_NS_DATA]) + print_null(PRINT_ANY, "data", "", NULL); + else + print_hex(PRINT_ANY, "data", ", data %#010x", + rta_getattr_u32(attrs[IOAM6_ATTR_NS_DATA])); + + if (!attrs[IOAM6_ATTR_NS_DATA_WIDE]) + print_null(PRINT_ANY, "wide", "", NULL); + else + print_0xhex(PRINT_ANY, "wide", ", wide %#018lx", + rta_getattr_u64(attrs[IOAM6_ATTR_NS_DATA_WIDE])); + + print_null(PRINT_ANY, "", "\n", NULL); +} + +static void print_schema(struct rtattr *attrs[]) +{ + __u8 data[IOAM6_MAX_SCHEMA_DATA_LEN]; + int len, i = 0; + + print_uint(PRINT_ANY, "schema", "schema %u", + rta_getattr_u32(attrs[IOAM6_ATTR_SC_ID])); + + if (attrs[IOAM6_ATTR_NS_ID]) + print_uint(PRINT_ANY, "namespace", " [namespace %u]", + rta_getattr_u16(attrs[IOAM6_ATTR_NS_ID])); + + len = RTA_PAYLOAD(attrs[IOAM6_ATTR_SC_DATA]); + memcpy(data, RTA_DATA(attrs[IOAM6_ATTR_SC_DATA]), len); + + print_null(PRINT_ANY, "data", ", data:", NULL); + while (i < len) { + print_hhu(PRINT_ANY, "", " %02x", data[i]); + i++; + } + print_null(PRINT_ANY, "", "\n", NULL); +} + +static int process_msg(struct nlmsghdr *n, void *arg) +{ + struct rtattr *attrs[IOAM6_ATTR_MAX + 1]; + struct genlmsghdr *ghdr; + int len = n->nlmsg_len; + + if (n->nlmsg_type != genl_family) + return -1; + + len -= NLMSG_LENGTH(GENL_HDRLEN); + if (len < 0) + return -1; + + ghdr = NLMSG_DATA(n); + parse_rtattr(attrs, IOAM6_ATTR_MAX, (void *)ghdr + GENL_HDRLEN, len); + + open_json_object(NULL); + switch (ghdr->cmd) { + case IOAM6_CMD_DUMP_NAMESPACES: + print_namespace(attrs); + break; + case IOAM6_CMD_DUMP_SCHEMAS: + print_schema(attrs); + break; + } + close_json_object(); + + return 0; +} + +static int ioam6_do_cmd(void) +{ + IOAM6_REQUEST(req, 1056, opts.cmd, 
NLM_F_REQUEST); + int dump = 0; + + if (genl_init_handle(&grth, IOAM6_GENL_NAME, &genl_family)) + exit(1); + + req.n.nlmsg_type = genl_family; + + switch (opts.cmd) { + case IOAM6_CMD_ADD_NAMESPACE: + addattr16(&req.n, sizeof(req), IOAM6_ATTR_NS_ID, opts.ns_id); + if (opts.has_ns_data) + addattr32(&req.n, sizeof(req), IOAM6_ATTR_NS_DATA, + opts.ns_data); + if (opts.has_ns_data_wide) + addattr64(&req.n, sizeof(req), IOAM6_ATTR_NS_DATA_WIDE, + opts.ns_data_wide); + break; + case IOAM6_CMD_DEL_NAMESPACE: + addattr16(&req.n, sizeof(req), IOAM6_ATTR_NS_ID, opts.ns_id); + break; + case IOAM6_CMD_DUMP_NAMESPACES: + case IOAM6_CMD_DUMP_SCHEMAS: + dump = 1; + break; + case IOAM6_CMD_ADD_SCHEMA: + addattr32(&req.n, sizeof(req), IOAM6_ATTR_SC_ID, opts.sc_id); + addattr_l(&req.n, sizeof(req), IOAM6_ATTR_SC_DATA, opts.sc_data, + strlen((const char *)opts.sc_data)); + break; + case IOAM6_CMD_DEL_SCHEMA: + addattr32(&req.n, sizeof(req), IOAM6_ATTR_SC_ID, opts.sc_id); + break; + case IOAM6_CMD_NS_SET_SCHEMA: + addattr16(&req.n, sizeof(req), IOAM6_ATTR_NS_ID, opts.ns_id); + if (opts.sc_none) + addattr(&req.n, sizeof(req), IOAM6_ATTR_SC_NONE); + else + addattr32(&req.n, sizeof(req), IOAM6_ATTR_SC_ID, + opts.sc_id); + break; + } + + if (!dump) { + if (rtnl_talk(&grth, &req.n, NULL) < 0) + return -1; + } else { + req.n.nlmsg_flags |= NLM_F_DUMP; + req.n.nlmsg_seq = grth.dump = ++grth.seq; + if (rtnl_send(&grth, &req, req.n.nlmsg_len) < 0) { + perror("Failed to send dump request"); + exit(1); + } + + new_json_obj(json); + if (rtnl_dump_filter(&grth, process_msg, stdout) < 0) { + fprintf(stderr, "Dump terminated\n"); + exit(1); + } + delete_json_obj(); + fflush(stdout); + } + + return 0; +} + +int do_ioam6(int argc, char **argv) +{ + bool maybe_wide = false; + + if (argc < 1 || strcmp(*argv, "help") == 0) + usage(); + + memset(&opts, 0, sizeof(opts)); + + if (strcmp(*argv, "namespace") == 0) { + NEXT_ARG(); + + if (strcmp(*argv, "show") == 0) { + opts.cmd = IOAM6_CMD_DUMP_NAMESPACES; + 
+ } else if (strcmp(*argv, "add") == 0) { + NEXT_ARG(); + + if (get_u16(&opts.ns_id, *argv, 0)) + invarg("Invalid namespace ID", *argv); + + if (NEXT_ARG_OK()) { + NEXT_ARG_FWD(); + + if (strcmp(*argv, "data") == 0) { + NEXT_ARG(); + + if (get_u32(&opts.ns_data, *argv, 0)) + invarg("Invalid data", *argv); + + maybe_wide = true; + opts.has_ns_data = true; + + } else if (strcmp(*argv, "wide") == 0) { + NEXT_ARG(); + + if (get_u64(&opts.ns_data_wide, *argv, 16)) + invarg("Invalid wide data", *argv); + + opts.has_ns_data_wide = true; + + } else { + invarg("Invalid argument", *argv); + } + } + + if (NEXT_ARG_OK()) { + NEXT_ARG_FWD(); + + if (!maybe_wide || strcmp(*argv, "wide") != 0) + invarg("Unexpected argument", *argv); + + NEXT_ARG(); + + if (get_u64(&opts.ns_data_wide, *argv, 16)) + invarg("Invalid wide data", *argv); + + opts.has_ns_data_wide = true; + } + + opts.cmd = IOAM6_CMD_ADD_NAMESPACE; + + } else if (strcmp(*argv, "del") == 0) { + NEXT_ARG(); + + if (get_u16(&opts.ns_id, *argv, 0)) + invarg("Invalid namespace ID", *argv); + + opts.cmd = IOAM6_CMD_DEL_NAMESPACE; + + } else if (strcmp(*argv, "set") == 0) { + NEXT_ARG(); + + if (get_u16(&opts.ns_id, *argv, 0)) + invarg("Invalid namespace ID", *argv); + + NEXT_ARG(); + + if (strcmp(*argv, "schema") != 0) + invarg("Unknown", *argv); + + NEXT_ARG(); + + if (strcmp(*argv, "none") == 0) { + opts.sc_none = true; + + } else { + if (get_u32(&opts.sc_id, *argv, 0)) + invarg("Invalid schema ID", *argv); + + opts.sc_none = false; + } + + opts.cmd = IOAM6_CMD_NS_SET_SCHEMA; + + } else { + invarg("Unknown", *argv); + } + + } else if (strcmp(*argv, "schema") == 0) { + NEXT_ARG(); + + if (strcmp(*argv, "show") == 0) { + opts.cmd = IOAM6_CMD_DUMP_SCHEMAS; + + } else if (strcmp(*argv, "add") == 0) { + NEXT_ARG(); + + if (get_u32(&opts.sc_id, *argv, 0)) + invarg("Invalid schema ID", *argv); + + NEXT_ARG(); + + if (strlen(*argv) > IOAM6_MAX_SCHEMA_DATA_LEN) + invarg("Schema DATA too big", *argv); + + memcpy(opts.sc_data, *argv, 
strlen(*argv)); + opts.cmd = IOAM6_CMD_ADD_SCHEMA; + + } else if (strcmp(*argv, "del") == 0) { + NEXT_ARG(); + + if (get_u32(&opts.sc_id, *argv, 0)) + invarg("Invalid schema ID", *argv); + + opts.cmd = IOAM6_CMD_DEL_SCHEMA; + + } else { + invarg("Unknown", *argv); + } + + } else { + invarg("Unknown", *argv); + } + + return ioam6_do_cmd(); +} From 2d83c71082460ed3490db32f200959bb76df5bd2 Mon Sep 17 00:00:00 2001 From: Justin Iurman Date: Sun, 1 Aug 2021 14:45:51 +0200 Subject: [PATCH 18/30] New IOAM6 encap type for routes This patch provides a new encap type for routes to insert an IOAM pre-allocated trace: $ ip -6 ro ad fc00::1/128 encap ioam6 trace prealloc type 0x800000 ns 1 size 12 dev eth0 where: - "trace" and "prealloc" may look redundant, but they leave room for future implementations of other IOAM option types. - "type" is a bitfield (=u32) defining the IOAM pre-allocated trace type (see the corresponding uapi). - "ns" is an IOAM namespace ID attached to the pre-allocated trace. - "size" is the pre-allocated trace size in bytes; it must be a multiple of 4 octets and must not exceed IOAM6_TRACE_DATA_SIZE_MAX.
Signed-off-by: Justin Iurman Signed-off-by: David Ahern --- ip/iproute.c | 5 +- ip/iproute_lwtunnel.c | 127 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 130 insertions(+), 2 deletions(-) diff --git a/ip/iproute.c b/ip/iproute.c index 1ccf51a5..1e5e2002 100644 --- a/ip/iproute.c +++ b/ip/iproute.c @@ -101,8 +101,8 @@ static void usage(void) "TIME := NUMBER[s|ms]\n" "BOOL := [1|0]\n" "FEATURES := ecn\n" - "ENCAPTYPE := [ mpls | ip | ip6 | seg6 | seg6local | rpl ]\n" - "ENCAPHDR := [ MPLSLABEL | SEG6HDR | SEG6LOCAL ]\n" + "ENCAPTYPE := [ mpls | ip | ip6 | seg6 | seg6local | rpl | ioam6 ]\n" + "ENCAPHDR := [ MPLSLABEL | SEG6HDR | SEG6LOCAL | IOAM6HDR ]\n" "SEG6HDR := [ mode SEGMODE ] segs ADDR1,ADDRi,ADDRn [hmac HMACKEYID] [cleanup]\n" "SEGMODE := [ encap | inline ]\n" "SEG6LOCAL := action ACTION [ OPTIONS ] [ count ]\n" @@ -112,6 +112,7 @@ static void usage(void) "OPTIONS := OPTION [ OPTIONS ]\n" "OPTION := { srh SEG6HDR | nh4 ADDR | nh6 ADDR | iif DEV | oif DEV |\n" " table TABLEID | vrftable TABLEID | endpoint PROGNAME }\n" + "IOAM6HDR := trace prealloc type IOAM6_TRACE_TYPE ns IOAM6_NAMESPACE size IOAM6_TRACE_SIZE\n" "ROUTE_GET_FLAGS := [ fibmatch ]\n"); exit(-1); } diff --git a/ip/iproute_lwtunnel.c b/ip/iproute_lwtunnel.c index c4bae68d..218d5086 100644 --- a/ip/iproute_lwtunnel.c +++ b/ip/iproute_lwtunnel.c @@ -34,6 +34,8 @@ #include #include #include +#include +#include static const char *format_encap_type(int type) { @@ -54,6 +56,8 @@ static const char *format_encap_type(int type) return "seg6local"; case LWTUNNEL_ENCAP_RPL: return "rpl"; + case LWTUNNEL_ENCAP_IOAM6: + return "ioam6"; default: return "unknown"; } @@ -90,6 +94,8 @@ static int read_encap_type(const char *name) return LWTUNNEL_ENCAP_SEG6_LOCAL; else if (strcmp(name, "rpl") == 0) return LWTUNNEL_ENCAP_RPL; + else if (strcmp(name, "ioam6") == 0) + return LWTUNNEL_ENCAP_IOAM6; else if (strcmp(name, "help") == 0) encap_type_usage(); @@ -204,6 +210,25 @@ static void print_encap_rpl(FILE 
*fp, struct rtattr *encap) print_rpl_srh(fp, srh); } +static void print_encap_ioam6(FILE *fp, struct rtattr *encap) +{ + struct rtattr *tb[IOAM6_IPTUNNEL_MAX + 1]; + struct ioam6_trace_hdr *trace; + + parse_rtattr_nested(tb, IOAM6_IPTUNNEL_MAX, encap); + + if (!tb[IOAM6_IPTUNNEL_TRACE]) + return; + + trace = RTA_DATA(tb[IOAM6_IPTUNNEL_TRACE]); + + print_null(PRINT_ANY, "trace", "trace ", NULL); + print_null(PRINT_ANY, "prealloc", "prealloc ", NULL); + print_hex(PRINT_ANY, "type", "type %#08x ", ntohl(trace->type_be32) >> 8); + print_uint(PRINT_ANY, "ns", "ns %u ", ntohs(trace->namespace_id)); + print_uint(PRINT_ANY, "size", "size %u ", trace->remlen * 4); +} + static const char *seg6_action_names[SEG6_LOCAL_ACTION_MAX + 1] = { [SEG6_LOCAL_ACTION_END] = "End", [SEG6_LOCAL_ACTION_END_X] = "End.X", @@ -657,6 +682,9 @@ void lwt_print_encap(FILE *fp, struct rtattr *encap_type, case LWTUNNEL_ENCAP_RPL: print_encap_rpl(fp, encap); break; + case LWTUNNEL_ENCAP_IOAM6: + print_encap_ioam6(fp, encap); + break; } } @@ -853,6 +881,102 @@ out: return ret; } +static int parse_encap_ioam6(struct rtattr *rta, size_t len, int *argcp, + char ***argvp) +{ + struct ioam6_trace_hdr *trace; + char **argv = *argvp; + int argc = *argcp; + int ns_found = 0; + __u16 size = 0; + __u32 type = 0; + __u16 ns; + + trace = calloc(1, sizeof(*trace)); + if (!trace) + return -1; + + if (strcmp(*argv, "trace")) + missarg("trace"); + + NEXT_ARG(); + if (strcmp(*argv, "prealloc")) + missarg("prealloc"); + + while (NEXT_ARG_OK()) { + NEXT_ARG_FWD(); + + if (strcmp(*argv, "type") == 0) { + NEXT_ARG(); + + if (type) + duparg2("type", *argv); + + if (get_u32(&type, *argv, 0) || !type) + invarg("Invalid type", *argv); + + trace->type_be32 = htonl(type << 8); + + } else if (strcmp(*argv, "ns") == 0) { + NEXT_ARG(); + + if (ns_found++) + duparg2("ns", *argv); + + if (!type) + missarg("type"); + + if (get_u16(&ns, *argv, 0)) + invarg("Invalid namespace ID", *argv); + + trace->namespace_id = htons(ns); + + } 
else if (strcmp(*argv, "size") == 0) { + NEXT_ARG(); + + if (size) + duparg2("size", *argv); + + if (!type) + missarg("type"); + if (!ns_found) + missarg("ns"); + + if (get_u16(&size, *argv, 0) || !size) + invarg("Invalid size", *argv); + + if (size % 4) + invarg("Size must be a 4-octet multiple", *argv); + if (size > IOAM6_TRACE_DATA_SIZE_MAX) + invarg("Size too big", *argv); + + trace->remlen = (__u8)(size / 4); + + } else { + break; + } + } + + if (!type) + missarg("type"); + if (!ns_found) + missarg("ns"); + if (!size) + missarg("size"); + + if (rta_addattr_l(rta, len, IOAM6_IPTUNNEL_TRACE, trace, + sizeof(*trace))) { + free(trace); + return -1; + } + + *argcp = argc + 1; + *argvp = argv - 1; + + free(trace); + return 0; +} + struct lwt_x { struct rtattr *rta; size_t len; @@ -1744,6 +1868,9 @@ int lwt_parse_encap(struct rtattr *rta, size_t len, int *argcp, char ***argvp, case LWTUNNEL_ENCAP_RPL: ret = parse_encap_rpl(rta, len, &argc, &argv); break; + case LWTUNNEL_ENCAP_IOAM6: + ret = parse_encap_ioam6(rta, len, &argc, &argv); + break; default: fprintf(stderr, "Error: unsupported encap type\n"); break; From 86c596ed91ec4e4cf4fa99c6555464c8b79786ac Mon Sep 17 00:00:00 2001 From: Justin Iurman Date: Sun, 1 Aug 2021 14:45:52 +0200 Subject: [PATCH 19/30] IOAM man8 This patch provides man8 documentation for IOAM inside ip, ip-ioam and ip-route. 
Signed-off-by: Justin Iurman Signed-off-by: David Ahern --- man/man8/ip-ioam.8 | 72 ++++++++++++++++++++++++++++++++++++++++++ man/man8/ip-route.8.in | 36 ++++++++++++++++++++- man/man8/ip.8 | 7 +++- 3 files changed, 113 insertions(+), 2 deletions(-) create mode 100644 man/man8/ip-ioam.8 diff --git a/man/man8/ip-ioam.8 b/man/man8/ip-ioam.8 new file mode 100644 index 00000000..1bdc0ece --- /dev/null +++ b/man/man8/ip-ioam.8 @@ -0,0 +1,72 @@ +.TH IP\-IOAM 8 "05 Jul 2021" "iproute2" "Linux" +.SH "NAME" +ip-ioam \- IPv6 In-situ OAM (IOAM) +.SH SYNOPSIS +.sp +.ad l +.in +8 +.ti -8 +.B ip ioam +.RI " { " COMMAND " | " +.BR help " }" +.sp +.ti -8 + +.ti -8 +.B ip ioam namespace show + +.ti -8 +.B ip ioam namespace add +.I ID +.BR " [ " +.B data +.I DATA32 +.BR "]" +.BR " [ " +.B wide +.I DATA64 +.BR "]" + +.ti -8 +.B ip ioam namespace del +.I ID + +.ti -8 +.B ip ioam schema show + +.ti -8 +.B ip ioam schema add +.I ID DATA + +.ti -8 +.B ip ioam schema del +.I ID + +.ti -8 +.B ip ioam namespace set +.I ID +.B schema +.RI " { " ID " | " +.BR none " }" + +.SH DESCRIPTION +The \fBip ioam\fR command is used to configure IPv6 In-situ OAM (IOAM6) +internal parameters, namely IOAM namespaces and schemas. +.PP +Those parameters also include the mapping between an IOAM namespace and an IOAM +schema. 
+ +.SH EXAMPLES +.PP +.SS Configure an IOAM namespace (ID = 1) with both data (32 bits) and wide data (64 bits) +.nf +# ip ioam namespace add 1 data 0xdeadbeef wide 0xcafec0caf00dc0de +.PP +.SS Link an existing IOAM schema (ID = 7) to an existing IOAM namespace (ID = 1) +.nf +# ip ioam namespace set 1 schema 7 +.SH SEE ALSO +.br +.BR ip-route (8) +.SH AUTHOR +Justin Iurman diff --git a/man/man8/ip-route.8.in b/man/man8/ip-route.8.in index 4b1947ab..c9a9cbf1 100644 --- a/man/man8/ip-route.8.in +++ b/man/man8/ip-route.8.in @@ -190,7 +190,7 @@ throw " | " unreachable " | " prohibit " | " blackhole " | " nat " ]" .ti -8 .IR ENCAP " := [ " .IR ENCAP_MPLS " | " ENCAP_IP " | " ENCAP_BPF " | " -.IR ENCAP_SEG6 " | " ENCAP_SEG6LOCAL " ] " +.IR ENCAP_SEG6 " | " ENCAP_SEG6LOCAL " | " ENCAP_IOAM6 " ] " .ti -8 .IR ENCAP_MPLS " := " @@ -243,6 +243,18 @@ throw " | " unreachable " | " prohibit " | " blackhole " | " nat " ]" .IR SEG6_ACTION_PARAM " ] [ " .BR count " ] " +.ti -8 +.IR ENCAP_IOAM6 " := " +.B ioam6 +.BR trace +.BR prealloc +.BR type +.IR IOAM6_TRACE_TYPE +.BR ns +.IR IOAM6_NAMESPACE +.BR size +.IR IOAM6_TRACE_SIZE + .ti -8 .IR ROUTE_GET_FLAGS " := " .BR " [ " @@ -717,6 +729,9 @@ is a string specifying the supported encapsulation type. Namely: .sp .BI seg6local - local SRv6 segment processing +.sp +.BI ioam6 +- encapsulation type IPv6 IOAM .in -8 .I ENCAPHDR @@ -896,6 +911,20 @@ Additionally, encapsulate the matching packet within an outer IPv6 header followed by the specified SRH. The destination address of the outer IPv6 header is set to the first segment of the new SRH. The source address is set as described in \fBip-sr\fR(8). +.in -2 + +.B ioam6 +.in +2 +.I IOAM6_TRACE_TYPE +- List of IOAM data required in the trace, represented by a bitfield (24 bits). +.sp + +.I IOAM6_NAMESPACE +- Numerical value to represent an IOAM namespace. See \fBip-ioam\fR(8). +.sp + +.I IOAM6_TRACE_SIZE +- Size, in octets, of the pre-allocated trace data block. 
.in -4 .in -8 @@ -1220,6 +1249,11 @@ ip -6 route add 2001:db8:1::/64 encap seg6local action End.DT46 vrftable 100 dev Adds an IPv6 route with SRv6 decapsulation and forward with lookup in VRF table. .RE .PP +ip -6 route add 2001:db8:1::/64 encap ioam6 trace prealloc type 0x800000 ns 1 size 12 dev eth0 +.RS 4 +Adds an IPv6 route with an IOAM Pre-allocated Trace encapsulation that only includes the hop limit and the node id, configured for the IOAM namespace 1 and a pre-allocated data block of 12 octets. +.RE +.PP ip route add 10.1.1.0/30 nhid 10 .RS 4 Adds an ipv4 route using nexthop object with id 10. diff --git a/man/man8/ip.8 b/man/man8/ip.8 index 3f572889..c3598a02 100644 --- a/man/man8/ip.8 +++ b/man/man8/ip.8 @@ -22,7 +22,7 @@ ip \- show / manipulate routing, network devices, interfaces and tunnels .BR link " | " address " | " addrlabel " | " route " | " rule " | " neigh " | "\ ntable " | " tunnel " | " tuntap " | " maddress " | " mroute " | " mrule " | "\ monitor " | " xfrm " | " netns " | " l2tp " | " tcp_metrics " | " token " | "\ - macsec " | " vrf " | " mptcp " }" + macsec " | " vrf " | " mptcp " | " ioam " }" .sp .ti -8 @@ -252,6 +252,10 @@ readability. .B addrlabel - label configuration for protocol address selection. +.TP +.B ioam +- manage IOAM namespaces and IOAM schemas. + .TP .B l2tp - tunnel ethernet over IP (L2TPv3). @@ -405,6 +409,7 @@ was written by Alexey N. Kuznetsov and added in Linux 2.2. .SH SEE ALSO .BR ip-address (8), .BR ip-addrlabel (8), +.BR ip-ioam (8), .BR ip-l2tp (8), .BR ip-link (8), .BR ip-maddress (8), From 7e7270bb1f18dadbfcf6d4799aee9a85fecd95d9 Mon Sep 17 00:00:00 2001 From: Peilin Ye Date: Wed, 4 Aug 2021 11:15:16 -0700 Subject: [PATCH 20/30] tc/skbmod: Introduce SKBMOD_F_ECN option Recently we added SKBMOD_F_ECN option support to the kernel; support it in the tc-skbmod(8) front end, and update its man page accordingly. 
The 2 least significant bits of the Traffic Class field in IPv4 and IPv6 headers are used to represent different ECN states [1]: 0b00: "Non ECN-Capable Transport", Non-ECT 0b10: "ECN Capable Transport", ECT(0) 0b01: "ECN Capable Transport", ECT(1) 0b11: "Congestion Encountered", CE This new option, "ecn", marks ECT(0) and ECT(1) IPv{4,6} packets as CE, which is useful for ECN-based rate limiting. For example: $ tc filter add dev eth0 parent 1: protocol ip prio 10 \ u32 match ip protocol 1 0xff flowid 1:2 \ action skbmod \ ecn The updated tc-skbmod SYNOPSIS looks like the following: tc ... action skbmod { set SETTABLE | swap SWAPPABLE | ecn } ... Only one of "set", "swap" or "ecn" shall be used in a single tc-skbmod command. Trying to use more than one of them at a time is considered undefined behavior; pipe multiple tc-skbmod commands together instead. "set" and "swap" only affect Ethernet packets, while "ecn" only affects IP packets. Depends on kernel patch "net/sched: act_skbmod: Add SKBMOD_F_ECN option support", as well as iproute2 patch "tc/skbmod: Remove misinformation about the swap action". [1] https://en.wikipedia.org/wiki/Explicit_Congestion_Notification Reviewed-by: Cong Wang Signed-off-by: Peilin Ye Signed-off-by: David Ahern --- man/man8/tc-skbmod.8 | 38 +++++++++++++++++++++++++++++--------- tc/m_skbmod.c | 8 +++++++- 2 files changed, 36 insertions(+), 10 deletions(-) diff --git a/man/man8/tc-skbmod.8 b/man/man8/tc-skbmod.8 index 76512311..52eaf989 100644 --- a/man/man8/tc-skbmod.8 +++ b/man/man8/tc-skbmod.8 @@ -8,7 +8,8 @@ skbmod - user-friendly packet editor action .BR tc " ... " "action skbmod " "{ " "set " .IR SETTABLE " | " .BI swap " SWAPPABLE" -.RI " } [ " CONTROL " ] [ " +.RB " | " ecn +.RI "} [ " CONTROL " ] [ " .BI index " INDEX " ] @@ -37,6 +38,12 @@ action. Instead of having to manually edit 8-, 16-, or 32-bit chunks of an ethernet header, .B skbmod allows complete substitution of supported elements. 
+Action must be one of +.BR set ", " swap " and " ecn "." +.BR set " and " swap +only affect Ethernet packets, while +.B ecn +only affects IP packets. .SH OPTIONS .TP .BI dmac " DMAC" @@ -51,6 +58,10 @@ Change the ethertype to the specified value. .BI mac Used to swap mac addresses. .TP +.B ecn +Used to mark ECN Capable Transport (ECT) IP packets as Congestion Encountered (CE). +Does not affect Non ECN-Capable Transport (Non-ECT) packets. +.TP .I CONTROL The following keywords allow to control how the tree of qdisc, classes, filters and actions is further traversed after this action. @@ -115,7 +126,7 @@ tc filter add dev eth5 parent 1: protocol ip prio 10 \\ .EE .RE -Finally, swap the destination and source mac addresses in the header: +To swap the destination and source mac addresses in the Ethernet header: .RS .EX @@ -126,13 +137,22 @@ tc filter add dev eth3 parent 1: protocol ip prio 10 \\ .EE .RE -However, trying to -.B set -and -.B swap -in a single -.B skbmod -command will cause undefined behavior. +Finally, to mark the CE codepoint in the IP header for ECN Capable Transport (ECT) packets: + +.RS +.EX +tc filter add dev eth0 parent 1: protocol ip prio 10 \\ + u32 match ip protocol 1 0xff flowid 1:2 \\ + action skbmod \\ + ecn +.EE +.RE + +Only one of +.BR set ", " swap " and " ecn +shall be used in a single command. +Trying to use more than one of them in a single command is considered undefined behavior; pipe +multiple commands together instead. .SH SEE ALSO .BR tc (8), diff --git a/tc/m_skbmod.c b/tc/m_skbmod.c index 3fe30651..8d8bac5b 100644 --- a/tc/m_skbmod.c +++ b/tc/m_skbmod.c @@ -28,7 +28,7 @@ static void skbmod_explain(void) { fprintf(stderr, - "Usage:... skbmod { set | swap } [CONTROL] [index INDEX]\n" + "Usage:... 
skbmod { set | swap | ecn } [CONTROL] [index INDEX]\n" "where SETTABLE is: [dmac DMAC] [smac SMAC] [etype ETYPE]\n" "where SWAPPABLE is: \"mac\" to swap mac addresses\n" "\tDMAC := 6 byte Destination MAC address\n" @@ -111,6 +111,9 @@ static int parse_skbmod(struct action_util *a, int *argc_p, char ***argv_p, p.flags |= SKBMOD_F_SMAC; fprintf(stderr, "src MAC address <%s>\n", saddr); ok += 1; + } else if (matches(*argv, "ecn") == 0) { + p.flags |= SKBMOD_F_ECN; + ok += 1; } else if (matches(*argv, "help") == 0) { skbmod_usage(); } else { @@ -211,6 +214,9 @@ static int print_skbmod(struct action_util *au, FILE *f, struct rtattr *arg) if (p->flags & SKBMOD_F_SWAPMAC) fprintf(f, "swap mac "); + if (p->flags & SKBMOD_F_ECN) + fprintf(f, "ecn "); + fprintf(f, "\n\t index %u ref %d bind %d", p->index, p->refcnt, p->bindcnt); if (show_stats) { From df8912ede29c0fd812da1b8dcfdbb109dcd0af18 Mon Sep 17 00:00:00 2001 From: Justin Iurman Date: Wed, 11 Aug 2021 12:13:56 +0200 Subject: [PATCH 21/30] ipioam6: use print_nl instead of print_null This patch addresses Stephen's comment: """ > + print_null(PRINT_ANY, "", "\n", NULL); Use print_nl() since it handles the case of oneline output. Plus in JSON the newline is meaningless. """ It also removes two useless print_null's. 
Signed-off-by: Justin Iurman Signed-off-by: David Ahern --- ip/ipioam6.c | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/ip/ipioam6.c b/ip/ipioam6.c index 253d0b66..b63d7d5c 100644 --- a/ip/ipioam6.c +++ b/ip/ipioam6.c @@ -62,19 +62,15 @@ static void print_namespace(struct rtattr *attrs[]) print_uint(PRINT_ANY, "schema", " [schema %u]", rta_getattr_u32(attrs[IOAM6_ATTR_SC_ID])); - if (!attrs[IOAM6_ATTR_NS_DATA]) - print_null(PRINT_ANY, "data", "", NULL); - else + if (attrs[IOAM6_ATTR_NS_DATA]) print_hex(PRINT_ANY, "data", ", data %#010x", rta_getattr_u32(attrs[IOAM6_ATTR_NS_DATA])); - if (!attrs[IOAM6_ATTR_NS_DATA_WIDE]) - print_null(PRINT_ANY, "wide", "", NULL); - else + if (attrs[IOAM6_ATTR_NS_DATA_WIDE]) print_0xhex(PRINT_ANY, "wide", ", wide %#018lx", rta_getattr_u64(attrs[IOAM6_ATTR_NS_DATA_WIDE])); - print_null(PRINT_ANY, "", "\n", NULL); + print_nl(); } static void print_schema(struct rtattr *attrs[]) @@ -97,7 +93,7 @@ static void print_schema(struct rtattr *attrs[]) print_hhu(PRINT_ANY, "", " %02x", data[i]); i++; } - print_null(PRINT_ANY, "", "\n", NULL); + print_nl(); } static int process_msg(struct nlmsghdr *n, void *arg) From c730bd0b11a68014028ea82e25e2230c42399d06 Mon Sep 17 00:00:00 2001 From: Ilya Dmitrichenko Date: Mon, 9 Aug 2021 11:22:39 +0100 Subject: [PATCH 22/30] ip/tunnel: always print all known attributes Presently, if a Geneve or VXLAN interface was created with 'external', it's not possible for a user to determine e.g. the value of 'dstport' after creation. This change fixes that by avoiding early returns. This change partly reverts commit 00ff4b8e31af ("ip/tunnel: Be consistent when printing tunnel collect metadata"). 
Signed-off-by: Ilya Dmitrichenko Acked-by: Daniel Borkmann Signed-off-by: David Ahern --- ip/iplink_geneve.c | 12 ++++-------- ip/iplink_vxlan.c | 12 ++++-------- ip/link_gre.c | 1 - ip/link_gre6.c | 1 - ip/link_ip6tnl.c | 1 - ip/link_iptnl.c | 1 - 6 files changed, 8 insertions(+), 20 deletions(-) diff --git a/ip/iplink_geneve.c b/ip/iplink_geneve.c index 9299236c..78fc818e 100644 --- a/ip/iplink_geneve.c +++ b/ip/iplink_geneve.c @@ -243,7 +243,6 @@ static int geneve_parse_opt(struct link_util *lu, int argc, char **argv, static void geneve_print_opt(struct link_util *lu, FILE *f, struct rtattr *tb[]) { - __u32 vni; __u8 ttl = 0; __u8 tos = 0; @@ -252,15 +251,12 @@ static void geneve_print_opt(struct link_util *lu, FILE *f, struct rtattr *tb[]) if (tb[IFLA_GENEVE_COLLECT_METADATA]) { print_bool(PRINT_ANY, "external", "external ", true); - return; } - if (!tb[IFLA_GENEVE_ID] || - RTA_PAYLOAD(tb[IFLA_GENEVE_ID]) < sizeof(__u32)) - return; - - vni = rta_getattr_u32(tb[IFLA_GENEVE_ID]); - print_uint(PRINT_ANY, "id", "id %u ", vni); + if (tb[IFLA_GENEVE_ID] && + RTA_PAYLOAD(tb[IFLA_GENEVE_ID]) >= sizeof(__u32)) { + print_uint(PRINT_ANY, "id", "id %u ", rta_getattr_u32(tb[IFLA_GENEVE_ID])); + } if (tb[IFLA_GENEVE_REMOTE]) { __be32 addr = rta_getattr_u32(tb[IFLA_GENEVE_REMOTE]); diff --git a/ip/iplink_vxlan.c b/ip/iplink_vxlan.c index bae9d994..9afa3cca 100644 --- a/ip/iplink_vxlan.c +++ b/ip/iplink_vxlan.c @@ -408,7 +408,6 @@ static int vxlan_parse_opt(struct link_util *lu, int argc, char **argv, static void vxlan_print_opt(struct link_util *lu, FILE *f, struct rtattr *tb[]) { - __u32 vni; __u8 ttl = 0; __u8 tos = 0; __u32 maxaddr; @@ -419,15 +418,12 @@ static void vxlan_print_opt(struct link_util *lu, FILE *f, struct rtattr *tb[]) if (tb[IFLA_VXLAN_COLLECT_METADATA] && rta_getattr_u8(tb[IFLA_VXLAN_COLLECT_METADATA])) { print_bool(PRINT_ANY, "external", "external ", true); - return; } - if (!tb[IFLA_VXLAN_ID] || - RTA_PAYLOAD(tb[IFLA_VXLAN_ID]) < sizeof(__u32)) - return; 
- - vni = rta_getattr_u32(tb[IFLA_VXLAN_ID]); - print_uint(PRINT_ANY, "id", "id %u ", vni); + if (tb[IFLA_VXLAN_ID] && + RTA_PAYLOAD(tb[IFLA_VXLAN_ID]) >= sizeof(__u32)) { + print_uint(PRINT_ANY, "id", "id %u ", rta_getattr_u32(tb[IFLA_VXLAN_ID])); + } if (tb[IFLA_VXLAN_GROUP]) { __be32 addr = rta_getattr_u32(tb[IFLA_VXLAN_GROUP]); diff --git a/ip/link_gre.c b/ip/link_gre.c index 6d4a8be8..f462a227 100644 --- a/ip/link_gre.c +++ b/ip/link_gre.c @@ -442,7 +442,6 @@ static void gre_print_opt(struct link_util *lu, FILE *f, struct rtattr *tb[]) if (tb[IFLA_GRE_COLLECT_METADATA]) { print_bool(PRINT_ANY, "external", "external ", true); - return; } tnl_print_endpoint("remote", tb[IFLA_GRE_REMOTE], AF_INET); diff --git a/ip/link_gre6.c b/ip/link_gre6.c index f33598af..232d9bde 100644 --- a/ip/link_gre6.c +++ b/ip/link_gre6.c @@ -461,7 +461,6 @@ static void gre_print_opt(struct link_util *lu, FILE *f, struct rtattr *tb[]) if (tb[IFLA_GRE_COLLECT_METADATA]) { print_bool(PRINT_ANY, "external", "external ", true); - return; } if (tb[IFLA_GRE_FLAGS]) diff --git a/ip/link_ip6tnl.c b/ip/link_ip6tnl.c index c7b49b02..2fcc13ef 100644 --- a/ip/link_ip6tnl.c +++ b/ip/link_ip6tnl.c @@ -344,7 +344,6 @@ static void ip6tunnel_print_opt(struct link_util *lu, FILE *f, struct rtattr *tb if (tb[IFLA_IPTUN_COLLECT_METADATA]) { print_bool(PRINT_ANY, "external", "external ", true); - return; } if (tb[IFLA_IPTUN_FLAGS]) diff --git a/ip/link_iptnl.c b/ip/link_iptnl.c index 636cdb2c..b25855ba 100644 --- a/ip/link_iptnl.c +++ b/ip/link_iptnl.c @@ -368,7 +368,6 @@ static void iptunnel_print_opt(struct link_util *lu, FILE *f, struct rtattr *tb[ if (tb[IFLA_IPTUN_COLLECT_METADATA]) { print_bool(PRINT_ANY, "external", "external ", true); - return; } if (tb[IFLA_IPTUN_PROTO]) { From 926ad64104bb624f67d2171aa8dffe63d5cd7b41 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Wed, 11 Aug 2021 12:23:33 -0600 Subject: [PATCH 23/30] Update kernel headers Update kernel headers to commit: 88be32634905 ("Merge 
branch 'dsa-tagger-helpers'") Signed-off-by: David Ahern --- include/uapi/linux/if_bridge.h | 14 ++++++++++++++ include/uapi/linux/if_link.h | 1 + include/uapi/linux/in.h | 21 ++++++++++++++++----- include/uapi/linux/socket.h | 5 +++++ 4 files changed, 36 insertions(+), 5 deletions(-) diff --git a/include/uapi/linux/if_bridge.h b/include/uapi/linux/if_bridge.h index 907745f4..cf665bdf 100644 --- a/include/uapi/linux/if_bridge.h +++ b/include/uapi/linux/if_bridge.h @@ -549,6 +549,20 @@ enum { BRIDGE_VLANDB_GOPTS_ID, BRIDGE_VLANDB_GOPTS_RANGE, BRIDGE_VLANDB_GOPTS_MCAST_SNOOPING, + BRIDGE_VLANDB_GOPTS_MCAST_IGMP_VERSION, + BRIDGE_VLANDB_GOPTS_MCAST_MLD_VERSION, + BRIDGE_VLANDB_GOPTS_MCAST_LAST_MEMBER_CNT, + BRIDGE_VLANDB_GOPTS_MCAST_STARTUP_QUERY_CNT, + BRIDGE_VLANDB_GOPTS_MCAST_LAST_MEMBER_INTVL, + BRIDGE_VLANDB_GOPTS_PAD, + BRIDGE_VLANDB_GOPTS_MCAST_MEMBERSHIP_INTVL, + BRIDGE_VLANDB_GOPTS_MCAST_QUERIER_INTVL, + BRIDGE_VLANDB_GOPTS_MCAST_QUERY_INTVL, + BRIDGE_VLANDB_GOPTS_MCAST_QUERY_RESPONSE_INTVL, + BRIDGE_VLANDB_GOPTS_MCAST_STARTUP_QUERY_INTVL, + BRIDGE_VLANDB_GOPTS_MCAST_QUERIER, + BRIDGE_VLANDB_GOPTS_MCAST_ROUTER, + BRIDGE_VLANDB_GOPTS_MCAST_ROUTER_PORTS, __BRIDGE_VLANDB_GOPTS_MAX }; #define BRIDGE_VLANDB_GOPTS_MAX (__BRIDGE_VLANDB_GOPTS_MAX - 1) diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index 62512efc..fb2ecf40 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -853,6 +853,7 @@ enum { IFLA_BOND_AD_ACTOR_SYSTEM, IFLA_BOND_TLB_DYNAMIC_LB, IFLA_BOND_PEER_NOTIF_DELAY, + IFLA_BOND_AD_LACP_ACTIVE, __IFLA_BOND_MAX, }; diff --git a/include/uapi/linux/in.h b/include/uapi/linux/in.h index 8e2de9fc..42cfea14 100644 --- a/include/uapi/linux/in.h +++ b/include/uapi/linux/in.h @@ -222,11 +222,22 @@ struct group_source_req { }; struct group_filter { - __u32 gf_interface; /* interface index */ - struct __kernel_sockaddr_storage gf_group; /* multicast address */ - __u32 gf_fmode; /* filter mode */ - __u32 gf_numsrc; /* 
number of sources */ - struct __kernel_sockaddr_storage gf_slist[1]; /* interface index */ + union { + struct { + __u32 gf_interface_aux; /* interface index */ + struct __kernel_sockaddr_storage gf_group_aux; /* multicast address */ + __u32 gf_fmode_aux; /* filter mode */ + __u32 gf_numsrc_aux; /* number of sources */ + struct __kernel_sockaddr_storage gf_slist[1]; /* interface index */ + }; + struct { + __u32 gf_interface; /* interface index */ + struct __kernel_sockaddr_storage gf_group; /* multicast address */ + __u32 gf_fmode; /* filter mode */ + __u32 gf_numsrc; /* number of sources */ + struct __kernel_sockaddr_storage gf_slist_flex[]; /* interface index */ + }; + }; }; #define GROUP_FILTER_SIZE(numsrc) \ diff --git a/include/uapi/linux/socket.h b/include/uapi/linux/socket.h index debcf26f..ca7c25a2 100644 --- a/include/uapi/linux/socket.h +++ b/include/uapi/linux/socket.h @@ -26,4 +26,9 @@ struct __kernel_sockaddr_storage { }; }; +#define SOCK_SNDBUF_LOCK 1 +#define SOCK_RCVBUF_LOCK 2 + +#define SOCK_BUF_LOCK_MASK (SOCK_SNDBUF_LOCK | SOCK_RCVBUF_LOCK) + #endif /* _LINUX_SOCKET_H */ From 6d0d35bab9f8af48a3f796e2302834db36eda96d Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Mon, 9 Aug 2021 11:01:53 +0800 Subject: [PATCH 24/30] ip/bond: add lacp active support lacp_active specifies whether to send LACPDU frames periodically. If set on, the LACPDU frames are sent along with the configured lacp_rate setting. If set off, the LACPDU frames act as "speak when spoken to". v2: use strcmp instead of match for new options.
Signed-off-by: Hangbin Liu --- ip/iplink_bond.c | 26 +++++++++++++++++++++++++- 1 file changed, 25 insertions(+), 1 deletion(-) diff --git a/ip/iplink_bond.c b/ip/iplink_bond.c index 0b4fe445..59c9e36d 100644 --- a/ip/iplink_bond.c +++ b/ip/iplink_bond.c @@ -77,6 +77,12 @@ static const char *xmit_hash_policy_tbl[] = { NULL, }; +static const char *lacp_active_tbl[] = { + "off", + "on", + NULL, +}; + static const char *lacp_rate_tbl[] = { "slow", "fast", @@ -142,6 +148,7 @@ static void print_explain(FILE *f) " [ packets_per_slave PACKETS_PER_SLAVE ]\n" " [ tlb_dynamic_lb TLB_DYNAMIC_LB ]\n" " [ lacp_rate LACP_RATE ]\n" + " [ lacp_active LACP_ACTIVE]\n" " [ ad_select AD_SELECT ]\n" " [ ad_user_port_key PORTKEY ]\n" " [ ad_actor_sys_prio SYSPRIO ]\n" @@ -153,6 +160,7 @@ static void print_explain(FILE *f) "PRIMARY_RESELECT := always|better|failure\n" "FAIL_OVER_MAC := none|active|follow\n" "XMIT_HASH_POLICY := layer2|layer2+3|layer3+4|encap2+3|encap3+4|vlan+srcmac\n" + "LACP_ACTIVE := off|on\n" "LACP_RATE := slow|fast\n" "AD_SELECT := stable|bandwidth|count\n" ); @@ -168,7 +176,7 @@ static int bond_parse_opt(struct link_util *lu, int argc, char **argv, { __u8 mode, use_carrier, primary_reselect, fail_over_mac; __u8 xmit_hash_policy, num_peer_notif, all_slaves_active; - __u8 lacp_rate, ad_select, tlb_dynamic_lb; + __u8 lacp_active, lacp_rate, ad_select, tlb_dynamic_lb; __u16 ad_user_port_key, ad_actor_sys_prio; __u32 miimon, updelay, downdelay, peer_notify_delay, arp_interval, arp_validate; __u32 arp_all_targets, resend_igmp, min_links, lp_interval; @@ -326,6 +334,13 @@ static int bond_parse_opt(struct link_util *lu, int argc, char **argv, lacp_rate = get_index(lacp_rate_tbl, *argv); addattr8(n, 1024, IFLA_BOND_AD_LACP_RATE, lacp_rate); + } else if (strcmp(*argv, "lacp_active") == 0) { + NEXT_ARG(); + if (get_index(lacp_active_tbl, *argv) < 0) + invarg("invalid lacp_active", *argv); + + lacp_active = get_index(lacp_active_tbl, *argv); + addattr8(n, 1024, 
IFLA_BOND_AD_LACP_ACTIVE, lacp_active); } else if (matches(*argv, "ad_select") == 0) { NEXT_ARG(); if (get_index(ad_select_tbl, *argv) < 0) @@ -564,6 +579,15 @@ static void bond_print_opt(struct link_util *lu, FILE *f, struct rtattr *tb[]) "packets_per_slave %u ", rta_getattr_u32(tb[IFLA_BOND_PACKETS_PER_SLAVE])); + if (tb[IFLA_BOND_AD_LACP_ACTIVE]) { + const char *lacp_active = get_name(lacp_active_tbl, + rta_getattr_u8(tb[IFLA_BOND_AD_LACP_ACTIVE])); + print_string(PRINT_ANY, + "ad_lacp_active", + "lacp_active %s ", + lacp_active); + } + if (tb[IFLA_BOND_AD_LACP_RATE]) { const char *lacp_rate = get_name(lacp_rate_tbl, rta_getattr_u8(tb[IFLA_BOND_AD_LACP_RATE])); From 8ab1834e56f5658a7f3a3f763bee0cc17c13f1a9 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Wed, 1 Sep 2021 14:02:50 -0700 Subject: [PATCH 25/30] uapi: update headers from 5.15 merge New headers from 5.15 early merge. Signed-off-by: Stephen Hemminger --- include/uapi/linux/bpf.h | 34 ++++++++++++++++++++++++++++++++- include/uapi/linux/if_bridge.h | 16 +++++++++++++++- include/uapi/linux/if_link.h | 2 ++ include/uapi/linux/mptcp.h | 1 + include/uapi/linux/virtio_ids.h | 1 + include/uapi/linux/xfrm.h | 11 +++++++++++ 6 files changed, 63 insertions(+), 2 deletions(-) diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index dd547572..177cdc57 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -84,7 +84,7 @@ struct bpf_lpm_trie_key { struct bpf_cgroup_storage_key { __u64 cgroup_inode_id; /* cgroup inode id */ - __u32 attach_type; /* program attach type */ + __u32 attach_type; /* program attach type (enum bpf_attach_type) */ }; union bpf_iter_link_info { @@ -993,6 +993,7 @@ enum bpf_attach_type { BPF_SK_SKB_VERDICT, BPF_SK_REUSEPORT_SELECT, BPF_SK_REUSEPORT_SELECT_OR_MIGRATE, + BPF_PERF_EVENT, __MAX_BPF_ATTACH_TYPE }; @@ -1006,6 +1007,7 @@ enum bpf_link_type { BPF_LINK_TYPE_ITER = 4, BPF_LINK_TYPE_NETNS = 5, BPF_LINK_TYPE_XDP = 6, + BPF_LINK_TYPE_PERF_EVENT = 7, 
MAX_BPF_LINK_TYPE, }; @@ -1446,6 +1448,13 @@ union bpf_attr { __aligned_u64 iter_info; /* extra bpf_iter_link_info */ __u32 iter_info_len; /* iter_info length */ }; + struct { + /* black box user-provided value passed through + * to BPF program at the execution time and + * accessible through bpf_get_attach_cookie() BPF helper + */ + __u64 bpf_cookie; + } perf_event; }; } link_create; @@ -4847,6 +4856,27 @@ union bpf_attr { * Get address of the traced function (for tracing and kprobe programs). * Return * Address of the traced function. + * + * u64 bpf_get_attach_cookie(void *ctx) + * Description + * Get bpf_cookie value provided (optionally) during the program + * attachment. It might be different for each individual + * attachment, even if BPF program itself is the same. + * Expects BPF program context *ctx* as a first argument. + * + * Supported for the following program types: + * - kprobe/uprobe; + * - tracepoint; + * - perf_event. + * Return + * Value specified by user at BPF link creation/attachment time + * or 0, if it was not specified. + * + * long bpf_task_pt_regs(struct task_struct *task) + * Description + * Get the struct pt_regs associated with **task**. + * Return + * A pointer to struct pt_regs. 
*/ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -5023,6 +5053,8 @@ union bpf_attr { FN(timer_start), \ FN(timer_cancel), \ FN(get_func_ip), \ + FN(get_attach_cookie), \ + FN(task_pt_regs), \ /* */ /* integer value in 'imm' field of BPF_CALL instruction selects which helper diff --git a/include/uapi/linux/if_bridge.h b/include/uapi/linux/if_bridge.h index cf665bdf..d1b5fdee 100644 --- a/include/uapi/linux/if_bridge.h +++ b/include/uapi/linux/if_bridge.h @@ -506,6 +506,7 @@ enum { BRIDGE_VLANDB_ENTRY_STATE, BRIDGE_VLANDB_ENTRY_TUNNEL_INFO, BRIDGE_VLANDB_ENTRY_STATS, + BRIDGE_VLANDB_ENTRY_MCAST_ROUTER, __BRIDGE_VLANDB_ENTRY_MAX, }; #define BRIDGE_VLANDB_ENTRY_MAX (__BRIDGE_VLANDB_ENTRY_MAX - 1) @@ -561,8 +562,8 @@ enum { BRIDGE_VLANDB_GOPTS_MCAST_QUERY_RESPONSE_INTVL, BRIDGE_VLANDB_GOPTS_MCAST_STARTUP_QUERY_INTVL, BRIDGE_VLANDB_GOPTS_MCAST_QUERIER, - BRIDGE_VLANDB_GOPTS_MCAST_ROUTER, BRIDGE_VLANDB_GOPTS_MCAST_ROUTER_PORTS, + BRIDGE_VLANDB_GOPTS_MCAST_QUERIER_STATE, __BRIDGE_VLANDB_GOPTS_MAX }; #define BRIDGE_VLANDB_GOPTS_MAX (__BRIDGE_VLANDB_GOPTS_MAX - 1) @@ -770,4 +771,17 @@ struct br_boolopt_multi { __u32 optval; __u32 optmask; }; + +enum { + BRIDGE_QUERIER_UNSPEC, + BRIDGE_QUERIER_IP_ADDRESS, + BRIDGE_QUERIER_IP_PORT, + BRIDGE_QUERIER_IP_OTHER_TIMER, + BRIDGE_QUERIER_PAD, + BRIDGE_QUERIER_IPV6_ADDRESS, + BRIDGE_QUERIER_IPV6_PORT, + BRIDGE_QUERIER_IPV6_OTHER_TIMER, + __BRIDGE_QUERIER_MAX +}; +#define BRIDGE_QUERIER_MAX (__BRIDGE_QUERIER_MAX - 1) #endif /* _LINUX_IF_BRIDGE_H */ diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index fb2ecf40..1d4ed60b 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -415,6 +415,7 @@ enum { IFLA_INET6_ICMP6STATS, /* statistics (icmpv6) */ IFLA_INET6_TOKEN, /* device token */ IFLA_INET6_ADDR_GEN_MODE, /* implicit address generator mode */ + IFLA_INET6_RA_MTU, /* mtu carried in the RA message */ __IFLA_INET6_MAX }; @@ -477,6 +478,7 @@ enum { IFLA_BR_MCAST_MLD_VERSION, 
IFLA_BR_VLAN_STATS_PER_PORT, IFLA_BR_MULTI_BOOLOPT, + IFLA_BR_MCAST_QUERIER_STATE, __IFLA_BR_MAX, }; diff --git a/include/uapi/linux/mptcp.h b/include/uapi/linux/mptcp.h index 68d11c6e..957743ce 100644 --- a/include/uapi/linux/mptcp.h +++ b/include/uapi/linux/mptcp.h @@ -73,6 +73,7 @@ enum { #define MPTCP_PM_ADDR_FLAG_SIGNAL (1 << 0) #define MPTCP_PM_ADDR_FLAG_SUBFLOW (1 << 1) #define MPTCP_PM_ADDR_FLAG_BACKUP (1 << 2) +#define MPTCP_PM_ADDR_FLAG_FULLMESH (1 << 3) enum { MPTCP_PM_CMD_UNSPEC, diff --git a/include/uapi/linux/virtio_ids.h b/include/uapi/linux/virtio_ids.h index 70a8057a..99aa27b1 100644 --- a/include/uapi/linux/virtio_ids.h +++ b/include/uapi/linux/virtio_ids.h @@ -55,6 +55,7 @@ #define VIRTIO_ID_FS 26 /* virtio filesystem */ #define VIRTIO_ID_PMEM 27 /* virtio pmem */ #define VIRTIO_ID_MAC80211_HWSIM 29 /* virtio mac80211-hwsim */ +#define VIRTIO_ID_I2C_ADAPTER 34 /* virtio i2c adapter */ #define VIRTIO_ID_BT 40 /* virtio bluetooth */ /* diff --git a/include/uapi/linux/xfrm.h b/include/uapi/linux/xfrm.h index eec67a2b..ecd06396 100644 --- a/include/uapi/linux/xfrm.h +++ b/include/uapi/linux/xfrm.h @@ -213,6 +213,11 @@ enum { XFRM_MSG_GETSPDINFO, #define XFRM_MSG_GETSPDINFO XFRM_MSG_GETSPDINFO + XFRM_MSG_SETDEFAULT, +#define XFRM_MSG_SETDEFAULT XFRM_MSG_SETDEFAULT + XFRM_MSG_GETDEFAULT, +#define XFRM_MSG_GETDEFAULT XFRM_MSG_GETDEFAULT + XFRM_MSG_MAPPING, #define XFRM_MSG_MAPPING XFRM_MSG_MAPPING __XFRM_MSG_MAX @@ -508,6 +513,12 @@ struct xfrm_user_offload { #define XFRM_OFFLOAD_IPV6 1 #define XFRM_OFFLOAD_INBOUND 2 +struct xfrm_userpolicy_default { +#define XFRM_USERPOLICY_DIRMASK_MAX (sizeof(__u8) * 8) + __u8 dirmask; + __u8 action; +}; + /* backwards compatibility for userspace */ #define XFRMGRP_ACQUIRE 1 #define XFRMGRP_EXPIRE 2 From 7a70524270f3b93211d21d98f90a31b9833c26a2 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Wed, 1 Sep 2021 14:03:53 -0700 Subject: [PATCH 26/30] ip: remove leftovers from IPX and DECnet Iproute2 has not supported 
DECnet or IPX since version 5.0. There was some leftover support in the ip options flags and parsing; remove it. Signed-off-by: Stephen Hemminger --- include/utils.h | 11 ----------- ip/ip.c | 4 +--- ip/ipneigh.c | 3 +-- lib/utils.c | 22 ++-------------------- man/man8/ip.8 | 2 -- 5 files changed, 4 insertions(+), 38 deletions(-) diff --git a/include/utils.h b/include/utils.h index 28eaad8e..c9849461 100644 --- a/include/utils.h +++ b/include/utils.h @@ -109,17 +109,6 @@ static inline bool is_addrtype_inet_not_multi(const inet_prefix *p) return (p->flags & ADDRTYPE_INET_MULTI) == ADDRTYPE_INET; } -#define DN_MAXADDL 20 -#ifndef AF_DECnet -#define AF_DECnet 12 -#endif - -struct dn_naddr -{ - unsigned short a_len; - unsigned char a_addr[DN_MAXADDL]; -}; - #ifndef AF_MPLS # define AF_MPLS 28 #endif diff --git a/ip/ip.c b/ip/ip.c index e7ffeaff..b07a5c7d 100644 --- a/ip/ip.c +++ b/ip/ip.c @@ -72,7 +72,7 @@ static void usage(void) " OPTIONS := { -V[ersion] | -s[tatistics] | -d[etails] | -r[esolve] |\n" " -h[uman-readable] | -iec | -j[son] | -p[retty] |\n" " -f[amily] { inet | inet6 | mpls | bridge | link } |\n" - " -4 | -6 | -I | -D | -M | -B | -0 |\n" + " -4 | -6 | -M | -B | -0 |\n" " -l[oops] { maximum-addr-flush-attempts } | -br[ief] |\n" " -o[neline] | -t[imestamp] | -ts[hort] | -b[atch] [filename] |\n" " -rc[vbuf] [size] | -n[etns] name | -N[umeric] | -a[ll] |\n" @@ -224,8 +224,6 @@ int main(int argc, char **argv) preferred_family = AF_INET6; } else if (strcmp(opt, "-0") == 0) { preferred_family = AF_PACKET; - } else if (strcmp(opt, "-D") == 0) { - preferred_family = AF_DECnet; } else if (strcmp(opt, "-M") == 0) { preferred_family = AF_MPLS; } else if (strcmp(opt, "-B") == 0) { diff --git a/ip/ipneigh.c b/ip/ipneigh.c index 95bde520..b778de00 100644 --- a/ip/ipneigh.c +++ b/ip/ipneigh.c @@ -328,8 +328,7 @@ int print_neigh(struct nlmsghdr *n, void *arg) if (!(filter.state&r->ndm_state) && !(r->ndm_flags & NTF_PROXY) && !(r->ndm_flags & NTF_EXT_LEARNED) && - 
(r->ndm_state || !(filter.state&0x100)) && - (r->ndm_family != AF_DECnet)) + (r->ndm_state || !(filter.state&0x100))) return 0; if (filter.master && !(n->nlmsg_flags & NLM_F_DUMP_FILTERED)) { diff --git a/lib/utils.c b/lib/utils.c index 0559923b..53d31006 100644 --- a/lib/utils.c +++ b/lib/utils.c @@ -540,7 +540,7 @@ static int __get_addr_1(inet_prefix *addr, const char *name, int family) memset(addr, 0, sizeof(*addr)); if (strcmp(name, "default") == 0) { - if ((family == AF_DECnet) || (family == AF_MPLS)) + if (family == AF_MPLS) return -1; addr->family = family; addr->bytelen = af_byte_len(addr->family); @@ -551,7 +551,7 @@ static int __get_addr_1(inet_prefix *addr, const char *name, int family) if (strcmp(name, "all") == 0 || strcmp(name, "any") == 0) { - if ((family == AF_DECnet) || (family == AF_MPLS)) + if (family == AF_MPLS) return -1; addr->family = family; addr->bytelen = 0; @@ -636,10 +636,6 @@ int af_bit_len(int af) return 128; case AF_INET: return 32; - case AF_DECnet: - return 16; - case AF_IPX: - return 80; case AF_MPLS: return 20; } @@ -729,16 +725,6 @@ int get_addr_rta(inet_prefix *dst, const struct rtattr *rta, int family) dst->bytelen = 16; memcpy(dst->data, data, 16); break; - case 2: - dst->family = AF_DECnet; - dst->bytelen = 2; - memcpy(dst->data, data, 2); - break; - case 10: - dst->family = AF_IPX; - dst->bytelen = 10; - memcpy(dst->data, data, 10); - break; default: return -1; } @@ -1029,8 +1015,6 @@ int read_family(const char *name) family = AF_INET6; else if (strcmp(name, "link") == 0) family = AF_PACKET; - else if (strcmp(name, "ipx") == 0) - family = AF_IPX; else if (strcmp(name, "mpls") == 0) family = AF_MPLS; else if (strcmp(name, "bridge") == 0) @@ -1046,8 +1030,6 @@ const char *family_name(int family) return "inet6"; if (family == AF_PACKET) return "link"; - if (family == AF_IPX) - return "ipx"; if (family == AF_MPLS) return "mpls"; if (family == AF_BRIDGE) diff --git a/man/man8/ip.8 b/man/man8/ip.8 index c3598a02..2a4848b7 100644 
--- a/man/man8/ip.8 +++ b/man/man8/ip.8 @@ -37,8 +37,6 @@ ip \- show / manipulate routing, network devices, interfaces and tunnels .BR inet " | " inet6 " | " link " } | " \fB-4\fR | \fB-6\fR | -\fB-I\fR | -\fB-D\fR | \fB-B\fR | \fB-0\fR | \fB-l\fR[\fIoops\fR] { \fBmaximum-addr-flush-attempts\fR } | From ceba59308db269ca841f7f059e9810abdd36978f Mon Sep 17 00:00:00 2001 From: Luca Boccassi Date: Thu, 2 Sep 2021 12:37:36 +0100 Subject: [PATCH 27/30] tree-wide: fix some typos found by Lintian Signed-off-by: Luca Boccassi Signed-off-by: Stephen Hemminger --- man/man8/devlink-port.8 | 2 +- man/man8/ip-link.8.in | 2 +- man/man8/tc-u32.8 | 2 +- tc/q_netem.c | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/man/man8/devlink-port.8 b/man/man8/devlink-port.8 index 12ccc47e..147c8e27 100644 --- a/man/man8/devlink-port.8 +++ b/man/man8/devlink-port.8 @@ -160,7 +160,7 @@ Is an alias for .PP .B "DEV/PORT_INDEX" - specifies the devlink port index to use for the requested new port. -This is optional. When ommited, driver allocates unique port index. +This is optional. When omitted, driver allocates unique port index. .TP .BR flavour " { " pcipf " | " pcisf " } " diff --git a/man/man8/ip-link.8.in b/man/man8/ip-link.8.in index 572bed87..1a3216e0 100644 --- a/man/man8/ip-link.8.in +++ b/man/man8/ip-link.8.in @@ -2515,7 +2515,7 @@ specifies the master device which enslaves devices to show. .TP .BI vrf " NAME " .I NAME -speficies the VRF which enslaves devices to show. +specifies the VRF which enslaves devices to show. .TP .BI type " TYPE " diff --git a/man/man8/tc-u32.8 b/man/man8/tc-u32.8 index a23a1846..fec9af7f 100644 --- a/man/man8/tc-u32.8 +++ b/man/man8/tc-u32.8 @@ -286,7 +286,7 @@ though inverses this behaviour: the offset is applied always, and will fall back to zero. .TP .BI hashkey " HASHKEY" -Spefify what packet data to use to calculate a hash key for bucket lookup. The +Specify what packet data to use to calculate a hash key for bucket lookup. 
The kernel adjusts the value according to the hash table's size. For this to work, the option .B link diff --git a/tc/q_netem.c b/tc/q_netem.c index d93e1c73..2e5a46ab 100644 --- a/tc/q_netem.c +++ b/tc/q_netem.c @@ -267,7 +267,7 @@ static int netem_parse_opt(struct qdisc_util *qu, int argc, char **argv, NEXT_ARG(); ++present[TCA_NETEM_CORR]; if (get_percent(&cor.loss_corr, *argv)) { - explain1("loss correllation"); + explain1("loss correlation"); return -1; } } From a3272b93725a406bc98b67373da67a4bdf6fcdb0 Mon Sep 17 00:00:00 2001 From: Luca Boccassi Date: Thu, 2 Sep 2021 12:38:54 +0100 Subject: [PATCH 28/30] configure: restore backward compatibility Commit a9c3d70d902a0473ee5c13336317006a52ce8242 broke backward compatibility by making 'configure' error out if parameters are passed, instead of ignoring them. Sometimes packaging systems detect 'configure' and assume it's from autotools, and pass a bunch of options. Eg: dh_auto_configure ./configure --build=x86_64-linux-gnu --prefix=/usr --includedir=${prefix}/include --mandir=${prefix}/share/man --infodir=${prefix}/share/info --sysconfdir=/etc --localstatedir=/var --disable-option-checking --disable-silent-rules --libdir=${prefix}/lib/x86_64-linux-gnu --runstatedir=/run --disable-maintainer-mode --disable-dependency-tracking Ignore unknown options again instead of erroring out. Fixes: a9c3d70d902a ("configure: add options ability") Signed-off-by: Luca Boccassi Signed-off-by: Stephen Hemminger --- configure | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/configure b/configure index 0a4a0fc9..7f4f3bd9 100755 --- a/configure +++ b/configure @@ -518,7 +518,7 @@ else "") break ;; *) - usage 1 ;; + shift 1 ;; esac done fi From deef844b1ef83292ba35ee618e40346139d2c608 Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Wed, 1 Sep 2021 13:44:36 +0300 Subject: [PATCH 29/30] man: ip-link: remove double of Remove double "of". 
Signed-off-by: Nikolay Aleksandrov Signed-off-by: Stephen Hemminger --- man/man8/ip-link.8.in | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/man/man8/ip-link.8.in b/man/man8/ip-link.8.in index 1a3216e0..6714ef6e 100644 --- a/man/man8/ip-link.8.in +++ b/man/man8/ip-link.8.in @@ -2383,7 +2383,7 @@ may be either .B 0 to disable multicast routers on this port, .B 1 -to let the system detect the presence of of routers (this is the default), +to let the system detect the presence of routers (this is the default), .B 2 to permanently enable multicast traffic forwarding on this port or .B 3 From e7e0e2ce65708a06e95e5822efaf10b34835e518 Mon Sep 17 00:00:00 2001 From: David Marchand Date: Wed, 1 Sep 2021 17:48:26 +0200 Subject: [PATCH 30/30] iptuntap: fix multi-queue flag display When creating a tap with multi_queue flag, this flag is not displayed when dumping: $ ip tuntap add tap23 mode tap multi_queue $ ip tuntap tap23: tap persist0x100 While at it, add a space between known flags and hexdump of unknown ones. 
Fixes: c41e038f48a3 ("iptuntap: allow creation of multi-queue tun/tap device") Signed-off-by: David Marchand Signed-off-by: Stephen Hemminger --- ip/iptuntap.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/ip/iptuntap.c b/ip/iptuntap.c index 9cdb4a80..96ca1ae7 100644 --- a/ip/iptuntap.c +++ b/ip/iptuntap.c @@ -243,6 +243,9 @@ static void print_flags(long flags) if (flags & IFF_ONE_QUEUE) print_string(PRINT_ANY, NULL, " %s", "one_queue"); + if (flags & IFF_MULTI_QUEUE) + print_string(PRINT_ANY, NULL, " %s", "multi_queue"); + if (flags & IFF_VNET_HDR) print_string(PRINT_ANY, NULL, " %s", "vnet_hdr"); @@ -253,9 +256,10 @@ static void print_flags(long flags) print_string(PRINT_ANY, NULL, " %s", "filter"); flags &= ~(IFF_TUN | IFF_TAP | IFF_NO_PI | IFF_ONE_QUEUE | - IFF_VNET_HDR | IFF_PERSIST | IFF_NOFILTER); + IFF_MULTI_QUEUE | IFF_VNET_HDR | IFF_PERSIST | + IFF_NOFILTER); if (flags) - print_0xhex(PRINT_ANY, NULL, "%#llx", flags); + print_0xhex(PRINT_ANY, NULL, " %#llx", flags); close_json_array(PRINT_JSON, NULL); }