From 12fafa27c7b306e6c397e858f4d5a8159500f659 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Thu, 11 Jun 2020 09:46:46 -0700 Subject: [PATCH 01/28] devlink: update include files Use the tool iwyu to get more complete list of includes for all the bits used by devlink. This should also fix build with musl libc. Fixes: c4dfddccef4e ("fix JSON output of mon command") Reported-off-by: Dan Robertson Signed-off-by: Stephen Hemminger --- devlink/devlink.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/devlink/devlink.c b/devlink/devlink.c index 507972c3..ce2e4676 100644 --- a/devlink/devlink.c +++ b/devlink/devlink.c @@ -19,18 +19,25 @@ #include #include #include +#include +#include +#include +#include #include #define _LINUX_SYSINFO_H /* avoid collision with musl header */ #include #include +#include #include #include +#include +#include #include #include "SNAPSHOT.h" #include "list.h" #include "mnlg.h" -#include "json_writer.h" +#include "json_print.h" #include "utils.h" #include "namespace.h" From fd71244a2086036bdb1b44a8b2b8905ec2d4f100 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Sun, 7 Jun 2020 11:36:46 +0300 Subject: [PATCH 02/28] devlink: Add 'control' trap type This type is used for traps that trap control packets such as ARP request and IGMP query to the CPU. 
Example: # devlink -jp trap show netdevsim/netdevsim10 trap igmp_v1_report { "trap": { "netdevsim/netdevsim10": [ { "name": "igmp_v1_report", "type": "control", "generic": true, "action": "trap", "group": "mc_snooping" } ] } } Signed-off-by: Ido Schimmel Signed-off-by: Stephen Hemminger --- devlink/devlink.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/devlink/devlink.c b/devlink/devlink.c index ce2e4676..913feeb6 100644 --- a/devlink/devlink.c +++ b/devlink/devlink.c @@ -7080,6 +7080,8 @@ static const char *trap_type_name(uint8_t type) return "drop"; case DEVLINK_TRAP_TYPE_EXCEPTION: return "exception"; + case DEVLINK_TRAP_TYPE_CONTROL: + return "control"; default: return ""; } From abda1e9d2bc604aece38b1bfe66af0068c173067 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Sun, 7 Jun 2020 11:36:47 +0300 Subject: [PATCH 03/28] devlink: Add 'mirror' trap action Allow setting 'mirror' trap action for traps that support it. Extend the devlink-trap man page and bash completion accordingly. 
Example: # devlink -jp trap show netdevsim/netdevsim10 trap igmp_query { "trap": { "netdevsim/netdevsim10": [ { "name": "igmp_query", "type": "control", "generic": true, "action": "mirror", "group": "mc_snooping" } ] } } Signed-off-by: Ido Schimmel Signed-off-by: Stephen Hemminger --- bash-completion/devlink | 4 ++-- devlink/devlink.c | 8 ++++++-- man/man8/devlink-trap.8 | 11 +++++++---- 3 files changed, 15 insertions(+), 8 deletions(-) diff --git a/bash-completion/devlink b/bash-completion/devlink index 8518e7aa..f710c888 100644 --- a/bash-completion/devlink +++ b/bash-completion/devlink @@ -678,7 +678,7 @@ _devlink_trap_set_action() COMPREPLY=( $( compgen -W "action" -- "$cur" ) ) ;; $((7 + $i))) - COMPREPLY=( $( compgen -W "trap drop" -- "$cur" ) ) + COMPREPLY=( $( compgen -W "trap drop mirror" -- "$cur" ) ) ;; esac } @@ -708,7 +708,7 @@ _devlink_trap_group_set() case $prev in action) - COMPREPLY=( $( compgen -W "trap drop" -- "$cur" ) ) + COMPREPLY=( $( compgen -W "trap drop mirror" -- "$cur" ) ) return ;; policer) diff --git a/devlink/devlink.c b/devlink/devlink.c index 913feeb6..66e139ab 100644 --- a/devlink/devlink.c +++ b/devlink/devlink.c @@ -1156,6 +1156,8 @@ static int trap_action_get(const char *actionstr, *p_action = DEVLINK_TRAP_ACTION_DROP; } else if (strcmp(actionstr, "trap") == 0) { *p_action = DEVLINK_TRAP_ACTION_TRAP; + } else if (strcmp(actionstr, "mirror") == 0) { + *p_action = DEVLINK_TRAP_ACTION_MIRROR; } else { pr_err("Unknown trap action \"%s\"\n", actionstr); return -EINVAL; @@ -7094,6 +7096,8 @@ static const char *trap_action_name(uint8_t action) return "drop"; case DEVLINK_TRAP_ACTION_TRAP: return "trap"; + case DEVLINK_TRAP_ACTION_MIRROR: + return "mirror"; default: return ""; } @@ -7168,9 +7172,9 @@ static int cmd_trap_show_cb(const struct nlmsghdr *nlh, void *data) static void cmd_trap_help(void) { - pr_err("Usage: devlink trap set DEV trap TRAP [ action { trap | drop } ]\n"); + pr_err("Usage: devlink trap set DEV trap TRAP [ action { 
trap | drop | mirror } ]\n"); pr_err(" devlink trap show [ DEV trap TRAP ]\n"); - pr_err(" devlink trap group set DEV group GROUP [ action { trap | drop } ]\n"); + pr_err(" devlink trap group set DEV group GROUP [ action { trap | drop | mirror } ]\n"); pr_err(" [ policer POLICER ] [ nopolicer ]\n"); pr_err(" devlink trap group show [ DEV group GROUP ]\n"); pr_err(" devlink trap policer set DEV policer POLICER [ rate RATE ] [ burst BURST ]\n"); diff --git a/man/man8/devlink-trap.8 b/man/man8/devlink-trap.8 index f01f8317..1e693427 100644 --- a/man/man8/devlink-trap.8 +++ b/man/man8/devlink-trap.8 @@ -26,7 +26,7 @@ devlink-trap \- devlink trap configuration .ti -8 .BI "devlink trap set " DEV " trap " TRAP -.RB "[ " action " { " trap " | " drop " } ]" +.RB "[ " action " { " trap " | " drop " | " mirror " } ]" .ti -8 .B "devlink trap group show" @@ -36,7 +36,7 @@ devlink-trap \- devlink trap configuration .ti -8 .BI "devlink trap group set " DEV " group " GROUP -.RB "[ " action " { " trap " | " drop " } ]" +.RB "[ " action " { " trap " | " drop " | " mirror " } ]" .br .RB "[ " policer .IB "POLICER " ] @@ -76,7 +76,7 @@ Only applicable if a devlink device is also specified. - specifies the packet trap. .TP -.BR action " { " trap " | " drop " } " +.BR action " { " trap " | " drop " | " mirror " } " packet trap action. .I trap @@ -85,6 +85,9 @@ packet trap action. .I drop - the packet is dropped by the underlying device and a copy is not sent to the CPU. +.I mirror +- the packet is forwarded by the underlying device and a copy is sent to the CPU. + .SS devlink trap group show - display available packet trap groups and their attributes .PP @@ -108,7 +111,7 @@ Only applicable if a devlink device is also specified. - specifies the packet trap group. .TP -.BR action " { " trap " | " drop " } " +.BR action " { " trap " | " drop " | " mirror " } " packet trap action. The action is set for all the packet traps member in the trap group. 
The actions of non-drop traps cannot be changed and are thus skipped. From 3d66d83d25a2589ab735cf36742bff3bcf2a6ada Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Thu, 11 Jun 2020 09:52:38 -0700 Subject: [PATCH 04/28] uapi: update to magic.h Signed-off-by: Stephen Hemminger --- include/uapi/linux/magic.h | 1 + 1 file changed, 1 insertion(+) diff --git a/include/uapi/linux/magic.h b/include/uapi/linux/magic.h index d7806400..f3956fc1 100644 --- a/include/uapi/linux/magic.h +++ b/include/uapi/linux/magic.h @@ -94,6 +94,7 @@ #define BALLOON_KVM_MAGIC 0x13661366 #define ZSMALLOC_MAGIC 0x58295829 #define DMA_BUF_MAGIC 0x444d4142 /* "DMAB" */ +#define DEVMEM_MAGIC 0x454d444d /* "DMEM" */ #define Z3FOLD_MAGIC 0x33 #define PPC_CMM_MAGIC 0xc7571590 From 473d18e2196a78dc16bc836ffe29e83783022292 Mon Sep 17 00:00:00 2001 From: Roi Dayan Date: Thu, 11 Jun 2020 20:35:43 +0300 Subject: [PATCH 05/28] ip address: Fix loop initial declarations are only allowed in C99 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On some distros, i.e. rhel 7.6, compilation fails with the following: ipaddress.c: In function ‘lookup_flag_data_by_name’: ipaddress.c:1260:2: error: ‘for’ loop initial declarations are only allowed in C99 mode for (int i = 0; i < ARRAY_SIZE(ifa_flag_data); ++i) { ^ ipaddress.c:1260:2: note: use option -std=c99 or -std=gnu99 to compile your code This commit fixes the single place needed for compilation to pass. 
Fixes: 9d59c86e575b ("iproute2: ip addr: Organize flag properties structurally") Signed-off-by: Roi Dayan Signed-off-by: Stephen Hemminger --- ip/ipaddress.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/ip/ipaddress.c b/ip/ipaddress.c index 3b53933f..f97eaff3 100644 --- a/ip/ipaddress.c +++ b/ip/ipaddress.c @@ -1257,7 +1257,9 @@ static const struct ifa_flag_data_t { /* Returns a pointer to the data structure for a particular interface flag, or null if no flag could be found */ static const struct ifa_flag_data_t* lookup_flag_data_by_name(const char* flag_name) { - for (int i = 0; i < ARRAY_SIZE(ifa_flag_data); ++i) { + unsigned int i; + + for (i = 0; i < ARRAY_SIZE(ifa_flag_data); ++i) { if (strcmp(flag_name, ifa_flag_data[i].name) == 0) return &ifa_flag_data[i]; } From 2f31d12a25d289d864fd9bffc417e4518043e37d Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Wed, 24 Jun 2020 12:13:46 -0700 Subject: [PATCH 06/28] man/tc: remove obsolete reference to ipchains It isn't Linux 2.2 anymore. Signed-off-by: Stephen Hemminger --- man/man8/tc.8 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/man/man8/tc.8 b/man/man8/tc.8 index e8e0cd0f..235216b6 100644 --- a/man/man8/tc.8 +++ b/man/man8/tc.8 @@ -454,7 +454,7 @@ qdiscs will use all three: tc filters If tc filters are attached to a class, they are consulted first for relevant instructions. Filters can match on all fields of a packet header, -as well as on the firewall mark applied by ipchains or iptables. +as well as on the firewall mark applied by iptables. .TP Type of Service Some qdiscs have built in rules for classifying packets based on the TOS field. From 2df0dc24370c2ed3b97494de3826ec973909b6a9 Mon Sep 17 00:00:00 2001 From: Bjarni Ingi Gislason Date: Sun, 28 Jun 2020 16:26:15 +0000 Subject: [PATCH 07/28] libnetlink.3: display section numbers in roman font, not boldface Typeset section numbers in roman font, see man-pages(7). 
### Details: Output is from: test-groff -b -mandoc -T utf8 -rF0 -t -w w -z [ "test-groff" is a developmental version of "groff" ] <./man/man3/libnetlink.3>:53 (macro BR): only 1 argument, but more are expected <./man/man3/libnetlink.3>:132 (macro BR): only 1 argument, but more are expected <./man/man3/libnetlink.3>:134 (macro BR): only 1 argument, but more are expected <./man/man3/libnetlink.3>:197 (macro BR): only 1 argument, but more are expected <./man/man3/libnetlink.3>:198 (macro BR): only 1 argument, but more are expected Signed-off-by: Bjarni Ingi Gislason --- man/man3/libnetlink.3 | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/man/man3/libnetlink.3 b/man/man3/libnetlink.3 index 8e3dc620..9a2c801c 100644 --- a/man/man3/libnetlink.3 +++ b/man/man3/libnetlink.3 @@ -50,7 +50,7 @@ int rta_addattr32(struct rtattr *rta, int maxlen, int type, __u32 data) int rta_addattr_l(struct rtattr *rta, int maxlen, int type, void *data, int alen) .SH DESCRIPTION libnetlink provides a higher level interface to -.BR rtnetlink(7). +.BR rtnetlink (7). The read functions return 0 on success and a negative errno on failure. The send functions return the amount of data sent, or -1 on error. .TP @@ -129,9 +129,9 @@ for parsing. The file should contain raw data as received from a rtnetlink socke The following functions are useful to construct custom rtnetlink messages. For simple database dumping with filtering it is better to use the higher level functions above. See -.BR rtnetlink(3) +.BR rtnetlink (3) and -.BR netlink(3) +.BR netlink (3) on how to generate a rtnetlink message. The following utility functions require a continuous buffer that already contains a netlink message header and a rtnetlink request. @@ -194,7 +194,7 @@ netlink/rtnetlink was designed and written by Alexey Kuznetsov. Andi Kleen wrote the man page. 
.SH SEE ALSO -.BR netlink(7), -.BR rtnetlink(7) +.BR netlink (7), +.BR rtnetlink (7) .br /usr/include/linux/rtnetlink.h From 57cfcc62afbf7817db351ac412d66677496c4fc3 Mon Sep 17 00:00:00 2001 From: Bjarni Ingi Gislason Date: Sun, 28 Jun 2020 22:46:26 +0000 Subject: [PATCH 08/28] man8/bridge.8: fix misuse of two-fonts macros Use a single-font macro for a single argument. Signed-off-by: Bjarni Ingi Gislason Signed-off-by: Stephen Hemminger --- man/man8/bridge.8 | 39 +++++++++++++++++++-------------------- 1 file changed, 19 insertions(+), 20 deletions(-) diff --git a/man/man8/bridge.8 b/man/man8/bridge.8 index 71f2e890..efe67f7b 100644 --- a/man/man8/bridge.8 +++ b/man/man8/bridge.8 @@ -28,10 +28,9 @@ bridge \- show / manipulate bridge addresses and devices \fB\-o\fR[\fIneline\fr] } .ti -8 -.BR "bridge link set" +.B "bridge link set" .B dev -.IR DEV -.IR " [ " +.IR DEV " [ " .B cost .IR COST " ] [ " .B priority @@ -106,9 +105,9 @@ bridge \- show / manipulate bridge addresses and devices .ti -8 .BR "bridge mdb" " { " add " | " del " } " .B dev -.IR DEV +.I DEV .B port -.IR PORT +.I PORT .B grp .IR GROUP " [ " .BR permanent " | " temp " ] [ " @@ -123,10 +122,10 @@ bridge \- show / manipulate bridge addresses and devices .ti -8 .BR "bridge vlan" " { " add " | " del " } " .B dev -.IR DEV +.I DEV .B vid .IR VID " [ " -.BR tunnel_info +.B tunnel_info .IR TUNNEL_ID " ] [ " .BR pvid " ] [ " untagged " ] [ " .BR self " ] [ " master " ] " @@ -166,7 +165,7 @@ to the specified network namespace Actually it just simplifies executing of: .B ip netns exec -.IR NETNS +.I NETNS .B bridge .RI "[ " OPTIONS " ] " OBJECT " { " COMMAND " | " .BR help " }" @@ -183,7 +182,7 @@ Read commands from provided file or standard input and invoke them. First failure will cause termination of bridge command. .TP -.BR "\-force" +.B "\-force" Don't terminate bridge command on errors in batch mode. If there were any errors during execution of the commands, the application return code will be non zero. 
@@ -393,7 +392,7 @@ bridge FDB. Controls whether a given port will flood unicast traffic for which there is no FDB entry. By default this flag is on. .TP -.BI hwmode +.B hwmode Some network interface cards support HW bridge functionality and they may be configured in different modes. Currently support modes are: @@ -417,7 +416,7 @@ instead of multicast. By default this flag is off. This is done by copying the packet per host and changing the multicast destination MAC to a unicast one accordingly. -.BR mcast_to_unicast +.B mcast_to_unicast works on top of the multicast snooping feature of the bridge. Which means unicast copies are only delivered to hosts which are interested in it and signalized this via IGMP/MLD reports @@ -462,15 +461,15 @@ If the port loses carrier all traffic will be redirected to the configured backup port .TP -.BR nobackup_port +.B nobackup_port Removes the currently configured backup port .TP -.BI self +.B self link setting is configured on specified physical device .TP -.BI master +.B master link setting is configured on the software bridge (default) .TP @@ -499,7 +498,7 @@ and delete old ones. This command creates a new fdb entry. .TP -.BI "LLADDR" +.B LLADDR the Ethernet MAC address. .TP @@ -626,7 +625,7 @@ and last used time for each entry. lookup a bridge forwarding table entry. .TP -.BI "LLADDR" +.B LLADDR the Ethernet MAC address. .TP @@ -750,21 +749,21 @@ dst_metadata for every packet that belongs to this vlan (applicable to bridge ports with vlan_tunnel flag set). .TP -.BI pvid +.B pvid the vlan specified is to be considered a PVID at ingress. Any untagged frames will be assigned to this VLAN. .TP -.BI untagged +.B untagged the vlan specified is to be treated as untagged on egress. .TP -.BI self +.B self the vlan is configured on the specified physical device. Required if the device is the bridge device. .TP -.BI master +.B master the vlan is configured on the software bridge (default). 
.SS bridge vlan delete - delete a vlan filter entry From 472fb39d552fdd274e4baa0fa9d01926a491a2b2 Mon Sep 17 00:00:00 2001 From: Bjarni Ingi Gislason Date: Sun, 28 Jun 2020 23:26:12 +0000 Subject: [PATCH 09/28] devlink.8: Use a single-font macro for a single argument Use a single-font macro for a single argument Signed-off-by: Bjarni Ingi Gislason Signed-off-by: Stephen Hemminger --- man/man8/devlink.8 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/man/man8/devlink.8 b/man/man8/devlink.8 index 7f4eda56..866fda51 100644 --- a/man/man8/devlink.8 +++ b/man/man8/devlink.8 @@ -31,7 +31,7 @@ Read commands from provided file or standard input and invoke them. First failure will cause termination of devlink. .TP -.BR "\-force" +.B \-force Don't terminate devlink on errors in batch mode. If there were any errors during execution of the commands, the application return code will be non zero. From f9bc806c9db1cc9616a05aeac57e87879b582c0c Mon Sep 17 00:00:00 2001 From: Bjarni Ingi Gislason Date: Sun, 28 Jun 2020 23:58:40 +0000 Subject: [PATCH 10/28] devlink-dev.8: use a single-font macro for one argument Use a single-font macro for one argument. Remove unnecessary quotes for a single font macro. Join some lines into one. The output of "nroff" and "groff" is unchanged, except for a font change in two lines. 
Signed-off-by: Bjarni Ingi Gislason Signed-off-by: Stephen Hemminger --- man/man8/devlink-dev.8 | 89 +++++++++++++++++++++--------------------- 1 file changed, 44 insertions(+), 45 deletions(-) diff --git a/man/man8/devlink-dev.8 b/man/man8/devlink-dev.8 index ac01bf60..279100c3 100644 --- a/man/man8/devlink-dev.8 +++ b/man/man8/devlink-dev.8 @@ -26,61 +26,60 @@ devlink-dev \- devlink device configuration .B devlink dev help .ti -8 -.BR "devlink dev eswitch set" -.IR DEV -.RI "[ " +.B devlink dev eswitch set +.I DEV +[ .BR mode " { " legacy " | " switchdev " } " -.RI "]" -.RI "[ " +] [ .BR inline-mode " { " none " | " link " | " network " | " transport " } " -.RI "]" -.RI "[ " +] [ .BR encap-mode " { " none " | " basic " } " -.RI "]" +] .ti -8 -.BR "devlink dev eswitch show" -.IR DEV +.B devlink dev eswitch show +.I DEV .ti -8 -.BR "devlink dev param set" -.IR DEV -.BR name -.IR PARAMETER -.BR value -.IR VALUE +.B devlink dev param set +.I DEV +.B name +.I PARAMETER +.B value +.I VALUE .BR cmode " { " runtime " | " driverinit " | " permanent " } " .ti -8 -.BR "devlink dev param show" -.RI "[ " -.IR DEV -.BR name -.IR PARAMETER -.RI "]" +.B devlink dev param show +[ +.I DEV +.B name +.I PARAMETER +] .ti -8 -.BR "devlink dev reload" -.IR DEV -.RI "[ " -.BI "netns { " PID " | " NAME " | " ID " } -.RI "]" +.B devlink dev reload +.I DEV +[ +.B netns +.RI "{ " PID " | " NAME " | " ID " }" +] .ti -8 -.BR "devlink dev info" -.RI "[ " -.IR DEV -.RI "]" +.B devlink dev info +[ +.I DEV +] .ti -8 -.BR "devlink dev flash" -.IR DEV -.BR file -.IR PATH -.RI "[" -.BR target -.IR ID -.RI "]" +.B devlink dev flash +.I DEV +.B file +.I PATH +[ +.B target +.I ID +] .SH "DESCRIPTION" .SS devlink dev show - display devlink device attributes @@ -159,8 +158,8 @@ Configuration mode in which the new value is set. .SS devlink dev param show - display devlink device supported configuration parameters attributes -.BR name -.IR PARAMETER +.B name +.I PARAMETER Specify parameter name to show. 
If this argument is omitted all parameters supported by devlink devices are listed. @@ -170,8 +169,8 @@ If this argument is omitted all parameters supported by devlink devices are list .I "DEV" - Specifies the devlink device to reload. -.BR netns -.BI { " PID " | " NAME " | " ID " } +.B netns +.RI { " PID " | " NAME " | " ID " } - Specifies the network namespace to reload into, either by pid, name or id. .SS devlink dev info - display device information. @@ -200,13 +199,13 @@ If this argument is omitted all devices are listed. .I "DEV" - specifies the devlink device to write to. -.BR file +.B file .I PATH - Path to the file which will be written into device's flash. The path needs to be relative to one of the directories searched by the kernel firmware loaded, such as /lib/firmware. -.BR component +.B component .I NAME - If device stores multiple firmware images in non-volatile memory, this parameter may be used to indicate which firmware image should be written. From 860a5d12d507f7d2fbe909b105e254dc9745bc76 Mon Sep 17 00:00:00 2001 From: Bjarni Ingi Gislason Date: Mon, 29 Jun 2020 00:42:48 +0000 Subject: [PATCH 11/28] devlink-health.8: use a single-font macro for a single argument Use a single font macro for a single argument. Remove unnecessary quotes for a single-font macro. Join two lines into one. The output of "nroff" and "groff" is unchanged. 
Signed-off-by: Bjarni Ingi Gislason Signed-off-by: Stephen Hemminger --- man/man8/devlink-health.8 | 22 ++++++++++------------ 1 file changed, 10 insertions(+), 12 deletions(-) diff --git a/man/man8/devlink-health.8 b/man/man8/devlink-health.8 index 8a3c77be..215f549b 100644 --- a/man/man8/devlink-health.8 +++ b/man/man8/devlink-health.8 @@ -18,49 +18,47 @@ devlink-health \- devlink health reporting and recovery \fB\-V\fR[\fIersion\fR] } .ti -8 -.BR "devlink health show" +.B devlink health show .RI "[ " DEV "" .B reporter .RI ""REPORTER " ] " .ti -8 -.BR "devlink health recover" +.B devlink health recover .RI "" DEV "" .B reporter .RI "" REPORTER "" .ti -8 -.BR "devlink health diagnose" +.B devlink health diagnose .RI "" DEV "" .B reporter .RI "" REPORTER "" .ti -8 -.BR "devlink health dump show" +.B devlink health dump show .RI "" DEV "" .B reporter .RI "" REPORTER "" .ti -8 -.BR "devlink health dump clear" +.B devlink health dump clear .RI "" DEV "" .B reporter .RI "" REPORTER "" .ti -8 -.BR "devlink health set" +.B devlink health set .RI "" DEV "" .B reporter .RI "" REPORTER "" -.RI "[ " +[ .BI "grace_period " MSEC " -.RI "]" -.RI "[ " +] [ .BR auto_recover " { " true " | " false " } " -.RI "]" -.RI "[ " +] [ .BR auto_dump " { " true " | " false " } " -.RI "]" +] .ti -8 .B devlink health help From 085622b1f56070cf127abb591213fd441f2b499e Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Mon, 6 Jul 2020 10:54:35 -0700 Subject: [PATCH 12/28] uapi: update bpf.h Signed-off-by: Stephen Hemminger --- include/uapi/linux/bpf.h | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 6cad1444..4dfcb81b 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -3168,7 +3168,7 @@ union bpf_attr { * Return * The id is returned or 0 in case the id could not be retrieved. 
* - * void *bpf_ringbuf_output(void *ringbuf, void *data, u64 size, u64 flags) + * int bpf_ringbuf_output(void *ringbuf, void *data, u64 size, u64 flags) * Description * Copy *size* bytes from *data* into a ring buffer *ringbuf*. * If BPF_RB_NO_WAKEUP is specified in *flags*, no notification of @@ -3761,6 +3761,19 @@ struct xdp_md { __u32 egress_ifindex; /* txq->dev->ifindex */ }; +/* DEVMAP map-value layout + * + * The struct data-layout of map-value is a configuration interface. + * New members can only be added to the end of this structure. + */ +struct bpf_devmap_val { + __u32 ifindex; /* device index */ + union { + int fd; /* prog fd on map write */ + __u32 id; /* prog id on map read */ + } bpf_prog; +}; + enum sk_action { SK_DROP = 0, SK_PASS, From d80a05b7954505f73434e6fb051c098c80fd4361 Mon Sep 17 00:00:00 2001 From: Anton Danilov Date: Fri, 3 Jul 2020 18:39:22 +0300 Subject: [PATCH 13/28] tc: improve the qdisc show command Previously it was only possible to show all queue disciplines on an interface. There wasn't a way to get the qdisc info by handle or parent, only full dump of the disciplines with a following grep/sed usage. Now new and old options work as expected to filter a qdisc by handle or parent. Full syntax of the qdisc show command: tc qdisc { show | list } [ dev STRING ] [ QDISC_ID ] [ invisible ] QDISC_ID := { root | ingress | handle QHANDLE | parent CLASSID } This change doesn't require any changes in the kernel. 
Signed-off-by: Anton Danilov Signed-off-by: Stephen Hemminger --- man/man8/tc.8 | 8 +++-- tc/tc_qdisc.c | 91 ++++++++++++++++++++++++++++++++------------------- 2 files changed, 64 insertions(+), 35 deletions(-) diff --git a/man/man8/tc.8 b/man/man8/tc.8 index 235216b6..305bc569 100644 --- a/man/man8/tc.8 +++ b/man/man8/tc.8 @@ -77,9 +77,13 @@ tc \- show / manipulate traffic control settings .B tc .RI "[ " OPTIONS " ]" .RI "[ " FORMAT " ]" -.B qdisc show [ dev +.B qdisc { show | list } [ dev \fIDEV\fR -.B ] +.B ] [ root | ingress | handle +\fIQHANDLE\fR +.B | parent +\fICLASSID\fR +.B ] [ invisible ] .P .B tc .RI "[ " OPTIONS " ]" diff --git a/tc/tc_qdisc.c b/tc/tc_qdisc.c index 181fe2f0..8eb08c34 100644 --- a/tc/tc_qdisc.c +++ b/tc/tc_qdisc.c @@ -35,11 +35,12 @@ static int usage(void) " [ ingress_block BLOCK_INDEX ] [ egress_block BLOCK_INDEX ]\n" " [ [ QDISC_KIND ] [ help | OPTIONS ] ]\n" "\n" - " tc qdisc show [ dev STRING ] [ ingress | clsact ] [ invisible ]\n" + " tc qdisc { show | list } [ dev STRING ] [ QDISC_ID ] [ invisible ]\n" "Where:\n" "QDISC_KIND := { [p|b]fifo | tbf | prio | cbq | red | etc. }\n" "OPTIONS := ... try tc qdisc add help\n" - "STAB_OPTIONS := ... try tc qdisc add stab help\n"); + "STAB_OPTIONS := ... 
try tc qdisc add stab help\n" + "QDISC_ID := { root | ingress | handle QHANDLE | parent CLASSID }\n"); return -1; } @@ -212,6 +213,8 @@ static int tc_qdisc_modify(int cmd, unsigned int flags, int argc, char **argv) } static int filter_ifindex; +static __u32 filter_parent; +static __u32 filter_handle; int print_qdisc(struct nlmsghdr *n, void *arg) { @@ -235,6 +238,12 @@ int print_qdisc(struct nlmsghdr *n, void *arg) if (filter_ifindex && filter_ifindex != t->tcm_ifindex) return 0; + if (filter_handle && filter_handle != t->tcm_handle) + return 0; + + if (filter_parent && filter_parent != t->tcm_parent) + return 0; + parse_rtattr_flags(tb, TCA_MAX, TCA_RTA(t), len, NLA_F_NESTED); if (tb[TCA_KIND] == NULL) { @@ -344,21 +353,55 @@ int print_qdisc(struct nlmsghdr *n, void *arg) static int tc_qdisc_list(int argc, char **argv) { - struct tcmsg t = { .tcm_family = AF_UNSPEC }; + struct { + struct nlmsghdr n; + struct tcmsg t; + char buf[256]; + } req = { + .n.nlmsg_type = RTM_GETQDISC, + .n.nlmsg_len = NLMSG_LENGTH(sizeof(struct tcmsg)), + .t.tcm_family = AF_UNSPEC, + }; + char d[IFNAMSIZ] = {}; bool dump_invisible = false; + __u32 handle; while (argc > 0) { if (strcmp(*argv, "dev") == 0) { NEXT_ARG(); strncpy(d, *argv, sizeof(d)-1); + } else if (strcmp(*argv, "root") == 0) { + if (filter_parent) + invarg("parent is already specified", *argv); + else if (filter_handle) + invarg("handle is already specified", *argv); + filter_parent = TC_H_ROOT; } else if (strcmp(*argv, "ingress") == 0 || - strcmp(*argv, "clsact") == 0) { - if (t.tcm_parent) { - fprintf(stderr, "Duplicate parent ID\n"); - usage(); - } - t.tcm_parent = TC_H_INGRESS; + strcmp(*argv, "clsact") == 0) { + if (filter_parent) + invarg("parent is already specified", *argv); + else if (filter_handle) + invarg("handle is already specified", *argv); + filter_parent = TC_H_INGRESS; + } else if (matches(*argv, "parent") == 0) { + if (filter_parent) + invarg("parent is already specified", *argv); + else if 
(filter_handle) + invarg("handle is already specified", *argv); + NEXT_ARG(); + if (get_tc_classid(&handle, *argv)) + invarg("invalid parent ID", *argv); + filter_parent = handle; + } else if (matches(*argv, "handle") == 0) { + if (filter_parent) + invarg("parent is already specified", *argv); + else if (filter_handle) + invarg("handle is already specified", *argv); + NEXT_ARG(); + if (get_qdisc_handle(&handle, *argv)) + invarg("invalid handle ID", *argv); + filter_handle = handle; } else if (matches(*argv, "help") == 0) { usage(); } else if (strcmp(*argv, "invisible") == 0) { @@ -374,32 +417,18 @@ static int tc_qdisc_list(int argc, char **argv) ll_init_map(&rth); if (d[0]) { - t.tcm_ifindex = ll_name_to_index(d); - if (!t.tcm_ifindex) + req.t.tcm_ifindex = ll_name_to_index(d); + if (!req.t.tcm_ifindex) return -nodev(d); - filter_ifindex = t.tcm_ifindex; + filter_ifindex = req.t.tcm_ifindex; } if (dump_invisible) { - struct { - struct nlmsghdr n; - struct tcmsg t; - char buf[256]; - } req = { - .n.nlmsg_type = RTM_GETQDISC, - .n.nlmsg_len = NLMSG_LENGTH(sizeof(struct tcmsg)), - }; - - req.t.tcm_family = AF_UNSPEC; - addattr(&req.n, 256, TCA_DUMP_INVISIBLE); - if (rtnl_dump_request_n(&rth, &req.n) < 0) { - perror("Cannot send dump request"); - return 1; - } + } - } else if (rtnl_dump_request(&rth, RTM_GETQDISC, &t, sizeof(t)) < 0) { - perror("Cannot send dump request"); + if (rtnl_dump_request_n(&rth, &req.n) < 0) { + perror("Cannot send request"); return 1; } @@ -427,10 +456,6 @@ int do_qdisc(int argc, char **argv) return tc_qdisc_modify(RTM_NEWQDISC, NLM_F_REPLACE, argc-1, argv+1); if (matches(*argv, "delete") == 0) return tc_qdisc_modify(RTM_DELQDISC, 0, argc-1, argv+1); -#if 0 - if (matches(*argv, "get") == 0) - return tc_qdisc_get(RTM_GETQDISC, 0, argc-1, argv+1); -#endif if (matches(*argv, "list") == 0 || matches(*argv, "show") == 0 || matches(*argv, "lst") == 0) return tc_qdisc_list(argc-1, argv+1); From 9e5d246877ec14ea159ad1b2a495aa9475fa6502 Mon Sep 17 
00:00:00 2001 From: Sorah Fukumori Date: Fri, 26 Jun 2020 06:07:12 +0900 Subject: [PATCH 14/28] ip fou: respect preferred_family for IPv6 ip(8) accepts -family ipv6 (-6) option at the toplevel. It is straightforward to support the existing option for modifying listener on IPv6 addresses. Maintain the backward compatibility by leaving ip fou -6 flag implemented, while it's removed from the usage message. Signed-off-by: Sorah Fukumori Signed-off-by: Stephen Hemminger --- ip/ipfou.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/ip/ipfou.c b/ip/ipfou.c index ea126b08..9c697770 100644 --- a/ip/ipfou.c +++ b/ip/ipfou.c @@ -27,10 +27,10 @@ static void usage(void) { fprintf(stderr, - "Usage: ip fou add port PORT { ipproto PROTO | gue } [ -6 ]\n" + "Usage: ip fou add port PORT { ipproto PROTO | gue }\n" " [ local IFADDR ] [ peer IFADDR ]\n" " [ peer_port PORT ] [ dev IFNAME ]\n" - " ip fou del port PORT [ -6 ] [ local IFADDR ]\n" + " ip fou del port PORT [ local IFADDR ]\n" " [ peer IFADDR ] [ peer_port PORT ]\n" " [ dev IFNAME ]\n" " ip fou show\n" @@ -55,13 +55,17 @@ static int fou_parse_opt(int argc, char **argv, struct nlmsghdr *n, { const char *local = NULL, *peer = NULL; __u16 port, peer_port = 0; - __u8 family = AF_INET; + __u8 family = preferred_family; bool gue_set = false; int ipproto_set = 0; __u8 ipproto, type; int port_set = 0; int index = 0; + if (preferred_family == AF_UNSPEC) { + family = AF_INET; + } + while (argc > 0) { if (!matches(*argv, "port")) { NEXT_ARG(); From 8f1cd119b3772be25e1ac6f017006012bd5a4541 Mon Sep 17 00:00:00 2001 From: Dmitry Yakunin Date: Sun, 5 Jul 2020 19:18:12 +0300 Subject: [PATCH 15/28] lib: fix checking of returned file handle size for cgroup Before this patch, the check happened only when we tried to find the cgroup at the cgroup2 mount point. 
v2: - add Fixes line before Signed-off-by (David Ahern) Fixes: d5e6ee0dac64 ("ss: introduce cgroup2 cache and helper functions") Signed-off-by: Dmitry Yakunin Signed-off-by: Stephen Hemminger --- lib/fs.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/lib/fs.c b/lib/fs.c index e265fc04..4b90a704 100644 --- a/lib/fs.c +++ b/lib/fs.c @@ -148,10 +148,10 @@ __u64 get_cgroup2_id(const char *path) strerror(errno)); goto out; } - if (fhp->handle_bytes != sizeof(__u64)) { - fprintf(stderr, "Invalid size of cgroup2 ID\n"); - goto out; - } + } + if (fhp->handle_bytes != sizeof(__u64)) { + fprintf(stderr, "Invalid size of cgroup2 ID\n"); + goto out; } memcpy(cg_id.bytes, fhp->f_handle, sizeof(__u64)); From a6c5c952ab2e0e94eb937b1235649978e22e8125 Mon Sep 17 00:00:00 2001 From: Guillaume Nault Date: Wed, 1 Jul 2020 21:45:04 +0200 Subject: [PATCH 16/28] ip link: initial support for bareudp devices Bareudp devices provide a generic L3 encapsulation for tunnelling different protocols like MPLS, IP, NSH, etc. inside a UDP tunnel. This patch is based on original work from Martin Varghese: https://lore.kernel.org/netdev/1570532361-15163-1-git-send-email-martinvarghesenokia@gmail.com/ Examples: - ip link add dev bareudp0 type bareudp dstport 6635 ethertype mpls_uc This creates a bareudp tunnel device which tunnels L3 traffic with ethertype 0x8847 (unicast MPLS traffic). The destination port of the UDP header will be set to 6635. The device will listen on UDP port 6635 to receive traffic. - ip link add dev bareudp0 type bareudp dstport 6635 ethertype ipv4 multiproto Same as the MPLS example, but for IPv4. The "multiproto" keyword allows the device to also tunnel IPv6 traffic. 
Signed-off-by: Guillaume Nault Signed-off-by: Stephen Hemminger --- ip/Makefile | 2 +- ip/iplink.c | 2 +- ip/iplink_bareudp.c | 150 ++++++++++++++++++++++++++++++++++++++++++ man/man8/ip-link.8.in | 44 +++++++++++++ 4 files changed, 196 insertions(+), 2 deletions(-) create mode 100644 ip/iplink_bareudp.c diff --git a/ip/Makefile b/ip/Makefile index 8735b8e4..4cad619c 100644 --- a/ip/Makefile +++ b/ip/Makefile @@ -11,7 +11,7 @@ IPOBJ=ip.o ipaddress.o ipaddrlabel.o iproute.o iprule.o ipnetns.o \ iplink_bridge.o iplink_bridge_slave.o ipfou.o iplink_ipvlan.o \ iplink_geneve.o iplink_vrf.o iproute_lwtunnel.o ipmacsec.o ipila.o \ ipvrf.o iplink_xstats.o ipseg6.o iplink_netdevsim.o iplink_rmnet.o \ - ipnexthop.o ipmptcp.o + ipnexthop.o ipmptcp.o iplink_bareudp.o RTMONOBJ=rtmon.o diff --git a/ip/iplink.c b/ip/iplink.c index 47f73988..7d4b244d 100644 --- a/ip/iplink.c +++ b/ip/iplink.c @@ -124,7 +124,7 @@ void iplink_usage(void) " bridge | bond | team | ipoib | ip6tnl | ipip | sit | vxlan |\n" " gre | gretap | erspan | ip6gre | ip6gretap | ip6erspan |\n" " vti | nlmon | team_slave | bond_slave | bridge_slave |\n" - " ipvlan | ipvtap | geneve | vrf | macsec | netdevsim | rmnet |\n" + " ipvlan | ipvtap | geneve | bareudp | vrf | macsec | netdevsim | rmnet |\n" " xfrm }\n"); } exit(-1); diff --git a/ip/iplink_bareudp.c b/ip/iplink_bareudp.c new file mode 100644 index 00000000..885e1110 --- /dev/null +++ b/ip/iplink_bareudp.c @@ -0,0 +1,150 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#include + +#include "libnetlink.h" +#include "linux/if_ether.h" +#include "linux/if_link.h" +#include "linux/netlink.h" +#include "linux/rtnetlink.h" +#include "rt_names.h" +#include "utils.h" +#include "ip_common.h" +#include "json_print.h" + +#define BAREUDP_ATTRSET(attrs, type) (((attrs) & (1L << (type))) != 0) + +static void print_explain(FILE *f) +{ + fprintf(f, + "Usage: ... 
bareudp dstport PORT\n" + " ethertype PROTO\n" + " [ srcportmin PORT ]\n" + " [ [no]multiproto ]\n" + "\n" + "Where: PORT := 0-65535\n" + " PROTO := NUMBER | ip | mpls\n" + " SRCPORTMIN := 0-65535\n" + ); +} + +static void explain(void) +{ + print_explain(stderr); +} + +static void check_duparg(__u64 *attrs, int type, const char *key, + const char *argv) +{ + if (!BAREUDP_ATTRSET(*attrs, type)) { + *attrs |= (1L << type); + return; + } + duparg2(key, argv); +} + +static int bareudp_parse_opt(struct link_util *lu, int argc, char **argv, + struct nlmsghdr *n) +{ + bool multiproto = false; + __u16 srcportmin = 0; + __be16 ethertype = 0; + __be16 dstport = 0; + __u64 attrs = 0; + + while (argc > 0) { + if (matches(*argv, "dstport") == 0) { + NEXT_ARG(); + check_duparg(&attrs, IFLA_BAREUDP_PORT, "dstport", + *argv); + if (get_be16(&dstport, *argv, 0)) + invarg("dstport", *argv); + } else if (matches(*argv, "ethertype") == 0) { + NEXT_ARG(); + check_duparg(&attrs, IFLA_BAREUDP_ETHERTYPE, + "ethertype", *argv); + if (ll_proto_a2n(&ethertype, *argv)) + invarg("ethertype", *argv); + } else if (matches(*argv, "srcportmin") == 0) { + NEXT_ARG(); + check_duparg(&attrs, IFLA_BAREUDP_SRCPORT_MIN, + "srcportmin", *argv); + if (get_u16(&srcportmin, *argv, 0)) + invarg("srcportmin", *argv); + } else if (matches(*argv, "multiproto") == 0) { + check_duparg(&attrs, IFLA_BAREUDP_MULTIPROTO_MODE, + *argv, *argv); + multiproto = true; + } else if (matches(*argv, "nomultiproto") == 0) { + check_duparg(&attrs, IFLA_BAREUDP_MULTIPROTO_MODE, + *argv, *argv); + multiproto = false; + } else if (matches(*argv, "help") == 0) { + explain(); + return -1; + } else { + fprintf(stderr, "bareudp: unknown command \"%s\"?\n", + *argv); + explain(); + return -1; + } + argc--, argv++; + } + + if (!BAREUDP_ATTRSET(attrs, IFLA_BAREUDP_PORT)) + missarg("dstport"); + if (!BAREUDP_ATTRSET(attrs, IFLA_BAREUDP_ETHERTYPE)) + missarg("ethertype"); + + addattr16(n, 1024, IFLA_BAREUDP_PORT, dstport); + addattr16(n,
1024, IFLA_BAREUDP_ETHERTYPE, ethertype); + if (BAREUDP_ATTRSET(attrs, IFLA_BAREUDP_SRCPORT_MIN)) + addattr16(n, 1024, IFLA_BAREUDP_SRCPORT_MIN, srcportmin); + if (multiproto) + addattr(n, 1024, IFLA_BAREUDP_MULTIPROTO_MODE); + + return 0; +} + +static void bareudp_print_opt(struct link_util *lu, FILE *f, + struct rtattr *tb[]) +{ + if (!tb) + return; + + if (tb[IFLA_BAREUDP_PORT]) + print_uint(PRINT_ANY, "dstport", "dstport %u ", + rta_getattr_be16(tb[IFLA_BAREUDP_PORT])); + + if (tb[IFLA_BAREUDP_ETHERTYPE]) { + struct rtattr *attr = tb[IFLA_BAREUDP_ETHERTYPE]; + SPRINT_BUF(ethertype); + + print_string(PRINT_ANY, "ethertype", "ethertype %s ", + ll_proto_n2a(rta_getattr_u16(attr), + ethertype, sizeof(ethertype))); + } + + if (tb[IFLA_BAREUDP_SRCPORT_MIN]) + print_uint(PRINT_ANY, "srcportmin", "srcportmin %u ", + rta_getattr_u16(tb[IFLA_BAREUDP_SRCPORT_MIN])); + + if (tb[IFLA_BAREUDP_MULTIPROTO_MODE]) + print_bool(PRINT_ANY, "multiproto", "multiproto ", true); + else + print_bool(PRINT_ANY, "multiproto", "nomultiproto ", false); +} + +static void bareudp_print_help(struct link_util *lu, int argc, char **argv, + FILE *f) +{ + print_explain(f); +} + +struct link_util bareudp_link_util = { + .id = "bareudp", + .maxattr = IFLA_BAREUDP_MAX, + .parse_opt = bareudp_parse_opt, + .print_opt = bareudp_print_opt, + .print_help = bareudp_print_help, +}; diff --git a/man/man8/ip-link.8.in b/man/man8/ip-link.8.in index e8a25451..c6bd2c53 100644 --- a/man/man8/ip-link.8.in +++ b/man/man8/ip-link.8.in @@ -223,6 +223,7 @@ ip-link \- network device configuration .BR ipvtap " |" .BR lowpan " |" .BR geneve " |" +.BR bareudp " |" .BR vrf " |" .BR macsec " |" .BR netdevsim " |" @@ -356,6 +357,9 @@ Link types: .BR geneve - GEneric NEtwork Virtualization Encapsulation .sp +.BR bareudp +- Bare UDP L3 encapsulation support +.sp .BR macsec - Interface for IEEE 802.1AE MAC Security (MACsec) .sp @@ -1293,6 +1297,46 @@ options. 
.in -8 +.TP +Bareudp Type Support +For a link of type +.I Bareudp +the following additional arguments are supported: + +.BI "ip link add " DEVICE +.BI type " bareudp " dstport " PORT " ethertype " ETHERTYPE" +[ +.BI srcportmin " SRCPORTMIN " +] [ +.RB [ no ] multiproto +] + +.in +8 +.sp +.BI dstport " PORT" +- specifies the destination port for the UDP tunnel. + +.sp +.BI ethertype " ETHERTYPE" +- specifies the ethertype of the L3 protocol being tunnelled. + +.sp +.BI srcportmin " SRCPORTMIN" +- selects the lowest value of the UDP tunnel source port range. + +.sp +.RB [ no ] multiproto +- activates support for protocols similar to the one +.RB "specified by " ethertype . +When +.I ETHERTYPE +is "mpls_uc" (that is, unicast MPLS), this allows the tunnel to also handle +multicast MPLS. +When +.I ETHERTYPE +is "ipv4", this allows the tunnel to also handle IPv6. This option is disabled +by default. + .TP MACVLAN and MACVTAP Type Support For a link of type From eb09a15c12fb597de39303c8bc327d17eb04126b Mon Sep 17 00:00:00 2001 From: Guillaume Nault Date: Wed, 1 Jul 2020 21:49:18 +0200 Subject: [PATCH 17/28] tc: flower: support multiple MPLS LSE match Add the new "mpls" keyword that can be used to match MPLS fields in arbitrary Label Stack Entries. LSEs are introduced by the "lse" keyword and followed by LSE options: "depth", "label", "tc", "bos" and "ttl". The depth is mandatory, the other options are optional.
For example, the following filter drops MPLS packets having two labels, where the first label is 21 and has TTL 64 and the second label is 22: $ tc filter add dev ethX ingress proto mpls_uc flower mpls \ lse depth 1 label 21 ttl 64 \ lse depth 2 label 22 bos 1 \ action drop Signed-off-by: Guillaume Nault Signed-off-by: Stephen Hemminger --- man/man8/tc-flower.8 | 73 +++++++++++++- tc/f_flower.c | 221 +++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 292 insertions(+), 2 deletions(-) diff --git a/man/man8/tc-flower.8 b/man/man8/tc-flower.8 index 4d32ff1b..b5bcfd1d 100644 --- a/man/man8/tc-flower.8 +++ b/man/man8/tc-flower.8 @@ -46,6 +46,8 @@ flower \- flow based traffic control filter .IR PRIORITY " | " .BR cvlan_ethtype " { " ipv4 " | " ipv6 " | " .IR ETH_TYPE " } | " +.B mpls +.IR LSE_LIST " | " .B mpls_label .IR LABEL " | " .B mpls_tc @@ -96,7 +98,24 @@ flower \- flow based traffic control filter } .IR OPTIONS " | " .BR ip_flags -.IR IP_FLAGS +.IR IP_FLAGS " }" + +.ti -8 +.IR LSE_LIST " := [ " LSE_LIST " ] " LSE + +.ti -8 +.IR LSE " := " +.B lse depth +.IR DEPTH " { " +.B label +.IR LABEL " | " +.B tc +.IR TC " | " +.B bos +.IR BOS " | " +.B ttl +.IR TTL " }" + .SH DESCRIPTION The .B flower @@ -182,6 +201,56 @@ Match on QinQ layer three protocol. may be either .BR ipv4 ", " ipv6 or an unsigned 16bit value in hexadecimal format. + +.TP +.BI mpls " LSE_LIST" +Match on the MPLS label stack. +.I LSE_LIST +is a list of Label Stack Entries, each introduced by the +.BR lse " keyword." +This option can't be used together with the standalone +.BR mpls_label ", " mpls_tc ", " mpls_bos " and " mpls_ttl " options." +.RS +.TP +.BI lse " LSE_OPTIONS" +Match on an MPLS Label Stack Entry. +.I LSE_OPTIONS +is a list of options that describe the properties of the LSE to match. +.RS +.TP +.BI depth " DEPTH" +The depth of the Label Stack Entry to consider. Depth starts at 1 (the +outermost Label Stack Entry). The maximum usable depth may be limited by the +kernel. 
This option is mandatory. +.I DEPTH +is an unsigned 8 bit value in decimal format. +.TP +.BI label " LABEL" +Match on the MPLS Label field at the specified +.BR depth . +.I LABEL +is an unsigned 20 bit value in decimal format. +.TP +.BI tc " TC" +Match on the MPLS Traffic Class field at the specified +.BR depth . +.I TC +is an unsigned 3 bit value in decimal format. +.TP +.BI bos " BOS" +Match on the MPLS Bottom Of Stack field at the specified +.BR depth . +.I BOS +is a 1 bit value in decimal format. +.TP +.BI ttl " TTL" +Match on the MPLS Time To Live field at the specified +.BR depth . +.I TTL +is an unsigned 8 bit value in decimal format. +.RE +.RE + .TP .BI mpls_label " LABEL" Match the label id in the outermost MPLS label stack entry. @@ -393,7 +462,7 @@ on the matches of the next lower layer. Precisely, layer one and two matches (\fBindev\fR, \fBdst_mac\fR and \fBsrc_mac\fR) have no dependency, MPLS and layer three matches -(\fBmpls_label\fR, \fBmpls_tc\fR, \fBmpls_bos\fR, \fBmpls_ttl\fR, +(\fBmpls\fR, \fBmpls_label\fR, \fBmpls_tc\fR, \fBmpls_bos\fR, \fBmpls_ttl\fR, \fBip_proto\fR, \fBdst_ip\fR, \fBsrc_ip\fR, \fBarp_tip\fR, \fBarp_sip\fR, \fBarp_op\fR, \fBarp_tha\fR, \fBarp_sha\fR and \fBip_flags\fR) depend on the diff --git a/tc/f_flower.c b/tc/f_flower.c index fc136911..00c919fd 100644 --- a/tc/f_flower.c +++ b/tc/f_flower.c @@ -59,6 +59,7 @@ static void explain(void) " ip_proto [tcp | udp | sctp | icmp | icmpv6 | IP-PROTO ] |\n" " ip_tos MASKED-IP_TOS |\n" " ip_ttl MASKED-IP_TTL |\n" + " mpls LSE-LIST |\n" " mpls_label LABEL |\n" " mpls_tc TC |\n" " mpls_bos BOS |\n" @@ -89,6 +90,8 @@ static void explain(void) " ct_label MASKED_CT_LABEL |\n" " ct_mark MASKED_CT_MARK |\n" " ct_zone MASKED_CT_ZONE }\n" + " LSE-LIST := [ LSE-LIST ] LSE\n" + " LSE := lse depth DEPTH { label LABEL | tc TC | bos BOS | ttl TTL }\n" " FILTERID := X:Y:Z\n" " MASKED_LLADDR := { LLADDR | LLADDR/MASK | LLADDR/BITS }\n" " MASKED_CT_STATE := combination of {+|-} and flags trk,est,new\n" 
@@ -1199,11 +1202,127 @@ static int flower_parse_enc_opts_erspan(char *str, struct nlmsghdr *n) return 0; } +static int flower_parse_mpls_lse(int *argc_p, char ***argv_p, + struct nlmsghdr *nlh) +{ + struct rtattr *lse_attr; + char **argv = *argv_p; + int argc = *argc_p; + __u8 depth = 0; + int ret; + + lse_attr = addattr_nest(nlh, MAX_MSG, + TCA_FLOWER_KEY_MPLS_OPTS_LSE | NLA_F_NESTED); + + while (argc > 0) { + if (matches(*argv, "depth") == 0) { + NEXT_ARG(); + ret = get_u8(&depth, *argv, 10); + if (ret < 0 || depth < 1) { + fprintf(stderr, "Illegal \"depth\"\n"); + return -1; + } + addattr8(nlh, MAX_MSG, + TCA_FLOWER_KEY_MPLS_OPT_LSE_DEPTH, depth); + } else if (matches(*argv, "label") == 0) { + __u32 label; + + NEXT_ARG(); + ret = get_u32(&label, *argv, 10); + if (ret < 0 || + label & ~(MPLS_LS_LABEL_MASK >> MPLS_LS_LABEL_SHIFT)) { + fprintf(stderr, "Illegal \"label\"\n"); + return -1; + } + addattr32(nlh, MAX_MSG, + TCA_FLOWER_KEY_MPLS_OPT_LSE_LABEL, label); + } else if (matches(*argv, "tc") == 0) { + __u8 tc; + + NEXT_ARG(); + ret = get_u8(&tc, *argv, 10); + if (ret < 0 || + tc & ~(MPLS_LS_TC_MASK >> MPLS_LS_TC_SHIFT)) { + fprintf(stderr, "Illegal \"tc\"\n"); + return -1; + } + addattr8(nlh, MAX_MSG, TCA_FLOWER_KEY_MPLS_OPT_LSE_TC, + tc); + } else if (matches(*argv, "bos") == 0) { + __u8 bos; + + NEXT_ARG(); + ret = get_u8(&bos, *argv, 10); + if (ret < 0 || bos & ~(MPLS_LS_S_MASK >> MPLS_LS_S_SHIFT)) { + fprintf(stderr, "Illegal \"bos\"\n"); + return -1; + } + addattr8(nlh, MAX_MSG, TCA_FLOWER_KEY_MPLS_OPT_LSE_BOS, + bos); + } else if (matches(*argv, "ttl") == 0) { + __u8 ttl; + + NEXT_ARG(); + ret = get_u8(&ttl, *argv, 10); + if (ret < 0 || ttl & ~(MPLS_LS_TTL_MASK >> MPLS_LS_TTL_SHIFT)) { + fprintf(stderr, "Illegal \"ttl\"\n"); + return -1; + } + addattr8(nlh, MAX_MSG, TCA_FLOWER_KEY_MPLS_OPT_LSE_TTL, + ttl); + } else { + break; + } + argc--; argv++; + } + + if (!depth) { + missarg("depth"); + return -1; + } + + addattr_nest_end(nlh, lse_attr); + + *argc_p = 
argc; + *argv_p = argv; + + return 0; +} + +static int flower_parse_mpls(int *argc_p, char ***argv_p, struct nlmsghdr *nlh) +{ + struct rtattr *mpls_attr; + char **argv = *argv_p; + int argc = *argc_p; + + mpls_attr = addattr_nest(nlh, MAX_MSG, + TCA_FLOWER_KEY_MPLS_OPTS | NLA_F_NESTED); + + while (argc > 0) { + if (matches(*argv, "lse") == 0) { + NEXT_ARG(); + if (flower_parse_mpls_lse(&argc, &argv, nlh) < 0) + return -1; + } else { + break; + } + } + + addattr_nest_end(nlh, mpls_attr); + + *argc_p = argc; + *argv_p = argv; + + return 0; +} + static int flower_parse_opt(struct filter_util *qu, char *handle, int argc, char **argv, struct nlmsghdr *n) { int ret; struct tcmsg *t = NLMSG_DATA(n); + bool mpls_format_old = false; + bool mpls_format_new = false; struct rtattr *tail; __be16 eth_type = TC_H_MIN(t->tcm_info); __be16 vlan_ethtype = 0; @@ -1381,6 +1500,23 @@ static int flower_parse_opt(struct filter_util *qu, char *handle, &cvlan_ethtype, n); if (ret < 0) return -1; + } else if (matches(*argv, "mpls") == 0) { + NEXT_ARG(); + if (eth_type != htons(ETH_P_MPLS_UC) && + eth_type != htons(ETH_P_MPLS_MC)) { + fprintf(stderr, + "Can't set \"mpls\" if ethertype isn't MPLS\n"); + return -1; + } + if (mpls_format_old) { + fprintf(stderr, + "Can't set \"mpls\" if \"mpls_label\", \"mpls_tc\", \"mpls_bos\" or \"mpls_ttl\" is set\n"); + return -1; + } + mpls_format_new = true; + if (flower_parse_mpls(&argc, &argv, n) < 0) + return -1; + continue; } else if (matches(*argv, "mpls_label") == 0) { __u32 label; @@ -1391,6 +1527,12 @@ static int flower_parse_opt(struct filter_util *qu, char *handle, "Can't set \"mpls_label\" if ethertype isn't MPLS\n"); return -1; } + if (mpls_format_new) { + fprintf(stderr, + "Can't set \"mpls_label\" if \"mpls\" is set\n"); + return -1; + } + mpls_format_old = true; ret = get_u32(&label, *argv, 10); if (ret < 0 || label & ~(MPLS_LS_LABEL_MASK >> MPLS_LS_LABEL_SHIFT)) { fprintf(stderr, "Illegal \"mpls_label\"\n"); @@ -1407,6 +1549,12 @@ static 
int flower_parse_opt(struct filter_util *qu, char *handle, "Can't set \"mpls_tc\" if ethertype isn't MPLS\n"); return -1; } + if (mpls_format_new) { + fprintf(stderr, + "Can't set \"mpls_tc\" if \"mpls\" is set\n"); + return -1; + } + mpls_format_old = true; ret = get_u8(&tc, *argv, 10); if (ret < 0 || tc & ~(MPLS_LS_TC_MASK >> MPLS_LS_TC_SHIFT)) { fprintf(stderr, "Illegal \"mpls_tc\"\n"); @@ -1423,6 +1571,12 @@ static int flower_parse_opt(struct filter_util *qu, char *handle, "Can't set \"mpls_bos\" if ethertype isn't MPLS\n"); return -1; } + if (mpls_format_new) { + fprintf(stderr, + "Can't set \"mpls_bos\" if \"mpls\" is set\n"); + return -1; + } + mpls_format_old = true; ret = get_u8(&bos, *argv, 10); if (ret < 0 || bos & ~(MPLS_LS_S_MASK >> MPLS_LS_S_SHIFT)) { fprintf(stderr, "Illegal \"mpls_bos\"\n"); @@ -1439,6 +1593,12 @@ static int flower_parse_opt(struct filter_util *qu, char *handle, "Can't set \"mpls_ttl\" if ethertype isn't MPLS\n"); return -1; } + if (mpls_format_new) { + fprintf(stderr, + "Can't set \"mpls_ttl\" if \"mpls\" is set\n"); + return -1; + } + mpls_format_old = true; ret = get_u8(&ttl, *argv, 10); if (ret < 0 || ttl & ~(MPLS_LS_TTL_MASK >> MPLS_LS_TTL_SHIFT)) { fprintf(stderr, "Illegal \"mpls_ttl\"\n"); @@ -2316,6 +2476,66 @@ static void flower_print_u32(const char *name, struct rtattr *attr) print_uint(PRINT_ANY, name, namefrm, rta_getattr_u32(attr)); } +static void flower_print_mpls_opt_lse(const char *name, struct rtattr *lse) +{ + struct rtattr *tb[TCA_FLOWER_KEY_MPLS_OPT_LSE_MAX + 1]; + struct rtattr *attr; + + if (lse->rta_type != (TCA_FLOWER_KEY_MPLS_OPTS_LSE | NLA_F_NESTED)) { + fprintf(stderr, "rta_type 0x%x, expecting 0x%x (0x%x & 0x%x)\n", + lse->rta_type, + TCA_FLOWER_KEY_MPLS_OPTS_LSE & NLA_F_NESTED, + TCA_FLOWER_KEY_MPLS_OPTS_LSE, NLA_F_NESTED); + return; + } + + parse_rtattr(tb, TCA_FLOWER_KEY_MPLS_OPT_LSE_MAX, RTA_DATA(lse), + RTA_PAYLOAD(lse)); + + print_nl(); + open_json_array(PRINT_ANY, name); + attr = 
tb[TCA_FLOWER_KEY_MPLS_OPT_LSE_DEPTH]; + if (attr) + print_hhu(PRINT_ANY, "depth", " depth %u", + rta_getattr_u8(attr)); + attr = tb[TCA_FLOWER_KEY_MPLS_OPT_LSE_LABEL]; + if (attr) + print_uint(PRINT_ANY, "label", " label %u", + rta_getattr_u32(attr)); + attr = tb[TCA_FLOWER_KEY_MPLS_OPT_LSE_TC]; + if (attr) + print_hhu(PRINT_ANY, "tc", " tc %u", rta_getattr_u8(attr)); + attr = tb[TCA_FLOWER_KEY_MPLS_OPT_LSE_BOS]; + if (attr) + print_hhu(PRINT_ANY, "bos", " bos %u", rta_getattr_u8(attr)); + attr = tb[TCA_FLOWER_KEY_MPLS_OPT_LSE_TTL]; + if (attr) + print_hhu(PRINT_ANY, "ttl", " ttl %u", rta_getattr_u8(attr)); + close_json_array(PRINT_JSON, NULL); +} + +static void flower_print_mpls_opts(const char *name, struct rtattr *attr) +{ + struct rtattr *lse; + int rem; + + if (!attr || !(attr->rta_type & NLA_F_NESTED)) + return; + + print_nl(); + open_json_array(PRINT_ANY, name); + rem = RTA_PAYLOAD(attr); + lse = RTA_DATA(attr); + while (RTA_OK(lse, rem)) { + flower_print_mpls_opt_lse(" lse", lse); + lse = RTA_NEXT(lse, rem); + }; + if (rem) + fprintf(stderr, "!!!Deficit %d, rta_len=%d\n", + rem, lse->rta_len); + close_json_array(PRINT_JSON, NULL); +} + static void flower_print_arp_op(const char *name, struct rtattr *op_attr, struct rtattr *mask_attr) @@ -2430,6 +2650,7 @@ static int flower_print_opt(struct filter_util *qu, FILE *f, flower_print_ip_attr("ip_ttl", tb[TCA_FLOWER_KEY_IP_TTL], tb[TCA_FLOWER_KEY_IP_TTL_MASK]); + flower_print_mpls_opts(" mpls", tb[TCA_FLOWER_KEY_MPLS_OPTS]); flower_print_u32("mpls_label", tb[TCA_FLOWER_KEY_MPLS_LABEL]); flower_print_u8("mpls_tc", tb[TCA_FLOWER_KEY_MPLS_TC]); flower_print_u8("mpls_bos", tb[TCA_FLOWER_KEY_MPLS_BOS]); From 2d4c3f65e2dfb60152b9adef366bb4c266599448 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Tue, 23 Jun 2020 23:06:09 +0300 Subject: [PATCH 18/28] devlink: Document zero policer identifier When setting a policer to a trap group, a value of "0" will unbind the currently bound policer from the group. 
The behavior is intentional and tested in kernel selftests, so document it. Signed-off-by: Ido Schimmel Suggested-by: Alex Kushnarov Signed-off-by: Stephen Hemminger --- man/man8/devlink-trap.8 | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/man/man8/devlink-trap.8 b/man/man8/devlink-trap.8 index 1e693427..f5e66412 100644 --- a/man/man8/devlink-trap.8 +++ b/man/man8/devlink-trap.8 @@ -118,7 +118,8 @@ skipped. .TP .BI policer " POLICER" -packet trap policer. The policer to bind to the packet trap group. +packet trap policer. The policer to bind to the packet trap group. A value of +"0" will unbind the currently bound policer. .TP .B nopolicer From 7c8d7848c7d24707d39a408a8b9c46094e7ff300 Mon Sep 17 00:00:00 2001 From: Louis Peens Date: Fri, 19 Jun 2020 13:50:07 +0200 Subject: [PATCH 19/28] devlink: add 'disk' to 'fw_load_policy' string validation The 'fw_load_policy' devlink parameter supports the 'disk' value since kernel v5.4, seems like there was some oversight in adding this to iproute, fixed by this patch. 
Signed-off-by: Louis Peens Reviewed-by: Simon Horman Signed-off-by: Stephen Hemminger --- devlink/devlink.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/devlink/devlink.c b/devlink/devlink.c index 66e139ab..7f83fb74 100644 --- a/devlink/devlink.c +++ b/devlink/devlink.c @@ -2358,6 +2358,11 @@ static const struct param_val_conv param_val_conv[] = { .vstr = "flash", .vuint = DEVLINK_PARAM_FW_LOAD_POLICY_VALUE_FLASH, }, + { + .name = "fw_load_policy", + .vstr = "disk", + .vuint = DEVLINK_PARAM_FW_LOAD_POLICY_VALUE_DISK, + }, { .name = "reset_dev_on_drv_probe", .vstr = "unknown", From d44bcd2fbf36d113caa8b543f4e36c549f02455e Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Wed, 8 Jul 2020 08:38:58 -0700 Subject: [PATCH 20/28] iplink_bareudp: use common include syntax Follow the precedent set by other parts of iproute2 and order the includes as follows: standard libc headers, Linux headers, iproute2 support headers. Signed-off-by: Stephen Hemminger --- ip/iplink_bareudp.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/ip/iplink_bareudp.c b/ip/iplink_bareudp.c index 885e1110..860ec699 100644 --- a/ip/iplink_bareudp.c +++ b/ip/iplink_bareudp.c @@ -1,12 +1,12 @@ /* SPDX-License-Identifier: GPL-2.0 */ #include +#include +#include +#include +#include #include "libnetlink.h" -#include "linux/if_ether.h" -#include "linux/if_link.h" -#include "linux/netlink.h" -#include "linux/rtnetlink.h" #include "rt_names.h" #include "utils.h" #include "ip_common.h" From a12b203c789b0f16acef4e3815d19e95adbba19b Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Wed, 8 Jul 2020 08:40:20 -0700 Subject: [PATCH 21/28] rtacct: drop unused header --- misc/rtacct.c | 1 - 1 file changed, 1 deletion(-) diff --git a/misc/rtacct.c b/misc/rtacct.c index b1cb3788..c4bb5bc3 100644 --- a/misc/rtacct.c +++ b/misc/rtacct.c @@ -18,7 +18,6 @@ #include #include #include -#include #include #include #include From 06897857827dc58156974e3536524f1d213d7f66 Mon Sep 17 00:00:00 2001 From:
Stephen Hemminger Date: Wed, 8 Jul 2020 08:41:24 -0700 Subject: [PATCH 22/28] genl: use <> for system includes Be consistent about local versus system headers. Signed-off-by: Stephen Hemminger --- genl/genl_utils.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/genl/genl_utils.h b/genl/genl_utils.h index a8d433a9..87b4f34c 100644 --- a/genl/genl_utils.h +++ b/genl/genl_utils.h @@ -2,11 +2,10 @@ #ifndef _TC_UTIL_H_ #define _TC_UTIL_H_ 1 +#include #include "utils.h" -#include "linux/genetlink.h" -struct genl_util -{ +struct genl_util { struct genl_util *next; char name[16]; int (*parse_genlopt)(struct genl_util *fu, int argc, char **argv); From a8d6f51c841a5aa23db0b194a23574b7bf8e2bb5 Mon Sep 17 00:00:00 2001 From: Andrea Claudi Date: Tue, 7 Jul 2020 21:49:47 +0200 Subject: [PATCH 23/28] ip address: remove useless include utils.h is included two times in ipaddress.c, there is no need for that. Signed-off-by: Andrea Claudi Signed-off-by: Stephen Hemminger --- ip/ipaddress.c | 1 - 1 file changed, 1 deletion(-) diff --git a/ip/ipaddress.c b/ip/ipaddress.c index f97eaff3..ccf67d1d 100644 --- a/ip/ipaddress.c +++ b/ip/ipaddress.c @@ -30,7 +30,6 @@ #include #include -#include "utils.h" #include "rt_names.h" #include "utils.h" #include "ll_map.h" From 650591a7a70cd79d826fcdc579a20c168c987cf2 Mon Sep 17 00:00:00 2001 From: Tony Ambardar Date: Tue, 7 Jul 2020 00:58:33 -0700 Subject: [PATCH 24/28] configure: support ipset version 7 with kernel version 5 The configure script checks for ipset v6 availability but doesn't test for v7, which is backward compatible and used on kernel v5.x systems. Update the script to test for both ipset versions. Without this change, the tc ematch function em_ipset will be disabled. 
Signed-off-by: Tony Ambardar Signed-off-by: Stephen Hemminger --- configure | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/configure b/configure index f415bf49..307912aa 100755 --- a/configure +++ b/configure @@ -208,7 +208,7 @@ typedef unsigned short ip_set_id_t; #include struct xt_set_info info; -#if IPSET_PROTOCOL == 6 +#if IPSET_PROTOCOL == 6 || IPSET_PROTOCOL == 7 int main(void) { return IPSET_MAXNAMELEN; From 8fc09aff8dfc65e1ebb7c1b4a6809cf6a4146ae7 Mon Sep 17 00:00:00 2001 From: Julien Fortin Date: Fri, 10 Jul 2020 02:53:02 +0200 Subject: [PATCH 25/28] bridge: fdb get: add missing json init (new_json_obj) 'bridge fdb get' has json support but the json object is never initialized before patch: $ bridge -j fdb get 56:23:28:4f:4f:e5 dev vx0 56:23:28:4f:4f:e5 dev vx0 master br0 permanent $ after patch: $ bridge -j fdb get 56:23:28:4f:4f:e5 dev vx0 | \ python -c \ 'import sys,json;print(json.dumps(json.loads(sys.stdin.read()),indent=4))' [ { "master": "br0", "mac": "56:23:28:4f:4f:e5", "flags": [], "ifname": "vx0", "state": "permanent" } ] $ Signed-off-by: Julien Fortin Signed-off-by: Stephen Hemminger --- bridge/fdb.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/bridge/fdb.c b/bridge/fdb.c index 710dfc99..a12b5474 100644 --- a/bridge/fdb.c +++ b/bridge/fdb.c @@ -619,10 +619,16 @@ static int fdb_get(int argc, char **argv) if (rtnl_talk(&rth, &req.n, &answer) < 0) return -2; + /* + * Initialize a json_writer and open an array object + * if -json was specified. 
+ */ + new_json_obj(json); if (print_fdb(answer, stdout) < 0) { fprintf(stderr, "An error :-)\n"); return -1; } + delete_json_obj(); return 0; } From ca75a86337d0885e6f118b2c38d766493f4321e3 Mon Sep 17 00:00:00 2001 From: Hoang Huu Le Date: Thu, 9 Jul 2020 11:25:55 +0700 Subject: [PATCH 26/28] tipc: fixed a compile warning in tipc/link.c Fixes: 5027f233e35b ("tipc: add link broadcast get") Signed-off-by: Hoang Huu Le Acked-by: Jon Maloy Signed-off-by: Stephen Hemminger --- tipc/link.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tipc/link.c b/tipc/link.c index ba77a201..192736ea 100644 --- a/tipc/link.c +++ b/tipc/link.c @@ -217,7 +217,7 @@ static int cmd_link_get_bcast_cb(const struct nlmsghdr *nlh, void *data) print_string(PRINT_ANY, "method", "%s", "AUTOSELECT"); close_json_object(); open_json_object(NULL); - print_uint(PRINT_ANY, "ratio", " ratio:%u%\n", + print_uint(PRINT_ANY, "ratio", " ratio:%u\n", mnl_attr_get_u32(props[prop_ratio])); break; default: From ee93c1107fa50bebd181b289d7e3531c51b7779e Mon Sep 17 00:00:00 2001 From: Eyal Birger Date: Thu, 9 Jul 2020 09:29:47 +0300 Subject: [PATCH 27/28] ip xfrm: update man page on setting/printing XFRMA_IF_ID in states/policies In commit aed63ae1acb9 ("ip xfrm: support setting/printing XFRMA_IF_ID attribute in states/policies") I added the ability to set/print the xfrm interface ID without updating the man page. 
Fixes: aed63ae1acb9 ("ip xfrm: support setting/printing XFRMA_IF_ID attribute in states/policies") Signed-off-by: Eyal Birger Signed-off-by: Stephen Hemminger --- man/man8/ip-xfrm.8 | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/man/man8/ip-xfrm.8 b/man/man8/ip-xfrm.8 index f99f30bb..d717205d 100644 --- a/man/man8/ip-xfrm.8 +++ b/man/man8/ip-xfrm.8 @@ -61,6 +61,8 @@ ip-xfrm \- transform configuration .IR EXTRA-FLAG-LIST " ]" .RB "[ " output-mark .IR OUTPUT-MARK " ]" +.RB "[ " if_id +.IR IF-ID " ]" .ti -8 .B "ip xfrm state allocspi" @@ -238,6 +240,8 @@ ip-xfrm \- transform configuration .IR PRIORITY " ]" .RB "[ " flag .IR FLAG-LIST " ]" +.RB "[ " if_id +.IR IF-ID " ]" .RI "[ " LIMIT-LIST " ] [ " TMPL-LIST " ]" .ti -8 @@ -561,6 +565,10 @@ used to match xfrm policies and states used to set the output mark to influence the routing of the packets emitted by the state +.TP +.I IF-ID +xfrm interface identifier used in both xfrm policies and states + .sp .PP .TS From f33a871b8094ae0f6e6293804e1cc6edbba0e108 Mon Sep 17 00:00:00 2001 From: Eyal Birger Date: Thu, 9 Jul 2020 09:29:48 +0300 Subject: [PATCH 28/28] ip xfrm: policy: support policies with IF_ID in get/delete/deleteall The XFRMA_IF_ID attribute is set in policies for them to be associated with an XFRM interface (4.19+). Add support for getting/deleting policies with this attribute. For supporting 'deleteall' the XFRMA_IF_ID attribute needs to be explicitly copied.
Signed-off-by: Eyal Birger Signed-off-by: Stephen Hemminger --- ip/xfrm_policy.c | 17 ++++++++++++++++- man/man8/ip-xfrm.8 | 2 ++ 2 files changed, 18 insertions(+), 1 deletion(-) diff --git a/ip/xfrm_policy.c b/ip/xfrm_policy.c index d3c706d3..7cc00e7c 100644 --- a/ip/xfrm_policy.c +++ b/ip/xfrm_policy.c @@ -59,6 +59,7 @@ static void usage(void) " [ if_id IF_ID ] [ LIMIT-LIST ] [ TMPL-LIST ]\n" "Usage: ip xfrm policy { delete | get } { SELECTOR | index INDEX } dir DIR\n" " [ ctx CTX ] [ mark MARK [ mask MASK ] ] [ ptype PTYPE ]\n" + " [ if_id IF_ID ]\n" "Usage: ip xfrm policy { deleteall | list } [ nosock ] [ SELECTOR ] [ dir DIR ]\n" " [ index INDEX ] [ ptype PTYPE ] [ action ACTION ] [ priority PRIORITY ]\n" " [ flag FLAG-LIST ]\n" @@ -582,6 +583,8 @@ static int xfrm_policy_get_or_delete(int argc, char **argv, int delete, struct xfrm_user_sec_ctx sctx; char str[CTX_BUF_SIZE]; } ctx = {}; + bool is_if_id_set = false; + __u32 if_id = 0; while (argc > 0) { if (strcmp(*argv, "dir") == 0) { @@ -619,7 +622,11 @@ static int xfrm_policy_get_or_delete(int argc, char **argv, int delete, NEXT_ARG(); xfrm_policy_ptype_parse(&upt.type, &argc, &argv); - + } else if (strcmp(*argv, "if_id") == 0) { + NEXT_ARG(); + if (get_u32(&if_id, *argv, 0)) + invarg("IF_ID value is invalid", *argv); + is_if_id_set = true; } else { if (selp) invarg("unknown", *argv); @@ -669,6 +676,9 @@ static int xfrm_policy_get_or_delete(int argc, char **argv, int delete, (void *)&ctx, ctx.sctx.len); } + if (is_if_id_set) + addattr32(&req.n, sizeof(req.buf), XFRMA_IF_ID, if_id); + if (rtnl_talk(&rth, &req.n, answer) < 0) exit(2); @@ -767,6 +777,11 @@ static int xfrm_policy_keep(struct nlmsghdr *n, void *arg) } } + if (tb[XFRMA_IF_ID]) { + addattr32(new_n, xb->size, XFRMA_IF_ID, + rta_getattr_u32(tb[XFRMA_IF_ID])); + } + xb->offset += new_n->nlmsg_len; xb->nlmsg_count++; diff --git a/man/man8/ip-xfrm.8 b/man/man8/ip-xfrm.8 index d717205d..aa28db49 100644 --- a/man/man8/ip-xfrm.8 +++ b/man/man8/ip-xfrm.8 @@ 
-259,6 +259,8 @@ ip-xfrm \- transform configuration .IR MASK " ] ]" .RB "[ " ptype .IR PTYPE " ]" +.RB "[ " if_id +.IR IF-ID " ]" .ti -8 .BR ip " [ " -4 " | " -6 " ] " "xfrm policy" " { " deleteall " | " list " }"