From 5023df6a21c73560b514d7fde5381d140373afe9 Mon Sep 17 00:00:00 2001 From: Moshe Shemesh Date: Tue, 4 Feb 2020 23:37:02 +0200 Subject: [PATCH 01/21] devlink: Add health error recovery status monitoring Add support for devlink health error recovery status monitoring. Update devlink-monitor man page accordingly. Signed-off-by: Moshe Shemesh Acked-by: Jiri Pirko Signed-off-by: David Ahern --- devlink/devlink.c | 15 ++++++++++++++- man/man8/devlink-monitor.8 | 3 ++- 2 files changed, 16 insertions(+), 2 deletions(-) diff --git a/devlink/devlink.c b/devlink/devlink.c index 73ce9865..f48ff6c2 100644 --- a/devlink/devlink.c +++ b/devlink/devlink.c @@ -4128,6 +4128,7 @@ static const char *cmd_name(uint8_t cmd) case DEVLINK_CMD_FLASH_UPDATE: return "begin"; case DEVLINK_CMD_FLASH_UPDATE_END: return "end"; case DEVLINK_CMD_FLASH_UPDATE_STATUS: return "status"; + case DEVLINK_CMD_HEALTH_REPORTER_RECOVER: return "status"; case DEVLINK_CMD_TRAP_GET: return "get"; case DEVLINK_CMD_TRAP_SET: return "set"; case DEVLINK_CMD_TRAP_NEW: return "new"; @@ -4168,6 +4169,8 @@ static const char *cmd_obj(uint8_t cmd) case DEVLINK_CMD_FLASH_UPDATE_END: case DEVLINK_CMD_FLASH_UPDATE_STATUS: return "flash"; + case DEVLINK_CMD_HEALTH_REPORTER_RECOVER: + return "health"; case DEVLINK_CMD_TRAP_GET: case DEVLINK_CMD_TRAP_SET: case DEVLINK_CMD_TRAP_NEW: @@ -4229,6 +4232,7 @@ static void pr_out_flash_update(struct dl *dl, struct nlattr **tb) } static void pr_out_region(struct dl *dl, struct nlattr **tb); +static void pr_out_health(struct dl *dl, struct nlattr **tb_health); static void pr_out_trap(struct dl *dl, struct nlattr **tb, bool array); static void pr_out_trap_group(struct dl *dl, struct nlattr **tb, bool array); @@ -4295,6 +4299,14 @@ static int cmd_mon_show_cb(const struct nlmsghdr *nlh, void *data) pr_out_mon_header(genl->cmd); pr_out_flash_update(dl, tb); break; + case DEVLINK_CMD_HEALTH_REPORTER_RECOVER: + mnl_attr_parse(nlh, sizeof(*genl), attr_cb, tb); + if (!tb[DEVLINK_ATTR_BUS_NAME] 
|| !tb[DEVLINK_ATTR_DEV_NAME] || + !tb[DEVLINK_ATTR_HEALTH_REPORTER]) + return MNL_CB_ERROR; + pr_out_mon_header(genl->cmd); + pr_out_health(dl, tb); + break; case DEVLINK_CMD_TRAP_GET: /* fall through */ case DEVLINK_CMD_TRAP_SET: /* fall through */ case DEVLINK_CMD_TRAP_NEW: /* fall through */ @@ -4337,6 +4349,7 @@ static int cmd_mon_show(struct dl *dl) if (strcmp(cur_obj, "all") != 0 && strcmp(cur_obj, "dev") != 0 && strcmp(cur_obj, "port") != 0 && + strcmp(cur_obj, "health") != 0 && strcmp(cur_obj, "trap") != 0 && strcmp(cur_obj, "trap-group") != 0) { pr_err("Unknown object \"%s\"\n", cur_obj); @@ -4355,7 +4368,7 @@ static int cmd_mon_show(struct dl *dl) static void cmd_mon_help(void) { pr_err("Usage: devlink monitor [ all | OBJECT-LIST ]\n" - "where OBJECT-LIST := { dev | port | trap | trap-group }\n"); + "where OBJECT-LIST := { dev | port | health | trap | trap-group }\n"); } static int cmd_mon(struct dl *dl) diff --git a/man/man8/devlink-monitor.8 b/man/man8/devlink-monitor.8 index fffab3a4..a96d350f 100644 --- a/man/man8/devlink-monitor.8 +++ b/man/man8/devlink-monitor.8 @@ -21,7 +21,7 @@ command is the first in the command line and then the object list. .I OBJECT-LIST is the list of object types that we want to monitor. It may contain -.BR dev ", " port ", " trap ", " trap-group . +.BR dev ", " port ", " health ", " trap ", " trap-group . .B devlink opens Devlink Netlink socket, listens on it and dumps state changes. @@ -31,6 +31,7 @@ opens Devlink Netlink socket, listens on it and dumps state changes. 
.BR devlink-dev (8), .BR devlink-sb (8), .BR devlink-port (8), +.BR devlink-health (8), .BR devlink-trap (8), .br From b6b8e40bf782c7b32e8ff5708726f8319fb02a14 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Wed, 4 Mar 2020 19:44:21 +0000 Subject: [PATCH 02/21] Update kernel headers Update kernel headers to commit ef71037047b0 ("Merge branch 'act_ct-software-offload-of-established-flows-fixes'") Signed-off-by: David Ahern --- include/uapi/linux/bpf.h | 27 +++++++++++++++++++++++++-- include/uapi/linux/devlink.h | 3 +++ include/uapi/linux/if_link.h | 12 ++++++++++++ include/uapi/linux/inet_diag.h | 5 ++++- include/uapi/linux/sock_diag.h | 26 ++++++++++++++++++++++++++ include/uapi/linux/tcp.h | 2 ++ 6 files changed, 72 insertions(+), 3 deletions(-) diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 65764580..73a52a21 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -73,7 +73,7 @@ struct bpf_insn { /* Key of an a BPF_MAP_TYPE_LPM_TRIE entry */ struct bpf_lpm_trie_key { __u32 prefixlen; /* up to 32 for AF_INET, 128 for AF_INET6 */ - __u8 data[0]; /* Arbitrary size */ + __u8 data[]; /* Arbitrary size */ }; struct bpf_cgroup_storage_key { @@ -2890,6 +2890,25 @@ union bpf_attr { * Obtain the 64bit jiffies * Return * The 64 bit jiffies + * + * int bpf_read_branch_records(struct bpf_perf_event_data *ctx, void *buf, u32 size, u64 flags) + * Description + * For an eBPF program attached to a perf event, retrieve the + * branch records (struct perf_branch_entry) associated to *ctx* + * and store it in the buffer pointed by *buf* up to size + * *size* bytes. + * Return + * On success, number of bytes written to *buf*. On error, a + * negative value. + * + * The *flags* can be set to **BPF_F_GET_BRANCH_RECORDS_SIZE** to + * instead return the number of bytes required to store all the + * branch entries. If this flag is set, *buf* may be NULL. 
+ * + * **-EINVAL** if arguments invalid or **size** not a multiple + * of sizeof(struct perf_branch_entry). + * + * **-ENOENT** if architecture does not support branch records. */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -3010,7 +3029,8 @@ union bpf_attr { FN(probe_read_kernel_str), \ FN(tcp_send_ack), \ FN(send_signal_thread), \ - FN(jiffies64), + FN(jiffies64), \ + FN(read_branch_records), /* integer value in 'imm' field of BPF_CALL instruction selects which helper * function eBPF program intends to call @@ -3089,6 +3109,9 @@ enum bpf_func_id { /* BPF_FUNC_sk_storage_get flags */ #define BPF_SK_STORAGE_GET_F_CREATE (1ULL << 0) +/* BPF_FUNC_read_branch_records flags. */ +#define BPF_F_GET_BRANCH_RECORDS_SIZE (1ULL << 0) + /* Mode for BPF_FUNC_skb_adjust_room helper. */ enum bpf_adj_room_mode { BPF_ADJ_ROOM_NET, diff --git a/include/uapi/linux/devlink.h b/include/uapi/linux/devlink.h index 3f82dedd..1b412281 100644 --- a/include/uapi/linux/devlink.h +++ b/include/uapi/linux/devlink.h @@ -187,6 +187,7 @@ enum devlink_port_flavour { * for the PCI VF. It is an internal * port that faces the PCI VF. */ + DEVLINK_PORT_FLAVOUR_VIRTUAL, /* Any virtual port facing the user. 
*/ }; enum devlink_param_cmode { @@ -252,6 +253,8 @@ enum devlink_trap_type { enum { /* Trap can report input port as metadata */ DEVLINK_ATTR_TRAP_METADATA_TYPE_IN_PORT, + /* Trap can report flow action cookie as metadata */ + DEVLINK_ATTR_TRAP_METADATA_TYPE_FA_COOKIE, }; enum devlink_attr { diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index 533abd2c..cb88bcb4 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -588,6 +588,18 @@ enum ifla_geneve_df { GENEVE_DF_MAX = __GENEVE_DF_END - 1, }; +/* Bareudp section */ +enum { + IFLA_BAREUDP_UNSPEC, + IFLA_BAREUDP_PORT, + IFLA_BAREUDP_ETHERTYPE, + IFLA_BAREUDP_SRCPORT_MIN, + IFLA_BAREUDP_MULTIPROTO_MODE, + __IFLA_BAREUDP_MAX +}; + +#define IFLA_BAREUDP_MAX (__IFLA_BAREUDP_MAX - 1) + /* PPP section */ enum { IFLA_PPP_UNSPEC, diff --git a/include/uapi/linux/inet_diag.h b/include/uapi/linux/inet_diag.h index 3dff6841..e045d170 100644 --- a/include/uapi/linux/inet_diag.h +++ b/include/uapi/linux/inet_diag.h @@ -64,9 +64,11 @@ struct inet_diag_req_raw { enum { INET_DIAG_REQ_NONE, INET_DIAG_REQ_BYTECODE, + INET_DIAG_REQ_SK_BPF_STORAGES, + __INET_DIAG_REQ_MAX, }; -#define INET_DIAG_REQ_MAX INET_DIAG_REQ_BYTECODE +#define INET_DIAG_REQ_MAX (__INET_DIAG_REQ_MAX - 1) /* Bytecode is sequence of 4 byte commands followed by variable arguments. 
* All the commands identified by "code" are conditional jumps forward: @@ -154,6 +156,7 @@ enum { INET_DIAG_CLASS_ID, /* request as INET_DIAG_TCLASS */ INET_DIAG_MD5SIG, INET_DIAG_ULP_INFO, + INET_DIAG_SK_BPF_STORAGES, __INET_DIAG_MAX, }; diff --git a/include/uapi/linux/sock_diag.h b/include/uapi/linux/sock_diag.h index a69cf20f..35c0ce67 100644 --- a/include/uapi/linux/sock_diag.h +++ b/include/uapi/linux/sock_diag.h @@ -36,4 +36,30 @@ enum sknetlink_groups { }; #define SKNLGRP_MAX (__SKNLGRP_MAX - 1) +enum { + SK_DIAG_BPF_STORAGE_REQ_NONE, + SK_DIAG_BPF_STORAGE_REQ_MAP_FD, + __SK_DIAG_BPF_STORAGE_REQ_MAX, +}; + +#define SK_DIAG_BPF_STORAGE_REQ_MAX (__SK_DIAG_BPF_STORAGE_REQ_MAX - 1) + +enum { + SK_DIAG_BPF_STORAGE_REP_NONE, + SK_DIAG_BPF_STORAGE, + __SK_DIAG_BPF_STORAGE_REP_MAX, +}; + +#define SK_DIAB_BPF_STORAGE_REP_MAX (__SK_DIAG_BPF_STORAGE_REP_MAX - 1) + +enum { + SK_DIAG_BPF_STORAGE_NONE, + SK_DIAG_BPF_STORAGE_PAD, + SK_DIAG_BPF_STORAGE_MAP_ID, + SK_DIAG_BPF_STORAGE_MAP_VALUE, + __SK_DIAG_BPF_STORAGE_MAX, +}; + +#define SK_DIAG_BPF_STORAGE_MAX (__SK_DIAG_BPF_STORAGE_MAX - 1) + #endif /* __SOCK_DIAG_H__ */ diff --git a/include/uapi/linux/tcp.h b/include/uapi/linux/tcp.h index d79550b3..36532937 100644 --- a/include/uapi/linux/tcp.h +++ b/include/uapi/linux/tcp.h @@ -345,5 +345,7 @@ struct tcp_zerocopy_receive { __u64 address; /* in: address of mapping */ __u32 length; /* in/out: number of bytes to map/mapped */ __u32 recv_skip_hint; /* out: amount of bytes to skip */ + __u32 inq; /* out: amount of bytes in read queue */ + __s32 err; /* out: socket error */ }; #endif /* _LINUX_TCP_H */ From 4fe07b81461bb58cd48720019cd416da39aeadea Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Tue, 3 Mar 2020 14:27:42 +0100 Subject: [PATCH 03/21] devlink: add trap metadata type for flow action cookie Flow action cookie has been recently added to kernel, print it out. 
Signed-off-by: Jiri Pirko Signed-off-by: David Ahern --- devlink/devlink.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/devlink/devlink.c b/devlink/devlink.c index 6e2115b6..eef27c27 100644 --- a/devlink/devlink.c +++ b/devlink/devlink.c @@ -6932,6 +6932,8 @@ static const char *trap_metadata_name(const struct nlattr *attr) switch (attr->nla_type) { case DEVLINK_ATTR_TRAP_METADATA_TYPE_IN_PORT: return "input_port"; + case DEVLINK_ATTR_TRAP_METADATA_TYPE_FA_COOKIE: + return "flow_action_cookie"; default: return ""; } From a5c44b821c9a32304c9d8211224e917808d412fe Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Tue, 3 Mar 2020 22:06:26 -0600 Subject: [PATCH 04/21] devlink: Introduce devlink port flavour virtual Currently PCI PF and VF devlink devices register their ports as physical ports in non-representors mode. Introduce a new port flavour as virtual so that virtual devices can register 'virtual' flavour to make it clearer to users. An example of one PCI PF and 2 PCI virtual functions, each having one devlink port.
$ devlink port show pci/0000:06:00.0/1: type eth netdev ens2f0 flavour physical port 0 pci/0000:06:00.2/1: type eth netdev ens2f2 flavour virtual port 0 pci/0000:06:00.3/1: type eth netdev ens2f3 flavour virtual port 0 Reviewed-by: Jiri Pirko Signed-off-by: Parav Pandit Signed-off-by: David Ahern --- devlink/devlink.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/devlink/devlink.c b/devlink/devlink.c index eef27c27..67e6e641 100644 --- a/devlink/devlink.c +++ b/devlink/devlink.c @@ -3150,6 +3150,8 @@ static const char *port_flavour_name(uint16_t flavour) return "pcipf"; case DEVLINK_PORT_FLAVOUR_PCI_VF: return "pcivf"; + case DEVLINK_PORT_FLAVOUR_VIRTUAL: + return "virtual"; default: return ""; } From da6abdba09d800c87a3a0a4c8d2bb4879a037e92 Mon Sep 17 00:00:00 2001 From: Antoine Tenart Date: Tue, 3 Mar 2020 11:36:16 +0100 Subject: [PATCH 05/21] macsec: report the offloading mode currently selected This patch adds support to report the MACsec offloading mode currently being enabled, which as of now can either be 'off' or 'phy'. 
This information is reported through the `ip macsec show` command: # ip macsec show 18: macsec0: protect on validate strict sc off sa off encrypt on send_sci on end_station off scb off replay off cipher suite: GCM-AES-128, using ICV length 16 TXSC: 3e5035b67c860001 on SA 0 0: PN 1, state on, key 00000000000000000000000000000000 RXSC: b4969112700f0001, state on 0: PN 1, state on, key 01000000000000000000000000000000 offload: phy 19: macsec1: protect on validate strict sc off sa off encrypt on send_sci on end_station off scb off replay off cipher suite: GCM-AES-128, using ICV length 16 TXSC: 3e5035b67c880001 on SA 0 1: PN 1, state on, key 00000000000000000000000000000000 RXSC: b4969112700f0001, state on 1: PN 1, state on, key 01000000000000000000000000000000 offload: off Signed-off-by: Antoine Tenart Signed-off-by: David Ahern --- ip/ipmacsec.c | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/ip/ipmacsec.c b/ip/ipmacsec.c index ad6ad7d6..4327c796 100644 --- a/ip/ipmacsec.c +++ b/ip/ipmacsec.c @@ -31,6 +31,11 @@ static const char * const validate_str[] = { [MACSEC_VALIDATE_STRICT] = "strict", }; +static const char * const offload_str[] = { + [MACSEC_OFFLOAD_OFF] = "off", + [MACSEC_OFFLOAD_PHY] = "phy", +}; + struct sci { __u64 sci; __u16 port; @@ -605,6 +610,14 @@ static const char *cs_id_to_name(__u64 cid) } } +static const char *offload_to_str(__u8 offload) +{ + if (offload >= ARRAY_SIZE(offload_str)) + return "(unknown)"; + + return offload_str[offload]; +} + static void print_attrs(struct rtattr *attrs[]) { print_flag(attrs, "protect", MACSEC_SECY_ATTR_PROTECT); @@ -997,6 +1010,19 @@ static int process(struct nlmsghdr *n, void *arg) if (attrs[MACSEC_ATTR_RXSC_LIST]) print_rxsc_list(attrs[MACSEC_ATTR_RXSC_LIST]); + if (attrs[MACSEC_ATTR_OFFLOAD]) { + struct rtattr *attrs_offload[MACSEC_OFFLOAD_ATTR_MAX + 1]; + __u8 offload; + + parse_rtattr_nested(attrs_offload, MACSEC_OFFLOAD_ATTR_MAX, + attrs[MACSEC_ATTR_OFFLOAD]); + + offload = 
rta_getattr_u8(attrs_offload[MACSEC_OFFLOAD_ATTR_TYPE]); + print_string(PRINT_ANY, "offload", + " offload: %s ", offload_to_str(offload)); + print_nl(); + } + close_json_object(); return 0; From 791bc7ee482b0e48d1020888521134161f216ff5 Mon Sep 17 00:00:00 2001 From: Antoine Tenart Date: Tue, 3 Mar 2020 11:36:17 +0100 Subject: [PATCH 06/21] macsec: add support for changing the offloading mode MacSEC can now be offloaded to specialized hardware devices. Offloading is off by default when creating a new MACsec interface, but the mode can be updated at runtime. This patch adds a new subcommand, `ip macsec offload`, to allow users to select the offloading mode of a MACsec interface. It takes the mode to switch to as an argument, which can for now either be 'off' or 'phy': # ip macsec offload macsec0 phy # ip macsec offload macsec0 off Signed-off-by: Antoine Tenart Signed-off-by: David Ahern --- ip/ipmacsec.c | 45 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 45 insertions(+) diff --git a/ip/ipmacsec.c b/ip/ipmacsec.c index 4327c796..6104a3a5 100644 --- a/ip/ipmacsec.c +++ b/ip/ipmacsec.c @@ -98,6 +98,7 @@ static void ipmacsec_usage(void) " ip macsec del DEV rx SCI sa { 0..3 }\n" " ip macsec show\n" " ip macsec show DEV\n" + " ip macsec offload DEV [ off | phy ]\n" "where OPTS := [ pn ] [ on | off ]\n" " ID := 128-bit hex string\n" " KEY := 128-bit or 256-bit hex string\n" @@ -359,6 +360,7 @@ enum cmd { CMD_ADD, CMD_DEL, CMD_UPD, + CMD_OFFLOAD, __CMD_MAX }; @@ -375,6 +377,9 @@ static const enum macsec_nl_commands macsec_commands[__CMD_MAX][2][2] = { [0] = {-1, MACSEC_CMD_DEL_RXSC}, [1] = {MACSEC_CMD_DEL_TXSA, MACSEC_CMD_DEL_RXSA}, }, + [CMD_OFFLOAD] = { + [0] = {-1, MACSEC_CMD_UPD_OFFLOAD }, + }, }; static int do_modify_nl(enum cmd c, enum macsec_nl_commands cmd, int ifindex, @@ -534,6 +539,44 @@ static int do_modify(enum cmd c, int argc, char **argv) return -1; } +static int do_offload(enum cmd c, int argc, char **argv) +{ + enum macsec_offload offload; + 
struct rtattr *attr; + int ifindex, ret; + + if (argc == 0) + ipmacsec_usage(); + + ifindex = ll_name_to_index(*argv); + if (!ifindex) { + fprintf(stderr, "Device \"%s\" does not exist.\n", *argv); + return -1; + } + argc--; argv++; + + if (argc == 0) + ipmacsec_usage(); + + ret = one_of("offload", *argv, offload_str, ARRAY_SIZE(offload_str), + (int *)&offload); + if (ret) + ipmacsec_usage(); + + MACSEC_GENL_REQ(req, MACSEC_BUFLEN, macsec_commands[c][0][1], NLM_F_REQUEST); + + addattr32(&req.n, MACSEC_BUFLEN, MACSEC_ATTR_IFINDEX, ifindex); + + attr = addattr_nest(&req.n, MACSEC_BUFLEN, MACSEC_ATTR_OFFLOAD); + addattr8(&req.n, MACSEC_BUFLEN, MACSEC_OFFLOAD_ATTR_TYPE, offload); + addattr_nest_end(&req.n, attr); + + if (rtnl_talk(&genl_rth, &req.n, NULL) < 0) + return -2; + + return 0; +} + /* dump/show */ static struct { int ifindex; @@ -1094,6 +1137,8 @@ int do_ipmacsec(int argc, char **argv) return do_modify(CMD_UPD, argc-1, argv+1); if (matches(*argv, "delete") == 0) return do_modify(CMD_DEL, argc-1, argv+1); + if (matches(*argv, "offload") == 0) + return do_offload(CMD_OFFLOAD, argc-1, argv+1); fprintf(stderr, "Command \"%s\" is unknown, try \"ip macsec help\".\n", *argv); From 69166f909b16fff8cd0af28f705b48606380c6e4 Mon Sep 17 00:00:00 2001 From: Antoine Tenart Date: Tue, 3 Mar 2020 11:36:18 +0100 Subject: [PATCH 07/21] man: document the ip macsec offload command Add a description of the `ip macsec offload` command used to select the offloading mode on a macsec interface when the underlying device supports it. 
Signed-off-by: Antoine Tenart Signed-off-by: David Ahern --- man/man8/ip-macsec.8 | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/man/man8/ip-macsec.8 b/man/man8/ip-macsec.8 index 2179b336..d5f9d240 100644 --- a/man/man8/ip-macsec.8 +++ b/man/man8/ip-macsec.8 @@ -53,6 +53,9 @@ ip-macsec \- MACsec device configuration .BI "ip macsec del " DEV " rx " SCI " sa" .RI "{ " 0..3 " }" +.BI "ip macsec offload " DEV +.RB "{ " off " | " phy " }" + .B ip macsec show .RI [ " DEV " ] @@ -102,6 +105,10 @@ type. .SS Display MACsec configuration .nf # ip macsec show +.PP +.SS Configure offloading on an interface +.nf +# ip macsec offload macsec0 phy .SH NOTES This tool can be used to configure the 802.1AE keys of the interface. Note that 802.1AE uses GCM-AES From c15674d80d49dac94d659de443675f19e23a734e Mon Sep 17 00:00:00 2001 From: Antoine Tenart Date: Tue, 3 Mar 2020 11:36:19 +0100 Subject: [PATCH 08/21] macsec: add an accessor for validate_str This patch adds an accessor for the validate_str array, to handle future changes adding a member. 
Signed-off-by: Antoine Tenart Signed-off-by: David Ahern --- ip/ipmacsec.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/ip/ipmacsec.c b/ip/ipmacsec.c index 6104a3a5..4e500e4e 100644 --- a/ip/ipmacsec.c +++ b/ip/ipmacsec.c @@ -653,6 +653,14 @@ static const char *cs_id_to_name(__u64 cid) } } +static const char *validate_to_str(__u8 validate) +{ + if (validate >= ARRAY_SIZE(validate_str)) + return "(unknown)"; + + return validate_str[validate]; +} + static const char *offload_to_str(__u8 offload) { if (offload >= ARRAY_SIZE(offload_str)) @@ -669,7 +677,7 @@ static void print_attrs(struct rtattr *attrs[]) __u8 val = rta_getattr_u8(attrs[MACSEC_SECY_ATTR_VALIDATE]); print_string(PRINT_ANY, "validate", - "validate %s ", validate_str[val]); + "validate %s ", validate_to_str(val)); } print_flag(attrs, "sc", MACSEC_RXSC_ATTR_ACTIVE); @@ -1208,7 +1216,7 @@ static void macsec_print_opt(struct link_util *lu, FILE *f, struct rtattr *tb[]) print_string(PRINT_ANY, "validation", "validate %s ", - validate_str[val]); + validate_to_str(val)); } const char *inc_sci, *es, *replay; From 92cfe3260e9110c3d33627847b6eaa153664c79c Mon Sep 17 00:00:00 2001 From: Leslie Monis Date: Thu, 5 Mar 2020 21:55:40 +0530 Subject: [PATCH 09/21] tc: pie: change maximum integer value of tc_pie_xstats->prob Kernel commit 105e808c1da2 ("pie: remove pie_vars->accu_prob_overflows"), changes the maximum value of tc_pie_xstats->prob from (2^64 - 1) to (2^56 - 1). Signed-off-by: Mohit P. 
Tahiliani Signed-off-by: Gautam Ramakrishnan Signed-off-by: Leslie Monis Signed-off-by: David Ahern --- tc/q_pie.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tc/q_pie.c b/tc/q_pie.c index 709a78b4..e6939652 100644 --- a/tc/q_pie.c +++ b/tc/q_pie.c @@ -223,9 +223,9 @@ static int pie_print_xstats(struct qdisc_util *qu, FILE *f, st = RTA_DATA(xstats); - /* prob is returned as a fracion of maximum integer value */ + /* prob is returned as a fracion of (2^56 - 1) */ print_float(PRINT_ANY, "prob", " prob %lg", - (double)st->prob / (double)UINT64_MAX); + (double)st->prob / (double)(UINT64_MAX >> 8)); print_uint(PRINT_JSON, "delay", NULL, st->delay); print_string(PRINT_FP, NULL, " delay %s", sprint_time(st->delay, b1)); From 94c4ce822c2c8e080fdb612e2585bbec0a8a3cd9 Mon Sep 17 00:00:00 2001 From: Leslie Monis Date: Tue, 10 Mar 2020 23:45:49 +0530 Subject: [PATCH 10/21] Revert "tc: pie: change maximum integer value of tc_pie_xstats->prob" This reverts commit 92cfe3260e9110c3d33627847b6eaa153664c79c. Kernel commit 3f95f55eb55d ("net: sched: pie: change tc_pie_xstats->prob") removes the need to change the maximum integer value of tc_pie_xstats->prob here.
Suggested-by: Eric Dumazet Signed-off-by: Leslie Monis Signed-off-by: David Ahern --- tc/q_pie.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tc/q_pie.c b/tc/q_pie.c index e6939652..709a78b4 100644 --- a/tc/q_pie.c +++ b/tc/q_pie.c @@ -223,9 +223,9 @@ static int pie_print_xstats(struct qdisc_util *qu, FILE *f, st = RTA_DATA(xstats); - /* prob is returned as a fracion of (2^56 - 1) */ + /* prob is returned as a fracion of maximum integer value */ print_float(PRINT_ANY, "prob", " prob %lg", - (double)st->prob / (double)(UINT64_MAX >> 8)); + (double)st->prob / (double)UINT64_MAX); print_uint(PRINT_JSON, "delay", NULL, st->delay); print_string(PRINT_FP, NULL, " delay %s", sprint_time(st->delay, b1)); From 25091a761f0d7d4d5c102da6f2282ea042b65404 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Fri, 20 Mar 2020 16:17:55 +0000 Subject: [PATCH 11/21] Update kernel headers Update kernel headers to commit: 3fd177cb2b47 ("net: stmmac: dwmac_lib: remove unnecessary checks in dwmac_dma_reset()") Signed-off-by: David Ahern --- include/uapi/linux/bpf.h | 221 +++++++++++++++++++++++---------- include/uapi/linux/if_bridge.h | 32 +++++ include/uapi/linux/if_macsec.h | 8 +- include/uapi/linux/in.h | 2 + include/uapi/linux/pkt_cls.h | 22 ++++ include/uapi/linux/pkt_sched.h | 19 +++ include/uapi/linux/tcp.h | 1 + 7 files changed, 236 insertions(+), 69 deletions(-) diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 73a52a21..28667ac4 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -210,6 +210,7 @@ enum bpf_attach_type { BPF_TRACE_RAW_TP, BPF_TRACE_FENTRY, BPF_TRACE_FEXIT, + BPF_MODIFY_RETURN, __MAX_BPF_ATTACH_TYPE }; @@ -325,44 +326,46 @@ enum bpf_attach_type { #define BPF_PSEUDO_CALL 1 /* flags for BPF_MAP_UPDATE_ELEM command */ -#define BPF_ANY 0 /* create new element or update existing */ -#define BPF_NOEXIST 1 /* create new element if it didn't exist */ -#define BPF_EXIST 2 /* update existing element */ -#define 
BPF_F_LOCK 4 /* spin_lock-ed map_lookup/map_update */ +enum { + BPF_ANY = 0, /* create new element or update existing */ + BPF_NOEXIST = 1, /* create new element if it didn't exist */ + BPF_EXIST = 2, /* update existing element */ + BPF_F_LOCK = 4, /* spin_lock-ed map_lookup/map_update */ +}; /* flags for BPF_MAP_CREATE command */ -#define BPF_F_NO_PREALLOC (1U << 0) +enum { + BPF_F_NO_PREALLOC = (1U << 0), /* Instead of having one common LRU list in the * BPF_MAP_TYPE_LRU_[PERCPU_]HASH map, use a percpu LRU list * which can scale and perform better. * Note, the LRU nodes (including free nodes) cannot be moved * across different LRU lists. */ -#define BPF_F_NO_COMMON_LRU (1U << 1) + BPF_F_NO_COMMON_LRU = (1U << 1), /* Specify numa node during map creation */ -#define BPF_F_NUMA_NODE (1U << 2) - -#define BPF_OBJ_NAME_LEN 16U + BPF_F_NUMA_NODE = (1U << 2), /* Flags for accessing BPF object from syscall side. */ -#define BPF_F_RDONLY (1U << 3) -#define BPF_F_WRONLY (1U << 4) + BPF_F_RDONLY = (1U << 3), + BPF_F_WRONLY = (1U << 4), /* Flag for stack_map, store build_id+offset instead of pointer */ -#define BPF_F_STACK_BUILD_ID (1U << 5) + BPF_F_STACK_BUILD_ID = (1U << 5), /* Zero-initialize hash function seed. This should only be used for testing. */ -#define BPF_F_ZERO_SEED (1U << 6) + BPF_F_ZERO_SEED = (1U << 6), /* Flags for accessing BPF object from program side. */ -#define BPF_F_RDONLY_PROG (1U << 7) -#define BPF_F_WRONLY_PROG (1U << 8) + BPF_F_RDONLY_PROG = (1U << 7), + BPF_F_WRONLY_PROG = (1U << 8), /* Clone map from listener for newly accepted socket */ -#define BPF_F_CLONE (1U << 9) + BPF_F_CLONE = (1U << 9), /* Enable memory-mapping BPF map */ -#define BPF_F_MMAPABLE (1U << 10) + BPF_F_MMAPABLE = (1U << 10), +}; /* Flags for BPF_PROG_QUERY. 
*/ @@ -391,6 +394,8 @@ struct bpf_stack_build_id { }; }; +#define BPF_OBJ_NAME_LEN 16U + union bpf_attr { struct { /* anonymous struct used by BPF_MAP_CREATE command */ __u32 map_type; /* one of enum bpf_map_type */ @@ -2909,6 +2914,42 @@ union bpf_attr { * of sizeof(struct perf_branch_entry). * * **-ENOENT** if architecture does not support branch records. + * + * int bpf_get_ns_current_pid_tgid(u64 dev, u64 ino, struct bpf_pidns_info *nsdata, u32 size) + * Description + * Returns 0 on success, values for *pid* and *tgid* as seen from the current + * *namespace* will be returned in *nsdata*. + * + * On failure, the returned value is one of the following: + * + * **-EINVAL** if dev and inum supplied don't match dev_t and inode number + * with nsfs of current task, or if dev conversion to dev_t lost high bits. + * + * **-ENOENT** if pidns does not exists for the current task. + * + * int bpf_xdp_output(void *ctx, struct bpf_map *map, u64 flags, void *data, u64 size) + * Description + * Write raw *data* blob into a special BPF perf event held by + * *map* of type **BPF_MAP_TYPE_PERF_EVENT_ARRAY**. This perf + * event must have the following attributes: **PERF_SAMPLE_RAW** + * as **sample_type**, **PERF_TYPE_SOFTWARE** as **type**, and + * **PERF_COUNT_SW_BPF_OUTPUT** as **config**. + * + * The *flags* are used to indicate the index in *map* for which + * the value must be put, masked with **BPF_F_INDEX_MASK**. + * Alternatively, *flags* can be set to **BPF_F_CURRENT_CPU** + * to indicate that the index of the current CPU core should be + * used. + * + * The value to write, of *size*, is passed through eBPF stack and + * pointed by *data*. + * + * *ctx* is a pointer to in-kernel struct xdp_buff. + * + * This helper is similar to **bpf_perf_eventoutput**\ () but + * restricted to raw_tracepoint bpf programs. + * Return + * 0 on success, or a negative error in case of failure. 
*/ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -3030,7 +3071,9 @@ union bpf_attr { FN(tcp_send_ack), \ FN(send_signal_thread), \ FN(jiffies64), \ - FN(read_branch_records), + FN(read_branch_records), \ + FN(get_ns_current_pid_tgid), \ + FN(xdp_output), /* integer value in 'imm' field of BPF_CALL instruction selects which helper * function eBPF program intends to call @@ -3045,72 +3088,100 @@ enum bpf_func_id { /* All flags used by eBPF helper functions, placed here. */ /* BPF_FUNC_skb_store_bytes flags. */ -#define BPF_F_RECOMPUTE_CSUM (1ULL << 0) -#define BPF_F_INVALIDATE_HASH (1ULL << 1) +enum { + BPF_F_RECOMPUTE_CSUM = (1ULL << 0), + BPF_F_INVALIDATE_HASH = (1ULL << 1), +}; /* BPF_FUNC_l3_csum_replace and BPF_FUNC_l4_csum_replace flags. * First 4 bits are for passing the header field size. */ -#define BPF_F_HDR_FIELD_MASK 0xfULL +enum { + BPF_F_HDR_FIELD_MASK = 0xfULL, +}; /* BPF_FUNC_l4_csum_replace flags. */ -#define BPF_F_PSEUDO_HDR (1ULL << 4) -#define BPF_F_MARK_MANGLED_0 (1ULL << 5) -#define BPF_F_MARK_ENFORCE (1ULL << 6) +enum { + BPF_F_PSEUDO_HDR = (1ULL << 4), + BPF_F_MARK_MANGLED_0 = (1ULL << 5), + BPF_F_MARK_ENFORCE = (1ULL << 6), +}; /* BPF_FUNC_clone_redirect and BPF_FUNC_redirect flags. */ -#define BPF_F_INGRESS (1ULL << 0) +enum { + BPF_F_INGRESS = (1ULL << 0), +}; /* BPF_FUNC_skb_set_tunnel_key and BPF_FUNC_skb_get_tunnel_key flags. */ -#define BPF_F_TUNINFO_IPV6 (1ULL << 0) +enum { + BPF_F_TUNINFO_IPV6 = (1ULL << 0), +}; /* flags for both BPF_FUNC_get_stackid and BPF_FUNC_get_stack. */ -#define BPF_F_SKIP_FIELD_MASK 0xffULL -#define BPF_F_USER_STACK (1ULL << 8) +enum { + BPF_F_SKIP_FIELD_MASK = 0xffULL, + BPF_F_USER_STACK = (1ULL << 8), /* flags used by BPF_FUNC_get_stackid only. */ -#define BPF_F_FAST_STACK_CMP (1ULL << 9) -#define BPF_F_REUSE_STACKID (1ULL << 10) + BPF_F_FAST_STACK_CMP = (1ULL << 9), + BPF_F_REUSE_STACKID = (1ULL << 10), /* flags used by BPF_FUNC_get_stack only. 
*/ -#define BPF_F_USER_BUILD_ID (1ULL << 11) + BPF_F_USER_BUILD_ID = (1ULL << 11), +}; /* BPF_FUNC_skb_set_tunnel_key flags. */ -#define BPF_F_ZERO_CSUM_TX (1ULL << 1) -#define BPF_F_DONT_FRAGMENT (1ULL << 2) -#define BPF_F_SEQ_NUMBER (1ULL << 3) +enum { + BPF_F_ZERO_CSUM_TX = (1ULL << 1), + BPF_F_DONT_FRAGMENT = (1ULL << 2), + BPF_F_SEQ_NUMBER = (1ULL << 3), +}; /* BPF_FUNC_perf_event_output, BPF_FUNC_perf_event_read and * BPF_FUNC_perf_event_read_value flags. */ -#define BPF_F_INDEX_MASK 0xffffffffULL -#define BPF_F_CURRENT_CPU BPF_F_INDEX_MASK +enum { + BPF_F_INDEX_MASK = 0xffffffffULL, + BPF_F_CURRENT_CPU = BPF_F_INDEX_MASK, /* BPF_FUNC_perf_event_output for sk_buff input context. */ -#define BPF_F_CTXLEN_MASK (0xfffffULL << 32) + BPF_F_CTXLEN_MASK = (0xfffffULL << 32), +}; /* Current network namespace */ -#define BPF_F_CURRENT_NETNS (-1L) +enum { + BPF_F_CURRENT_NETNS = (-1L), +}; /* BPF_FUNC_skb_adjust_room flags. */ -#define BPF_F_ADJ_ROOM_FIXED_GSO (1ULL << 0) +enum { + BPF_F_ADJ_ROOM_FIXED_GSO = (1ULL << 0), + BPF_F_ADJ_ROOM_ENCAP_L3_IPV4 = (1ULL << 1), + BPF_F_ADJ_ROOM_ENCAP_L3_IPV6 = (1ULL << 2), + BPF_F_ADJ_ROOM_ENCAP_L4_GRE = (1ULL << 3), + BPF_F_ADJ_ROOM_ENCAP_L4_UDP = (1ULL << 4), +}; -#define BPF_ADJ_ROOM_ENCAP_L2_MASK 0xff -#define BPF_ADJ_ROOM_ENCAP_L2_SHIFT 56 +enum { + BPF_ADJ_ROOM_ENCAP_L2_MASK = 0xff, + BPF_ADJ_ROOM_ENCAP_L2_SHIFT = 56, +}; -#define BPF_F_ADJ_ROOM_ENCAP_L3_IPV4 (1ULL << 1) -#define BPF_F_ADJ_ROOM_ENCAP_L3_IPV6 (1ULL << 2) -#define BPF_F_ADJ_ROOM_ENCAP_L4_GRE (1ULL << 3) -#define BPF_F_ADJ_ROOM_ENCAP_L4_UDP (1ULL << 4) #define BPF_F_ADJ_ROOM_ENCAP_L2(len) (((__u64)len & \ BPF_ADJ_ROOM_ENCAP_L2_MASK) \ << BPF_ADJ_ROOM_ENCAP_L2_SHIFT) /* BPF_FUNC_sysctl_get_name flags. 
*/ -#define BPF_F_SYSCTL_BASE_NAME (1ULL << 0) +enum { + BPF_F_SYSCTL_BASE_NAME = (1ULL << 0), +}; /* BPF_FUNC_sk_storage_get flags */ -#define BPF_SK_STORAGE_GET_F_CREATE (1ULL << 0) +enum { + BPF_SK_STORAGE_GET_F_CREATE = (1ULL << 0), +}; /* BPF_FUNC_read_branch_records flags. */ -#define BPF_F_GET_BRANCH_RECORDS_SIZE (1ULL << 0) +enum { + BPF_F_GET_BRANCH_RECORDS_SIZE = (1ULL << 0), +}; /* Mode for BPF_FUNC_skb_adjust_room helper. */ enum bpf_adj_room_mode { @@ -3176,6 +3247,7 @@ struct __sk_buff { __u32 wire_len; __u32 gso_segs; __bpf_md_ptr(struct bpf_sock *, sk); + __u32 gso_size; }; struct bpf_tunnel_key { @@ -3528,13 +3600,14 @@ struct bpf_sock_ops { }; /* Definitions for bpf_sock_ops_cb_flags */ -#define BPF_SOCK_OPS_RTO_CB_FLAG (1<<0) -#define BPF_SOCK_OPS_RETRANS_CB_FLAG (1<<1) -#define BPF_SOCK_OPS_STATE_CB_FLAG (1<<2) -#define BPF_SOCK_OPS_RTT_CB_FLAG (1<<3) -#define BPF_SOCK_OPS_ALL_CB_FLAGS 0xF /* Mask of all currently - * supported cb flags - */ +enum { + BPF_SOCK_OPS_RTO_CB_FLAG = (1<<0), + BPF_SOCK_OPS_RETRANS_CB_FLAG = (1<<1), + BPF_SOCK_OPS_STATE_CB_FLAG = (1<<2), + BPF_SOCK_OPS_RTT_CB_FLAG = (1<<3), +/* Mask of all currently supported cb flags */ + BPF_SOCK_OPS_ALL_CB_FLAGS = 0xF, +}; /* List of known BPF sock_ops operators. * New entries can only be added at the end @@ -3613,8 +3686,10 @@ enum { BPF_TCP_MAX_STATES /* Leave at the end! 
*/ }; -#define TCP_BPF_IW 1001 /* Set TCP initial congestion window */ -#define TCP_BPF_SNDCWND_CLAMP 1002 /* Set sndcwnd_clamp */ +enum { + TCP_BPF_IW = 1001, /* Set TCP initial congestion window */ + TCP_BPF_SNDCWND_CLAMP = 1002, /* Set sndcwnd_clamp */ +}; struct bpf_perf_event_value { __u64 counter; @@ -3622,12 +3697,16 @@ struct bpf_perf_event_value { __u64 running; }; -#define BPF_DEVCG_ACC_MKNOD (1ULL << 0) -#define BPF_DEVCG_ACC_READ (1ULL << 1) -#define BPF_DEVCG_ACC_WRITE (1ULL << 2) +enum { + BPF_DEVCG_ACC_MKNOD = (1ULL << 0), + BPF_DEVCG_ACC_READ = (1ULL << 1), + BPF_DEVCG_ACC_WRITE = (1ULL << 2), +}; -#define BPF_DEVCG_DEV_BLOCK (1ULL << 0) -#define BPF_DEVCG_DEV_CHAR (1ULL << 1) +enum { + BPF_DEVCG_DEV_BLOCK = (1ULL << 0), + BPF_DEVCG_DEV_CHAR = (1ULL << 1), +}; struct bpf_cgroup_dev_ctx { /* access_type encoded as (BPF_DEVCG_ACC_* << 16) | BPF_DEVCG_DEV_* */ @@ -3643,8 +3722,10 @@ struct bpf_raw_tracepoint_args { /* DIRECT: Skip the FIB rules and go to FIB table associated with device * OUTPUT: Do lookup from egress perspective; default is ingress */ -#define BPF_FIB_LOOKUP_DIRECT (1U << 0) -#define BPF_FIB_LOOKUP_OUTPUT (1U << 1) +enum { + BPF_FIB_LOOKUP_DIRECT = (1U << 0), + BPF_FIB_LOOKUP_OUTPUT = (1U << 1), +}; enum { BPF_FIB_LKUP_RET_SUCCESS, /* lookup successful */ @@ -3716,9 +3797,11 @@ enum bpf_task_fd_type { BPF_FD_TYPE_URETPROBE, /* filename + offset */ }; -#define BPF_FLOW_DISSECTOR_F_PARSE_1ST_FRAG (1U << 0) -#define BPF_FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL (1U << 1) -#define BPF_FLOW_DISSECTOR_F_STOP_AT_ENCAP (1U << 2) +enum { + BPF_FLOW_DISSECTOR_F_PARSE_1ST_FRAG = (1U << 0), + BPF_FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL = (1U << 1), + BPF_FLOW_DISSECTOR_F_STOP_AT_ENCAP = (1U << 2), +}; struct bpf_flow_keys { __u16 nhoff; @@ -3784,4 +3867,8 @@ struct bpf_sockopt { __s32 retval; }; +struct bpf_pidns_info { + __u32 pid; + __u32 tgid; +}; #endif /* __LINUX_BPF_H__ */ diff --git a/include/uapi/linux/if_bridge.h b/include/uapi/linux/if_bridge.h 
index e2e925ff..5dffabaf 100644 --- a/include/uapi/linux/if_bridge.h +++ b/include/uapi/linux/if_bridge.h @@ -131,6 +131,7 @@ enum { #define BRIDGE_VLAN_INFO_RANGE_END (1<<4) /* VLAN is end of vlan range */ #define BRIDGE_VLAN_INFO_BRENTRY (1<<5) /* Global bridge VLAN entry */ #define BRIDGE_VLAN_INFO_ONLY_OPTS (1<<6) /* Skip create/delete/flags */ +#define BRIDGE_VLAN_INFO_REMOVE_TUN (1<<7) /* Remove tunnel mapping */ struct bridge_vlan_info { __u16 flags; @@ -174,6 +175,16 @@ struct br_vlan_msg { __u32 ifindex; }; +enum { + BRIDGE_VLANDB_DUMP_UNSPEC, + BRIDGE_VLANDB_DUMP_FLAGS, + __BRIDGE_VLANDB_DUMP_MAX, +}; +#define BRIDGE_VLANDB_DUMP_MAX (__BRIDGE_VLANDB_DUMP_MAX - 1) + +/* flags used in BRIDGE_VLANDB_DUMP_FLAGS attribute to affect dumps */ +#define BRIDGE_VLANDB_DUMPF_STATS (1 << 0) /* Include stats in the dump */ + /* Bridge vlan RTM attributes * [BRIDGE_VLANDB_ENTRY] = { * [BRIDGE_VLANDB_ENTRY_INFO] @@ -192,10 +203,31 @@ enum { BRIDGE_VLANDB_ENTRY_INFO, BRIDGE_VLANDB_ENTRY_RANGE, BRIDGE_VLANDB_ENTRY_STATE, + BRIDGE_VLANDB_ENTRY_TUNNEL_ID, + BRIDGE_VLANDB_ENTRY_STATS, __BRIDGE_VLANDB_ENTRY_MAX, }; #define BRIDGE_VLANDB_ENTRY_MAX (__BRIDGE_VLANDB_ENTRY_MAX - 1) +/* [BRIDGE_VLANDB_ENTRY] = { + * [BRIDGE_VLANDB_ENTRY_STATS] = { + * [BRIDGE_VLANDB_STATS_RX_BYTES] + * ... + * } + * ... 
+ * } + */ +enum { + BRIDGE_VLANDB_STATS_UNSPEC, + BRIDGE_VLANDB_STATS_RX_BYTES, + BRIDGE_VLANDB_STATS_RX_PACKETS, + BRIDGE_VLANDB_STATS_TX_BYTES, + BRIDGE_VLANDB_STATS_TX_PACKETS, + BRIDGE_VLANDB_STATS_PAD, + __BRIDGE_VLANDB_STATS_MAX, +}; +#define BRIDGE_VLANDB_STATS_MAX (__BRIDGE_VLANDB_STATS_MAX - 1) + /* Bridge multicast database attributes * [MDBA_MDB] = { * [MDBA_MDB_ENTRY] = { diff --git a/include/uapi/linux/if_macsec.h b/include/uapi/linux/if_macsec.h index 33c32051..eee31cec 100644 --- a/include/uapi/linux/if_macsec.h +++ b/include/uapi/linux/if_macsec.h @@ -22,9 +22,11 @@ #define MACSEC_KEYID_LEN 16 -/* cipher IDs as per IEEE802.1AEbn-2011 */ +/* cipher IDs as per IEEE802.1AE-2018 (Table 14-1) */ #define MACSEC_CIPHER_ID_GCM_AES_128 0x0080C20001000001ULL #define MACSEC_CIPHER_ID_GCM_AES_256 0x0080C20001000002ULL +#define MACSEC_CIPHER_ID_GCM_AES_XPN_128 0x0080C20001000003ULL +#define MACSEC_CIPHER_ID_GCM_AES_XPN_256 0x0080C20001000004ULL /* deprecated cipher ID for GCM-AES-128 */ #define MACSEC_DEFAULT_CIPHER_ID 0x0080020001000001ULL @@ -88,11 +90,13 @@ enum macsec_sa_attrs { MACSEC_SA_ATTR_UNSPEC, MACSEC_SA_ATTR_AN, /* config/dump, u8 0..3 */ MACSEC_SA_ATTR_ACTIVE, /* config/dump, u8 0..1 */ - MACSEC_SA_ATTR_PN, /* config/dump, u32 */ + MACSEC_SA_ATTR_PN, /* config/dump, u32/u64 (u64 if XPN) */ MACSEC_SA_ATTR_KEY, /* config, data */ MACSEC_SA_ATTR_KEYID, /* config/dump, 128-bit */ MACSEC_SA_ATTR_STATS, /* dump, nested, macsec_sa_stats_attr */ MACSEC_SA_ATTR_PAD, + MACSEC_SA_ATTR_SSCI, /* config/dump, u32 - XPN only */ + MACSEC_SA_ATTR_SALT, /* config, 96-bit - XPN only */ __MACSEC_SA_ATTR_END, NUM_MACSEC_SA_ATTR = __MACSEC_SA_ATTR_END, MACSEC_SA_ATTR_MAX = __MACSEC_SA_ATTR_END - 1, diff --git a/include/uapi/linux/in.h b/include/uapi/linux/in.h index 83a4c187..ca59dc76 100644 --- a/include/uapi/linux/in.h +++ b/include/uapi/linux/in.h @@ -74,6 +74,8 @@ enum { #define IPPROTO_UDPLITE IPPROTO_UDPLITE IPPROTO_MPLS = 137, /* MPLS in IP (RFC 4023) */ #define 
IPPROTO_MPLS IPPROTO_MPLS + IPPROTO_ETHERNET = 143, /* Ethernet-within-IPv6 Encapsulation */ +#define IPPROTO_ETHERNET IPPROTO_ETHERNET IPPROTO_RAW = 255, /* Raw IP packets */ #define IPPROTO_RAW IPPROTO_RAW IPPROTO_MPTCP = 262, /* Multipath TCP connection */ diff --git a/include/uapi/linux/pkt_cls.h b/include/uapi/linux/pkt_cls.h index 449a6397..81cc1a86 100644 --- a/include/uapi/linux/pkt_cls.h +++ b/include/uapi/linux/pkt_cls.h @@ -17,6 +17,7 @@ enum { TCA_ACT_PAD, TCA_ACT_COOKIE, TCA_ACT_FLAGS, + TCA_ACT_HW_STATS_TYPE, __TCA_ACT_MAX }; @@ -24,6 +25,27 @@ enum { * actions stats. */ +/* tca HW stats type + * When user does not pass the attribute, he does not care. + * It is the same as if he would pass the attribute with + * all supported bits set. + * In case no bits are set, user is not interested in getting any HW statistics. + */ +#define TCA_ACT_HW_STATS_TYPE_IMMEDIATE (1 << 0) /* Means that in dump, user + * gets the current HW stats + * state from the device + * queried at the dump time. + */ +#define TCA_ACT_HW_STATS_TYPE_DELAYED (1 << 1) /* Means that in dump, user gets + * HW stats that might be out + * of date for some time, maybe + * couple of seconds. This is + * the case when driver polls + * stats updates periodically + * or when it gets async stats update + * from the device. 
+ */ + #define TCA_ACT_MAX __TCA_ACT_MAX #define TCA_OLD_COMPAT (TCA_ACT_MAX+1) #define TCA_ACT_MAX_PRIO 32 diff --git a/include/uapi/linux/pkt_sched.h b/include/uapi/linux/pkt_sched.h index bbe791b2..7307a29a 100644 --- a/include/uapi/linux/pkt_sched.h +++ b/include/uapi/linux/pkt_sched.h @@ -256,6 +256,7 @@ enum { TCA_RED_PARMS, TCA_RED_STAB, TCA_RED_MAX_P, + TCA_RED_FLAGS, /* bitfield32 */ __TCA_RED_MAX, }; @@ -268,12 +269,28 @@ struct tc_red_qopt { unsigned char Wlog; /* log(W) */ unsigned char Plog; /* log(P_max/(qth_max-qth_min)) */ unsigned char Scell_log; /* cell size for idle damping */ + + /* This field can be used for flags that a RED-like qdisc has + * historically supported. E.g. when configuring RED, it can be used for + * ECN, HARDDROP and ADAPTATIVE. For SFQ it can be used for ECN, + * HARDDROP. Etc. Because this field has not been validated, and is + * copied back on dump, any bits besides those to which a given qdisc + * has assigned a historical meaning need to be considered for free use + * by userspace tools. + * + * Any further flags need to be passed differently, e.g. through an + * attribute (such as TCA_RED_FLAGS above). Such attribute should allow + * passing both recent and historic flags in one value. 
+ */ unsigned char flags; #define TC_RED_ECN 1 #define TC_RED_HARDDROP 2 #define TC_RED_ADAPTATIVE 4 +#define TC_RED_NODROP 8 }; +#define TC_RED_HISTORIC_FLAGS (TC_RED_ECN | TC_RED_HARDDROP | TC_RED_ADAPTATIVE) + struct tc_red_xstats { __u32 early; /* Early drops */ __u32 pdrop; /* Drops due to queue limits */ @@ -894,6 +911,8 @@ enum { TCA_FQ_CE_THRESHOLD, /* DCTCP-like CE-marking threshold */ + TCA_FQ_TIMER_SLACK, /* timer slack */ + __TCA_FQ_MAX }; diff --git a/include/uapi/linux/tcp.h b/include/uapi/linux/tcp.h index 36532937..f4e1003b 100644 --- a/include/uapi/linux/tcp.h +++ b/include/uapi/linux/tcp.h @@ -312,6 +312,7 @@ enum { TCP_NLA_REORD_SEEN, /* reordering events seen */ TCP_NLA_SRTT, /* smoothed RTT in usecs */ TCP_NLA_TIMEOUT_REHASH, /* Timeout-triggered rehash attempts */ + TCP_NLA_BYTES_NOTSENT, /* Bytes in write queue not yet sent */ }; /* for TCP_MD5SIG socket option */ From 341903dd3bd65219e9e8a92b1d451e2f35a2d190 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Sat, 14 Mar 2020 10:25:48 +0100 Subject: [PATCH 12/21] tc: m_action: introduce support for hw stats type Introduce support for per-action hw stats type config. This patch allows user to specify one of the following types of HW stats for added action: immediate - queried during dump time delayed - polled from HW periodically or sent by HW in async manner disabled - no stats needed Note that if "hw_stats" option is not passed, user does not care about the type, just expects any type of stats. 
Examples: $ tc filter add dev enp0s16np28 ingress proto ip handle 1 pref 1 flower skip_sw dst_ip 192.168.1.1 action drop hw_stats disabled $ tc -s filter show dev enp0s16np28 ingress filter protocol ip pref 1 flower chain 0 filter protocol ip pref 1 flower chain 0 handle 0x1 eth_type ipv4 dst_ip 192.168.1.1 skip_sw in_hw in_hw_count 2 action order 1: gact action drop random type none pass val 0 index 1 ref 1 bind 1 installed 7 sec used 2 sec Action statistics: Sent 0 bytes 0 pkt (dropped 0, overlimits 0 requeues 0) backlog 0b 0p requeues 0 hw_stats disabled $ tc filter add dev enp0s16np28 ingress proto ip handle 1 pref 1 flower skip_sw dst_ip 192.168.1.1 action drop hw_stats immediate $ tc -s filter show dev enp0s16np28 ingress filter protocol ip pref 1 flower chain 0 filter protocol ip pref 1 flower chain 0 handle 0x1 eth_type ipv4 dst_ip 192.168.1.1 skip_sw in_hw in_hw_count 2 action order 1: gact action drop random type none pass val 0 index 1 ref 1 bind 1 installed 11 sec used 4 sec Action statistics: Sent 102 bytes 1 pkt (dropped 1, overlimits 0 requeues 0) Sent software 0 bytes 0 pkt Sent hardware 102 bytes 1 pkt backlog 0b 0p requeues 0 hw_stats immediate Signed-off-by: Jiri Pirko Signed-off-by: David Ahern --- man/man8/tc-actions.8 | 31 ++++++++++++++++++++ tc/m_action.c | 66 ++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 96 insertions(+), 1 deletion(-) diff --git a/man/man8/tc-actions.8 b/man/man8/tc-actions.8 index bee59f72..21795193 100644 --- a/man/man8/tc-actions.8 +++ b/man/man8/tc-actions.8 @@ -49,6 +49,8 @@ actions \- independently defined actions in tc ] [ .I FLAGS ] [ +.I HWSTATSSPEC +] [ .I CONTROL ] @@ -77,6 +79,12 @@ ACTNAME := .I no_percpu +.I HWSTATSSPEC +:= +.BR hw_stats " {" +.IR immediate " | " delayed " | " disabled +.R } + .I ACTDETAIL := .I ACTNAME ACTPARAMS @@ -200,6 +208,29 @@ which indicates that action is expected to have minimal software data-path traffic and doesn't need to allocate stat counters with percpu 
allocator. This option is intended to be used by hardware-offloaded actions. +.TP +.BI hw_stats " HW_STATS" +Specifies the type of HW stats of new action. If omitted, any stats counter type +is going to be used, according to driver and its resources. +The +.I HW_STATS +indicates the type. Any of the following are valid: +.RS +.TP +.B immediate +Means that in dump, user gets the current HW stats state from the device +queried at the dump time. +.TP +.B delayed +Means that in dump, user gets HW stats that might be out of date for +some time, maybe couple of seconds. This is the case when driver polls +stats updates periodically or when it gets async stats update +from the device. +.TP +.B disabled +No HW stats are going to be available in dump. +.RE + .TP .BI since " MSTIME" When dumping large number of actions, a millisecond time-filter can be diff --git a/tc/m_action.c b/tc/m_action.c index 4da810c8..58ae1846 100644 --- a/tc/m_action.c +++ b/tc/m_action.c @@ -51,8 +51,9 @@ static void act_usage(void) " FL := ls | list | flush | \n" " ACTNAMESPEC := action \n" " ACTISPEC := \n" - " ACTSPEC := action [INDEXSPEC]\n" + " ACTSPEC := action [INDEXSPEC] [HWSTATSSPEC]\n" " INDEXSPEC := index <32 bit indexvalue>\n" + " HWSTATSSPEC := hw_stats [ immediate | delayed | disabled ]\n" " ACTDETAIL := \n" " Example ACTNAME is gact, mirred, bpf, etc\n" " Each action has its own parameters (ACTPARAMS)\n" @@ -149,6 +150,59 @@ new_cmd(char **argv) (matches(*argv, "add") == 0); } +static const struct hw_stats_type_item { + const char *str; + __u8 type; +} hw_stats_type_items[] = { + { "immediate", TCA_ACT_HW_STATS_TYPE_IMMEDIATE }, + { "delayed", TCA_ACT_HW_STATS_TYPE_DELAYED }, + { "disabled", 0 }, /* no bit set */ +}; + +static void print_hw_stats(const struct rtattr *arg) +{ + struct nla_bitfield32 *hw_stats_type_bf = RTA_DATA(arg); + __u8 hw_stats_type; + int i; + + hw_stats_type = hw_stats_type_bf->value & hw_stats_type_bf->selector; + print_string(PRINT_FP, NULL, "\t", NULL); + 
open_json_array(PRINT_ANY, "hw_stats"); + + for (i = 0; i < ARRAY_SIZE(hw_stats_type_items); i++) { + const struct hw_stats_type_item *item; + + item = &hw_stats_type_items[i]; + if ((!hw_stats_type && !item->type) || + hw_stats_type & item->type) + print_string(PRINT_ANY, NULL, " %s", item->str); + } + close_json_array(PRINT_JSON, NULL); +} + +static int parse_hw_stats(const char *str, struct nlmsghdr *n) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(hw_stats_type_items); i++) { + const struct hw_stats_type_item *item; + + item = &hw_stats_type_items[i]; + if (matches(str, item->str) == 0) { + struct nla_bitfield32 hw_stats_type_bf = { + .value = item->type, + .selector = item->type + }; + + addattr_l(n, MAX_MSG, TCA_ACT_HW_STATS_TYPE, + &hw_stats_type_bf, sizeof(hw_stats_type_bf)); + return 0; + } + + } + return -1; +} + int parse_action(int *argc_p, char ***argv_p, int tca_id, struct nlmsghdr *n) { int argc = *argc_p; @@ -250,6 +304,14 @@ done0: addattr_l(n, MAX_MSG, TCA_ACT_COOKIE, &act_ck, act_ck_len); + if (*argv && matches(*argv, "hw_stats") == 0) { + NEXT_ARG(); + ret = parse_hw_stats(*argv, n); + if (ret < 0) + invarg("value is invalid\n", *argv); + NEXT_ARG_FWD(); + } + if (*argv && strcmp(*argv, "no_percpu") == 0) { struct nla_bitfield32 flags = { TCA_ACT_FLAGS_NO_PERCPU_STATS, @@ -337,6 +399,8 @@ static int tc_print_one_action(FILE *f, struct rtattr *arg) TCA_ACT_FLAGS_NO_PERCPU_STATS); print_string(PRINT_FP, NULL, "%s", _SL_); } + if (tb[TCA_ACT_HW_STATS_TYPE]) + print_hw_stats(tb[TCA_ACT_HW_STATS_TYPE]); return 0; } From 1ff1edb6d5b5c1de062ad818b42ea1f5564b521f Mon Sep 17 00:00:00 2001 From: David Ahern Date: Wed, 25 Mar 2020 16:41:49 +0000 Subject: [PATCH 13/21] Update kernel headers Update kernel headers to commit: cd556e40fdf3 ("devlink: expand the devlink-info documentation") Signed-off-by: David Ahern --- include/uapi/linux/if_bridge.h | 18 ++++++++++++++++-- include/uapi/linux/pkt_cls.h | 29 ++++++++++++++--------------- 2 files changed, 30 
insertions(+), 17 deletions(-) diff --git a/include/uapi/linux/if_bridge.h b/include/uapi/linux/if_bridge.h index 5dffabaf..cb581cc0 100644 --- a/include/uapi/linux/if_bridge.h +++ b/include/uapi/linux/if_bridge.h @@ -131,7 +131,6 @@ enum { #define BRIDGE_VLAN_INFO_RANGE_END (1<<4) /* VLAN is end of vlan range */ #define BRIDGE_VLAN_INFO_BRENTRY (1<<5) /* Global bridge VLAN entry */ #define BRIDGE_VLAN_INFO_ONLY_OPTS (1<<6) /* Skip create/delete/flags */ -#define BRIDGE_VLAN_INFO_REMOVE_TUN (1<<7) /* Remove tunnel mapping */ struct bridge_vlan_info { __u16 flags; @@ -203,12 +202,27 @@ enum { BRIDGE_VLANDB_ENTRY_INFO, BRIDGE_VLANDB_ENTRY_RANGE, BRIDGE_VLANDB_ENTRY_STATE, - BRIDGE_VLANDB_ENTRY_TUNNEL_ID, + BRIDGE_VLANDB_ENTRY_TUNNEL_INFO, BRIDGE_VLANDB_ENTRY_STATS, __BRIDGE_VLANDB_ENTRY_MAX, }; #define BRIDGE_VLANDB_ENTRY_MAX (__BRIDGE_VLANDB_ENTRY_MAX - 1) +/* [BRIDGE_VLANDB_ENTRY] = { + * [BRIDGE_VLANDB_ENTRY_TUNNEL_INFO] = { + * [BRIDGE_VLANDB_TINFO_ID] + * ... + * } + * } + */ +enum { + BRIDGE_VLANDB_TINFO_UNSPEC, + BRIDGE_VLANDB_TINFO_ID, + BRIDGE_VLANDB_TINFO_CMD, + __BRIDGE_VLANDB_TINFO_MAX, +}; +#define BRIDGE_VLANDB_TINFO_MAX (__BRIDGE_VLANDB_TINFO_MAX - 1) + /* [BRIDGE_VLANDB_ENTRY] = { * [BRIDGE_VLANDB_ENTRY_STATS] = { * [BRIDGE_VLANDB_STATS_RX_BYTES] diff --git a/include/uapi/linux/pkt_cls.h b/include/uapi/linux/pkt_cls.h index 81cc1a86..6fcf7307 100644 --- a/include/uapi/linux/pkt_cls.h +++ b/include/uapi/linux/pkt_cls.h @@ -17,7 +17,7 @@ enum { TCA_ACT_PAD, TCA_ACT_COOKIE, TCA_ACT_FLAGS, - TCA_ACT_HW_STATS_TYPE, + TCA_ACT_HW_STATS, __TCA_ACT_MAX }; @@ -31,20 +31,19 @@ enum { * all supported bits set. * In case no bits are set, user is not interested in getting any HW statistics. */ -#define TCA_ACT_HW_STATS_TYPE_IMMEDIATE (1 << 0) /* Means that in dump, user - * gets the current HW stats - * state from the device - * queried at the dump time. 
- */ -#define TCA_ACT_HW_STATS_TYPE_DELAYED (1 << 1) /* Means that in dump, user gets - * HW stats that might be out - * of date for some time, maybe - * couple of seconds. This is - * the case when driver polls - * stats updates periodically - * or when it gets async stats update - * from the device. - */ +#define TCA_ACT_HW_STATS_IMMEDIATE (1 << 0) /* Means that in dump, user + * gets the current HW stats + * state from the device + * queried at the dump time. + */ +#define TCA_ACT_HW_STATS_DELAYED (1 << 1) /* Means that in dump, user gets + * HW stats that might be out of date + * for some time, maybe couple of + * seconds. This is the case when + * driver polls stats updates + * periodically or when it gets async + * stats update from the device. + */ #define TCA_ACT_MAX __TCA_ACT_MAX #define TCA_OLD_COMPAT (TCA_ACT_MAX+1) From 1c74c20cbe1b77b6c05b8f2db354ad3c4d359fb4 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Fri, 20 Mar 2020 13:21:45 -0700 Subject: [PATCH 14/21] tc: m_action: rename hw stats type uAPI Follow the kernel rename to shorten the identifiers. Rename hw_stats_type to hw_stats. 
Signed-off-by: Jakub Kicinski Signed-off-by: David Ahern --- tc/m_action.c | 39 +++++++++++++++++++-------------------- 1 file changed, 19 insertions(+), 20 deletions(-) diff --git a/tc/m_action.c b/tc/m_action.c index 58ae1846..2c4b5df6 100644 --- a/tc/m_action.c +++ b/tc/m_action.c @@ -150,31 +150,30 @@ new_cmd(char **argv) (matches(*argv, "add") == 0); } -static const struct hw_stats_type_item { +static const struct hw_stats_item { const char *str; __u8 type; -} hw_stats_type_items[] = { - { "immediate", TCA_ACT_HW_STATS_TYPE_IMMEDIATE }, - { "delayed", TCA_ACT_HW_STATS_TYPE_DELAYED }, +} hw_stats_items[] = { + { "immediate", TCA_ACT_HW_STATS_IMMEDIATE }, + { "delayed", TCA_ACT_HW_STATS_DELAYED }, { "disabled", 0 }, /* no bit set */ }; static void print_hw_stats(const struct rtattr *arg) { - struct nla_bitfield32 *hw_stats_type_bf = RTA_DATA(arg); - __u8 hw_stats_type; + struct nla_bitfield32 *hw_stats_bf = RTA_DATA(arg); + __u8 hw_stats; int i; - hw_stats_type = hw_stats_type_bf->value & hw_stats_type_bf->selector; + hw_stats = hw_stats_bf->value & hw_stats_bf->selector; print_string(PRINT_FP, NULL, "\t", NULL); open_json_array(PRINT_ANY, "hw_stats"); - for (i = 0; i < ARRAY_SIZE(hw_stats_type_items); i++) { - const struct hw_stats_type_item *item; + for (i = 0; i < ARRAY_SIZE(hw_stats_items); i++) { + const struct hw_stats_item *item; - item = &hw_stats_type_items[i]; - if ((!hw_stats_type && !item->type) || - hw_stats_type & item->type) + item = &hw_stats_items[i]; + if ((!hw_stats && !item->type) || hw_stats & item->type) print_string(PRINT_ANY, NULL, " %s", item->str); } close_json_array(PRINT_JSON, NULL); @@ -184,18 +183,18 @@ static int parse_hw_stats(const char *str, struct nlmsghdr *n) { int i; - for (i = 0; i < ARRAY_SIZE(hw_stats_type_items); i++) { - const struct hw_stats_type_item *item; + for (i = 0; i < ARRAY_SIZE(hw_stats_items); i++) { + const struct hw_stats_item *item; - item = &hw_stats_type_items[i]; + item = &hw_stats_items[i]; if 
(matches(str, item->str) == 0) { - struct nla_bitfield32 hw_stats_type_bf = { + struct nla_bitfield32 hw_stats_bf = { .value = item->type, .selector = item->type }; - addattr_l(n, MAX_MSG, TCA_ACT_HW_STATS_TYPE, - &hw_stats_type_bf, sizeof(hw_stats_type_bf)); + addattr_l(n, MAX_MSG, TCA_ACT_HW_STATS, + &hw_stats_bf, sizeof(hw_stats_bf)); return 0; } @@ -399,8 +398,8 @@ static int tc_print_one_action(FILE *f, struct rtattr *arg) TCA_ACT_FLAGS_NO_PERCPU_STATS); print_string(PRINT_FP, NULL, "%s", _SL_); } - if (tb[TCA_ACT_HW_STATS_TYPE]) - print_hw_stats(tb[TCA_ACT_HW_STATS_TYPE]); + if (tb[TCA_ACT_HW_STATS]) + print_hw_stats(tb[TCA_ACT_HW_STATS]); return 0; } From 6c10fdca70b0495314c047d206638408f13605cc Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Mon, 23 Mar 2020 18:12:21 +0200 Subject: [PATCH 15/21] tc: q_red: Support 'nodrop' flag Recognize the new configuration option of the RED Qdisc, "nodrop". Add support for passing flags through TCA_RED_FLAGS, and use it when passing TC_RED_NODROP flag. Signed-off-by: Petr Machata Signed-off-by: David Ahern --- man/man8/tc-red.8 | 6 +++++- tc/q_red.c | 25 ++++++++++++++++++++----- tc/tc_red.c | 5 +++++ 3 files changed, 30 insertions(+), 6 deletions(-) diff --git a/man/man8/tc-red.8 b/man/man8/tc-red.8 index dd1ab74c..b5aaa986 100644 --- a/man/man8/tc-red.8 +++ b/man/man8/tc-red.8 @@ -13,7 +13,7 @@ bytes bytes .B [ burst packets -.B ] [ ecn ] [ harddrop] [ bandwidth +.B ] [ ecn ] [ harddrop ] [ nodrop ] [ bandwidth rate .B ] [ probability chance @@ -123,6 +123,10 @@ If average flow queue size is above .B max bytes, this parameter forces a drop instead of ecn marking. .TP +nodrop +With this parameter, traffic that should be marked, but is not ECN-capable, is +enqueued. Without the parameter it is early-dropped. 
+.TP adaptive (Added in linux-3.3) Sets RED in adaptive mode as described in http://icir.org/floyd/papers/adaptiveRed.pdf .nf diff --git a/tc/q_red.c b/tc/q_red.c index 6256420f..53181c82 100644 --- a/tc/q_red.c +++ b/tc/q_red.c @@ -30,12 +30,17 @@ static void explain(void) fprintf(stderr, "Usage: ... red limit BYTES [min BYTES] [max BYTES] avpkt BYTES [burst PACKETS]\n" " [adaptive] [probability PROBABILITY] [bandwidth KBPS]\n" - " [ecn] [harddrop]\n"); + " [ecn] [harddrop] [nodrop]\n"); } +#define RED_SUPPORTED_FLAGS (TC_RED_HISTORIC_FLAGS | TC_RED_NODROP) + static int red_parse_opt(struct qdisc_util *qu, int argc, char **argv, struct nlmsghdr *n, const char *dev) { + struct nla_bitfield32 flags_bf = { + .selector = RED_SUPPORTED_FLAGS, + }; struct tc_red_qopt opt = {}; unsigned int burst = 0; unsigned int avpkt = 0; @@ -95,13 +100,15 @@ static int red_parse_opt(struct qdisc_util *qu, int argc, char **argv, return -1; } } else if (strcmp(*argv, "ecn") == 0) { - opt.flags |= TC_RED_ECN; + flags_bf.value |= TC_RED_ECN; } else if (strcmp(*argv, "harddrop") == 0) { - opt.flags |= TC_RED_HARDDROP; + flags_bf.value |= TC_RED_HARDDROP; + } else if (strcmp(*argv, "nodrop") == 0) { + flags_bf.value |= TC_RED_NODROP; } else if (strcmp(*argv, "adaptative") == 0) { - opt.flags |= TC_RED_ADAPTATIVE; + flags_bf.value |= TC_RED_ADAPTATIVE; } else if (strcmp(*argv, "adaptive") == 0) { - opt.flags |= TC_RED_ADAPTATIVE; + flags_bf.value |= TC_RED_ADAPTATIVE; } else if (strcmp(*argv, "help") == 0) { explain(); return -1; @@ -154,6 +161,7 @@ static int red_parse_opt(struct qdisc_util *qu, int argc, char **argv, addattr_l(n, 1024, TCA_RED_STAB, sbuf, 256); max_P = probability * pow(2, 32); addattr_l(n, 1024, TCA_RED_MAX_P, &max_P, sizeof(max_P)); + addattr_l(n, 1024, TCA_RED_FLAGS, &flags_bf, sizeof(flags_bf)); addattr_nest_end(n, tail); return 0; } @@ -161,6 +169,7 @@ static int red_parse_opt(struct qdisc_util *qu, int argc, char **argv, static int red_print_opt(struct qdisc_util 
*qu, FILE *f, struct rtattr *opt) { struct rtattr *tb[TCA_RED_MAX + 1]; + struct nla_bitfield32 *flags_bf; struct tc_red_qopt *qopt; __u32 max_P = 0; @@ -183,6 +192,12 @@ static int red_print_opt(struct qdisc_util *qu, FILE *f, struct rtattr *opt) RTA_PAYLOAD(tb[TCA_RED_MAX_P]) >= sizeof(__u32)) max_P = rta_getattr_u32(tb[TCA_RED_MAX_P]); + if (tb[TCA_RED_FLAGS] && + RTA_PAYLOAD(tb[TCA_RED_FLAGS]) >= sizeof(*flags_bf)) { + flags_bf = RTA_DATA(tb[TCA_RED_FLAGS]); + qopt->flags = flags_bf->value; + } + print_uint(PRINT_JSON, "limit", NULL, qopt->limit); print_string(PRINT_FP, NULL, "limit %s ", sprint_size(qopt->limit, b1)); print_uint(PRINT_JSON, "min", NULL, qopt->qth_min); diff --git a/tc/tc_red.c b/tc/tc_red.c index 681ca297..88f5ff35 100644 --- a/tc/tc_red.c +++ b/tc/tc_red.c @@ -116,4 +116,9 @@ void tc_red_print_flags(__u32 flags) print_bool(PRINT_ANY, "adaptive", "adaptive ", true); else print_bool(PRINT_ANY, "adaptive", NULL, false); + + if (flags & TC_RED_NODROP) + print_bool(PRINT_ANY, "nodrop", "nodrop ", true); + else + print_bool(PRINT_ANY, "nodrop", NULL, false); } From 5a3faf29491e08e2feb96e63b1b8158e970f9df9 Mon Sep 17 00:00:00 2001 From: Danielle Ratson Date: Wed, 25 Mar 2020 11:25:34 +0200 Subject: [PATCH 16/21] bash-completion: devlink: add bash-completion function Add function for command completion for devlink in bash, and update Makefile to install it under /usr/share/bash-completion/completions/. 
Signed-off-by: Danielle Ratson Tested-by: Ido Schimmel Signed-off-by: David Ahern --- Makefile | 1 + bash-completion/devlink | 822 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 823 insertions(+) create mode 100644 bash-completion/devlink diff --git a/Makefile b/Makefile index 0b79b1f1..25d05fac 100644 --- a/Makefile +++ b/Makefile @@ -90,6 +90,7 @@ install: all install -m 0644 $(shell find etc/iproute2 -maxdepth 1 -type f) $(DESTDIR)$(CONFDIR) install -m 0755 -d $(DESTDIR)$(BASH_COMPDIR) install -m 0644 bash-completion/tc $(DESTDIR)$(BASH_COMPDIR) + install -m 0644 bash-completion/devlink $(DESTDIR)$(BASH_COMPDIR) install -m 0644 include/bpf_elf.h $(DESTDIR)$(HDRDIR) snapshot: diff --git a/bash-completion/devlink b/bash-completion/devlink new file mode 100644 index 00000000..45fba75c --- /dev/null +++ b/bash-completion/devlink @@ -0,0 +1,822 @@ +# bash completion for devlink(8) -*- shell-script -*- + +# Get all the optional commands for devlink +_devlink_get_optional_commands() +{ + local object=$1; shift + + local filter_options="" + local options="$(devlink $object help 2>&1 \ + | command sed -n -e "s/^.*devlink $object //p" \ + | cut -d " " -f 1)" + + # Remove duplicate options from "devlink $OBJECT help" command + local opt + for opt in $options; do + if [[ $filter_options =~ $opt ]]; then + continue + else + filter_options="$filter_options $opt" + fi + done + + echo $filter_options +} + +# Complete based on given word, for when an argument or an option name has +# but a few possible arguments. 
+_devlink_direct_complete() +{ + local dev port region value + + case $1 in + dev) + value=$(devlink dev show 2>/dev/null) + ;; + param_name) + dev=${words[4]} + value=$(devlink -j dev param show 2>/dev/null \ + | jq ".param[\"$dev\"][].name") + ;; + port) + value=$(devlink -j port show 2>/dev/null \ + | jq '.port as $ports | $ports | keys[] as $key + | ($ports[$key].netdev // $key)') + ;; + region) + value=$(devlink -j region show 2>/dev/null \ + | jq '.regions' | jq 'keys[]') + ;; + snapshot) + region=${words[3]} + value=$(devlink -j region show 2>/dev/null \ + | jq ".regions[\"$region\"].snapshot[]") + ;; + trap) + dev=${words[3]} + value=$(devlink -j trap show 2>/dev/null \ + | jq ".trap[\"$dev\"][].name") + ;; + trap_group) + dev=${words[4]} + value=$(devlink -j trap group show 2>/dev/null \ + | jq ".trap_group[\"$dev\"][].name") + ;; + health_dev) + value=$(devlink -j health show 2>/dev/null | jq '.health' \ + | jq 'keys[]') + ;; + reporter) + dev=${words[cword - 2]} + value=$(devlink -j health show 2>/dev/null \ + | jq ".health[\"$dev\"][].reporter") + ;; + pool) + dev=$pprev + value=$(devlink -j sb pool show 2>/dev/null \ + | jq ".pool[\"$dev\"][].pool") + ;; + port_pool) + port=${words[5]} + value=$(devlink -j sb port pool show 2>/dev/null \ + | jq ".port_pool[\"$port\"][].pool") + ;; + tc) + port=$pprev + value=$(devlink -j sb tc bind show 2>/dev/null \ + | jq ".tc_bind[\"$port\"][].tc") + ;; + esac + + COMPREPLY+=( $( compgen -W "$value" -- "$cur" ) ) + # Remove colon containing prefix from COMPREPLY items in order to avoid + # wordbreaks with colon. 
+ __ltrim_colon_completions "$cur" +} + +# Completion for devlink dev eswitch set +_devlink_dev_eswitch_set() +{ + local -A settings=( + [mode]=notseen + [inline-mode]=notseen + [encap]=notseen + ) + + if [[ $cword -eq 5 ]]; then + COMPREPLY=( $( compgen -W "mode inline-mode encap" -- "$cur" ) ) + fi + + # Mark seen settings + local word + for word in "${words[@]:5:${#words[@]}-1}"; do + if [[ -n $word ]]; then + if [[ "${settings[$word]}" ]]; then + settings[$word]=seen + fi + fi + done + + case $prev in + mode) + COMPREPLY=( $( compgen -W "legacy switchdev" -- "$cur" ) ) + return + ;; + inline-mode) + COMPREPLY=( $( compgen -W "none link network transport" -- \ + "$cur" ) ) + return + ;; + encap) + COMPREPLY=( $( compgen -W "disable enable" -- "$cur" ) ) + return + ;; + esac + + local -a comp_words=() + + # Add settings not seen to completions + local setting + for setting in "${!settings[@]}"; do + if [ "${settings[$setting]}" = notseen ]; then + comp_words+=( "$setting" ) + fi + done + + COMPREPLY=( $( compgen -W "${comp_words[*]}" -- "$cur" ) ) +} + +# Completion for devlink dev eswitch +_devlink_dev_eswitch() +{ + case "$cword" in + 3) + COMPREPLY=( $( compgen -W "show set" -- "$cur" ) ) + return + ;; + 4) + _devlink_direct_complete "dev" + return + ;; + esac + + case "${words[3]}" in + set) + _devlink_dev_eswitch_set + return + ;; + show) + return + ;; + esac +} + +# Completion for devlink dev param set +_devlink_dev_param_set() +{ + case $cword in + 7) + COMPREPLY=( $( compgen -W "value" -- "$cur" ) ) + return + ;; + 8) + # String argument + return + ;; + 9) + COMPREPLY=( $( compgen -W "cmode" -- "$cur" ) ) + return + ;; + 10) + COMPREPLY=( $( compgen -W "runtime driverinit permanent" -- \ + "$cur" ) ) + return + ;; + esac +} + +# Completion for devlink dev param +_devlink_dev_param() +{ + case "$cword" in + 3) + COMPREPLY=( $( compgen -W "show set" -- "$cur" ) ) + return + ;; + 4) + _devlink_direct_complete "dev" + return + ;; + 5) + COMPREPLY=( $( compgen 
-W "name" -- "$cur" ) ) + return + ;; + 6) + _devlink_direct_complete "param_name" + return + ;; + esac + + if [[ "${words[3]}" == "set" ]]; then + _devlink_dev_param_set + fi +} + +# Completion for devlink dev reload +_devlink_dev_reload() +{ + case "$cword" in + 4) + COMPREPLY=( $( compgen -W "netns" -- "$cur" ) ) + return + ;; + 5) + local nslist=$( ip netns list 2>/dev/null ) + COMPREPLY=( $( compgen -W "$nslist" -- "$cur" ) ) + return + ;; + esac +} + +# Completion for devlink dev flash +_devlink_dev_flash() +{ + case "$cword" in + 4) + COMPREPLY=( $( compgen -W "file" -- "$cur" ) ) + return + ;; + 5) + _filedir + return + ;; + 6) + COMPREPLY=( $( compgen -W "component" -- "$cur" ) ) + return + ;; + esac +} + +# Completion for devlink dev +_devlink_dev() +{ + case $command in + show|reload|info|flash) + if [[ $cword -le 3 ]]; then + _devlink_direct_complete "dev" + elif [[ $command == "reload" || $command == "flash" ]];then + _devlink_dev_$command + fi + return + ;; + eswitch|param) + _devlink_dev_$command + return + ;; + esac +} + +# Completion for devlink port set +_devlink_port_set() +{ + case "$cword" in + 3) + _devlink_direct_complete "port" + return + ;; + 4) + COMPREPLY=( $( compgen -W "type" -- "$cur" ) ) + return + ;; + 5) + COMPREPLY=( $( compgen -W "eth ib auto" -- "$cur" ) ) + return + ;; + esac +} + +# Completion for devlink port split +_devlink_port_split() +{ + case "$cword" in + 3) + _devlink_direct_complete "port" + return + ;; + 4) + COMPREPLY=( $( compgen -W "count" -- "$cur" ) ) + return + ;; + 5) + # Integer argument + return + ;; + esac +} + +# Completion for devlink port +_devlink_port() +{ + case $command in + set) + _devlink_port_set + return + ;; + split) + _devlink_port_split + return + ;; + show|unsplit) + if [[ $cword -eq 3 ]]; then + _devlink_direct_complete "port" + fi + return + ;; + esac +} + +# Completion for devlink dpipe +_devlink_dpipe() +{ + local options="$(devlink dpipe help 2>&1 \ + | command sed -e '/OBJECT-LIST := 
/!d' \ + -e 's/.*{ //' -e 's/}.*//' -e 's/|//g' )" + + if [[ $cword -eq 2 ]]; then + COMPREPLY+=( $( compgen -W "$options" -- "$cur" ) ) + fi +} + +# Completion for devlink monitor +_devlink_monitor() +{ + local options="$(devlink monitor help 2>&1 \ + | command sed -e '/OBJECT-LIST := /!d' \ + -e 's/.*{ //' -e 's/}.*//' -e 's/|//g' )" + + if [[ $cword -eq 2 ]]; then + COMPREPLY+=( $( compgen -W "all $options" -- "$cur" ) ) + fi +} + +# Completion for the rest of devlink sb $command +_devlink_sb_command_options() +{ + local subcmd + + case $command in + pool) + subcmd=${words[3]} + if [[ $cword -eq 5 ]]; then + COMPREPLY=( $( compgen -W "pool" -- "$cur" ) ) + fi + if [[ $subcmd == "set" ]]; then + case $cword in + 7) + COMPREPLY+=( $( compgen -W "size" -- "$cur" ) ) + ;; + 9) + COMPREPLY+=( $( compgen -W "thtype" -- "$cur" ) ) + ;; + esac + fi + ;; + port) + subcmd=${words[4]} + if [[ $cword -eq 6 ]]; then + COMPREPLY+=( $( compgen -W "pool" -- "$cur" ) ) + fi + if [[ $subcmd == "set" ]]; then + case $cword in + 8) + COMPREPLY+=( $( compgen -W "th" -- "$cur" ) ) + ;; + esac + fi + ;; + tc) + subcmd=${words[4]} + case $cword in + 6) + COMPREPLY+=( $( compgen -W "tc" -- "$cur" ) ) + ;; + 8) + COMPREPLY+=( $( compgen -W "type" -- "$cur" ) ) + ;; + esac + if [[ $subcmd == "set" ]]; then + case $cword in + 10) + COMPREPLY+=( $( compgen -W "pool" -- "$cur" ) ) + ;; + 12) + COMPREPLY+=( $( compgen -W "th" -- "$cur" ) ) + ;; + esac + fi + ;; + esac +} + +# Completion for devlink sb +_devlink_sb() +{ + case $prev in + bind) + COMPREPLY=( $( compgen -W "set show" -- "$cur" ) ) + ;; + occupancy) + COMPREPLY=( $( compgen -W "show snapshot clearmax" -- "$cur" ) ) + ;; + pool) + if [[ $cword -eq 3 || $cword -eq 4 ]]; then + COMPREPLY=( $( compgen -W "set show" -- "$cur" ) ) + elif [[ $command == "port" || $command == "tc" ]]; then + _devlink_direct_complete "port_pool" + else + _devlink_direct_complete "pool" + fi + ;; + port) + if [[ $cword -eq 3 ]]; then + COMPREPLY=( $( 
compgen -W "pool" -- "$cur" ) ) + fi + ;; + show|set|snapshot|clearmax) + case $command in + show|pool|occupancy) + _devlink_direct_complete "dev" + if [[ $command == "occupancy" && $prev == "show" ]];then + _devlink_direct_complete "port" + fi + ;; + port|tc) + _devlink_direct_complete "port" + ;; + esac + ;; + size) + # Integer argument + ;; + thtype) + COMPREPLY=( $( compgen -W "static dynamic" -- "$cur" ) ) + ;; + th) + # Integer argument + ;; + tc) + if [[ $cword -eq 3 ]]; then + COMPREPLY=( $( compgen -W "bind" -- "$cur" ) ) + else + _devlink_direct_complete "tc" + fi + ;; + type) + COMPREPLY=( $( compgen -W "ingress egress" -- "$cur" ) ) + ;; + esac + + _devlink_sb_command_options + return +} + +# Completion for devlink resource set path argument +_devlink_resource_path() +{ + local path parents parent all_path + local dev=${words[3]} + local -a path + + local all_path=$( + devlink resource show $dev \ + | sed -E '# Of resource lines, keep only the name itself. + s/name ([^ ]*) .*/\1/ + # Drop headers. + /:$/d + # First layer is not aligned enough, align it. + s/^/ / + # Use slashes as unary code for resource depth. + s, ,/,g + # Separate tally count from resource name. 
+ s,/*,&\t,' \ + | while read d name; do + while ((${#path[@]} > ${#d})); do + unset path[$((${#path[@]} - 1))] + done + path[$((${#d} - 1))]=$name + echo ${path[@]} + done \ + | sed '# Convert paths to slash-separated + s,^,/,;s, ,/,g;s,$,/,' + ) + COMPREPLY=( ${COMPREPLY[@]:-} $( compgen -W "$all_path" -- "$cur" ) ) +} + +# Completion for devlink resource set +_devlink_resource_set() +{ + case "$cword" in + 3) + _devlink_direct_complete "dev" + return + ;; + 4) + COMPREPLY=( $( compgen -W "path" -- "$cur" ) ) + return + ;; + 5) + _devlink_resource_path + return + ;; + 6) + COMPREPLY=( $( compgen -W "size" -- "$cur" ) ) + return + ;; + 7) + # Integer argument + return + ;; + esac +} + +# Completion for devlink resource +_devlink_resource() +{ + case $command in + show) + if [[ $cword -eq 3 ]]; then + _devlink_direct_complete "dev" + fi + return + ;; + set) + _devlink_resource_set + return + ;; + esac +} + +# Completion for devlink region read +_devlink_region_read() +{ + case "$cword" in + 6) + COMPREPLY=( $( compgen -W "address" -- "$cur" ) ) + return + ;; + 7) + # Address argument, for example: 0x10 + return + ;; + 8) + COMPREPLY=( $( compgen -W "length" -- "$cur" ) ) + return + ;; + 9) + # Integer argument + return + ;; + esac +} + +# Completion for devlink region +_devlink_region() +{ + if [[ $cword -eq 3 && $command != "help" ]]; then + _devlink_direct_complete "region" + fi + + case $command in + show) + return + ;; + del|dump|read) + case "$cword" in + 4) + COMPREPLY=( $( compgen -W "snapshot" -- "$cur" ) ) + ;; + 5) + _devlink_direct_complete "snapshot" + ;; + esac + + if [[ $command == "read" ]]; then + _devlink_region_read + fi + return + ;; + esac +} + +# Completion reporter for devlink health +_devlink_health_reporter() +{ + local i=$1; shift + + case $cword in + $((3 + $i))) + _devlink_direct_complete "health_dev" + ;; + $((4 + $i))) + COMPREPLY=( $( compgen -W "reporter" -- "$cur" ) ) + ;; + $((5 + $i))) + _devlink_direct_complete "reporter" + ;; + 
esac +} + +# Completion for devlink health +_devlink_health() +{ + case $command in + show|recover|diagnose|set) + _devlink_health_reporter 0 + if [[ $command == "set" ]]; then + case $cword in + 6) + COMPREPLY=( $( compgen -W "grace_period auto_recover" \ + -- "$cur" ) ) + ;; + 7) + case $prev in + grace_period) + # Integer argument- msec + ;; + auto_recover) + COMPREPLY=( $( compgen -W "true false" -- \ + "$cur" ) ) + ;; + esac + esac + fi + return + ;; + dump) + if [[ $cword -eq 3 ]]; then + COMPREPLY=( $( compgen -W "show clear" -- "$cur" ) ) + fi + + _devlink_health_reporter 1 + return + ;; + esac +} + +# Completion for action in devlink trap set +_devlink_trap_set_action() +{ + local i=$1; shift + + case $cword in + $((6 + $i))) + COMPREPLY=( $( compgen -W "action" -- "$cur" ) ) + ;; + $((7 + $i))) + COMPREPLY=( $( compgen -W "trap drop" -- "$cur" ) ) + ;; + esac +} + +# Completion for devlink trap group +_devlink_trap_group() +{ + case $cword in + 3) + COMPREPLY=( $( compgen -W "set show" -- "$cur" ) ) + return + ;; + 4) + _devlink_direct_complete "dev" + return + ;; + 5) + COMPREPLY=( $( compgen -W "group" -- "$cur" ) ) + return + ;; + 6) + _devlink_direct_complete "trap_group" + return + ;; + esac + + if [[ ${words[3]} == "set" ]]; then + _devlink_trap_set_action 1 + fi +} + +# Completion for devlink trap +_devlink_trap() +{ + case $command in + show|set) + case $cword in + 3) + _devlink_direct_complete "dev" + ;; + 4) + COMPREPLY=( $( compgen -W "trap" -- "$cur" ) ) + ;; + 5) + _devlink_direct_complete "trap" + ;; + esac + + if [[ $command == "set" ]]; then + _devlink_trap_set_action 0 + fi + return + ;; + group) + _devlink_trap_$command + return + ;; + esac +} + +# Complete any devlink command +_devlink() +{ + local cur prev words cword + local opt='--Version --no-nice-names --json --pretty --verbose \ + --statistics --force --Netns --batch' + local objects="$(devlink help 2>&1 | command sed -e '/OBJECT := /!d' \ + -e 's/.*{//' -e 's/}.*//' -e \ 's/|//g' 
)" + + _init_completion || return + # Gets the word-to-complete without considering the colon as word breaks + _get_comp_words_by_ref -n : cur prev words cword + + if [[ $cword -eq 1 ]]; then + case $cur in + -*) + COMPREPLY=( $( compgen -W "$opt" -- "$cur" ) ) + return 0 + ;; + *) + COMPREPLY=( $( compgen -W "$objects" -- "$cur" ) ) + return 0 + ;; + esac + fi + + # Deal with options + if [[ $prev == -* ]]; then + case $prev in + -V|--Version) + return 0 + ;; + -b|--batch) + _filedir + return 0 + ;; + --force) + COMPREPLY=( $( compgen -W "--batch" -- "$cur" ) ) + return 0 + ;; + -N|--Netns) + local nslist=$( ip netns list 2>/dev/null ) + COMPREPLY=( $( compgen -W "$nslist" -- "$cur" ) ) + return 0 + ;; + -j|--json) + COMPREPLY=( $( compgen -W "--pretty $objects" -- "$cur" ) ) + return 0 + ;; + *) + COMPREPLY=( $( compgen -W "$objects" -- "$cur" ) ) + return 0 + ;; + esac + fi + + # Remove all options so completions don't have to deal with them. + local i + for (( i=1; i < ${#words[@]}; )); do + if [[ ${words[i]::1} == - ]]; then + words=( "${words[@]:0:i}" "${words[@]:i+1}" ) + [[ $i -le $cword ]] && cword=$(( cword - 1 )) + else + i=$(( ++i )) + fi + done + + local object=${words[1]} + local command=${words[2]} + local pprev=${words[cword - 2]} + + if [[ $objects =~ $object ]]; then + if [[ $cword -eq 2 ]]; then + COMPREPLY=( $( compgen -W "help" -- "$cur") ) + if [[ $object != "monitor" && $object != "dpipe" ]]; then + COMPREPLY+=( $( compgen -W \ + "$(_devlink_get_optional_commands $object)" -- "$cur" ) ) + fi + fi + "_devlink_$object" + fi + +} && +complete -F _devlink devlink + +# ex: ts=4 sw=4 et filetype=sh From ce9191ffee31d440591bf49ef530b80ee9975dfb Mon Sep 17 00:00:00 2001 From: David Ahern Date: Tue, 31 Mar 2020 23:23:28 +0000 Subject: [PATCH 17/21] Update kernel headers Update kernel headers to commit: 7f80ccfe9968 ("net: ipv6: rpl_iptunnel: Fix potential memory leak in rpl_do_srh_inline") Signed-off-by: David Ahern --- include/uapi/linux/bpf.h | 82 
+++++++++++++++++++++++++++++++++- include/uapi/linux/devlink.h | 13 ++++++ include/uapi/linux/if_link.h | 6 ++- include/uapi/linux/inet_diag.h | 1 + include/uapi/linux/lwtunnel.h | 1 + include/uapi/linux/pkt_cls.h | 1 + include/uapi/linux/pkt_sched.h | 4 +- 7 files changed, 103 insertions(+), 5 deletions(-) diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 28667ac4..c7b2ffb2 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -111,6 +111,8 @@ enum bpf_cmd { BPF_MAP_LOOKUP_AND_DELETE_BATCH, BPF_MAP_UPDATE_BATCH, BPF_MAP_DELETE_BATCH, + BPF_LINK_CREATE, + BPF_LINK_UPDATE, }; enum bpf_map_type { @@ -181,6 +183,7 @@ enum bpf_prog_type { BPF_PROG_TYPE_TRACING, BPF_PROG_TYPE_STRUCT_OPS, BPF_PROG_TYPE_EXT, + BPF_PROG_TYPE_LSM, }; enum bpf_attach_type { @@ -211,6 +214,7 @@ enum bpf_attach_type { BPF_TRACE_FENTRY, BPF_TRACE_FEXIT, BPF_MODIFY_RETURN, + BPF_LSM_MAC, __MAX_BPF_ATTACH_TYPE }; @@ -539,7 +543,7 @@ union bpf_attr { __u32 prog_cnt; } query; - struct { + struct { /* anonymous struct used by BPF_RAW_TRACEPOINT_OPEN command */ __u64 name; __u32 prog_fd; } raw_tracepoint; @@ -567,6 +571,24 @@ union bpf_attr { __u64 probe_offset; /* output: probe_offset */ __u64 probe_addr; /* output: probe_addr */ } task_fd_query; + + struct { /* struct used by BPF_LINK_CREATE command */ + __u32 prog_fd; /* eBPF program to attach */ + __u32 target_fd; /* object to attach to */ + __u32 attach_type; /* attach type */ + __u32 flags; /* extra flags */ + } link_create; + + struct { /* struct used by BPF_LINK_UPDATE command */ + __u32 link_fd; /* link fd */ + /* new program fd to update link with */ + __u32 new_prog_fd; + __u32 flags; /* extra flags */ + /* expected link's program fd; is specified only if + * BPF_F_REPLACE flag is set in flags */ + __u32 old_prog_fd; + } link_update; + } __attribute__((aligned(8))); /* The description below is an attempt at providing documentation to eBPF @@ -2950,6 +2972,59 @@ union bpf_attr { * restricted to 
raw_tracepoint bpf programs. * Return * 0 on success, or a negative error in case of failure. + * + * u64 bpf_get_netns_cookie(void *ctx) + * Description + * Retrieve the cookie (generated by the kernel) of the network + * namespace the input *ctx* is associated with. The network + * namespace cookie remains stable for its lifetime and provides + * a global identifier that can be assumed unique. If *ctx* is + * NULL, then the helper returns the cookie for the initial + * network namespace. The cookie itself is very similar to that + * of bpf_get_socket_cookie() helper, but for network namespaces + * instead of sockets. + * Return + * A 8-byte long opaque number. + * + * u64 bpf_get_current_ancestor_cgroup_id(int ancestor_level) + * Description + * Return id of cgroup v2 that is ancestor of the cgroup associated + * with the current task at the *ancestor_level*. The root cgroup + * is at *ancestor_level* zero and each step down the hierarchy + * increments the level. If *ancestor_level* == level of cgroup + * associated with the current task, then return value will be the + * same as that of **bpf_get_current_cgroup_id**\ (). + * + * The helper is useful to implement policies based on cgroups + * that are upper in hierarchy than immediate cgroup associated + * with the current task. + * + * The format of returned id and helper limitations are same as in + * **bpf_get_current_cgroup_id**\ (). + * Return + * The id is returned or 0 in case the id could not be retrieved. + * + * int bpf_sk_assign(struct sk_buff *skb, struct bpf_sock *sk, u64 flags) + * Description + * Assign the *sk* to the *skb*. When combined with appropriate + * routing configuration to receive the packet towards the socket, + * will cause *skb* to be delivered to the specified socket. + * Subsequent redirection of *skb* via **bpf_redirect**\ (), + * **bpf_clone_redirect**\ () or other methods outside of BPF may + * interfere with successful delivery to the socket. 
+ * + * This operation is only valid from TC ingress path. + * + * The *flags* argument must be zero. + * Return + * 0 on success, or a negative errno in case of failure. + * + * * **-EINVAL** Unsupported flags specified. + * * **-ENOENT** Socket is unavailable for assignment. + * * **-ENETUNREACH** Socket is unreachable (wrong netns). + * * **-EOPNOTSUPP** Unsupported operation, for example a + * call from outside of TC ingress. + * * **-ESOCKTNOSUPPORT** Socket type not supported (reuseport). */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -3073,7 +3148,10 @@ union bpf_attr { FN(jiffies64), \ FN(read_branch_records), \ FN(get_ns_current_pid_tgid), \ - FN(xdp_output), + FN(xdp_output), \ + FN(get_netns_cookie), \ + FN(get_current_ancestor_cgroup_id), \ + FN(sk_assign), /* integer value in 'imm' field of BPF_CALL instruction selects which helper * function eBPF program intends to call diff --git a/include/uapi/linux/devlink.h b/include/uapi/linux/devlink.h index 1b412281..80e33d25 100644 --- a/include/uapi/linux/devlink.h +++ b/include/uapi/linux/devlink.h @@ -117,6 +117,11 @@ enum devlink_command { DEVLINK_CMD_TRAP_GROUP_NEW, DEVLINK_CMD_TRAP_GROUP_DEL, + DEVLINK_CMD_TRAP_POLICER_GET, /* can dump */ + DEVLINK_CMD_TRAP_POLICER_SET, + DEVLINK_CMD_TRAP_POLICER_NEW, + DEVLINK_CMD_TRAP_POLICER_DEL, + /* add new commands above here */ __DEVLINK_CMD_MAX, DEVLINK_CMD_MAX = __DEVLINK_CMD_MAX - 1 @@ -217,6 +222,7 @@ enum devlink_param_reset_dev_on_drv_probe_value { enum { DEVLINK_ATTR_STATS_RX_PACKETS, /* u64 */ DEVLINK_ATTR_STATS_RX_BYTES, /* u64 */ + DEVLINK_ATTR_STATS_RX_DROPPED, /* u64 */ __DEVLINK_ATTR_STATS_MAX, DEVLINK_ATTR_STATS_MAX = __DEVLINK_ATTR_STATS_MAX - 1 @@ -429,6 +435,13 @@ enum devlink_attr { DEVLINK_ATTR_NETNS_FD, /* u32 */ DEVLINK_ATTR_NETNS_PID, /* u32 */ DEVLINK_ATTR_NETNS_ID, /* u32 */ + + DEVLINK_ATTR_HEALTH_REPORTER_AUTO_DUMP, /* u8 */ + + DEVLINK_ATTR_TRAP_POLICER_ID, /* u32 */ + DEVLINK_ATTR_TRAP_POLICER_RATE, /* u64 */ + 
DEVLINK_ATTR_TRAP_POLICER_BURST, /* u64 */ + /* add new attributes above here, update the policy in devlink.c */ __DEVLINK_ATTR_MAX, diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index cb88bcb4..978f98c7 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -461,6 +461,7 @@ enum { IFLA_MACSEC_REPLAY_PROTECT, IFLA_MACSEC_VALIDATION, IFLA_MACSEC_PAD, + IFLA_MACSEC_OFFLOAD, __IFLA_MACSEC_MAX, }; @@ -487,6 +488,7 @@ enum macsec_validation_type { enum macsec_offload { MACSEC_OFFLOAD_OFF = 0, MACSEC_OFFLOAD_PHY = 1, + MACSEC_OFFLOAD_MAC = 2, __MACSEC_OFFLOAD_END, MACSEC_OFFLOAD_MAX = __MACSEC_OFFLOAD_END - 1, }; @@ -970,11 +972,12 @@ enum { #define XDP_FLAGS_SKB_MODE (1U << 1) #define XDP_FLAGS_DRV_MODE (1U << 2) #define XDP_FLAGS_HW_MODE (1U << 3) +#define XDP_FLAGS_REPLACE (1U << 4) #define XDP_FLAGS_MODES (XDP_FLAGS_SKB_MODE | \ XDP_FLAGS_DRV_MODE | \ XDP_FLAGS_HW_MODE) #define XDP_FLAGS_MASK (XDP_FLAGS_UPDATE_IF_NOEXIST | \ - XDP_FLAGS_MODES) + XDP_FLAGS_MODES | XDP_FLAGS_REPLACE) /* These are stored into IFLA_XDP_ATTACHED on dump. 
*/ enum { @@ -994,6 +997,7 @@ enum { IFLA_XDP_DRV_PROG_ID, IFLA_XDP_SKB_PROG_ID, IFLA_XDP_HW_PROG_ID, + IFLA_XDP_EXPECTED_FD, __IFLA_XDP_MAX, }; diff --git a/include/uapi/linux/inet_diag.h b/include/uapi/linux/inet_diag.h index e045d170..0c1c781c 100644 --- a/include/uapi/linux/inet_diag.h +++ b/include/uapi/linux/inet_diag.h @@ -166,6 +166,7 @@ enum { INET_ULP_INFO_UNSPEC, INET_ULP_INFO_NAME, INET_ULP_INFO_TLS, + INET_ULP_INFO_MPTCP, __INET_ULP_INFO_MAX, }; #define INET_ULP_INFO_MAX (__INET_ULP_INFO_MAX - 1) diff --git a/include/uapi/linux/lwtunnel.h b/include/uapi/linux/lwtunnel.h index 532c9370..b7c0191f 100644 --- a/include/uapi/linux/lwtunnel.h +++ b/include/uapi/linux/lwtunnel.h @@ -13,6 +13,7 @@ enum lwtunnel_encap_types { LWTUNNEL_ENCAP_SEG6, LWTUNNEL_ENCAP_BPF, LWTUNNEL_ENCAP_SEG6_LOCAL, + LWTUNNEL_ENCAP_RPL, __LWTUNNEL_ENCAP_MAX, }; diff --git a/include/uapi/linux/pkt_cls.h b/include/uapi/linux/pkt_cls.h index 6fcf7307..9f06d29c 100644 --- a/include/uapi/linux/pkt_cls.h +++ b/include/uapi/linux/pkt_cls.h @@ -18,6 +18,7 @@ enum { TCA_ACT_COOKIE, TCA_ACT_FLAGS, TCA_ACT_HW_STATS, + TCA_ACT_USED_HW_STATS, __TCA_ACT_MAX }; diff --git a/include/uapi/linux/pkt_sched.h b/include/uapi/linux/pkt_sched.h index 7307a29a..0c02737c 100644 --- a/include/uapi/linux/pkt_sched.h +++ b/include/uapi/linux/pkt_sched.h @@ -1216,8 +1216,8 @@ enum { * [TCA_TAPRIO_ATTR_SCHED_ENTRY_INTERVAL] */ -#define TCA_TAPRIO_ATTR_FLAG_TXTIME_ASSIST BIT(0) -#define TCA_TAPRIO_ATTR_FLAG_FULL_OFFLOAD BIT(1) +#define TCA_TAPRIO_ATTR_FLAG_TXTIME_ASSIST _BITUL(0) +#define TCA_TAPRIO_ATTR_FLAG_FULL_OFFLOAD _BITUL(1) enum { TCA_TAPRIO_ATTR_UNSPEC, From a66af5569337b01a1aa11f929cf50ac04cae359e Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Tue, 31 Mar 2020 11:42:51 +0300 Subject: [PATCH 18/21] devlink: Add devlink trap policer set and show commands The trap policer set command allows the user to set the parameters of the packet trap policer, such as rate and burst size. 
Example: # devlink trap policer set netdevsim/netdevsim10 policer 1 rate 1000 burst 32 The trap policer show command allows the user to get the current parameters of an individual policer or a dump of all policers in case one is not specified. When '-s' is specified the policer's statistics are shown. Example: # devlink -jps trap policer show netdevsim/netdevsim10 policer 1 { "trap_policer": { "netdevsim/netdevsim10": [ { "policer": 1, "rate": 1000, "burst": 32, "stats": { "rx": { "dropped": 53 } } } ] } } Signed-off-by: Ido Schimmel Reviewed-by: Jiri Pirko Signed-off-by: David Ahern --- devlink/devlink.c | 174 ++++++++++++++++++++++++++++++++++++- man/man8/devlink-monitor.8 | 2 +- man/man8/devlink-trap.8 | 40 +++++++++ 3 files changed, 213 insertions(+), 3 deletions(-) diff --git a/devlink/devlink.c b/devlink/devlink.c index 67e6e641..9380792a 100644 --- a/devlink/devlink.c +++ b/devlink/devlink.c @@ -262,6 +262,9 @@ static void ifname_map_free(struct ifname_map *ifname_map) #define DL_OPT_TRAP_ACTION BIT(31) #define DL_OPT_TRAP_GROUP_NAME BIT(32) #define DL_OPT_NETNS BIT(33) +#define DL_OPT_TRAP_POLICER_ID BIT(34) +#define DL_OPT_TRAP_POLICER_RATE BIT(35) +#define DL_OPT_TRAP_POLICER_BURST BIT(36) struct dl_opts { uint64_t present; /* flags of present items */ @@ -303,6 +306,9 @@ struct dl_opts { enum devlink_trap_action trap_action; bool netns_is_pid; uint32_t netns; + uint32_t trap_policer_id; + uint64_t trap_policer_rate; + uint64_t trap_policer_burst; }; struct dl { @@ -506,12 +512,16 @@ static const enum mnl_attr_data_type devlink_policy[DEVLINK_ATTR_MAX + 1] = { [DEVLINK_ATTR_TRAP_METADATA] = MNL_TYPE_NESTED, [DEVLINK_ATTR_TRAP_GROUP_NAME] = MNL_TYPE_STRING, [DEVLINK_ATTR_RELOAD_FAILED] = MNL_TYPE_U8, + [DEVLINK_ATTR_TRAP_POLICER_ID] = MNL_TYPE_U32, + [DEVLINK_ATTR_TRAP_POLICER_RATE] = MNL_TYPE_U64, + [DEVLINK_ATTR_TRAP_POLICER_BURST] = MNL_TYPE_U64, }; static const enum mnl_attr_data_type devlink_stats_policy[DEVLINK_ATTR_STATS_MAX + 1] = { 
[DEVLINK_ATTR_STATS_RX_PACKETS] = MNL_TYPE_U64, [DEVLINK_ATTR_STATS_RX_BYTES] = MNL_TYPE_U64, + [DEVLINK_ATTR_STATS_RX_DROPPED] = MNL_TYPE_U64, }; static int attr_cb(const struct nlattr *attr, void *data) @@ -1490,6 +1500,27 @@ static int dl_argv_parse(struct dl *dl, uint64_t o_required, opts->netns_is_pid = true; } o_found |= DL_OPT_NETNS; + } else if (dl_argv_match(dl, "policer") && + (o_all & DL_OPT_TRAP_POLICER_ID)) { + dl_arg_inc(dl); + err = dl_argv_uint32_t(dl, &opts->trap_policer_id); + if (err) + return err; + o_found |= DL_OPT_TRAP_POLICER_ID; + } else if (dl_argv_match(dl, "rate") && + (o_all & DL_OPT_TRAP_POLICER_RATE)) { + dl_arg_inc(dl); + err = dl_argv_uint64_t(dl, &opts->trap_policer_rate); + if (err) + return err; + o_found |= DL_OPT_TRAP_POLICER_RATE; + } else if (dl_argv_match(dl, "burst") && + (o_all & DL_OPT_TRAP_POLICER_BURST)) { + dl_arg_inc(dl); + err = dl_argv_uint64_t(dl, &opts->trap_policer_burst); + if (err) + return err; + o_found |= DL_OPT_TRAP_POLICER_BURST; } else { pr_err("Unknown option \"%s\"\n", dl_argv(dl)); return -EINVAL; @@ -1617,6 +1648,15 @@ static void dl_opts_put(struct nlmsghdr *nlh, struct dl *dl) opts->netns_is_pid ? 
DEVLINK_ATTR_NETNS_PID : DEVLINK_ATTR_NETNS_FD, opts->netns); + if (opts->present & DL_OPT_TRAP_POLICER_ID) + mnl_attr_put_u32(nlh, DEVLINK_ATTR_TRAP_POLICER_ID, + opts->trap_policer_id); + if (opts->present & DL_OPT_TRAP_POLICER_RATE) + mnl_attr_put_u64(nlh, DEVLINK_ATTR_TRAP_POLICER_RATE, + opts->trap_policer_rate); + if (opts->present & DL_OPT_TRAP_POLICER_BURST) + mnl_attr_put_u64(nlh, DEVLINK_ATTR_TRAP_POLICER_BURST, + opts->trap_policer_burst); } static int dl_argv_parse_put(struct nlmsghdr *nlh, struct dl *dl, @@ -2058,6 +2098,9 @@ static void pr_out_stats(struct dl *dl, struct nlattr *nla_stats) if (tb[DEVLINK_ATTR_STATS_RX_PACKETS]) pr_out_u64(dl, "packets", mnl_attr_get_u64(tb[DEVLINK_ATTR_STATS_RX_PACKETS])); + if (tb[DEVLINK_ATTR_STATS_RX_DROPPED]) + pr_out_u64(dl, "dropped", + mnl_attr_get_u64(tb[DEVLINK_ATTR_STATS_RX_DROPPED])); pr_out_object_end(dl); pr_out_object_end(dl); } @@ -4141,6 +4184,10 @@ static const char *cmd_name(uint8_t cmd) case DEVLINK_CMD_TRAP_GROUP_SET: return "set"; case DEVLINK_CMD_TRAP_GROUP_NEW: return "new"; case DEVLINK_CMD_TRAP_GROUP_DEL: return "del"; + case DEVLINK_CMD_TRAP_POLICER_GET: return "get"; + case DEVLINK_CMD_TRAP_POLICER_SET: return "set"; + case DEVLINK_CMD_TRAP_POLICER_NEW: return "new"; + case DEVLINK_CMD_TRAP_POLICER_DEL: return "del"; default: return ""; } } @@ -4185,6 +4232,11 @@ static const char *cmd_obj(uint8_t cmd) case DEVLINK_CMD_TRAP_GROUP_NEW: case DEVLINK_CMD_TRAP_GROUP_DEL: return "trap-group"; + case DEVLINK_CMD_TRAP_POLICER_GET: + case DEVLINK_CMD_TRAP_POLICER_SET: + case DEVLINK_CMD_TRAP_POLICER_NEW: + case DEVLINK_CMD_TRAP_POLICER_DEL: + return "trap-policer"; default: return ""; } } @@ -4239,6 +4291,7 @@ static void pr_out_region(struct dl *dl, struct nlattr **tb); static void pr_out_health(struct dl *dl, struct nlattr **tb_health); static void pr_out_trap(struct dl *dl, struct nlattr **tb, bool array); static void pr_out_trap_group(struct dl *dl, struct nlattr **tb, bool array); +static void 
pr_out_trap_policer(struct dl *dl, struct nlattr **tb, bool array); static int cmd_mon_show_cb(const struct nlmsghdr *nlh, void *data) { @@ -4339,6 +4392,19 @@ static int cmd_mon_show_cb(const struct nlmsghdr *nlh, void *data) pr_out_mon_header(genl->cmd); pr_out_trap_group(dl, tb, false); break; + case DEVLINK_CMD_TRAP_POLICER_GET: /* fall through */ + case DEVLINK_CMD_TRAP_POLICER_SET: /* fall through */ + case DEVLINK_CMD_TRAP_POLICER_NEW: /* fall through */ + case DEVLINK_CMD_TRAP_POLICER_DEL: /* fall through */ + mnl_attr_parse(nlh, sizeof(*genl), attr_cb, tb); + if (!tb[DEVLINK_ATTR_BUS_NAME] || !tb[DEVLINK_ATTR_DEV_NAME] || + !tb[DEVLINK_ATTR_TRAP_POLICER_ID] || + !tb[DEVLINK_ATTR_TRAP_POLICER_RATE] || + !tb[DEVLINK_ATTR_TRAP_POLICER_BURST]) + return MNL_CB_ERROR; + pr_out_mon_header(genl->cmd); + pr_out_trap_policer(dl, tb, false); + break; } return MNL_CB_OK; } @@ -4355,7 +4421,8 @@ static int cmd_mon_show(struct dl *dl) strcmp(cur_obj, "port") != 0 && strcmp(cur_obj, "health") != 0 && strcmp(cur_obj, "trap") != 0 && - strcmp(cur_obj, "trap-group") != 0) { + strcmp(cur_obj, "trap-group") != 0 && + strcmp(cur_obj, "trap-policer") != 0) { pr_err("Unknown object \"%s\"\n", cur_obj); return -EINVAL; } @@ -4372,7 +4439,7 @@ static int cmd_mon_show(struct dl *dl) static void cmd_mon_help(void) { pr_err("Usage: devlink monitor [ all | OBJECT-LIST ]\n" - "where OBJECT-LIST := { dev | port | health | trap | trap-group }\n"); + "where OBJECT-LIST := { dev | port | health | trap | trap-group | trap-policer }\n"); } static int cmd_mon(struct dl *dl) @@ -7002,6 +7069,8 @@ static void cmd_trap_help(void) pr_err(" devlink trap show [ DEV trap TRAP ]\n"); pr_err(" devlink trap group set DEV group GROUP [ action { trap | drop } ]\n"); pr_err(" devlink trap group show [ DEV group GROUP ]\n"); + pr_err(" devlink trap policer set DEV policer POLICER [ rate RATE ] [ burst BURST ]\n"); + pr_err(" devlink trap policer show DEV policer POLICER\n"); } static int 
cmd_trap_show(struct dl *dl) @@ -7136,6 +7205,104 @@ static int cmd_trap_group(struct dl *dl) return -ENOENT; } +static void pr_out_trap_policer(struct dl *dl, struct nlattr **tb, bool array) +{ + if (array) + pr_out_handle_start_arr(dl, tb); + else + __pr_out_handle_start(dl, tb, true, false); + + check_indent_newline(dl); + print_uint(PRINT_ANY, "policer", "policer %u", + mnl_attr_get_u32(tb[DEVLINK_ATTR_TRAP_POLICER_ID])); + print_u64(PRINT_ANY, "rate", " rate %llu", + mnl_attr_get_u64(tb[DEVLINK_ATTR_TRAP_POLICER_RATE])); + print_u64(PRINT_ANY, "burst", " burst %llu", + mnl_attr_get_u64(tb[DEVLINK_ATTR_TRAP_POLICER_BURST])); + if (tb[DEVLINK_ATTR_STATS]) + pr_out_stats(dl, tb[DEVLINK_ATTR_STATS]); + pr_out_handle_end(dl); +} + +static int cmd_trap_policer_show_cb(const struct nlmsghdr *nlh, void *data) +{ + struct genlmsghdr *genl = mnl_nlmsg_get_payload(nlh); + struct nlattr *tb[DEVLINK_ATTR_MAX + 1] = {}; + struct dl *dl = data; + + mnl_attr_parse(nlh, sizeof(*genl), attr_cb, tb); + if (!tb[DEVLINK_ATTR_BUS_NAME] || !tb[DEVLINK_ATTR_DEV_NAME] || + !tb[DEVLINK_ATTR_TRAP_POLICER_ID] || + !tb[DEVLINK_ATTR_TRAP_POLICER_RATE] || + !tb[DEVLINK_ATTR_TRAP_POLICER_BURST]) + return MNL_CB_ERROR; + + pr_out_trap_policer(dl, tb, true); + + return MNL_CB_OK; +} + +static int cmd_trap_policer_show(struct dl *dl) +{ + uint16_t flags = NLM_F_REQUEST | NLM_F_ACK; + struct nlmsghdr *nlh; + int err; + + if (dl_argc(dl) == 0) + flags |= NLM_F_DUMP; + + nlh = mnlg_msg_prepare(dl->nlg, DEVLINK_CMD_TRAP_POLICER_GET, flags); + + if (dl_argc(dl) > 0) { + err = dl_argv_parse_put(nlh, dl, + DL_OPT_HANDLE | DL_OPT_TRAP_POLICER_ID, + 0); + if (err) + return err; + } + + pr_out_section_start(dl, "trap_policer"); + err = _mnlg_socket_sndrcv(dl->nlg, nlh, cmd_trap_policer_show_cb, dl); + pr_out_section_end(dl); + + return err; +} + +static int cmd_trap_policer_set(struct dl *dl) +{ + struct nlmsghdr *nlh; + int err; + + nlh = mnlg_msg_prepare(dl->nlg, DEVLINK_CMD_TRAP_POLICER_SET, + 
NLM_F_REQUEST | NLM_F_ACK); + + err = dl_argv_parse_put(nlh, dl, + DL_OPT_HANDLE | DL_OPT_TRAP_POLICER_ID, + DL_OPT_TRAP_POLICER_RATE | + DL_OPT_TRAP_POLICER_BURST); + if (err) + return err; + + return _mnlg_socket_sndrcv(dl->nlg, nlh, NULL, NULL); +} + +static int cmd_trap_policer(struct dl *dl) +{ + if (dl_argv_match(dl, "help")) { + cmd_trap_help(); + return 0; + } else if (dl_argv_match(dl, "show") || + dl_argv_match(dl, "list") || dl_no_arg(dl)) { + dl_arg_inc(dl); + return cmd_trap_policer_show(dl); + } else if (dl_argv_match(dl, "set")) { + dl_arg_inc(dl); + return cmd_trap_policer_set(dl); + } + pr_err("Command \"%s\" not found\n", dl_argv(dl)); + return -ENOENT; +} + static int cmd_trap(struct dl *dl) { if (dl_argv_match(dl, "help")) { @@ -7151,6 +7318,9 @@ static int cmd_trap(struct dl *dl) } else if (dl_argv_match(dl, "group")) { dl_arg_inc(dl); return cmd_trap_group(dl); + } else if (dl_argv_match(dl, "policer")) { + dl_arg_inc(dl); + return cmd_trap_policer(dl); } pr_err("Command \"%s\" not found\n", dl_argv(dl)); return -ENOENT; diff --git a/man/man8/devlink-monitor.8 b/man/man8/devlink-monitor.8 index a96d350f..de351f32 100644 --- a/man/man8/devlink-monitor.8 +++ b/man/man8/devlink-monitor.8 @@ -21,7 +21,7 @@ command is the first in the command line and then the object list. .I OBJECT-LIST is the list of object types that we want to monitor. It may contain -.BR dev ", " port ", " health ", " trap ", " trap-group . +.BR dev ", " port ", " health ", " trap ", " trap-group ", " trap-policer . .B devlink opens Devlink Netlink socket, listens on it and dumps state changes. 
diff --git a/man/man8/devlink-trap.8 b/man/man8/devlink-trap.8 index db19fe4c..113eda4a 100644 --- a/man/man8/devlink-trap.8 +++ b/man/man8/devlink-trap.8 @@ -38,6 +38,13 @@ devlink-trap \- devlink trap configuration .BI "devlink trap group set " DEV " group " GROUP .RB "[ " action " { " trap " | " drop " } ]" +.ti -8 +.BI "devlink trap policer set " DEV " policer " POLICER +.RB "[ " rate +.IR "RATE " ] +.RB "[ " burst +.IR "BURST " ] + .ti -8 .B devlink trap help @@ -102,6 +109,24 @@ packet trap action. The action is set for all the packet traps member in the trap group. The actions of non-drop traps cannot be changed and are thus skipped. +.SS devlink trap policer set - set attributes of packet trap policer + +.PP +.I "DEV" +- specifies the devlink device the packet trap policer belongs to. + +.PP +.BI "policer " POLICER +- specifies the packet trap policer. + +.PP +.BI rate " RATE " +- packet trap policer rate in packets per second. + +.PP +.BI burst " BURST " +- packet trap policer burst size in packets. + .SH "EXAMPLES" .PP devlink trap show @@ -128,6 +153,21 @@ devlink trap set pci/0000:01:00.0 trap source_mac_is_multicast action trap .RS 4 Set the action of a specific packet trap to 'trap'. .RE +.PP +devlink trap policer show +.RS 4 +List available packet trap policers. +.RE +.PP +devlink -s trap policer show pci/0000:01:00.0 policer 1 +.RS 4 +Show attributes and statistics of a specific packet trap policer. +.RE +.PP +devlink trap policer set pci/0000:01:00.0 policer 1 rate 1000 burst 128 +.RS 4 +Set the rate and burst size of a specific packet trap policer. +.RE .SH SEE ALSO .BR devlink (8), From 02a2a6683ff2df271c0f58e57a7fe6ff979c6de5 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Tue, 31 Mar 2020 11:42:52 +0300 Subject: [PATCH 19/21] devlink: Add ability to bind policer to trap group Add ability to associate a policer with a trap group. The policer can be unbound by using the 'nopolicer' keyword. 
In which case, the value encoded in the 'DEVLINK_ATTR_TRAP_POLICER_ID' attribute will be '0'. This is consistent with ip-link 'nomaster' keyword and the 'IFLA_MASTER' attribute. Example: # devlink trap group set netdevsim/netdevsim10 group l3_drops policer 2 # devlink -jp trap group show netdevsim/netdevsim10 group l3_drops { "trap_group": { "netdevsim/netdevsim10": [ { "name": "l3_drops", "generic": true, "policer": 2 } ] } } # devlink trap group set netdevsim/netdevsim10 group l3_drops nopolicer # devlink -jp trap group show netdevsim/netdevsim10 group l3_drops { "trap_group": { "netdevsim/netdevsim10": [ { "name": "l3_drops", "generic": true } ] } } Signed-off-by: Ido Schimmel Reviewed-by: Jiri Pirko Signed-off-by: David Ahern --- devlink/devlink.c | 11 ++++++++++- man/man8/devlink-trap.8 | 12 ++++++++++++ 2 files changed, 22 insertions(+), 1 deletion(-) diff --git a/devlink/devlink.c b/devlink/devlink.c index 9380792a..6405d4be 100644 --- a/devlink/devlink.c +++ b/devlink/devlink.c @@ -1507,6 +1507,11 @@ static int dl_argv_parse(struct dl *dl, uint64_t o_required, if (err) return err; o_found |= DL_OPT_TRAP_POLICER_ID; + } else if (dl_argv_match(dl, "nopolicer") && + (o_all & DL_OPT_TRAP_POLICER_ID)) { + dl_arg_inc(dl); + opts->trap_policer_id = 0; + o_found |= DL_OPT_TRAP_POLICER_ID; } else if (dl_argv_match(dl, "rate") && (o_all & DL_OPT_TRAP_POLICER_RATE)) { dl_arg_inc(dl); @@ -7068,6 +7073,7 @@ static void cmd_trap_help(void) pr_err("Usage: devlink trap set DEV trap TRAP [ action { trap | drop } ]\n"); pr_err(" devlink trap show [ DEV trap TRAP ]\n"); pr_err(" devlink trap group set DEV group GROUP [ action { trap | drop } ]\n"); + pr_err(" [ policer POLICER ] [ nopolicer ]\n"); pr_err(" devlink trap group show [ DEV group GROUP ]\n"); pr_err(" devlink trap policer set DEV policer POLICER [ rate RATE ] [ burst BURST ]\n"); pr_err(" devlink trap policer show DEV policer POLICER\n"); @@ -7125,6 +7131,9 @@ static void pr_out_trap_group(struct dl *dl, struct 
nlattr **tb, bool array) print_string(PRINT_ANY, "name", "name %s", mnl_attr_get_str(tb[DEVLINK_ATTR_TRAP_GROUP_NAME])); print_bool(PRINT_ANY, "generic", " generic %s", !!tb[DEVLINK_ATTR_TRAP_GENERIC]); + if (tb[DEVLINK_ATTR_TRAP_POLICER_ID]) + print_uint(PRINT_ANY, "policer", " policer %u", + mnl_attr_get_u32(tb[DEVLINK_ATTR_TRAP_POLICER_ID])); pr_out_stats(dl, tb[DEVLINK_ATTR_STATS]); pr_out_handle_end(dl); } @@ -7181,7 +7190,7 @@ static int cmd_trap_group_set(struct dl *dl) err = dl_argv_parse_put(nlh, dl, DL_OPT_HANDLE | DL_OPT_TRAP_GROUP_NAME, - DL_OPT_TRAP_ACTION); + DL_OPT_TRAP_ACTION | DL_OPT_TRAP_POLICER_ID); if (err) return err; diff --git a/man/man8/devlink-trap.8 b/man/man8/devlink-trap.8 index 113eda4a..f01f8317 100644 --- a/man/man8/devlink-trap.8 +++ b/man/man8/devlink-trap.8 @@ -37,6 +37,10 @@ devlink-trap \- devlink trap configuration .ti -8 .BI "devlink trap group set " DEV " group " GROUP .RB "[ " action " { " trap " | " drop " } ]" +.br +.RB "[ " policer +.IB "POLICER " ] +.RB "[ " nopolicer " ]" .ti -8 .BI "devlink trap policer set " DEV " policer " POLICER @@ -109,6 +113,14 @@ packet trap action. The action is set for all the packet traps member in the trap group. The actions of non-drop traps cannot be changed and are thus skipped. +.TP +.BI policer " POLICER" +packet trap policer. The policer to bind to the packet trap group. + +.TP +.B nopolicer +Unbind packet trap policer from the packet trap group. 
+ .SS devlink trap policer set - set attributes of packet trap policer .PP From 0141ca64b83c4c5e1b739ef73d159d77bef93405 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Tue, 31 Mar 2020 11:42:53 +0300 Subject: [PATCH 20/21] bash-completion: devlink: Extend bash-completion for new commands Extend bash-completion for two new commands: devlink trap policer set DEV policer POLICER [ rate RATE ] [ burst BURST ] devlink trap policer show DEV policer POLICER And for "policer" / "nopolicer" parameters in existing command: devlink trap group set DEV group GROUP [ action { trap | drop } ] [ policer POLICER ] [ nopolicer ] Signed-off-by: Ido Schimmel Signed-off-by: David Ahern --- bash-completion/devlink | 131 +++++++++++++++++++++++++++++++++++++++- 1 file changed, 130 insertions(+), 1 deletion(-) diff --git a/bash-completion/devlink b/bash-completion/devlink index 45fba75c..45ca1fe6 100644 --- a/bash-completion/devlink +++ b/bash-completion/devlink @@ -62,6 +62,11 @@ _devlink_direct_complete() value=$(devlink -j trap group show 2>/dev/null \ | jq ".trap_group[\"$dev\"][].name") ;; + trap_policer) + dev=${words[4]} + value=$(devlink -j trap policer show 2>/dev/null \ + | jq ".trap_policer[\"$dev\"][].policer") + ;; health_dev) value=$(devlink -j health show 2>/dev/null | jq '.health' \ | jq 'keys[]') @@ -678,6 +683,53 @@ _devlink_trap_set_action() esac } +# Completion for devlink trap group set +_devlink_trap_group_set() +{ + local -A settings=( + [action]=notseen + [policer]=notseen + [nopolicer]=notseen + ) + + if [[ $cword -eq 7 ]]; then + COMPREPLY=( $( compgen -W "action policer nopolicer" -- "$cur" ) ) + fi + + # Mark seen settings + local word + for word in "${words[@]:7:${#words[@]}-1}"; do + if [[ -n $word ]]; then + if [[ "${settings[$word]}" ]]; then + settings[$word]=seen + fi + fi + done + + case $prev in + action) + COMPREPLY=( $( compgen -W "trap drop" -- "$cur" ) ) + return + ;; + policer) + _devlink_direct_complete "trap_policer" + return + ;; + esac + + 
local -a comp_words=() + + # Add settings not seen to completions + local setting + for setting in "${!settings[@]}"; do + if [ "${settings[$setting]}" = notseen ]; then + comp_words+=( "$setting" ) + fi + done + + COMPREPLY=( $( compgen -W "${comp_words[*]}" -- "$cur" ) ) +} + # Completion for devlink trap group _devlink_trap_group() { @@ -701,7 +753,80 @@ _devlink_trap_group() esac if [[ ${words[3]} == "set" ]]; then - _devlink_trap_set_action 1 + _devlink_trap_group_set + fi +} + +# Completion for devlink trap policer set +_devlink_trap_policer_set() +{ + local -A settings=( + [rate]=notseen + [burst]=notseen + ) + + if [[ $cword -eq 7 ]]; then + COMPREPLY=( $( compgen -W "rate burst" -- "$cur" ) ) + fi + + # Mark seen settings + local word + for word in "${words[@]:7:${#words[@]}-1}"; do + if [[ -n $word ]]; then + if [[ "${settings[$word]}" ]]; then + settings[$word]=seen + fi + fi + done + + case $prev in + rate) + # Integer argument + return + ;; + burst) + # Integer argument + return + ;; + esac + + local -a comp_words=() + + # Add settings not seen to completions + local setting + for setting in "${!settings[@]}"; do + if [ "${settings[$setting]}" = notseen ]; then + comp_words+=( "$setting" ) + fi + done + + COMPREPLY=( $( compgen -W "${comp_words[*]}" -- "$cur" ) ) +} + +# Completion for devlink trap policer +_devlink_trap_policer() +{ + case $cword in + 3) + COMPREPLY=( $( compgen -W "set show" -- "$cur" ) ) + return + ;; + 4) + _devlink_direct_complete "dev" + return + ;; + 5) + COMPREPLY=( $( compgen -W "policer" -- "$cur" ) ) + return + ;; + 6) + _devlink_direct_complete "trap_policer" + return + ;; + esac + + if [[ ${words[3]} == "set" ]]; then + _devlink_trap_policer_set fi } @@ -731,6 +856,10 @@ _devlink_trap() _devlink_trap_$command return ;; + policer) + _devlink_trap_$command + return + ;; esac } From 0827cc53f346e70c30300824517e44a57a2170bd Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Tue, 31 Mar 2020 10:50:31 +0200 Subject: [PATCH 21/21] 
tc: show used HW stats types If the kernel provides the attribute, show the used HW stats types. Example: $ tc filter add dev enp3s0np1 ingress proto ip handle 1 pref 1 flower dst_ip 192.168.1.1 action drop $ tc -s filter show dev enp3s0np1 ingress filter protocol ip pref 1 flower chain 0 filter protocol ip pref 1 flower chain 0 handle 0x1 eth_type ipv4 dst_ip 192.168.1.1 in_hw in_hw_count 2 action order 1: gact action drop random type none pass val 0 index 1 ref 1 bind 1 installed 10 sec used 10 sec Action statistics: Sent 0 bytes 0 pkt (dropped 0, overlimits 0 requeues 0) backlog 0b 0p requeues 0 used_hw_stats immediate <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< Signed-off-by: Jiri Pirko Signed-off-by: David Ahern --- tc/m_action.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/tc/m_action.c b/tc/m_action.c index 2c4b5df6..108329db 100644 --- a/tc/m_action.c +++ b/tc/m_action.c @@ -159,7 +159,7 @@ static const struct hw_stats_item { { "disabled", 0 }, /* no bit set */ }; -static void print_hw_stats(const struct rtattr *arg) +static void print_hw_stats(const struct rtattr *arg, bool print_used) { struct nla_bitfield32 *hw_stats_bf = RTA_DATA(arg); __u8 hw_stats; @@ -167,7 +167,7 @@ static void print_hw_stats(const struct rtattr *arg) hw_stats = hw_stats_bf->value & hw_stats_bf->selector; print_string(PRINT_FP, NULL, "\t", NULL); - open_json_array(PRINT_ANY, "hw_stats"); + open_json_array(PRINT_ANY, print_used ? 
"used_hw_stats" : "hw_stats"); for (i = 0; i < ARRAY_SIZE(hw_stats_items); i++) { const struct hw_stats_item *item; @@ -177,6 +177,7 @@ static void print_hw_stats(const struct rtattr *arg) print_string(PRINT_ANY, NULL, " %s", item->str); } close_json_array(PRINT_JSON, NULL); + print_string(PRINT_FP, NULL, "%s", _SL_); } static int parse_hw_stats(const char *str, struct nlmsghdr *n) @@ -399,7 +400,10 @@ static int tc_print_one_action(FILE *f, struct rtattr *arg) print_string(PRINT_FP, NULL, "%s", _SL_); } if (tb[TCA_ACT_HW_STATS]) - print_hw_stats(tb[TCA_ACT_HW_STATS]); + print_hw_stats(tb[TCA_ACT_HW_STATS], false); + + if (tb[TCA_ACT_USED_HW_STATS]) + print_hw_stats(tb[TCA_ACT_USED_HW_STATS], true); return 0; }