From 83d4d61bc90e38eaf483f8c6f94f3f7059f07bc9 Mon Sep 17 00:00:00 2001 From: Martynas Pumputis Date: Mon, 5 Jul 2021 14:43:07 +0200 Subject: [PATCH 01/10] libbpf: fix attach of prog with multiple sections When loading BPF programs which consist of multiple executable sections via iproute2+libbpf (configured with LIBBPF_FORCE=on), we noticed that a wrong section can be attached to a device. E.g.: # tc qdisc replace dev lxc_health clsact # tc filter replace dev lxc_health ingress prio 1 \ handle 1 bpf da obj bpf_lxc.o sec from-container # tc filter show dev lxc_health ingress filter protocol all pref 1 bpf chain 0 filter protocol all pref 1 bpf chain 0 handle 0x1 bpf_lxc.o:[__send_drop_notify] <-- WRONG SECTION direct-action not_in_hw id 38 tag 7d891814eda6809e jited After taking a closer look into load_bpf_object() in lib/bpf_libbpf.c, we noticed that the filter used in the program iterator does not check whether a program section name matches a requested section name (cfg->section). This can lead to a wrong prog FD being used to attach the program. 
Fixes: 6d61a2b55799 ("lib: add libbpf support") Signed-off-by: Martynas Pumputis Acked-by: Hangbin Liu Signed-off-by: Stephen Hemminger --- lib/bpf_libbpf.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/lib/bpf_libbpf.c b/lib/bpf_libbpf.c index 864f8c35..dbec2cb5 100644 --- a/lib/bpf_libbpf.c +++ b/lib/bpf_libbpf.c @@ -268,10 +268,12 @@ static int load_bpf_object(struct bpf_cfg_in *cfg) } bpf_object__for_each_program(p, obj) { + bool prog_to_attach = !prog && cfg->section && + !strcmp(get_bpf_program__section_name(p), cfg->section); + /* Only load the programs that will either be subsequently * attached or inserted into a tail call map */ - if (find_legacy_tail_calls(p, obj) < 0 && cfg->section && - strcmp(get_bpf_program__section_name(p), cfg->section)) { + if (find_legacy_tail_calls(p, obj) < 0 && !prog_to_attach) { ret = bpf_program__set_autoload(p, false); if (ret) return -EINVAL; @@ -280,7 +282,8 @@ static int load_bpf_object(struct bpf_cfg_in *cfg) bpf_program__set_type(p, cfg->type); bpf_program__set_ifindex(p, cfg->ifindex); - if (!prog) + + if (prog_to_attach) prog = p; } From 8f85d085feae11324bed3bd52069f961d8bf1d0c Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Tue, 6 Jul 2021 17:07:24 -0700 Subject: [PATCH 02/10] uapi: update kernel headers from 5.14-rc1 Signed-off-by: Stephen Hemminger --- include/uapi/linux/sctp.h | 8 ++++++++ include/uapi/linux/snmp.h | 2 ++ 2 files changed, 10 insertions(+) diff --git a/include/uapi/linux/sctp.h b/include/uapi/linux/sctp.h index 4831d333..53fdfafc 100644 --- a/include/uapi/linux/sctp.h +++ b/include/uapi/linux/sctp.h @@ -141,6 +141,7 @@ typedef __s32 sctp_assoc_t; #define SCTP_EXPOSE_POTENTIALLY_FAILED_STATE 131 #define SCTP_EXPOSE_PF_STATE SCTP_EXPOSE_POTENTIALLY_FAILED_STATE #define SCTP_REMOTE_UDP_ENCAPS_PORT 132 +#define SCTP_PLPMTUD_PROBE_INTERVAL 133 /* PR-SCTP policies */ #define SCTP_PR_SCTP_NONE 0x0000 @@ -1207,4 +1208,11 @@ enum sctp_sched_type { SCTP_SS_MAX = 
SCTP_SS_RR }; +/* Probe Interval socket option */ +struct sctp_probeinterval { + sctp_assoc_t spi_assoc_id; + struct sockaddr_storage spi_address; + __u32 spi_interval; +}; + #endif /* _SCTP_H */ diff --git a/include/uapi/linux/snmp.h b/include/uapi/linux/snmp.h index 26fc60ce..904909d0 100644 --- a/include/uapi/linux/snmp.h +++ b/include/uapi/linux/snmp.h @@ -290,6 +290,8 @@ enum LINUX_MIB_TCPDUPLICATEDATAREHASH, /* TCPDuplicateDataRehash */ LINUX_MIB_TCPDSACKRECVSEGS, /* TCPDSACKRecvSegs */ LINUX_MIB_TCPDSACKIGNOREDDUBIOUS, /* TCPDSACKIgnoredDubious */ + LINUX_MIB_TCPMIGRATEREQSUCCESS, /* TCPMigrateReqSuccess */ + LINUX_MIB_TCPMIGRATEREQFAILURE, /* TCPMigrateReqFailure */ __LINUX_MIB_MAX }; From 459ce6e3d792afe3fe8f7f4b78baa68bb72b8439 Mon Sep 17 00:00:00 2001 From: Alexander Mikhalitsyn Date: Wed, 7 Jul 2021 15:22:01 +0300 Subject: [PATCH 03/10] ip route: ignore ENOENT during save if RT_TABLE_MAIN is being dumped We started to use in-kernel filtering feature which allows to get only needed tables (see iproute_dump_filter()). From the kernel side it's implemented in net/ipv4/fib_frontend.c (inet_dump_fib), net/ipv6/ip6_fib.c (inet6_dump_fib). The problem here is that behaviour of "ip route save" was changed after c7e6371bc ("ip route: Add protocol, table id and device to dump request"). If filters are used, then kernel returns ENOENT error if requested table is absent, but in newly created net namespace even RT_TABLE_MAIN table doesn't exist. It is really allocated, for instance, after issuing "ip l set lo up". Reproducer is fairly simple: $ unshare -n ip route save > dump Error: ipv4: FIB table does not exist. Dump terminated Expected result here is to get empty dump file (as it was before this change). v2: reworked, so, now it takes into account NLMSGERR_ATTR_MSG (see nl_dump_ext_ack_done() function). We want to suppress error messages in stderr about absent FIB table from kernel too. v3: reworked to make code clearer. 
Introduced rtnl_suppressed_errors(), rtnl_suppress_error() helpers. User may suppress up to 3 errors (may be easily extended by changing SUPPRESS_ERRORS_INIT macro). v4: reworked, rtnl_dump_filter_errhndlr() was introduced. Thanks to Stephen Hemminger for comments and suggestions v5: space fixes, commit message reformat, empty initializers Fixes: c7e6371bc ("ip route: Add protocol, table id and device to dump request") Cc: David Ahern Cc: Stephen Hemminger Cc: Andrei Vagin Cc: Alexander Mikhalitsyn Signed-off-by: Alexander Mikhalitsyn Signed-off-by: Stephen Hemminger --- include/libnetlink.h | 32 ++++++++++++++++++++++++++ ip/iproute.c | 15 +++++++++++- lib/libnetlink.c | 54 +++++++++++++++++++++++++++++++++++--------- 3 files changed, 89 insertions(+), 12 deletions(-) diff --git a/include/libnetlink.h b/include/libnetlink.h index 6bff6bae..9e4cc101 100644 --- a/include/libnetlink.h +++ b/include/libnetlink.h @@ -109,6 +109,27 @@ struct rtnl_ctrl_data { typedef int (*rtnl_filter_t)(struct nlmsghdr *n, void *); +/** + * rtnl error handler called from + * rtnl_dump_done() + * rtnl_dump_error() + * + * Return value is a bitmask of the following values: + * RTNL_LET_NLERR + * error handled as usual + * RTNL_SUPPRESS_NLMSG_DONE_NLERR + * error in nlmsg_type == NLMSG_DONE will be suppressed + * RTNL_SUPPRESS_NLMSG_ERROR_NLERR + * error in nlmsg_type == NLMSG_ERROR will be suppressed + * and nlmsg will be skipped + * RTNL_SUPPRESS_NLERR - suppress error in both previous cases + */ +#define RTNL_LET_NLERR 0x01 +#define RTNL_SUPPRESS_NLMSG_DONE_NLERR 0x02 +#define RTNL_SUPPRESS_NLMSG_ERROR_NLERR 0x04 +#define RTNL_SUPPRESS_NLERR 0x06 +typedef int (*rtnl_err_hndlr_t)(struct nlmsghdr *n, void *); + typedef int (*rtnl_listen_filter_t)(struct rtnl_ctrl_data *, struct nlmsghdr *n, void *); @@ -118,6 +139,8 @@ typedef int (*nl_ext_ack_fn_t)(const char *errmsg, uint32_t off, struct rtnl_dump_filter_arg { rtnl_filter_t filter; void *arg1; + rtnl_err_hndlr_t errhndlr; + void *arg2; 
__u16 nc_flags; }; @@ -126,6 +149,15 @@ int rtnl_dump_filter_nc(struct rtnl_handle *rth, void *arg, __u16 nc_flags); #define rtnl_dump_filter(rth, filter, arg) \ rtnl_dump_filter_nc(rth, filter, arg, 0) +int rtnl_dump_filter_errhndlr_nc(struct rtnl_handle *rth, + rtnl_filter_t filter, + void *arg1, + rtnl_err_hndlr_t errhndlr, + void *arg2, + __u16 nc_flags); +#define rtnl_dump_filter_errhndlr(rth, filter, farg, errhndlr, earg) \ + rtnl_dump_filter_errhndlr_nc(rth, filter, farg, errhndlr, earg, 0) + int rtnl_talk(struct rtnl_handle *rtnl, struct nlmsghdr *n, struct nlmsghdr **answer) __attribute__((warn_unused_result)); diff --git a/ip/iproute.c b/ip/iproute.c index bdeb9644..1ccf51a5 100644 --- a/ip/iproute.c +++ b/ip/iproute.c @@ -1734,6 +1734,18 @@ static int iproute_flush(int family, rtnl_filter_t filter_fn) } } +static int save_route_errhndlr(struct nlmsghdr *n, void *arg) +{ + int err = -*(int *)NLMSG_DATA(n); + + if (n->nlmsg_type == NLMSG_DONE && + filter.tb == RT_TABLE_MAIN && + err == ENOENT) + return RTNL_SUPPRESS_NLMSG_DONE_NLERR; + + return RTNL_LET_NLERR; +} + static int iproute_list_flush_or_save(int argc, char **argv, int action) { int dump_family = preferred_family; @@ -1946,7 +1958,8 @@ static int iproute_list_flush_or_save(int argc, char **argv, int action) new_json_obj(json); - if (rtnl_dump_filter(&rth, filter_fn, stdout) < 0) { + if (rtnl_dump_filter_errhndlr(&rth, filter_fn, stdout, + save_route_errhndlr, NULL) < 0) { fprintf(stderr, "Dump terminated\n"); return -2; } diff --git a/lib/libnetlink.c b/lib/libnetlink.c index 2f2cc1fe..b92f10e1 100644 --- a/lib/libnetlink.c +++ b/lib/libnetlink.c @@ -718,7 +718,8 @@ int rtnl_dump_request_n(struct rtnl_handle *rth, struct nlmsghdr *n) return sendmsg(rth->fd, &msg, 0); } -static int rtnl_dump_done(struct nlmsghdr *h) +static int rtnl_dump_done(struct nlmsghdr *h, + const struct rtnl_dump_filter_arg *a) { int len = *(int *)NLMSG_DATA(h); @@ -728,11 +729,15 @@ static int rtnl_dump_done(struct 
nlmsghdr *h) } if (len < 0) { + errno = -len; + + if (a->errhndlr(h, a->arg2) & RTNL_SUPPRESS_NLMSG_DONE_NLERR) + return 0; + /* check for any messages returned from kernel */ if (nl_dump_ext_ack_done(h, len)) return len; - errno = -len; switch (errno) { case ENOENT: case EOPNOTSUPP: @@ -753,8 +758,9 @@ static int rtnl_dump_done(struct nlmsghdr *h) return 0; } -static void rtnl_dump_error(const struct rtnl_handle *rth, - struct nlmsghdr *h) +static int rtnl_dump_error(const struct rtnl_handle *rth, + struct nlmsghdr *h, + const struct rtnl_dump_filter_arg *a) { if (h->nlmsg_len < NLMSG_LENGTH(sizeof(struct nlmsgerr))) { @@ -766,11 +772,16 @@ static void rtnl_dump_error(const struct rtnl_handle *rth, if (rth->proto == NETLINK_SOCK_DIAG && (errno == ENOENT || errno == EOPNOTSUPP)) - return; + return -1; + + if (a->errhndlr(h, a->arg2) & RTNL_SUPPRESS_NLMSG_ERROR_NLERR) + return 0; if (!(rth->flags & RTNL_HANDLE_F_SUPPRESS_NLERR)) perror("RTNETLINK answers"); } + + return -1; } static int __rtnl_recvmsg(int fd, struct msghdr *msg, int flags) @@ -879,7 +890,7 @@ static int rtnl_dump_filter_l(struct rtnl_handle *rth, dump_intr = 1; if (h->nlmsg_type == NLMSG_DONE) { - err = rtnl_dump_done(h); + err = rtnl_dump_done(h, a); if (err < 0) { free(buf); return -1; @@ -890,9 +901,13 @@ static int rtnl_dump_filter_l(struct rtnl_handle *rth, } if (h->nlmsg_type == NLMSG_ERROR) { - rtnl_dump_error(rth, h); - free(buf); - return -1; + err = rtnl_dump_error(rth, h, a); + if (err < 0) { + free(buf); + return -1; + } + + goto skip_it; } if (!rth->dump_fp) { @@ -932,8 +947,25 @@ int rtnl_dump_filter_nc(struct rtnl_handle *rth, void *arg1, __u16 nc_flags) { const struct rtnl_dump_filter_arg a[2] = { - { .filter = filter, .arg1 = arg1, .nc_flags = nc_flags, }, - { .filter = NULL, .arg1 = NULL, .nc_flags = 0, }, + { .filter = filter, .arg1 = arg1, + .errhndlr = NULL, .arg2 = NULL, .nc_flags = nc_flags, }, + { }, + }; + + return rtnl_dump_filter_l(rth, a); +} + +int 
rtnl_dump_filter_errhndlr_nc(struct rtnl_handle *rth, + rtnl_filter_t filter, + void *arg1, + rtnl_err_hndlr_t errhndlr, + void *arg2, + __u16 nc_flags) +{ + const struct rtnl_dump_filter_arg a[2] = { + { .filter = filter, .arg1 = arg1, + .errhndlr = errhndlr, .arg2 = arg2, .nc_flags = nc_flags, }, + { }, }; return rtnl_dump_filter_l(rth, a); From 0015ada629a70be0139ee9a0e4d3fb6512f90f56 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Wed, 7 Jul 2021 07:39:07 -0700 Subject: [PATCH 04/10] libnetlink: cosmetic changes Don't initialize arguments that are NULL, and format initialization in a more logical way. Signed-off-by: Stephen Hemminger --- lib/libnetlink.c | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/lib/libnetlink.c b/lib/libnetlink.c index b92f10e1..5f062c7d 100644 --- a/lib/libnetlink.c +++ b/lib/libnetlink.c @@ -943,12 +943,14 @@ skip_it: } int rtnl_dump_filter_nc(struct rtnl_handle *rth, - rtnl_filter_t filter, - void *arg1, __u16 nc_flags) + rtnl_filter_t filter, + void *arg1, __u16 nc_flags) { - const struct rtnl_dump_filter_arg a[2] = { - { .filter = filter, .arg1 = arg1, - .errhndlr = NULL, .arg2 = NULL, .nc_flags = nc_flags, }, + const struct rtnl_dump_filter_arg a[] = { + { + .filter = filter, .arg1 = arg1, + .nc_flags = nc_flags, + }, { }, }; @@ -962,9 +964,12 @@ int rtnl_dump_filter_errhndlr_nc(struct rtnl_handle *rth, void *arg2, __u16 nc_flags) { - const struct rtnl_dump_filter_arg a[2] = { - { .filter = filter, .arg1 = arg1, - .errhndlr = errhndlr, .arg2 = arg2, .nc_flags = nc_flags, }, + const struct rtnl_dump_filter_arg a[] = { + { + .filter = filter, .arg1 = arg1, + .errhndlr = errhndlr, .arg2 = arg2, + .nc_flags = nc_flags, + }, { }, }; From 115e9870358ba08ec8921ff8f459d379522f0368 Mon Sep 17 00:00:00 2001 From: Alexander Mikhalitsyn Date: Sun, 11 Jul 2021 14:15:46 +0300 Subject: [PATCH 05/10] libnetlink: check error handler is present before a call Fix nullptr dereference of errhndlr from 
rtnl_dump_filter_arg struct in rtnl_dump_done and rtnl_dump_error functions. Fixes: 459ce6e3d792 ("ip route: ignore ENOENT during save if RT_TABLE_MAIN is being dumped") Cc: Stephen Hemminger Cc: Roi Dayan Cc: Alexander Mikhalitsyn Reported-by: Roi Dayan Signed-off-by: Alexander Mikhalitsyn Reviewed-by: Roi Dayan Signed-off-by: Stephen Hemminger --- lib/libnetlink.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/libnetlink.c b/lib/libnetlink.c index 5f062c7d..6836c21c 100644 --- a/lib/libnetlink.c +++ b/lib/libnetlink.c @@ -731,7 +731,7 @@ static int rtnl_dump_done(struct nlmsghdr *h, if (len < 0) { errno = -len; - if (a->errhndlr(h, a->arg2) & RTNL_SUPPRESS_NLMSG_DONE_NLERR) + if (a->errhndlr && (a->errhndlr(h, a->arg2) & RTNL_SUPPRESS_NLMSG_DONE_NLERR)) return 0; /* check for any messages returned from kernel */ @@ -774,7 +774,7 @@ static int rtnl_dump_error(const struct rtnl_handle *rth, errno == EOPNOTSUPP)) return -1; - if (a->errhndlr(h, a->arg2) & RTNL_SUPPRESS_NLMSG_ERROR_NLERR) + if (a->errhndlr && (a->errhndlr(h, a->arg2) & RTNL_SUPPRESS_NLMSG_ERROR_NLERR)) return 0; if (!(rth->flags & RTNL_HANDLE_F_SUPPRESS_NLERR)) From 1f2c908d53cef06878a1b65179ef79ce5fdf75df Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20Sch=C3=BCrmann?= Date: Thu, 15 Jul 2021 18:17:36 +0200 Subject: [PATCH 06/10] man8/ip-tunnel.8: fix typo, 'encaplim' is not a valid option Signed-off-by: Stephen Hemminger --- man/man8/ip-tunnel.8 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/man/man8/ip-tunnel.8 b/man/man8/ip-tunnel.8 index 9a510af0..57e030dd 100644 --- a/man/man8/ip-tunnel.8 +++ b/man/man8/ip-tunnel.8 @@ -235,7 +235,7 @@ flag is equivalent to the combination .B It doesn't work. Don't use it. .TP -.BI encaplim " ELIM" +.BI encaplimit " ELIM" .RB ( " only IPv6 tunnels " ) set a fixed encapsulation limit. Default is 4. 
From 7a7e9ed98fd93eae01ada48ef46909401f148a47 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Sat, 17 Jul 2021 11:12:47 -0700 Subject: [PATCH 07/10] uapi: headers update Signed-off-by: Stephen Hemminger --- include/uapi/linux/magic.h | 1 + include/uapi/linux/virtio_ids.h | 12 ++++++++++++ 2 files changed, 13 insertions(+) diff --git a/include/uapi/linux/magic.h b/include/uapi/linux/magic.h index f3956fc1..35687dcb 100644 --- a/include/uapi/linux/magic.h +++ b/include/uapi/linux/magic.h @@ -97,5 +97,6 @@ #define DEVMEM_MAGIC 0x454d444d /* "DMEM" */ #define Z3FOLD_MAGIC 0x33 #define PPC_CMM_MAGIC 0xc7571590 +#define SECRETMEM_MAGIC 0x5345434d /* "SECM" */ #endif /* __LINUX_MAGIC_H__ */ diff --git a/include/uapi/linux/virtio_ids.h b/include/uapi/linux/virtio_ids.h index 4fe842c3..70a8057a 100644 --- a/include/uapi/linux/virtio_ids.h +++ b/include/uapi/linux/virtio_ids.h @@ -57,4 +57,16 @@ #define VIRTIO_ID_MAC80211_HWSIM 29 /* virtio mac80211-hwsim */ #define VIRTIO_ID_BT 40 /* virtio bluetooth */ +/* + * Virtio Transitional IDs + */ + +#define VIRTIO_TRANS_ID_NET 1000 /* transitional virtio net */ +#define VIRTIO_TRANS_ID_BLOCK 1001 /* transitional virtio block */ +#define VIRTIO_TRANS_ID_BALLOON 1002 /* transitional virtio balloon */ +#define VIRTIO_TRANS_ID_CONSOLE 1003 /* transitional virtio console */ +#define VIRTIO_TRANS_ID_SCSI 1004 /* transitional virtio SCSI */ +#define VIRTIO_TRANS_ID_RNG 1005 /* transitional virtio rng */ +#define VIRTIO_TRANS_ID_9P 1009 /* transitional virtio 9p console */ + #endif /* _LINUX_VIRTIO_IDS_H */ From f760bff328316244b510986cf0ed7ee1c3c689ef Mon Sep 17 00:00:00 2001 From: Lahav Schlesinger Date: Thu, 15 Jul 2021 17:38:56 +0300 Subject: [PATCH 08/10] ipmonitor: Fix recvmsg with ancillary data A successful call to recvmsg() causes msg.msg_controllen to contain the length of the received ancillary data. However, the current code in the 'ip' utility doesn't reset this value after each recvmsg(). 
This means that if a call to recvmsg() doesn't have ancillary data, then 'msg.msg_controllen' will be set to 0, causing future recvmsg() calls which do contain ancillary data to get MSG_CTRUNC set in msg.msg_flags. This fixes 'ip monitor' running with the all-nsid option - With this option the kernel passes the nsid as ancillary data. If while 'ip monitor' is running an event on the current netns is received, then no ancillary data will be sent, causing 'msg.msg_controllen' to be set to 0, which causes 'ip monitor' to indefinitely print "[nsid current]" instead of the real nsid. Fixes: 449b824ad196 ("ipmonitor: allows to monitor in several netns") Cc: Nicolas Dichtel Signed-off-by: Lahav Schlesinger Acked-by: Nicolas Dichtel Signed-off-by: Stephen Hemminger --- lib/libnetlink.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/lib/libnetlink.c b/lib/libnetlink.c index 6836c21c..7e977a67 100644 --- a/lib/libnetlink.c +++ b/lib/libnetlink.c @@ -1175,16 +1175,16 @@ int rtnl_listen(struct rtnl_handle *rtnl, char buf[16384]; char cmsgbuf[BUFSIZ]; - if (rtnl->flags & RTNL_HANDLE_F_LISTEN_ALL_NSID) { - msg.msg_control = &cmsgbuf; - msg.msg_controllen = sizeof(cmsgbuf); - } - iov.iov_base = buf; while (1) { struct rtnl_ctrl_data ctrl; struct cmsghdr *cmsg; + if (rtnl->flags & RTNL_HANDLE_F_LISTEN_ALL_NSID) { + msg.msg_control = &cmsgbuf; + msg.msg_controllen = sizeof(cmsgbuf); + } + iov.iov_len = sizeof(buf); status = recvmsg(rtnl->fd, &msg, 0); From 71d36000dc9ce8397fc45b680e0c0340df5a28e5 Mon Sep 17 00:00:00 2001 From: Roi Dayan Date: Mon, 12 Jul 2021 15:26:53 +0300 Subject: [PATCH 09/10] police: Fix normal output back to what it was With the json support fix the normal output was changed. Set it back to what it was. Print overhead with print_size(). Print newline before ref. 
Fixes: 0d5cf51e0d6c ("police: Add support for json output") Signed-off-by: Roi Dayan Signed-off-by: Stephen Hemminger --- tc/m_police.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/tc/m_police.c b/tc/m_police.c index 2594c089..f38ab90a 100644 --- a/tc/m_police.c +++ b/tc/m_police.c @@ -278,7 +278,7 @@ static int print_police(struct action_util *a, FILE *f, struct rtattr *arg) __u64 rate64, prate64; __u64 pps64, ppsburst64; - print_string(PRINT_ANY, "kind", "%s", "police"); + print_string(PRINT_JSON, "kind", "%s", "police"); if (arg == NULL) return 0; @@ -301,7 +301,8 @@ static int print_police(struct action_util *a, FILE *f, struct rtattr *arg) RTA_PAYLOAD(tb[TCA_POLICE_RATE64]) >= sizeof(rate64)) rate64 = rta_getattr_u64(tb[TCA_POLICE_RATE64]); - print_uint(PRINT_ANY, "index", "\t index %u ", p->index); + print_hex(PRINT_FP, NULL, " police 0x%x ", p->index); + print_uint(PRINT_JSON, "index", NULL, p->index); tc_print_rate(PRINT_FP, NULL, "rate %s ", rate64); buffer = tc_calc_xmitsize(rate64, p->burst); print_size(PRINT_FP, NULL, "burst %s ", buffer); @@ -342,12 +343,13 @@ static int print_police(struct action_util *a, FILE *f, struct rtattr *arg) print_string(PRINT_FP, NULL, " ", NULL); } - print_uint(PRINT_ANY, "overhead", "overhead %u ", p->rate.overhead); + print_size(PRINT_ANY, "overhead", "overhead %s ", p->rate.overhead); linklayer = (p->rate.linklayer & TC_LINKLAYER_MASK); if (linklayer > TC_LINKLAYER_ETHERNET || show_details) print_string(PRINT_ANY, "linklayer", "linklayer %s ", sprint_linklayer(linklayer, b2)); - print_int(PRINT_ANY, "ref", "ref %d ", p->refcnt); + print_nl(); + print_int(PRINT_ANY, "ref", "\tref %d ", p->refcnt); print_int(PRINT_ANY, "bind", "bind %d ", p->bindcnt); if (show_stats) { if (tb[TCA_POLICE_TM]) { From c06d313d86c1acb8dd72589816301853ff5a4ac4 Mon Sep 17 00:00:00 2001 From: Peilin Ye Date: Tue, 20 Jul 2021 12:21:45 -0700 Subject: [PATCH 10/10] tc/skbmod: Remove misinformation about the swap 
action Currently man 8 tc-skbmod says that "...the swap action will occur after any smac/dmac substitutions are executed, if they are present." This is false. In fact, trying to "set" and "swap" in a single skbmod command causes the "set" part to be completely ignored. As an example: $ tc filter add dev eth0 parent 1: protocol ip prio 10 \ matchall action skbmod \ set dmac AA:AA:AA:AA:AA:AA smac BB:BB:BB:BB:BB:BB \ swap mac The above command simply does a "swap", without setting DMAC or SMAC to AA's or BB's. The root cause of this is in the kernel, see net/sched/act_skbmod.c:tcf_skbmod_init(): parm = nla_data(tb[TCA_SKBMOD_PARMS]); index = parm->index; if (parm->flags & SKBMOD_F_SWAPMAC) lflags = SKBMOD_F_SWAPMAC; ^^^^^^^^^^^^^^^^^^^^^^^^^^ Doing a "=" instead of "|=" clears all other "set" flags when doing a "swap". Discourage using "set" and "swap" in the same command by documenting it as undefined behavior, and update the "SYNOPSIS" section as well as tc -help text accordingly. If one really needs to e.g. "set" DMAC to all AA's then "swap" DMAC and SMAC, one should do two separate commands and "pipe" them together. Reviewed-by: Cong Wang Signed-off-by: Peilin Ye Signed-off-by: Stephen Hemminger --- man/man8/tc-skbmod.8 | 24 +++++++++++++----------- tc/m_skbmod.c | 5 ++--- 2 files changed, 15 insertions(+), 14 deletions(-) diff --git a/man/man8/tc-skbmod.8 b/man/man8/tc-skbmod.8 index eb3c38fa..76512311 100644 --- a/man/man8/tc-skbmod.8 +++ b/man/man8/tc-skbmod.8 @@ -5,12 +5,12 @@ skbmod - user-friendly packet editor action .SH SYNOPSIS .in +8 .ti -8 -.BR tc " ... " "action skbmod " "{ [ " "set " -.IR SETTABLE " ] [ " +.BR tc " ... 
" "action skbmod " "{ " "set " +.IR SETTABLE " | " .BI swap " SWAPPABLE" -.RI " ] [ " CONTROL " ] [ " +.RI " } [ " CONTROL " ] [ " .BI index " INDEX " -] } +] .ti -8 .IR SETTABLE " := " @@ -25,6 +25,7 @@ skbmod - user-friendly packet editor action .IR SWAPPABLE " := " .B mac .ti -8 + .IR CONTROL " := {" .BR reclassify " | " pipe " | " drop " | " shot " | " continue " | " pass " }" .SH DESCRIPTION @@ -48,10 +49,7 @@ Change the source mac to the specified address. Change the ethertype to the specified value. .TP .BI mac -Used to swap mac addresses. The -.B swap mac -directive is performed -after any outstanding D/SMAC changes. +Used to swap mac addresses. .TP .I CONTROL The following keywords allow to control how the tree of qdisc, classes, @@ -128,9 +126,13 @@ tc filter add dev eth3 parent 1: protocol ip prio 10 \\ .EE .RE -As mentioned above, the swap action will occur after any -.B " smac/dmac " -substitutions are executed, if they are present. +However, trying to +.B set +and +.B swap +in a single +.B skbmod +command will cause undefined behavior. .SH SEE ALSO .BR tc (8), diff --git a/tc/m_skbmod.c b/tc/m_skbmod.c index e13d3f16..3fe30651 100644 --- a/tc/m_skbmod.c +++ b/tc/m_skbmod.c @@ -28,10 +28,9 @@ static void skbmod_explain(void) { fprintf(stderr, - "Usage:... skbmod {[set ] [swap ]} [CONTROL] [index INDEX]\n" + "Usage:... skbmod { set | swap } [CONTROL] [index INDEX]\n" "where SETTABLE is: [dmac DMAC] [smac SMAC] [etype ETYPE]\n" - "where SWAPABLE is: \"mac\" to swap mac addresses\n" - "note: \"swap mac\" is done after any outstanding D/SMAC change\n" + "where SWAPPABLE is: \"mac\" to swap mac addresses\n" "\tDMAC := 6 byte Destination MAC address\n" "\tSMAC := optional 6 byte Source MAC address\n" "\tETYPE := optional 16 bit ethertype\n"