From 96303c25eee69596877a186a6c179559b9d0f947 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Mon, 12 Mar 2018 13:58:17 -0700 Subject: [PATCH 01/12] Revert "iproute: "list/flush/save default" selected all of the routes" This reverts commit 9135c4d6037ff9f1818507bac0049fc44db8c3d2. Debian maintainer found that basic command: # ip route flush all No longer worked as expected which breaks user scripts and expectations. It no longer flushed all IPv4 routes. Reported-by: Luca Boccassi Signed-off-by: Stephen Hemminger --- ip/iproute.c | 65 +++++++++++++++------------------------------------- lib/utils.c | 13 +++++++++++ 2 files changed, 32 insertions(+), 46 deletions(-) diff --git a/ip/iproute.c b/ip/iproute.c index bf886fda..32c93ed5 100644 --- a/ip/iproute.c +++ b/ip/iproute.c @@ -191,42 +191,20 @@ static int filter_nlmsg(struct nlmsghdr *n, struct rtattr **tb, int host_len) return 0; if ((filter.tos^r->rtm_tos)&filter.tosmask) return 0; - if (filter.rdst.family) { - if (r->rtm_family != filter.rdst.family || - filter.rdst.bitlen > r->rtm_dst_len) - return 0; - } else if (filter.rdst.flags & PREFIXLEN_SPECIFIED) { - if (filter.rdst.bitlen > r->rtm_dst_len) - return 0; - } - if (filter.mdst.family) { - if (r->rtm_family != filter.mdst.family || - (filter.mdst.bitlen >= 0 && - filter.mdst.bitlen < r->rtm_dst_len)) - return 0; - } else if (filter.mdst.flags & PREFIXLEN_SPECIFIED) { - if (filter.mdst.bitlen >= 0 && - filter.mdst.bitlen < r->rtm_dst_len) - return 0; - } - if (filter.rsrc.family) { - if (r->rtm_family != filter.rsrc.family || - filter.rsrc.bitlen > r->rtm_src_len) - return 0; - } else if (filter.rsrc.flags & PREFIXLEN_SPECIFIED) { - if (filter.rsrc.bitlen > r->rtm_src_len) - return 0; - } - if (filter.msrc.family) { - if (r->rtm_family != filter.msrc.family || - (filter.msrc.bitlen >= 0 && - filter.msrc.bitlen < r->rtm_src_len)) - return 0; - } else if (filter.msrc.flags & PREFIXLEN_SPECIFIED) { - if (filter.msrc.bitlen >= 0 && - filter.msrc.bitlen < 
r->rtm_src_len) - return 0; - } + if (filter.rdst.family && + (r->rtm_family != filter.rdst.family || filter.rdst.bitlen > r->rtm_dst_len)) + return 0; + if (filter.mdst.family && + (r->rtm_family != filter.mdst.family || + (filter.mdst.bitlen >= 0 && filter.mdst.bitlen < r->rtm_dst_len))) + return 0; + if (filter.rsrc.family && + (r->rtm_family != filter.rsrc.family || filter.rsrc.bitlen > r->rtm_src_len)) + return 0; + if (filter.msrc.family && + (r->rtm_family != filter.msrc.family || + (filter.msrc.bitlen >= 0 && filter.msrc.bitlen < r->rtm_src_len))) + return 0; if (filter.rvia.family) { int family = r->rtm_family; @@ -243,9 +221,7 @@ static int filter_nlmsg(struct nlmsghdr *n, struct rtattr **tb, int host_len) if (tb[RTA_DST]) memcpy(&dst.data, RTA_DATA(tb[RTA_DST]), (r->rtm_dst_len+7)/8); - if (filter.rsrc.family || filter.msrc.family || - filter.rsrc.flags & PREFIXLEN_SPECIFIED || - filter.msrc.flags & PREFIXLEN_SPECIFIED) { + if (filter.rsrc.family || filter.msrc.family) { if (tb[RTA_SRC]) memcpy(&src.data, RTA_DATA(tb[RTA_SRC]), (r->rtm_src_len+7)/8); } @@ -265,18 +241,15 @@ static int filter_nlmsg(struct nlmsghdr *n, struct rtattr **tb, int host_len) memcpy(&prefsrc.data, RTA_DATA(tb[RTA_PREFSRC]), host_len/8); } - if ((filter.rdst.family || filter.rdst.flags & PREFIXLEN_SPECIFIED) && - inet_addr_match(&dst, &filter.rdst, filter.rdst.bitlen)) + if (filter.rdst.family && inet_addr_match(&dst, &filter.rdst, filter.rdst.bitlen)) return 0; - if ((filter.mdst.family || filter.mdst.flags & PREFIXLEN_SPECIFIED) && + if (filter.mdst.family && filter.mdst.bitlen >= 0 && inet_addr_match(&dst, &filter.mdst, r->rtm_dst_len)) return 0; - if ((filter.rsrc.family || filter.rsrc.flags & PREFIXLEN_SPECIFIED) && - inet_addr_match(&src, &filter.rsrc, filter.rsrc.bitlen)) + if (filter.rsrc.family && inet_addr_match(&src, &filter.rsrc, filter.rsrc.bitlen)) return 0; - if ((filter.msrc.family || filter.msrc.flags & PREFIXLEN_SPECIFIED) && - filter.msrc.bitlen >= 0 && + if 
(filter.msrc.family && filter.msrc.bitlen >= 0 && inet_addr_match(&src, &filter.msrc, r->rtm_src_len)) return 0; diff --git a/lib/utils.c b/lib/utils.c index 379739d6..87b609f2 100644 --- a/lib/utils.c +++ b/lib/utils.c @@ -681,6 +681,19 @@ int get_prefix_1(inet_prefix *dst, char *arg, int family) char *slash; int err, bitlen, flags; + memset(dst, 0, sizeof(*dst)); + + if (strcmp(arg, "default") == 0 || + strcmp(arg, "any") == 0 || + strcmp(arg, "all") == 0) { + if ((family == AF_DECnet) || (family == AF_MPLS)) + return -1; + dst->family = family; + dst->bytelen = 0; + dst->bitlen = 0; + return 0; + } + slash = strchr(arg, '/'); if (slash) *slash = 0; From e9fa16583a1e78482265b195d4cbe7a67420f6df Mon Sep 17 00:00:00 2001 From: Roman Mashak Date: Tue, 13 Mar 2018 09:57:10 -0400 Subject: [PATCH 02/12] tc: print actual action for sample action Signed-off-by: Roman Mashak Signed-off-by: Stephen Hemminger --- tc/m_sample.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tc/m_sample.c b/tc/m_sample.c index ff5ee6bd..d88846c6 100644 --- a/tc/m_sample.c +++ b/tc/m_sample.c @@ -163,6 +163,8 @@ static int print_sample(struct action_util *au, FILE *f, struct rtattr *arg) fprintf(f, " trunc_size %d", rta_getattr_u32(tb[TCA_SAMPLE_TRUNC_SIZE])); + print_action_control(f, " ", p->action, ""); + fprintf(f, "\n\tindex %d ref %d bind %d", p->index, p->refcnt, p->bindcnt); From bf7d148803950e858471d9a54caae239ddac468a Mon Sep 17 00:00:00 2001 From: Roman Mashak Date: Tue, 13 Mar 2018 17:16:23 -0400 Subject: [PATCH 03/12] tc: use get_u32() in psample action to match types Signed-off-by: Roman Mashak Acked-by: Yotam Gigi Signed-off-by: Stephen Hemminger --- tc/m_sample.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tc/m_sample.c b/tc/m_sample.c index d88846c6..01763cb4 100644 --- a/tc/m_sample.c +++ b/tc/m_sample.c @@ -65,7 +65,7 @@ static int parse_sample(struct action_util *a, int *argc_p, char ***argv_p, while (argc > 0) { if (matches(*argv, "rate") 
== 0) { NEXT_ARG(); - if (get_unsigned(&rate, *argv, 10) != 0) { + if (get_u32(&rate, *argv, 10) != 0) { fprintf(stderr, "Illegal rate %s\n", *argv); usage(); return -1; @@ -73,7 +73,7 @@ static int parse_sample(struct action_util *a, int *argc_p, char ***argv_p, rate_set = true; } else if (matches(*argv, "group") == 0) { NEXT_ARG(); - if (get_unsigned(&group, *argv, 10) != 0) { + if (get_u32(&group, *argv, 10) != 0) { fprintf(stderr, "Illegal group num %s\n", *argv); usage(); @@ -82,7 +82,7 @@ static int parse_sample(struct action_util *a, int *argc_p, char ***argv_p, group_set = true; } else if (matches(*argv, "trunc") == 0) { NEXT_ARG(); - if (get_unsigned(&trunc, *argv, 10) != 0) { + if (get_u32(&trunc, *argv, 10) != 0) { fprintf(stderr, "Illegal truncation size %s\n", *argv); usage(); From b8d26199896784be3cdd13849d9ce7656e146ac1 Mon Sep 17 00:00:00 2001 From: Alexander Zubkov Date: Sun, 18 Mar 2018 17:50:25 +0100 Subject: [PATCH 04/12] treat "default" and "all"/"any" addresses differently Debian maintainer found that basic command: # ip route flush all No longer worked as expected which breaks user scripts and expectations. It no longer flushed all IPv4 routes. Recently behavior of "default" prefix parameter was corrected. But at the same time behavior of "all"/"any" was altered too, because they were the same branch of the code. As those parameters mean different things, they need to be treated differently in code too. This patch reflects the difference. Also after mentioned change, address parsing code was changed more and address family was set explicitly even for "all"/"any" addresses. And that broke matching conditions further. This patch fixes that too and returns AF_UNSPEC to "all"/"any" address. Now "default" is treated as top-level prefix (for example 0.0.0.0/0 in IPv4) and "all"/"any" always matches anything in exact, root and match modes. 
Reported-by: Luca Boccassi Signed-off-by: Alexander Zubkov --- lib/utils.c | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/lib/utils.c b/lib/utils.c index 379739d6..eba4fa74 100644 --- a/lib/utils.c +++ b/lib/utils.c @@ -560,14 +560,23 @@ static int __get_addr_1(inet_prefix *addr, const char *name, int family) { memset(addr, 0, sizeof(*addr)); - if (strcmp(name, "default") == 0 || - strcmp(name, "all") == 0 || - strcmp(name, "any") == 0) { + if (strcmp(name, "default") == 0) { if ((family == AF_DECnet) || (family == AF_MPLS)) return -1; addr->family = (family != AF_UNSPEC) ? family : AF_INET; addr->bytelen = af_byte_len(addr->family); addr->bitlen = -2; + addr->flags |= PREFIXLEN_SPECIFIED; + return 0; + } + + if (strcmp(name, "all") == 0 || + strcmp(name, "any") == 0) { + if ((family == AF_DECnet) || (family == AF_MPLS)) + return -1; + addr->family = AF_UNSPEC; + addr->bytelen = 0; + addr->bitlen = -2; return 0; } @@ -695,7 +704,7 @@ int get_prefix_1(inet_prefix *dst, char *arg, int family) bitlen = af_bit_len(dst->family); - flags = PREFIXLEN_SPECIFIED; + flags = 0; if (slash) { unsigned int plen; @@ -706,12 +715,11 @@ int get_prefix_1(inet_prefix *dst, char *arg, int family) if (plen > bitlen) return -1; + flags |= PREFIXLEN_SPECIFIED; bitlen = plen; } else { if (dst->bitlen == -2) bitlen = 0; - else - flags = 0; } dst->flags |= flags; From 7696f1097f79be2ce5984a8a16103fd17391cac2 Mon Sep 17 00:00:00 2001 From: Alexander Zubkov Date: Sun, 18 Mar 2018 17:50:25 +0100 Subject: [PATCH 05/12] treat "default" and "all"/"any" addresses differently Debian maintainer found that basic command: # ip route flush all No longer worked as expected which breaks user scripts and expectations. It no longer flushed all IPv4 routes. Recently behavior of "default" prefix parameter was corrected. But at the same time behavior of "all"/"any" was altered too, because they were the same branch of the code. 
As those parameters mean different things, they need to be treated differently in code too. This patch reflects the difference. Also after mentioned change, address parsing code was changed more and address family was set explicitly even for "all"/"any" addresses. And that broke matching conditions further. This patch fixes that too and returns AF_UNSPEC to "all"/"any" address. Now "default" is treated as top-level prefix (for example 0.0.0.0/0 in IPv4) and "all"/"any" always matches anything in exact, root and match modes. Reported-by: Luca Boccassi Signed-off-by: Alexander Zubkov --- lib/utils.c | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/lib/utils.c b/lib/utils.c index 87b609f2..4fe4ac1e 100644 --- a/lib/utils.c +++ b/lib/utils.c @@ -560,14 +560,23 @@ static int __get_addr_1(inet_prefix *addr, const char *name, int family) { memset(addr, 0, sizeof(*addr)); - if (strcmp(name, "default") == 0 || - strcmp(name, "all") == 0 || - strcmp(name, "any") == 0) { + if (strcmp(name, "default") == 0) { if ((family == AF_DECnet) || (family == AF_MPLS)) return -1; addr->family = (family != AF_UNSPEC) ? 
family : AF_INET; addr->bytelen = af_byte_len(addr->family); addr->bitlen = -2; + addr->flags |= PREFIXLEN_SPECIFIED; + return 0; + } + + if (strcmp(name, "all") == 0 || + strcmp(name, "any") == 0) { + if ((family == AF_DECnet) || (family == AF_MPLS)) + return -1; + addr->family = AF_UNSPEC; + addr->bytelen = 0; + addr->bitlen = -2; return 0; } @@ -708,7 +717,7 @@ int get_prefix_1(inet_prefix *dst, char *arg, int family) bitlen = af_bit_len(dst->family); - flags = PREFIXLEN_SPECIFIED; + flags = 0; if (slash) { unsigned int plen; @@ -719,12 +728,11 @@ int get_prefix_1(inet_prefix *dst, char *arg, int family) if (plen > bitlen) return -1; + flags |= PREFIXLEN_SPECIFIED; bitlen = plen; } else { if (dst->bitlen == -2) bitlen = 0; - else - flags = 0; } dst->flags |= flags; From 990b1d90d7284a8f38648d0ac6348f94cc90ae19 Mon Sep 17 00:00:00 2001 From: Roman Mashak Date: Tue, 20 Mar 2018 13:45:38 -0400 Subject: [PATCH 06/12] tc: print actual action for connmark action Signed-off-by: Roman Mashak --- tc/m_connmark.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tc/m_connmark.c b/tc/m_connmark.c index 37d71854..d5b140cb 100644 --- a/tc/m_connmark.c +++ b/tc/m_connmark.c @@ -121,7 +121,8 @@ static int print_connmark(struct action_util *au, FILE *f, struct rtattr *arg) ci = RTA_DATA(tb[TCA_CONNMARK_PARMS]); - fprintf(f, " connmark zone %d\n", ci->zone); + fprintf(f, " connmark zone %d", ci->zone); + print_action_control(f, " ", ci->action, "\n"); fprintf(f, "\t index %u ref %d bind %d", ci->index, ci->refcnt, ci->bindcnt); From 79f49f58aaefe11f677c8e072557b834a19f47f3 Mon Sep 17 00:00:00 2001 From: Phil Sutter Date: Thu, 22 Mar 2018 15:00:38 +0100 Subject: [PATCH 07/12] man: ip-route.8: ssthresh parameter is NUMBER Synopsis section was inconsistent with regards to help text and later description of ssthresh parameter. 
Signed-off-by: Phil Sutter --- man/man8/ip-route.8.in | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/man/man8/ip-route.8.in b/man/man8/ip-route.8.in index 487a8748..b28f3d2c 100644 --- a/man/man8/ip-route.8.in +++ b/man/man8/ip-route.8.in @@ -125,7 +125,7 @@ replace " } " .B cwnd .IR NUMBER " ] [ " .B ssthresh -.IR REALM " ] [ " +.IR NUMBER " ] [ " .B realms .IR REALM " ] [ " .B rto_min From 32ea3d54b474b25f1e959d7d43632d261afbb283 Mon Sep 17 00:00:00 2001 From: Stefano Brivio Date: Fri, 23 Mar 2018 09:37:05 +0100 Subject: [PATCH 08/12] ss: Fix rendering of continuous output (-E, --events) Roman Mashak reported that ss currently shows no output when it should continuously report information about terminated sockets (-E, --events switch). This happens because I missed this case in 691bd854bf4a ("ss: Buffer raw fields first, then render them as a table") and the rendering function is simply not called. To fix this, we need to: - call render() every time we need to display new socket events from generic_show_sock(), which is only used to follow events. 
Always call it even if specific socket display functions return errors to ensure we clean up buffers - get the screen width every time we have new events to display, thus factor out getting the screen width from main() into a function we'll call whenever we calculate columns width - reset the current field pointer after rendering, more output might come after render() is called Reported-by: Roman Mashak Fixes: 691bd854bf4a ("ss: Buffer raw fields first, then render them as a table") Signed-off-by: Stefano Brivio Tested-by: Roman Mashak Signed-off-by: Stephen Hemminger --- misc/ss.c | 61 ++++++++++++++++++++++++++++++++++++------------------- 1 file changed, 40 insertions(+), 21 deletions(-) diff --git a/misc/ss.c b/misc/ss.c index e087bef7..6338820b 100644 --- a/misc/ss.c +++ b/misc/ss.c @@ -1106,15 +1106,33 @@ static void buf_free_all(void) buffer.head = NULL; } +/* Get current screen width, default to 80 columns if TIOCGWINSZ fails */ +static int render_screen_width(void) +{ + int width = 80; + + if (isatty(STDOUT_FILENO)) { + struct winsize w; + + if (ioctl(STDOUT_FILENO, TIOCGWINSZ, &w) != -1) { + if (w.ws_col > 0) + width = w.ws_col; + } + } + + return width; +} + /* Calculate column width from contents length. If columns don't fit on one * line, break them into the least possible amount of lines and keep them * aligned across lines. Available screen space is equally spread between fields * as additional spacing. 
*/ -static void render_calc_width(int screen_width) +static void render_calc_width(void) { - int first, len = 0, linecols = 0; + int screen_width = render_screen_width(); struct column *c, *eol = columns - 1; + int first, len = 0, linecols = 0; /* First pass: set width for each column to measured content length */ for (first = 1, c = columns; c - columns < COL_MAX; c++) { @@ -1195,7 +1213,7 @@ newline: } /* Render buffered output with spacing and delimiters, then free up buffers */ -static void render(int screen_width) +static void render(void) { struct buf_token *token; int printed, line_started = 0; @@ -1209,7 +1227,7 @@ static void render(int screen_width) /* Ensure end alignment of last token, it wasn't necessarily flushed */ buffer.tail->end += buffer.cur->len % 2; - render_calc_width(screen_width); + render_calc_width(); /* Rewind and replay */ buffer.tail = buffer.head; @@ -1245,6 +1263,7 @@ static void render(int screen_width) } buf_free_all(); + current_field = columns; } static void sock_state_print(struct sockstat *s) @@ -4264,23 +4283,33 @@ static int generic_show_sock(const struct sockaddr_nl *addr, { struct sock_diag_msg *r = NLMSG_DATA(nlh); struct inet_diag_arg inet_arg = { .f = arg, .protocol = IPPROTO_MAX }; + int ret; switch (r->sdiag_family) { case AF_INET: case AF_INET6: inet_arg.rth = inet_arg.f->rth_for_killing; - return show_one_inet_sock(addr, nlh, &inet_arg); + ret = show_one_inet_sock(addr, nlh, &inet_arg); + break; case AF_UNIX: - return unix_show_sock(addr, nlh, arg); + ret = unix_show_sock(addr, nlh, arg); + break; case AF_PACKET: - return packet_show_sock(addr, nlh, arg); + ret = packet_show_sock(addr, nlh, arg); + break; case AF_NETLINK: - return netlink_show_sock(addr, nlh, arg); + ret = netlink_show_sock(addr, nlh, arg); + break; case AF_VSOCK: - return vsock_show_sock(addr, nlh, arg); + ret = vsock_show_sock(addr, nlh, arg); + break; default: - return -1; + ret = -1; } + + render(); + + return ret; } static int 
handle_follow_request(struct filter *f) @@ -4647,7 +4676,6 @@ int main(int argc, char *argv[]) FILE *filter_fp = NULL; int ch; int state_filter = 0; - int screen_width = 80; while ((ch = getopt_long(argc, argv, "dhaletuwxnro460spbEf:miA:D:F:vVzZN:KHS", @@ -4949,15 +4977,6 @@ int main(int argc, char *argv[]) if (!(current_filter.states & (current_filter.states - 1))) columns[COL_STATE].disabled = 1; - if (isatty(STDOUT_FILENO)) { - struct winsize w; - - if (ioctl(STDOUT_FILENO, TIOCGWINSZ, &w) != -1) { - if (w.ws_col > 0) - screen_width = w.ws_col; - } - } - if (show_header) print_header(); @@ -4988,7 +5007,7 @@ int main(int argc, char *argv[]) if (show_users || show_proc_ctx || show_sock_ctx) user_ent_destroy(); - render(screen_width); + render(); return 0; } From da7a1aa7da650b75d7908ce12661c3469f7cf7de Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Fri, 23 Mar 2018 13:19:13 +0100 Subject: [PATCH 09/12] devlink: fix port new monitoring message typo s/net/new/ Fixes: a3c4b484a1ed ("add devlink tool") Signed-off-by: Jiri Pirko Acked-by: David Ahern Signed-off-by: Stephen Hemminger --- devlink/devlink.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/devlink/devlink.c b/devlink/devlink.c index 69c3c5d9..ba02064b 100644 --- a/devlink/devlink.c +++ b/devlink/devlink.c @@ -2602,7 +2602,7 @@ static const char *cmd_name(uint8_t cmd) case DEVLINK_CMD_DEL: return "del"; case DEVLINK_CMD_PORT_GET: return "get"; case DEVLINK_CMD_PORT_SET: return "set"; - case DEVLINK_CMD_PORT_NEW: return "net"; + case DEVLINK_CMD_PORT_NEW: return "new"; case DEVLINK_CMD_PORT_DEL: return "del"; default: return ""; } From 8ee38d833ccb1863f06634e12c5236b0ef7c2d76 Mon Sep 17 00:00:00 2001 From: Phil Sutter Date: Fri, 23 Mar 2018 21:18:56 +0100 Subject: [PATCH 10/12] man: tc-vlan.8: Fix for incorrect example This has to be a second match statement to the same u32 filter, not a second one (which tc-filter doesn't support at all). 
Signed-off-by: Phil Sutter Signed-off-by: Stephen Hemminger --- man/man8/tc-vlan.8 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/man/man8/tc-vlan.8 b/man/man8/tc-vlan.8 index 59c81e86..f5ffc25f 100644 --- a/man/man8/tc-vlan.8 +++ b/man/man8/tc-vlan.8 @@ -103,7 +103,7 @@ into VLAN ID 123: #tc qdisc add dev eth0 handle ffff: ingress #tc filter add dev eth0 parent ffff: pref 11 protocol ip \\ u32 match ip protocol 1 0xff flowid 1:1 \\ - u32 match ip src 10.0.0.2 flowid 1:1 \\ + match ip src 10.0.0.2 flowid 1:1 \\ action vlan push id 123 .EE .RE From b2038cc0b2403e8c5126cfcf45f6ee48ac549ad0 Mon Sep 17 00:00:00 2001 From: Phil Sutter Date: Sat, 24 Mar 2018 18:45:14 +0100 Subject: [PATCH 11/12] ssfilter: Eliminate shift/reduce conflicts The problematic bit was the 'expr: expr expr' rule. Fix this by making 'expr' token represent a single filter only and introduce a new token 'exprlist' to represent a combination of filters. Signed-off-by: Phil Sutter --- misc/ssfilter.y | 52 +++++++++++++++++++++++++------------------------ 1 file changed, 27 insertions(+), 25 deletions(-) diff --git a/misc/ssfilter.y b/misc/ssfilter.y index 4db3c95f..88d4229a 100644 --- a/misc/ssfilter.y +++ b/misc/ssfilter.y @@ -42,7 +42,7 @@ static void yyerror(char *s) %nonassoc '!' %% -applet: null expr +applet: null exprlist { *yy_ret = $2; $$ = $2; @@ -51,6 +51,32 @@ applet: null expr ; null: /* NOTHING */ { $$ = NULL; } ; +exprlist: expr + | '!' 
expr + { + $$ = alloc_node(SSF_NOT, $2); + } + | '(' exprlist ')' + { + $$ = $2; + } + | exprlist '|' expr + { + $$ = alloc_node(SSF_OR, $1); + $$->post = $3; + } + | exprlist '&' expr + { + $$ = alloc_node(SSF_AND, $1); + $$->post = $3; + } + | exprlist expr + { + $$ = alloc_node(SSF_AND, $1); + $$->post = $2; + } + ; + expr: DCOND HOSTCOND { $$ = alloc_node(SSF_DCOND, $2); @@ -128,30 +154,6 @@ expr: DCOND HOSTCOND { $$ = alloc_node(SSF_S_AUTO, NULL); } - | expr '|' expr - { - $$ = alloc_node(SSF_OR, $1); - $$->post = $3; - } - | expr expr - { - $$ = alloc_node(SSF_AND, $1); - $$->post = $2; - } - | expr '&' expr - - { - $$ = alloc_node(SSF_AND, $1); - $$->post = $3; - } - | '!' expr - { - $$ = alloc_node(SSF_NOT, $2); - } - | '(' expr ')' - { - $$ = $2; - } ; %% From ba2fc55b99f8363c80ce36681bc1ec97690b66f5 Mon Sep 17 00:00:00 2001 From: Luca Boccassi Date: Tue, 27 Mar 2018 18:48:55 +0100 Subject: [PATCH 12/12] Drop capabilities if not running ip exec vrf with libcap ip vrf exec requires root or CAP_NET_ADMIN, CAP_SYS_ADMIN and CAP_DAC_OVERRIDE. It is not possible to run unprivileged commands like ping as non-root or non-cap-enabled due to this requirement. To allow users and administrators to safely add the required capabilities to the binary, drop all capabilities on start if not invoked with "vrf exec". Update the manpage with the requirements. 
Signed-off-by: Luca Boccassi Signed-off-by: Stephen Hemminger --- configure | 17 +++++++++++++++++ include/utils.h | 2 ++ ip/ip.c | 12 ++++++++++++ ip/ipvrf.c | 2 ++ lib/utils.c | 22 ++++++++++++++++++++++ man/man8/ip-vrf.8 | 8 ++++++++ 6 files changed, 63 insertions(+) diff --git a/configure b/configure index f7c2d7a7..5ef5cd4c 100755 --- a/configure +++ b/configure @@ -336,6 +336,20 @@ EOF rm -f $TMPDIR/strtest.c $TMPDIR/strtest } +check_cap() +{ + if ${PKG_CONFIG} libcap --exists + then + echo "HAVE_CAP:=y" >>$CONFIG + echo "yes" + + echo 'CFLAGS += -DHAVE_LIBCAP' `${PKG_CONFIG} libcap --cflags` >>$CONFIG + echo 'LDLIBS +=' `${PKG_CONFIG} libcap --libs` >> $CONFIG + else + echo "no" + fi +} + quiet_config() { cat <> $CONFIG echo "%.o: %.c" >> $CONFIG echo ' $(QUIET_CC)$(CC) $(CFLAGS) $(EXTRA_CFLAGS) -c -o $@ $<' >> $CONFIG diff --git a/include/utils.h b/include/utils.h index 0394268e..e7bffe8a 100644 --- a/include/utils.h +++ b/include/utils.h @@ -299,4 +299,6 @@ size_t strlcpy(char *dst, const char *src, size_t size); size_t strlcat(char *dst, const char *src, size_t size); #endif +void drop_cap(void); + #endif /* __UTILS_H__ */ diff --git a/ip/ip.c b/ip/ip.c index e0cd96cb..e716fed8 100644 --- a/ip/ip.c +++ b/ip/ip.c @@ -174,6 +174,18 @@ int main(int argc, char **argv) char *batch_file = NULL; int color = 0; + /* to run vrf exec without root, capabilities might be set, drop them + * if not needed as the first thing. + * execv will drop them for the child command. 
+ * vrf exec requires: + * - cap_dac_override to create the cgroup subdir in /sys + * - cap_sys_admin to load the BPF program + * - cap_net_admin to set the socket into the cgroup + */ + if (argc < 3 || strcmp(argv[1], "vrf") != 0 || + strcmp(argv[2], "exec") != 0) + drop_cap(); + basename = strrchr(argv[0], '/'); if (basename == NULL) basename = argv[0]; diff --git a/ip/ipvrf.c b/ip/ipvrf.c index f9277e1e..8a6b7f97 100644 --- a/ip/ipvrf.c +++ b/ip/ipvrf.c @@ -436,6 +436,8 @@ out2: out: free(mnt); + drop_cap(); + return rc; } diff --git a/lib/utils.c b/lib/utils.c index 4fe4ac1e..dadefb55 100644 --- a/lib/utils.c +++ b/lib/utils.c @@ -30,6 +30,9 @@ #include #include #include +#ifdef HAVE_LIBCAP +#include <sys/capability.h> +#endif #include "rt_names.h" #include "utils.h" @@ -1495,3 +1498,22 @@ size_t strlcat(char *dst, const char *src, size_t size) return dlen + strlcpy(dst + dlen, src, size - dlen); } #endif + +void drop_cap(void) +{ +#ifdef HAVE_LIBCAP + /* don't hamstring root/sudo */ + if (getuid() != 0 && geteuid() != 0) { + cap_t capabilities; + + capabilities = cap_get_proc(); + if (!capabilities) + exit(EXIT_FAILURE); + if (cap_clear(capabilities) != 0) + exit(EXIT_FAILURE); + if (cap_set_proc(capabilities) != 0) + exit(EXIT_FAILURE); + cap_free(capabilities); + } +#endif +} diff --git a/man/man8/ip-vrf.8 b/man/man8/ip-vrf.8 index 18789339..1a42cebe 100644 --- a/man/man8/ip-vrf.8 +++ b/man/man8/ip-vrf.8 @@ -63,6 +63,14 @@ a VRF other than the default VRF (main table). A command can be run against the default VRF by passing the "default" as the VRF name. This is useful if the current shell is associated with another VRF (e.g, Management VRF). +This command requires the system to be booted with cgroup v2 (e.g. with systemd, +add systemd.unified_cgroup_hierarchy=1 to the kernel command line). + +This command also requires to be run as root or with the CAP_SYS_ADMIN, +CAP_NET_ADMIN and CAP_DAC_OVERRIDE capabilities. 
If built with libcap and if +capabilities are added to the ip binary program via setcap, the program will +drop them as the first thing when invoked, unless the command is vrf exec. + .TP .B ip vrf identify [PID] - Report VRF association for process .sp