From 1ca2e08bd0616df57381408b8d4d801dc3425823 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Mon, 13 Feb 2017 12:21:53 -0800 Subject: [PATCH 1/8] ip route: Make name of protocol 0 consistent iproute2 can inconsistently show the name of protocol 0 if a route with a custom protocol is added. For example: dsa@cartman:~$ ip -6 ro ls table all | egrep 'proto none|proto unspec' local ::1 dev lo table local proto none metric 0 pref medium local fe80::225:90ff:fecb:1c18 dev lo table local proto none metric 0 pref medium local fe80::92e2:baff:fe5c:da5d dev lo table local proto none metric 0 pref medium protocol 0 is pretty printed as "none". Add a route with a custom protocol: dsa@cartman:~$ sudo ip -6 ro add 2001:db8:200::1/128 dev eth0 proto 123 And now display has switched from "none" to "unspec": dsa@cartman:~$ ip -6 ro ls table all | egrep 'proto none|proto unspec' local ::1 dev lo table local proto unspec metric 0 pref medium local fe80::225:90ff:fecb:1c18 dev lo table local proto unspec metric 0 pref medium local fe80::92e2:baff:fe5c:da5d dev lo table local proto unspec metric 0 pref medium The rt_protos file has the id to name mapping as "unspec" while rtnl_rtprot_tab[0] has "none". The presence of a custom protocol id triggers reading the rt_protos file and overwriting the string in rtnl_rtprot_tab. All of this is logic from 2004 and earlier. Update rtnl_rtprot_tab to "unspec" to match the enum value. 
Signed-off-by: David Ahern --- lib/rt_names.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/rt_names.c b/lib/rt_names.c index 3a16d5cf..04c15ff5 100644 --- a/lib/rt_names.c +++ b/lib/rt_names.c @@ -119,7 +119,7 @@ static void rtnl_tab_initialize(const char *file, char **tab, int size) } static char *rtnl_rtprot_tab[256] = { - [RTPROT_UNSPEC] = "none", + [RTPROT_UNSPEC] = "unspec", [RTPROT_REDIRECT] = "redirect", [RTPROT_KERNEL] = "kernel", [RTPROT_BOOT] = "boot", From afdc1fed243f5499a53f5fda202031cf4c4d4044 Mon Sep 17 00:00:00 2001 From: Or Gerlitz Date: Thu, 9 Feb 2017 15:10:14 +0200 Subject: [PATCH 2/8] tc: matchall: Print skip flags when dumping a filter Print the skip flags when we dump a filter. Signed-off-by: Or Gerlitz Acked-by: Yotam Gigi Reviewed-by: Simon Horman --- tc/f_matchall.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/tc/f_matchall.c b/tc/f_matchall.c index 04e524e3..ac486308 100644 --- a/tc/f_matchall.c +++ b/tc/f_matchall.c @@ -130,6 +130,15 @@ static int matchall_print_opt(struct filter_util *qu, FILE *f, sprint_tc_classid(rta_getattr_u32(tb[TCA_MATCHALL_CLASSID]), b1)); } + if (tb[TCA_MATCHALL_FLAGS]) { + __u32 flags = rta_getattr_u32(tb[TCA_MATCHALL_FLAGS]); + + if (flags & TCA_CLS_FLAGS_SKIP_HW) + fprintf(f, "\n skip_hw"); + if (flags & TCA_CLS_FLAGS_SKIP_SW) + fprintf(f, "\n skip_sw"); + } + if (tb[TCA_MATCHALL_ACT]) tc_print_action(f, tb[TCA_MATCHALL_ACT]); From 3064a44c6979805a4bef836f00e854d437cc89cd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Asbj=C3=B8rn=20Sloth=20T=C3=B8nnesen?= Date: Wed, 15 Feb 2017 21:26:41 +0000 Subject: [PATCH 3/8] testsuite: refactor kernel config search MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Asbjørn Sloth Tønnesen --- testsuite/Makefile | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/testsuite/Makefile b/testsuite/Makefile index 50a7bafa..fc693368 100644 --- a/testsuite/Makefile +++ 
b/testsuite/Makefile @@ -17,8 +17,9 @@ ifneq (,$(wildcard /proc/config.gz)) KENV := $(shell cat /proc/config.gz | gunzip | grep ^CONFIG) else KVER := $(shell uname -r) -KCPATH := /lib/modules/${KVER}/config -ifneq (,$(wildcard ${KCPATH})) +KCPATHS := /lib/modules/$(KVER)/config +KCPATH := $(firstword $(wildcard $(KCPATHS))) +ifneq (,$(KCPATH)) KENV := $(shell cat ${KCPATH} | grep ^CONFIG) endif endif From d754a64aed7f29cbe917f2013cdaf2dda0407cb3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Asbj=C3=B8rn=20Sloth=20T=C3=B8nnesen?= Date: Wed, 15 Feb 2017 21:26:42 +0000 Subject: [PATCH 4/8] testsuite: search for kernel config in /boot MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add support for finding the kernel config in Debian and derivatives. Signed-off-by: Asbjørn Sloth Tønnesen --- testsuite/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/testsuite/Makefile b/testsuite/Makefile index fc693368..055136b5 100644 --- a/testsuite/Makefile +++ b/testsuite/Makefile @@ -17,7 +17,7 @@ ifneq (,$(wildcard /proc/config.gz)) KENV := $(shell cat /proc/config.gz | gunzip | grep ^CONFIG) else KVER := $(shell uname -r) -KCPATHS := /lib/modules/$(KVER)/config +KCPATHS := /lib/modules/$(KVER)/config /boot/config-$(KVER) KCPATH := $(firstword $(wildcard $(KCPATHS))) ifneq (,$(KCPATH)) KENV := $(shell cat ${KCPATH} | grep ^CONFIG) From 46afa6947b4ab89c306703386bbf3379ad464a1f Mon Sep 17 00:00:00 2001 From: David Ahern Date: Thu, 16 Feb 2017 08:58:55 -0800 Subject: [PATCH 5/8] ip vrf: Handle vrf in a cgroup hierarchy Add support for VRF in a pre-existing hierarchy. For example, if the current process is running in CGRP/foo/bar, then 'ip vrf exec NAME CMD' should run CMD in the cgroup CGRP/foo/bar/vrf/NAME. When listing process ids in a VRF, search for the directory vrf/NAME regardless of base path, since processes in foo/bar/vrf/NAME and vrf/NAME are still running against the same vrf NAME. 
Reported-by: Andy Lutomirski Signed-off-by: David Ahern --- ip/ipvrf.c | 173 ++++++++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 145 insertions(+), 28 deletions(-) diff --git a/ip/ipvrf.c b/ip/ipvrf.c index 8bd99d62..8d61d071 100644 --- a/ip/ipvrf.c +++ b/ip/ipvrf.c @@ -21,6 +21,7 @@ #include #include #include +#include #include #include @@ -40,6 +41,10 @@ static void usage(void) exit(-1); } +/* + * parse process based cgroup file looking for PATH/vrf/NAME where + * NAME is the name of the vrf the process is associated with + */ static int vrf_identify(pid_t pid, char *name, size_t len) { char path[PATH_MAX]; @@ -55,9 +60,13 @@ static int vrf_identify(pid_t pid, char *name, size_t len) memset(name, 0, len); while (fgets(buf, sizeof(buf), fp)) { - vrf = strstr(buf, "::/vrf/"); + /* want the controller-less cgroup */ + if (strstr(buf, "::/") == NULL) + continue; + + vrf = strstr(buf, "/vrf/"); if (vrf) { - vrf += 7; /* skip past "::/vrf/" */ + vrf += 5; /* skip past "/vrf/" */ end = strchr(vrf, '\n'); if (end) *end = '\0'; @@ -97,13 +106,82 @@ static int ipvrf_identify(int argc, char **argv) return rc; } -static int ipvrf_pids(int argc, char **argv) +/* read PATH/vrf/NAME/cgroup.procs file */ +static void read_cgroup_pids(const char *base_path, char *name) { char path[PATH_MAX]; char buf[4096]; - char *mnt, *vrf; - int fd, rc = -1; ssize_t n; + int fd; + + if (snprintf(path, sizeof(path), "%s/vrf/%s%s", + base_path, name, CGRP_PROC_FILE) >= sizeof(path)) + return; + + fd = open(path, O_RDONLY); + if (fd < 0) + return; /* no cgroup file, nothing to show */ + + /* dump contents (pids) of cgroup.procs */ + while (1) { + n = read(fd, buf, sizeof(buf) - 1); + if (n <= 0) + break; + + printf("%s", buf); + } + + close(fd); +} + +/* recurse path looking for PATH/vrf/NAME */ +static int recurse_dir(char *base_path, char *name) +{ + char path[PATH_MAX]; + struct dirent *de; + struct stat fstat; + int rc; + DIR *d; + + d = opendir(base_path); + if (!d) + 
return -1; + + while ((de = readdir(d)) != NULL) { + if (!strcmp(de->d_name, ".") || !strcmp(de->d_name, "..")) + continue; + + if (!strcmp(de->d_name, "vrf")) { + read_cgroup_pids(base_path, name); + continue; + } + + /* is this a subdir that needs to be walked */ + if (snprintf(path, sizeof(path), "%s/%s", + base_path, de->d_name) >= sizeof(path)) + continue; + + if (lstat(path, &fstat) < 0) + continue; + + if (S_ISDIR(fstat.st_mode)) { + rc = recurse_dir(path, name); + if (rc != 0) + goto out; + } + } + + rc = 0; +out: + closedir(d); + + return rc; +} + +static int ipvrf_pids(int argc, char **argv) +{ + char *mnt, *vrf; + int ret; if (argc != 1) { fprintf(stderr, "Invalid arguments\n"); @@ -116,29 +194,11 @@ static int ipvrf_pids(int argc, char **argv) if (!mnt) return -1; - snprintf(path, sizeof(path), "%s/vrf/%s%s", mnt, vrf, CGRP_PROC_FILE); + ret = recurse_dir(mnt, vrf); + free(mnt); - fd = open(path, O_RDONLY); - if (fd < 0) - return 0; /* no cgroup file, nothing to show */ - while (1) { - n = read(fd, buf, sizeof(buf) - 1); - if (n < 0) { - fprintf(stderr, - "Failed to read cgroups file: %s\n", - strerror(errno)); - break; - } else if (n == 0) { - rc = 0; - break; - } - printf("%s", buf); - } - - close(fd); - - return rc; + return ret; } /* load BPF program to set sk_bound_dev_if for sockets */ @@ -203,9 +263,60 @@ out: return rc; } +/* get base path for controller-less cgroup for a process. 
+ * path returned does not include /vrf/NAME if it exists + */ +static int vrf_path(char *vpath, size_t len) +{ + char path[PATH_MAX]; + char buf[4096]; + char *vrf; + FILE *fp; + + snprintf(path, sizeof(path), "/proc/%d/cgroup", getpid()); + fp = fopen(path, "r"); + if (!fp) + return -1; + + vpath[0] = '\0'; + + while (fgets(buf, sizeof(buf), fp)) { + char *start, *nl; + + start = strstr(buf, "::/"); + if (!start) + continue; + + /* advance past '::' */ + start += 2; + + nl = strchr(start, '\n'); + if (nl) + *nl = '\0'; + + vrf = strstr(start, "/vrf"); + if (vrf) + *vrf = '\0'; + + strncpy(vpath, start, len - 1); + vpath[len - 1] = '\0'; + + /* if vrf path is just / then return nothing */ + if (!strcmp(vpath, "/")) + vpath[0] = '\0'; + + break; + } + + fclose(fp); + + return 0; +} + static int vrf_switch(const char *name) { char path[PATH_MAX], *mnt, pid[16]; + char vpath[PATH_MAX]; int ifindex = 0; int rc = -1, len, fd = -1; @@ -221,11 +332,17 @@ static int vrf_switch(const char *name) if (!mnt) return -1; + if (vrf_path(vpath, sizeof(vpath)) < 0) { + fprintf(stderr, "Failed to get base cgroup path: %s\n", + strerror(errno)); + return -1; + } + /* path to cgroup; make sure buffer has room to cat "/cgroup.procs" * to the end of the path */ - len = snprintf(path, sizeof(path) - sizeof(CGRP_PROC_FILE), "%s/vrf/%s", - mnt, ifindex ? name : ""); + len = snprintf(path, sizeof(path) - sizeof(CGRP_PROC_FILE), + "%s%s/vrf/%s", mnt, vpath, ifindex ? name : ""); if (len > sizeof(path) - sizeof(CGRP_PROC_FILE)) { fprintf(stderr, "Invalid path to cgroup2 mount\n"); goto out; From 9c49438a6716c7ac5fc2d7ff757bbe6cd9805ba8 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Thu, 16 Feb 2017 08:58:56 -0800 Subject: [PATCH 6/8] ip netns: refactor netns_identify Move guts of netns_identify into a standalone function that returns the netns name in a given buffer. 
Signed-off-by: David Ahern --- ip/ip_common.h | 1 + ip/ipnetns.c | 47 +++++++++++++++++++++++++++++++---------------- 2 files changed, 32 insertions(+), 16 deletions(-) diff --git a/ip/ip_common.h b/ip/ip_common.h index ab6a8343..e8642a18 100644 --- a/ip/ip_common.h +++ b/ip/ip_common.h @@ -59,6 +59,7 @@ int do_ipnetconf(int argc, char **argv); int do_iptoken(int argc, char **argv); int do_ipvrf(int argc, char **argv); void vrf_reset(void); +int netns_identify_pid(const char *pidstr, char *name, int len); int iplink_get(unsigned int flags, char *name, __u32 filt_mask); diff --git a/ip/ipnetns.c b/ip/ipnetns.c index 8201b94a..0b0378ab 100644 --- a/ip/ipnetns.c +++ b/ip/ipnetns.c @@ -468,28 +468,15 @@ static int netns_pids(int argc, char **argv) } -static int netns_identify(int argc, char **argv) +int netns_identify_pid(const char *pidstr, char *name, int len) { - const char *pidstr; char net_path[PATH_MAX]; int netns; struct stat netst; DIR *dir; struct dirent *entry; - if (argc < 1) { - pidstr = "self"; - } else if (argc > 1) { - fprintf(stderr, "extra arguments specified\n"); - return -1; - } else { - pidstr = argv[0]; - if (!is_pid(pidstr)) { - fprintf(stderr, "Specified string '%s' is not a pid\n", - pidstr); - return -1; - } - } + name[0] = '\0'; snprintf(net_path, sizeof(net_path), "/proc/%s/ns/net", pidstr); netns = open(net_path, O_RDONLY); @@ -531,7 +518,8 @@ static int netns_identify(int argc, char **argv) if ((st.st_dev == netst.st_dev) && (st.st_ino == netst.st_ino)) { - printf("%s\n", entry->d_name); + strncpy(name, entry->d_name, len - 1); + name[len - 1] = '\0'; } } closedir(dir); @@ -539,6 +527,33 @@ static int netns_identify(int argc, char **argv) } +static int netns_identify(int argc, char **argv) +{ + const char *pidstr; + char name[256]; + int rc; + + if (argc < 1) { + pidstr = "self"; + } else if (argc > 1) { + fprintf(stderr, "extra arguments specified\n"); + return -1; + } else { + pidstr = argv[0]; + if (!is_pid(pidstr)) { + fprintf(stderr, 
"Specified string '%s' is not a pid\n", + pidstr); + return -1; + } + } + + rc = netns_identify_pid(pidstr, name, sizeof(name)); + if (!rc) + printf("%s\n", name); + + return rc; +} + static int on_netns_del(char *nsname, void *arg) { char netns_path[PATH_MAX]; From 6a9783831c081c1b759ff3bcd704c37bcc3775de Mon Sep 17 00:00:00 2001 From: David Ahern Date: Thu, 16 Feb 2017 08:58:57 -0800 Subject: [PATCH 7/8] ip vrf: Handle VRF nesting in namespace Since cgroups are not namespace aware, the directory hierarchy used by ip vrf should account for network namespaces. In this case, change the path from CGRP/BASE/vrf/NAME to CGRP/BASE/NETNS/vrf/NAME where CGRP is the cgroup2 mount path, BASE is any base hierarchy inherited before VRF is applied, and NAME is the VRF name. The intent is as follows: a user logs into the box into some namespace with a name known to iproute2. Some other policy may have put the process into a BASE hierarchy. From there the user executes a task in a VRF and in doing so the task hierarchy becomes CGRP/BASE/NETNS/vrf/NAME. The namespace level is omitted for the default namespace. Reported-by: Andy Lutomirski Signed-off-by: David Ahern --- ip/ipvrf.c | 63 +++++++++++++++++++++++++++++++++++++++++++++++------- 1 file changed, 55 insertions(+), 8 deletions(-) diff --git a/ip/ipvrf.c b/ip/ipvrf.c index 8d61d071..cb7f9fa6 100644 --- a/ip/ipvrf.c +++ b/ip/ipvrf.c @@ -134,8 +134,8 @@ static void read_cgroup_pids(const char *base_path, char *name) close(fd); } -/* recurse path looking for PATH/vrf/NAME */ -static int recurse_dir(char *base_path, char *name) +/* recurse path looking for PATH[/NETNS]/vrf/NAME */ +static int recurse_dir(char *base_path, char *name, const char *netns) { char path[PATH_MAX]; struct dirent *de; @@ -152,7 +152,15 @@ static int recurse_dir(char *base_path, char *name) continue; if (!strcmp(de->d_name, "vrf")) { - read_cgroup_pids(base_path, name); + const char *pdir = strrchr(base_path, '/'); + + /* found a 'vrf' directory. 
if it is for the given + * namespace then dump the cgroup pids + */ + if (*netns == '\0' || + (pdir && !strcmp(pdir+1, netns))) + read_cgroup_pids(base_path, name); + continue; } @@ -165,7 +173,7 @@ static int recurse_dir(char *base_path, char *name) continue; if (S_ISDIR(fstat.st_mode)) { - rc = recurse_dir(path, name); + rc = recurse_dir(path, name, netns); if (rc != 0) goto out; } @@ -178,10 +186,25 @@ out: return rc; } +static int ipvrf_get_netns(char *netns, int len) +{ + if (netns_identify_pid("self", netns, len-3)) { + fprintf(stderr, "Failed to get name of network namespace: %s\n", + strerror(errno)); + return -1; + } + + if (*netns != '\0') + strcat(netns, "-ns"); + + return 0; +} + static int ipvrf_pids(int argc, char **argv) { char *mnt, *vrf; - int ret; + char netns[256]; + int ret = -1; if (argc != 1) { fprintf(stderr, "Invalid arguments\n"); @@ -194,8 +217,12 @@ static int ipvrf_pids(int argc, char **argv) if (!mnt) return -1; - ret = recurse_dir(mnt, vrf); + if (ipvrf_get_netns(netns, sizeof(netns)) < 0) + goto out; + ret = recurse_dir(mnt, vrf, netns); + +out: free(mnt); return ret; @@ -316,7 +343,7 @@ static int vrf_path(char *vpath, size_t len) static int vrf_switch(const char *name) { char path[PATH_MAX], *mnt, pid[16]; - char vpath[PATH_MAX]; + char vpath[PATH_MAX], netns[256]; int ifindex = 0; int rc = -1, len, fd = -1; @@ -332,17 +359,37 @@ static int vrf_switch(const char *name) if (!mnt) return -1; + /* -1 on length to add '/' to the end */ + if (ipvrf_get_netns(netns, sizeof(netns) - 1) < 0) + return -1; + if (vrf_path(vpath, sizeof(vpath)) < 0) { fprintf(stderr, "Failed to get base cgroup path: %s\n", strerror(errno)); return -1; } + /* if path already ends in netns then don't add it again */ + if (*netns != '\0') { + char *pdir = strrchr(vpath, '/'); + + if (!pdir) + pdir = vpath; + else + pdir++; + + if (strcmp(pdir, netns) == 0) + *pdir = '\0'; + + strcat(netns, "/"); + } + /* path to cgroup; make sure buffer has room to cat 
"/cgroup.procs" * to the end of the path */ len = snprintf(path, sizeof(path) - sizeof(CGRP_PROC_FILE), - "%s%s/vrf/%s", mnt, vpath, ifindex ? name : ""); + "%s%s/%svrf/%s", + mnt, vpath, netns, ifindex ? name : ""); if (len > sizeof(path) - sizeof(CGRP_PROC_FILE)) { fprintf(stderr, "Invalid path to cgroup2 mount\n"); goto out; From b5377431df6de037b668eba418dca28f1113d303 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Thu, 16 Feb 2017 08:58:58 -0800 Subject: [PATCH 8/8] ip vrf: Detect invalid vrf name in pids command Verify VRF name is valid before attempting to read cgroups files. Signed-off-by: David Ahern --- ip/ipvrf.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/ip/ipvrf.c b/ip/ipvrf.c index cb7f9fa6..5e204a9e 100644 --- a/ip/ipvrf.c +++ b/ip/ipvrf.c @@ -212,6 +212,10 @@ static int ipvrf_pids(int argc, char **argv) } vrf = argv[0]; + if (!name_is_vrf(vrf)) { + fprintf(stderr, "Invalid VRF name\n"); + return -1; + } mnt = find_cgroup2_mount(); if (!mnt)