From 79928fd0552b520aa36a22e71144d10a32f7e4fe Mon Sep 17 00:00:00 2001
From: Matteo Croce <mcroce@redhat.com>
Date: Thu, 20 Jul 2017 00:36:32 +0200
Subject: [PATCH 1/5] netns: avoid directory traversal

ip netns keeps track of created namespaces with bind mounts named
/var/run/netns/<namespace>. No input sanitization is done, allowing creation and
deletion of files relative to /var/run/netns or, if the path is nonexistent or
invalid, allowing the creation of "untracked" namespaces (invisible to the tool).

This commit denies creation or deletion of namespaces with names containing
"/" or matching exactly "." or "..".

Signed-off-by: Matteo Croce <mcroce@redhat.com>
---
 ip/ipnetns.c | 10 ++++++++++
 1 file changed, 10 insertions(+)
diff --git a/ip/ipnetns.c b/ip/ipnetns.c
index 0b0378ab..42549944 100644
--- a/ip/ipnetns.c
+++ b/ip/ipnetns.c
@@ -766,6 +766,11 @@ static int netns_monitor(int argc, char **argv)
 	return 0;
 }
 
+static int invalid_name(const char *name)
+{
+	return strchr(name, '/') || !strcmp(name, ".") || !strcmp(name, "..");
+}
+
 int do_netns(int argc, char **argv)
 {
 	netns_nsid_socket_init();
@@ -775,6 +780,11 @@ int do_netns(int argc, char **argv)
 		return netns_list(0, NULL);
 	}
 
+	if (argc > 1 && invalid_name(argv[1])) {
+		fprintf(stderr, "Invalid netns name \"%s\"\n", argv[1]);
+		exit(-1);
+	}
+
 	if ((matches(*argv, "list") == 0) || (matches(*argv, "show") == 0) ||
 	    (matches(*argv, "lst") == 0)) {
 		netns_map_init();

From adbb2965945d3501063a06df6a3ecad6fbf313c4 Mon Sep 17 00:00:00 2001
From: Philip Prindeville <philipp@redfish-solutions.com>
Date: Thu, 20 Jul 2017 13:06:10 -0600
Subject: [PATCH 2/5] iproute2: add support for GRE ignore-df knob

In the presence of firewalls which improperly block ICMP Unreachable
(including Fragmentation Required) messages, Path MTU Discovery is
prevented from working.

The workaround is to handle IPv4 payloads opaquely, ignoring the DF
bit.

Kernel commit 22a59be8b7693eb2d0897a9638f5991f2f8e4ddd ("net: ipv4:
Add ability to have GRE ignore DF bit in IPv4 payloads") is
complemented by this user-space changeset which exposes control of
this setting.

Reviewed-by: Stephen Hemminger <stephen@networkplumber.org>
Signed-off-by: Philip Prindeville <philipp@redfish-solutions.com>
---
 doc/ip-cref.tex      |  7 +++++++
 ip/link_gre.c        | 19 +++++++++++++++++++
 man/man8/ip-tunnel.8 |  9 +++++++++
 3 files changed, 35 insertions(+)

diff --git a/doc/ip-cref.tex b/doc/ip-cref.tex
index 242cc266..179baa2f 100644
--- a/doc/ip-cref.tex
+++ b/doc/ip-cref.tex
@@ -2524,6 +2524,13 @@ It must be an address on another interface of this host.
 	It is enabled by default. Note that a fixed ttl is incompatible
 	with this option: tunnelling with a fixed ttl always makes pmtu discovery.
 
+\item \verb|ignore-df|
+
+--- (only GRE tunnels) enable IPv4 DF flag suppression on this tunnel.
+	It is disabled by default. Enabling this option will cause IPv4
+	payloads to be handled like any other GRE payload,
+	regardless of the DF flag.
+
 \item \verb|key K|, \verb|ikey K|, \verb|okey K|
 
 --- (only GRE tunnels) use keyed GRE with key \verb|K|. \verb|K| is
diff --git a/ip/link_gre.c b/ip/link_gre.c
index 82df9006..c2ec5f26 100644
--- a/ip/link_gre.c
+++ b/ip/link_gre.c
@@ -34,6 +34,7 @@ static void print_usage(FILE *f)
 		"                            [ ttl TTL ]\n"
 		"                            [ tos TOS ]\n"
 		"                            [ [no]pmtudisc ]\n"
+		"                            [ [no]ignore-df ]\n"
 		"                            [ dev PHYS_DEV ]\n"
 		"                            [ noencap ]\n"
 		"                            [ encap { fou | gue | none } ]\n"
@@ -93,6 +94,7 @@ static int gre_parse_opt(struct link_util *lu, int argc, char **argv,
 	__u16 encapsport = 0;
 	__u16 encapdport = 0;
 	__u8 metadata = 0;
+	__u8 ignore_df = 0;
 	__u32 fwmark = 0;
 
 	if (!(n->nlmsg_flags & NLM_F_CREATE)) {
@@ -164,6 +166,10 @@ get_failed:
 		if (greinfo[IFLA_GRE_COLLECT_METADATA])
 			metadata = 1;
 
+		if (greinfo[IFLA_GRE_IGNORE_DF])
+			ignore_df =
+				!!rta_getattr_u8(greinfo[IFLA_GRE_IGNORE_DF]);
+
 		if (greinfo[IFLA_GRE_FWMARK])
 			fwmark = rta_getattr_u32(greinfo[IFLA_GRE_FWMARK]);
 	}
@@ -311,6 +317,13 @@ get_failed:
 			encapflags |= ~TUNNEL_ENCAP_FLAG_REMCSUM;
 		} else if (strcmp(*argv, "external") == 0) {
 			metadata = 1;
+		} else if (strcmp(*argv, "ignore-df") == 0) {
+			ignore_df = 1;
+		} else if (strcmp(*argv, "noignore-df") == 0) {
+			/*
+			 *only the lsb is significant, use 2 for presence
+			 */
+			ignore_df = 2;
 		} else if (strcmp(*argv, "fwmark") == 0) {
 			NEXT_ARG();
 			if (get_u32(&fwmark, *argv, 0))
@@ -355,6 +368,9 @@ get_failed:
 	addattr16(n, 1024, IFLA_GRE_ENCAP_SPORT, htons(encapsport));
 	addattr16(n, 1024, IFLA_GRE_ENCAP_DPORT, htons(encapdport));
 
+	if (ignore_df)
+		addattr8(n, 1024, IFLA_GRE_IGNORE_DF, ignore_df & 1);
+
 	return 0;
 }
 
@@ -454,6 +470,9 @@ static void gre_print_opt(struct link_util *lu, FILE *f, struct rtattr *tb[])
 	else
 		fputs("external ", f);
 
+	if (tb[IFLA_GRE_IGNORE_DF] && rta_getattr_u8(tb[IFLA_GRE_IGNORE_DF]))
+		fputs("ignore-df ", f);
+
 	if (tb[IFLA_GRE_ENCAP_TYPE] &&
 	    rta_getattr_u16(tb[IFLA_GRE_ENCAP_TYPE]) != TUNNEL_ENCAP_NONE) {
 		__u16 type = rta_getattr_u16(tb[IFLA_GRE_ENCAP_TYPE]);
diff --git a/man/man8/ip-tunnel.8 b/man/man8/ip-tunnel.8
index 4938c740..7ddbffb2 100644
--- a/man/man8/ip-tunnel.8
+++ b/man/man8/ip-tunnel.8
@@ -49,6 +49,7 @@ ip-tunnel - tunnel configuration
 .BR 6rd-reset " ]"
 .br
 .RB "[ [" no "]" pmtudisc " ]"
+.RB "[ [" no "]" ignore-df " ]"
 .RB "[ " dev
 .IR PHYS_DEV " ]"
 
@@ -175,6 +176,14 @@ It is enabled by default. Note that a fixed ttl is incompatible
 with this option: tunneling with a fixed ttl always makes pmtu
 discovery.
 
+.TP
+.B ignore-df
+enable IPv4 DF suppression on this tunnel.
+Normally datagrams that exceed the MTU will be fragmented; the presence
+of the DF flag inhibits this, resulting instead in an ICMP Unreachable
+(Fragmentation Required) message.  Enabling this attribute causes the
+DF flag to be ignored.
+
 .TP
 .BI key " K"
 .TP

From 2f406f2d0b4ef6e62fc33173219fb419a42a66b0 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=C3=89lie=20Bouttier?= <elie@bouttier.eu>
Date: Sun, 23 Jul 2017 00:42:02 +0200
Subject: [PATCH 3/5] ip route: replace exits with returns
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This patch replaces exits with returns in ip route
commands.

This allows execution to continue when invoked with ip -batch.

Signed-off-by: Élie Bouttier <elie@bouttier.eu>
---
 ip/iproute.c | 47 ++++++++++++++++++++++++++---------------------
 1 file changed, 26 insertions(+), 21 deletions(-)

diff --git a/ip/iproute.c b/ip/iproute.c
index a735d281..cb695ad4 100644
--- a/ip/iproute.c
+++ b/ip/iproute.c
@@ -137,7 +137,7 @@ static int flush_update(void)
 {
 	if (rtnl_send_check(&rth, filter.flushb, filter.flushp) < 0) {
 		perror("Failed to send flush request");
-		return -1;
+		return -2;
 	}
 	filter.flushp = 0;
 	return 0;
@@ -319,6 +319,7 @@ int print_route(const struct sockaddr_nl *who, struct nlmsghdr *n, void *arg)
 	struct rtattr *tb[RTA_MAX+1];
 	int host_len, family;
 	__u32 table;
+	int ret;
 
 	SPRINT_BUF(b1);
 	static int hz;
@@ -348,8 +349,8 @@ int print_route(const struct sockaddr_nl *who, struct nlmsghdr *n, void *arg)
 		struct nlmsghdr *fn;
 
 		if (NLMSG_ALIGN(filter.flushp) + n->nlmsg_len > filter.flushe) {
-			if (flush_update())
-				return -1;
+			if ((ret = flush_update()) < 0)
+				return ret;
 		}
 		fn = (struct nlmsghdr *)(filter.flushb + NLMSG_ALIGN(filter.flushp));
 		memcpy(fn, n, n->nlmsg_len);
@@ -764,7 +765,7 @@ static int parse_one_nh(struct nlmsghdr *n, struct rtmsg *r,
 			NEXT_ARG();
 			if ((rtnh->rtnh_ifindex = ll_name_to_index(*argv)) == 0) {
 				fprintf(stderr, "Cannot find device \"%s\"\n", *argv);
-				exit(1);
+				return -1;
 			}
 		} else if (strcmp(*argv, "weight") == 0) {
 			unsigned int w;
@@ -1396,6 +1397,7 @@ static int iproute_list_flush_or_save(int argc, char **argv, int action)
 	char *od = NULL;
 	unsigned int mark = 0;
 	rtnl_filter_t filter_fn;
+	int ret;
 
 	if (action == IPROUTE_SAVE) {
 		if (save_route_prep())
@@ -1604,12 +1606,12 @@ static int iproute_list_flush_or_save(int argc, char **argv, int action)
 		for (;;) {
 			if (rtnl_wilddump_request(&rth, do_ipv6, RTM_GETROUTE) < 0) {
 				perror("Cannot send dump request");
-				exit(1);
+				return -2;
 			}
 			filter.flushed = 0;
 			if (rtnl_dump_filter(&rth, filter_fn, stdout) < 0) {
 				fprintf(stderr, "Flush terminated\n");
-				exit(1);
+				return -2;
 			}
 			if (filter.flushed == 0) {
 				if (show_stats) {
@@ -1622,13 +1624,13 @@ static int iproute_list_flush_or_save(int argc, char **argv, int action)
 				return 0;
 			}
 			round++;
-			if (flush_update() < 0)
-				exit(1);
+			if ((ret = flush_update()) < 0)
+				return ret;
 
 			if (time(0) - start > 30) {
 				printf("\n*** Flush not completed after %ld seconds, %d entries remain ***\n",
 				       (long)(time(0) - start), filter.flushed);
-				exit(1);
+				return -1;
 			}
 
 			if (show_stats) {
@@ -1641,21 +1643,21 @@ static int iproute_list_flush_or_save(int argc, char **argv, int action)
 	if (!filter.cloned) {
 		if (rtnl_wilddump_request(&rth, do_ipv6, RTM_GETROUTE) < 0) {
 			perror("Cannot send dump request");
-			exit(1);
+			return -2;
 		}
 	} else {
 		if (rtnl_rtcache_request(&rth, do_ipv6) < 0) {
 			perror("Cannot send dump request");
-			exit(1);
+			return -2;
 		}
 	}
 
 	if (rtnl_dump_filter(&rth, filter_fn, stdout) < 0) {
 		fprintf(stderr, "Dump terminated\n");
-		exit(1);
+		return -2;
 	}
 
-	exit(0);
+	return 0;
 }
 
 
@@ -1761,7 +1763,7 @@ static int iproute_get(int argc, char **argv)
 
 	if (req.r.rtm_dst_len == 0) {
 		fprintf(stderr, "need at least a destination address\n");
-		exit(1);
+		return -1;
 	}
 
 	if (idev || odev)  {
@@ -1918,12 +1920,12 @@ static int iproute_restore(void)
 	int pos, prio;
 
 	if (route_dump_check_magic())
-		exit(-1);
+		return -1;
 
 	pos = ftell(stdin);
 	if (pos == -1) {
 		perror("Failed to restore: ftell");
-		exit(-1);
+		return -1;
 	}
 
 	for (prio = 0; prio < 3; prio++) {
@@ -1931,15 +1933,15 @@ static int iproute_restore(void)
 
 		err = rtnl_from_file(stdin, &restore_handler, &prio);
 		if (err)
-			exit(err);
+			return -2;
 
 		if (fseek(stdin, pos, SEEK_SET) == -1) {
 			perror("Failed to restore: fseek");
-			exit(-1);
+			return -1;
 		}
 	}
 
-	exit(0);
+	return 0;
 }
 
 static int show_handler(const struct sockaddr_nl *nl,
@@ -1953,9 +1955,12 @@ static int show_handler(const struct sockaddr_nl *nl,
 static int iproute_showdump(void)
 {
 	if (route_dump_check_magic())
-		exit(-1);
+		return -1;
 
-	exit(rtnl_from_file(stdin, &show_handler, NULL));
+	if (rtnl_from_file(stdin, &show_handler, NULL))
+		return -2;
+
+	return 0;
 }
 
 void iproute_reset_filter(int ifindex)

From ecb05c0f997dc94d3e811041bfd9ba18f08de06b Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <daniel@iogearbox.net>
Date: Sun, 23 Jul 2017 01:22:18 +0200
Subject: [PATCH 4/5] bpf: improve error reporting around tail calls

Currently, it's still quite hard to figure out if a prog passed the
verifier, but later gets rejected due to different tail call ownership.
Figure out whether that is the case and provide appropriate error
messages to the user.

Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 lib/bpf.c | 226 ++++++++++++++++++++++++++++++++++++++++--------------
 1 file changed, 169 insertions(+), 57 deletions(-)

diff --git a/lib/bpf.c b/lib/bpf.c
index 7eb5cd96..5d9f0a59 100644
--- a/lib/bpf.c
+++ b/lib/bpf.c
@@ -344,15 +344,24 @@ static void bpf_map_pin_report(const struct bpf_elf_map *pin,
 	fprintf(stderr, "\n");
 }
 
-static int bpf_map_selfcheck_pinned(int fd, const struct bpf_elf_map *map,
-				    int length, enum bpf_prog_type type)
+struct bpf_prog_data {
+	unsigned int type;
+	unsigned int jited;
+};
+
+struct bpf_map_ext {
+	struct bpf_prog_data owner;
+};
+
+static int bpf_derive_elf_map_from_fdinfo(int fd, struct bpf_elf_map *map,
+					  struct bpf_map_ext *ext)
 {
+	unsigned int val, owner_type = 0, owner_jited = 0;
 	char file[PATH_MAX], buff[4096];
-	struct bpf_elf_map tmp = {}, zero = {};
-	unsigned int val, owner_type = 0;
 	FILE *fp;
 
 	snprintf(file, sizeof(file), "/proc/%d/fdinfo/%d", getpid(), fd);
+	memset(map, 0, sizeof(*map));
 
 	fp = fopen(file, "r");
 	if (!fp) {
@@ -362,27 +371,48 @@ static int bpf_map_selfcheck_pinned(int fd, const struct bpf_elf_map *map,
 
 	while (fgets(buff, sizeof(buff), fp)) {
 		if (sscanf(buff, "map_type:\t%u", &val) == 1)
-			tmp.type = val;
+			map->type = val;
 		else if (sscanf(buff, "key_size:\t%u", &val) == 1)
-			tmp.size_key = val;
+			map->size_key = val;
 		else if (sscanf(buff, "value_size:\t%u", &val) == 1)
-			tmp.size_value = val;
+			map->size_value = val;
 		else if (sscanf(buff, "max_entries:\t%u", &val) == 1)
-			tmp.max_elem = val;
+			map->max_elem = val;
 		else if (sscanf(buff, "map_flags:\t%i", &val) == 1)
-			tmp.flags = val;
+			map->flags = val;
 		else if (sscanf(buff, "owner_prog_type:\t%i", &val) == 1)
 			owner_type = val;
+		else if (sscanf(buff, "owner_jited:\t%i", &val) == 1)
+			owner_jited = val;
 	}
 
 	fclose(fp);
+	if (ext) {
+		memset(ext, 0, sizeof(*ext));
+		ext->owner.type  = owner_type;
+		ext->owner.jited = owner_jited;
+	}
+
+	return 0;
+}
+
+static int bpf_map_selfcheck_pinned(int fd, const struct bpf_elf_map *map,
+				    struct bpf_map_ext *ext, int length,
+				    enum bpf_prog_type type)
+{
+	struct bpf_elf_map tmp, zero = {};
+	int ret;
+
+	ret = bpf_derive_elf_map_from_fdinfo(fd, &tmp, ext);
+	if (ret < 0)
+		return ret;
 
 	/* The decision to reject this is on kernel side eventually, but
 	 * at least give the user a chance to know what's wrong.
 	 */
-	if (owner_type && owner_type != type)
+	if (ext->owner.type && ext->owner.type != type)
 		fprintf(stderr, "Program array map owner types differ: %u (obj) != %u (pin)\n",
-			type, owner_type);
+			type, ext->owner.type);
 
 	if (!memcmp(&tmp, map, length)) {
 		return 0;
@@ -882,6 +912,7 @@ int bpf_graft_map(const char *map_path, uint32_t *key, int argc, char **argv)
 		.argc		= argc,
 		.argv		= argv,
 	};
+	struct bpf_map_ext ext = {};
 	int ret, prog_fd, map_fd;
 	enum bpf_mode mode;
 	uint32_t map_key;
@@ -908,7 +939,7 @@ int bpf_graft_map(const char *map_path, uint32_t *key, int argc, char **argv)
 		goto out_prog;
 	}
 
-	ret = bpf_map_selfcheck_pinned(map_fd, &test,
+	ret = bpf_map_selfcheck_pinned(map_fd, &test, &ext,
 				       offsetof(struct bpf_elf_map, max_elem),
 				       type);
 	if (ret < 0) {
@@ -981,7 +1012,12 @@ struct bpf_hash_entry {
 	struct bpf_hash_entry	*next;
 };
 
+struct bpf_config {
+	unsigned int		jit_enabled;
+};
+
 struct bpf_elf_ctx {
+	struct bpf_config	cfg;
 	Elf			*elf_fd;
 	GElf_Ehdr		elf_hdr;
 	Elf_Data		*sym_tab;
@@ -989,6 +1025,7 @@ struct bpf_elf_ctx {
 	int			obj_fd;
 	int			map_fds[ELF_MAX_MAPS];
 	struct bpf_elf_map	maps[ELF_MAX_MAPS];
+	struct bpf_map_ext	maps_ext[ELF_MAX_MAPS];
 	int			sym_num;
 	int			map_num;
 	int			map_len;
@@ -1425,39 +1462,6 @@ static int bpf_find_map_id(const struct bpf_elf_ctx *ctx, uint32_t id)
 	return -ENOENT;
 }
 
-static int bpf_derive_elf_map_from_fdinfo(int fd, struct bpf_elf_map *map)
-{
-	char file[PATH_MAX], buff[4096];
-	unsigned int val;
-	FILE *fp;
-
-	snprintf(file, sizeof(file), "/proc/%d/fdinfo/%d", getpid(), fd);
-
-	memset(map, 0, sizeof(*map));
-
-	fp = fopen(file, "r");
-	if (!fp) {
-		fprintf(stderr, "No procfs support?!\n");
-		return -EIO;
-	}
-
-	while (fgets(buff, sizeof(buff), fp)) {
-		if (sscanf(buff, "map_type:\t%u", &val) == 1)
-			map->type = val;
-		else if (sscanf(buff, "key_size:\t%u", &val) == 1)
-			map->size_key = val;
-		else if (sscanf(buff, "value_size:\t%u", &val) == 1)
-			map->size_value = val;
-		else if (sscanf(buff, "max_entries:\t%u", &val) == 1)
-			map->max_elem = val;
-		else if (sscanf(buff, "map_flags:\t%i", &val) == 1)
-			map->flags = val;
-	}
-
-	fclose(fp);
-	return 0;
-}
-
 static void bpf_report_map_in_map(int outer_fd, int inner_fd, uint32_t idx)
 {
 	struct bpf_elf_map outer_map;
@@ -1465,7 +1469,7 @@ static void bpf_report_map_in_map(int outer_fd, int inner_fd, uint32_t idx)
 
 	fprintf(stderr, "Cannot insert map into map! ");
 
-	ret = bpf_derive_elf_map_from_fdinfo(outer_fd, &outer_map);
+	ret = bpf_derive_elf_map_from_fdinfo(outer_fd, &outer_map, NULL);
 	if (!ret) {
 		if (idx >= outer_map.max_elem &&
 		    outer_map.type == BPF_MAP_TYPE_ARRAY_OF_MAPS) {
@@ -1484,14 +1488,15 @@ static bool bpf_is_map_in_map_type(const struct bpf_elf_map *map)
 	       map->type == BPF_MAP_TYPE_HASH_OF_MAPS;
 }
 
-static int bpf_map_attach(const char *name, const struct bpf_elf_map *map,
-			  struct bpf_elf_ctx *ctx, int *have_map_in_map)
+static int bpf_map_attach(const char *name, struct bpf_elf_ctx *ctx,
+			  const struct bpf_elf_map *map, struct bpf_map_ext *ext,
+			  int *have_map_in_map)
 {
 	int fd, ret, map_inner_fd = 0;
 
 	fd = bpf_probe_pinned(name, ctx, map->pinning);
 	if (fd > 0) {
-		ret = bpf_map_selfcheck_pinned(fd, map,
+		ret = bpf_map_selfcheck_pinned(fd, map, ext,
 					       offsetof(struct bpf_elf_map,
 							id), ctx->type);
 		if (ret < 0) {
@@ -1581,8 +1586,8 @@ static int bpf_maps_attach_all(struct bpf_elf_ctx *ctx)
 		if (!map_name)
 			return -EIO;
 
-		fd = bpf_map_attach(map_name, &ctx->maps[i], ctx,
-				    &have_map_in_map);
+		fd = bpf_map_attach(map_name, ctx, &ctx->maps[i],
+				    &ctx->maps_ext[i], &have_map_in_map);
 		if (fd < 0)
 			return fd;
 
@@ -1597,8 +1602,8 @@ static int bpf_maps_attach_all(struct bpf_elf_ctx *ctx)
 		if (!map_name)
 			return -EIO;
 
-		fd = bpf_map_attach(map_name, &ctx->maps[i], ctx,
-				    NULL);
+		fd = bpf_map_attach(map_name, ctx, &ctx->maps[i],
+				    &ctx->maps_ext[i], NULL);
 		if (fd < 0)
 			return fd;
 
@@ -1901,9 +1906,15 @@ static int bpf_fetch_prog(struct bpf_elf_ctx *ctx, const char *section,
 	return fd;
 }
 
+struct bpf_tail_call_props {
+	unsigned int total;
+	unsigned int jited;
+};
+
 static int bpf_apply_relo_data(struct bpf_elf_ctx *ctx,
 			       struct bpf_elf_sec_data *data_relo,
-			       struct bpf_elf_sec_data *data_insn)
+			       struct bpf_elf_sec_data *data_insn,
+			       struct bpf_tail_call_props *props)
 {
 	Elf_Data *idata = data_insn->sec_data;
 	GElf_Shdr *rhdr = &data_relo->sec_hdr;
@@ -1943,6 +1954,13 @@ static int bpf_apply_relo_data(struct bpf_elf_ctx *ctx,
 			return -EINVAL;
 		if (!ctx->map_fds[rmap])
 			return -EINVAL;
+		if (ctx->maps[rmap].type == BPF_MAP_TYPE_PROG_ARRAY) {
+			props->total++;
+			if (ctx->maps_ext[rmap].owner.jited ||
+			    (ctx->maps_ext[rmap].owner.type == 0 &&
+			     ctx->cfg.jit_enabled))
+				props->jited++;
+		}
 
 		if (ctx->verbose)
 			fprintf(stderr, "Map \'%s\' (%d) injected into prog section \'%s\' at offset %u!\n",
@@ -1964,6 +1982,8 @@ static int bpf_fetch_prog_relo(struct bpf_elf_ctx *ctx, const char *section,
 	int ret, idx, i, fd = -1;
 
 	for (i = 1; i < ctx->elf_hdr.e_shnum; i++) {
+		struct bpf_tail_call_props props = {};
+
 		ret = bpf_fill_section_data(ctx, i, &data_relo);
 		if (ret < 0 || data_relo.sec_hdr.sh_type != SHT_REL)
 			continue;
@@ -1979,7 +1999,7 @@ static int bpf_fetch_prog_relo(struct bpf_elf_ctx *ctx, const char *section,
 
 		*sseen = true;
 
-		ret = bpf_apply_relo_data(ctx, &data_relo, &data_insn);
+		ret = bpf_apply_relo_data(ctx, &data_relo, &data_insn, &props);
 		if (ret < 0) {
 			*lderr = true;
 			return ret;
@@ -1994,6 +2014,16 @@ static int bpf_fetch_prog_relo(struct bpf_elf_ctx *ctx, const char *section,
 		fd = bpf_prog_attach(section, &prog, ctx);
 		if (fd < 0) {
 			*lderr = true;
+			if (props.total) {
+				if (ctx->cfg.jit_enabled &&
+				    props.total != props.jited)
+					fprintf(stderr, "JIT enabled, but only %u/%u tail call maps in the program have JITed owner!\n",
+						props.jited, props.total);
+				if (!ctx->cfg.jit_enabled &&
+				    props.jited)
+					fprintf(stderr, "JIT disabled, but %u/%u tail call maps in the program have JITed owner!\n",
+						props.jited, props.total);
+			}
 			return fd;
 		}
 
@@ -2031,6 +2061,51 @@ static int bpf_find_map_by_id(struct bpf_elf_ctx *ctx, uint32_t id)
 	return -1;
 }
 
+struct bpf_jited_aux {
+	int prog_fd;
+	int map_fd;
+	struct bpf_prog_data prog;
+	struct bpf_map_ext map;
+};
+
+static int bpf_derive_prog_from_fdinfo(int fd, struct bpf_prog_data *prog)
+{
+	char file[PATH_MAX], buff[4096];
+	unsigned int val;
+	FILE *fp;
+
+	snprintf(file, sizeof(file), "/proc/%d/fdinfo/%d", getpid(), fd);
+	memset(prog, 0, sizeof(*prog));
+
+	fp = fopen(file, "r");
+	if (!fp) {
+		fprintf(stderr, "No procfs support?!\n");
+		return -EIO;
+	}
+
+	while (fgets(buff, sizeof(buff), fp)) {
+		if (sscanf(buff, "prog_type:\t%u", &val) == 1)
+			prog->type = val;
+		else if (sscanf(buff, "prog_jited:\t%u", &val) == 1)
+			prog->jited = val;
+	}
+
+	fclose(fp);
+	return 0;
+}
+
+static int bpf_tail_call_get_aux(struct bpf_jited_aux *aux)
+{
+	struct bpf_elf_map tmp;
+	int ret;
+
+	ret = bpf_derive_elf_map_from_fdinfo(aux->map_fd, &tmp, &aux->map);
+	if (!ret)
+		ret = bpf_derive_prog_from_fdinfo(aux->prog_fd, &aux->prog);
+
+	return ret;
+}
+
 static int bpf_fill_prog_arrays(struct bpf_elf_ctx *ctx)
 {
 	struct bpf_elf_sec_data data;
@@ -2060,10 +2135,31 @@ static int bpf_fill_prog_arrays(struct bpf_elf_ctx *ctx)
 		ret = bpf_map_update(ctx->map_fds[idx], &key_id,
 				     &fd, BPF_ANY);
 		if (ret < 0) {
-			if (errno == E2BIG)
+			struct bpf_jited_aux aux = {};
+
+			ret = -errno;
+			if (errno == E2BIG) {
 				fprintf(stderr, "Tail call key %u for map %u out of bounds?\n",
 					key_id, map_id);
-			return -errno;
+				return ret;
+			}
+
+			aux.map_fd  = ctx->map_fds[idx];
+			aux.prog_fd = fd;
+
+			if (bpf_tail_call_get_aux(&aux))
+				return ret;
+			if (!aux.map.owner.type)
+				return ret;
+
+			if (aux.prog.type != aux.map.owner.type)
+				fprintf(stderr, "Tail call map owned by prog type %u, but prog type is %u!\n",
+					aux.map.owner.type, aux.prog.type);
+			if (aux.prog.jited != aux.map.owner.jited)
+				fprintf(stderr, "Tail call map %s jited, but prog %s!\n",
+					aux.map.owner.jited ? "is" : "not",
+					aux.prog.jited ? "is" : "not");
+			return ret;
 		}
 
 		ctx->sec_done[i] = true;
@@ -2221,6 +2317,21 @@ static int bpf_elf_check_ehdr(const struct bpf_elf_ctx *ctx)
 	return 0;
 }
 
+static void bpf_get_cfg(struct bpf_elf_ctx *ctx)
+{
+	static const char *path_jit = "/proc/sys/net/core/bpf_jit_enable";
+	int fd;
+
+	fd = open(path_jit, O_RDONLY);
+	if (fd > 0) {
+		char tmp[16] = {};
+
+		if (read(fd, tmp, sizeof(tmp)) > 0)
+			ctx->cfg.jit_enabled = atoi(tmp);
+		close(fd);
+	}
+}
+
 static int bpf_elf_ctx_init(struct bpf_elf_ctx *ctx, const char *pathname,
 			    enum bpf_prog_type type, bool verbose)
 {
@@ -2231,6 +2342,7 @@ static int bpf_elf_ctx_init(struct bpf_elf_ctx *ctx, const char *pathname,
 		return ret;
 
 	memset(ctx, 0, sizeof(*ctx));
+	bpf_get_cfg(ctx);
 	ctx->verbose = verbose;
 	ctx->type    = type;
 

From 95ae9a4870e7dbf6a01c894da5aec146b59c0486 Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <daniel@iogearbox.net>
Date: Sun, 23 Jul 2017 01:22:19 +0200
Subject: [PATCH 5/5] bpf: fix mnt path when from env

When the bpf fs mount path is taken from the environment, behavior is
currently broken as we continue to search in the default paths; fix this up.

Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 lib/bpf.c | 55 +++++++++++++++++++++++++++++++++++++++++++++----------
 1 file changed, 45 insertions(+), 10 deletions(-)

diff --git a/lib/bpf.c b/lib/bpf.c
index 5d9f0a59..e7a4d12f 100644
--- a/lib/bpf.c
+++ b/lib/bpf.c
@@ -459,6 +459,24 @@ static int bpf_mnt_fs(const char *target)
 	return 0;
 }
 
+static int bpf_mnt_check_target(const char *target)
+{
+	struct stat sb = {};
+	int ret;
+
+	ret = stat(target, &sb);
+	if (ret) {
+		ret = mkdir(target, S_IRWXU);
+		if (ret) {
+			fprintf(stderr, "mkdir %s failed: %s\n", target,
+				strerror(errno));
+			return ret;
+		}
+	}
+
+	return 0;
+}
+
 static int bpf_valid_mntpt(const char *mnt, unsigned long magic)
 {
 	struct statfs st_fs;
@@ -471,6 +489,21 @@ static int bpf_valid_mntpt(const char *mnt, unsigned long magic)
 	return 0;
 }
 
+static const char *bpf_find_mntpt_single(unsigned long magic, char *mnt,
+					 int len, const char *mntpt)
+{
+	int ret;
+
+	ret = bpf_valid_mntpt(mntpt, magic);
+	if (!ret) {
+		strncpy(mnt, mntpt, len - 1);
+		mnt[len - 1] = 0;
+		return mnt;
+	}
+
+	return NULL;
+}
+
 static const char *bpf_find_mntpt(const char *fstype, unsigned long magic,
 				  char *mnt, int len,
 				  const char * const *known_mnts)
@@ -482,11 +515,8 @@ static const char *bpf_find_mntpt(const char *fstype, unsigned long magic,
 	if (known_mnts) {
 		ptr = known_mnts;
 		while (*ptr) {
-			if (bpf_valid_mntpt(*ptr, magic) == 0) {
-				strncpy(mnt, *ptr, len - 1);
-				mnt[len - 1] = 0;
+			if (bpf_find_mntpt_single(magic, mnt, len, *ptr))
 				return mnt;
-			}
 			ptr++;
 		}
 	}
@@ -664,6 +694,7 @@ static const char *bpf_get_work_dir(enum bpf_prog_type type)
 	static char bpf_wrk_dir[PATH_MAX];
 	static const char *mnt;
 	static bool bpf_mnt_cached;
+	const char *mnt_env = getenv(BPF_ENV_MNT);
 	static const char * const bpf_known_mnts[] = {
 		BPF_DIR_MNT,
 		"/bpf",
@@ -682,13 +713,17 @@ static const char *bpf_get_work_dir(enum bpf_prog_type type)
 		return out;
 	}
 
-	mnt = bpf_find_mntpt("bpf", BPF_FS_MAGIC, bpf_tmp, sizeof(bpf_tmp),
-			     bpf_known_mnts);
+	if (mnt_env)
+		mnt = bpf_find_mntpt_single(BPF_FS_MAGIC, bpf_tmp,
+					    sizeof(bpf_tmp), mnt_env);
+	else
+		mnt = bpf_find_mntpt("bpf", BPF_FS_MAGIC, bpf_tmp,
+				     sizeof(bpf_tmp), bpf_known_mnts);
 	if (!mnt) {
-		mnt = getenv(BPF_ENV_MNT);
-		if (!mnt)
-			mnt = BPF_DIR_MNT;
-		ret = bpf_mnt_fs(mnt);
+		mnt = mnt_env ? : BPF_DIR_MNT;
+		ret = bpf_mnt_check_target(mnt);
+		if (!ret)
+			ret = bpf_mnt_fs(mnt);
 		if (ret) {
 			mnt = NULL;
 			goto out;