diff --git a/examples/bpf/bpf_agent.c b/examples/bpf/bpf_agent.c
deleted file mode 100644
index f9b9ce3c..00000000
--- a/examples/bpf/bpf_agent.c
+++ /dev/null
@@ -1,258 +0,0 @@
-/*
- * eBPF user space agent part
- *
- * Simple, _self-contained_ user space agent for the eBPF kernel
- * ebpf_prog.c program, which gets all map fds passed from tc via unix
- * domain socket in one transaction and can thus keep referencing
- * them from user space in order to read out (or possibly modify)
- * map data. Here, just as a minimal example to display counters.
- *
- * The agent only uses the bpf(2) syscall API to read or possibly
- * write to eBPF maps, it doesn't need to be aware of the low-level
- * bytecode parts and/or ELF parsing bits.
- *
- * ! For more details, see header comment in bpf_prog.c !
- *
- * gcc bpf_agent.c -o bpf_agent -Wall -O2
- *
- * For example, a more complex user space agent could run on each
- * host, reading and writing into eBPF maps used by tc classifier
- * and actions. It would thus allow for implementing a distributed
- * tc architecture, for example, which would push down central
- * policies into eBPF maps, and thus altering run-time behaviour.
- *
- *   -- Happy eBPF hacking! ;)
- */
-
-#define _GNU_SOURCE
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <errno.h>
-#include <unistd.h>
-#include <stdint.h>
-#include <assert.h>
-
-#include <sys/un.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-#include <sys/socket.h>
-
-/* Just some misc macros as min(), offsetof(), etc. */
-#include "../../include/utils.h"
-/* Common code from fd passing. */
-#include "../../include/bpf_scm.h"
-/* Common, shared definitions with ebpf_prog.c */
-#include "bpf_shared.h"
-/* Mini syscall wrapper */
-#include "bpf_sys.h"
-
-static void bpf_dump_drops(int fd)
-{
-	int cpu, max;
-
-	max = sysconf(_SC_NPROCESSORS_ONLN);
-
-	printf(" `- number of drops:");
-	for (cpu = 0; cpu < max; cpu++) {
-		long drops;
-
-		assert(bpf_lookup_elem(fd, &cpu, &drops) == 0);
-		printf("\tcpu%d: %5ld", cpu, drops);
-	}
-	printf("\n");
-}
-
-static void bpf_dump_queue(int fd)
-{
-	/* Just for the same of the example. */
-	int max_queue = 4, i;
-
-	printf("  | nic queues:");
-	for (i = 0; i < max_queue; i++) {
-		struct count_queue cq;
-		int ret;
-
-		memset(&cq, 0, sizeof(cq));
-		ret = bpf_lookup_elem(fd, &i, &cq);
-		assert(ret == 0 || (ret < 0 && errno == ENOENT));
-
-		printf("\tq%d:[pkts: %ld, mis: %ld]",
-		       i, cq.total, cq.mismatch);
-	}
-	printf("\n");
-}
-
-static void bpf_dump_proto(int fd)
-{
-	uint8_t protos[] = { IPPROTO_TCP, IPPROTO_UDP, IPPROTO_ICMP };
-	char *names[] = { "tcp", "udp", "icmp" };
-	int i;
-
-	printf("  ` protos:");
-	for (i = 0; i < ARRAY_SIZE(protos); i++) {
-		struct count_tuple ct;
-		int ret;
-
-		memset(&ct, 0, sizeof(ct));
-		ret = bpf_lookup_elem(fd, &protos[i], &ct);
-		assert(ret == 0 || (ret < 0 && errno == ENOENT));
-
-		printf("\t%s:[pkts: %ld, bytes: %ld]",
-		       names[i], ct.packets, ct.bytes);
-	}
-	printf("\n");
-}
-
-static void bpf_dump_map_data(int *tfd)
-{
-	int i;
-
-	for (i = 0; i < 30; i++) {
-		const int period = 5;
-
-		printf("data, period: %dsec\n", period);
-
-		bpf_dump_drops(tfd[BPF_MAP_ID_DROPS]);
-		bpf_dump_queue(tfd[BPF_MAP_ID_QUEUE]);
-		bpf_dump_proto(tfd[BPF_MAP_ID_PROTO]);
-
-		sleep(period);
-	}
-}
-
-static void bpf_info_loop(int *fds, struct bpf_map_aux *aux)
-{
-	int i, tfd[BPF_MAP_ID_MAX];
-
-	printf("ver: %d\nobj: %s\ndev: %lu\nino: %lu\nmaps: %u\n",
-	       aux->uds_ver, aux->obj_name, aux->obj_st.st_dev,
-	       aux->obj_st.st_ino, aux->num_ent);
-
-	for (i = 0; i < aux->num_ent; i++) {
-		printf("map%d:\n", i);
-		printf(" `- fd: %u\n", fds[i]);
-		printf("  | serial: %u\n", aux->ent[i].id);
-		printf("  | type: %u\n", aux->ent[i].type);
-		printf("  | max elem: %u\n", aux->ent[i].max_elem);
-		printf("  | size key: %u\n", aux->ent[i].size_key);
-		printf("  ` size val: %u\n", aux->ent[i].size_value);
-
-		tfd[aux->ent[i].id] = fds[i];
-	}
-
-	bpf_dump_map_data(tfd);
-}
-
-static void bpf_map_get_from_env(int *tfd)
-{
-	char key[64], *val;
-	int i;
-
-	for (i = 0; i < BPF_MAP_ID_MAX; i++) {
-		memset(key, 0, sizeof(key));
-		snprintf(key, sizeof(key), "BPF_MAP%d", i);
-
-		val = getenv(key);
-		assert(val != NULL);
-
-		tfd[i] = atoi(val);
-	}
-}
-
-static int bpf_map_set_recv(int fd, int *fds,  struct bpf_map_aux *aux,
-			    unsigned int entries)
-{
-	struct bpf_map_set_msg msg;
-	int *cmsg_buf, min_fd, i;
-	char *amsg_buf, *mmsg_buf;
-
-	cmsg_buf = bpf_map_set_init(&msg, NULL, 0);
-	amsg_buf = (char *)msg.aux.ent;
-	mmsg_buf = (char *)&msg.aux;
-
-	for (i = 0; i < entries; i += min_fd) {
-		struct cmsghdr *cmsg;
-		int ret;
-
-		min_fd = min(BPF_SCM_MAX_FDS * 1U, entries - i);
-
-		bpf_map_set_init_single(&msg, min_fd);
-
-		ret = recvmsg(fd, &msg.hdr, 0);
-		if (ret <= 0)
-			return ret ? : -1;
-
-		cmsg = CMSG_FIRSTHDR(&msg.hdr);
-		if (!cmsg || cmsg->cmsg_type != SCM_RIGHTS)
-			return -EINVAL;
-		if (msg.hdr.msg_flags & MSG_CTRUNC)
-			return -EIO;
-
-		min_fd = (cmsg->cmsg_len - sizeof(*cmsg)) / sizeof(fd);
-		if (min_fd > entries || min_fd <= 0)
-			return -1;
-
-		memcpy(&fds[i], cmsg_buf, sizeof(fds[0]) * min_fd);
-		memcpy(&aux->ent[i], amsg_buf, sizeof(aux->ent[0]) * min_fd);
-		memcpy(aux, mmsg_buf, offsetof(struct bpf_map_aux, ent));
-
-		if (i + min_fd == aux->num_ent)
-			break;
-	}
-
-	return 0;
-}
-
-int main(int argc, char **argv)
-{
-	int fds[BPF_SCM_MAX_FDS];
-	struct bpf_map_aux aux;
-	struct sockaddr_un addr;
-	int fd, ret, i;
-
-	/* When arguments are being passed, we take it as a path
-	 * to a Unix domain socket, otherwise we grab the fds
-	 * from the environment to demonstrate both possibilities.
-	 */
-	if (argc == 1) {
-		int tfd[BPF_MAP_ID_MAX];
-
-		bpf_map_get_from_env(tfd);
-		bpf_dump_map_data(tfd);
-
-		return 0;
-	}
-
-	fd = socket(AF_UNIX, SOCK_DGRAM, 0);
-	if (fd < 0) {
-		fprintf(stderr, "Cannot open socket: %s\n",
-			strerror(errno));
-		exit(1);
-	}
-
-	memset(&addr, 0, sizeof(addr));
-	addr.sun_family = AF_UNIX;
-	strncpy(addr.sun_path, argv[argc - 1], sizeof(addr.sun_path));
-
-	ret = bind(fd, (struct sockaddr *)&addr, sizeof(addr));
-	if (ret < 0) {
-		fprintf(stderr, "Cannot bind to socket: %s\n",
-			strerror(errno));
-		exit(1);
-	}
-
-	memset(fds, 0, sizeof(fds));
-	memset(&aux, 0, sizeof(aux));
-
-	ret = bpf_map_set_recv(fd, fds, &aux, BPF_SCM_MAX_FDS);
-	if (ret >= 0)
-		bpf_info_loop(fds, &aux);
-
-	for (i = 0; i < aux.num_ent; i++)
-		close(fds[i]);
-
-	close(fd);
-	return 0;
-}
diff --git a/examples/bpf/bpf_map_in_map.c b/examples/bpf/bpf_map_in_map.c
new file mode 100644
index 00000000..ff0e623a
--- /dev/null
+++ b/examples/bpf/bpf_map_in_map.c
@@ -0,0 +1,56 @@
+#include "../../include/bpf_api.h"
+
+#define MAP_INNER_ID	42
+
+struct bpf_elf_map __section_maps map_inner = {
+	.type		= BPF_MAP_TYPE_ARRAY,
+	.size_key	= sizeof(uint32_t),
+	.size_value	= sizeof(uint32_t),
+	.id		= MAP_INNER_ID,
+	.inner_idx	= 0,
+	.pinning	= PIN_GLOBAL_NS,
+	.max_elem	= 1,
+};
+
+struct bpf_elf_map __section_maps map_outer = {
+	.type		= BPF_MAP_TYPE_ARRAY_OF_MAPS,
+	.size_key	= sizeof(uint32_t),
+	.size_value	= sizeof(uint32_t),
+	.inner_id	= MAP_INNER_ID,
+	.pinning	= PIN_GLOBAL_NS,
+	.max_elem	= 1,
+};
+
+__section("egress")
+int emain(struct __sk_buff *skb)
+{
+	struct bpf_elf_map *map_inner;
+	int key = 0, *val;
+
+	map_inner = map_lookup_elem(&map_outer, &key);
+	if (map_inner) {
+		val = map_lookup_elem(map_inner, &key);
+		if (val)
+			lock_xadd(val, 1);
+	}
+
+	return BPF_H_DEFAULT;
+}
+
+__section("ingress")
+int imain(struct __sk_buff *skb)
+{
+	struct bpf_elf_map *map_inner;
+	int key = 0, *val;
+
+	map_inner = map_lookup_elem(&map_outer, &key);
+	if (map_inner) {
+		val = map_lookup_elem(map_inner, &key);
+		if (val)
+			printt("map val: %d\n", *val);
+	}
+
+	return BPF_H_DEFAULT;
+}
+
+BPF_LICENSE("GPL");
diff --git a/examples/bpf/bpf_prog.c b/examples/bpf/bpf_prog.c
deleted file mode 100644
index d6caf374..00000000
--- a/examples/bpf/bpf_prog.c
+++ /dev/null
@@ -1,501 +0,0 @@
-/*
- * eBPF kernel space program part
- *
- * Toy eBPF program for demonstration purposes, some parts derived from
- * kernel tree's samples/bpf/sockex2_kern.c example.
- *
- * More background on eBPF, kernel tree: Documentation/networking/filter.txt
- *
- * Note, this file is rather large, and most classifier and actions are
- * likely smaller to accomplish one specific use-case and are tailored
- * for high performance. For performance reasons, you might also have the
- * classifier and action already merged inside the classifier.
- *
- * In order to show various features it serves as a bigger programming
- * example, which you should feel free to rip apart and experiment with.
- *
- * Compilation, configuration example:
- *
- *  Note: as long as the BPF backend in LLVM is still experimental,
- *  you need to build LLVM with LLVM with --enable-experimental-targets=BPF
- *  Also, make sure your 4.1+ kernel is compiled with CONFIG_BPF_SYSCALL=y,
- *  and you have libelf.h and gelf.h headers and can link tc against -lelf.
- *
- *  In case you need to sync kernel headers, go to your kernel source tree:
- *  # make headers_install INSTALL_HDR_PATH=/usr/
- *
- *  $ export PATH=/home/<...>/llvm/Debug+Asserts/bin/:$PATH
- *  $ clang -O2 -emit-llvm -c bpf_prog.c -o - | llc -march=bpf -filetype=obj -o bpf.o
- *  $ objdump -h bpf.o
- *  [...]
- *  3 classifier    000007f8  0000000000000000  0000000000000000  00000040  2**3
- *                  CONTENTS, ALLOC, LOAD, RELOC, READONLY, CODE
- *  4 action-mark   00000088  0000000000000000  0000000000000000  00000838  2**3
- *                  CONTENTS, ALLOC, LOAD, RELOC, READONLY, CODE
- *  5 action-rand   00000098  0000000000000000  0000000000000000  000008c0  2**3
- *                  CONTENTS, ALLOC, LOAD, RELOC, READONLY, CODE
- *  6 maps          00000030  0000000000000000  0000000000000000  00000958  2**2
- *                  CONTENTS, ALLOC, LOAD, DATA
- *  7 license       00000004  0000000000000000  0000000000000000  00000988  2**0
- *                  CONTENTS, ALLOC, LOAD, DATA
- *  [...]
- *  # echo 1 > /proc/sys/net/core/bpf_jit_enable
- *  $ gcc bpf_agent.c -o bpf_agent -Wall -O2
- *  # ./bpf_agent /tmp/bpf-uds      (e.g. on a different terminal)
- *  # tc filter add dev em1 parent 1: bpf obj bpf.o exp /tmp/bpf-uds flowid 1:1 \
- *                             action bpf obj bpf.o sec action-mark            \
- *                             action bpf obj bpf.o sec action-rand ok
- *  # tc filter show dev em1
- *  filter parent 1: protocol all pref 49152 bpf
- *  filter parent 1: protocol all pref 49152 bpf handle 0x1 flowid 1:1 bpf.o:[classifier]
- *    action order 1: bpf bpf.o:[action-mark] default-action pipe
- *    index 52 ref 1 bind 1
- *
- *    action order 2: bpf bpf.o:[action-rand] default-action pipe
- *    index 53 ref 1 bind 1
- *
- *    action order 3: gact action pass
- *    random type none pass val 0
- *    index 38 ref 1 bind 1
- *
- * The same program can also be installed on ingress side (as opposed to above
- * egress configuration), e.g.:
- *
- * # tc qdisc add dev em1 handle ffff: ingress
- * # tc filter add dev em1 parent ffff: bpf obj ...
- *
- * Notes on BPF agent:
- *
- * In the above example, the bpf_agent creates the unix domain socket
- * natively. "tc exec" can also spawn a shell and hold the socktes there:
- *
- *  # tc exec bpf imp /tmp/bpf-uds
- *  # tc filter add dev em1 parent 1: bpf obj bpf.o exp /tmp/bpf-uds flowid 1:1 \
- *                             action bpf obj bpf.o sec action-mark            \
- *                             action bpf obj bpf.o sec action-rand ok
- *  sh-4.2# (shell spawned from tc exec)
- *  sh-4.2# bpf_agent
- *  [...]
- *
- * This will read out fds over environment and produce the same data dump
- * as below. This has the advantage that the spawned shell owns the fds
- * and thus if the agent is restarted, it can reattach to the same fds, also
- * various programs can easily read/modify the data simultaneously from user
- * space side.
- *
- * If the shell is unnecessary, the agent can also just be spawned directly
- * via tc exec:
- *
- *  # tc exec bpf imp /tmp/bpf-uds run bpf_agent
- *  # tc filter add dev em1 parent 1: bpf obj bpf.o exp /tmp/bpf-uds flowid 1:1 \
- *                             action bpf obj bpf.o sec action-mark            \
- *                             action bpf obj bpf.o sec action-rand ok
- *
- * BPF agent example output:
- *
- * ver: 1
- * obj: bpf.o
- * dev: 64770
- * ino: 6045133
- * maps: 3
- * map0:
- *  `- fd: 4
- *   | serial: 1
- *   | type: 1
- *   | max elem: 256
- *   | size key: 1
- *   ` size val: 16
- * map1:
- *  `- fd: 5
- *   | serial: 2
- *   | type: 1
- *   | max elem: 1024
- *   | size key: 4
- *   ` size val: 16
- * map2:
- *  `- fd: 6
- *   | serial: 3
- *   | type: 2
- *   | max elem: 64
- *   | size key: 4
- *   ` size val: 8
- * data, period: 5sec
- *  `- number of drops:	cpu0:     0	cpu1:     0	cpu2:     0	cpu3:     0
- *   | nic queues:	q0:[pkts: 0, mis: 0]	q1:[pkts: 0, mis: 0]	q2:[pkts: 0, mis: 0]	q3:[pkts: 0, mis: 0]
- *   ` protos:	tcp:[pkts: 0, bytes: 0]	udp:[pkts: 0, bytes: 0]	icmp:[pkts: 0, bytes: 0]
- * data, period: 5sec
- *  `- number of drops:	cpu0:     5	cpu1:     0	cpu2:     0	cpu3:     1
- *   | nic queues:	q0:[pkts: 0, mis: 0]	q1:[pkts: 0, mis: 0]	q2:[pkts: 24, mis: 14]	q3:[pkts: 0, mis: 0]
- *   ` protos:	tcp:[pkts: 13, bytes: 1989]	udp:[pkts: 10, bytes: 710]	icmp:[pkts: 0, bytes: 0]
- * data, period: 5sec
- *  `- number of drops:	cpu0:     5	cpu1:     0	cpu2:     3	cpu3:     3
- *   | nic queues:	q0:[pkts: 0, mis: 0]	q1:[pkts: 0, mis: 0]	q2:[pkts: 39, mis: 21]	q3:[pkts: 0, mis: 0]
- *   ` protos:	tcp:[pkts: 20, bytes: 3549]	udp:[pkts: 18, bytes: 1278]	icmp:[pkts: 0, bytes: 0]
- * [...]
- *
- * This now means, the below classifier and action pipeline has been loaded
- * as eBPF bytecode into the kernel, the kernel has verified that the
- * execution of the bytecode is "safe", and it has JITed the programs
- * afterwards, so that upon invocation they're running on native speed. tc
- * has transferred all map file descriptors to the bpf_agent via IPC and
- * even after tc exits, the agent can read out or modify all map data.
- *
- * Note that the export to the uds is done only once in the classifier and
- * not in the action. It's enough to export the (here) shared descriptors
- * once.
- *
- * If you need to disassemble the generated JIT image (echo with 2), the
- * kernel tree has under tools/net/ a small helper, you can invoke e.g.
- * `bpf_jit_disasm -o`.
- *
- * Please find in the code below further comments.
- *
- *   -- Happy eBPF hacking! ;)
- */
-#include <stdint.h>
-#include <stdbool.h>
-#include <sys/types.h>
-#include <sys/socket.h>
-#include <asm/types.h>
-#include <linux/in.h>
-#include <linux/if.h>
-#include <linux/if_ether.h>
-#include <linux/ip.h>
-#include <linux/ipv6.h>
-#include <linux/if_tunnel.h>
-#include <linux/filter.h>
-#include <linux/bpf.h>
-
-/* Common, shared definitions with ebpf_agent.c. */
-#include "bpf_shared.h"
-/* BPF helper functions for our example. */
-#include "../../include/bpf_api.h"
-
-/* Could be defined here as well, or included from the header. */
-#define TC_ACT_UNSPEC		(-1)
-#define TC_ACT_OK		0
-#define TC_ACT_RECLASSIFY	1
-#define TC_ACT_SHOT		2
-#define TC_ACT_PIPE		3
-#define TC_ACT_STOLEN		4
-#define TC_ACT_QUEUED		5
-#define TC_ACT_REPEAT		6
-
-/* Other, misc stuff. */
-#define IP_MF			0x2000
-#define IP_OFFSET		0x1FFF
-
-/* eBPF map definitions, all placed in section "maps". */
-struct bpf_elf_map __section("maps") map_proto = {
-	.type		=	BPF_MAP_TYPE_HASH,
-	.id		=	BPF_MAP_ID_PROTO,
-	.size_key	=	sizeof(uint8_t),
-	.size_value	=	sizeof(struct count_tuple),
-	.max_elem	=	256,
-	.flags		=	BPF_F_NO_PREALLOC,
-};
-
-struct bpf_elf_map __section("maps") map_queue = {
-	.type		=	BPF_MAP_TYPE_HASH,
-	.id		=	BPF_MAP_ID_QUEUE,
-	.size_key	=	sizeof(uint32_t),
-	.size_value	=	sizeof(struct count_queue),
-	.max_elem	=	1024,
-	.flags		=	BPF_F_NO_PREALLOC,
-};
-
-struct bpf_elf_map __section("maps") map_drops = {
-	.type		=	BPF_MAP_TYPE_ARRAY,
-	.id		=	BPF_MAP_ID_DROPS,
-	.size_key	=	sizeof(uint32_t),
-	.size_value	=	sizeof(long),
-	.max_elem	=	64,
-};
-
-/* Helper functions and definitions for the flow dissector used by the
- * example classifier. This resembles the kernel's flow dissector to
- * some extend and is just used as an example to show what's possible
- * with eBPF.
- */
-struct sockaddr;
-
-struct vlan_hdr {
-	__be16 h_vlan_TCI;
-	__be16 h_vlan_encapsulated_proto;
-};
-
-struct flow_keys {
-	__u32 src;
-	__u32 dst;
-	union {
-		__u32 ports;
-		__u16 port16[2];
-	};
-	__s32 th_off;
-	__u8 ip_proto;
-};
-
-static __inline__ int flow_ports_offset(__u8 ip_proto)
-{
-	switch (ip_proto) {
-	case IPPROTO_TCP:
-	case IPPROTO_UDP:
-	case IPPROTO_DCCP:
-	case IPPROTO_ESP:
-	case IPPROTO_SCTP:
-	case IPPROTO_UDPLITE:
-	default:
-		return 0;
-	case IPPROTO_AH:
-		return 4;
-	}
-}
-
-static __inline__ bool flow_is_frag(struct __sk_buff *skb, int nh_off)
-{
-	return !!(load_half(skb, nh_off + offsetof(struct iphdr, frag_off)) &
-		  (IP_MF | IP_OFFSET));
-}
-
-static __inline__ int flow_parse_ipv4(struct __sk_buff *skb, int nh_off,
-				      __u8 *ip_proto, struct flow_keys *flow)
-{
-	__u8 ip_ver_len;
-
-	if (unlikely(flow_is_frag(skb, nh_off)))
-		*ip_proto = 0;
-	else
-		*ip_proto = load_byte(skb, nh_off + offsetof(struct iphdr,
-							     protocol));
-	if (*ip_proto != IPPROTO_GRE) {
-		flow->src = load_word(skb, nh_off + offsetof(struct iphdr, saddr));
-		flow->dst = load_word(skb, nh_off + offsetof(struct iphdr, daddr));
-	}
-
-	ip_ver_len = load_byte(skb, nh_off + 0 /* offsetof(struct iphdr, ihl) */);
-	if (likely(ip_ver_len == 0x45))
-		nh_off += 20;
-	else
-		nh_off += (ip_ver_len & 0xF) << 2;
-
-	return nh_off;
-}
-
-static __inline__ __u32 flow_addr_hash_ipv6(struct __sk_buff *skb, int off)
-{
-	__u32 w0 = load_word(skb, off);
-	__u32 w1 = load_word(skb, off + sizeof(w0));
-	__u32 w2 = load_word(skb, off + sizeof(w0) * 2);
-	__u32 w3 = load_word(skb, off + sizeof(w0) * 3);
-
-	return w0 ^ w1 ^ w2 ^ w3;
-}
-
-static __inline__ int flow_parse_ipv6(struct __sk_buff *skb, int nh_off,
-				      __u8 *ip_proto, struct flow_keys *flow)
-{
-	*ip_proto = load_byte(skb, nh_off + offsetof(struct ipv6hdr, nexthdr));
-
-	flow->src = flow_addr_hash_ipv6(skb, nh_off + offsetof(struct ipv6hdr, saddr));
-	flow->dst = flow_addr_hash_ipv6(skb, nh_off + offsetof(struct ipv6hdr, daddr));
-
-	return nh_off + sizeof(struct ipv6hdr);
-}
-
-static __inline__ bool flow_dissector(struct __sk_buff *skb,
-				      struct flow_keys *flow)
-{
-	int poff, nh_off = BPF_LL_OFF + ETH_HLEN;
-	__be16 proto = skb->protocol;
-	__u8 ip_proto;
-
-	/* TODO: check for skb->vlan_tci, skb->vlan_proto first */
-	if (proto == htons(ETH_P_8021AD)) {
-		proto = load_half(skb, nh_off +
-				  offsetof(struct vlan_hdr, h_vlan_encapsulated_proto));
-		nh_off += sizeof(struct vlan_hdr);
-	}
-	if (proto == htons(ETH_P_8021Q)) {
-		proto = load_half(skb, nh_off +
-				  offsetof(struct vlan_hdr, h_vlan_encapsulated_proto));
-		nh_off += sizeof(struct vlan_hdr);
-	}
-
-	if (likely(proto == htons(ETH_P_IP)))
-		nh_off = flow_parse_ipv4(skb, nh_off, &ip_proto, flow);
-	else if (proto == htons(ETH_P_IPV6))
-		nh_off = flow_parse_ipv6(skb, nh_off, &ip_proto, flow);
-	else
-		return false;
-
-	switch (ip_proto) {
-	case IPPROTO_GRE: {
-		struct gre_hdr {
-			__be16 flags;
-			__be16 proto;
-		};
-
-		__u16 gre_flags = load_half(skb, nh_off +
-					    offsetof(struct gre_hdr, flags));
-		__u16 gre_proto = load_half(skb, nh_off +
-					    offsetof(struct gre_hdr, proto));
-
-		if (gre_flags & (GRE_VERSION | GRE_ROUTING))
-			break;
-
-		nh_off += 4;
-		if (gre_flags & GRE_CSUM)
-			nh_off += 4;
-		if (gre_flags & GRE_KEY)
-			nh_off += 4;
-		if (gre_flags & GRE_SEQ)
-			nh_off += 4;
-
-		if (gre_proto == ETH_P_8021Q) {
-			gre_proto = load_half(skb, nh_off +
-					      offsetof(struct vlan_hdr,
-						       h_vlan_encapsulated_proto));
-			nh_off += sizeof(struct vlan_hdr);
-		}
-		if (gre_proto == ETH_P_IP)
-			nh_off = flow_parse_ipv4(skb, nh_off, &ip_proto, flow);
-		else if (gre_proto == ETH_P_IPV6)
-			nh_off = flow_parse_ipv6(skb, nh_off, &ip_proto, flow);
-		else
-			return false;
-		break;
-	}
-	case IPPROTO_IPIP:
-		nh_off = flow_parse_ipv4(skb, nh_off, &ip_proto, flow);
-		break;
-	case IPPROTO_IPV6:
-		nh_off = flow_parse_ipv6(skb, nh_off, &ip_proto, flow);
-	default:
-		break;
-	}
-
-	nh_off += flow_ports_offset(ip_proto);
-
-	flow->ports = load_word(skb, nh_off);
-	flow->th_off = nh_off;
-	flow->ip_proto = ip_proto;
-
-	return true;
-}
-
-static __inline__ void cls_update_proto_map(const struct __sk_buff *skb,
-					    const struct flow_keys *flow)
-{
-	uint8_t proto = flow->ip_proto;
-	struct count_tuple *ct, _ct;
-
-	ct = map_lookup_elem(&map_proto, &proto);
-	if (likely(ct)) {
-		lock_xadd(&ct->packets, 1);
-		lock_xadd(&ct->bytes, skb->len);
-		return;
-	}
-
-	/* No hit yet, we need to create a new entry. */
-	_ct.packets = 1;
-	_ct.bytes = skb->len;
-
-	map_update_elem(&map_proto, &proto, &_ct, BPF_ANY);
-}
-
-static __inline__ void cls_update_queue_map(const struct __sk_buff *skb)
-{
-	uint32_t queue = skb->queue_mapping;
-	struct count_queue *cq, _cq;
-	bool mismatch;
-
-	mismatch = skb->queue_mapping != get_smp_processor_id();
-
-	cq = map_lookup_elem(&map_queue, &queue);
-	if (likely(cq)) {
-		lock_xadd(&cq->total, 1);
-		if (mismatch)
-			lock_xadd(&cq->mismatch, 1);
-		return;
-	}
-
-	/* No hit yet, we need to create a new entry. */
-	_cq.total = 1;
-	_cq.mismatch = mismatch ? 1 : 0;
-
-	map_update_elem(&map_queue, &queue, &_cq, BPF_ANY);
-}
-
-/* eBPF program definitions, placed in various sections, which can
- * have custom section names. If custom names are in use, it's
- * required to point tc to the correct section, e.g.
- *
- *     tc filter add [...] bpf obj cls.o sec cls-tos [...]
- *
- * in case the program resides in __section("cls-tos").
- *
- * Default section for cls_bpf is: "classifier", for act_bpf is:
- * "action". Naturally, if for example multiple actions are present
- * in the same file, they need to have distinct section names.
- *
- * It is however not required to have multiple programs sharing
- * a file.
- */
-__section("classifier")
-int cls_main(struct __sk_buff *skb)
-{
-	struct flow_keys flow;
-
-	if (!flow_dissector(skb, &flow))
-		return 0; /* No match in cls_bpf. */
-
-	cls_update_proto_map(skb, &flow);
-	cls_update_queue_map(skb);
-
-	return flow.ip_proto;
-}
-
-static __inline__ void act_update_drop_map(void)
-{
-	uint32_t *count, cpu = get_smp_processor_id();
-
-	count = map_lookup_elem(&map_drops, &cpu);
-	if (count)
-		/* Only this cpu is accessing this element. */
-		(*count)++;
-}
-
-__section("action-mark")
-int act_mark_main(struct __sk_buff *skb)
-{
-	/* You could also mangle skb data here with the helper function
-	 * BPF_FUNC_skb_store_bytes, etc. Or, alternatively you could
-	 * do that already in the classifier itself as a merged combination
-	 * of classifier'n'action model.
-	 */
-
-	if (skb->mark == 0xcafe) {
-		act_update_drop_map();
-		return TC_ACT_SHOT;
-	}
-
-	/* Default configured tc opcode. */
-	return TC_ACT_UNSPEC;
-}
-
-__section("action-rand")
-int act_rand_main(struct __sk_buff *skb)
-{
-	/* Sorry, we're near event horizon ... */
-	if ((get_prandom_u32() & 3) == 0) {
-		act_update_drop_map();
-		return TC_ACT_SHOT;
-	}
-
-	return TC_ACT_UNSPEC;
-}
-
-/* Last but not least, the file contains a license. Some future helper
- * functions may only be available with a GPL license.
- */
-BPF_LICENSE("GPL");
diff --git a/examples/bpf/bpf_shared.h b/examples/bpf/bpf_shared.h
deleted file mode 100644
index a24038dd..00000000
--- a/examples/bpf/bpf_shared.h
+++ /dev/null
@@ -1,22 +0,0 @@
-#ifndef __BPF_SHARED__
-#define __BPF_SHARED__
-
-enum {
-	BPF_MAP_ID_PROTO,
-	BPF_MAP_ID_QUEUE,
-	BPF_MAP_ID_DROPS,
-	__BPF_MAP_ID_MAX,
-#define BPF_MAP_ID_MAX	__BPF_MAP_ID_MAX
-};
-
-struct count_tuple {
-	long packets; /* type long for lock_xadd() */
-	long bytes;
-};
-
-struct count_queue {
-	long total;
-	long mismatch;
-};
-
-#endif /* __BPF_SHARED__ */
diff --git a/examples/bpf/bpf_sys.h b/examples/bpf/bpf_sys.h
deleted file mode 100644
index 6e4f09e2..00000000
--- a/examples/bpf/bpf_sys.h
+++ /dev/null
@@ -1,23 +0,0 @@
-#ifndef __BPF_SYS__
-#define __BPF_SYS__
-
-#include <sys/syscall.h>
-#include <linux/bpf.h>
-
-static inline __u64 bpf_ptr_to_u64(const void *ptr)
-{
-	return (__u64) (unsigned long) ptr;
-}
-
-static inline int bpf_lookup_elem(int fd, void *key, void *value)
-{
-	union bpf_attr attr = {
-		.map_fd		= fd,
-		.key		= bpf_ptr_to_u64(key),
-		.value		= bpf_ptr_to_u64(value),
-	};
-
-	return syscall(__NR_bpf, BPF_MAP_LOOKUP_ELEM, &attr, sizeof(attr));
-}
-
-#endif /* __BPF_SYS__ */
diff --git a/include/bpf_elf.h b/include/bpf_elf.h
index 239a0f36..406c3087 100644
--- a/include/bpf_elf.h
+++ b/include/bpf_elf.h
@@ -36,6 +36,8 @@ struct bpf_elf_map {
 	__u32 flags;
 	__u32 id;
 	__u32 pinning;
+	__u32 inner_id;
+	__u32 inner_idx;
 };
 
 #endif /* __BPF_ELF__ */
diff --git a/include/bpf_util.h b/include/bpf_util.h
index 5361dab1..6582ec8c 100644
--- a/include/bpf_util.h
+++ b/include/bpf_util.h
@@ -261,6 +261,8 @@ int bpf_prog_load(enum bpf_prog_type type, const struct bpf_insn *insns,
 int bpf_prog_attach_fd(int prog_fd, int target_fd, enum bpf_attach_type type);
 int bpf_prog_detach_fd(int target_fd, enum bpf_attach_type type);
 
+void bpf_dump_prog_info(FILE *f, uint32_t id);
+
 #ifdef HAVE_ELF
 int bpf_send_map_fds(const char *path, const char *obj);
 int bpf_recv_map_fds(const char *path, int *fds, struct bpf_map_aux *aux,
diff --git a/ip/ipaddress.c b/ip/ipaddress.c
index f06f5829..cf8ef818 100644
--- a/ip/ipaddress.c
+++ b/ip/ipaddress.c
@@ -18,7 +18,6 @@
 #include <fcntl.h>
 #include <sys/ioctl.h>
 #include <sys/socket.h>
-#include <sys/ioctl.h>
 #include <sys/param.h>
 #include <errno.h>
 #include <netinet/in.h>
diff --git a/ip/iplink.c b/ip/iplink.c
index 9674cb65..5aff2fde 100644
--- a/ip/iplink.c
+++ b/ip/iplink.c
@@ -26,7 +26,6 @@
 #include <arpa/inet.h>
 #include <string.h>
 #include <sys/ioctl.h>
-#include <linux/sockios.h>
 #include <stdbool.h>
 #include <linux/mpls.h>
 
diff --git a/ip/iproute.c b/ip/iproute.c
index 4e022d77..a735d281 100644
--- a/ip/iproute.c
+++ b/ip/iproute.c
@@ -1731,6 +1731,16 @@ static int iproute_get(int argc, char **argv)
 			addattr32(&req.n, sizeof(req), RTA_UID, uid);
 		} else if (matches(*argv, "fibmatch") == 0) {
 			fib_match = 1;
+		} else if (strcmp(*argv, "as") == 0) {
+			inet_prefix addr;
+
+			NEXT_ARG();
+			if (strcmp(*argv, "to") == 0)
+				NEXT_ARG();
+			get_addr(&addr, *argv, req.r.rtm_family);
+			if (req.r.rtm_family == AF_UNSPEC)
+				req.r.rtm_family = addr.family;
+			addattr_l(&req.n, sizeof(req), RTA_NEWDST, &addr.data, addr.bytelen);
 		} else {
 			inet_prefix addr;
 
diff --git a/lib/bpf.c b/lib/bpf.c
index 6b5a96d0..7eb5cd96 100644
--- a/lib/bpf.c
+++ b/lib/bpf.c
@@ -152,6 +152,54 @@ static int bpf_map_update(int fd, const void *key, const void *value,
 	return bpf(BPF_MAP_UPDATE_ELEM, &attr, sizeof(attr));
 }
 
+static int bpf_prog_fd_by_id(uint32_t id)
+{
+	union bpf_attr attr = {};
+
+	attr.prog_id = id;
+
+	return bpf(BPF_PROG_GET_FD_BY_ID, &attr, sizeof(attr));
+}
+
+static int bpf_prog_info_by_fd(int fd, struct bpf_prog_info *info,
+			       uint32_t *info_len)
+{
+	union bpf_attr attr = {};
+	int ret;
+
+	attr.info.bpf_fd = fd;
+	attr.info.info = bpf_ptr_to_u64(info);
+	attr.info.info_len = *info_len;
+
+	*info_len = 0;
+	ret = bpf(BPF_OBJ_GET_INFO_BY_FD, &attr, sizeof(attr));
+	if (!ret)
+		*info_len = attr.info.info_len;
+
+	return ret;
+}
+
+void bpf_dump_prog_info(FILE *f, uint32_t id)
+{
+	struct bpf_prog_info info = {};
+	uint32_t len = sizeof(info);
+	int fd, ret;
+
+	fprintf(f, "id %u ", id);
+
+	fd = bpf_prog_fd_by_id(id);
+	if (fd < 0)
+		return;
+
+	ret = bpf_prog_info_by_fd(fd, &info, &len);
+	if (!ret && len) {
+		if (info.jited_prog_len)
+			fprintf(f, "jited ");
+	}
+
+	close(fd);
+}
+
 static int bpf_parse_string(char *arg, bool from_file, __u16 *bpf_len,
 			    char **bpf_string, bool *need_release,
 			    const char separator)
@@ -1023,15 +1071,16 @@ static int bpf_log_realloc(struct bpf_elf_ctx *ctx)
 
 static int bpf_map_create(enum bpf_map_type type, uint32_t size_key,
 			  uint32_t size_value, uint32_t max_elem,
-			  uint32_t flags)
+			  uint32_t flags, int inner_fd)
 {
 	union bpf_attr attr = {};
 
 	attr.map_type = type;
 	attr.key_size = size_key;
-	attr.value_size = size_value;
+	attr.value_size = inner_fd ? sizeof(int) : size_value;
 	attr.max_entries = max_elem;
 	attr.map_flags = flags;
+	attr.inner_map_fd = inner_fd;
 
 	return bpf(BPF_MAP_CREATE, &attr, sizeof(attr));
 }
@@ -1343,7 +1392,7 @@ retry:
 
 static void bpf_map_report(int fd, const char *name,
 			   const struct bpf_elf_map *map,
-			   struct bpf_elf_ctx *ctx)
+			   struct bpf_elf_ctx *ctx, int inner_fd)
 {
 	fprintf(stderr, "Map object \'%s\' %s%s (%d)!\n", name,
 		fd < 0 ? "rejected: " : "loaded",
@@ -1354,15 +1403,91 @@ static void bpf_map_report(int fd, const char *name,
 	fprintf(stderr, " - Identifier:   %u\n", map->id);
 	fprintf(stderr, " - Pinning:      %u\n", map->pinning);
 	fprintf(stderr, " - Size key:     %u\n", map->size_key);
-	fprintf(stderr, " - Size value:   %u\n", map->size_value);
+	fprintf(stderr, " - Size value:   %u\n",
+		inner_fd ? (int)sizeof(int) : map->size_value);
 	fprintf(stderr, " - Max elems:    %u\n", map->max_elem);
 	fprintf(stderr, " - Flags:        %#x\n\n", map->flags);
 }
 
-static int bpf_map_attach(const char *name, const struct bpf_elf_map *map,
-			  struct bpf_elf_ctx *ctx)
+static int bpf_find_map_id(const struct bpf_elf_ctx *ctx, uint32_t id)
 {
-	int fd, ret;
+	int i;
+
+	for (i = 0; i < ctx->map_num; i++) {
+		if (ctx->maps[i].id != id)
+			continue;
+		if (ctx->map_fds[i] < 0)
+			return -EINVAL;
+
+		return ctx->map_fds[i];
+	}
+
+	return -ENOENT;
+}
+
+static int bpf_derive_elf_map_from_fdinfo(int fd, struct bpf_elf_map *map)
+{
+	char file[PATH_MAX], buff[4096];
+	unsigned int val;
+	FILE *fp;
+
+	snprintf(file, sizeof(file), "/proc/%d/fdinfo/%d", getpid(), fd);
+
+	memset(map, 0, sizeof(*map));
+
+	fp = fopen(file, "r");
+	if (!fp) {
+		fprintf(stderr, "No procfs support?!\n");
+		return -EIO;
+	}
+
+	while (fgets(buff, sizeof(buff), fp)) {
+		if (sscanf(buff, "map_type:\t%u", &val) == 1)
+			map->type = val;
+		else if (sscanf(buff, "key_size:\t%u", &val) == 1)
+			map->size_key = val;
+		else if (sscanf(buff, "value_size:\t%u", &val) == 1)
+			map->size_value = val;
+		else if (sscanf(buff, "max_entries:\t%u", &val) == 1)
+			map->max_elem = val;
+		else if (sscanf(buff, "map_flags:\t%i", &val) == 1)
+			map->flags = val;
+	}
+
+	fclose(fp);
+	return 0;
+}
+
+static void bpf_report_map_in_map(int outer_fd, int inner_fd, uint32_t idx)
+{
+	struct bpf_elf_map outer_map;
+	int ret;
+
+	fprintf(stderr, "Cannot insert map into map! ");
+
+	ret = bpf_derive_elf_map_from_fdinfo(outer_fd, &outer_map);
+	if (!ret) {
+		if (idx >= outer_map.max_elem &&
+		    outer_map.type == BPF_MAP_TYPE_ARRAY_OF_MAPS) {
+			fprintf(stderr, "Outer map has %u elements, index %u is invalid!\n",
+				outer_map.max_elem, idx);
+			return;
+		}
+	}
+
+	fprintf(stderr, "Different map specs used for outer and inner map?\n");
+}
+
+static bool bpf_is_map_in_map_type(const struct bpf_elf_map *map)
+{
+	return map->type == BPF_MAP_TYPE_ARRAY_OF_MAPS ||
+	       map->type == BPF_MAP_TYPE_HASH_OF_MAPS;
+}
+
+static int bpf_map_attach(const char *name, const struct bpf_elf_map *map,
+			  struct bpf_elf_ctx *ctx, int *have_map_in_map)
+{
+	int fd, ret, map_inner_fd = 0;
 
 	fd = bpf_probe_pinned(name, ctx, map->pinning);
 	if (fd > 0) {
@@ -1381,11 +1506,29 @@ static int bpf_map_attach(const char *name, const struct bpf_elf_map *map,
 		return fd;
 	}
 
+	if (have_map_in_map && bpf_is_map_in_map_type(map)) {
+		(*have_map_in_map)++;
+		if (map->inner_id)
+			return 0;
+		fprintf(stderr, "Map \'%s\' cannot be created since no inner map ID defined!\n",
+			name);
+		return -EINVAL;
+	}
+
+	if (!have_map_in_map && bpf_is_map_in_map_type(map)) {
+		map_inner_fd = bpf_find_map_id(ctx, map->inner_id);
+		if (map_inner_fd < 0) {
+			fprintf(stderr, "Map \'%s\' cannot be loaded. Inner map with ID %u not found!\n",
+				name, map->inner_id);
+			return -EINVAL;
+		}
+	}
+
 	errno = 0;
 	fd = bpf_map_create(map->type, map->size_key, map->size_value,
-			    map->max_elem, map->flags);
+			    map->max_elem, map->flags, map_inner_fd);
 	if (fd < 0 || ctx->verbose) {
-		bpf_map_report(fd, name, map, ctx);
+		bpf_map_report(fd, name, map, ctx, map_inner_fd);
 		if (fd < 0)
 			return fd;
 	}
@@ -1430,21 +1573,63 @@ static const char *bpf_map_fetch_name(struct bpf_elf_ctx *ctx, int which)
 
 static int bpf_maps_attach_all(struct bpf_elf_ctx *ctx)
 {
+	int i, j, ret, fd, inner_fd, inner_idx, have_map_in_map = 0;
 	const char *map_name;
-	int i, fd;
 
 	for (i = 0; i < ctx->map_num; i++) {
 		map_name = bpf_map_fetch_name(ctx, i);
 		if (!map_name)
 			return -EIO;
 
-		fd = bpf_map_attach(map_name, &ctx->maps[i], ctx);
+		fd = bpf_map_attach(map_name, &ctx->maps[i], ctx,
+				    &have_map_in_map);
+		if (fd < 0)
+			return fd;
+
+		ctx->map_fds[i] = !fd ? -1 : fd;
+	}
+
+	for (i = 0; have_map_in_map && i < ctx->map_num; i++) {
+		if (ctx->map_fds[i] >= 0)
+			continue;
+
+		map_name = bpf_map_fetch_name(ctx, i);
+		if (!map_name)
+			return -EIO;
+
+		fd = bpf_map_attach(map_name, &ctx->maps[i], ctx,
+				    NULL);
 		if (fd < 0)
 			return fd;
 
 		ctx->map_fds[i] = fd;
 	}
 
+	for (i = 0; have_map_in_map && i < ctx->map_num; i++) {
+		if (!ctx->maps[i].id ||
+		    ctx->maps[i].inner_id ||
+		    ctx->maps[i].inner_idx == -1)
+			continue;
+
+		inner_fd  = ctx->map_fds[i];
+		inner_idx = ctx->maps[i].inner_idx;
+
+		for (j = 0; j < ctx->map_num; j++) {
+			if (!bpf_is_map_in_map_type(&ctx->maps[j]))
+				continue;
+			if (ctx->maps[j].inner_id != ctx->maps[i].id)
+				continue;
+
+			ret = bpf_map_update(ctx->map_fds[j], &inner_idx,
+					     &inner_fd, BPF_ANY);
+			if (ret < 0) {
+				bpf_report_map_in_map(ctx->map_fds[j],
+						      inner_fd, inner_idx);
+				return ret;
+			}
+		}
+	}
+
 	return 0;
 }
 
diff --git a/lib/ll_addr.c b/lib/ll_addr.c
index 465ed6fa..5b5caf3d 100644
--- a/lib/ll_addr.c
+++ b/lib/ll_addr.c
@@ -16,7 +16,6 @@
 #include <fcntl.h>
 #include <sys/ioctl.h>
 #include <sys/socket.h>
-#include <sys/ioctl.h>
 #include <netinet/in.h>
 #include <arpa/inet.h>
 #include <string.h>
diff --git a/lib/ll_proto.c b/lib/ll_proto.c
index e094d9f8..ef5a5b7b 100644
--- a/lib/ll_proto.c
+++ b/lib/ll_proto.c
@@ -16,7 +16,6 @@
 #include <fcntl.h>
 #include <sys/ioctl.h>
 #include <sys/socket.h>
-#include <sys/ioctl.h>
 #include <netinet/in.h>
 #include <arpa/inet.h>
 #include <string.h>
diff --git a/lib/ll_types.c b/lib/ll_types.c
index eca617f3..8f294515 100644
--- a/lib/ll_types.c
+++ b/lib/ll_types.c
@@ -16,7 +16,6 @@
 #include <fcntl.h>
 #include <sys/ioctl.h>
 #include <sys/socket.h>
-#include <sys/ioctl.h>
 #include <netinet/in.h>
 #include <arpa/inet.h>
 #include <string.h>
diff --git a/man/man8/tc-csum.8 b/man/man8/tc-csum.8
index 718301de..409ab717 100644
--- a/man/man8/tc-csum.8
+++ b/man/man8/tc-csum.8
@@ -29,9 +29,9 @@ csum - checksum update action
 The
 .B csum
 action triggers checksum recalculation of specified packet headers. It is
-commonly used after packet editing using the
+commonly used to fix incorrect checksums after the
 .B pedit
-action to fix for then incorrect checksums.
+action has modified the packet content.
 .SH OPTIONS
 .TP
 .I TARGET
diff --git a/netem/paretonormal.c b/netem/paretonormal.c
index 83ec87d4..9773e370 100644
--- a/netem/paretonormal.c
+++ b/netem/paretonormal.c
@@ -11,7 +11,6 @@
  */
 #include <stdio.h>
 #include <stdlib.h>
-#include <stdlib.h>
 #include <string.h>
 #include <math.h>
 #include <limits.h>
diff --git a/tc/f_bpf.c b/tc/f_bpf.c
index 75c44c06..2f8d12a6 100644
--- a/tc/f_bpf.c
+++ b/tc/f_bpf.c
@@ -230,6 +230,9 @@ static int bpf_print_opt(struct filter_util *qu, FILE *f,
 				      b, sizeof(b)));
 	}
 
+	if (tb[TCA_BPF_ID])
+		bpf_dump_prog_info(f, rta_getattr_u32(tb[TCA_BPF_ID]));
+
 	if (tb[TCA_BPF_POLICE]) {
 		fprintf(f, "\n");
 		tc_print_police(f, tb[TCA_BPF_POLICE]);
diff --git a/tc/m_bpf.c b/tc/m_bpf.c
index 57283030..df559bcc 100644
--- a/tc/m_bpf.c
+++ b/tc/m_bpf.c
@@ -186,6 +186,9 @@ static int bpf_print_opt(struct action_util *au, FILE *f, struct rtattr *arg)
 				      b, sizeof(b)));
 	}
 
+        if (tb[TCA_ACT_BPF_ID])
+                bpf_dump_prog_info(f, rta_getattr_u32(tb[TCA_ACT_BPF_ID]));
+
 	print_action_control(f, "default-action ", parm->action, "\n");
 	fprintf(f, "\tindex %u ref %d bind %d", parm->index, parm->refcnt,
 		parm->bindcnt);