diff --git a/configure b/configure index 307912aa..2c363d3b 100755 --- a/configure +++ b/configure @@ -2,6 +2,11 @@ # SPDX-License-Identifier: GPL-2.0 # This is not an autoconf generated configure # +# Influential LIBBPF environment variables: +# LIBBPF_FORCE={on,off} on: require link against libbpf; +# off: disable libbpf probing +# LIBBPF_DIR Path to libbpf DESTDIR to use + INCLUDE=${1:-"$PWD/include"} # Output file which is input to Makefile @@ -240,6 +245,111 @@ check_elf() fi } +have_libbpf_basic() +{ + cat >$TMPDIR/libbpf_test.c < +int main(int argc, char **argv) { + bpf_program__set_autoload(NULL, false); + bpf_map__ifindex(NULL); + bpf_map__set_pin_path(NULL, NULL); + bpf_object__open_file(NULL, NULL); + return 0; +} +EOF + + $CC -o $TMPDIR/libbpf_test $TMPDIR/libbpf_test.c $LIBBPF_CFLAGS $LIBBPF_LDLIBS >/dev/null 2>&1 + local ret=$? + + rm -f $TMPDIR/libbpf_test.c $TMPDIR/libbpf_test + return $ret +} + +have_libbpf_sec_name() +{ + cat >$TMPDIR/libbpf_sec_test.c < +int main(int argc, char **argv) { + void *ptr; + bpf_program__section_name(NULL); + return 0; +} +EOF + + $CC -o $TMPDIR/libbpf_sec_test $TMPDIR/libbpf_sec_test.c $LIBBPF_CFLAGS $LIBBPF_LDLIBS >/dev/null 2>&1 + local ret=$? + + rm -f $TMPDIR/libbpf_sec_test.c $TMPDIR/libbpf_sec_test + return $ret +} + +check_force_libbpf_on() +{ + # if set LIBBPF_FORCE=on but no libbpf support, just exit the config + # process to make sure we don't build without libbpf. + if [ "$LIBBPF_FORCE" = on ]; then + echo " LIBBPF_FORCE=on set, but couldn't find a usable libbpf" + exit 1 + fi +} + +check_libbpf() +{ + # if set LIBBPF_FORCE=off, disable libbpf entirely + if [ "$LIBBPF_FORCE" = off ]; then + echo "no" + return + fi + + if ! 
${PKG_CONFIG} libbpf --exists && [ -z "$LIBBPF_DIR" ] ; then + echo "no" + check_force_libbpf_on + return + fi + + if [ $(uname -m) = x86_64 ]; then + local LIBBPF_LIBDIR="${LIBBPF_DIR}/usr/lib64" + else + local LIBBPF_LIBDIR="${LIBBPF_DIR}/usr/lib" + fi + + if [ -n "$LIBBPF_DIR" ]; then + LIBBPF_CFLAGS="-I${LIBBPF_DIR}/usr/include" + LIBBPF_LDLIBS="${LIBBPF_LIBDIR}/libbpf.a -lz -lelf" + LIBBPF_VERSION=$(PKG_CONFIG_LIBDIR=${LIBBPF_LIBDIR}/pkgconfig ${PKG_CONFIG} libbpf --modversion) + else + LIBBPF_CFLAGS=$(${PKG_CONFIG} libbpf --cflags) + LIBBPF_LDLIBS=$(${PKG_CONFIG} libbpf --libs) + LIBBPF_VERSION=$(${PKG_CONFIG} libbpf --modversion) + fi + + if ! have_libbpf_basic; then + echo "no" + echo " libbpf version $LIBBPF_VERSION is too low, please update it to at least 0.1.0" + check_force_libbpf_on + return + else + echo "HAVE_LIBBPF:=y" >> $CONFIG + echo 'CFLAGS += -DHAVE_LIBBPF ' $LIBBPF_CFLAGS >> $CONFIG + echo "CFLAGS += -DLIBBPF_VERSION=\\\"$LIBBPF_VERSION\\\"" >> $CONFIG + echo 'LDLIBS += ' $LIBBPF_LDLIBS >> $CONFIG + + if [ -z "$LIBBPF_DIR" ]; then + echo "CFLAGS += -DLIBBPF_DYNAMIC" >> $CONFIG + fi + fi + + # bpf_program__title() is deprecated since libbpf 0.2.0, use + # bpf_program__section_name() instead if we support + if have_libbpf_sec_name; then + echo "HAVE_LIBBPF_SECTION_NAME:=y" >> $CONFIG + echo 'CFLAGS += -DHAVE_LIBBPF_SECTION_NAME ' >> $CONFIG + fi + + echo "yes" + echo " libbpf version $LIBBPF_VERSION" +} + check_selinux() # SELinux is a compile time option in the ss utility { @@ -385,6 +495,9 @@ check_setns echo -n "SELinux support: " check_selinux +echo -n "libbpf support: " +check_libbpf + echo -n "ELF support: " check_elf diff --git a/examples/bpf/README b/examples/bpf/README index 1bbdda3f..b7261191 100644 --- a/examples/bpf/README +++ b/examples/bpf/README @@ -1,8 +1,18 @@ eBPF toy code examples (running in kernel) to familiarize yourself with syntax and features: - - bpf_shared.c -> Ingress/egress map sharing example - - bpf_tailcall.c -> 
Using tail call chains - - bpf_cyclic.c -> Simple cycle as tail calls +- BTF defined map examples - bpf_graft.c -> Demo on altering runtime behaviour - - bpf_map_in_map.c -> Using map in map example + - bpf_shared.c -> Ingress/egress map sharing example + - bpf_map_in_map.c -> Using map in map example + +- legacy struct bpf_elf_map defined map examples + - legacy/bpf_shared.c -> Ingress/egress map sharing example + - legacy/bpf_tailcall.c -> Using tail call chains + - legacy/bpf_cyclic.c -> Simple cycle as tail calls + - legacy/bpf_graft.c -> Demo on altering runtime behaviour + - legacy/bpf_map_in_map.c -> Using map in map example + +Note: Users should use the new BTF way to define the maps; the examples +in the legacy folder, which use struct bpf_elf_map defined maps, are not +recommended. diff --git a/examples/bpf/bpf_graft.c b/examples/bpf/bpf_graft.c index 07113d4a..8066dcce 100644 --- a/examples/bpf/bpf_graft.c +++ b/examples/bpf/bpf_graft.c @@ -33,13 +33,13 @@ * [...] */ -struct bpf_elf_map __section_maps jmp_tc = { - .type = BPF_MAP_TYPE_PROG_ARRAY, - .size_key = sizeof(uint32_t), - .size_value = sizeof(uint32_t), - .pinning = PIN_GLOBAL_NS, - .max_elem = 1, -}; +struct { + __uint(type, BPF_MAP_TYPE_PROG_ARRAY); + __uint(key_size, sizeof(uint32_t)); + __uint(value_size, sizeof(uint32_t)); + __uint(max_entries, 1); + __uint(pinning, LIBBPF_PIN_BY_NAME); +} jmp_tc __section(".maps"); __section("aaa") int cls_aaa(struct __sk_buff *skb) diff --git a/examples/bpf/bpf_map_in_map.c b/examples/bpf/bpf_map_in_map.c index ff0e623a..39c86268 100644 --- a/examples/bpf/bpf_map_in_map.c +++ b/examples/bpf/bpf_map_in_map.c @@ -1,24 +1,23 @@ #include "../../include/bpf_api.h" -#define MAP_INNER_ID 42 +struct inner_map { + __uint(type, BPF_MAP_TYPE_ARRAY); + __uint(key_size, sizeof(uint32_t)); + __uint(value_size, sizeof(uint32_t)); + __uint(max_entries, 1); +} map_inner __section(".maps"); -struct bpf_elf_map __section_maps map_inner = { - .type = BPF_MAP_TYPE_ARRAY, - 
.size_key = sizeof(uint32_t), - .size_value = sizeof(uint32_t), - .id = MAP_INNER_ID, - .inner_idx = 0, - .pinning = PIN_GLOBAL_NS, - .max_elem = 1, -}; - -struct bpf_elf_map __section_maps map_outer = { - .type = BPF_MAP_TYPE_ARRAY_OF_MAPS, - .size_key = sizeof(uint32_t), - .size_value = sizeof(uint32_t), - .inner_id = MAP_INNER_ID, - .pinning = PIN_GLOBAL_NS, - .max_elem = 1, +struct { + __uint(type, BPF_MAP_TYPE_ARRAY_OF_MAPS); + __uint(key_size, sizeof(uint32_t)); + __uint(value_size, sizeof(uint32_t)); + __uint(max_entries, 1); + __uint(pinning, LIBBPF_PIN_BY_NAME); + __array(values, struct inner_map); +} map_outer __section(".maps") = { + .values = { + [0] = &map_inner, + }, }; __section("egress") diff --git a/examples/bpf/bpf_shared.c b/examples/bpf/bpf_shared.c index 21fe6f1e..99a332f4 100644 --- a/examples/bpf/bpf_shared.c +++ b/examples/bpf/bpf_shared.c @@ -18,13 +18,13 @@ * instance is being created. */ -struct bpf_elf_map __section_maps map_sh = { - .type = BPF_MAP_TYPE_ARRAY, - .size_key = sizeof(uint32_t), - .size_value = sizeof(uint32_t), - .pinning = PIN_OBJECT_NS, /* or PIN_GLOBAL_NS, or PIN_NONE */ - .max_elem = 1, -}; +struct { + __uint(type, BPF_MAP_TYPE_ARRAY); + __uint(key_size, sizeof(uint32_t)); + __uint(value_size, sizeof(uint32_t)); + __uint(max_entries, 1); + __uint(pinning, LIBBPF_PIN_BY_NAME); /* or LIBBPF_PIN_NONE */ +} map_sh __section(".maps"); __section("egress") int emain(struct __sk_buff *skb) diff --git a/examples/bpf/bpf_cyclic.c b/examples/bpf/legacy/bpf_cyclic.c similarity index 95% rename from examples/bpf/bpf_cyclic.c rename to examples/bpf/legacy/bpf_cyclic.c index 11d1c061..33590730 100644 --- a/examples/bpf/bpf_cyclic.c +++ b/examples/bpf/legacy/bpf_cyclic.c @@ -1,4 +1,4 @@ -#include "../../include/bpf_api.h" +#include "../../../include/bpf_api.h" /* Cyclic dependency example to test the kernel's runtime upper * bound on loops. 
Also demonstrates on how to use direct-actions, diff --git a/examples/bpf/legacy/bpf_graft.c b/examples/bpf/legacy/bpf_graft.c new file mode 100644 index 00000000..f4c920cc --- /dev/null +++ b/examples/bpf/legacy/bpf_graft.c @@ -0,0 +1,66 @@ +#include "../../../include/bpf_api.h" + +/* This example demonstrates how classifier run-time behaviour + * can be altered with tail calls. We start out with an empty + * jmp_tc array, then add section aaa to the array slot 0, and + * later on atomically replace it with section bbb. Note that + * as shown in other examples, the tc loader can prepopulate + * tail called sections, here we start out with an empty one + * on purpose to show it can also be done this way. + * + * tc filter add dev foo parent ffff: bpf obj graft.o + * tc exec bpf dbg + * [...] + * Socket Thread-20229 [001] ..s. 138993.003923: : fallthrough + * -0 [001] ..s. 138993.202265: : fallthrough + * Socket Thread-20229 [001] ..s. 138994.004149: : fallthrough + * [...] + * + * tc exec bpf graft m:globals/jmp_tc key 0 obj graft.o sec aaa + * tc exec bpf dbg + * [...] + * Socket Thread-19818 [002] ..s. 139012.053587: : aaa + * -0 [002] ..s. 139012.172359: : aaa + * Socket Thread-19818 [001] ..s. 139012.173556: : aaa + * [...] + * + * tc exec bpf graft m:globals/jmp_tc key 0 obj graft.o sec bbb + * tc exec bpf dbg + * [...] + * Socket Thread-19818 [002] ..s. 139022.102967: : bbb + * -0 [002] ..s. 139022.155640: : bbb + * Socket Thread-19818 [001] ..s. 139022.156730: : bbb + * [...] 
+ */ + +struct bpf_elf_map __section_maps jmp_tc = { + .type = BPF_MAP_TYPE_PROG_ARRAY, + .size_key = sizeof(uint32_t), + .size_value = sizeof(uint32_t), + .pinning = PIN_GLOBAL_NS, + .max_elem = 1, +}; + +__section("aaa") +int cls_aaa(struct __sk_buff *skb) +{ + printt("aaa\n"); + return TC_H_MAKE(1, 42); +} + +__section("bbb") +int cls_bbb(struct __sk_buff *skb) +{ + printt("bbb\n"); + return TC_H_MAKE(1, 43); +} + +__section_cls_entry +int cls_entry(struct __sk_buff *skb) +{ + tail_call(skb, &jmp_tc, 0); + printt("fallthrough\n"); + return BPF_H_DEFAULT; +} + +BPF_LICENSE("GPL"); diff --git a/examples/bpf/legacy/bpf_map_in_map.c b/examples/bpf/legacy/bpf_map_in_map.c new file mode 100644 index 00000000..575f8812 --- /dev/null +++ b/examples/bpf/legacy/bpf_map_in_map.c @@ -0,0 +1,56 @@ +#include "../../../include/bpf_api.h" + +#define MAP_INNER_ID 42 + +struct bpf_elf_map __section_maps map_inner = { + .type = BPF_MAP_TYPE_ARRAY, + .size_key = sizeof(uint32_t), + .size_value = sizeof(uint32_t), + .id = MAP_INNER_ID, + .inner_idx = 0, + .pinning = PIN_GLOBAL_NS, + .max_elem = 1, +}; + +struct bpf_elf_map __section_maps map_outer = { + .type = BPF_MAP_TYPE_ARRAY_OF_MAPS, + .size_key = sizeof(uint32_t), + .size_value = sizeof(uint32_t), + .inner_id = MAP_INNER_ID, + .pinning = PIN_GLOBAL_NS, + .max_elem = 1, +}; + +__section("egress") +int emain(struct __sk_buff *skb) +{ + struct bpf_elf_map *map_inner; + int key = 0, *val; + + map_inner = map_lookup_elem(&map_outer, &key); + if (map_inner) { + val = map_lookup_elem(map_inner, &key); + if (val) + lock_xadd(val, 1); + } + + return BPF_H_DEFAULT; +} + +__section("ingress") +int imain(struct __sk_buff *skb) +{ + struct bpf_elf_map *map_inner; + int key = 0, *val; + + map_inner = map_lookup_elem(&map_outer, &key); + if (map_inner) { + val = map_lookup_elem(map_inner, &key); + if (val) + printt("map val: %d\n", *val); + } + + return BPF_H_DEFAULT; +} + +BPF_LICENSE("GPL"); diff --git a/examples/bpf/legacy/bpf_shared.c 
b/examples/bpf/legacy/bpf_shared.c new file mode 100644 index 00000000..05b2b9ef --- /dev/null +++ b/examples/bpf/legacy/bpf_shared.c @@ -0,0 +1,53 @@ +#include "../../../include/bpf_api.h" + +/* Minimal, stand-alone toy map pinning example: + * + * clang -target bpf -O2 [...] -o bpf_shared.o -c bpf_shared.c + * tc filter add dev foo parent 1: bpf obj bpf_shared.o sec egress + * tc filter add dev foo parent ffff: bpf obj bpf_shared.o sec ingress + * + * Both classifier will share the very same map instance in this example, + * so map content can be accessed from ingress *and* egress side! + * + * This example has a pinning of PIN_OBJECT_NS, so it's private and + * thus shared among various program sections within the object. + * + * A setting of PIN_GLOBAL_NS would place it into a global namespace, + * so that it can be shared among different object files. A setting + * of PIN_NONE (= 0) means no sharing, so each tc invocation a new map + * instance is being created. + */ + +struct bpf_elf_map __section_maps map_sh = { + .type = BPF_MAP_TYPE_ARRAY, + .size_key = sizeof(uint32_t), + .size_value = sizeof(uint32_t), + .pinning = PIN_OBJECT_NS, /* or PIN_GLOBAL_NS, or PIN_NONE */ + .max_elem = 1, +}; + +__section("egress") +int emain(struct __sk_buff *skb) +{ + int key = 0, *val; + + val = map_lookup_elem(&map_sh, &key); + if (val) + lock_xadd(val, 1); + + return BPF_H_DEFAULT; +} + +__section("ingress") +int imain(struct __sk_buff *skb) +{ + int key = 0, *val; + + val = map_lookup_elem(&map_sh, &key); + if (val) + printt("map val: %d\n", *val); + + return BPF_H_DEFAULT; +} + +BPF_LICENSE("GPL"); diff --git a/examples/bpf/bpf_tailcall.c b/examples/bpf/legacy/bpf_tailcall.c similarity index 98% rename from examples/bpf/bpf_tailcall.c rename to examples/bpf/legacy/bpf_tailcall.c index 161eb606..8ebc554c 100644 --- a/examples/bpf/bpf_tailcall.c +++ b/examples/bpf/legacy/bpf_tailcall.c @@ -1,5 +1,5 @@ /* SPDX-License-Identifier: GPL-2.0 */ -#include 
"../../include/bpf_api.h" +#include "../../../include/bpf_api.h" #define ENTRY_INIT 3 #define ENTRY_0 0 diff --git a/include/bpf_api.h b/include/bpf_api.h index 89d3488d..82c47089 100644 --- a/include/bpf_api.h +++ b/include/bpf_api.h @@ -19,6 +19,19 @@ #include "bpf_elf.h" +/** libbpf pin type. */ +enum libbpf_pin_type { + LIBBPF_PIN_NONE, + /* PIN_BY_NAME: pin maps by name (in /sys/fs/bpf by default) */ + LIBBPF_PIN_BY_NAME, +}; + +/** Type helper macros. */ + +#define __uint(name, val) int (*name)[val] +#define __type(name, val) typeof(val) *name +#define __array(name, val) typeof(val) *name[] + /** Misc macros. */ #ifndef __stringify diff --git a/include/bpf_util.h b/include/bpf_util.h index 63db07ca..53acc410 100644 --- a/include/bpf_util.h +++ b/include/bpf_util.h @@ -274,12 +274,16 @@ int bpf_trace_pipe(void); void bpf_print_ops(struct rtattr *bpf_ops, __u16 len); -int bpf_prog_load(enum bpf_prog_type type, const struct bpf_insn *insns, - size_t size_insns, const char *license, char *log, - size_t size_log); +int bpf_prog_load_dev(enum bpf_prog_type type, const struct bpf_insn *insns, + size_t size_insns, const char *license, __u32 ifindex, + char *log, size_t size_log); +int bpf_program_load(enum bpf_prog_type type, const struct bpf_insn *insns, + size_t size_insns, const char *license, char *log, + size_t size_log); int bpf_prog_attach_fd(int prog_fd, int target_fd, enum bpf_attach_type type); int bpf_prog_detach_fd(int target_fd, enum bpf_attach_type type); +int bpf_program_attach(int prog_fd, int target_fd, enum bpf_attach_type type); int bpf_dump_prog_info(FILE *f, uint32_t id); @@ -287,6 +291,16 @@ int bpf_dump_prog_info(FILE *f, uint32_t id); int bpf_send_map_fds(const char *path, const char *obj); int bpf_recv_map_fds(const char *path, int *fds, struct bpf_map_aux *aux, unsigned int entries); +#ifdef HAVE_LIBBPF +int iproute2_bpf_elf_ctx_init(struct bpf_cfg_in *cfg); +int iproute2_bpf_fetch_ancillary(void); +int iproute2_get_root_path(char 
*root_path, size_t len); +bool iproute2_is_pin_map(const char *libbpf_map_name, char *pathname); +bool iproute2_is_map_in_map(const char *libbpf_map_name, struct bpf_elf_map *imap, + struct bpf_elf_map *omap, char *omap_name); +int iproute2_find_map_name_by_id(unsigned int map_id, char *name); +int iproute2_load_libbpf(struct bpf_cfg_in *cfg); +#endif /* HAVE_LIBBPF */ #else static inline int bpf_send_map_fds(const char *path, const char *obj) { @@ -299,5 +313,15 @@ static inline int bpf_recv_map_fds(const char *path, int *fds, { return -1; } +#ifdef HAVE_LIBBPF +static inline int iproute2_load_libbpf(struct bpf_cfg_in *cfg) +{ + fprintf(stderr, "No ELF library support compiled in.\n"); + return -1; +} +#endif /* HAVE_LIBBPF */ #endif /* HAVE_ELF */ + +const char *get_libbpf_version(void); + #endif /* __BPF_UTIL__ */ diff --git a/ip/ip.c b/ip/ip.c index 5e31957f..466dbb52 100644 --- a/ip/ip.c +++ b/ip/ip.c @@ -24,6 +24,7 @@ #include "namespace.h" #include "color.h" #include "rt_names.h" +#include "bpf_util.h" int preferred_family = AF_UNSPEC; int human_readable; @@ -147,8 +148,9 @@ static int batch(const char *name) int main(int argc, char **argv) { - char *basename; + const char *libbpf_version; char *batch_file = NULL; + char *basename; int color = 0; /* to run vrf exec without root, capabilities might be set, drop them @@ -229,7 +231,11 @@ int main(int argc, char **argv) ++timestamp; ++timestamp_short; } else if (matches(opt, "-Version") == 0) { - printf("ip utility, iproute2-%s\n", version); + printf("ip utility, iproute2-%s", version); + libbpf_version = get_libbpf_version(); + if (libbpf_version) + printf(", libbpf %s", libbpf_version); + printf("\n"); exit(0); } else if (matches(opt, "-force") == 0) { ++force; diff --git a/ip/ipvrf.c b/ip/ipvrf.c index 28dd8e25..42779e5c 100644 --- a/ip/ipvrf.c +++ b/ip/ipvrf.c @@ -256,8 +256,8 @@ static int prog_load(int idx) BPF_EXIT_INSN(), }; - return bpf_prog_load(BPF_PROG_TYPE_CGROUP_SOCK, prog, sizeof(prog), - "GPL", 
bpf_log_buf, sizeof(bpf_log_buf)); + return bpf_program_load(BPF_PROG_TYPE_CGROUP_SOCK, prog, sizeof(prog), + "GPL", bpf_log_buf, sizeof(bpf_log_buf)); } static int vrf_configure_cgroup(const char *path, int ifindex) @@ -288,7 +288,7 @@ static int vrf_configure_cgroup(const char *path, int ifindex) goto out; } - if (bpf_prog_attach_fd(prog_fd, cg_fd, BPF_CGROUP_INET_SOCK_CREATE)) { + if (bpf_program_attach(prog_fd, cg_fd, BPF_CGROUP_INET_SOCK_CREATE)) { fprintf(stderr, "Failed to attach prog to cgroup: '%s'\n", strerror(errno)); goto out; diff --git a/lib/Makefile b/lib/Makefile index 13f4ee15..e37585c6 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -5,7 +5,13 @@ CFLAGS += -fPIC UTILOBJ = utils.o rt_names.o ll_map.o ll_types.o ll_proto.o ll_addr.o \ inet_proto.o namespace.o json_writer.o json_print.o \ - names.o color.o bpf.o exec.o fs.o cg_map.o + names.o color.o bpf_legacy.o bpf_glue.o exec.o fs.o cg_map.o + +ifeq ($(HAVE_ELF),y) +ifeq ($(HAVE_LIBBPF),y) +UTILOBJ += bpf_libbpf.o +endif +endif NLOBJ=libgenl.o libnetlink.o mnl_utils.o diff --git a/lib/bpf_glue.c b/lib/bpf_glue.c new file mode 100644 index 00000000..fa609bfe --- /dev/null +++ b/lib/bpf_glue.c @@ -0,0 +1,86 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * bpf_glue.c: BPF code to call both legacy and libbpf code + * Authors: Hangbin Liu + * + */ +#include "bpf_util.h" +#ifdef HAVE_LIBBPF +#include +#endif + +int bpf_program_load(enum bpf_prog_type type, const struct bpf_insn *insns, + size_t size_insns, const char *license, char *log, + size_t size_log) +{ +#ifdef HAVE_LIBBPF + return bpf_load_program(type, insns, size_insns, license, 0, log, size_log); +#else + return bpf_prog_load_dev(type, insns, size_insns, license, 0, log, size_log); +#endif +} + +int bpf_program_attach(int prog_fd, int target_fd, enum bpf_attach_type type) +{ +#ifdef HAVE_LIBBPF + return bpf_prog_attach(prog_fd, target_fd, type, 0); +#else + return bpf_prog_attach_fd(prog_fd, target_fd, type); +#endif +} + +#ifdef HAVE_LIBBPF 
+static const char *_libbpf_compile_version = LIBBPF_VERSION; +static char _libbpf_version[10] = {}; + +const char *get_libbpf_version(void) +{ + /* Start by copying compile-time version into buffer so we have a + * fallback value in case we are dynamically linked, or can't find a + * version in /proc/self/maps below. + */ + strncpy(_libbpf_version, _libbpf_compile_version, + sizeof(_libbpf_version)-1); +#ifdef LIBBPF_DYNAMIC + char buf[PATH_MAX], *s; + bool found = false; + FILE *fp; + + /* When dynamically linking against libbpf, we can't be sure that the + * version we discovered at compile time is actually the one we are + * using at runtime. This can lead to hard-to-debug errors, so we try to + * discover the correct version at runtime. + * + * The simple solution to this would be if libbpf itself exported a + * version in its API. But since it doesn't, we work around this by + * parsing the mappings of the binary at runtime, looking for the full + * filename of libbpf.so and using that. 
+ */ + fp = fopen("/proc/self/maps", "r"); + if (fp == NULL) + goto out; + + while ((s = fgets(buf, sizeof(buf), fp)) != NULL) { + if ((s = strstr(buf, "libbpf.so.")) != NULL) { + strncpy(_libbpf_version, s+10, sizeof(_libbpf_version)-1); + strtok(_libbpf_version, "\n"); + found = true; + break; + } + } + + fclose(fp); +out: + if (!found) + fprintf(stderr, "Couldn't find runtime libbpf version - falling back to compile-time value!\n"); +#endif /* LIBBPF_DYNAMIC */ + + _libbpf_version[sizeof(_libbpf_version)-1] = '\0'; + return _libbpf_version; +} +#else +const char *get_libbpf_version(void) +{ + return NULL; +} +#endif /* HAVE_LIBBPF */ diff --git a/lib/bpf.c b/lib/bpf_legacy.c similarity index 94% rename from lib/bpf.c rename to lib/bpf_legacy.c index c7d45077..bc869c3f 100644 --- a/lib/bpf.c +++ b/lib/bpf_legacy.c @@ -940,6 +940,9 @@ static int bpf_do_parse(struct bpf_cfg_in *cfg, const bool *opt_tbl) static int bpf_do_load(struct bpf_cfg_in *cfg) { if (cfg->mode == EBPF_OBJECT) { +#ifdef HAVE_LIBBPF + return iproute2_load_libbpf(cfg); +#endif cfg->prog_fd = bpf_obj_open(cfg->object, cfg->type, cfg->section, cfg->ifindex, cfg->verbose); @@ -1087,10 +1090,9 @@ int bpf_prog_detach_fd(int target_fd, enum bpf_attach_type type) return bpf(BPF_PROG_DETACH, &attr, sizeof(attr)); } -static int bpf_prog_load_dev(enum bpf_prog_type type, - const struct bpf_insn *insns, size_t size_insns, - const char *license, __u32 ifindex, - char *log, size_t size_log) +int bpf_prog_load_dev(enum bpf_prog_type type, const struct bpf_insn *insns, + size_t size_insns, const char *license, __u32 ifindex, + char *log, size_t size_log) { union bpf_attr attr = {}; @@ -1109,14 +1111,6 @@ static int bpf_prog_load_dev(enum bpf_prog_type type, return bpf(BPF_PROG_LOAD, &attr, sizeof(attr)); } -int bpf_prog_load(enum bpf_prog_type type, const struct bpf_insn *insns, - size_t size_insns, const char *license, char *log, - size_t size_log) -{ - return bpf_prog_load_dev(type, insns, size_insns, 
license, 0, - log, size_log); -} - #ifdef HAVE_ELF struct bpf_elf_prog { enum bpf_prog_type type; @@ -3164,4 +3158,179 @@ int bpf_recv_map_fds(const char *path, int *fds, struct bpf_map_aux *aux, close(fd); return ret; } + +#ifdef HAVE_LIBBPF +/* The following functions are wrapper functions for libbpf code to be + * compatible with the legacy format. So all the functions have prefix + * with iproute2_ + */ +int iproute2_bpf_elf_ctx_init(struct bpf_cfg_in *cfg) +{ + struct bpf_elf_ctx *ctx = &__ctx; + + return bpf_elf_ctx_init(ctx, cfg->object, cfg->type, cfg->ifindex, cfg->verbose); +} + +int iproute2_bpf_fetch_ancillary(void) +{ + struct bpf_elf_ctx *ctx = &__ctx; + struct bpf_elf_sec_data data; + int i, ret = 0; + + for (i = 1; i < ctx->elf_hdr.e_shnum; i++) { + ret = bpf_fill_section_data(ctx, i, &data); + if (ret < 0) + continue; + + if (data.sec_hdr.sh_type == SHT_PROGBITS && + !strcmp(data.sec_name, ELF_SECTION_MAPS)) + ret = bpf_fetch_maps_begin(ctx, i, &data); + else if (data.sec_hdr.sh_type == SHT_SYMTAB && + !strcmp(data.sec_name, ".symtab")) + ret = bpf_fetch_symtab(ctx, i, &data); + else if (data.sec_hdr.sh_type == SHT_STRTAB && + !strcmp(data.sec_name, ".strtab")) + ret = bpf_fetch_strtab(ctx, i, &data); + if (ret < 0) { + fprintf(stderr, "Error parsing section %d! 
Perhaps check with readelf -a?\n", + i); + return ret; + } + } + + if (bpf_has_map_data(ctx)) { + ret = bpf_fetch_maps_end(ctx); + if (ret < 0) { + fprintf(stderr, "Error fixing up map structure, incompatible struct bpf_elf_map used?\n"); + return ret; + } + } + + return ret; +} + +int iproute2_get_root_path(char *root_path, size_t len) +{ + struct bpf_elf_ctx *ctx = &__ctx; + int ret = 0; + + snprintf(root_path, len, "%s/%s", + bpf_get_work_dir(ctx->type), BPF_DIR_GLOBALS); + + ret = mkdir(root_path, S_IRWXU); + if (ret && errno != EEXIST) { + fprintf(stderr, "mkdir %s failed: %s\n", root_path, strerror(errno)); + return ret; + } + + return 0; +} + +bool iproute2_is_pin_map(const char *libbpf_map_name, char *pathname) +{ + struct bpf_elf_ctx *ctx = &__ctx; + const char *map_name, *tmp; + unsigned int pinning; + int i, ret = 0; + + for (i = 0; i < ctx->map_num; i++) { + if (ctx->maps[i].pinning == PIN_OBJECT_NS && + ctx->noafalg) { + fprintf(stderr, "Missing kernel AF_ALG support for PIN_OBJECT_NS!\n"); + return false; + } + + map_name = bpf_map_fetch_name(ctx, i); + if (!map_name) { + return false; + } + + if (strcmp(libbpf_map_name, map_name)) + continue; + + pinning = ctx->maps[i].pinning; + + if (bpf_no_pinning(ctx, pinning) || !bpf_get_work_dir(ctx->type)) + return false; + + if (pinning == PIN_OBJECT_NS) + ret = bpf_make_obj_path(ctx); + else if ((tmp = bpf_custom_pinning(ctx, pinning))) + ret = bpf_make_custom_path(ctx, tmp); + if (ret < 0) + return false; + + bpf_make_pathname(pathname, PATH_MAX, map_name, ctx, pinning); + + return true; + } + + return false; +} + +bool iproute2_is_map_in_map(const char *libbpf_map_name, struct bpf_elf_map *imap, + struct bpf_elf_map *omap, char *omap_name) +{ + struct bpf_elf_ctx *ctx = &__ctx; + const char *inner_map_name, *outer_map_name; + int i, j; + + for (i = 0; i < ctx->map_num; i++) { + inner_map_name = bpf_map_fetch_name(ctx, i); + if (!inner_map_name) { + return false; + } + + if (strcmp(libbpf_map_name, 
inner_map_name)) + continue; + + if (!ctx->maps[i].id || + ctx->maps[i].inner_id || + ctx->maps[i].inner_idx == -1) + continue; + + *imap = ctx->maps[i]; + + for (j = 0; j < ctx->map_num; j++) { + if (!bpf_is_map_in_map_type(&ctx->maps[j])) + continue; + if (ctx->maps[j].inner_id != ctx->maps[i].id) + continue; + + *omap = ctx->maps[j]; + outer_map_name = bpf_map_fetch_name(ctx, j); + memcpy(omap_name, outer_map_name, strlen(outer_map_name) + 1); + + return true; + } + } + + return false; +} + +int iproute2_find_map_name_by_id(unsigned int map_id, char *name) +{ + struct bpf_elf_ctx *ctx = &__ctx; + const char *map_name; + int i, idx = -1; + + for (i = 0; i < ctx->map_num; i++) { + if (ctx->maps[i].id == map_id && + ctx->maps[i].type == BPF_MAP_TYPE_PROG_ARRAY) { + idx = i; + break; + } + } + + if (idx < 0) + return -1; + + map_name = bpf_map_fetch_name(ctx, idx); + if (!map_name) + return -1; + + memcpy(name, map_name, strlen(map_name) + 1); + return 0; +} +#endif /* HAVE_LIBBPF */ #endif /* HAVE_ELF */ diff --git a/lib/bpf_libbpf.c b/lib/bpf_libbpf.c new file mode 100644 index 00000000..d05737a4 --- /dev/null +++ b/lib/bpf_libbpf.c @@ -0,0 +1,348 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * bpf_libbpf.c BPF code relay on libbpf + * Authors: Hangbin Liu + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include +#include + +#include "bpf_util.h" + +static int verbose_print(enum libbpf_print_level level, const char *format, va_list args) +{ + return vfprintf(stderr, format, args); +} + +static int silent_print(enum libbpf_print_level level, const char *format, va_list args) +{ + if (level > LIBBPF_WARN) + return 0; + + /* Skip warning from bpf_object__init_user_maps() for legacy maps */ + if (strstr(format, "has unrecognized, non-zero options")) + return 0; + + return vfprintf(stderr, format, args); +} + +static const char *get_bpf_program__section_name(const struct bpf_program *prog) +{ 
+#ifdef HAVE_LIBBPF_SECTION_NAME + return bpf_program__section_name(prog); +#else + return bpf_program__title(prog, false); +#endif +} + +static int create_map(const char *name, struct bpf_elf_map *map, + __u32 ifindex, int inner_fd) +{ + struct bpf_create_map_attr map_attr = {}; + + map_attr.name = name; + map_attr.map_type = map->type; + map_attr.map_flags = map->flags; + map_attr.key_size = map->size_key; + map_attr.value_size = map->size_value; + map_attr.max_entries = map->max_elem; + map_attr.map_ifindex = ifindex; + map_attr.inner_map_fd = inner_fd; + + return bpf_create_map_xattr(&map_attr); +} + +static int create_map_in_map(struct bpf_object *obj, struct bpf_map *map, + struct bpf_elf_map *elf_map, int inner_fd, + bool *reuse_pin_map) +{ + char pathname[PATH_MAX]; + const char *map_name; + bool pin_map = false; + int map_fd, ret = 0; + + map_name = bpf_map__name(map); + + if (iproute2_is_pin_map(map_name, pathname)) { + pin_map = true; + + /* Check if there is already a pinned map (any fd >= 0 is valid) */ + map_fd = bpf_obj_get(pathname); + if (map_fd >= 0) { + if (reuse_pin_map) + *reuse_pin_map = true; + close(map_fd); + return bpf_map__set_pin_path(map, pathname); + } + } + + map_fd = create_map(map_name, elf_map, bpf_map__ifindex(map), inner_fd); + if (map_fd < 0) { + fprintf(stderr, "create map %s failed\n", map_name); + return map_fd; + } + + ret = bpf_map__reuse_fd(map, map_fd); + if (ret < 0) { + fprintf(stderr, "map %s reuse fd failed\n", map_name); + goto err_out; + } + + if (pin_map) { + ret = bpf_map__set_pin_path(map, pathname); + if (ret < 0) + goto err_out; + } + + return 0; +err_out: + close(map_fd); + return ret; +} + +static int +handle_legacy_map_in_map(struct bpf_object *obj, struct bpf_map *inner_map, + const char *inner_map_name) +{ + int inner_fd, outer_fd, inner_idx, ret = 0; + struct bpf_elf_map imap, omap; + struct bpf_map *outer_map; + /* What's the size limit of map name? 
*/ + char outer_map_name[128]; + bool reuse_pin_map = false; + + /* Deal with map-in-map */ + if (iproute2_is_map_in_map(inner_map_name, &imap, &omap, outer_map_name)) { + ret = create_map_in_map(obj, inner_map, &imap, -1, NULL); + if (ret < 0) + return ret; + + inner_fd = bpf_map__fd(inner_map); + outer_map = bpf_object__find_map_by_name(obj, outer_map_name); + ret = create_map_in_map(obj, outer_map, &omap, inner_fd, &reuse_pin_map); + if (ret < 0) + return ret; + + if (!reuse_pin_map) { + inner_idx = imap.inner_idx; + outer_fd = bpf_map__fd(outer_map); + ret = bpf_map_update_elem(outer_fd, &inner_idx, &inner_fd, 0); + if (ret < 0) + fprintf(stderr, "Cannot update inner_idx into outer_map\n"); + } + } + + return ret; +} + +static int find_legacy_tail_calls(struct bpf_program *prog, struct bpf_object *obj) +{ + unsigned int map_id, key_id; + const char *sec_name; + struct bpf_map *map; + char map_name[128]; + int ret; + + /* Handle iproute2 tail call */ + sec_name = get_bpf_program__section_name(prog); + ret = sscanf(sec_name, "%i/%i", &map_id, &key_id); + if (ret != 2) + return -1; + + ret = iproute2_find_map_name_by_id(map_id, map_name); + if (ret < 0) { + fprintf(stderr, "unable to find map id %u for tail call\n", map_id); + return ret; + } + + map = bpf_object__find_map_by_name(obj, map_name); + if (!map) + return -1; + + /* Save the map here for later updating */ + bpf_program__set_priv(prog, map, NULL); + + return 0; +} + +static int update_legacy_tail_call_maps(struct bpf_object *obj) +{ + int prog_fd, map_fd, ret = 0; + unsigned int map_id, key_id; + struct bpf_program *prog; + const char *sec_name; + struct bpf_map *map; + + bpf_object__for_each_program(prog, obj) { + map = bpf_program__priv(prog); + if (!map) + continue; + + prog_fd = bpf_program__fd(prog); + if (prog_fd < 0) + continue; + + sec_name = get_bpf_program__section_name(prog); + ret = sscanf(sec_name, "%i/%i", &map_id, &key_id); + if (ret != 2) + continue; + + map_fd = bpf_map__fd(map); + ret 
= bpf_map_update_elem(map_fd, &key_id, &prog_fd, 0); + if (ret < 0) { + fprintf(stderr, "Cannot update map key for tail call!\n"); + return ret; + } + } + + return 0; +} + +static int handle_legacy_maps(struct bpf_object *obj) +{ + char pathname[PATH_MAX]; + struct bpf_map *map; + const char *map_name; + int map_fd, ret = 0; + + bpf_object__for_each_map(map, obj) { + map_name = bpf_map__name(map); + + ret = handle_legacy_map_in_map(obj, map, map_name); + if (ret) + return ret; + + /* If it is an iproute2 legacy pinned map, just set the pin path + * and let bpf_object__load() deal with the map creation. + * We need to ignore map-in-maps, which were already handled manually above + */ + map_fd = bpf_map__fd(map); + if (map_fd < 0 && iproute2_is_pin_map(map_name, pathname)) { + ret = bpf_map__set_pin_path(map, pathname); + if (ret) { + fprintf(stderr, "map '%s': couldn't set pin path.\n", map_name); + break; + } + } + + } + + return ret; +} + +static int load_bpf_object(struct bpf_cfg_in *cfg) +{ + struct bpf_program *p, *prog = NULL; + struct bpf_object *obj; + char root_path[PATH_MAX]; + struct bpf_map *map; + int prog_fd, ret = 0; + + ret = iproute2_get_root_path(root_path, PATH_MAX); + if (ret) + return ret; + + DECLARE_LIBBPF_OPTS(bpf_object_open_opts, open_opts, + .relaxed_maps = true, + .pin_root_path = root_path, + ); + + obj = bpf_object__open_file(cfg->object, &open_opts); + if (libbpf_get_error(obj)) { + fprintf(stderr, "ERROR: opening BPF object file failed\n"); + return -ENOENT; + } + + bpf_object__for_each_program(p, obj) { + /* Only load the programs that will either be subsequently + * attached or inserted into a tail call map */ + if (find_legacy_tail_calls(p, obj) < 0 && cfg->section && + strcmp(get_bpf_program__section_name(p), cfg->section)) { + ret = bpf_program__set_autoload(p, false) ? -EINVAL : 0; + if (ret) + goto unload_obj; /* obj is open: close it before returning */ + continue; + } + + bpf_program__set_type(p, cfg->type); + bpf_program__set_ifindex(p, cfg->ifindex); + if (!prog) + prog = p; + } + + 
bpf_object__for_each_map(map, obj) { + if (!bpf_map__is_offload_neutral(map)) + bpf_map__set_ifindex(map, cfg->ifindex); + } + + if (!prog) { + fprintf(stderr, "object file doesn't contain sec %s\n", cfg->section); + ret = -ENOENT; + goto unload_obj; /* obj is open: close it before returning */ + } + /* Handle iproute2 legacy pin maps and map-in-maps */ + ret = handle_legacy_maps(obj); + if (ret) + goto unload_obj; + + ret = bpf_object__load(obj); + if (ret) + goto unload_obj; + + ret = update_legacy_tail_call_maps(obj); + if (ret) + goto unload_obj; + + prog_fd = fcntl(bpf_program__fd(prog), F_DUPFD_CLOEXEC, 1); + if (prog_fd < 0) + ret = -errno; + else + cfg->prog_fd = prog_fd; + +unload_obj: + /* Close obj as we don't need it */ + bpf_object__close(obj); + return ret; +} + +/* Load ebpf and return prog fd */ +int iproute2_load_libbpf(struct bpf_cfg_in *cfg) +{ + int ret = 0; + + if (cfg->verbose) + libbpf_set_print(verbose_print); + else + libbpf_set_print(silent_print); + + ret = iproute2_bpf_elf_ctx_init(cfg); + if (ret < 0) { + fprintf(stderr, "Cannot initialize ELF context!\n"); + return ret; + } + + ret = iproute2_bpf_fetch_ancillary(); + if (ret < 0) { + fprintf(stderr, "Error fetching ELF ancillary data!\n"); + return ret; + } + + ret = load_bpf_object(cfg); + if (ret) + return ret; + + return cfg->prog_fd; +} diff --git a/tc/tc.c b/tc/tc.c index af9b21da..7557b977 100644 --- a/tc/tc.c +++ b/tc/tc.c @@ -30,6 +30,7 @@ #include "tc_common.h" #include "namespace.h" #include "rt_names.h" +#include "bpf_util.h" int show_stats; int show_details; @@ -259,8 +260,9 @@ static int batch(const char *name) int main(int argc, char **argv) { - int ret; + const char *libbpf_version; char *batch_file = NULL; + int ret; while (argc > 1) { if (argv[1][0] != '-') @@ -277,7 +279,11 @@ int main(int argc, char **argv) } else if (matches(argv[1], "-graph") == 0) { show_graph = 1; } else if (matches(argv[1], "-Version") == 0) { - printf("tc utility, iproute2-%s\n", version); + printf("tc utility, iproute2-%s", version); + libbpf_version = 
get_libbpf_version(); + if (libbpf_version) + printf(", libbpf %s", libbpf_version); + printf("\n"); return 0; } else if (matches(argv[1], "-iec") == 0) { ++use_iec;