diff --git a/etc/iproute2/bpf_pinning b/etc/iproute2/bpf_pinning new file mode 100644 index 00000000..2b39c709 --- /dev/null +++ b/etc/iproute2/bpf_pinning @@ -0,0 +1,6 @@ +# +# subpath mappings from mount point for pinning +# +#3 tracing +#4 foo/bar +#5 tc/cls1 diff --git a/examples/bpf/README b/examples/bpf/README new file mode 100644 index 00000000..42472578 --- /dev/null +++ b/examples/bpf/README @@ -0,0 +1,13 @@ +eBPF toy code examples (running in kernel) to familiarize yourself +with syntax and features: + + - bpf_prog.c -> Classifier examples using maps + - bpf_shared.c -> Ingress/egress map sharing example + - bpf_tailcall.c -> Using tail call chains + - bpf_cyclic.c -> Simple cycle as tail calls + - bpf_graft.c -> Demo on altering runtime behaviour + +User space code example: + + - bpf_agent.c -> Counterpart to bpf_prog.c for user + space to transfer/read out map data diff --git a/examples/bpf/bpf_cyclic.c b/examples/bpf/bpf_cyclic.c new file mode 100644 index 00000000..c66cbecc --- /dev/null +++ b/examples/bpf/bpf_cyclic.c @@ -0,0 +1,30 @@ +#include "../../include/bpf_api.h" + +/* Cyclic dependency example to test the kernel's runtime upper + * bound on loops. Also demonstrates how to use direct-actions, + * loaded as: tc filter add [...] bpf da obj [...] 
+ */ +#define JMP_MAP_ID 0xabccba + +BPF_PROG_ARRAY(jmp_tc, JMP_MAP_ID, PIN_OBJECT_NS, 1); + +__section_tail(JMP_MAP_ID, 0) +int cls_loop(struct __sk_buff *skb) +{ + char fmt[] = "cb: %u\n"; + + trace_printk(fmt, sizeof(fmt), skb->cb[0]++); + tail_call(skb, &jmp_tc, 0); + + skb->tc_classid = TC_H_MAKE(1, 42); + return TC_ACT_OK; +} + +__section_cls_entry +int cls_entry(struct __sk_buff *skb) +{ + tail_call(skb, &jmp_tc, 0); + return TC_ACT_SHOT; +} + +BPF_LICENSE("GPL"); diff --git a/examples/bpf/bpf_funcs.h b/examples/bpf/bpf_funcs.h deleted file mode 100644 index 1545fa9d..00000000 --- a/examples/bpf/bpf_funcs.h +++ /dev/null @@ -1,58 +0,0 @@ -#ifndef __BPF_FUNCS__ -#define __BPF_FUNCS__ - -/* Misc macros. */ -#ifndef __maybe_unused -# define __maybe_unused __attribute__ ((__unused__)) -#endif - -#ifndef __section -# define __section(NAME) __attribute__((section(NAME), used)) -#endif - -#ifndef offsetof -# define offsetof __builtin_offsetof -#endif - -#ifndef htons -# define htons(x) __constant_htons((x)) -#endif - -#ifndef likely -# define likely(x) __builtin_expect(!!(x), 1) -#endif - -#ifndef unlikely -# define unlikely(x) __builtin_expect(!!(x), 0) -#endif - -/* The verifier will translate them to actual function calls. */ -static void *(*bpf_map_lookup_elem)(void *map, void *key) __maybe_unused = - (void *) BPF_FUNC_map_lookup_elem; - -static int (*bpf_map_update_elem)(void *map, void *key, void *value, - unsigned long long flags) __maybe_unused = - (void *) BPF_FUNC_map_update_elem; - -static int (*bpf_map_delete_elem)(void *map, void *key) __maybe_unused = - (void *) BPF_FUNC_map_delete_elem; - -static unsigned int (*get_smp_processor_id)(void) __maybe_unused = - (void *) BPF_FUNC_get_smp_processor_id; - -static unsigned int (*get_prandom_u32)(void) __maybe_unused = - (void *) BPF_FUNC_get_prandom_u32; - -/* LLVM built-in functions that an eBPF C program may use to emit - * BPF_LD_ABS and BPF_LD_IND instructions. 
- */ -unsigned long long load_byte(void *skb, unsigned long long off) - asm ("llvm.bpf.load.byte"); - -unsigned long long load_half(void *skb, unsigned long long off) - asm ("llvm.bpf.load.half"); - -unsigned long long load_word(void *skb, unsigned long long off) - asm ("llvm.bpf.load.word"); - -#endif /* __BPF_FUNCS__ */ diff --git a/examples/bpf/bpf_graft.c b/examples/bpf/bpf_graft.c new file mode 100644 index 00000000..f48fd028 --- /dev/null +++ b/examples/bpf/bpf_graft.c @@ -0,0 +1,67 @@ +#include "../../include/bpf_api.h" + +/* This example demonstrates how classifier run-time behaviour + * can be altered with tail calls. We start out with an empty + * jmp_tc array, then add section aaa to the array slot 0, and + * later on atomically replace it with section bbb. Note that + * as shown in other examples, the tc loader can prepopulate + * tail called sections, here we start out with an empty one + * on purpose to show it can also be done this way. + * + * tc filter add dev foo parent ffff: bpf obj graft.o + * tc exec bpf dbg + * [...] + * Socket Thread-20229 [001] ..s. 138993.003923: : fallthrough + * -0 [001] ..s. 138993.202265: : fallthrough + * Socket Thread-20229 [001] ..s. 138994.004149: : fallthrough + * [...] + * + * tc exec bpf graft m:globals/jmp_tc key 0 obj graft.o sec aaa + * tc exec bpf dbg + * [...] + * Socket Thread-19818 [002] ..s. 139012.053587: : aaa + * -0 [002] ..s. 139012.172359: : aaa + * Socket Thread-19818 [001] ..s. 139012.173556: : aaa + * [...] + * + * tc exec bpf graft m:globals/jmp_tc key 0 obj graft.o sec bbb + * tc exec bpf dbg + * [...] + * Socket Thread-19818 [002] ..s. 139022.102967: : bbb + * -0 [002] ..s. 139022.155640: : bbb + * Socket Thread-19818 [001] ..s. 139022.156730: : bbb + * [...] 
+ */ + +BPF_PROG_ARRAY(jmp_tc, 0, PIN_GLOBAL_NS, 1); + +__section("aaa") +int cls_aaa(struct __sk_buff *skb) +{ + char fmt[] = "aaa\n"; + + trace_printk(fmt, sizeof(fmt)); + return TC_H_MAKE(1, 42); +} + +__section("bbb") +int cls_bbb(struct __sk_buff *skb) +{ + char fmt[] = "bbb\n"; + + trace_printk(fmt, sizeof(fmt)); + return TC_H_MAKE(1, 43); +} + +__section_cls_entry +int cls_entry(struct __sk_buff *skb) +{ + char fmt[] = "fallthrough\n"; + + tail_call(skb, &jmp_tc, 0); + trace_printk(fmt, sizeof(fmt)); + + return BPF_H_DEFAULT; +} + +BPF_LICENSE("GPL"); diff --git a/examples/bpf/bpf_prog.c b/examples/bpf/bpf_prog.c index 009febd0..47280492 100644 --- a/examples/bpf/bpf_prog.c +++ b/examples/bpf/bpf_prog.c @@ -168,8 +168,8 @@ /* Common, shared definitions with ebpf_agent.c. */ #include "bpf_shared.h" -/* Selection of BPF helper functions for our example. */ -#include "bpf_funcs.h" +/* BPF helper functions for our example. */ +#include "../../include/bpf_api.h" /* Could be defined here as well, or included from the header. 
*/ #define TC_ACT_UNSPEC (-1) @@ -387,10 +387,10 @@ static inline void cls_update_proto_map(const struct __sk_buff *skb, uint8_t proto = flow->ip_proto; struct count_tuple *ct, _ct; - ct = bpf_map_lookup_elem(&map_proto, &proto); + ct = map_lookup_elem(&map_proto, &proto); if (likely(ct)) { - __sync_fetch_and_add(&ct->packets, 1); - __sync_fetch_and_add(&ct->bytes, skb->len); + lock_xadd(&ct->packets, 1); + lock_xadd(&ct->bytes, skb->len); return; } @@ -398,7 +398,7 @@ static inline void cls_update_proto_map(const struct __sk_buff *skb, _ct.packets = 1; _ct.bytes = skb->len; - bpf_map_update_elem(&map_proto, &proto, &_ct, BPF_ANY); + map_update_elem(&map_proto, &proto, &_ct, BPF_ANY); } static inline void cls_update_queue_map(const struct __sk_buff *skb) @@ -409,11 +409,11 @@ static inline void cls_update_queue_map(const struct __sk_buff *skb) mismatch = skb->queue_mapping != get_smp_processor_id(); - cq = bpf_map_lookup_elem(&map_queue, &queue); + cq = map_lookup_elem(&map_queue, &queue); if (likely(cq)) { - __sync_fetch_and_add(&cq->total, 1); + lock_xadd(&cq->total, 1); if (mismatch) - __sync_fetch_and_add(&cq->mismatch, 1); + lock_xadd(&cq->mismatch, 1); return; } @@ -421,7 +421,7 @@ static inline void cls_update_queue_map(const struct __sk_buff *skb) _cq.total = 1; _cq.mismatch = mismatch ? 1 : 0; - bpf_map_update_elem(&map_queue, &queue, &_cq, BPF_ANY); + map_update_elem(&map_queue, &queue, &_cq, BPF_ANY); } /* eBPF program definitions, placed in various sections, which can @@ -439,7 +439,8 @@ static inline void cls_update_queue_map(const struct __sk_buff *skb) * It is however not required to have multiple programs sharing * a file. 
*/ -__section("classifier") int cls_main(struct __sk_buff *skb) +__section("classifier") +int cls_main(struct __sk_buff *skb) { struct flow_keys flow; @@ -456,13 +457,14 @@ static inline void act_update_drop_map(void) { uint32_t *count, cpu = get_smp_processor_id(); - count = bpf_map_lookup_elem(&map_drops, &cpu); + count = map_lookup_elem(&map_drops, &cpu); if (count) /* Only this cpu is accessing this element. */ (*count)++; } -__section("action-mark") int act_mark_main(struct __sk_buff *skb) +__section("action-mark") +int act_mark_main(struct __sk_buff *skb) { /* You could also mangle skb data here with the helper function * BPF_FUNC_skb_store_bytes, etc. Or, alternatively you could @@ -479,7 +481,8 @@ __section("action-mark") int act_mark_main(struct __sk_buff *skb) return TC_ACT_UNSPEC; } -__section("action-rand") int act_rand_main(struct __sk_buff *skb) +__section("action-rand") +int act_rand_main(struct __sk_buff *skb) { /* Sorry, we're near event horizon ... */ if ((get_prandom_u32() & 3) == 0) { @@ -493,4 +496,4 @@ __section("action-rand") int act_rand_main(struct __sk_buff *skb) /* Last but not least, the file contains a license. Some future helper * functions may only be available with a GPL license. */ -char __license[] __section("license") = "GPL"; +BPF_LICENSE("GPL"); diff --git a/examples/bpf/bpf_shared.c b/examples/bpf/bpf_shared.c new file mode 100644 index 00000000..accc0adf --- /dev/null +++ b/examples/bpf/bpf_shared.c @@ -0,0 +1,48 @@ +#include "../../include/bpf_api.h" + +/* Minimal, stand-alone toy map pinning example: + * + * clang -target bpf -O2 [...] -o bpf_shared.o -c bpf_shared.c + * tc filter add dev foo parent 1: bpf obj bpf_shared.o sec egress + * tc filter add dev foo parent ffff: bpf obj bpf_shared.o sec ingress + * + * Both classifiers will share the very same map instance in this example, + * so map content can be accessed from ingress *and* egress side! 
+ * + * This example has a pinning of PIN_OBJECT_NS, so it's private and + * thus shared among various program sections within the object. + * + * A setting of PIN_GLOBAL_NS would place it into a global namespace, + * so that it can be shared among different object files. A setting + * of PIN_NONE (= 0) means no sharing, so for each tc invocation a new map + * instance is created. + */ + +BPF_ARRAY4(map_sh, 0, PIN_OBJECT_NS, 1); /* or PIN_GLOBAL_NS, or PIN_NONE */ + +__section("egress") +int emain(struct __sk_buff *skb) +{ + int key = 0, *val; + + val = map_lookup_elem(&map_sh, &key); + if (val) + lock_xadd(val, 1); + + return BPF_H_DEFAULT; +} + +__section("ingress") +int imain(struct __sk_buff *skb) +{ + char fmt[] = "map val: %d\n"; + int key = 0, *val; + + val = map_lookup_elem(&map_sh, &key); + if (val) + trace_printk(fmt, sizeof(fmt), *val); + + return BPF_H_DEFAULT; +} + +BPF_LICENSE("GPL"); diff --git a/examples/bpf/bpf_shared.h b/examples/bpf/bpf_shared.h index 46423eca..a24038dd 100644 --- a/examples/bpf/bpf_shared.h +++ b/examples/bpf/bpf_shared.h @@ -1,10 +1,6 @@ #ifndef __BPF_SHARED__ #define __BPF_SHARED__ -#include - -#include "../../include/bpf_elf.h" - enum { BPF_MAP_ID_PROTO, BPF_MAP_ID_QUEUE, @@ -14,7 +10,7 @@ enum { }; struct count_tuple { - long packets; /* type long for __sync_fetch_and_add() */ + long packets; /* type long for lock_xadd() */ long bytes; }; diff --git a/examples/bpf/bpf_tailcall.c b/examples/bpf/bpf_tailcall.c new file mode 100644 index 00000000..040790d0 --- /dev/null +++ b/examples/bpf/bpf_tailcall.c @@ -0,0 +1,99 @@ +#include "../../include/bpf_api.h" + +#define ENTRY_INIT 3 +#define ENTRY_0 0 +#define ENTRY_1 1 +#define MAX_JMP_SIZE 2 + +#define FOO 42 +#define BAR 43 + +/* This example doesn't really do anything useful, but its purpose is to + * demonstrate eBPF tail calls on a very simple example. 
+ * + * cls_entry() is our classifier entry point, from there we jump based on + * skb->hash into cls_case1() or cls_case2(). They are both part of the + * program array jmp_tc. Indicated via __section_tail(), the tc loader + * populates the program arrays with the loaded file descriptors already. + * + * To demonstrate nested jumps, cls_case2() jumps within the same jmp_tc + * array to cls_case1(). And whenever we arrive at cls_case1(), we jump + * into cls_exit(), part of the jump array jmp_ex. + * + * Also, to show it's possible, all programs share map_sh and dump the value + * that the entry point incremented. The sections that are loaded into a + * program array can be atomically replaced during run-time, e.g. to change + * classifier behaviour. + */ + +BPF_PROG_ARRAY(jmp_tc, FOO, PIN_OBJECT_NS, MAX_JMP_SIZE); +BPF_PROG_ARRAY(jmp_ex, BAR, PIN_OBJECT_NS, 1); + +BPF_ARRAY4(map_sh, 0, PIN_OBJECT_NS, 1); + +__section_tail(FOO, ENTRY_0) +int cls_case1(struct __sk_buff *skb) +{ + char fmt[] = "case1: map-val: %d from:%u\n"; + int key = 0, *val; + + val = map_lookup_elem(&map_sh, &key); + if (val) + trace_printk(fmt, sizeof(fmt), *val, skb->cb[0]); + + skb->cb[0] = ENTRY_0; + tail_call(skb, &jmp_ex, ENTRY_0); + + return BPF_H_DEFAULT; +} + +__section_tail(FOO, ENTRY_1) +int cls_case2(struct __sk_buff *skb) +{ + char fmt[] = "case2: map-val: %d from:%u\n"; + int key = 0, *val; + + val = map_lookup_elem(&map_sh, &key); + if (val) + trace_printk(fmt, sizeof(fmt), *val, skb->cb[0]); + + skb->cb[0] = ENTRY_1; + tail_call(skb, &jmp_tc, ENTRY_0); + + return BPF_H_DEFAULT; +} + +__section_tail(BAR, ENTRY_0) +int cls_exit(struct __sk_buff *skb) +{ + char fmt[] = "exit: map-val: %d from:%u\n"; + int key = 0, *val; + + val = map_lookup_elem(&map_sh, &key); + if (val) + trace_printk(fmt, sizeof(fmt), *val, skb->cb[0]); + + /* Termination point. 
*/ + return BPF_H_DEFAULT; +} + +__section_cls_entry +int cls_entry(struct __sk_buff *skb) +{ + char fmt[] = "fallthrough\n"; + int key = 0, *val; + + /* For transferring state, we can use skb->cb[0] ... skb->cb[4]. */ + val = map_lookup_elem(&map_sh, &key); + if (val) { + lock_xadd(val, 1); + + skb->cb[0] = ENTRY_INIT; + tail_call(skb, &jmp_tc, skb->hash & (MAX_JMP_SIZE - 1)); + } + + trace_printk(fmt, sizeof(fmt)); + return BPF_H_DEFAULT; +} + +BPF_LICENSE("GPL"); diff --git a/genl/genl.c b/genl/genl.c index 49b65960..e33fafdf 100644 --- a/genl/genl.c +++ b/genl/genl.c @@ -54,7 +54,7 @@ static int parse_nofopt(struct genl_util *f, int argc, char **argv) return 0; } -static struct genl_util *get_genl_kind(char *str) +static struct genl_util *get_genl_kind(const char *str) { void *dlh; char buf[256]; diff --git a/include/bpf_api.h b/include/bpf_api.h new file mode 100644 index 00000000..0666a312 --- /dev/null +++ b/include/bpf_api.h @@ -0,0 +1,225 @@ +#ifndef __BPF_API__ +#define __BPF_API__ + +/* Note: + * + * This file can be included into eBPF kernel programs. It contains + * a couple of useful helper functions, map/section ABI (bpf_elf.h), + * misc macros and some eBPF specific LLVM built-ins. + */ + +#include + +#include +#include +#include + +#include + +#include "bpf_elf.h" + +/** Misc macros. 
*/ + +#ifndef __stringify +# define __stringify(X) #X +#endif + +#ifndef __maybe_unused +# define __maybe_unused __attribute__((__unused__)) +#endif + +#ifndef offsetof +# define offsetof(TYPE, MEMBER) __builtin_offsetof(TYPE, MEMBER) +#endif + +#ifndef likely +# define likely(X) __builtin_expect(!!(X), 1) +#endif + +#ifndef unlikely +# define unlikely(X) __builtin_expect(!!(X), 0) +#endif + +#ifndef htons +# define htons(X) __constant_htons((X)) +#endif + +#ifndef ntohs +# define ntohs(X) __constant_ntohs((X)) +#endif + +#ifndef htonl +# define htonl(X) __constant_htonl((X)) +#endif + +#ifndef ntohl +# define ntohl(X) __constant_ntohl((X)) +#endif + +/** Section helper macros. */ + +#ifndef __section +# define __section(NAME) \ + __attribute__((section(NAME), used)) +#endif + +#ifndef __section_tail +# define __section_tail(ID, KEY) \ + __section(__stringify(ID) "/" __stringify(KEY)) +#endif + +#ifndef __section_cls_entry +# define __section_cls_entry \ + __section(ELF_SECTION_CLASSIFIER) +#endif + +#ifndef __section_act_entry +# define __section_act_entry \ + __section(ELF_SECTION_ACTION) +#endif + +#ifndef __section_license +# define __section_license \ + __section(ELF_SECTION_LICENSE) +#endif + +#ifndef __section_maps +# define __section_maps \ + __section(ELF_SECTION_MAPS) +#endif + +/** Declaration helper macros. 
*/ + +#ifndef BPF_LICENSE +# define BPF_LICENSE(NAME) \ + char ____license[] __section_license = NAME +#endif + +#ifndef __BPF_MAP +# define __BPF_MAP(NAME, TYPE, ID, SIZE_KEY, SIZE_VALUE, PIN, MAX_ELEM) \ + struct bpf_elf_map __section_maps NAME = { \ + .type = (TYPE), \ + .id = (ID), \ + .size_key = (SIZE_KEY), \ + .size_value = (SIZE_VALUE), \ + .pinning = (PIN), \ + .max_elem = (MAX_ELEM), \ + } +#endif + +#ifndef BPF_HASH +# define BPF_HASH(NAME, ID, SIZE_KEY, SIZE_VALUE, PIN, MAX_ELEM) \ + __BPF_MAP(NAME, BPF_MAP_TYPE_HASH, ID, SIZE_KEY, SIZE_VALUE, \ + PIN, MAX_ELEM) +#endif + +#ifndef BPF_ARRAY +# define BPF_ARRAY(NAME, ID, SIZE_VALUE, PIN, MAX_ELEM) \ + __BPF_MAP(NAME, BPF_MAP_TYPE_ARRAY, ID, sizeof(uint32_t), \ + SIZE_VALUE, PIN, MAX_ELEM) +#endif + +#ifndef BPF_ARRAY2 +# define BPF_ARRAY2(NAME, ID, PIN, MAX_ELEM) \ + BPF_ARRAY(NAME, ID, sizeof(uint16_t), PIN, MAX_ELEM) +#endif + +#ifndef BPF_ARRAY4 +# define BPF_ARRAY4(NAME, ID, PIN, MAX_ELEM) \ + BPF_ARRAY(NAME, ID, sizeof(uint32_t), PIN, MAX_ELEM) +#endif + +#ifndef BPF_ARRAY8 +# define BPF_ARRAY8(NAME, ID, PIN, MAX_ELEM) \ + BPF_ARRAY(NAME, ID, sizeof(uint64_t), PIN, MAX_ELEM) +#endif + +#ifndef BPF_PROG_ARRAY +# define BPF_PROG_ARRAY(NAME, ID, PIN, MAX_ELEM) \ + __BPF_MAP(NAME, BPF_MAP_TYPE_PROG_ARRAY, ID, sizeof(uint32_t), \ + sizeof(uint32_t), PIN, MAX_ELEM) +#endif + +/** Classifier helper */ + +#ifndef BPF_H_DEFAULT +# define BPF_H_DEFAULT -1 +#endif + +/** BPF helper functions for tc. */ + +#ifndef BPF_FUNC +# define BPF_FUNC(NAME, ...) 
\ + (* NAME)(__VA_ARGS__) __maybe_unused = (void *) BPF_FUNC_##NAME +#endif + +/* Map access/manipulation */ +static void *BPF_FUNC(map_lookup_elem, void *map, const void *key); +static int BPF_FUNC(map_update_elem, void *map, const void *key, + const void *value, uint32_t flags); +static int BPF_FUNC(map_delete_elem, void *map, const void *key); + +/* Time access */ +static uint64_t BPF_FUNC(ktime_get_ns); + +/* Debugging */ +static void BPF_FUNC(trace_printk, const char *fmt, int fmt_size, ...); + +/* Random numbers */ +static uint32_t BPF_FUNC(get_prandom_u32); + +/* Tail calls */ +static void BPF_FUNC(tail_call, struct __sk_buff *skb, void *map, + uint32_t index); + +/* System helpers */ +static uint32_t BPF_FUNC(get_smp_processor_id); + +/* Packet misc meta data */ +static uint32_t BPF_FUNC(get_cgroup_classid, struct __sk_buff *skb); +static uint32_t BPF_FUNC(get_route_realm, struct __sk_buff *skb); + +/* Packet redirection */ +static int BPF_FUNC(redirect, int ifindex, uint32_t flags); +static int BPF_FUNC(clone_redirect, struct __sk_buff *skb, int ifindex, + uint32_t flags); + +/* Packet manipulation */ +#define BPF_PSEUDO_HDR 0x10 +#define BPF_HAS_PSEUDO_HDR(flags) ((flags) & BPF_PSEUDO_HDR) +#define BPF_HDR_FIELD_SIZE(flags) ((flags) & 0x0f) + +static int BPF_FUNC(skb_store_bytes, struct __sk_buff *skb, uint32_t off, + void *from, uint32_t len, uint32_t flags); +static int BPF_FUNC(l3_csum_replace, struct __sk_buff *skb, uint32_t off, + uint32_t from, uint32_t to, uint32_t flags); +static int BPF_FUNC(l4_csum_replace, struct __sk_buff *skb, uint32_t off, + uint32_t from, uint32_t to, uint32_t flags); + +/* Packet vlan encap/decap */ +static int BPF_FUNC(skb_vlan_push, struct __sk_buff *skb, uint16_t proto, + uint16_t vlan_tci); +static int BPF_FUNC(skb_vlan_pop, struct __sk_buff *skb); + +/* Packet tunnel encap/decap */ +static int BPF_FUNC(skb_get_tunnel_key, struct __sk_buff *skb, + struct bpf_tunnel_key *to, uint32_t size, uint32_t flags); +static int 
BPF_FUNC(skb_set_tunnel_key, struct __sk_buff *skb, + struct bpf_tunnel_key *from, uint32_t size, uint32_t flags); + +/** LLVM built-ins */ + +#ifndef lock_xadd +# define lock_xadd(ptr, val) ((void) __sync_fetch_and_add(ptr, val)) +#endif + +unsigned long long load_byte(void *skb, unsigned long long off) + asm ("llvm.bpf.load.byte"); + +unsigned long long load_half(void *skb, unsigned long long off) + asm ("llvm.bpf.load.half"); + +unsigned long long load_word(void *skb, unsigned long long off) + asm ("llvm.bpf.load.word"); + +#endif /* __BPF_API__ */ diff --git a/include/bpf_elf.h b/include/bpf_elf.h index 4bd6bb00..31a89743 100644 --- a/include/bpf_elf.h +++ b/include/bpf_elf.h @@ -21,6 +21,11 @@ #define ELF_MAX_MAPS 64 #define ELF_MAX_LICENSE_LEN 128 +/* Object pinning settings */ +#define PIN_NONE 0 +#define PIN_OBJECT_NS 1 +#define PIN_GLOBAL_NS 2 + /* ELF map definition */ struct bpf_elf_map { __u32 type; @@ -28,6 +33,7 @@ struct bpf_elf_map { __u32 size_value; __u32 max_elem; __u32 id; + __u32 pinning; }; #endif /* __BPF_ELF__ */ diff --git a/include/ip6tables.h b/include/ip6tables.h index 1050593a..5f1c5b65 100644 --- a/include/ip6tables.h +++ b/include/ip6tables.h @@ -1,141 +1,20 @@ #ifndef _IP6TABLES_USER_H #define _IP6TABLES_USER_H -#include "iptables_common.h" -#include "libiptc/libip6tc.h" - -struct ip6tables_rule_match -{ - struct ip6tables_rule_match *next; - - struct ip6tables_match *match; -}; - -/* Include file for additions: new matches and targets. */ -struct ip6tables_match -{ - struct ip6tables_match *next; - - ip6t_chainlabel name; - - const char *version; - - /* Size of match data. */ - size_t size; - - /* Size of match data relevent for userspace comparison purposes */ - size_t userspacesize; - - /* Function which prints out usage message. */ - void (*help)(void); - - /* Initialize the match. 
*/ - void (*init)(struct ip6t_entry_match *m, unsigned int *nfcache); - - /* Function which parses command options; returns true if it - ate an option */ - int (*parse)(int c, char **argv, int invert, unsigned int *flags, - const struct ip6t_entry *entry, - unsigned int *nfcache, - struct ip6t_entry_match **match); - - /* Final check; exit if not ok. */ - void (*final_check)(unsigned int flags); - - /* Prints out the match iff non-NULL: put space at end */ - void (*print)(const struct ip6t_ip6 *ip, - const struct ip6t_entry_match *match, int numeric); - - /* Saves the union ipt_matchinfo in parsable form to stdout. */ - void (*save)(const struct ip6t_ip6 *ip, - const struct ip6t_entry_match *match); - - /* Pointer to list of extra command-line options */ - const struct option *extra_opts; - - /* Ignore these men behind the curtain: */ - unsigned int option_offset; - struct ip6t_entry_match *m; - unsigned int mflags; -#ifdef NO_SHARED_LIBS - unsigned int loaded; /* simulate loading so options are merged properly */ -#endif -}; - -struct ip6tables_target -{ - struct ip6tables_target *next; - - ip6t_chainlabel name; - - const char *version; - - /* Size of target data. */ - size_t size; - - /* Size of target data relevent for userspace comparison purposes */ - size_t userspacesize; - - /* Function which prints out usage message. */ - void (*help)(void); - - /* Initialize the target. */ - void (*init)(struct ip6t_entry_target *t, unsigned int *nfcache); - - /* Function which parses command options; returns true if it - ate an option */ - int (*parse)(int c, char **argv, int invert, unsigned int *flags, - const struct ip6t_entry *entry, - struct ip6t_entry_target **target); - - /* Final check; exit if not ok. */ - void (*final_check)(unsigned int flags); - - /* Prints out the target iff non-NULL: put space at end */ - void (*print)(const struct ip6t_ip6 *ip, - const struct ip6t_entry_target *target, int numeric); - - /* Saves the targinfo in parsable form to stdout. 
*/ - void (*save)(const struct ip6t_ip6 *ip, - const struct ip6t_entry_target *target); - - /* Pointer to list of extra command-line options */ - struct option *extra_opts; - - /* Ignore these men behind the curtain: */ - unsigned int option_offset; - struct ip6t_entry_target *t; - unsigned int tflags; - unsigned int used; -#ifdef NO_SHARED_LIBS - unsigned int loaded; /* simulate loading so options are merged properly */ -#endif -}; - -extern int line; +#include +#include +#include +#include /* Your shared library should call one of these. */ -extern void register_match6(struct ip6tables_match *me); -extern void register_target6(struct ip6tables_target *me); - extern int do_command6(int argc, char *argv[], char **table, - ip6tc_handle_t *handle); -/* Keeping track of external matches and targets: linked lists. */ -extern struct ip6tables_match *ip6tables_matches; -extern struct ip6tables_target *ip6tables_targets; + struct xtc_handle **handle, bool restore); -enum ip6t_tryload { - DONT_LOAD, - TRY_LOAD, - LOAD_MUST_SUCCEED -}; +extern int for_each_chain6(int (*fn)(const xt_chainlabel, int, struct xtc_handle *), int verbose, int builtinstoo, struct xtc_handle *handle); +extern int flush_entries6(const xt_chainlabel chain, int verbose, struct xtc_handle *handle); +extern int delete_chain6(const xt_chainlabel chain, int verbose, struct xtc_handle *handle); +void print_rule6(const struct ip6t_entry *e, struct xtc_handle *h, const char *chain, int counters); -extern struct ip6tables_target *find_target(const char *name, enum ip6t_tryload); -extern struct ip6tables_match *find_match(const char *name, enum ip6t_tryload, struct ip6tables_rule_match **match); - -extern int for_each_chain(int (*fn)(const ip6t_chainlabel, int, ip6tc_handle_t *), int verbose, int builtinstoo, ip6tc_handle_t *handle); -extern int flush_entries(const ip6t_chainlabel chain, int verbose, ip6tc_handle_t *handle); -extern int delete_chain(const ip6t_chainlabel chain, int verbose, ip6tc_handle_t 
*handle); -extern int ip6tables_insmod(const char *modname, const char *modprobe); +extern struct xtables_globals ip6tables_globals; #endif /*_IP6TABLES_USER_H*/ diff --git a/include/iptables.h b/include/iptables.h index f1e62e23..78c10abd 100644 --- a/include/iptables.h +++ b/include/iptables.h @@ -1,179 +1,25 @@ #ifndef _IPTABLES_USER_H #define _IPTABLES_USER_H -#include "iptables_common.h" -#include "libiptc/libiptc.h" - -#ifndef IPT_LIB_DIR -#define IPT_LIB_DIR "/usr/local/lib/iptables" -#endif - -#ifndef IPPROTO_SCTP -#define IPPROTO_SCTP 132 -#endif - -#ifndef IPT_SO_GET_REVISION_MATCH /* Old kernel source. */ -#define IPT_SO_GET_REVISION_MATCH (IPT_BASE_CTL + 2) -#define IPT_SO_GET_REVISION_TARGET (IPT_BASE_CTL + 3) - -struct ipt_get_revision -{ - char name[IPT_FUNCTION_MAXNAMELEN-1]; - - u_int8_t revision; -}; -#endif /* IPT_SO_GET_REVISION_MATCH Old kernel source */ - -struct iptables_rule_match -{ - struct iptables_rule_match *next; - - struct iptables_match *match; -}; - -/* Include file for additions: new matches and targets. */ -struct iptables_match -{ - struct iptables_match *next; - - ipt_chainlabel name; - - /* Revision of match (0 by default). */ - u_int8_t revision; - - const char *version; - - /* Size of match data. */ - size_t size; - - /* Size of match data relevent for userspace comparison purposes */ - size_t userspacesize; - - /* Function which prints out usage message. */ - void (*help)(void); - - /* Initialize the match. */ - void (*init)(struct ipt_entry_match *m, unsigned int *nfcache); - - /* Function which parses command options; returns true if it - ate an option */ - int (*parse)(int c, char **argv, int invert, unsigned int *flags, - const struct ipt_entry *entry, - unsigned int *nfcache, - struct ipt_entry_match **match); - - /* Final check; exit if not ok. 
*/ - void (*final_check)(unsigned int flags); - - /* Prints out the match iff non-NULL: put space at end */ - void (*print)(const struct ipt_ip *ip, - const struct ipt_entry_match *match, int numeric); - - /* Saves the match info in parsable form to stdout. */ - void (*save)(const struct ipt_ip *ip, - const struct ipt_entry_match *match); - - /* Pointer to list of extra command-line options */ - const struct option *extra_opts; - - /* Ignore these men behind the curtain: */ - unsigned int option_offset; - struct ipt_entry_match *m; - unsigned int mflags; -#ifdef NO_SHARED_LIBS - unsigned int loaded; /* simulate loading so options are merged properly */ -#endif -}; - -struct iptables_target -{ - struct iptables_target *next; - - ipt_chainlabel name; - - /* Revision of target (0 by default). */ - u_int8_t revision; - - const char *version; - - /* Size of target data. */ - size_t size; - - /* Size of target data relevent for userspace comparison purposes */ - size_t userspacesize; - - /* Function which prints out usage message. */ - void (*help)(void); - - /* Initialize the target. */ - void (*init)(struct ipt_entry_target *t, unsigned int *nfcache); - - /* Function which parses command options; returns true if it - ate an option */ - int (*parse)(int c, char **argv, int invert, unsigned int *flags, - const struct ipt_entry *entry, - struct ipt_entry_target **target); - - /* Final check; exit if not ok. */ - void (*final_check)(unsigned int flags); - - /* Prints out the target iff non-NULL: put space at end */ - void (*print)(const struct ipt_ip *ip, - const struct ipt_entry_target *target, int numeric); - - /* Saves the targinfo in parsable form to stdout. 
*/ - void (*save)(const struct ipt_ip *ip, - const struct ipt_entry_target *target); - - /* Pointer to list of extra command-line options */ - struct option *extra_opts; - - /* Ignore these men behind the curtain: */ - unsigned int option_offset; - struct ipt_entry_target *t; - unsigned int tflags; - unsigned int used; -#ifdef NO_SHARED_LIBS - unsigned int loaded; /* simulate loading so options are merged properly */ -#endif -}; - -extern int line; +#include +#include +#include +#include /* Your shared library should call one of these. */ -extern void register_match(struct iptables_match *me); -extern void register_target(struct iptables_target *me); -extern void xtables_register_target(struct iptables_target *me); -extern int build_st(struct iptables_target *target, struct ipt_entry_target *t); +extern int do_command4(int argc, char *argv[], char **table, + struct xtc_handle **handle, bool restore); +extern int delete_chain4(const xt_chainlabel chain, int verbose, + struct xtc_handle *handle); +extern int flush_entries4(const xt_chainlabel chain, int verbose, + struct xtc_handle *handle); +extern int for_each_chain4(int (*fn)(const xt_chainlabel, int, struct xtc_handle *), + int verbose, int builtinstoo, struct xtc_handle *handle); +extern void print_rule4(const struct ipt_entry *e, + struct xtc_handle *handle, const char *chain, int counters); -extern struct in_addr *dotted_to_addr(const char *dotted); -extern char *addr_to_dotted(const struct in_addr *addrp); -extern char *addr_to_anyname(const struct in_addr *addr); -extern char *mask_to_dotted(const struct in_addr *mask); +extern struct xtables_globals iptables_globals; -extern void parse_hostnetworkmask(const char *name, struct in_addr **addrpp, - struct in_addr *maskp, unsigned int *naddrs); -extern u_int16_t parse_protocol(const char *s); +extern struct xtables_globals xtables_globals; -extern int do_command(int argc, char *argv[], char **table, - iptc_handle_t *handle); -/* Keeping track of external 
matches and targets: linked lists. */ -extern struct iptables_match *iptables_matches; -extern struct iptables_target *iptables_targets; - -enum ipt_tryload { - DONT_LOAD, - TRY_LOAD, - LOAD_MUST_SUCCEED -}; - -extern struct iptables_target *find_target(const char *name, enum ipt_tryload); -extern struct iptables_match *find_match(const char *name, enum ipt_tryload, struct iptables_rule_match **match); - -extern int delete_chain(const ipt_chainlabel chain, int verbose, - iptc_handle_t *handle); -extern int flush_entries(const ipt_chainlabel chain, int verbose, - iptc_handle_t *handle); -extern int for_each_chain(int (*fn)(const ipt_chainlabel, int, iptc_handle_t *), - int verbose, int builtinstoo, iptc_handle_t *handle); #endif /*_IPTABLES_USER_H*/ diff --git a/include/iptables/internal.h b/include/iptables/internal.h new file mode 100644 index 00000000..62a8ecb9 --- /dev/null +++ b/include/iptables/internal.h @@ -0,0 +1,13 @@ +#ifndef IPTABLES_INTERNAL_H +#define IPTABLES_INTERNAL_H 1 + +#define IPTABLES_VERSION "1.6.0" + +/** + * Program's own name and version. + */ +extern const char *program_name, *program_version; + +extern int line; + +#endif /* IPTABLES_INTERNAL_H */ diff --git a/include/libiptc/ipt_kernel_headers.h b/include/libiptc/ipt_kernel_headers.h index 7e878284..a5963e94 100644 --- a/include/libiptc/ipt_kernel_headers.h +++ b/include/libiptc/ipt_kernel_headers.h @@ -5,22 +5,11 @@ #include -#if defined(__GLIBC__) && __GLIBC__ == 2 #include #include #include #include #include +#include #include -#else /* libc5 */ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#endif #endif diff --git a/include/libiptc/libip6tc.h b/include/libiptc/libip6tc.h index 7a247c46..9aed80a0 100644 --- a/include/libiptc/libip6tc.h +++ b/include/libiptc/libip6tc.h @@ -2,153 +2,160 @@ #define _LIBIP6TC_H /* Library which manipulates firewall rules. Version 0.2. 
*/ +#include #include -#include - -#ifndef IP6T_MIN_ALIGN -#define IP6T_MIN_ALIGN (__alignof__(struct ip6t_entry)) +#ifdef __cplusplus +# include +#else +# include /* INT_MAX in ip6_tables.h */ #endif -#define IP6T_ALIGN(s) (((s) + (IP6T_MIN_ALIGN-1)) & ~(IP6T_MIN_ALIGN-1)) +#include +#include -typedef char ip6t_chainlabel[32]; +#define ip6tc_handle xtc_handle +#define ip6t_chainlabel xt_chainlabel #define IP6TC_LABEL_ACCEPT "ACCEPT" #define IP6TC_LABEL_DROP "DROP" #define IP6TC_LABEL_QUEUE "QUEUE" #define IP6TC_LABEL_RETURN "RETURN" -/* Transparent handle type. */ -typedef struct ip6tc_handle *ip6tc_handle_t; - /* Does this chain exist? */ -int ip6tc_is_chain(const char *chain, const ip6tc_handle_t handle); +int ip6tc_is_chain(const char *chain, struct xtc_handle *const handle); /* Take a snapshot of the rules. Returns NULL on error. */ -ip6tc_handle_t ip6tc_init(const char *tablename); +struct xtc_handle *ip6tc_init(const char *tablename); /* Cleanup after ip6tc_init(). */ -void ip6tc_free(ip6tc_handle_t *h); +void ip6tc_free(struct xtc_handle *h); /* Iterator functions to run through the chains. Returns NULL at end. */ -const char *ip6tc_first_chain(ip6tc_handle_t *handle); -const char *ip6tc_next_chain(ip6tc_handle_t *handle); +const char *ip6tc_first_chain(struct xtc_handle *handle); +const char *ip6tc_next_chain(struct xtc_handle *handle); /* Get first rule in the given chain: NULL for empty chain. */ const struct ip6t_entry *ip6tc_first_rule(const char *chain, - ip6tc_handle_t *handle); + struct xtc_handle *handle); /* Returns NULL when rules run out. */ const struct ip6t_entry *ip6tc_next_rule(const struct ip6t_entry *prev, - ip6tc_handle_t *handle); + struct xtc_handle *handle); /* Returns a pointer to the target name of this position. */ const char *ip6tc_get_target(const struct ip6t_entry *e, - ip6tc_handle_t *handle); + struct xtc_handle *handle); /* Is this a built-in chain? 
*/ -int ip6tc_builtin(const char *chain, const ip6tc_handle_t handle); +int ip6tc_builtin(const char *chain, struct xtc_handle *const handle); /* Get the policy of a given built-in chain */ const char *ip6tc_get_policy(const char *chain, - struct ip6t_counters *counters, - ip6tc_handle_t *handle); + struct xt_counters *counters, + struct xtc_handle *handle); /* These functions return TRUE for OK or 0 and set errno. If errno == 0, it means there was a version error (ie. upgrade libiptc). */ /* Rule numbers start at 1 for the first rule. */ /* Insert the entry `fw' in chain `chain' into position `rulenum'. */ -int ip6tc_insert_entry(const ip6t_chainlabel chain, +int ip6tc_insert_entry(const xt_chainlabel chain, const struct ip6t_entry *e, unsigned int rulenum, - ip6tc_handle_t *handle); + struct xtc_handle *handle); /* Atomically replace rule `rulenum' in `chain' with `fw'. */ -int ip6tc_replace_entry(const ip6t_chainlabel chain, +int ip6tc_replace_entry(const xt_chainlabel chain, const struct ip6t_entry *e, unsigned int rulenum, - ip6tc_handle_t *handle); + struct xtc_handle *handle); /* Append entry `fw' to chain `chain'. Equivalent to insert with rulenum = length of chain. */ -int ip6tc_append_entry(const ip6t_chainlabel chain, +int ip6tc_append_entry(const xt_chainlabel chain, const struct ip6t_entry *e, - ip6tc_handle_t *handle); + struct xtc_handle *handle); -/* Delete the first rule in `chain' which matches `fw'. */ -int ip6tc_delete_entry(const ip6t_chainlabel chain, +/* Check whether a matching rule exists */ +int ip6tc_check_entry(const xt_chainlabel chain, const struct ip6t_entry *origfw, unsigned char *matchmask, - ip6tc_handle_t *handle); + struct xtc_handle *handle); + +/* Delete the first rule in `chain' which matches `fw'. */ +int ip6tc_delete_entry(const xt_chainlabel chain, + const struct ip6t_entry *origfw, + unsigned char *matchmask, + struct xtc_handle *handle); /* Delete the rule in position `rulenum' in `chain'. 
*/ -int ip6tc_delete_num_entry(const ip6t_chainlabel chain, +int ip6tc_delete_num_entry(const xt_chainlabel chain, unsigned int rulenum, - ip6tc_handle_t *handle); + struct xtc_handle *handle); /* Check the packet `fw' on chain `chain'. Returns the verdict, or NULL and sets errno. */ -const char *ip6tc_check_packet(const ip6t_chainlabel chain, +const char *ip6tc_check_packet(const xt_chainlabel chain, struct ip6t_entry *, - ip6tc_handle_t *handle); + struct xtc_handle *handle); /* Flushes the entries in the given chain (ie. empties chain). */ -int ip6tc_flush_entries(const ip6t_chainlabel chain, - ip6tc_handle_t *handle); +int ip6tc_flush_entries(const xt_chainlabel chain, + struct xtc_handle *handle); /* Zeroes the counters in a chain. */ -int ip6tc_zero_entries(const ip6t_chainlabel chain, - ip6tc_handle_t *handle); +int ip6tc_zero_entries(const xt_chainlabel chain, + struct xtc_handle *handle); /* Creates a new chain. */ -int ip6tc_create_chain(const ip6t_chainlabel chain, - ip6tc_handle_t *handle); +int ip6tc_create_chain(const xt_chainlabel chain, + struct xtc_handle *handle); /* Deletes a chain. */ -int ip6tc_delete_chain(const ip6t_chainlabel chain, - ip6tc_handle_t *handle); +int ip6tc_delete_chain(const xt_chainlabel chain, + struct xtc_handle *handle); /* Renames a chain. */ -int ip6tc_rename_chain(const ip6t_chainlabel oldname, - const ip6t_chainlabel newname, - ip6tc_handle_t *handle); +int ip6tc_rename_chain(const xt_chainlabel oldname, + const xt_chainlabel newname, + struct xtc_handle *handle); /* Sets the policy on a built-in chain. 
*/ -int ip6tc_set_policy(const ip6t_chainlabel chain, - const ip6t_chainlabel policy, - struct ip6t_counters *counters, - ip6tc_handle_t *handle); +int ip6tc_set_policy(const xt_chainlabel chain, + const xt_chainlabel policy, + struct xt_counters *counters, + struct xtc_handle *handle); /* Get the number of references to this chain */ -int ip6tc_get_references(unsigned int *ref, const ip6t_chainlabel chain, - ip6tc_handle_t *handle); +int ip6tc_get_references(unsigned int *ref, const xt_chainlabel chain, + struct xtc_handle *handle); /* read packet and byte counters for a specific rule */ -struct ip6t_counters *ip6tc_read_counter(const ip6t_chainlabel chain, +struct xt_counters *ip6tc_read_counter(const xt_chainlabel chain, unsigned int rulenum, - ip6tc_handle_t *handle); + struct xtc_handle *handle); /* zero packet and byte counters for a specific rule */ -int ip6tc_zero_counter(const ip6t_chainlabel chain, +int ip6tc_zero_counter(const xt_chainlabel chain, unsigned int rulenum, - ip6tc_handle_t *handle); + struct xtc_handle *handle); /* set packet and byte counters for a specific rule */ -int ip6tc_set_counter(const ip6t_chainlabel chain, +int ip6tc_set_counter(const xt_chainlabel chain, unsigned int rulenum, - struct ip6t_counters *counters, - ip6tc_handle_t *handle); + struct xt_counters *counters, + struct xtc_handle *handle); /* Makes the actual changes. */ -int ip6tc_commit(ip6tc_handle_t *handle); +int ip6tc_commit(struct xtc_handle *handle); /* Get raw socket. */ -int ip6tc_get_raw_socket(); +int ip6tc_get_raw_socket(void); /* Translates errno numbers into more human-readable form than strerror. 
*/ const char *ip6tc_strerror(int err); -/* Return prefix length, or -1 if not contiguous */ -int ipv6_prefix_length(const struct in6_addr *a); +extern void dump_entries6(struct xtc_handle *const); + +extern const struct xtc_ops ip6tc_ops; #endif /* _LIBIP6TC_H */ diff --git a/include/libiptc/libiptc.h b/include/libiptc/libiptc.h index 7628bda6..24cdbdb7 100644 --- a/include/libiptc/libiptc.h +++ b/include/libiptc/libiptc.h @@ -2,155 +2,157 @@ #define _LIBIPTC_H /* Library which manipulates filtering rules. */ +#include #include +#ifdef __cplusplus +# include +#else +# include /* INT_MAX in ip_tables.h */ +#endif #include +#include #ifdef __cplusplus extern "C" { #endif -#ifndef IPT_MIN_ALIGN -/* ipt_entry has pointers and u_int64_t's in it, so if you align to - it, you'll also align to any crazy matches and targets someone - might write */ -#define IPT_MIN_ALIGN (__alignof__(struct ipt_entry)) -#endif - -#define IPT_ALIGN(s) (((s) + ((IPT_MIN_ALIGN)-1)) & ~((IPT_MIN_ALIGN)-1)) - -typedef char ipt_chainlabel[32]; +#define iptc_handle xtc_handle +#define ipt_chainlabel xt_chainlabel #define IPTC_LABEL_ACCEPT "ACCEPT" #define IPTC_LABEL_DROP "DROP" #define IPTC_LABEL_QUEUE "QUEUE" #define IPTC_LABEL_RETURN "RETURN" -/* Transparent handle type. */ -typedef struct iptc_handle *iptc_handle_t; - /* Does this chain exist? */ -int iptc_is_chain(const char *chain, const iptc_handle_t handle); +int iptc_is_chain(const char *chain, struct xtc_handle *const handle); /* Take a snapshot of the rules. Returns NULL on error. */ -iptc_handle_t iptc_init(const char *tablename); +struct xtc_handle *iptc_init(const char *tablename); /* Cleanup after iptc_init(). */ -void iptc_free(iptc_handle_t *h); +void iptc_free(struct xtc_handle *h); /* Iterator functions to run through the chains. Returns NULL at end. 
*/ -const char *iptc_first_chain(iptc_handle_t *handle); -const char *iptc_next_chain(iptc_handle_t *handle); +const char *iptc_first_chain(struct xtc_handle *handle); +const char *iptc_next_chain(struct xtc_handle *handle); /* Get first rule in the given chain: NULL for empty chain. */ const struct ipt_entry *iptc_first_rule(const char *chain, - iptc_handle_t *handle); + struct xtc_handle *handle); /* Returns NULL when rules run out. */ const struct ipt_entry *iptc_next_rule(const struct ipt_entry *prev, - iptc_handle_t *handle); + struct xtc_handle *handle); /* Returns a pointer to the target name of this entry. */ const char *iptc_get_target(const struct ipt_entry *e, - iptc_handle_t *handle); + struct xtc_handle *handle); /* Is this a built-in chain? */ -int iptc_builtin(const char *chain, const iptc_handle_t handle); +int iptc_builtin(const char *chain, struct xtc_handle *const handle); /* Get the policy of a given built-in chain */ const char *iptc_get_policy(const char *chain, - struct ipt_counters *counter, - iptc_handle_t *handle); + struct xt_counters *counter, + struct xtc_handle *handle); /* These functions return TRUE for OK or 0 and set errno. If errno == 0, it means there was a version error (ie. upgrade libiptc). */ /* Rule numbers start at 1 for the first rule. */ /* Insert the entry `e' in chain `chain' into position `rulenum'. */ -int iptc_insert_entry(const ipt_chainlabel chain, +int iptc_insert_entry(const xt_chainlabel chain, const struct ipt_entry *e, unsigned int rulenum, - iptc_handle_t *handle); + struct xtc_handle *handle); /* Atomically replace rule `rulenum' in `chain' with `e'. */ -int iptc_replace_entry(const ipt_chainlabel chain, +int iptc_replace_entry(const xt_chainlabel chain, const struct ipt_entry *e, unsigned int rulenum, - iptc_handle_t *handle); + struct xtc_handle *handle); /* Append entry `e' to chain `chain'. Equivalent to insert with rulenum = length of chain. 
*/ -int iptc_append_entry(const ipt_chainlabel chain, +int iptc_append_entry(const xt_chainlabel chain, const struct ipt_entry *e, - iptc_handle_t *handle); + struct xtc_handle *handle); + +/* Check whether a matching rule exists */ +int iptc_check_entry(const xt_chainlabel chain, + const struct ipt_entry *origfw, + unsigned char *matchmask, + struct xtc_handle *handle); /* Delete the first rule in `chain' which matches `e', subject to matchmask (array of length == origfw) */ -int iptc_delete_entry(const ipt_chainlabel chain, +int iptc_delete_entry(const xt_chainlabel chain, const struct ipt_entry *origfw, unsigned char *matchmask, - iptc_handle_t *handle); + struct xtc_handle *handle); /* Delete the rule in position `rulenum' in `chain'. */ -int iptc_delete_num_entry(const ipt_chainlabel chain, +int iptc_delete_num_entry(const xt_chainlabel chain, unsigned int rulenum, - iptc_handle_t *handle); + struct xtc_handle *handle); /* Check the packet `e' on chain `chain'. Returns the verdict, or NULL and sets errno. */ -const char *iptc_check_packet(const ipt_chainlabel chain, +const char *iptc_check_packet(const xt_chainlabel chain, struct ipt_entry *entry, - iptc_handle_t *handle); + struct xtc_handle *handle); /* Flushes the entries in the given chain (ie. empties chain). */ -int iptc_flush_entries(const ipt_chainlabel chain, - iptc_handle_t *handle); +int iptc_flush_entries(const xt_chainlabel chain, + struct xtc_handle *handle); /* Zeroes the counters in a chain. */ -int iptc_zero_entries(const ipt_chainlabel chain, - iptc_handle_t *handle); +int iptc_zero_entries(const xt_chainlabel chain, + struct xtc_handle *handle); /* Creates a new chain. */ -int iptc_create_chain(const ipt_chainlabel chain, - iptc_handle_t *handle); +int iptc_create_chain(const xt_chainlabel chain, + struct xtc_handle *handle); /* Deletes a chain. 
*/ -int iptc_delete_chain(const ipt_chainlabel chain, - iptc_handle_t *handle); +int iptc_delete_chain(const xt_chainlabel chain, + struct xtc_handle *handle); /* Renames a chain. */ -int iptc_rename_chain(const ipt_chainlabel oldname, - const ipt_chainlabel newname, - iptc_handle_t *handle); +int iptc_rename_chain(const xt_chainlabel oldname, + const xt_chainlabel newname, + struct xtc_handle *handle); /* Sets the policy on a built-in chain. */ -int iptc_set_policy(const ipt_chainlabel chain, - const ipt_chainlabel policy, - struct ipt_counters *counters, - iptc_handle_t *handle); +int iptc_set_policy(const xt_chainlabel chain, + const xt_chainlabel policy, + struct xt_counters *counters, + struct xtc_handle *handle); /* Get the number of references to this chain */ int iptc_get_references(unsigned int *ref, - const ipt_chainlabel chain, - iptc_handle_t *handle); + const xt_chainlabel chain, + struct xtc_handle *handle); /* read packet and byte counters for a specific rule */ -struct ipt_counters *iptc_read_counter(const ipt_chainlabel chain, +struct xt_counters *iptc_read_counter(const xt_chainlabel chain, unsigned int rulenum, - iptc_handle_t *handle); + struct xtc_handle *handle); /* zero packet and byte counters for a specific rule */ -int iptc_zero_counter(const ipt_chainlabel chain, +int iptc_zero_counter(const xt_chainlabel chain, unsigned int rulenum, - iptc_handle_t *handle); + struct xtc_handle *handle); /* set packet and byte counters for a specific rule */ -int iptc_set_counter(const ipt_chainlabel chain, +int iptc_set_counter(const xt_chainlabel chain, unsigned int rulenum, - struct ipt_counters *counters, - iptc_handle_t *handle); + struct xt_counters *counters, + struct xtc_handle *handle); /* Makes the actual changes. */ -int iptc_commit(iptc_handle_t *handle); +int iptc_commit(struct xtc_handle *handle); /* Get raw socket. 
*/ int iptc_get_raw_socket(void); @@ -158,6 +160,10 @@ int iptc_get_raw_socket(void); /* Translates errno numbers into more human-readable form than strerror. */ const char *iptc_strerror(int err); +extern void dump_entries(struct xtc_handle *const); + +extern const struct xtc_ops iptc_ops; + #ifdef __cplusplus } #endif diff --git a/include/libiptc/libxtc.h b/include/libiptc/libxtc.h new file mode 100644 index 00000000..37010188 --- /dev/null +++ b/include/libiptc/libxtc.h @@ -0,0 +1,33 @@ +#ifndef _LIBXTC_H +#define _LIBXTC_H +/* Library which manipulates filtering rules. */ + +#include +#include + +#ifdef __cplusplus +extern "C" { +#endif + +#ifndef XT_MIN_ALIGN +/* xt_entry has pointers and u_int64_t's in it, so if you align to + it, you'll also align to any crazy matches and targets someone + might write */ +#define XT_MIN_ALIGN (__alignof__(struct xt_entry)) +#endif + +#ifndef XT_ALIGN +#define XT_ALIGN(s) (((s) + ((XT_MIN_ALIGN)-1)) & ~((XT_MIN_ALIGN)-1)) +#endif + +#define XTC_LABEL_ACCEPT "ACCEPT" +#define XTC_LABEL_DROP "DROP" +#define XTC_LABEL_QUEUE "QUEUE" +#define XTC_LABEL_RETURN "RETURN" + + +#ifdef __cplusplus +} +#endif + +#endif /* _LIBXTC_H */ diff --git a/include/libiptc/xtcshared.h b/include/libiptc/xtcshared.h new file mode 100644 index 00000000..773ebc4c --- /dev/null +++ b/include/libiptc/xtcshared.h @@ -0,0 +1,20 @@ +#ifndef _LIBXTC_SHARED_H +#define _LIBXTC_SHARED_H 1 + +typedef char xt_chainlabel[32]; +struct xtc_handle; +struct xt_counters; + +struct xtc_ops { + int (*commit)(struct xtc_handle *); + void (*free)(struct xtc_handle *); + int (*builtin)(const char *, struct xtc_handle *const); + int (*is_chain)(const char *, struct xtc_handle *const); + int (*flush_entries)(const xt_chainlabel, struct xtc_handle *); + int (*create_chain)(const xt_chainlabel, struct xtc_handle *); + int (*set_policy)(const xt_chainlabel, const xt_chainlabel, + struct xt_counters *, struct xtc_handle *); + const char *(*strerror)(int); +}; + +#endif /* 
_LIBXTC_SHARED_H */ diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 2e2524d4..39e7f33c 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -269,6 +269,7 @@ enum bpf_func_id { * Return: 0 on success */ BPF_FUNC_perf_event_output, + BPF_FUNC_skb_load_bytes, __BPF_FUNC_MAX_ID, }; diff --git a/include/linux/if_link.h b/include/linux/if_link.h index 5d206c71..d91f2c97 100644 --- a/include/linux/if_link.h +++ b/include/linux/if_link.h @@ -216,6 +216,7 @@ enum in6_addr_gen_mode { IN6_ADDR_GEN_MODE_EUI64, IN6_ADDR_GEN_MODE_NONE, IN6_ADDR_GEN_MODE_STABLE_PRIVACY, + IN6_ADDR_GEN_MODE_RANDOM, }; /* Bridge section */ @@ -460,6 +461,9 @@ enum { IFLA_GENEVE_PORT, /* destination port */ IFLA_GENEVE_COLLECT_METADATA, IFLA_GENEVE_REMOTE6, + IFLA_GENEVE_UDP_CSUM, + IFLA_GENEVE_UDP_ZERO_CSUM6_TX, + IFLA_GENEVE_UDP_ZERO_CSUM6_RX, __IFLA_GENEVE_MAX }; #define IFLA_GENEVE_MAX (__IFLA_GENEVE_MAX - 1) diff --git a/include/linux/ila.h b/include/linux/ila.h index f08e8d77..4f9e1dea 100644 --- a/include/linux/ila.h +++ b/include/linux/ila.h @@ -3,13 +3,35 @@ #ifndef _LINUX_ILA_H #define _LINUX_ILA_H +/* NETLINK_GENERIC related info */ +#define ILA_GENL_NAME "ila" +#define ILA_GENL_VERSION 0x1 + enum { ILA_ATTR_UNSPEC, ILA_ATTR_LOCATOR, /* u64 */ + ILA_ATTR_IDENTIFIER, /* u64 */ + ILA_ATTR_LOCATOR_MATCH, /* u64 */ + ILA_ATTR_IFINDEX, /* s32 */ + ILA_ATTR_DIR, /* u32 */ __ILA_ATTR_MAX, }; #define ILA_ATTR_MAX (__ILA_ATTR_MAX - 1) +enum { + ILA_CMD_UNSPEC, + ILA_CMD_ADD, + ILA_CMD_DEL, + ILA_CMD_GET, + + __ILA_CMD_MAX, +}; + +#define ILA_CMD_MAX (__ILA_CMD_MAX - 1) + +#define ILA_DIR_IN (1 << 0) +#define ILA_DIR_OUT (1 << 1) + #endif /* _LINUX_ILA_H */ diff --git a/include/linux/in6.h b/include/linux/in6.h index 994f4c22..aa5b66df 100644 --- a/include/linux/in6.h +++ b/include/linux/in6.h @@ -196,6 +196,7 @@ struct in6_flowlabel_req { #define IPV6_IPSEC_POLICY 34 #define IPV6_XFRM_POLICY 35 +#define IPV6_HDRINCL 36 #endif /* diff --git a/include/linux/rtnetlink.h 
b/include/linux/rtnetlink.h index 18c543a5..6aaa2a3e 100644 --- a/include/linux/rtnetlink.h +++ b/include/linux/rtnetlink.h @@ -311,6 +311,7 @@ enum rtattr_type_t { RTA_PREF, RTA_ENCAP_TYPE, RTA_ENCAP, + RTA_EXPIRES, __RTA_MAX }; diff --git a/include/linux/sock_diag.h b/include/linux/sock_diag.h index 024e1f4c..dafcb891 100644 --- a/include/linux/sock_diag.h +++ b/include/linux/sock_diag.h @@ -4,6 +4,7 @@ #include #define SOCK_DIAG_BY_FAMILY 20 +#define SOCK_DESTROY 21 struct sock_diag_req { __u8 sdiag_family; diff --git a/include/linux/tipc_netlink.h b/include/linux/tipc_netlink.h index d4c8f142..25eb645e 100644 --- a/include/linux/tipc_netlink.h +++ b/include/linux/tipc_netlink.h @@ -56,6 +56,7 @@ enum { TIPC_NL_NET_GET, TIPC_NL_NET_SET, TIPC_NL_NAME_TABLE_GET, + TIPC_NL_PEER_REMOVE, __TIPC_NL_CMD_MAX, TIPC_NL_CMD_MAX = __TIPC_NL_CMD_MAX - 1 diff --git a/include/utils.h b/include/utils.h index cc821e80..7310f4e0 100644 --- a/include/utils.h +++ b/include/utils.h @@ -40,6 +40,10 @@ extern bool do_all; #define IPSEC_PROTO_ANY 255 #endif +#ifndef CONFDIR +#define CONFDIR "/etc/iproute2" +#endif + #define SPRINT_BSIZE 64 #define SPRINT_BUF(x) char x[SPRINT_BSIZE] @@ -196,6 +200,9 @@ void print_nlmsg_timestamp(FILE *fp, const struct nlmsghdr *n); __attribute__ ((format (printf, (pos_str), (pos_args)))) #endif +#define _textify(x) #x +#define textify(x) _textify(x) + #define htonll(x) ((1==htonl(1)) ? (x) : ((uint64_t)htonl((x) & 0xFFFFFFFF) << 32) | htonl((x) >> 32)) #define ntohll(x) ((1==ntohl(1)) ? (x) : ((uint64_t)ntohl((x) & 0xFFFFFFFF) << 32) | ntohl((x) >> 32)) diff --git a/include/xtables.h b/include/xtables.h new file mode 100644 index 00000000..978ae0d1 --- /dev/null +++ b/include/xtables.h @@ -0,0 +1,567 @@ +#ifndef _XTABLES_H +#define _XTABLES_H + +/* + * Changing any structs/functions may incur a needed change + * in libxtables_vcurrent/vage too. 
+ */ + +#include /* PF_* */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#ifndef IPPROTO_SCTP +#define IPPROTO_SCTP 132 +#endif +#ifndef IPPROTO_DCCP +#define IPPROTO_DCCP 33 +#endif +#ifndef IPPROTO_MH +# define IPPROTO_MH 135 +#endif +#ifndef IPPROTO_UDPLITE +#define IPPROTO_UDPLITE 136 +#endif + +#include + +struct in_addr; + +/* + * .size is here so that there is a somewhat reasonable check + * against the chosen .type. + */ +#define XTOPT_POINTER(stype, member) \ + .ptroff = offsetof(stype, member), \ + .size = sizeof(((stype *)NULL)->member) +#define XTOPT_TABLEEND {.name = NULL} + +/** + * Select the format the input has to conform to, as well as the target type + * (area pointed to with XTOPT_POINTER). Note that the storing is not always + * uniform. @cb->val will be populated with as much as there is space, i.e. + * exactly 2 items for ranges, but the target area can receive more values + * (e.g. in case of ranges), or fewer values (e.g. %XTTYPE_HOSTMASK). 
+ * + * %XTTYPE_NONE: option takes no argument + * %XTTYPE_UINT*: standard integer + * %XTTYPE_UINT*RC: colon-separated range of standard integers + * %XTTYPE_DOUBLE: double-precision floating point number + * %XTTYPE_STRING: arbitrary string + * %XTTYPE_TOSMASK: 8-bit TOS value with optional mask + * %XTTYPE_MARKMASK32: 32-bit mark with optional mask + * %XTTYPE_SYSLOGLEVEL: syslog level by name or number + * %XTTYPE_HOST: one host or address (ptr: union nf_inet_addr) + * %XTTYPE_HOSTMASK: one host or address, with an optional prefix length + * (ptr: union nf_inet_addr; only host portion is stored) + * %XTTYPE_PROTOCOL: protocol number/name from /etc/protocols (ptr: uint8_t) + * %XTTYPE_PORT: 16-bit port name or number (supports %XTOPT_NBO) + * %XTTYPE_PORTRC: colon-separated port range (names acceptable), + * (supports %XTOPT_NBO) + * %XTTYPE_PLEN: prefix length + * %XTTYPE_PLENMASK: prefix length (ptr: union nf_inet_addr) + * %XTTYPE_ETHERMAC: Ethernet MAC address in hex form + */ +enum xt_option_type { + XTTYPE_NONE, + XTTYPE_UINT8, + XTTYPE_UINT16, + XTTYPE_UINT32, + XTTYPE_UINT64, + XTTYPE_UINT8RC, + XTTYPE_UINT16RC, + XTTYPE_UINT32RC, + XTTYPE_UINT64RC, + XTTYPE_DOUBLE, + XTTYPE_STRING, + XTTYPE_TOSMASK, + XTTYPE_MARKMASK32, + XTTYPE_SYSLOGLEVEL, + XTTYPE_HOST, + XTTYPE_HOSTMASK, + XTTYPE_PROTOCOL, + XTTYPE_PORT, + XTTYPE_PORTRC, + XTTYPE_PLEN, + XTTYPE_PLENMASK, + XTTYPE_ETHERMAC, +}; + +/** + * %XTOPT_INVERT: option is invertible (usable with !) 
+ * %XTOPT_MAND: option is mandatory + * %XTOPT_MULTI: option may be specified multiple times + * %XTOPT_PUT: store value into memory at @ptroff + * %XTOPT_NBO: store value in network-byte order + * (only certain XTTYPEs recognize this) + */ +enum xt_option_flags { + XTOPT_INVERT = 1 << 0, + XTOPT_MAND = 1 << 1, + XTOPT_MULTI = 1 << 2, + XTOPT_PUT = 1 << 3, + XTOPT_NBO = 1 << 4, +}; + +/** + * @name: name of option + * @type: type of input and validation method, see %XTTYPE_* + * @id: unique number (within extension) for option, 0-31 + * @excl: bitmask of flags that cannot be used with this option + * @also: bitmask of flags that must be used with this option + * @flags: bitmask of option flags, see %XTOPT_* + * @ptroff: offset into private structure for member + * @size: size of the item pointed to by @ptroff; this is a safeguard + * @min: lowest allowed value (for singular integral types) + * @max: highest allowed value (for singular integral types) + */ +struct xt_option_entry { + const char *name; + enum xt_option_type type; + unsigned int id, excl, also, flags; + unsigned int ptroff; + size_t size; + unsigned int min, max; +}; + +/** + * @arg: input from command line + * @ext_name: name of extension currently being processed + * @entry: current option being processed + * @data: per-extension kernel data block + * @xflags: options of the extension that have been used + * @invert: whether option was used with ! + * @nvals: number of results in uXX_multi + * @val: parsed result + * @udata: per-extension private scratch area + * (cf. 
xtables_{match,target}->udata_size) + */ +struct xt_option_call { + const char *arg, *ext_name; + const struct xt_option_entry *entry; + void *data; + unsigned int xflags; + bool invert; + uint8_t nvals; + union { + uint8_t u8, u8_range[2], syslog_level, protocol; + uint16_t u16, u16_range[2], port, port_range[2]; + uint32_t u32, u32_range[2]; + uint64_t u64, u64_range[2]; + double dbl; + struct { + union nf_inet_addr haddr, hmask; + uint8_t hlen; + }; + struct { + uint8_t tos_value, tos_mask; + }; + struct { + uint32_t mark, mask; + }; + uint8_t ethermac[6]; + } val; + /* Wished for a world where the ones below were gone: */ + union { + struct xt_entry_match **match; + struct xt_entry_target **target; + }; + void *xt_entry; + void *udata; +}; + +/** + * @ext_name: name of extension currently being processed + * @data: per-extension (kernel) data block + * @udata: per-extension private scratch area + * (cf. xtables_{match,target}->udata_size) + * @xflags: options of the extension that have been used + */ +struct xt_fcheck_call { + const char *ext_name; + void *data, *udata; + unsigned int xflags; +}; + +/** + * A "linear"/linked-list based name<->id map, for files similar to + * /etc/iproute2/. + */ +struct xtables_lmap { + char *name; + int id; + struct xtables_lmap *next; +}; + +enum xtables_ext_flags { + XTABLES_EXT_ALIAS = 1 << 0, +}; + +/* Include file for additions: new matches and targets. */ +struct xtables_match +{ + /* + * ABI/API version this module requires. Must be first member, + * as the rest of this struct may be subject to ABI changes. + */ + const char *version; + + struct xtables_match *next; + + const char *name; + const char *real_name; + + /* Revision of match (0 by default). */ + uint8_t revision; + + /* Extension flags */ + uint8_t ext_flags; + + uint16_t family; + + /* Size of match data. 
*/ + size_t size; + + /* Size of match data relevant for userspace comparison purposes */ + size_t userspacesize; + + /* Function which prints out usage message. */ + void (*help)(void); + + /* Initialize the match. */ + void (*init)(struct xt_entry_match *m); + + /* Function which parses command options; returns true if it + ate an option */ + /* entry is struct ipt_entry for example */ + int (*parse)(int c, char **argv, int invert, unsigned int *flags, + const void *entry, + struct xt_entry_match **match); + + /* Final check; exit if not ok. */ + void (*final_check)(unsigned int flags); + + /* Prints out the match iff non-NULL: put space at end */ + /* ip is struct ipt_ip * for example */ + void (*print)(const void *ip, + const struct xt_entry_match *match, int numeric); + + /* Saves the match info in parsable form to stdout. */ + /* ip is struct ipt_ip * for example */ + void (*save)(const void *ip, const struct xt_entry_match *match); + + /* Print match name or alias */ + const char *(*alias)(const struct xt_entry_match *match); + + /* Pointer to list of extra command-line options */ + const struct option *extra_opts; + + /* New parser */ + void (*x6_parse)(struct xt_option_call *); + void (*x6_fcheck)(struct xt_fcheck_call *); + const struct xt_option_entry *x6_options; + + /* Size of per-extension instance extra "global" scratch space */ + size_t udata_size; + + /* Ignore these men behind the curtain: */ + void *udata; + unsigned int option_offset; + struct xt_entry_match *m; + unsigned int mflags; + unsigned int loaded; /* simulate loading so options are merged properly */ +}; + +struct xtables_target +{ + /* + * ABI/API version this module requires. Must be first member, + * as the rest of this struct may be subject to ABI changes. + */ + const char *version; + + struct xtables_target *next; + + + const char *name; + + /* Real target behind this, if any. */ + const char *real_name; + + /* Revision of target (0 by default). 
*/ + uint8_t revision; + + /* Extension flags */ + uint8_t ext_flags; + + uint16_t family; + + + /* Size of target data. */ + size_t size; + + /* Size of target data relevant for userspace comparison purposes */ + size_t userspacesize; + + /* Function which prints out usage message. */ + void (*help)(void); + + /* Initialize the target. */ + void (*init)(struct xt_entry_target *t); + + /* Function which parses command options; returns true if it + ate an option */ + /* entry is struct ipt_entry for example */ + int (*parse)(int c, char **argv, int invert, unsigned int *flags, + const void *entry, + struct xt_entry_target **targetinfo); + + /* Final check; exit if not ok. */ + void (*final_check)(unsigned int flags); + + /* Prints out the target iff non-NULL: put space at end */ + void (*print)(const void *ip, + const struct xt_entry_target *target, int numeric); + + /* Saves the targinfo in parsable form to stdout. */ + void (*save)(const void *ip, + const struct xt_entry_target *target); + + /* Print target name or alias */ + const char *(*alias)(const struct xt_entry_target *target); + + /* Pointer to list of extra command-line options */ + const struct option *extra_opts; + + /* New parser */ + void (*x6_parse)(struct xt_option_call *); + void (*x6_fcheck)(struct xt_fcheck_call *); + const struct xt_option_entry *x6_options; + + size_t udata_size; + + /* Ignore these men behind the curtain: */ + void *udata; + unsigned int option_offset; + struct xt_entry_target *t; + unsigned int tflags; + unsigned int used; + unsigned int loaded; /* simulate loading so options are merged properly */ +}; + +struct xtables_rule_match { + struct xtables_rule_match *next; + struct xtables_match *match; + /* Multiple matches of the same type: the ones before + the current one are completed from parsing point of view */ + bool completed; +}; + +/** + * struct xtables_pprot - + * + * A few hardcoded protocols for 'all' and in case the user has no + * /etc/protocols. 
+ */ +struct xtables_pprot { + const char *name; + uint8_t num; +}; + +enum xtables_tryload { + XTF_DONT_LOAD, + XTF_DURING_LOAD, + XTF_TRY_LOAD, + XTF_LOAD_MUST_SUCCEED, +}; + +enum xtables_exittype { + OTHER_PROBLEM = 1, + PARAMETER_PROBLEM, + VERSION_PROBLEM, + RESOURCE_PROBLEM, + XTF_ONLY_ONCE, + XTF_NO_INVERT, + XTF_BAD_VALUE, + XTF_ONE_ACTION, +}; + +struct xtables_globals +{ + unsigned int option_offset; + const char *program_name, *program_version; + struct option *orig_opts; + struct option *opts; + void (*exit_err)(enum xtables_exittype status, const char *msg, ...) __attribute__((noreturn, format(printf,2,3))); + int (*compat_rev)(const char *name, uint8_t rev, int opt); +}; + +#define XT_GETOPT_TABLEEND {.name = NULL, .has_arg = false} + +#ifdef __cplusplus +extern "C" { +#endif + +extern const char *xtables_modprobe_program; +extern struct xtables_match *xtables_matches; +extern struct xtables_target *xtables_targets; + +extern void xtables_init(void); +extern void xtables_set_nfproto(uint8_t); +extern void *xtables_calloc(size_t, size_t); +extern void *xtables_malloc(size_t); +extern void *xtables_realloc(void *, size_t); + +extern int xtables_insmod(const char *, const char *, bool); +extern int xtables_load_ko(const char *, bool); +extern int xtables_set_params(struct xtables_globals *xtp); +extern void xtables_free_opts(int reset_offset); +extern struct option *xtables_merge_options(struct option *origopts, + struct option *oldopts, const struct option *newopts, + unsigned int *option_offset); + +extern int xtables_init_all(struct xtables_globals *xtp, uint8_t nfproto); +extern struct xtables_match *xtables_find_match(const char *name, + enum xtables_tryload, struct xtables_rule_match **match); +extern struct xtables_target *xtables_find_target(const char *name, + enum xtables_tryload); +extern int xtables_compatible_revision(const char *name, uint8_t revision, + int opt); + +extern void xtables_rule_matches_free(struct xtables_rule_match 
**matches); + +/* Your shared library should call one of these. */ +extern void xtables_register_match(struct xtables_match *me); +extern void xtables_register_matches(struct xtables_match *, unsigned int); +extern void xtables_register_target(struct xtables_target *me); +extern void xtables_register_targets(struct xtables_target *, unsigned int); + +extern bool xtables_strtoul(const char *, char **, uintmax_t *, + uintmax_t, uintmax_t); +extern bool xtables_strtoui(const char *, char **, unsigned int *, + unsigned int, unsigned int); +extern int xtables_service_to_port(const char *name, const char *proto); +extern uint16_t xtables_parse_port(const char *port, const char *proto); +extern void +xtables_parse_interface(const char *arg, char *vianame, unsigned char *mask); + +/* this is a special 64bit data type that is 8-byte aligned */ +#define aligned_u64 uint64_t __attribute__((aligned(8))) + +extern struct xtables_globals *xt_params; +#define xtables_error (xt_params->exit_err) + +extern void xtables_param_act(unsigned int, const char *, ...); + +extern const char *xtables_ipaddr_to_numeric(const struct in_addr *); +extern const char *xtables_ipaddr_to_anyname(const struct in_addr *); +extern const char *xtables_ipmask_to_numeric(const struct in_addr *); +extern struct in_addr *xtables_numeric_to_ipaddr(const char *); +extern struct in_addr *xtables_numeric_to_ipmask(const char *); +extern int xtables_ipmask_to_cidr(const struct in_addr *); +extern void xtables_ipparse_any(const char *, struct in_addr **, + struct in_addr *, unsigned int *); +extern void xtables_ipparse_multiple(const char *, struct in_addr **, + struct in_addr **, unsigned int *); + +extern struct in6_addr *xtables_numeric_to_ip6addr(const char *); +extern const char *xtables_ip6addr_to_numeric(const struct in6_addr *); +extern const char *xtables_ip6addr_to_anyname(const struct in6_addr *); +extern const char *xtables_ip6mask_to_numeric(const struct in6_addr *); +extern int 
xtables_ip6mask_to_cidr(const struct in6_addr *); +extern void xtables_ip6parse_any(const char *, struct in6_addr **, + struct in6_addr *, unsigned int *); +extern void xtables_ip6parse_multiple(const char *, struct in6_addr **, + struct in6_addr **, unsigned int *); + +/** + * Print the specified value to standard output, quoting dangerous + * characters if required. + */ +extern void xtables_save_string(const char *value); + +#define FMT_NUMERIC 0x0001 +#define FMT_NOCOUNTS 0x0002 +#define FMT_KILOMEGAGIGA 0x0004 +#define FMT_OPTIONS 0x0008 +#define FMT_NOTABLE 0x0010 +#define FMT_NOTARGET 0x0020 +#define FMT_VIA 0x0040 +#define FMT_NONEWLINE 0x0080 +#define FMT_LINENUMBERS 0x0100 + +#define FMT_PRINT_RULE (FMT_NOCOUNTS | FMT_OPTIONS | FMT_VIA \ + | FMT_NUMERIC | FMT_NOTABLE) +#define FMT(tab,notab) ((format) & FMT_NOTABLE ? (notab) : (tab)) + +extern void xtables_print_num(uint64_t number, unsigned int format); + +#if defined(ALL_INCLUSIVE) || defined(NO_SHARED_LIBS) +# ifdef _INIT +# undef _init +# define _init _INIT +# endif + extern void init_extensions(void); + extern void init_extensions4(void); + extern void init_extensions6(void); +#else +# define _init __attribute__((constructor)) _INIT +#endif + +extern const struct xtables_pprot xtables_chain_protos[]; +extern uint16_t xtables_parse_protocol(const char *s); + +/* kernel revision handling: x.y.z packed as 0xXXYYZZ; every macro argument is parenthesized so expression arguments expand safely */ +extern int kernel_version; +extern void get_kernel_version(void); +#define LINUX_VERSION(x,y,z) (0x10000*(x) + 0x100*(y) + (z)) +#define LINUX_VERSION_MAJOR(x) (((x)>>16) & 0xFF) +#define LINUX_VERSION_MINOR(x) (((x)>> 8) & 0xFF) +#define LINUX_VERSION_PATCH(x) ( (x) & 0xFF) + +/* xtoptions.c */ +extern void xtables_option_metavalidate(const char *, + const struct xt_option_entry *); +extern struct option *xtables_options_xfrm(struct option *, struct option *, + const struct xt_option_entry *, + unsigned int *); +extern void xtables_option_parse(struct xt_option_call *); +extern void xtables_option_tpcall(unsigned
int, char **, bool, + struct xtables_target *, void *); +extern void xtables_option_mpcall(unsigned int, char **, bool, + struct xtables_match *, void *); +extern void xtables_option_tfcall(struct xtables_target *); +extern void xtables_option_mfcall(struct xtables_match *); +extern void xtables_options_fcheck(const char *, unsigned int, + const struct xt_option_entry *); + +extern struct xtables_lmap *xtables_lmap_init(const char *); +extern void xtables_lmap_free(struct xtables_lmap *); +extern int xtables_lmap_name2id(const struct xtables_lmap *, const char *); +extern const char *xtables_lmap_id2name(const struct xtables_lmap *, int); + +#ifdef XTABLES_INTERNAL + +/* Shipped modules rely on this... */ + +# ifndef ARRAY_SIZE +# define ARRAY_SIZE(x) (sizeof(x) / sizeof(*(x))) +# endif + +extern void _init(void); + +#endif + +#ifdef __cplusplus +} /* extern "C" */ +#endif + +#endif /* _XTABLES_H */ diff --git a/ip/ipaddress.c b/ip/ipaddress.c index a495a391..9d254d27 100644 --- a/ip/ipaddress.c +++ b/ip/ipaddress.c @@ -296,6 +296,9 @@ static void print_af_spec(FILE *fp, struct rtattr *af_spec_attr) case IN6_ADDR_GEN_MODE_STABLE_PRIVACY: fprintf(fp, "addrgenmode stable_secret "); break; + case IN6_ADDR_GEN_MODE_RANDOM: + fprintf(fp, "addrgenmode random "); + break; default: fprintf(fp, "addrgenmode %#.2hhx ", mode); break; diff --git a/ip/iplink.c b/ip/iplink.c index c706d208..5ab9d613 100644 --- a/ip/iplink.c +++ b/ip/iplink.c @@ -84,7 +84,7 @@ void iplink_usage(void) fprintf(stderr, " [ state { auto | enable | disable} ] ]\n"); fprintf(stderr, " [ master DEVICE ]\n"); fprintf(stderr, " [ nomaster ]\n"); - fprintf(stderr, " [ addrgenmode { eui64 | none } ]\n"); + fprintf(stderr, " [ addrgenmode { eui64 | none | stable_secret | random } ]\n"); fprintf(stderr, " [ protodown { on | off } ]\n"); fprintf(stderr, " ip link show [ DEVICE | group GROUP ] [up] [master DEV] [type TYPE]\n"); @@ -176,6 +176,10 @@ static int get_addr_gen_mode(const char *mode) return 
IN6_ADDR_GEN_MODE_EUI64; if (strcasecmp(mode, "none") == 0) return IN6_ADDR_GEN_MODE_NONE; + if (strcasecmp(mode, "stable_secret") == 0) + return IN6_ADDR_GEN_MODE_STABLE_PRIVACY; + if (strcasecmp(mode, "random") == 0) + return IN6_ADDR_GEN_MODE_RANDOM; return -1; } diff --git a/ip/ipmonitor.c b/ip/ipmonitor.c index 8bcf8822..99a237f4 100644 --- a/ip/ipmonitor.c +++ b/ip/ipmonitor.c @@ -284,12 +284,16 @@ int do_ipmonitor(int argc, char **argv) } if (file) { FILE *fp; + int err; + fp = fopen(file, "r"); if (fp == NULL) { perror("Cannot fopen"); exit(-1); } - return rtnl_from_file(fp, accept_msg, stdout); + err = rtnl_from_file(fp, accept_msg, stdout); + fclose(fp); + return err; } if (rtnl_open(&rth, groups) < 0) diff --git a/ip/iproute.c b/ip/iproute.c index 4d86a596..d5e3ebe2 100644 --- a/ip/iproute.c +++ b/ip/iproute.c @@ -86,7 +86,7 @@ static void usage(void) fprintf(stderr, " [ ssthresh NUMBER ] [ realms REALM ] [ src ADDRESS ]\n"); fprintf(stderr, " [ rto_min TIME ] [ hoplimit NUMBER ] [ initrwnd NUMBER ]\n"); fprintf(stderr, " [ features FEATURES ] [ quickack BOOL ] [ congctl NAME ]\n"); - fprintf(stderr, " [ pref PREF ]\n"); + fprintf(stderr, " [ pref PREF ] [ expires TIME ]\n"); fprintf(stderr, "TYPE := [ unicast | local | broadcast | multicast | throw |\n"); fprintf(stderr, " unreachable | prohibit | blackhole | nat ]\n"); fprintf(stderr, "TABLE_ID := [ local | main | default | all | NUMBER ]\n"); @@ -829,6 +829,7 @@ static int iproute_modify(int cmd, unsigned flags, int argc, char **argv) int table_ok = 0; int raw = 0; int type_ok = 0; + static int hz; memset(&req, 0, sizeof(req)); @@ -899,6 +900,14 @@ static int iproute_modify(int cmd, unsigned flags, int argc, char **argv) if (rtnl_dsfield_a2n(&tos, *argv)) invarg("\"tos\" value is invalid\n", *argv); req.r.rtm_tos = tos; + } else if (strcmp(*argv, "expires") == 0 ) { + __u32 expires; + NEXT_ARG(); + if (get_u32(&expires, *argv, 0)) + invarg("\"expires\" value is invalid\n", *argv); + if (!hz) + hz = 
get_user_hz(); + addattr32(&req.n, sizeof(req), RTA_EXPIRES, expires*hz); } else if (matches(*argv, "metric") == 0 || matches(*argv, "priority") == 0 || strcmp(*argv, "preference") == 0) { diff --git a/ip/xfrm_monitor.c b/ip/xfrm_monitor.c index 8b21efad..e6e991af 100644 --- a/ip/xfrm_monitor.c +++ b/ip/xfrm_monitor.c @@ -411,12 +411,16 @@ int do_xfrm_monitor(int argc, char **argv) if (file) { FILE *fp; + int err; + fp = fopen(file, "r"); if (fp == NULL) { perror("Cannot fopen"); exit(-1); } - return rtnl_from_file(fp, xfrm_accept_msg, (void*)stdout); + err = rtnl_from_file(fp, xfrm_accept_msg, stdout); + fclose(fp); + return err; } if (rtnl_open_byproto(&rth, groups, NETLINK_XFRM) < 0) diff --git a/lib/coverity_model.c b/lib/coverity_model.c new file mode 100644 index 00000000..c8963020 --- /dev/null +++ b/lib/coverity_model.c @@ -0,0 +1,19 @@ +/* + * Coverity Scan model + * + * This is a modeling file for Coverity Scan. Modeling helps to avoid false + * positives. + * + * - A model file can't import any header files. + * - Therefore only some built-in primitives like int, char and void are + * available but not wchar_t, NULL etc. + * - Modeling doesn't need full structs and typedefs. Rudimentary structs + * and similar types are sufficient. + * - An uninitialized local pointer is not an error. It signifies that the + * variable could be either NULL or have some data. + * + * Coverity Scan doesn't pick up modifications automatically. The model file + * must be uploaded by an admin. 
+ */ + + diff --git a/lib/rt_names.c b/lib/rt_names.c index 1071a938..f6d17c0e 100644 --- a/lib/rt_names.c +++ b/lib/rt_names.c @@ -23,10 +23,7 @@ #include #include "rt_names.h" - -#ifndef CONFDIR -#define CONFDIR "/etc/iproute2" -#endif +#include "utils.h" #define NAME_MAX_LEN 512 diff --git a/man/man8/ip-link.8.in b/man/man8/ip-link.8.in index ac6f4813..189a8f15 100644 --- a/man/man8/ip-link.8.in +++ b/man/man8/ip-link.8.in @@ -150,7 +150,7 @@ ip-link \- network device configuration .br .B nomaster " |" .br -.B addrgenmode { eui64 | none } +.B addrgenmode { eui64 | none | stable_secret | random } .br .B link-netnsid ID .BR " }" @@ -1029,8 +1029,20 @@ set master device of the device (enslave device). unset master device of the device (release device). .TP -.BR "addrgenmode eui64 " or " addrgenmode none" -set IPv6 address generation mode +.BI addrgenmode " eui64|none|stable_secret|random" +set the IPv6 address generation mode + +.I eui64 +- use a Modified EUI-64 format interface identifier + +.I none +- disable automatic address generation + +.I stable_secret +- generate the interface identifier based on a preset /proc/sys/net/ipv6/conf/{default,DEVICE}/stable_secret + +.I random +- like stable_secret, but auto-generate a new random secret if none is set .TP .BR "link-netnsid " diff --git a/man/man8/ip-route.8.in b/man/man8/ip-route.8.in index 9934a1e8..c764bfc8 100644 --- a/man/man8/ip-route.8.in +++ b/man/man8/ip-route.8.in @@ -130,9 +130,11 @@ replace " } " .B quickack .IR BOOL " ] [ " .B congctl -.IR NAME " ]" +.IR NAME " ] [ " .B pref -.IR PREF " ]" +.IR PREF " ] [ " +.B expires +.IR TIME " ]" .ti -8 .IR TYPE " := [ " @@ -656,6 +658,12 @@ is a set of encapsulation attributes specific to the .in -8 .RE +.TP +.BI expires " TIME " "(4.4+ only)" +the route will be deleted once the expiry time has elapsed. +.B Only +IPv6 routes are supported at present.
+ .TP ip route delete delete route diff --git a/man/man8/tipc-bearer.8 b/man/man8/tipc-bearer.8 index 50a1ed24..565ee01d 100644 --- a/man/man8/tipc-bearer.8 +++ b/man/man8/tipc-bearer.8 @@ -218,6 +218,7 @@ Exit status is 0 if command was successful or a positive integer upon failure. .BR tipc-media (8), .BR tipc-nametable (8), .BR tipc-node (8), +.BR tipc-peer (8), .BR tipc-socket (8) .br .SH REPORTING BUGS diff --git a/man/man8/tipc-link.8 b/man/man8/tipc-link.8 index 3be8c9ad..2ee03a0b 100644 --- a/man/man8/tipc-link.8 +++ b/man/man8/tipc-link.8 @@ -213,6 +213,7 @@ Exit status is 0 if command was successful or a positive integer upon failure. .BR tipc-bearer (8), .BR tipc-nametable (8), .BR tipc-node (8), +.BR tipc-peer (8), .BR tipc-socket (8) .br .SH REPORTING BUGS diff --git a/man/man8/tipc-media.8 b/man/man8/tipc-media.8 index 6c6e2b15..4689cb3f 100644 --- a/man/man8/tipc-media.8 +++ b/man/man8/tipc-media.8 @@ -74,6 +74,7 @@ Exit status is 0 if command was successful or a positive integer upon failure. .BR tipc-link (8), .BR tipc-nametable (8), .BR tipc-node (8), +.BR tipc-peer (8), .BR tipc-socket (8) .br .SH REPORTING BUGS diff --git a/man/man8/tipc-nametable.8 b/man/man8/tipc-nametable.8 index d3397f97..4bcefe47 100644 --- a/man/man8/tipc-nametable.8 +++ b/man/man8/tipc-nametable.8 @@ -87,6 +87,7 @@ Exit status is 0 if command was successful or a positive integer upon failure. .BR tipc-link (8), .BR tipc-media (8), .BR tipc-node (8), +.BR tipc-peer (8), .BR tipc-socket (8) .br .SH REPORTING BUGS diff --git a/man/man8/tipc-node.8 b/man/man8/tipc-node.8 index ef32ec7c..a72a4099 100644 --- a/man/man8/tipc-node.8 +++ b/man/man8/tipc-node.8 @@ -59,6 +59,7 @@ Exit status is 0 if command was successful or a positive integer upon failure. 
.BR tipc-link (8), .BR tipc-media (8), .BR tipc-nametable (8), +.BR tipc-peer (8), .BR tipc-socket (8) .br .SH REPORTING BUGS diff --git a/man/man8/tipc-peer.8 b/man/man8/tipc-peer.8 new file mode 100644 index 00000000..430651f7 --- /dev/null +++ b/man/man8/tipc-peer.8 @@ -0,0 +1,52 @@ +.TH TIPC-PEER 8 "04 Dec 2015" "iproute2" "Linux" + +.\" For consistency, please keep padding right aligned. +.\" For example '.B "foo " bar' and not '.B foo " bar"' + +.SH NAME +tipc-peer \- modify peer information + +.SH SYNOPSIS +.ad l +.in +8 + +.ti -8 +.B tipc peer remove address +.IR ADDRESS + +.SH OPTIONS +Options (flags) that can be passed anywhere in the command chain. +.TP +.BR "\-h" , " --help" +Show help about the last valid command. For example +.B tipc peer --help +will show peer help and +.B tipc --help +will show general help. The position of the option in the string is irrelevant. +.SH DESCRIPTION + +.SS Peer remove +Remove an offline peer node from the local data structures. The peer is +identified by its +.BR address . + +.SH EXIT STATUS +Exit status is 0 if command was successful or a positive integer upon failure. + +.SH SEE ALSO +.BR tipc (8), +.BR tipc-bearer (8), +.BR tipc-link (8), +.BR tipc-media (8), +.BR tipc-nametable (8), +.BR tipc-node (8), +.BR tipc-socket (8) +.br +.SH REPORTING BUGS +Report any bugs to the Network Developers mailing list +.B <netdev@vger.kernel.org> +where the development and maintenance is primarily done. +You do not have to be subscribed to the list to send a message there. + +.SH AUTHOR +Richard Alpe diff --git a/man/man8/tipc.8 b/man/man8/tipc.8 index c1165523..32943fa5 100644 --- a/man/man8/tipc.8 +++ b/man/man8/tipc.8 @@ -87,6 +87,7 @@ Exit status is 0 if command was successful or a positive integer upon failure.
.BR tipc-media (8), .BR tipc-nametable (8), .BR tipc-node (8), +.BR tipc-peer (8), .BR tipc-socket (8) .br .SH REPORTING BUGS diff --git a/misc/lnstat_util.c b/misc/lnstat_util.c index 70a77c56..a2583665 100644 --- a/misc/lnstat_util.c +++ b/misc/lnstat_util.c @@ -172,8 +172,10 @@ static struct lnstat_file *alloc_and_open(const char *path, const char *file) /* allocate */ lf = malloc(sizeof(*lf)); - if (!lf) + if (!lf) { + fprintf(stderr, "out of memory\n"); return NULL; + } /* initialize */ memset(lf, 0, sizeof(*lf)); @@ -190,6 +192,7 @@ static struct lnstat_file *alloc_and_open(const char *path, const char *file) /* open */ lf->fp = fopen(lf->path, "r"); if (!lf->fp) { + perror(lf->path); free(lf); return NULL; } @@ -256,12 +259,16 @@ struct lnstat_file *lnstat_scan_dir(const char *path, const int num_req_files, continue; lf = alloc_and_open(path, de->d_name); - if (!lf) + if (!lf) { + closedir(dir); return NULL; + } /* fill in field structure */ - if (lnstat_scan_fields(lf) < 0) + if (lnstat_scan_fields(lf) < 0) { + closedir(dir); return NULL; + } /* prepend to global list */ lf->next = lnstat_files; diff --git a/tc/e_bpf.c b/tc/e_bpf.c index 218ba404..2d650a46 100644 --- a/tc/e_bpf.c +++ b/tc/e_bpf.c @@ -26,10 +26,19 @@ static char *argv_default[] = { BPF_DEFAULT_CMD, NULL }; static void explain(void) { - fprintf(stderr, "Usage: ... bpf [ import UDS_FILE ] [ run CMD ]\n\n"); + fprintf(stderr, "Usage: ... bpf [ import UDS_FILE ] [ run CMD ]\n"); + fprintf(stderr, " ... bpf [ debug ]\n"); + fprintf(stderr, " ... bpf [ graft MAP_FILE ] [ key KEY ]\n"); + fprintf(stderr, " `... [ object-file OBJ_FILE ] [ type TYPE ] [ section NAME ] [ verbose ]\n"); + fprintf(stderr, " `... 
[ object-pinned PROG_FILE ]\n"); + fprintf(stderr, "\n"); fprintf(stderr, "Where UDS_FILE provides the name of a unix domain socket file\n"); fprintf(stderr, "to import eBPF maps and the optional CMD denotes the command\n"); fprintf(stderr, "to be executed (default: \'%s\').\n", BPF_DEFAULT_CMD); + fprintf(stderr, "Where MAP_FILE points to a pinned map, OBJ_FILE to an object file\n"); + fprintf(stderr, "and PROG_FILE to a pinned program. TYPE can be {cls, act}, where\n"); + fprintf(stderr, "\'cls\' is default. KEY is optional and can be inferred from the\n"); + fprintf(stderr, "section name, otherwise it needs to be provided.\n"); } static int bpf_num_env_entries(void) @@ -58,17 +67,40 @@ static int parse_bpf(struct exec_util *eu, int argc, char **argv) NEXT_ARG(); argv_run = argv; break; - } else if (matches(*argv, "import") == 0 || - matches(*argv, "imp") == 0) { + } else if (matches(*argv, "import") == 0) { NEXT_ARG(); bpf_uds_name = *argv; + } else if (matches(*argv, "debug") == 0 || + matches(*argv, "dbg") == 0) { + if (bpf_trace_pipe()) + fprintf(stderr, + "No trace pipe, tracefs not mounted?\n"); + return -1; + } else if (matches(*argv, "graft") == 0) { + const char *bpf_map_path; + bool has_key = false; + uint32_t key; + + NEXT_ARG(); + bpf_map_path = *argv; + NEXT_ARG(); + if (matches(*argv, "key") == 0) { + NEXT_ARG(); + if (get_unsigned(&key, *argv, 0)) { + fprintf(stderr, "Illegal \"key\"\n"); + return -1; + } + has_key = true; + NEXT_ARG(); + } + return bpf_graft_map(bpf_map_path, has_key ? 
+ &key : NULL, argc, argv); } else { explain(); return -1; } - argc--; - argv++; + NEXT_ARG_FWD(); } if (!bpf_uds_name) { @@ -142,6 +174,6 @@ err: } struct exec_util bpf_exec_util = { - .id = "bpf", - .parse_eopt = parse_bpf, + .id = "bpf", + .parse_eopt = parse_bpf, }; diff --git a/tc/f_bpf.c b/tc/f_bpf.c index ac77af58..afc2e582 100644 --- a/tc/f_bpf.c +++ b/tc/f_bpf.c @@ -11,19 +11,8 @@ #include #include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include + +#include #include "utils.h" #include "tc_util.h" @@ -31,6 +20,13 @@ static const enum bpf_prog_type bpf_type = BPF_PROG_TYPE_SCHED_CLS; +static const int nla_tbl[BPF_NLA_MAX] = { + [BPF_NLA_OPS_LEN] = TCA_BPF_OPS_LEN, + [BPF_NLA_OPS] = TCA_BPF_OPS, + [BPF_NLA_FD] = TCA_BPF_FD, + [BPF_NLA_NAME] = TCA_BPF_NAME, +}; + static void explain(void) { fprintf(stderr, "Usage: ... bpf ...\n"); @@ -42,6 +38,7 @@ static void explain(void) fprintf(stderr, "eBPF use case:\n"); fprintf(stderr, " object-file FILE [ section CLS_NAME ] [ export UDS_FILE ]"); fprintf(stderr, " [ verbose ] [ direct-action ]\n"); + fprintf(stderr, " object-pinned FILE [ direct-action ]\n"); fprintf(stderr, "\n"); fprintf(stderr, "Common remaining options:\n"); fprintf(stderr, " [ action ACTION_SPEC ]\n"); @@ -51,7 +48,8 @@ static void explain(void) fprintf(stderr, "c,t,f,k and s are decimals; s denotes number of 4-tuples\n"); fprintf(stderr, "\n"); fprintf(stderr, "Where FILE points to a file containing the BPF_BYTECODE string,\n"); - fprintf(stderr, "an ELF file containing eBPF map definitions and bytecode.\n"); + fprintf(stderr, "an ELF file containing eBPF map definitions and bytecode, or a\n"); + fprintf(stderr, "pinned eBPF program.\n"); fprintf(stderr, "\n"); fprintf(stderr, "Where CLS_NAME refers to the section name containing the\n"); fprintf(stderr, "classifier (default \'%s\').\n", bpf_default_section(bpf_type)); @@ -66,119 +64,38 @@ static void explain(void) 
static int bpf_parse_opt(struct filter_util *qu, char *handle, int argc, char **argv, struct nlmsghdr *n) { + const char *bpf_obj = NULL, *bpf_uds_name = NULL; struct tcmsg *t = NLMSG_DATA(n); - const char *bpf_uds_name = NULL; - const char *bpf_sec_name = NULL; unsigned int bpf_flags = 0; - char *bpf_obj = NULL; - struct rtattr *tail; bool seen_run = false; - long h = 0; + struct rtattr *tail; int ret = 0; if (argc == 0) return 0; if (handle) { - h = strtol(handle, NULL, 0); - if (h == LONG_MIN || h == LONG_MAX) { - fprintf(stderr, "Illegal handle \"%s\", must be " - "numeric.\n", handle); + if (get_u32(&t->tcm_handle, handle, 0)) { + fprintf(stderr, "Illegal \"handle\"\n"); return -1; } } - t->tcm_handle = h; - tail = (struct rtattr *)(((void *)n) + NLMSG_ALIGN(n->nlmsg_len)); addattr_l(n, MAX_MSG, TCA_OPTIONS, NULL, 0); while (argc > 0) { if (matches(*argv, "run") == 0) { - struct sock_filter bpf_ops[BPF_MAXINSNS]; - bool from_file, ebpf, bpf_verbose; - int ret; - NEXT_ARG(); opt_bpf: - bpf_sec_name = bpf_default_section(bpf_type); - bpf_verbose = false; - ebpf = false; seen_run = true; - - if (strcmp(*argv, "bytecode-file") == 0 || - strcmp(*argv, "bcf") == 0) { - from_file = true; - } else if (strcmp(*argv, "bytecode") == 0 || - strcmp(*argv, "bc") == 0) { - from_file = false; - } else if (strcmp(*argv, "object-file") == 0 || - strcmp(*argv, "obj") == 0) { - ebpf = true; - } else { - fprintf(stderr, "What is \"%s\"?\n", *argv); - explain(); + if (bpf_parse_common(&argc, &argv, nla_tbl, bpf_type, + &bpf_obj, &bpf_uds_name, n)) { + fprintf(stderr, "Failed to retrieve (e)BPF data!\n"); return -1; } - - NEXT_ARG(); - if (ebpf) { - bpf_uds_name = getenv(BPF_ENV_UDS); - bpf_obj = *argv; - - NEXT_ARG_FWD(); - - if (argc > 0 && - (strcmp(*argv, "section") == 0 || - strcmp(*argv, "sec") == 0)) { - NEXT_ARG(); - bpf_sec_name = *argv; - NEXT_ARG_FWD(); - } - if (argc > 0 && !bpf_uds_name && - (strcmp(*argv, "export") == 0 || - strcmp(*argv, "exp") == 0)) { - NEXT_ARG(); 
- bpf_uds_name = *argv; - NEXT_ARG_FWD(); - } - if (argc > 0 && - (strcmp(*argv, "verbose") == 0 || - strcmp(*argv, "verb") == 0)) { - bpf_verbose = true; - NEXT_ARG_FWD(); - } - - PREV_ARG(); - } - - ret = ebpf ? bpf_open_object(bpf_obj, bpf_type, bpf_sec_name, - bpf_verbose) : - bpf_parse_ops(argc, argv, bpf_ops, from_file); - if (ret < 0) { - fprintf(stderr, "%s\n", ebpf ? - "Could not load object" : - "Illegal \"bytecode\""); - return -1; - } - - if (ebpf) { - char bpf_name[256]; - - bpf_obj = basename(bpf_obj); - - snprintf(bpf_name, sizeof(bpf_name), "%s:[%s]", - bpf_obj, bpf_sec_name); - - addattr32(n, MAX_MSG, TCA_BPF_FD, ret); - addattrstrz(n, MAX_MSG, TCA_BPF_NAME, bpf_name); - } else { - addattr16(n, MAX_MSG, TCA_BPF_OPS_LEN, ret); - addattr_l(n, MAX_MSG, TCA_BPF_OPS, &bpf_ops, - ret * sizeof(struct sock_filter)); - } } else if (matches(*argv, "classid") == 0 || - strcmp(*argv, "flowid") == 0) { + matches(*argv, "flowid") == 0) { unsigned int handle; NEXT_ARG(); @@ -204,7 +121,7 @@ opt_bpf: return -1; } continue; - } else if (strcmp(*argv, "help") == 0) { + } else if (matches(*argv, "help") == 0) { explain(); return -1; } else { @@ -280,7 +197,7 @@ static int bpf_print_opt(struct filter_util *qu, FILE *f, } struct filter_util bpf_filter_util = { - .id = "bpf", - .parse_fopt = bpf_parse_opt, - .print_fopt = bpf_print_opt, + .id = "bpf", + .parse_fopt = bpf_parse_opt, + .print_fopt = bpf_print_opt, }; diff --git a/tc/m_bpf.c b/tc/m_bpf.c index fb4c3c7f..c5e2fa5b 100644 --- a/tc/m_bpf.c +++ b/tc/m_bpf.c @@ -12,20 +12,23 @@ #include #include -#include -#include -#include -#include + #include #include #include "utils.h" -#include "rt_names.h" #include "tc_util.h" #include "tc_bpf.h" static const enum bpf_prog_type bpf_type = BPF_PROG_TYPE_SCHED_ACT; +static const int nla_tbl[BPF_NLA_MAX] = { + [BPF_NLA_OPS_LEN] = TCA_ACT_BPF_OPS_LEN, + [BPF_NLA_OPS] = TCA_ACT_BPF_OPS, + [BPF_NLA_FD] = TCA_ACT_BPF_FD, + [BPF_NLA_NAME] = TCA_ACT_BPF_NAME, +}; + static void 
explain(void) { fprintf(stderr, "Usage: ... bpf ... [ index INDEX ]\n"); @@ -37,12 +40,14 @@ static void explain(void) fprintf(stderr, "eBPF use case:\n"); fprintf(stderr, " object-file FILE [ section ACT_NAME ] [ export UDS_FILE ]"); fprintf(stderr, " [ verbose ]\n"); + fprintf(stderr, " object-pinned FILE\n"); fprintf(stderr, "\n"); fprintf(stderr, "Where BPF_BYTECODE := \'s,c t f k,c t f k,c t f k,...\'\n"); fprintf(stderr, "c,t,f,k and s are decimals; s denotes number of 4-tuples\n"); fprintf(stderr, "\n"); fprintf(stderr, "Where FILE points to a file containing the BPF_BYTECODE string,\n"); - fprintf(stderr, "an ELF file containing eBPF map definitions and bytecode.\n"); + fprintf(stderr, "an ELF file containing eBPF map definitions and bytecode, or a\n"); + fprintf(stderr, "pinned eBPF program.\n"); fprintf(stderr, "\n"); fprintf(stderr, "Where ACT_NAME refers to the section name containing the\n"); fprintf(stderr, "action (default \'%s\').\n", bpf_default_section(bpf_type)); @@ -54,114 +59,40 @@ static void explain(void) fprintf(stderr, "explicitly specifies an action index upon creation.\n"); } -static void usage(void) +static int bpf_parse_opt(struct action_util *a, int *ptr_argc, char ***ptr_argv, + int tca_id, struct nlmsghdr *n) { - explain(); - exit(-1); -} - -static int parse_bpf(struct action_util *a, int *argc_p, char ***argv_p, - int tca_id, struct nlmsghdr *n) -{ - char **argv = *argv_p, bpf_name[256]; + const char *bpf_obj = NULL, *bpf_uds_name = NULL; + struct tc_act_bpf parm; + bool seen_run = false; struct rtattr *tail; - struct tc_act_bpf parm = { 0 }; - struct sock_filter bpf_ops[BPF_MAXINSNS]; - bool ebpf_fill = false, bpf_fill = false; - bool ebpf = false, seen_run = false; - const char *bpf_uds_name = NULL; - const char *bpf_sec_name = NULL; - char *bpf_obj = NULL; - int argc = *argc_p, ret = 0; - __u16 bpf_len = 0; - __u32 bpf_fd = 0; + int argc, ret = 0; + char **argv; + + argv = *ptr_argv; + argc = *ptr_argc; if (matches(*argv, "bpf") 
!= 0) return -1; NEXT_ARG(); + tail = NLMSG_TAIL(n); + addattr_l(n, MAX_MSG, tca_id, NULL, 0); + while (argc > 0) { if (matches(*argv, "run") == 0) { - bool from_file, bpf_verbose; - int ret; - NEXT_ARG(); opt_bpf: - bpf_sec_name = bpf_default_section(bpf_type); - bpf_verbose = false; seen_run = true; - - if (strcmp(*argv, "bytecode-file") == 0 || - strcmp(*argv, "bcf") == 0) { - from_file = true; - } else if (strcmp(*argv, "bytecode") == 0 || - strcmp(*argv, "bc") == 0) { - from_file = false; - } else if (strcmp(*argv, "object-file") == 0 || - strcmp(*argv, "obj") == 0) { - ebpf = true; - } else { - fprintf(stderr, "unexpected \"%s\"\n", *argv); - explain(); + if (bpf_parse_common(&argc, &argv, nla_tbl, bpf_type, + &bpf_obj, &bpf_uds_name, n)) { + fprintf(stderr, "Failed to retrieve (e)BPF data!\n"); return -1; } - - NEXT_ARG(); - if (ebpf) { - bpf_uds_name = getenv(BPF_ENV_UDS); - bpf_obj = *argv; - - NEXT_ARG_FWD(); - - if (argc > 0 && - (strcmp(*argv, "section") == 0 || - strcmp(*argv, "sec") == 0)) { - NEXT_ARG(); - bpf_sec_name = *argv; - NEXT_ARG_FWD(); - } - if (argc > 0 && !bpf_uds_name && - (strcmp(*argv, "export") == 0 || - strcmp(*argv, "exp") == 0)) { - NEXT_ARG(); - bpf_uds_name = *argv; - NEXT_ARG_FWD(); - } - if (argc > 0 && - (strcmp(*argv, "verbose") == 0 || - strcmp(*argv, "verb") == 0)) { - bpf_verbose = true; - NEXT_ARG_FWD(); - } - - PREV_ARG(); - } - - ret = ebpf ? bpf_open_object(bpf_obj, bpf_type, bpf_sec_name, - bpf_verbose) : - bpf_parse_ops(argc, argv, bpf_ops, from_file); - if (ret < 0) { - fprintf(stderr, "%s\n", ebpf ? 
- "Could not load object" : - "Illegal \"bytecode\""); - return -1; - } - - if (ebpf) { - bpf_obj = basename(bpf_obj); - - snprintf(bpf_name, sizeof(bpf_name), "%s:[%s]", - bpf_obj, bpf_sec_name); - - bpf_fd = ret; - ebpf_fill = true; - } else { - bpf_len = ret; - bpf_fill = true; - } } else if (matches(*argv, "help") == 0) { - usage(); + explain(); + return -1; } else if (matches(*argv, "index") == 0) { break; } else { @@ -173,7 +104,9 @@ opt_bpf: NEXT_ARG_FWD(); } + memset(&parm, 0, sizeof(parm)); parm.action = TC_ACT_PIPE; + if (argc) { if (matches(*argv, "reclassify") == 0) { parm.action = TC_ACT_RECLASSIFY; @@ -207,32 +140,19 @@ opt_bpf: } } - tail = NLMSG_TAIL(n); - - addattr_l(n, MAX_MSG, tca_id, NULL, 0); addattr_l(n, MAX_MSG, TCA_ACT_BPF_PARMS, &parm, sizeof(parm)); - - if (ebpf_fill) { - addattr32(n, MAX_MSG, TCA_ACT_BPF_FD, bpf_fd); - addattrstrz(n, MAX_MSG, TCA_ACT_BPF_NAME, bpf_name); - } else if (bpf_fill) { - addattr16(n, MAX_MSG, TCA_ACT_BPF_OPS_LEN, bpf_len); - addattr_l(n, MAX_MSG, TCA_ACT_BPF_OPS, &bpf_ops, - bpf_len * sizeof(struct sock_filter)); - } - tail->rta_len = (char *)NLMSG_TAIL(n) - (char *)tail; - *argc_p = argc; - *argv_p = argv; - if (bpf_uds_name) ret = bpf_send_map_fds(bpf_uds_name, bpf_obj); + *ptr_argc = argc; + *ptr_argv = argv; + return ret; } -static int print_bpf(struct action_util *au, FILE *f, struct rtattr *arg) +static int bpf_print_opt(struct action_util *au, FILE *f, struct rtattr *arg) { struct rtattr *tb[TCA_ACT_BPF_MAX + 1]; struct tc_act_bpf *parm; @@ -249,7 +169,6 @@ static int print_bpf(struct action_util *au, FILE *f, struct rtattr *arg) } parm = RTA_DATA(tb[TCA_ACT_BPF_PARMS]); - fprintf(f, "bpf "); if (tb[TCA_ACT_BPF_NAME]) @@ -276,12 +195,11 @@ static int print_bpf(struct action_util *au, FILE *f, struct rtattr *arg) } fprintf(f, "\n "); - return 0; } struct action_util bpf_action_util = { - .id = "bpf", - .parse_aopt = parse_bpf, - .print_aopt = print_bpf, + .id = "bpf", + .parse_aopt = bpf_parse_opt, + 
.print_aopt = bpf_print_opt, }; diff --git a/tc/tc_bpf.c b/tc/tc_bpf.c index 276871a5..f9b2b007 100644 --- a/tc/tc_bpf.c +++ b/tc/tc_bpf.c @@ -20,18 +20,25 @@ #include #include #include -#include -#include -#include -#include -#include -#include #ifdef HAVE_ELF #include #include #endif +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + #include "utils.h" #include "bpf_elf.h" @@ -40,9 +47,51 @@ #include "tc_util.h" #include "tc_bpf.h" -int bpf_parse_string(char *arg, bool from_file, __u16 *bpf_len, - char **bpf_string, bool *need_release, - const char separator) +#ifdef HAVE_ELF +static int bpf_obj_open(const char *path, enum bpf_prog_type type, + const char *sec, bool verbose); +#else +static int bpf_obj_open(const char *path, enum bpf_prog_type type, + const char *sec, bool verbose) +{ + fprintf(stderr, "No ELF library support compiled in.\n"); + errno = ENOSYS; + return -1; +} +#endif + +static inline __u64 bpf_ptr_to_u64(const void *ptr) +{ + return (__u64)(unsigned long)ptr; +} + +static int bpf(int cmd, union bpf_attr *attr, unsigned int size) +{ +#ifdef __NR_bpf + return syscall(__NR_bpf, cmd, attr, size); +#else + fprintf(stderr, "No bpf syscall, kernel headers too old?\n"); + errno = ENOSYS; + return -1; +#endif +} + +static int bpf_map_update(int fd, const void *key, const void *value, + uint64_t flags) +{ + union bpf_attr attr = { + .map_fd = fd, + .key = bpf_ptr_to_u64(key), + .value = bpf_ptr_to_u64(value), + .flags = flags, + }; + + return bpf(BPF_MAP_UPDATE_ELEM, &attr, sizeof(attr)); +} + +static int bpf_parse_string(char *arg, bool from_file, __u16 *bpf_len, + char **bpf_string, bool *need_release, + const char separator) { char sp; @@ -90,8 +139,8 @@ int bpf_parse_string(char *arg, bool from_file, __u16 *bpf_len, return 0; } -int bpf_parse_ops(int argc, char **argv, struct sock_filter *bpf_ops, - bool from_file) +static int bpf_ops_parse(int argc, char **argv, struct sock_filter 
*bpf_ops, + bool from_file) { char *bpf_string, *token, separator = ','; int ret = 0, i = 0; @@ -135,7 +184,6 @@ int bpf_parse_ops(int argc, char **argv, struct sock_filter *bpf_ops, goto out; } ret = bpf_len; - out: if (need_release) free(bpf_string); @@ -161,6 +209,246 @@ void bpf_print_ops(FILE *f, struct rtattr *bpf_ops, __u16 len) ops[i].jf, ops[i].k); } +static int bpf_map_selfcheck_pinned(int fd, const struct bpf_elf_map *map, + int length) +{ + char file[PATH_MAX], buff[4096]; + struct bpf_elf_map tmp, zero; + unsigned int val; + FILE *fp; + + snprintf(file, sizeof(file), "/proc/%d/fdinfo/%d", getpid(), fd); + + fp = fopen(file, "r"); + if (!fp) { + fprintf(stderr, "No procfs support?!\n"); + return -EIO; + } + + memset(&tmp, 0, sizeof(tmp)); + while (fgets(buff, sizeof(buff), fp)) { + if (sscanf(buff, "map_type:\t%u", &val) == 1) + tmp.type = val; + else if (sscanf(buff, "key_size:\t%u", &val) == 1) + tmp.size_key = val; + else if (sscanf(buff, "value_size:\t%u", &val) == 1) + tmp.size_value = val; + else if (sscanf(buff, "max_entries:\t%u", &val) == 1) + tmp.max_elem = val; + } + + fclose(fp); + + if (!memcmp(&tmp, map, length)) { + return 0; + } else { + memset(&zero, 0, sizeof(zero)); + /* If kernel doesn't have eBPF-related fdinfo, we cannot do much, + * so just accept it. We know we do have an eBPF fd and in this + * case, everything is 0. It is guaranteed that no such map exists + * since map type of 0 is unloadable BPF_MAP_TYPE_UNSPEC. 
+ */ + if (!memcmp(&tmp, &zero, length)) + return 0; + + fprintf(stderr, "Map specs from pinned file differ!\n"); + return -EINVAL; + } +} + +static int bpf_mnt_fs(const char *target) +{ + bool bind_done = false; + + while (mount("", target, "none", MS_PRIVATE | MS_REC, NULL)) { + if (errno != EINVAL || bind_done) { + fprintf(stderr, "mount --make-private %s failed: %s\n", + target, strerror(errno)); + return -1; + } + + if (mount(target, target, "none", MS_BIND, NULL)) { + fprintf(stderr, "mount --bind %s %s failed: %s\n", + target, target, strerror(errno)); + return -1; + } + + bind_done = true; + } + + if (mount("bpf", target, "bpf", 0, NULL)) { + fprintf(stderr, "mount -t bpf bpf %s failed: %s\n", + target, strerror(errno)); + return -1; + } + + return 0; +} + +static int bpf_valid_mntpt(const char *mnt, unsigned long magic) +{ + struct statfs st_fs; + + if (statfs(mnt, &st_fs) < 0) + return -ENOENT; + if ((unsigned long)st_fs.f_type != magic) + return -ENOENT; + + return 0; +} + +static const char *bpf_find_mntpt(const char *fstype, unsigned long magic, + char *mnt, int len, + const char * const *known_mnts) +{ + const char * const *ptr; + char type[100]; + FILE *fp; + + if (known_mnts) { + ptr = known_mnts; + while (*ptr) { + if (bpf_valid_mntpt(*ptr, magic) == 0) { + strncpy(mnt, *ptr, len - 1); + mnt[len - 1] = 0; + return mnt; + } + ptr++; + } + } + + fp = fopen("/proc/mounts", "r"); + if (fp == NULL || len != PATH_MAX) + return NULL; + + while (fscanf(fp, "%*s %" textify(PATH_MAX) "s %99s %*s %*d %*d\n", + mnt, type) == 2) { + if (strcmp(type, fstype) == 0) + break; + } + + fclose(fp); + if (strcmp(type, fstype) != 0) + return NULL; + + return mnt; +} + +int bpf_trace_pipe(void) +{ + char tracefs_mnt[PATH_MAX] = TRACE_DIR_MNT; + static const char * const tracefs_known_mnts[] = { + TRACE_DIR_MNT, + "/sys/kernel/debug/tracing", + "/tracing", + "/trace", + 0, + }; + char tpipe[PATH_MAX]; + const char *mnt; + int fd; + + mnt = bpf_find_mntpt("tracefs", 
TRACEFS_MAGIC, tracefs_mnt, + sizeof(tracefs_mnt), tracefs_known_mnts); + if (!mnt) { + fprintf(stderr, "tracefs not mounted?\n"); + return -1; + } + + snprintf(tpipe, sizeof(tpipe), "%s/trace_pipe", mnt); + + fd = open(tpipe, O_RDONLY); + if (fd < 0) + return -1; + + fprintf(stderr, "Running! Hang up with ^C!\n\n"); + while (1) { + static char buff[4096]; + ssize_t ret; + + ret = read(fd, buff, sizeof(buff) - 1); + if (ret > 0) { + write(2, buff, ret); + fflush(stderr); + } + } + + return 0; +} + +static const char *bpf_get_tc_dir(void) +{ + static bool bpf_mnt_cached = false; + static char bpf_tc_dir[PATH_MAX]; + static const char *mnt; + static const char * const bpf_known_mnts[] = { + BPF_DIR_MNT, + 0, + }; + char bpf_mnt[PATH_MAX] = BPF_DIR_MNT; + char bpf_glo_dir[PATH_MAX]; + int ret; + + if (bpf_mnt_cached) + goto done; + + mnt = bpf_find_mntpt("bpf", BPF_FS_MAGIC, bpf_mnt, sizeof(bpf_mnt), + bpf_known_mnts); + if (!mnt) { + mnt = getenv(BPF_ENV_MNT); + if (!mnt) + mnt = BPF_DIR_MNT; + ret = bpf_mnt_fs(mnt); + if (ret) { + mnt = NULL; + goto out; + } + } + + snprintf(bpf_tc_dir, sizeof(bpf_tc_dir), "%s/%s", mnt, BPF_DIR_TC); + ret = mkdir(bpf_tc_dir, S_IRWXU); + if (ret && errno != EEXIST) { + fprintf(stderr, "mkdir %s failed: %s\n", bpf_tc_dir, + strerror(errno)); + mnt = NULL; + goto out; + } + + snprintf(bpf_glo_dir, sizeof(bpf_glo_dir), "%s/%s", + bpf_tc_dir, BPF_DIR_GLOBALS); + ret = mkdir(bpf_glo_dir, S_IRWXU); + if (ret && errno != EEXIST) { + fprintf(stderr, "mkdir %s failed: %s\n", bpf_glo_dir, + strerror(errno)); + mnt = NULL; + goto out; + } + + mnt = bpf_tc_dir; +out: + bpf_mnt_cached = true; +done: + return mnt; +} + +static int bpf_obj_get(const char *pathname) +{ + union bpf_attr attr; + char tmp[PATH_MAX]; + + if (strlen(pathname) > 2 && pathname[0] == 'm' && + pathname[1] == ':' && bpf_get_tc_dir()) { + snprintf(tmp, sizeof(tmp), "%s/%s", + bpf_get_tc_dir(), pathname + 2); + pathname = tmp; + } + + memset(&attr, 0, sizeof(attr)); + 
attr.pathname = bpf_ptr_to_u64(pathname); + + return bpf(BPF_OBJ_GET, &attr, sizeof(attr)); +} + const char *bpf_default_section(const enum bpf_prog_type type) { switch (type) { @@ -173,18 +461,262 @@ const char *bpf_default_section(const enum bpf_prog_type type) } } +enum bpf_mode { + CBPF_BYTECODE = 0, + CBPF_FILE, + EBPF_OBJECT, + EBPF_PINNED, + __BPF_MODE_MAX, +#define BPF_MODE_MAX __BPF_MODE_MAX +}; + +static int bpf_parse(int *ptr_argc, char ***ptr_argv, const bool *opt_tbl, + enum bpf_prog_type *type, enum bpf_mode *mode, + const char **ptr_object, const char **ptr_section, + const char **ptr_uds_name, struct sock_filter *opcodes) +{ + const char *file, *section, *uds_name; + bool verbose = false; + int ret, argc; + char **argv; + + argv = *ptr_argv; + argc = *ptr_argc; + + if (opt_tbl[CBPF_BYTECODE] && + (matches(*argv, "bytecode") == 0 || + strcmp(*argv, "bc") == 0)) { + *mode = CBPF_BYTECODE; + } else if (opt_tbl[CBPF_FILE] && + (matches(*argv, "bytecode-file") == 0 || + strcmp(*argv, "bcf") == 0)) { + *mode = CBPF_FILE; + } else if (opt_tbl[EBPF_OBJECT] && + (matches(*argv, "object-file") == 0 || + strcmp(*argv, "obj") == 0)) { + *mode = EBPF_OBJECT; + } else if (opt_tbl[EBPF_PINNED] && + (matches(*argv, "object-pinned") == 0 || + matches(*argv, "pinned") == 0 || + matches(*argv, "fd") == 0)) { + *mode = EBPF_PINNED; + } else { + fprintf(stderr, "What mode is \"%s\"?\n", *argv); + return -1; + } + + NEXT_ARG(); + file = section = uds_name = NULL; + if (*mode == EBPF_OBJECT || *mode == EBPF_PINNED) { + file = *argv; + NEXT_ARG_FWD(); + + if (*type == BPF_PROG_TYPE_UNSPEC) { + if (argc > 0 && matches(*argv, "type") == 0) { + NEXT_ARG(); + if (matches(*argv, "cls") == 0) { + *type = BPF_PROG_TYPE_SCHED_CLS; + } else if (matches(*argv, "act") == 0) { + *type = BPF_PROG_TYPE_SCHED_ACT; + } else { + fprintf(stderr, "What type is \"%s\"?\n", + *argv); + return -1; + } + NEXT_ARG_FWD(); + } else { + *type = BPF_PROG_TYPE_SCHED_CLS; + } + } + + section = 
bpf_default_section(*type); + if (argc > 0 && matches(*argv, "section") == 0) { + NEXT_ARG(); + section = *argv; + NEXT_ARG_FWD(); + } + + uds_name = getenv(BPF_ENV_UDS); + if (argc > 0 && !uds_name && + matches(*argv, "export") == 0) { + NEXT_ARG(); + uds_name = *argv; + NEXT_ARG_FWD(); + } + + if (argc > 0 && matches(*argv, "verbose") == 0) { + verbose = true; + NEXT_ARG_FWD(); + } + + PREV_ARG(); + } + + if (*mode == CBPF_BYTECODE || *mode == CBPF_FILE) + ret = bpf_ops_parse(argc, argv, opcodes, *mode == CBPF_FILE); + else if (*mode == EBPF_OBJECT) + ret = bpf_obj_open(file, *type, section, verbose); + else if (*mode == EBPF_PINNED) + ret = bpf_obj_get(file); + else + return -1; + + if (ptr_object) + *ptr_object = file; + if (ptr_section) + *ptr_section = section; + if (ptr_uds_name) + *ptr_uds_name = uds_name; + + *ptr_argc = argc; + *ptr_argv = argv; + + return ret; +} + +int bpf_parse_common(int *ptr_argc, char ***ptr_argv, const int *nla_tbl, + enum bpf_prog_type type, const char **ptr_object, + const char **ptr_uds_name, struct nlmsghdr *n) +{ + struct sock_filter opcodes[BPF_MAXINSNS]; + const bool opt_tbl[BPF_MODE_MAX] = { + [CBPF_BYTECODE] = true, + [CBPF_FILE] = true, + [EBPF_OBJECT] = true, + [EBPF_PINNED] = true, + }; + char annotation[256]; + const char *section; + enum bpf_mode mode; + int ret; + + ret = bpf_parse(ptr_argc, ptr_argv, opt_tbl, &type, &mode, + ptr_object, §ion, ptr_uds_name, opcodes); + if (ret < 0) + return ret; + + if (mode == CBPF_BYTECODE || mode == CBPF_FILE) { + addattr16(n, MAX_MSG, nla_tbl[BPF_NLA_OPS_LEN], ret); + addattr_l(n, MAX_MSG, nla_tbl[BPF_NLA_OPS], opcodes, + ret * sizeof(struct sock_filter)); + } + + if (mode == EBPF_OBJECT || mode == EBPF_PINNED) { + snprintf(annotation, sizeof(annotation), "%s:[%s]", + basename(*ptr_object), mode == EBPF_PINNED ? 
+ "*fsobj" : section); + + addattr32(n, MAX_MSG, nla_tbl[BPF_NLA_FD], ret); + addattrstrz(n, MAX_MSG, nla_tbl[BPF_NLA_NAME], annotation); + } + + return 0; +} + +int bpf_graft_map(const char *map_path, uint32_t *key, int argc, char **argv) +{ + enum bpf_prog_type type = BPF_PROG_TYPE_UNSPEC; + const bool opt_tbl[BPF_MODE_MAX] = { + [CBPF_BYTECODE] = false, + [CBPF_FILE] = false, + [EBPF_OBJECT] = true, + [EBPF_PINNED] = true, + }; + const struct bpf_elf_map test = { + .type = BPF_MAP_TYPE_PROG_ARRAY, + .size_key = sizeof(int), + .size_value = sizeof(int), + }; + int ret, prog_fd, map_fd; + const char *section; + enum bpf_mode mode; + uint32_t map_key; + + prog_fd = bpf_parse(&argc, &argv, opt_tbl, &type, &mode, + NULL, §ion, NULL, NULL); + if (prog_fd < 0) + return prog_fd; + if (key) { + map_key = *key; + } else { + ret = sscanf(section, "%*i/%i", &map_key); + if (ret != 1) { + fprintf(stderr, "Couldn\'t infer map key from section " + "name! Please provide \'key\' argument!\n"); + ret = -EINVAL; + goto out_prog; + } + } + + map_fd = bpf_obj_get(map_path); + if (map_fd < 0) { + fprintf(stderr, "Couldn\'t retrieve pinned map \'%s\': %s\n", + map_path, strerror(errno)); + ret = map_fd; + goto out_prog; + } + + ret = bpf_map_selfcheck_pinned(map_fd, &test, + offsetof(struct bpf_elf_map, max_elem)); + if (ret < 0) { + fprintf(stderr, "Map \'%s\' self-check failed!\n", map_path); + goto out_map; + } + + ret = bpf_map_update(map_fd, &map_key, &prog_fd, BPF_ANY); + if (ret < 0) + fprintf(stderr, "Map update failed: %s\n", strerror(errno)); +out_map: + close(map_fd); +out_prog: + close(prog_fd); + return ret; +} + #ifdef HAVE_ELF +struct bpf_elf_prog { + enum bpf_prog_type type; + const struct bpf_insn *insns; + size_t size; + const char *license; +}; + +struct bpf_hash_entry { + unsigned int pinning; + const char *subpath; + struct bpf_hash_entry *next; +}; + +struct bpf_elf_ctx { + Elf *elf_fd; + GElf_Ehdr elf_hdr; + Elf_Data *sym_tab; + Elf_Data *str_tab; + int obj_fd; 
+ int map_fds[ELF_MAX_MAPS]; + struct bpf_elf_map maps[ELF_MAX_MAPS]; + int sym_num; + int map_num; + bool *sec_done; + int sec_maps; + char license[ELF_MAX_LICENSE_LEN]; + enum bpf_prog_type type; + bool verbose; + struct bpf_elf_st stat; + struct bpf_hash_entry *ht[256]; +}; + struct bpf_elf_sec_data { - GElf_Shdr sec_hdr; - char *sec_name; - Elf_Data *sec_data; + GElf_Shdr sec_hdr; + Elf_Data *sec_data; + const char *sec_name; }; struct bpf_map_data { - int *fds; - const char *obj; - struct bpf_elf_st *st; - struct bpf_elf_map *ent; + int *fds; + const char *obj; + struct bpf_elf_st *st; + struct bpf_elf_map *ent; }; /* If we provide a small buffer with log level enabled, the kernel @@ -193,15 +725,8 @@ struct bpf_map_data { * verifier we still want to hand something descriptive to the user. */ static char bpf_log_buf[65536]; -static bool bpf_verbose; -static struct bpf_elf_st bpf_st; - -static int map_fds[ELF_MAX_MAPS]; -static struct bpf_elf_map map_ent[ELF_MAX_MAPS]; - -static void bpf_dump_error(const char *format, ...) __check_format_string(1, 2); -static void bpf_dump_error(const char *format, ...) +static __check_format_string(1, 2) void bpf_dump_error(const char *format, ...) { va_list vl; @@ -215,46 +740,7 @@ static void bpf_dump_error(const char *format, ...) 
} } -static void bpf_save_finfo(int file_fd) -{ - struct stat st; - int ret; - - memset(&bpf_st, 0, sizeof(bpf_st)); - - ret = fstat(file_fd, &st); - if (ret < 0) { - fprintf(stderr, "Stat of elf file failed: %s\n", - strerror(errno)); - return; - } - - bpf_st.st_dev = st.st_dev; - bpf_st.st_ino = st.st_ino; -} - -static void bpf_clear_finfo(void) -{ - memset(&bpf_st, 0, sizeof(bpf_st)); -} - -static bool bpf_may_skip_map_creation(int file_fd) -{ - struct stat st; - int ret; - - ret = fstat(file_fd, &st); - if (ret < 0) { - fprintf(stderr, "Stat of elf file failed: %s\n", - strerror(errno)); - return false; - } - - return (bpf_st.st_dev == st.st_dev) && - (bpf_st.st_ino == st.st_ino); -} - -static int bpf_create_map(enum bpf_map_type type, unsigned int size_key, +static int bpf_map_create(enum bpf_map_type type, unsigned int size_key, unsigned int size_value, unsigned int max_elem) { union bpf_attr attr = { @@ -267,135 +753,418 @@ static int bpf_create_map(enum bpf_map_type type, unsigned int size_key, return bpf(BPF_MAP_CREATE, &attr, sizeof(attr)); } -static int bpf_update_map(int fd, const void *key, const void *value, - uint64_t flags) -{ - union bpf_attr attr = { - .map_fd = fd, - .key = bpf_ptr_to_u64(key), - .value = bpf_ptr_to_u64(value), - .flags = flags, - }; - - return bpf(BPF_MAP_UPDATE_ELEM, &attr, sizeof(attr)); -} - static int bpf_prog_load(enum bpf_prog_type type, const struct bpf_insn *insns, - unsigned int len, const char *license) + size_t size, const char *license) { union bpf_attr attr = { .prog_type = type, .insns = bpf_ptr_to_u64(insns), - .insn_cnt = len / sizeof(struct bpf_insn), + .insn_cnt = size / sizeof(struct bpf_insn), .license = bpf_ptr_to_u64(license), .log_buf = bpf_ptr_to_u64(bpf_log_buf), .log_size = sizeof(bpf_log_buf), .log_level = 1, }; + if (getenv(BPF_ENV_NOLOG)) { + attr.log_buf = 0; + attr.log_size = 0; + attr.log_level = 0; + } + return bpf(BPF_PROG_LOAD, &attr, sizeof(attr)); } -static int bpf_prog_attach(enum 
bpf_prog_type type, const char *sec, - const struct bpf_insn *insns, unsigned int size, - const char *license) +static int bpf_obj_pin(int fd, const char *pathname) { - int prog_fd = bpf_prog_load(type, insns, size, license); + union bpf_attr attr = { + .pathname = bpf_ptr_to_u64(pathname), + .bpf_fd = fd, + }; - if (prog_fd < 0 || bpf_verbose) { - bpf_dump_error("%s (section \'%s\'): %s\n", prog_fd < 0 ? - "BPF program rejected" : - "BPF program verification", - sec, strerror(errno)); + return bpf(BPF_OBJ_PIN, &attr, sizeof(attr)); +} + +static int bpf_obj_hash(const char *object, uint8_t *out, size_t len) +{ + struct sockaddr_alg alg = { + .salg_family = AF_ALG, + .salg_type = "hash", + .salg_name = "sha1", + }; + int ret, cfd, ofd, ffd; + struct stat stbuff; + ssize_t size; + + if (!object || len != 20) + return -EINVAL; + + cfd = socket(AF_ALG, SOCK_SEQPACKET, 0); + if (cfd < 0) { + fprintf(stderr, "Cannot get AF_ALG socket: %s\n", + strerror(errno)); + return cfd; } - return prog_fd; -} - -static int bpf_map_attach(enum bpf_map_type type, unsigned int size_key, - unsigned int size_value, unsigned int max_elem) -{ - int map_fd = bpf_create_map(type, size_key, size_value, max_elem); - - if (map_fd < 0) - bpf_dump_error("BPF map rejected: %s\n", strerror(errno)); - - return map_fd; -} - -static void bpf_maps_init(void) -{ - int i; - - memset(map_ent, 0, sizeof(map_ent)); - for (i = 0; i < ARRAY_SIZE(map_fds); i++) - map_fds[i] = -1; -} - -static int bpf_maps_count(void) -{ - int i, count = 0; - - for (i = 0; i < ARRAY_SIZE(map_fds); i++) { - if (map_fds[i] < 0) - break; - count++; + ret = bind(cfd, (struct sockaddr *)&alg, sizeof(alg)); + if (ret < 0) { + fprintf(stderr, "Error binding socket: %s\n", strerror(errno)); + goto out_cfd; } - return count; -} - -static void bpf_maps_destroy(void) -{ - int i; - - memset(map_ent, 0, sizeof(map_ent)); - for (i = 0; i < ARRAY_SIZE(map_fds); i++) { - if (map_fds[i] >= 0) - close(map_fds[i]); - } -} - -static int 
bpf_maps_attach(struct bpf_elf_map *maps, unsigned int num_maps) -{ - int i, ret; - - for (i = 0; (i < num_maps) && (num_maps <= ARRAY_SIZE(map_fds)); i++) { - struct bpf_elf_map *map = &maps[i]; - - ret = bpf_map_attach(map->type, map->size_key, - map->size_value, map->max_elem); - if (ret < 0) - goto err_unwind; - - map_fds[i] = ret; + ofd = accept(cfd, NULL, 0); + if (ofd < 0) { + fprintf(stderr, "Error accepting socket: %s\n", + strerror(errno)); + ret = ofd; + goto out_cfd; } - return 0; + ffd = open(object, O_RDONLY); + if (ffd < 0) { + fprintf(stderr, "Error opening object %s: %s\n", + object, strerror(errno)); + ret = ffd; + goto out_ofd; + } -err_unwind: - bpf_maps_destroy(); + ret = fstat(ffd, &stbuff); + if (ret < 0) { + fprintf(stderr, "Error doing fstat: %s\n", + strerror(errno)); + goto out_ffd; + } + + size = sendfile(ofd, ffd, NULL, stbuff.st_size); + if (size != stbuff.st_size) { + fprintf(stderr, "Error from sendfile (%zd vs %zu bytes): %s\n", + size, stbuff.st_size, strerror(errno)); + ret = -1; + goto out_ffd; + } + + size = read(ofd, out, len); + if (size != len) { + fprintf(stderr, "Error from read (%zd vs %zu bytes): %s\n", + size, len, strerror(errno)); + ret = -1; + } else { + ret = 0; + } +out_ffd: + close(ffd); +out_ofd: + close(ofd); +out_cfd: + close(cfd); return ret; } -static int bpf_fill_section_data(Elf *elf_fd, GElf_Ehdr *elf_hdr, int sec_index, - struct bpf_elf_sec_data *sec_data) +static const char *bpf_get_obj_uid(const char *pathname) { + static bool bpf_uid_cached = false; + static char bpf_uid[64]; + uint8_t tmp[20]; + int ret; + + if (bpf_uid_cached) + goto done; + + ret = bpf_obj_hash(pathname, tmp, sizeof(tmp)); + if (ret) { + fprintf(stderr, "Object hashing failed!\n"); + return NULL; + } + + hexstring_n2a(tmp, sizeof(tmp), bpf_uid, sizeof(bpf_uid)); + bpf_uid_cached = true; +done: + return bpf_uid; +} + +static int bpf_init_env(const char *pathname) +{ + struct rlimit limit = { + .rlim_cur = RLIM_INFINITY, + .rlim_max = 
RLIM_INFINITY, + }; + + /* Don't bother in case we fail! */ + setrlimit(RLIMIT_MEMLOCK, &limit); + + if (!bpf_get_tc_dir()) { + fprintf(stderr, "Continuing without mounted eBPF fs. " + "Too old kernel?\n"); + return 0; + } + + if (!bpf_get_obj_uid(pathname)) + return -1; + + return 0; +} + +static const char *bpf_custom_pinning(const struct bpf_elf_ctx *ctx, + uint32_t pinning) +{ + struct bpf_hash_entry *entry; + + entry = ctx->ht[pinning & (ARRAY_SIZE(ctx->ht) - 1)]; + while (entry && entry->pinning != pinning) + entry = entry->next; + + return entry ? entry->subpath : NULL; +} + +static bool bpf_no_pinning(const struct bpf_elf_ctx *ctx, + uint32_t pinning) +{ + switch (pinning) { + case PIN_OBJECT_NS: + case PIN_GLOBAL_NS: + return false; + case PIN_NONE: + return true; + default: + return !bpf_custom_pinning(ctx, pinning); + } +} + +static void bpf_make_pathname(char *pathname, size_t len, const char *name, + const struct bpf_elf_ctx *ctx, uint32_t pinning) +{ + switch (pinning) { + case PIN_OBJECT_NS: + snprintf(pathname, len, "%s/%s/%s", bpf_get_tc_dir(), + bpf_get_obj_uid(NULL), name); + break; + case PIN_GLOBAL_NS: + snprintf(pathname, len, "%s/%s/%s", bpf_get_tc_dir(), + BPF_DIR_GLOBALS, name); + break; + default: + snprintf(pathname, len, "%s/../%s/%s", bpf_get_tc_dir(), + bpf_custom_pinning(ctx, pinning), name); + break; + } +} + +static int bpf_probe_pinned(const char *name, const struct bpf_elf_ctx *ctx, + uint32_t pinning) +{ + char pathname[PATH_MAX]; + + if (bpf_no_pinning(ctx, pinning) || !bpf_get_tc_dir()) + return 0; + + bpf_make_pathname(pathname, sizeof(pathname), name, ctx, pinning); + return bpf_obj_get(pathname); +} + +static int bpf_make_obj_path(void) +{ + char tmp[PATH_MAX]; + int ret; + + snprintf(tmp, sizeof(tmp), "%s/%s", bpf_get_tc_dir(), + bpf_get_obj_uid(NULL)); + + ret = mkdir(tmp, S_IRWXU); + if (ret && errno != EEXIST) { + fprintf(stderr, "mkdir %s failed: %s\n", tmp, strerror(errno)); + return ret; + } + + return 0; +} + +static 
int bpf_make_custom_path(const char *todo) +{ + char tmp[PATH_MAX], rem[PATH_MAX], *sub; + int ret; + + snprintf(tmp, sizeof(tmp), "%s/../", bpf_get_tc_dir()); + snprintf(rem, sizeof(rem), "%s/", todo); + sub = strtok(rem, "/"); + + while (sub) { + if (strlen(tmp) + strlen(sub) + 2 > PATH_MAX) + return -EINVAL; + + strcat(tmp, sub); + strcat(tmp, "/"); + + ret = mkdir(tmp, S_IRWXU); + if (ret && errno != EEXIST) { + fprintf(stderr, "mkdir %s failed: %s\n", tmp, + strerror(errno)); + return ret; + } + + sub = strtok(NULL, "/"); + } + + return 0; +} + +static int bpf_place_pinned(int fd, const char *name, + const struct bpf_elf_ctx *ctx, uint32_t pinning) +{ + char pathname[PATH_MAX]; + const char *tmp; + int ret = 0; + + if (bpf_no_pinning(ctx, pinning) || !bpf_get_tc_dir()) + return 0; + + if (pinning == PIN_OBJECT_NS) + ret = bpf_make_obj_path(); + else if ((tmp = bpf_custom_pinning(ctx, pinning))) + ret = bpf_make_custom_path(tmp); + if (ret < 0) + return ret; + + bpf_make_pathname(pathname, sizeof(pathname), name, ctx, pinning); + return bpf_obj_pin(fd, pathname); +} + +static int bpf_prog_attach(const char *section, + const struct bpf_elf_prog *prog, bool verbose) +{ + int fd; + + /* We can add pinning here later as well, same as bpf_map_attach(). */ + errno = 0; + fd = bpf_prog_load(prog->type, prog->insns, prog->size, + prog->license); + if (fd < 0 || verbose) { + bpf_dump_error("Prog section \'%s\' (type:%u insns:%zu " + "license:\'%s\') %s%s (%d)!\n\n", + section, prog->type, + prog->size / sizeof(struct bpf_insn), + prog->license, fd < 0 ? "rejected: " : + "loaded", fd < 0 ? strerror(errno) : "", + fd < 0 ? 
errno : fd); + } + + return fd; +} + +static int bpf_map_attach(const char *name, const struct bpf_elf_map *map, + const struct bpf_elf_ctx *ctx, bool verbose) +{ + int fd, ret; + + fd = bpf_probe_pinned(name, ctx, map->pinning); + if (fd > 0) { + ret = bpf_map_selfcheck_pinned(fd, map, + offsetof(struct bpf_elf_map, + id)); + if (ret < 0) { + close(fd); + fprintf(stderr, "Map \'%s\' self-check failed!\n", + name); + return ret; + } + if (verbose) + fprintf(stderr, "Map \'%s\' loaded as pinned!\n", + name); + return fd; + } + + errno = 0; + fd = bpf_map_create(map->type, map->size_key, map->size_value, + map->max_elem); + if (fd < 0 || verbose) { + bpf_dump_error("Map \'%s\' (type:%u id:%u pinning:%u " + "ksize:%u vsize:%u max-elems:%u) %s%s (%d)!\n", + name, map->type, map->id, map->pinning, + map->size_key, map->size_value, map->max_elem, + fd < 0 ? "rejected: " : "loaded", fd < 0 ? + strerror(errno) : "", fd < 0 ? errno : fd); + if (fd < 0) + return fd; + } + + ret = bpf_place_pinned(fd, name, ctx, map->pinning); + if (ret < 0 && errno != EEXIST) { + fprintf(stderr, "Could not pin %s map: %s\n", name, + strerror(errno)); + close(fd); + return ret; + } + + return fd; +} + +#define __ELF_ST_BIND(x) ((x) >> 4) +#define __ELF_ST_TYPE(x) (((unsigned int) x) & 0xf) + +static const char *bpf_str_tab_name(const struct bpf_elf_ctx *ctx, + const GElf_Sym *sym) +{ + return ctx->str_tab->d_buf + sym->st_name; +} + +static const char *bpf_map_fetch_name(struct bpf_elf_ctx *ctx, int which) +{ + GElf_Sym sym; + int i; + + for (i = 0; i < ctx->sym_num; i++) { + if (gelf_getsym(ctx->sym_tab, i, &sym) != &sym) + continue; + + if (__ELF_ST_BIND(sym.st_info) != STB_GLOBAL || + __ELF_ST_TYPE(sym.st_info) != STT_NOTYPE || + sym.st_shndx != ctx->sec_maps || + sym.st_value / sizeof(struct bpf_elf_map) != which) + continue; + + return bpf_str_tab_name(ctx, &sym); + } + + return NULL; +} + +static int bpf_maps_attach_all(struct bpf_elf_ctx *ctx) +{ + const char *map_name; + int i, fd; + 
+ for (i = 0; i < ctx->map_num; i++) { + map_name = bpf_map_fetch_name(ctx, i); + if (!map_name) + return -EIO; + + fd = bpf_map_attach(map_name, &ctx->maps[i], ctx, + ctx->verbose); + if (fd < 0) + return fd; + + ctx->map_fds[i] = fd; + } + + return 0; +} + +static int bpf_fill_section_data(struct bpf_elf_ctx *ctx, int section, + struct bpf_elf_sec_data *data) +{ + Elf_Data *sec_edata; GElf_Shdr sec_hdr; Elf_Scn *sec_fd; - Elf_Data *sec_edata; char *sec_name; - memset(sec_data, 0, sizeof(*sec_data)); + memset(data, 0, sizeof(*data)); - sec_fd = elf_getscn(elf_fd, sec_index); + sec_fd = elf_getscn(ctx->elf_fd, section); if (!sec_fd) return -EINVAL; - if (gelf_getshdr(sec_fd, &sec_hdr) != &sec_hdr) return -EIO; - sec_name = elf_strptr(elf_fd, elf_hdr->e_shstrndx, + sec_name = elf_strptr(ctx->elf_fd, ctx->elf_hdr.e_shstrndx, sec_hdr.sh_name); if (!sec_name || !sec_hdr.sh_size) return -ENOENT; @@ -404,16 +1173,131 @@ static int bpf_fill_section_data(Elf *elf_fd, GElf_Ehdr *elf_hdr, int sec_index, if (!sec_edata || elf_getdata(sec_fd, sec_edata)) return -EIO; - memcpy(&sec_data->sec_hdr, &sec_hdr, sizeof(sec_hdr)); - sec_data->sec_name = sec_name; - sec_data->sec_data = sec_edata; + memcpy(&data->sec_hdr, &sec_hdr, sizeof(sec_hdr)); + data->sec_name = sec_name; + data->sec_data = sec_edata; return 0; } -static int bpf_apply_relo_data(struct bpf_elf_sec_data *data_relo, - struct bpf_elf_sec_data *data_insn, - Elf_Data *sym_tab) +static int bpf_fetch_maps(struct bpf_elf_ctx *ctx, int section, + struct bpf_elf_sec_data *data) +{ + if (data->sec_data->d_size % sizeof(struct bpf_elf_map) != 0) + return -EINVAL; + + ctx->map_num = data->sec_data->d_size / sizeof(struct bpf_elf_map); + ctx->sec_maps = section; + ctx->sec_done[section] = true; + + if (ctx->map_num > ARRAY_SIZE(ctx->map_fds)) { + fprintf(stderr, "Too many BPF maps in ELF section!\n"); + return -ENOMEM; + } + + memcpy(ctx->maps, data->sec_data->d_buf, data->sec_data->d_size); + return 0; +} + +static int 
bpf_fetch_license(struct bpf_elf_ctx *ctx, int section, + struct bpf_elf_sec_data *data) +{ + if (data->sec_data->d_size > sizeof(ctx->license)) + return -ENOMEM; + + memcpy(ctx->license, data->sec_data->d_buf, data->sec_data->d_size); + ctx->sec_done[section] = true; + return 0; +} + +static int bpf_fetch_symtab(struct bpf_elf_ctx *ctx, int section, + struct bpf_elf_sec_data *data) +{ + ctx->sym_tab = data->sec_data; + ctx->sym_num = data->sec_hdr.sh_size / data->sec_hdr.sh_entsize; + ctx->sec_done[section] = true; + return 0; +} + +static int bpf_fetch_strtab(struct bpf_elf_ctx *ctx, int section, + struct bpf_elf_sec_data *data) +{ + ctx->str_tab = data->sec_data; + ctx->sec_done[section] = true; + return 0; +} + +static int bpf_fetch_ancillary(struct bpf_elf_ctx *ctx) +{ + struct bpf_elf_sec_data data; + int i, ret = -1; + + for (i = 1; i < ctx->elf_hdr.e_shnum; i++) { + ret = bpf_fill_section_data(ctx, i, &data); + if (ret < 0) + continue; + + if (!strcmp(data.sec_name, ELF_SECTION_MAPS)) + ret = bpf_fetch_maps(ctx, i, &data); + else if (!strcmp(data.sec_name, ELF_SECTION_LICENSE)) + ret = bpf_fetch_license(ctx, i, &data); + else if (data.sec_hdr.sh_type == SHT_SYMTAB) + ret = bpf_fetch_symtab(ctx, i, &data); + else if (data.sec_hdr.sh_type == SHT_STRTAB && + i != ctx->elf_hdr.e_shstrndx) + ret = bpf_fetch_strtab(ctx, i, &data); + if (ret < 0) { + fprintf(stderr, "Error parsing section %d! 
Perhaps" + "check with readelf -a?\n", i); + break; + } + } + + if (ctx->sym_tab && ctx->str_tab && ctx->sec_maps) { + ret = bpf_maps_attach_all(ctx); + if (ret < 0) { + fprintf(stderr, "Error loading maps into kernel!\n"); + return ret; + } + } + + return ret; +} + +static int bpf_fetch_prog(struct bpf_elf_ctx *ctx, const char *section) +{ + struct bpf_elf_sec_data data; + struct bpf_elf_prog prog; + int ret, i, fd = -1; + + for (i = 1; i < ctx->elf_hdr.e_shnum; i++) { + if (ctx->sec_done[i]) + continue; + + ret = bpf_fill_section_data(ctx, i, &data); + if (ret < 0 || strcmp(data.sec_name, section)) + continue; + + memset(&prog, 0, sizeof(prog)); + prog.type = ctx->type; + prog.insns = data.sec_data->d_buf; + prog.size = data.sec_data->d_size; + prog.license = ctx->license; + + fd = bpf_prog_attach(section, &prog, ctx->verbose); + if (fd < 0) + continue; + + ctx->sec_done[i] = true; + break; + } + + return fd; +} + +static int bpf_apply_relo_data(struct bpf_elf_ctx *ctx, + struct bpf_elf_sec_data *data_relo, + struct bpf_elf_sec_data *data_insn) { Elf_Data *idata = data_insn->sec_data; GElf_Shdr *rhdr = &data_relo->sec_hdr; @@ -422,7 +1306,7 @@ static int bpf_apply_relo_data(struct bpf_elf_sec_data *data_relo, unsigned int num_insns = idata->d_size / sizeof(*insns); for (relo_ent = 0; relo_ent < relo_num; relo_ent++) { - unsigned int ioff, fnum; + unsigned int ioff, rmap; GElf_Rel relo; GElf_Sym sym; @@ -430,291 +1314,367 @@ static int bpf_apply_relo_data(struct bpf_elf_sec_data *data_relo, return -EIO; ioff = relo.r_offset / sizeof(struct bpf_insn); - if (ioff >= num_insns) - return -EINVAL; - if (insns[ioff].code != (BPF_LD | BPF_IMM | BPF_DW)) + if (ioff >= num_insns || + insns[ioff].code != (BPF_LD | BPF_IMM | BPF_DW)) return -EINVAL; - if (gelf_getsym(sym_tab, GELF_R_SYM(relo.r_info), &sym) != &sym) + if (gelf_getsym(ctx->sym_tab, GELF_R_SYM(relo.r_info), &sym) != &sym) return -EIO; - fnum = sym.st_value / sizeof(struct bpf_elf_map); - if (fnum >= 
ARRAY_SIZE(map_fds)) + rmap = sym.st_value / sizeof(struct bpf_elf_map); + if (rmap >= ARRAY_SIZE(ctx->map_fds)) return -EINVAL; - if (map_fds[fnum] < 0) + if (!ctx->map_fds[rmap]) return -EINVAL; + if (ctx->verbose) + fprintf(stderr, "Map \'%s\' (%d) injected into prog " + "section \'%s\' at offset %u!\n", + bpf_str_tab_name(ctx, &sym), ctx->map_fds[rmap], + data_insn->sec_name, ioff); + insns[ioff].src_reg = BPF_PSEUDO_MAP_FD; - insns[ioff].imm = map_fds[fnum]; + insns[ioff].imm = ctx->map_fds[rmap]; } return 0; } -static int bpf_fetch_ancillary(int file_fd, Elf *elf_fd, GElf_Ehdr *elf_hdr, - bool *sec_done, char *license, unsigned int lic_len, - Elf_Data **sym_tab) +static int bpf_fetch_prog_relo(struct bpf_elf_ctx *ctx, const char *section) { - int sec_index, ret = -1; + struct bpf_elf_sec_data data_relo, data_insn; + struct bpf_elf_prog prog; + int ret, idx, i, fd = -1; - for (sec_index = 1; sec_index < elf_hdr->e_shnum; sec_index++) { - struct bpf_elf_sec_data data_anc; - - ret = bpf_fill_section_data(elf_fd, elf_hdr, sec_index, - &data_anc); - if (ret < 0) - continue; - - /* Extract and load eBPF map fds. */ - if (!strcmp(data_anc.sec_name, ELF_SECTION_MAPS) && - !bpf_may_skip_map_creation(file_fd)) { - struct bpf_elf_map *maps; - unsigned int maps_num; - - if (data_anc.sec_data->d_size % sizeof(*maps) != 0) - return -EINVAL; - - maps = data_anc.sec_data->d_buf; - maps_num = data_anc.sec_data->d_size / sizeof(*maps); - memcpy(map_ent, maps, data_anc.sec_data->d_size); - - ret = bpf_maps_attach(maps, maps_num); - if (ret < 0) - return ret; - - sec_done[sec_index] = true; - } - /* Extract eBPF license. */ - else if (!strcmp(data_anc.sec_name, ELF_SECTION_LICENSE)) { - if (data_anc.sec_data->d_size > lic_len) - return -ENOMEM; - - sec_done[sec_index] = true; - memcpy(license, data_anc.sec_data->d_buf, - data_anc.sec_data->d_size); - } - /* Extract symbol table for relocations (map fd fixups). 
*/ - else if (data_anc.sec_hdr.sh_type == SHT_SYMTAB) { - sec_done[sec_index] = true; - *sym_tab = data_anc.sec_data; - } - } - - return ret; -} - -static int bpf_fetch_prog_relo(Elf *elf_fd, GElf_Ehdr *elf_hdr, bool *sec_done, - enum bpf_prog_type type, const char *sec, - const char *license, Elf_Data *sym_tab) -{ - int sec_index, prog_fd = -1; - - for (sec_index = 1; sec_index < elf_hdr->e_shnum; sec_index++) { - struct bpf_elf_sec_data data_relo, data_insn; - int ins_index, ret; - - /* Attach eBPF programs with relocation data (maps). */ - ret = bpf_fill_section_data(elf_fd, elf_hdr, sec_index, - &data_relo); + for (i = 1; i < ctx->elf_hdr.e_shnum; i++) { + ret = bpf_fill_section_data(ctx, i, &data_relo); if (ret < 0 || data_relo.sec_hdr.sh_type != SHT_REL) continue; - ins_index = data_relo.sec_hdr.sh_info; - - ret = bpf_fill_section_data(elf_fd, elf_hdr, ins_index, - &data_insn); - if (ret < 0) - continue; - if (strcmp(data_insn.sec_name, sec)) + idx = data_relo.sec_hdr.sh_info; + ret = bpf_fill_section_data(ctx, idx, &data_insn); + if (ret < 0 || strcmp(data_insn.sec_name, section)) continue; - ret = bpf_apply_relo_data(&data_relo, &data_insn, sym_tab); + ret = bpf_apply_relo_data(ctx, &data_relo, &data_insn); if (ret < 0) continue; - prog_fd = bpf_prog_attach(type, sec, data_insn.sec_data->d_buf, - data_insn.sec_data->d_size, license); - if (prog_fd < 0) + memset(&prog, 0, sizeof(prog)); + prog.type = ctx->type; + prog.insns = data_insn.sec_data->d_buf; + prog.size = data_insn.sec_data->d_size; + prog.license = ctx->license; + + fd = bpf_prog_attach(section, &prog, ctx->verbose); + if (fd < 0) continue; - sec_done[sec_index] = true; - sec_done[ins_index] = true; + ctx->sec_done[i] = true; + ctx->sec_done[idx] = true; break; } - return prog_fd; + return fd; } -static int bpf_fetch_prog(Elf *elf_fd, GElf_Ehdr *elf_hdr, bool *sec_done, - enum bpf_prog_type type, const char *sec, - const char *license) -{ - int sec_index, prog_fd = -1; - - for (sec_index = 1; 
sec_index < elf_hdr->e_shnum; sec_index++) { - struct bpf_elf_sec_data data_insn; - int ret; - - /* Attach eBPF programs without relocation data. */ - if (sec_done[sec_index]) - continue; - - ret = bpf_fill_section_data(elf_fd, elf_hdr, sec_index, - &data_insn); - if (ret < 0) - continue; - if (strcmp(data_insn.sec_name, sec)) - continue; - - prog_fd = bpf_prog_attach(type, sec, data_insn.sec_data->d_buf, - data_insn.sec_data->d_size, license); - if (prog_fd < 0) - continue; - - sec_done[sec_index] = true; - break; - } - - return prog_fd; -} - -static int bpf_fetch_prog_sec(Elf *elf_fd, GElf_Ehdr *elf_hdr, bool *sec_done, - enum bpf_prog_type type, const char *sec, - const char *license, Elf_Data *sym_tab) +static int bpf_fetch_prog_sec(struct bpf_elf_ctx *ctx, const char *section) { int ret = -1; - if (sym_tab) - ret = bpf_fetch_prog_relo(elf_fd, elf_hdr, sec_done, type, - sec, license, sym_tab); + if (ctx->sym_tab) + ret = bpf_fetch_prog_relo(ctx, section); if (ret < 0) - ret = bpf_fetch_prog(elf_fd, elf_hdr, sec_done, type, sec, - license); + ret = bpf_fetch_prog(ctx, section); + return ret; } -static int bpf_fill_prog_arrays(Elf *elf_fd, GElf_Ehdr *elf_hdr, bool *sec_done, - enum bpf_prog_type type, const char *license, - Elf_Data *sym_tab) +static int bpf_find_map_by_id(struct bpf_elf_ctx *ctx, uint32_t id) { - int sec_index; + int i; - for (sec_index = 1; sec_index < elf_hdr->e_shnum; sec_index++) { - struct bpf_elf_sec_data data_insn; - int ret, map_id, key_id, prog_fd; + for (i = 0; i < ARRAY_SIZE(ctx->map_fds); i++) + if (ctx->map_fds[i] && ctx->maps[i].id == id && + ctx->maps[i].type == BPF_MAP_TYPE_PROG_ARRAY) + return i; + return -1; +} - if (sec_done[sec_index]) +static int bpf_fill_prog_arrays(struct bpf_elf_ctx *ctx) +{ + struct bpf_elf_sec_data data; + uint32_t map_id, key_id; + int fd, i, ret, idx; + + for (i = 1; i < ctx->elf_hdr.e_shnum; i++) { + if (ctx->sec_done[i]) continue; - ret = bpf_fill_section_data(elf_fd, elf_hdr, sec_index, - 
&data_insn); + ret = bpf_fill_section_data(ctx, i, &data); if (ret < 0) continue; - ret = sscanf(data_insn.sec_name, "%i/%i", &map_id, &key_id); + ret = sscanf(data.sec_name, "%i/%i", &map_id, &key_id); if (ret != 2) continue; - if (map_id >= ARRAY_SIZE(map_fds) || map_fds[map_id] < 0) - return -ENOENT; - if (map_ent[map_id].type != BPF_MAP_TYPE_PROG_ARRAY || - map_ent[map_id].max_elem <= key_id) - return -EINVAL; + idx = bpf_find_map_by_id(ctx, map_id); + if (idx < 0) + continue; - prog_fd = bpf_fetch_prog_sec(elf_fd, elf_hdr, sec_done, - type, data_insn.sec_name, - license, sym_tab); - if (prog_fd < 0) + fd = bpf_fetch_prog_sec(ctx, data.sec_name); + if (fd < 0) return -EIO; - ret = bpf_update_map(map_fds[map_id], &key_id, &prog_fd, - BPF_ANY); + ret = bpf_map_update(ctx->map_fds[idx], &key_id, + &fd, BPF_ANY); if (ret < 0) return -ENOENT; - sec_done[sec_index] = true; + ctx->sec_done[i] = true; } return 0; } -int bpf_open_object(const char *path, enum bpf_prog_type type, - const char *sec, bool verbose) +static void bpf_save_finfo(struct bpf_elf_ctx *ctx) { - char license[ELF_MAX_LICENSE_LEN]; - int file_fd, prog_fd = -1, ret; - Elf_Data *sym_tab = NULL; - GElf_Ehdr elf_hdr; - bool *sec_done; - Elf *elf_fd; + struct stat st; + int ret; - if (elf_version(EV_CURRENT) == EV_NONE) - return -EINVAL; + memset(&ctx->stat, 0, sizeof(ctx->stat)); - file_fd = open(path, O_RDONLY, 0); - if (file_fd < 0) - return -errno; - - elf_fd = elf_begin(file_fd, ELF_C_READ, NULL); - if (!elf_fd) { - ret = -EINVAL; - goto out; + ret = fstat(ctx->obj_fd, &st); + if (ret < 0) { + fprintf(stderr, "Stat of elf file failed: %s\n", + strerror(errno)); + return; } - if (gelf_getehdr(elf_fd, &elf_hdr) != &elf_hdr) { + ctx->stat.st_dev = st.st_dev; + ctx->stat.st_ino = st.st_ino; +} + +static int bpf_read_pin_mapping(FILE *fp, uint32_t *id, char *path) +{ + char buff[PATH_MAX]; + + while (fgets(buff, sizeof(buff), fp)) { + char *ptr = buff; + + while (*ptr == ' ' || *ptr == '\t') + ptr++; + + 
if (*ptr == '#' || *ptr == '\n' || *ptr == 0) + continue; + + if (sscanf(ptr, "%i %s\n", id, path) != 2 && + sscanf(ptr, "%i %s #", id, path) != 2) { + strcpy(path, ptr); + return -1; + } + + return 1; + } + + return 0; +} + +static bool bpf_pinning_reserved(uint32_t pinning) +{ + switch (pinning) { + case PIN_NONE: + case PIN_OBJECT_NS: + case PIN_GLOBAL_NS: + return true; + default: + return false; + } +} + +static void bpf_hash_init(struct bpf_elf_ctx *ctx, const char *db_file) +{ + struct bpf_hash_entry *entry; + char subpath[PATH_MAX]; + uint32_t pinning; + FILE *fp; + int ret; + + fp = fopen(db_file, "r"); + if (!fp) + return; + + memset(subpath, 0, sizeof(subpath)); + while ((ret = bpf_read_pin_mapping(fp, &pinning, subpath))) { + if (ret == -1) { + fprintf(stderr, "Database %s is corrupted at: %s\n", + db_file, subpath); + fclose(fp); + return; + } + + if (bpf_pinning_reserved(pinning)) { + fprintf(stderr, "Database %s, id %u is reserved - " + "ignoring!\n", db_file, pinning); + continue; + } + + entry = malloc(sizeof(*entry)); + if (!entry) { + fprintf(stderr, "No memory left for db entry!\n"); + continue; + } + + entry->pinning = pinning; + entry->subpath = strdup(subpath); + if (!entry->subpath) { + fprintf(stderr, "No memory left for db entry!\n"); + free(entry); + continue; + } + + entry->next = ctx->ht[pinning & (ARRAY_SIZE(ctx->ht) - 1)]; + ctx->ht[pinning & (ARRAY_SIZE(ctx->ht) - 1)] = entry; + } + + fclose(fp); +} + +static void bpf_hash_destroy(struct bpf_elf_ctx *ctx) +{ + struct bpf_hash_entry *entry; + int i; + + for (i = 0; i < ARRAY_SIZE(ctx->ht); i++) { + while ((entry = ctx->ht[i]) != NULL) { + ctx->ht[i] = entry->next; + free((char *)entry->subpath); + free(entry); + } + } +} + +static int bpf_elf_ctx_init(struct bpf_elf_ctx *ctx, const char *pathname, + enum bpf_prog_type type, bool verbose) +{ + int ret = -EINVAL; + + if (elf_version(EV_CURRENT) == EV_NONE || + bpf_init_env(pathname)) + return ret; + + memset(ctx, 0, sizeof(*ctx)); + 
ctx->verbose = verbose; + ctx->type = type; + + ctx->obj_fd = open(pathname, O_RDONLY); + if (ctx->obj_fd < 0) + return ctx->obj_fd; + + ctx->elf_fd = elf_begin(ctx->obj_fd, ELF_C_READ, NULL); + if (!ctx->elf_fd) { + ret = -EINVAL; + goto out_fd; + } + + if (gelf_getehdr(ctx->elf_fd, &ctx->elf_hdr) != + &ctx->elf_hdr) { ret = -EIO; goto out_elf; } - sec_done = calloc(elf_hdr.e_shnum, sizeof(*sec_done)); - if (!sec_done) { + ctx->sec_done = calloc(ctx->elf_hdr.e_shnum, + sizeof(*(ctx->sec_done))); + if (!ctx->sec_done) { ret = -ENOMEM; goto out_elf; } - memset(license, 0, sizeof(license)); - bpf_verbose = verbose; + bpf_save_finfo(ctx); + bpf_hash_init(ctx, CONFDIR "/bpf_pinning"); - if (!bpf_may_skip_map_creation(file_fd)) - bpf_maps_init(); + return 0; +out_elf: + elf_end(ctx->elf_fd); +out_fd: + close(ctx->obj_fd); + return ret; +} - ret = bpf_fetch_ancillary(file_fd, elf_fd, &elf_hdr, sec_done, - license, sizeof(license), &sym_tab); - if (ret < 0) - goto out_maps; +static int bpf_maps_count(struct bpf_elf_ctx *ctx) +{ + int i, count = 0; - prog_fd = bpf_fetch_prog_sec(elf_fd, &elf_hdr, sec_done, type, - sec, license, sym_tab); - if (prog_fd < 0) - goto out_maps; - - if (!bpf_may_skip_map_creation(file_fd)) { - ret = bpf_fill_prog_arrays(elf_fd, &elf_hdr, sec_done, - type, license, sym_tab); - if (ret < 0) - goto out_prog; + for (i = 0; i < ARRAY_SIZE(ctx->map_fds); i++) { + if (!ctx->map_fds[i]) + break; + count++; } - bpf_save_finfo(file_fd); + return count; +} - free(sec_done); +static void bpf_maps_teardown(struct bpf_elf_ctx *ctx) +{ + int i; - elf_end(elf_fd); - close(file_fd); + for (i = 0; i < ARRAY_SIZE(ctx->map_fds); i++) { + if (ctx->map_fds[i]) + close(ctx->map_fds[i]); + } +} - return prog_fd; +static void bpf_elf_ctx_destroy(struct bpf_elf_ctx *ctx, bool failure) +{ + if (failure) + bpf_maps_teardown(ctx); -out_prog: - close(prog_fd); -out_maps: - bpf_maps_destroy(); - free(sec_done); -out_elf: - elf_end(elf_fd); + bpf_hash_destroy(ctx); + 
free(ctx->sec_done); + elf_end(ctx->elf_fd); + close(ctx->obj_fd); +} + +static struct bpf_elf_ctx __ctx; + +static int bpf_obj_open(const char *pathname, enum bpf_prog_type type, + const char *section, bool verbose) +{ + struct bpf_elf_ctx *ctx = &__ctx; + int fd = 0, ret; + + ret = bpf_elf_ctx_init(ctx, pathname, type, verbose); + if (ret < 0) { + fprintf(stderr, "Cannot initialize ELF context!\n"); + return ret; + } + + ret = bpf_fetch_ancillary(ctx); + if (ret < 0) { + fprintf(stderr, "Error fetching ELF ancillary data!\n"); + goto out; + } + + fd = bpf_fetch_prog_sec(ctx, section); + if (fd < 0) { + fprintf(stderr, "Error fetching program/map!\n"); + ret = fd; + goto out; + } + + ret = bpf_fill_prog_arrays(ctx); + if (ret < 0) + fprintf(stderr, "Error filling program arrays!\n"); out: - close(file_fd); - bpf_clear_finfo(); - return prog_fd; + bpf_elf_ctx_destroy(ctx, ret < 0); + if (ret < 0) { + if (fd) + close(fd); + return ret; + } + + return fd; } static int @@ -803,6 +1763,7 @@ bpf_map_set_recv(int fd, int *fds, struct bpf_map_aux *aux, int bpf_send_map_fds(const char *path, const char *obj) { + struct bpf_elf_ctx *ctx = &__ctx; struct sockaddr_un addr; struct bpf_map_data bpf_aux; int fd, ret; @@ -827,18 +1788,18 @@ int bpf_send_map_fds(const char *path, const char *obj) memset(&bpf_aux, 0, sizeof(bpf_aux)); - bpf_aux.fds = map_fds; - bpf_aux.ent = map_ent; - + bpf_aux.fds = ctx->map_fds; + bpf_aux.ent = ctx->maps; + bpf_aux.st = &ctx->stat; bpf_aux.obj = obj; - bpf_aux.st = &bpf_st; ret = bpf_map_set_send(fd, &addr, sizeof(addr), &bpf_aux, - bpf_maps_count()); + bpf_maps_count(ctx)); if (ret < 0) fprintf(stderr, "Cannot send fds to %s: %s\n", path, strerror(errno)); + bpf_maps_teardown(ctx); close(fd); return ret; } diff --git a/tc/tc_bpf.h b/tc/tc_bpf.h index 2ad88121..526d0b12 100644 --- a/tc/tc_bpf.h +++ b/tc/tc_bpf.h @@ -13,61 +13,57 @@ #ifndef _TC_BPF_H_ #define _TC_BPF_H_ 1 -#include #include -#include #include -#include -#include -#include 
-#include +#include #include "utils.h" #include "bpf_scm.h" +enum { + BPF_NLA_OPS_LEN = 0, + BPF_NLA_OPS, + BPF_NLA_FD, + BPF_NLA_NAME, + __BPF_NLA_MAX, +}; + +#define BPF_NLA_MAX __BPF_NLA_MAX + #define BPF_ENV_UDS "TC_BPF_UDS" +#define BPF_ENV_MNT "TC_BPF_MNT" +#define BPF_ENV_NOLOG "TC_BPF_NOLOG" -int bpf_parse_string(char *arg, bool from_file, __u16 *bpf_len, - char **bpf_string, bool *need_release, - const char separator); -int bpf_parse_ops(int argc, char **argv, struct sock_filter *bpf_ops, - bool from_file); -void bpf_print_ops(FILE *f, struct rtattr *bpf_ops, __u16 len); +#ifndef BPF_FS_MAGIC +# define BPF_FS_MAGIC 0xcafe4a11 +#endif +#define BPF_DIR_MNT "/sys/fs/bpf" + +#define BPF_DIR_TC "tc" +#define BPF_DIR_GLOBALS "globals" + +#ifndef TRACEFS_MAGIC +# define TRACEFS_MAGIC 0x74726163 +#endif + +#define TRACE_DIR_MNT "/sys/kernel/tracing" + +int bpf_trace_pipe(void); const char *bpf_default_section(const enum bpf_prog_type type); -#ifdef HAVE_ELF -int bpf_open_object(const char *path, enum bpf_prog_type type, - const char *sec, bool verbose); +int bpf_parse_common(int *ptr_argc, char ***ptr_argv, const int *nla_tbl, + enum bpf_prog_type type, const char **ptr_object, + const char **ptr_uds_name, struct nlmsghdr *n); +int bpf_graft_map(const char *map_path, uint32_t *key, int argc, char **argv); +void bpf_print_ops(FILE *f, struct rtattr *bpf_ops, __u16 len); + +#ifdef HAVE_ELF int bpf_send_map_fds(const char *path, const char *obj); int bpf_recv_map_fds(const char *path, int *fds, struct bpf_map_aux *aux, unsigned int entries); - -static inline __u64 bpf_ptr_to_u64(const void *ptr) -{ - return (__u64) (unsigned long) ptr; -} - -static inline int bpf(int cmd, union bpf_attr *attr, unsigned int size) -{ -#ifdef __NR_bpf - return syscall(__NR_bpf, cmd, attr, size); #else - fprintf(stderr, "No bpf syscall, kernel headers too old?\n"); - errno = ENOSYS; - return -1; -#endif -} -#else -static inline int bpf_open_object(const char *path, enum bpf_prog_type 
type, - const char *sec, bool verbose) -{ - fprintf(stderr, "No ELF library support compiled in.\n"); - errno = ENOSYS; - return -1; -} - static inline int bpf_send_map_fds(const char *path, const char *obj) { return 0; diff --git a/tc/tc_monitor.c b/tc/tc_monitor.c index 097068e9..ebb94320 100644 --- a/tc/tc_monitor.c +++ b/tc/tc_monitor.c @@ -91,13 +91,17 @@ int do_tcmonitor(int argc, char **argv) } if (file) { - FILE *fp; - fp = fopen(file, "r"); + FILE *fp = fopen(file, "r"); + int ret; + if (fp == NULL) { perror("Cannot fopen"); exit(-1); } - return rtnl_from_file(fp, accept_tcmsg, (void*)stdout); + + ret = rtnl_from_file(fp, accept_tcmsg, stdout); + fclose(fp); + return ret; } if (rtnl_open(&rth, groups) < 0) diff --git a/tipc/Makefile b/tipc/Makefile index bc5ecfd3..f06dcb11 100644 --- a/tipc/Makefile +++ b/tipc/Makefile @@ -6,7 +6,7 @@ TIPCOBJ=bearer.o \ media.o misc.o \ msg.o nametable.o \ node.o socket.o \ - tipc.o + peer.o tipc.o include ../Config diff --git a/tipc/peer.c b/tipc/peer.c new file mode 100644 index 00000000..de0c73c3 --- /dev/null +++ b/tipc/peer.c @@ -0,0 +1,93 @@ +/* + * peer.c TIPC peer functionality. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. 
+ * + * Authors: Richard Alpe + */ + +#include +#include +#include +#include + +#include +#include +#include +#include + +#include "cmdl.h" +#include "msg.h" +#include "misc.h" +#include "peer.h" + +static int cmd_peer_rm_addr(struct nlmsghdr *nlh, const struct cmd *cmd, + struct cmdl *cmdl, void *data) +{ + char *str; + uint32_t addr; + struct nlattr *nest; + char buf[MNL_SOCKET_BUFFER_SIZE]; + + if ((cmdl->argc != cmdl->optind + 1) || help_flag) { + fprintf(stderr, "Usage: %s peer remove address ADDRESS\n", + cmdl->argv[0]); + return -EINVAL; + } + + str = shift_cmdl(cmdl); + addr = str2addr(str); + if (!addr) + return -1; + + if (!(nlh = msg_init(buf, TIPC_NL_PEER_REMOVE))) { + fprintf(stderr, "error, message initialisation failed\n"); + return -1; + } + + nest = mnl_attr_nest_start(nlh, TIPC_NLA_NET); + mnl_attr_put_u32(nlh, TIPC_NLA_NET_ADDR, addr); + mnl_attr_nest_end(nlh, nest); + + return msg_doit(nlh, NULL, NULL); +} + +static void cmd_peer_rm_help(struct cmdl *cmdl) +{ + fprintf(stderr, "Usage: %s peer remove address ADDRESS\n", + cmdl->argv[0]); +} + +static int cmd_peer_rm(struct nlmsghdr *nlh, const struct cmd *cmd, + struct cmdl *cmdl, void *data) +{ + const struct cmd cmds[] = { + { "address", cmd_peer_rm_addr, cmd_peer_rm_help }, + { NULL } + }; + + return run_cmd(nlh, cmd, cmds, cmdl, NULL); +} + +void cmd_peer_help(struct cmdl *cmdl) +{ + fprintf(stderr, + "Usage: %s peer COMMAND [ARGS] ...\n\n" + "COMMANDS\n" + " remove - Remove an offline peer node\n", + cmdl->argv[0]); +} + +int cmd_peer(struct nlmsghdr *nlh, const struct cmd *cmd, struct cmdl *cmdl, + void *data) +{ + const struct cmd cmds[] = { + { "remove", cmd_peer_rm, cmd_peer_rm_help }, + { NULL } + }; + + return run_cmd(nlh, cmd, cmds, cmdl, NULL); +} diff --git a/tipc/peer.h b/tipc/peer.h new file mode 100644 index 00000000..89722616 --- /dev/null +++ b/tipc/peer.h @@ -0,0 +1,21 @@ +/* + * peer.h TIPC peer functionality. 
+ * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * Authors: Richard Alpe + */ + +#ifndef _TIPC_PEER_H +#define _TIPC_PEER_H + +extern int help_flag; + +int cmd_peer(struct nlmsghdr *nlh, const struct cmd *cmd, struct cmdl *cmdl, + void *data); +void cmd_peer_help(struct cmdl *cmdl); + +#endif diff --git a/tipc/tipc.c b/tipc/tipc.c index 44398052..600d5e2a 100644 --- a/tipc/tipc.c +++ b/tipc/tipc.c @@ -20,6 +20,7 @@ #include "socket.h" #include "media.h" #include "node.h" +#include "peer.h" #include "cmdl.h" int help_flag; @@ -39,6 +40,7 @@ static void about(struct cmdl *cmdl) " media - Show or modify media\n" " nametable - Show nametable\n" " node - Show or modify node related parameters\n" + " peer - Peer related operations\n" " socket - Show sockets\n", cmdl->argv[0]); } @@ -59,6 +61,7 @@ int main(int argc, char *argv[]) { "media", cmd_media, cmd_media_help}, { "nametable", cmd_nametable, cmd_nametable_help}, { "node", cmd_node, cmd_node_help}, + { "peer", cmd_peer, cmd_peer_help}, { "socket", cmd_socket, cmd_socket_help}, { NULL } };