Merge branch 'master' into net-next
This commit is contained in:
commit
edadd6b076
|
|
@ -1,258 +0,0 @@
|
||||||
/*
|
|
||||||
* eBPF user space agent part
|
|
||||||
*
|
|
||||||
* Simple, _self-contained_ user space agent for the eBPF kernel
|
|
||||||
* ebpf_prog.c program, which gets all map fds passed from tc via unix
|
|
||||||
* domain socket in one transaction and can thus keep referencing
|
|
||||||
* them from user space in order to read out (or possibly modify)
|
|
||||||
* map data. Here, just as a minimal example to display counters.
|
|
||||||
*
|
|
||||||
* The agent only uses the bpf(2) syscall API to read or possibly
|
|
||||||
* write to eBPF maps, it doesn't need to be aware of the low-level
|
|
||||||
* bytecode parts and/or ELF parsing bits.
|
|
||||||
*
|
|
||||||
* ! For more details, see header comment in bpf_prog.c !
|
|
||||||
*
|
|
||||||
* gcc bpf_agent.c -o bpf_agent -Wall -O2
|
|
||||||
*
|
|
||||||
* For example, a more complex user space agent could run on each
|
|
||||||
* host, reading and writing into eBPF maps used by tc classifier
|
|
||||||
* and actions. It would thus allow for implementing a distributed
|
|
||||||
* tc architecture, for example, which would push down central
|
|
||||||
* policies into eBPF maps, and thus altering run-time behaviour.
|
|
||||||
*
|
|
||||||
* -- Happy eBPF hacking! ;)
|
|
||||||
*/
|
|
||||||
|
|
||||||
#define _GNU_SOURCE
|
|
||||||
|
|
||||||
#include <stdio.h>
|
|
||||||
#include <stdlib.h>
|
|
||||||
#include <string.h>
|
|
||||||
#include <errno.h>
|
|
||||||
#include <unistd.h>
|
|
||||||
#include <stdint.h>
|
|
||||||
#include <assert.h>
|
|
||||||
|
|
||||||
#include <sys/un.h>
|
|
||||||
#include <sys/types.h>
|
|
||||||
#include <sys/stat.h>
|
|
||||||
#include <sys/socket.h>
|
|
||||||
|
|
||||||
/* Just some misc macros as min(), offsetof(), etc. */
|
|
||||||
#include "../../include/utils.h"
|
|
||||||
/* Common code from fd passing. */
|
|
||||||
#include "../../include/bpf_scm.h"
|
|
||||||
/* Common, shared definitions with ebpf_prog.c */
|
|
||||||
#include "bpf_shared.h"
|
|
||||||
/* Mini syscall wrapper */
|
|
||||||
#include "bpf_sys.h"
|
|
||||||
|
|
||||||
/* Print the per-CPU drop counters held in the drops map.
 *
 * fd: file descriptor of the map; it is queried once per online CPU
 *     (as reported by sysconf()) with the CPU index as key.
 *
 * A failing lookup trips the assert().
 */
static void bpf_dump_drops(int fd)
{
	int cpu, max;

	max = sysconf(_SC_NPROCESSORS_ONLN);

	printf(" `- number of drops:");
	for (cpu = 0; cpu < max; cpu++) {
		long drops = 0;
		int ret;

		/* Do the lookup outside of assert(): when built with
		 * NDEBUG the assert expression is not evaluated at all,
		 * so the syscall would silently never be issued.
		 */
		ret = bpf_lookup_elem(fd, &cpu, &drops);
		assert(ret == 0);
		(void)ret;

		printf("\tcpu%d: %5ld", cpu, drops);
	}
	printf("\n");
}
|
|
||||||
|
|
||||||
static void bpf_dump_queue(int fd)
|
|
||||||
{
|
|
||||||
/* Just for the same of the example. */
|
|
||||||
int max_queue = 4, i;
|
|
||||||
|
|
||||||
printf(" | nic queues:");
|
|
||||||
for (i = 0; i < max_queue; i++) {
|
|
||||||
struct count_queue cq;
|
|
||||||
int ret;
|
|
||||||
|
|
||||||
memset(&cq, 0, sizeof(cq));
|
|
||||||
ret = bpf_lookup_elem(fd, &i, &cq);
|
|
||||||
assert(ret == 0 || (ret < 0 && errno == ENOENT));
|
|
||||||
|
|
||||||
printf("\tq%d:[pkts: %ld, mis: %ld]",
|
|
||||||
i, cq.total, cq.mismatch);
|
|
||||||
}
|
|
||||||
printf("\n");
|
|
||||||
}
|
|
||||||
|
|
||||||
static void bpf_dump_proto(int fd)
|
|
||||||
{
|
|
||||||
uint8_t protos[] = { IPPROTO_TCP, IPPROTO_UDP, IPPROTO_ICMP };
|
|
||||||
char *names[] = { "tcp", "udp", "icmp" };
|
|
||||||
int i;
|
|
||||||
|
|
||||||
printf(" ` protos:");
|
|
||||||
for (i = 0; i < ARRAY_SIZE(protos); i++) {
|
|
||||||
struct count_tuple ct;
|
|
||||||
int ret;
|
|
||||||
|
|
||||||
memset(&ct, 0, sizeof(ct));
|
|
||||||
ret = bpf_lookup_elem(fd, &protos[i], &ct);
|
|
||||||
assert(ret == 0 || (ret < 0 && errno == ENOENT));
|
|
||||||
|
|
||||||
printf("\t%s:[pkts: %ld, bytes: %ld]",
|
|
||||||
names[i], ct.packets, ct.bytes);
|
|
||||||
}
|
|
||||||
printf("\n");
|
|
||||||
}
|
|
||||||
|
|
||||||
static void bpf_dump_map_data(int *tfd)
|
|
||||||
{
|
|
||||||
int i;
|
|
||||||
|
|
||||||
for (i = 0; i < 30; i++) {
|
|
||||||
const int period = 5;
|
|
||||||
|
|
||||||
printf("data, period: %dsec\n", period);
|
|
||||||
|
|
||||||
bpf_dump_drops(tfd[BPF_MAP_ID_DROPS]);
|
|
||||||
bpf_dump_queue(tfd[BPF_MAP_ID_QUEUE]);
|
|
||||||
bpf_dump_proto(tfd[BPF_MAP_ID_PROTO]);
|
|
||||||
|
|
||||||
sleep(period);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
static void bpf_info_loop(int *fds, struct bpf_map_aux *aux)
|
|
||||||
{
|
|
||||||
int i, tfd[BPF_MAP_ID_MAX];
|
|
||||||
|
|
||||||
printf("ver: %d\nobj: %s\ndev: %lu\nino: %lu\nmaps: %u\n",
|
|
||||||
aux->uds_ver, aux->obj_name, aux->obj_st.st_dev,
|
|
||||||
aux->obj_st.st_ino, aux->num_ent);
|
|
||||||
|
|
||||||
for (i = 0; i < aux->num_ent; i++) {
|
|
||||||
printf("map%d:\n", i);
|
|
||||||
printf(" `- fd: %u\n", fds[i]);
|
|
||||||
printf(" | serial: %u\n", aux->ent[i].id);
|
|
||||||
printf(" | type: %u\n", aux->ent[i].type);
|
|
||||||
printf(" | max elem: %u\n", aux->ent[i].max_elem);
|
|
||||||
printf(" | size key: %u\n", aux->ent[i].size_key);
|
|
||||||
printf(" ` size val: %u\n", aux->ent[i].size_value);
|
|
||||||
|
|
||||||
tfd[aux->ent[i].id] = fds[i];
|
|
||||||
}
|
|
||||||
|
|
||||||
bpf_dump_map_data(tfd);
|
|
||||||
}
|
|
||||||
|
|
||||||
static void bpf_map_get_from_env(int *tfd)
|
|
||||||
{
|
|
||||||
char key[64], *val;
|
|
||||||
int i;
|
|
||||||
|
|
||||||
for (i = 0; i < BPF_MAP_ID_MAX; i++) {
|
|
||||||
memset(key, 0, sizeof(key));
|
|
||||||
snprintf(key, sizeof(key), "BPF_MAP%d", i);
|
|
||||||
|
|
||||||
val = getenv(key);
|
|
||||||
assert(val != NULL);
|
|
||||||
|
|
||||||
tfd[i] = atoi(val);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
static int bpf_map_set_recv(int fd, int *fds, struct bpf_map_aux *aux,
|
|
||||||
unsigned int entries)
|
|
||||||
{
|
|
||||||
struct bpf_map_set_msg msg;
|
|
||||||
int *cmsg_buf, min_fd, i;
|
|
||||||
char *amsg_buf, *mmsg_buf;
|
|
||||||
|
|
||||||
cmsg_buf = bpf_map_set_init(&msg, NULL, 0);
|
|
||||||
amsg_buf = (char *)msg.aux.ent;
|
|
||||||
mmsg_buf = (char *)&msg.aux;
|
|
||||||
|
|
||||||
for (i = 0; i < entries; i += min_fd) {
|
|
||||||
struct cmsghdr *cmsg;
|
|
||||||
int ret;
|
|
||||||
|
|
||||||
min_fd = min(BPF_SCM_MAX_FDS * 1U, entries - i);
|
|
||||||
|
|
||||||
bpf_map_set_init_single(&msg, min_fd);
|
|
||||||
|
|
||||||
ret = recvmsg(fd, &msg.hdr, 0);
|
|
||||||
if (ret <= 0)
|
|
||||||
return ret ? : -1;
|
|
||||||
|
|
||||||
cmsg = CMSG_FIRSTHDR(&msg.hdr);
|
|
||||||
if (!cmsg || cmsg->cmsg_type != SCM_RIGHTS)
|
|
||||||
return -EINVAL;
|
|
||||||
if (msg.hdr.msg_flags & MSG_CTRUNC)
|
|
||||||
return -EIO;
|
|
||||||
|
|
||||||
min_fd = (cmsg->cmsg_len - sizeof(*cmsg)) / sizeof(fd);
|
|
||||||
if (min_fd > entries || min_fd <= 0)
|
|
||||||
return -1;
|
|
||||||
|
|
||||||
memcpy(&fds[i], cmsg_buf, sizeof(fds[0]) * min_fd);
|
|
||||||
memcpy(&aux->ent[i], amsg_buf, sizeof(aux->ent[0]) * min_fd);
|
|
||||||
memcpy(aux, mmsg_buf, offsetof(struct bpf_map_aux, ent));
|
|
||||||
|
|
||||||
if (i + min_fd == aux->num_ent)
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
int main(int argc, char **argv)
|
|
||||||
{
|
|
||||||
int fds[BPF_SCM_MAX_FDS];
|
|
||||||
struct bpf_map_aux aux;
|
|
||||||
struct sockaddr_un addr;
|
|
||||||
int fd, ret, i;
|
|
||||||
|
|
||||||
/* When arguments are being passed, we take it as a path
|
|
||||||
* to a Unix domain socket, otherwise we grab the fds
|
|
||||||
* from the environment to demonstrate both possibilities.
|
|
||||||
*/
|
|
||||||
if (argc == 1) {
|
|
||||||
int tfd[BPF_MAP_ID_MAX];
|
|
||||||
|
|
||||||
bpf_map_get_from_env(tfd);
|
|
||||||
bpf_dump_map_data(tfd);
|
|
||||||
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
fd = socket(AF_UNIX, SOCK_DGRAM, 0);
|
|
||||||
if (fd < 0) {
|
|
||||||
fprintf(stderr, "Cannot open socket: %s\n",
|
|
||||||
strerror(errno));
|
|
||||||
exit(1);
|
|
||||||
}
|
|
||||||
|
|
||||||
memset(&addr, 0, sizeof(addr));
|
|
||||||
addr.sun_family = AF_UNIX;
|
|
||||||
strncpy(addr.sun_path, argv[argc - 1], sizeof(addr.sun_path));
|
|
||||||
|
|
||||||
ret = bind(fd, (struct sockaddr *)&addr, sizeof(addr));
|
|
||||||
if (ret < 0) {
|
|
||||||
fprintf(stderr, "Cannot bind to socket: %s\n",
|
|
||||||
strerror(errno));
|
|
||||||
exit(1);
|
|
||||||
}
|
|
||||||
|
|
||||||
memset(fds, 0, sizeof(fds));
|
|
||||||
memset(&aux, 0, sizeof(aux));
|
|
||||||
|
|
||||||
ret = bpf_map_set_recv(fd, fds, &aux, BPF_SCM_MAX_FDS);
|
|
||||||
if (ret >= 0)
|
|
||||||
bpf_info_loop(fds, &aux);
|
|
||||||
|
|
||||||
for (i = 0; i < aux.num_ent; i++)
|
|
||||||
close(fds[i]);
|
|
||||||
|
|
||||||
close(fd);
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
@ -0,0 +1,56 @@
|
||||||
|
#include "../../include/bpf_api.h"
|
||||||
|
|
||||||
|
#define MAP_INNER_ID 42
|
||||||
|
|
||||||
|
/* Single-element array map; map_outer refers to it through
 * MAP_INNER_ID.
 */
struct bpf_elf_map __section_maps map_inner = {
	.type		= BPF_MAP_TYPE_ARRAY,
	.size_key	= sizeof(uint32_t),
	.size_value	= sizeof(uint32_t),
	.max_elem	= 1,
	.id		= MAP_INNER_ID,
	.pinning	= PIN_GLOBAL_NS,
	.inner_idx	= 0,
};
|
||||||
|
|
||||||
|
/* Single-element array-of-maps; its inner map is selected by
 * inner_id = MAP_INNER_ID.
 */
struct bpf_elf_map __section_maps map_outer = {
	.type		= BPF_MAP_TYPE_ARRAY_OF_MAPS,
	.size_key	= sizeof(uint32_t),
	.size_value	= sizeof(uint32_t),
	.max_elem	= 1,
	.pinning	= PIN_GLOBAL_NS,
	.inner_id	= MAP_INNER_ID,
};
|
||||||
|
|
||||||
|
/* Egress program: look up slot 0 of map_outer, then slot 0 of the map
 * found there, and atomically add 1 to that value. Always returns
 * BPF_H_DEFAULT.
 */
__section("egress")
int emain(struct __sk_buff *skb)
{
	struct bpf_elf_map *inner;
	int key = 0, *val;

	inner = map_lookup_elem(&map_outer, &key);
	if (!inner)
		return BPF_H_DEFAULT;

	val = map_lookup_elem(inner, &key);
	if (val)
		lock_xadd(val, 1);

	return BPF_H_DEFAULT;
}
|
||||||
|
|
||||||
|
/* Ingress program: look up slot 0 of map_outer, then slot 0 of the map
 * found there, and print that value via printt(). Always returns
 * BPF_H_DEFAULT.
 */
__section("ingress")
int imain(struct __sk_buff *skb)
{
	struct bpf_elf_map *inner;
	int key = 0, *val;

	inner = map_lookup_elem(&map_outer, &key);
	if (!inner)
		return BPF_H_DEFAULT;

	val = map_lookup_elem(inner, &key);
	if (val)
		printt("map val: %d\n", *val);

	return BPF_H_DEFAULT;
}
|
||||||
|
|
||||||
|
BPF_LICENSE("GPL");
|
||||||
|
|
@ -1,501 +0,0 @@
|
||||||
/*
|
|
||||||
* eBPF kernel space program part
|
|
||||||
*
|
|
||||||
* Toy eBPF program for demonstration purposes, some parts derived from
|
|
||||||
* kernel tree's samples/bpf/sockex2_kern.c example.
|
|
||||||
*
|
|
||||||
* More background on eBPF, kernel tree: Documentation/networking/filter.txt
|
|
||||||
*
|
|
||||||
* Note, this file is rather large, and most classifier and actions are
|
|
||||||
* likely smaller to accomplish one specific use-case and are tailored
|
|
||||||
* for high performance. For performance reasons, you might also have the
|
|
||||||
* classifier and action already merged inside the classifier.
|
|
||||||
*
|
|
||||||
* In order to show various features it serves as a bigger programming
|
|
||||||
* example, which you should feel free to rip apart and experiment with.
|
|
||||||
*
|
|
||||||
* Compilation, configuration example:
|
|
||||||
*
|
|
||||||
* Note: as long as the BPF backend in LLVM is still experimental,
|
|
||||||
 * you need to build LLVM with --enable-experimental-targets=BPF
|
|
||||||
* Also, make sure your 4.1+ kernel is compiled with CONFIG_BPF_SYSCALL=y,
|
|
||||||
* and you have libelf.h and gelf.h headers and can link tc against -lelf.
|
|
||||||
*
|
|
||||||
* In case you need to sync kernel headers, go to your kernel source tree:
|
|
||||||
* # make headers_install INSTALL_HDR_PATH=/usr/
|
|
||||||
*
|
|
||||||
* $ export PATH=/home/<...>/llvm/Debug+Asserts/bin/:$PATH
|
|
||||||
* $ clang -O2 -emit-llvm -c bpf_prog.c -o - | llc -march=bpf -filetype=obj -o bpf.o
|
|
||||||
* $ objdump -h bpf.o
|
|
||||||
* [...]
|
|
||||||
* 3 classifier 000007f8 0000000000000000 0000000000000000 00000040 2**3
|
|
||||||
* CONTENTS, ALLOC, LOAD, RELOC, READONLY, CODE
|
|
||||||
* 4 action-mark 00000088 0000000000000000 0000000000000000 00000838 2**3
|
|
||||||
* CONTENTS, ALLOC, LOAD, RELOC, READONLY, CODE
|
|
||||||
* 5 action-rand 00000098 0000000000000000 0000000000000000 000008c0 2**3
|
|
||||||
* CONTENTS, ALLOC, LOAD, RELOC, READONLY, CODE
|
|
||||||
* 6 maps 00000030 0000000000000000 0000000000000000 00000958 2**2
|
|
||||||
* CONTENTS, ALLOC, LOAD, DATA
|
|
||||||
* 7 license 00000004 0000000000000000 0000000000000000 00000988 2**0
|
|
||||||
* CONTENTS, ALLOC, LOAD, DATA
|
|
||||||
* [...]
|
|
||||||
* # echo 1 > /proc/sys/net/core/bpf_jit_enable
|
|
||||||
* $ gcc bpf_agent.c -o bpf_agent -Wall -O2
|
|
||||||
* # ./bpf_agent /tmp/bpf-uds (e.g. on a different terminal)
|
|
||||||
* # tc filter add dev em1 parent 1: bpf obj bpf.o exp /tmp/bpf-uds flowid 1:1 \
|
|
||||||
* action bpf obj bpf.o sec action-mark \
|
|
||||||
* action bpf obj bpf.o sec action-rand ok
|
|
||||||
* # tc filter show dev em1
|
|
||||||
* filter parent 1: protocol all pref 49152 bpf
|
|
||||||
* filter parent 1: protocol all pref 49152 bpf handle 0x1 flowid 1:1 bpf.o:[classifier]
|
|
||||||
* action order 1: bpf bpf.o:[action-mark] default-action pipe
|
|
||||||
* index 52 ref 1 bind 1
|
|
||||||
*
|
|
||||||
* action order 2: bpf bpf.o:[action-rand] default-action pipe
|
|
||||||
* index 53 ref 1 bind 1
|
|
||||||
*
|
|
||||||
* action order 3: gact action pass
|
|
||||||
* random type none pass val 0
|
|
||||||
* index 38 ref 1 bind 1
|
|
||||||
*
|
|
||||||
* The same program can also be installed on ingress side (as opposed to above
|
|
||||||
* egress configuration), e.g.:
|
|
||||||
*
|
|
||||||
* # tc qdisc add dev em1 handle ffff: ingress
|
|
||||||
* # tc filter add dev em1 parent ffff: bpf obj ...
|
|
||||||
*
|
|
||||||
* Notes on BPF agent:
|
|
||||||
*
|
|
||||||
* In the above example, the bpf_agent creates the unix domain socket
|
|
||||||
 * natively. "tc exec" can also spawn a shell and hold the sockets there:
|
|
||||||
*
|
|
||||||
* # tc exec bpf imp /tmp/bpf-uds
|
|
||||||
* # tc filter add dev em1 parent 1: bpf obj bpf.o exp /tmp/bpf-uds flowid 1:1 \
|
|
||||||
* action bpf obj bpf.o sec action-mark \
|
|
||||||
* action bpf obj bpf.o sec action-rand ok
|
|
||||||
* sh-4.2# (shell spawned from tc exec)
|
|
||||||
* sh-4.2# bpf_agent
|
|
||||||
* [...]
|
|
||||||
*
|
|
||||||
* This will read out fds over environment and produce the same data dump
|
|
||||||
* as below. This has the advantage that the spawned shell owns the fds
|
|
||||||
* and thus if the agent is restarted, it can reattach to the same fds, also
|
|
||||||
* various programs can easily read/modify the data simultaneously from user
|
|
||||||
* space side.
|
|
||||||
*
|
|
||||||
* If the shell is unnecessary, the agent can also just be spawned directly
|
|
||||||
* via tc exec:
|
|
||||||
*
|
|
||||||
* # tc exec bpf imp /tmp/bpf-uds run bpf_agent
|
|
||||||
* # tc filter add dev em1 parent 1: bpf obj bpf.o exp /tmp/bpf-uds flowid 1:1 \
|
|
||||||
* action bpf obj bpf.o sec action-mark \
|
|
||||||
* action bpf obj bpf.o sec action-rand ok
|
|
||||||
*
|
|
||||||
* BPF agent example output:
|
|
||||||
*
|
|
||||||
* ver: 1
|
|
||||||
* obj: bpf.o
|
|
||||||
* dev: 64770
|
|
||||||
* ino: 6045133
|
|
||||||
* maps: 3
|
|
||||||
* map0:
|
|
||||||
* `- fd: 4
|
|
||||||
* | serial: 1
|
|
||||||
* | type: 1
|
|
||||||
* | max elem: 256
|
|
||||||
* | size key: 1
|
|
||||||
* ` size val: 16
|
|
||||||
* map1:
|
|
||||||
* `- fd: 5
|
|
||||||
* | serial: 2
|
|
||||||
* | type: 1
|
|
||||||
* | max elem: 1024
|
|
||||||
* | size key: 4
|
|
||||||
* ` size val: 16
|
|
||||||
* map2:
|
|
||||||
* `- fd: 6
|
|
||||||
* | serial: 3
|
|
||||||
* | type: 2
|
|
||||||
* | max elem: 64
|
|
||||||
* | size key: 4
|
|
||||||
* ` size val: 8
|
|
||||||
* data, period: 5sec
|
|
||||||
* `- number of drops: cpu0: 0 cpu1: 0 cpu2: 0 cpu3: 0
|
|
||||||
* | nic queues: q0:[pkts: 0, mis: 0] q1:[pkts: 0, mis: 0] q2:[pkts: 0, mis: 0] q3:[pkts: 0, mis: 0]
|
|
||||||
* ` protos: tcp:[pkts: 0, bytes: 0] udp:[pkts: 0, bytes: 0] icmp:[pkts: 0, bytes: 0]
|
|
||||||
* data, period: 5sec
|
|
||||||
* `- number of drops: cpu0: 5 cpu1: 0 cpu2: 0 cpu3: 1
|
|
||||||
* | nic queues: q0:[pkts: 0, mis: 0] q1:[pkts: 0, mis: 0] q2:[pkts: 24, mis: 14] q3:[pkts: 0, mis: 0]
|
|
||||||
* ` protos: tcp:[pkts: 13, bytes: 1989] udp:[pkts: 10, bytes: 710] icmp:[pkts: 0, bytes: 0]
|
|
||||||
* data, period: 5sec
|
|
||||||
* `- number of drops: cpu0: 5 cpu1: 0 cpu2: 3 cpu3: 3
|
|
||||||
* | nic queues: q0:[pkts: 0, mis: 0] q1:[pkts: 0, mis: 0] q2:[pkts: 39, mis: 21] q3:[pkts: 0, mis: 0]
|
|
||||||
* ` protos: tcp:[pkts: 20, bytes: 3549] udp:[pkts: 18, bytes: 1278] icmp:[pkts: 0, bytes: 0]
|
|
||||||
* [...]
|
|
||||||
*
|
|
||||||
* This now means, the below classifier and action pipeline has been loaded
|
|
||||||
* as eBPF bytecode into the kernel, the kernel has verified that the
|
|
||||||
* execution of the bytecode is "safe", and it has JITed the programs
|
|
||||||
* afterwards, so that upon invocation they're running on native speed. tc
|
|
||||||
* has transferred all map file descriptors to the bpf_agent via IPC and
|
|
||||||
* even after tc exits, the agent can read out or modify all map data.
|
|
||||||
*
|
|
||||||
* Note that the export to the uds is done only once in the classifier and
|
|
||||||
* not in the action. It's enough to export the (here) shared descriptors
|
|
||||||
* once.
|
|
||||||
*
|
|
||||||
* If you need to disassemble the generated JIT image (echo with 2), the
|
|
||||||
* kernel tree has under tools/net/ a small helper, you can invoke e.g.
|
|
||||||
* `bpf_jit_disasm -o`.
|
|
||||||
*
|
|
||||||
* Please find in the code below further comments.
|
|
||||||
*
|
|
||||||
* -- Happy eBPF hacking! ;)
|
|
||||||
*/
|
|
||||||
#include <stdint.h>
|
|
||||||
#include <stdbool.h>
|
|
||||||
#include <sys/types.h>
|
|
||||||
#include <sys/socket.h>
|
|
||||||
#include <asm/types.h>
|
|
||||||
#include <linux/in.h>
|
|
||||||
#include <linux/if.h>
|
|
||||||
#include <linux/if_ether.h>
|
|
||||||
#include <linux/ip.h>
|
|
||||||
#include <linux/ipv6.h>
|
|
||||||
#include <linux/if_tunnel.h>
|
|
||||||
#include <linux/filter.h>
|
|
||||||
#include <linux/bpf.h>
|
|
||||||
|
|
||||||
/* Common, shared definitions with ebpf_agent.c. */
|
|
||||||
#include "bpf_shared.h"
|
|
||||||
/* BPF helper functions for our example. */
|
|
||||||
#include "../../include/bpf_api.h"
|
|
||||||
|
|
||||||
/* Could be defined here as well, or included from the header. */
|
|
||||||
#define TC_ACT_UNSPEC (-1)
|
|
||||||
#define TC_ACT_OK 0
|
|
||||||
#define TC_ACT_RECLASSIFY 1
|
|
||||||
#define TC_ACT_SHOT 2
|
|
||||||
#define TC_ACT_PIPE 3
|
|
||||||
#define TC_ACT_STOLEN 4
|
|
||||||
#define TC_ACT_QUEUED 5
|
|
||||||
#define TC_ACT_REPEAT 6
|
|
||||||
|
|
||||||
/* Other, misc stuff. */
|
|
||||||
#define IP_MF 0x2000
|
|
||||||
#define IP_OFFSET 0x1FFF
|
|
||||||
|
|
||||||
/* eBPF map definitions, all placed in section "maps". */
|
|
||||||
/* Hash map: one-byte IP protocol number -> struct count_tuple. */
struct bpf_elf_map __section("maps") map_proto = {
	.type		= BPF_MAP_TYPE_HASH,
	.size_key	= sizeof(uint8_t),
	.size_value	= sizeof(struct count_tuple),
	.max_elem	= 256,
	.flags		= BPF_F_NO_PREALLOC,
	.id		= BPF_MAP_ID_PROTO,
};
|
|
||||||
|
|
||||||
/* Hash map: 32-bit queue index -> struct count_queue. */
struct bpf_elf_map __section("maps") map_queue = {
	.type		= BPF_MAP_TYPE_HASH,
	.size_key	= sizeof(uint32_t),
	.size_value	= sizeof(struct count_queue),
	.max_elem	= 1024,
	.flags		= BPF_F_NO_PREALLOC,
	.id		= BPF_MAP_ID_QUEUE,
};
|
|
||||||
|
|
||||||
/* Array map with 64 slots: 32-bit index -> long counter. */
struct bpf_elf_map __section("maps") map_drops = {
	.type		= BPF_MAP_TYPE_ARRAY,
	.size_key	= sizeof(uint32_t),
	.size_value	= sizeof(long),
	.max_elem	= 64,
	.id		= BPF_MAP_ID_DROPS,
};
|
|
||||||
|
|
||||||
/* Helper functions and definitions for the flow dissector used by the
|
|
||||||
* example classifier. This resembles the kernel's flow dissector to
|
|
||||||
 * some extent and is just used as an example to show what's possible
|
|
||||||
* with eBPF.
|
|
||||||
*/
|
|
||||||
struct sockaddr;
|
|
||||||
|
|
||||||
/* VLAN tag layout; the dissector only uses it for offsetof()/sizeof(). */
struct vlan_hdr {
	__be16 h_vlan_TCI;			/* not read by this example */
	__be16 h_vlan_encapsulated_proto;	/* protocol following the tag */
};
|
|
||||||
|
|
||||||
/* Result of the flow dissector. */
struct flow_keys {
	__u32 src;		/* IPv4 saddr, or XOR of the IPv6 saddr words */
	__u32 dst;		/* IPv4 daddr, or XOR of the IPv6 daddr words */
	union {
		__u32 ports;	/* 32-bit word loaded at th_off */
		__u16 port16[2];
	};
	__s32 th_off;		/* offset the ports word was loaded from */
	__u8 ip_proto;		/* protocol number found by the dissector */
};
|
|
||||||
|
|
||||||
/* Offset added to the header position before the ports word is read:
 * 4 for IPPROTO_AH, 0 for every other protocol.
 */
static __inline__ int flow_ports_offset(__u8 ip_proto)
{
	if (ip_proto == IPPROTO_AH)
		return 4;

	return 0;
}
|
|
||||||
|
|
||||||
static __inline__ bool flow_is_frag(struct __sk_buff *skb, int nh_off)
|
|
||||||
{
|
|
||||||
return !!(load_half(skb, nh_off + offsetof(struct iphdr, frag_off)) &
|
|
||||||
(IP_MF | IP_OFFSET));
|
|
||||||
}
|
|
||||||
|
|
||||||
static __inline__ int flow_parse_ipv4(struct __sk_buff *skb, int nh_off,
|
|
||||||
__u8 *ip_proto, struct flow_keys *flow)
|
|
||||||
{
|
|
||||||
__u8 ip_ver_len;
|
|
||||||
|
|
||||||
if (unlikely(flow_is_frag(skb, nh_off)))
|
|
||||||
*ip_proto = 0;
|
|
||||||
else
|
|
||||||
*ip_proto = load_byte(skb, nh_off + offsetof(struct iphdr,
|
|
||||||
protocol));
|
|
||||||
if (*ip_proto != IPPROTO_GRE) {
|
|
||||||
flow->src = load_word(skb, nh_off + offsetof(struct iphdr, saddr));
|
|
||||||
flow->dst = load_word(skb, nh_off + offsetof(struct iphdr, daddr));
|
|
||||||
}
|
|
||||||
|
|
||||||
ip_ver_len = load_byte(skb, nh_off + 0 /* offsetof(struct iphdr, ihl) */);
|
|
||||||
if (likely(ip_ver_len == 0x45))
|
|
||||||
nh_off += 20;
|
|
||||||
else
|
|
||||||
nh_off += (ip_ver_len & 0xF) << 2;
|
|
||||||
|
|
||||||
return nh_off;
|
|
||||||
}
|
|
||||||
|
|
||||||
static __inline__ __u32 flow_addr_hash_ipv6(struct __sk_buff *skb, int off)
|
|
||||||
{
|
|
||||||
__u32 w0 = load_word(skb, off);
|
|
||||||
__u32 w1 = load_word(skb, off + sizeof(w0));
|
|
||||||
__u32 w2 = load_word(skb, off + sizeof(w0) * 2);
|
|
||||||
__u32 w3 = load_word(skb, off + sizeof(w0) * 3);
|
|
||||||
|
|
||||||
return w0 ^ w1 ^ w2 ^ w3;
|
|
||||||
}
|
|
||||||
|
|
||||||
static __inline__ int flow_parse_ipv6(struct __sk_buff *skb, int nh_off,
|
|
||||||
__u8 *ip_proto, struct flow_keys *flow)
|
|
||||||
{
|
|
||||||
*ip_proto = load_byte(skb, nh_off + offsetof(struct ipv6hdr, nexthdr));
|
|
||||||
|
|
||||||
flow->src = flow_addr_hash_ipv6(skb, nh_off + offsetof(struct ipv6hdr, saddr));
|
|
||||||
flow->dst = flow_addr_hash_ipv6(skb, nh_off + offsetof(struct ipv6hdr, daddr));
|
|
||||||
|
|
||||||
return nh_off + sizeof(struct ipv6hdr);
|
|
||||||
}
|
|
||||||
|
|
||||||
static __inline__ bool flow_dissector(struct __sk_buff *skb,
|
|
||||||
struct flow_keys *flow)
|
|
||||||
{
|
|
||||||
int poff, nh_off = BPF_LL_OFF + ETH_HLEN;
|
|
||||||
__be16 proto = skb->protocol;
|
|
||||||
__u8 ip_proto;
|
|
||||||
|
|
||||||
/* TODO: check for skb->vlan_tci, skb->vlan_proto first */
|
|
||||||
if (proto == htons(ETH_P_8021AD)) {
|
|
||||||
proto = load_half(skb, nh_off +
|
|
||||||
offsetof(struct vlan_hdr, h_vlan_encapsulated_proto));
|
|
||||||
nh_off += sizeof(struct vlan_hdr);
|
|
||||||
}
|
|
||||||
if (proto == htons(ETH_P_8021Q)) {
|
|
||||||
proto = load_half(skb, nh_off +
|
|
||||||
offsetof(struct vlan_hdr, h_vlan_encapsulated_proto));
|
|
||||||
nh_off += sizeof(struct vlan_hdr);
|
|
||||||
}
|
|
||||||
|
|
||||||
if (likely(proto == htons(ETH_P_IP)))
|
|
||||||
nh_off = flow_parse_ipv4(skb, nh_off, &ip_proto, flow);
|
|
||||||
else if (proto == htons(ETH_P_IPV6))
|
|
||||||
nh_off = flow_parse_ipv6(skb, nh_off, &ip_proto, flow);
|
|
||||||
else
|
|
||||||
return false;
|
|
||||||
|
|
||||||
switch (ip_proto) {
|
|
||||||
case IPPROTO_GRE: {
|
|
||||||
struct gre_hdr {
|
|
||||||
__be16 flags;
|
|
||||||
__be16 proto;
|
|
||||||
};
|
|
||||||
|
|
||||||
__u16 gre_flags = load_half(skb, nh_off +
|
|
||||||
offsetof(struct gre_hdr, flags));
|
|
||||||
__u16 gre_proto = load_half(skb, nh_off +
|
|
||||||
offsetof(struct gre_hdr, proto));
|
|
||||||
|
|
||||||
if (gre_flags & (GRE_VERSION | GRE_ROUTING))
|
|
||||||
break;
|
|
||||||
|
|
||||||
nh_off += 4;
|
|
||||||
if (gre_flags & GRE_CSUM)
|
|
||||||
nh_off += 4;
|
|
||||||
if (gre_flags & GRE_KEY)
|
|
||||||
nh_off += 4;
|
|
||||||
if (gre_flags & GRE_SEQ)
|
|
||||||
nh_off += 4;
|
|
||||||
|
|
||||||
if (gre_proto == ETH_P_8021Q) {
|
|
||||||
gre_proto = load_half(skb, nh_off +
|
|
||||||
offsetof(struct vlan_hdr,
|
|
||||||
h_vlan_encapsulated_proto));
|
|
||||||
nh_off += sizeof(struct vlan_hdr);
|
|
||||||
}
|
|
||||||
if (gre_proto == ETH_P_IP)
|
|
||||||
nh_off = flow_parse_ipv4(skb, nh_off, &ip_proto, flow);
|
|
||||||
else if (gre_proto == ETH_P_IPV6)
|
|
||||||
nh_off = flow_parse_ipv6(skb, nh_off, &ip_proto, flow);
|
|
||||||
else
|
|
||||||
return false;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
case IPPROTO_IPIP:
|
|
||||||
nh_off = flow_parse_ipv4(skb, nh_off, &ip_proto, flow);
|
|
||||||
break;
|
|
||||||
case IPPROTO_IPV6:
|
|
||||||
nh_off = flow_parse_ipv6(skb, nh_off, &ip_proto, flow);
|
|
||||||
default:
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
nh_off += flow_ports_offset(ip_proto);
|
|
||||||
|
|
||||||
flow->ports = load_word(skb, nh_off);
|
|
||||||
flow->th_off = nh_off;
|
|
||||||
flow->ip_proto = ip_proto;
|
|
||||||
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
static __inline__ void cls_update_proto_map(const struct __sk_buff *skb,
|
|
||||||
const struct flow_keys *flow)
|
|
||||||
{
|
|
||||||
uint8_t proto = flow->ip_proto;
|
|
||||||
struct count_tuple *ct, _ct;
|
|
||||||
|
|
||||||
ct = map_lookup_elem(&map_proto, &proto);
|
|
||||||
if (likely(ct)) {
|
|
||||||
lock_xadd(&ct->packets, 1);
|
|
||||||
lock_xadd(&ct->bytes, skb->len);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* No hit yet, we need to create a new entry. */
|
|
||||||
_ct.packets = 1;
|
|
||||||
_ct.bytes = skb->len;
|
|
||||||
|
|
||||||
map_update_elem(&map_proto, &proto, &_ct, BPF_ANY);
|
|
||||||
}
|
|
||||||
|
|
||||||
static __inline__ void cls_update_queue_map(const struct __sk_buff *skb)
|
|
||||||
{
|
|
||||||
uint32_t queue = skb->queue_mapping;
|
|
||||||
struct count_queue *cq, _cq;
|
|
||||||
bool mismatch;
|
|
||||||
|
|
||||||
mismatch = skb->queue_mapping != get_smp_processor_id();
|
|
||||||
|
|
||||||
cq = map_lookup_elem(&map_queue, &queue);
|
|
||||||
if (likely(cq)) {
|
|
||||||
lock_xadd(&cq->total, 1);
|
|
||||||
if (mismatch)
|
|
||||||
lock_xadd(&cq->mismatch, 1);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* No hit yet, we need to create a new entry. */
|
|
||||||
_cq.total = 1;
|
|
||||||
_cq.mismatch = mismatch ? 1 : 0;
|
|
||||||
|
|
||||||
map_update_elem(&map_queue, &queue, &_cq, BPF_ANY);
|
|
||||||
}
|
|
||||||
|
|
||||||
/* eBPF program definitions, placed in various sections, which can
|
|
||||||
* have custom section names. If custom names are in use, it's
|
|
||||||
* required to point tc to the correct section, e.g.
|
|
||||||
*
|
|
||||||
* tc filter add [...] bpf obj cls.o sec cls-tos [...]
|
|
||||||
*
|
|
||||||
* in case the program resides in __section("cls-tos").
|
|
||||||
*
|
|
||||||
* Default section for cls_bpf is: "classifier", for act_bpf is:
|
|
||||||
* "action". Naturally, if for example multiple actions are present
|
|
||||||
* in the same file, they need to have distinct section names.
|
|
||||||
*
|
|
||||||
* It is however not required to have multiple programs sharing
|
|
||||||
* a file.
|
|
||||||
*/
|
|
||||||
/* Classifier entry point: dissect the packet, update the protocol and
 * queue maps, and return the dissected IP protocol number (0 when the
 * packet could not be dissected).
 */
__section("classifier")
int cls_main(struct __sk_buff *skb)
{
	struct flow_keys flow;
	bool dissected;

	dissected = flow_dissector(skb, &flow);
	if (!dissected)
		return 0; /* No match in cls_bpf. */

	cls_update_proto_map(skb, &flow);
	cls_update_queue_map(skb);

	return flow.ip_proto;
}
|
|
||||||
|
|
||||||
static __inline__ void act_update_drop_map(void)
|
|
||||||
{
|
|
||||||
uint32_t *count, cpu = get_smp_processor_id();
|
|
||||||
|
|
||||||
count = map_lookup_elem(&map_drops, &cpu);
|
|
||||||
if (count)
|
|
||||||
/* Only this cpu is accessing this element. */
|
|
||||||
(*count)++;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Action: drop (and count) packets whose mark is 0xcafe; everything
 * else gets TC_ACT_UNSPEC.
 */
__section("action-mark")
int act_mark_main(struct __sk_buff *skb)
{
	/* You could also mangle skb data here with the helper function
	 * BPF_FUNC_skb_store_bytes, etc. Or, alternatively you could
	 * do that already in the classifier itself as a merged combination
	 * of classifier'n'action model.
	 */

	/* Default configured tc opcode. */
	if (skb->mark != 0xcafe)
		return TC_ACT_UNSPEC;

	act_update_drop_map();
	return TC_ACT_SHOT;
}
|
|
||||||
|
|
||||||
/* Action: drop (and count) a packet whenever the two low bits of a
 * pseudo-random number are both zero; otherwise return TC_ACT_UNSPEC.
 */
__section("action-rand")
int act_rand_main(struct __sk_buff *skb)
{
	if ((get_prandom_u32() & 3) != 0)
		return TC_ACT_UNSPEC;

	/* Sorry, we're near event horizon ... */
	act_update_drop_map();
	return TC_ACT_SHOT;
}
|
|
||||||
|
|
||||||
/* Last but not least, the file contains a license. Some future helper
|
|
||||||
* functions may only be available with a GPL license.
|
|
||||||
*/
|
|
||||||
BPF_LICENSE("GPL");
|
|
||||||
|
|
@ -1,22 +0,0 @@
|
||||||
#ifndef __BPF_SHARED__
|
|
||||||
#define __BPF_SHARED__
|
|
||||||
|
|
||||||
/* Map ids shared between the eBPF program and the agent; the agent
 * uses them to index its table of map file descriptors.
 */
enum {
	BPF_MAP_ID_PROTO = 0,
	BPF_MAP_ID_QUEUE = 1,
	BPF_MAP_ID_DROPS = 2,
	__BPF_MAP_ID_MAX,	/* number of ids above */
#define BPF_MAP_ID_MAX	__BPF_MAP_ID_MAX
};
|
|
||||||
|
|
||||||
/* Per-protocol counters. */
struct count_tuple {
	long packets;	/* type long for lock_xadd() */
	long bytes;	/* sum of skb->len over the counted packets */
};
|
|
||||||
|
|
||||||
/* Per-queue counters. */
struct count_queue {
	long total;	/* packets seen on the queue */
	long mismatch;	/* of those, packets where queue index != CPU id */
};
|
|
||||||
|
|
||||||
#endif /* __BPF_SHARED__ */
|
|
||||||
|
|
@ -1,23 +0,0 @@
|
||||||
#ifndef __BPF_SYS__
|
|
||||||
#define __BPF_SYS__
|
|
||||||
|
|
||||||
#include <sys/syscall.h>
|
|
||||||
#include <linux/bpf.h>
|
|
||||||
|
|
||||||
/* Convert a pointer to the 64-bit integer form used for the address
 * fields of union bpf_attr.
 */
static inline __u64 bpf_ptr_to_u64(const void *ptr)
{
	unsigned long addr = (unsigned long) ptr;

	return (__u64) addr;
}
|
|
||||||
|
|
||||||
/* Issue a BPF_MAP_LOOKUP_ELEM command through the bpf(2) syscall.
 *
 * fd:    map file descriptor.
 * key:   address of the key to look up.
 * value: address of the buffer passed to the kernel for the value.
 *
 * Returns the syscall's return value.
 */
static inline int bpf_lookup_elem(int fd, void *key, void *value)
{
	const __u64 key_addr = bpf_ptr_to_u64(key);
	const __u64 value_addr = bpf_ptr_to_u64(value);
	union bpf_attr attr = {
		.map_fd	= fd,
		.key	= key_addr,
		.value	= value_addr,
	};
	long ret;

	ret = syscall(__NR_bpf, BPF_MAP_LOOKUP_ELEM, &attr, sizeof(attr));
	return ret;
}
|
|
||||||
|
|
||||||
#endif /* __BPF_SYS__ */
|
|
||||||
|
|
@ -36,6 +36,8 @@ struct bpf_elf_map {
|
||||||
__u32 flags;
|
__u32 flags;
|
||||||
__u32 id;
|
__u32 id;
|
||||||
__u32 pinning;
|
__u32 pinning;
|
||||||
|
__u32 inner_id;
|
||||||
|
__u32 inner_idx;
|
||||||
};
|
};
|
||||||
|
|
||||||
#endif /* __BPF_ELF__ */
|
#endif /* __BPF_ELF__ */
|
||||||
|
|
|
||||||
|
|
@ -261,6 +261,8 @@ int bpf_prog_load(enum bpf_prog_type type, const struct bpf_insn *insns,
|
||||||
int bpf_prog_attach_fd(int prog_fd, int target_fd, enum bpf_attach_type type);
|
int bpf_prog_attach_fd(int prog_fd, int target_fd, enum bpf_attach_type type);
|
||||||
int bpf_prog_detach_fd(int target_fd, enum bpf_attach_type type);
|
int bpf_prog_detach_fd(int target_fd, enum bpf_attach_type type);
|
||||||
|
|
||||||
|
void bpf_dump_prog_info(FILE *f, uint32_t id);
|
||||||
|
|
||||||
#ifdef HAVE_ELF
|
#ifdef HAVE_ELF
|
||||||
int bpf_send_map_fds(const char *path, const char *obj);
|
int bpf_send_map_fds(const char *path, const char *obj);
|
||||||
int bpf_recv_map_fds(const char *path, int *fds, struct bpf_map_aux *aux,
|
int bpf_recv_map_fds(const char *path, int *fds, struct bpf_map_aux *aux,
|
||||||
|
|
|
||||||
|
|
@ -18,7 +18,6 @@
|
||||||
#include <fcntl.h>
|
#include <fcntl.h>
|
||||||
#include <sys/ioctl.h>
|
#include <sys/ioctl.h>
|
||||||
#include <sys/socket.h>
|
#include <sys/socket.h>
|
||||||
#include <sys/ioctl.h>
|
|
||||||
#include <sys/param.h>
|
#include <sys/param.h>
|
||||||
#include <errno.h>
|
#include <errno.h>
|
||||||
#include <netinet/in.h>
|
#include <netinet/in.h>
|
||||||
|
|
|
||||||
|
|
@ -26,7 +26,6 @@
|
||||||
#include <arpa/inet.h>
|
#include <arpa/inet.h>
|
||||||
#include <string.h>
|
#include <string.h>
|
||||||
#include <sys/ioctl.h>
|
#include <sys/ioctl.h>
|
||||||
#include <linux/sockios.h>
|
|
||||||
#include <stdbool.h>
|
#include <stdbool.h>
|
||||||
#include <linux/mpls.h>
|
#include <linux/mpls.h>
|
||||||
|
|
||||||
|
|
|
||||||
10
ip/iproute.c
10
ip/iproute.c
|
|
@ -1731,6 +1731,16 @@ static int iproute_get(int argc, char **argv)
|
||||||
addattr32(&req.n, sizeof(req), RTA_UID, uid);
|
addattr32(&req.n, sizeof(req), RTA_UID, uid);
|
||||||
} else if (matches(*argv, "fibmatch") == 0) {
|
} else if (matches(*argv, "fibmatch") == 0) {
|
||||||
fib_match = 1;
|
fib_match = 1;
|
||||||
|
} else if (strcmp(*argv, "as") == 0) {
|
||||||
|
inet_prefix addr;
|
||||||
|
|
||||||
|
NEXT_ARG();
|
||||||
|
if (strcmp(*argv, "to") == 0)
|
||||||
|
NEXT_ARG();
|
||||||
|
get_addr(&addr, *argv, req.r.rtm_family);
|
||||||
|
if (req.r.rtm_family == AF_UNSPEC)
|
||||||
|
req.r.rtm_family = addr.family;
|
||||||
|
addattr_l(&req.n, sizeof(req), RTA_NEWDST, &addr.data, addr.bytelen);
|
||||||
} else {
|
} else {
|
||||||
inet_prefix addr;
|
inet_prefix addr;
|
||||||
|
|
||||||
|
|
|
||||||
207
lib/bpf.c
207
lib/bpf.c
|
|
@ -152,6 +152,54 @@ static int bpf_map_update(int fd, const void *key, const void *value,
|
||||||
return bpf(BPF_MAP_UPDATE_ELEM, &attr, sizeof(attr));
|
return bpf(BPF_MAP_UPDATE_ELEM, &attr, sizeof(attr));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static int bpf_prog_fd_by_id(uint32_t id)
|
||||||
|
{
|
||||||
|
union bpf_attr attr = {};
|
||||||
|
|
||||||
|
attr.prog_id = id;
|
||||||
|
|
||||||
|
return bpf(BPF_PROG_GET_FD_BY_ID, &attr, sizeof(attr));
|
||||||
|
}
|
||||||
|
|
||||||
|
static int bpf_prog_info_by_fd(int fd, struct bpf_prog_info *info,
|
||||||
|
uint32_t *info_len)
|
||||||
|
{
|
||||||
|
union bpf_attr attr = {};
|
||||||
|
int ret;
|
||||||
|
|
||||||
|
attr.info.bpf_fd = fd;
|
||||||
|
attr.info.info = bpf_ptr_to_u64(info);
|
||||||
|
attr.info.info_len = *info_len;
|
||||||
|
|
||||||
|
*info_len = 0;
|
||||||
|
ret = bpf(BPF_OBJ_GET_INFO_BY_FD, &attr, sizeof(attr));
|
||||||
|
if (!ret)
|
||||||
|
*info_len = attr.info.info_len;
|
||||||
|
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
|
void bpf_dump_prog_info(FILE *f, uint32_t id)
|
||||||
|
{
|
||||||
|
struct bpf_prog_info info = {};
|
||||||
|
uint32_t len = sizeof(info);
|
||||||
|
int fd, ret;
|
||||||
|
|
||||||
|
fprintf(f, "id %u ", id);
|
||||||
|
|
||||||
|
fd = bpf_prog_fd_by_id(id);
|
||||||
|
if (fd < 0)
|
||||||
|
return;
|
||||||
|
|
||||||
|
ret = bpf_prog_info_by_fd(fd, &info, &len);
|
||||||
|
if (!ret && len) {
|
||||||
|
if (info.jited_prog_len)
|
||||||
|
fprintf(f, "jited ");
|
||||||
|
}
|
||||||
|
|
||||||
|
close(fd);
|
||||||
|
}
|
||||||
|
|
||||||
static int bpf_parse_string(char *arg, bool from_file, __u16 *bpf_len,
|
static int bpf_parse_string(char *arg, bool from_file, __u16 *bpf_len,
|
||||||
char **bpf_string, bool *need_release,
|
char **bpf_string, bool *need_release,
|
||||||
const char separator)
|
const char separator)
|
||||||
|
|
@ -1023,15 +1071,16 @@ static int bpf_log_realloc(struct bpf_elf_ctx *ctx)
|
||||||
|
|
||||||
static int bpf_map_create(enum bpf_map_type type, uint32_t size_key,
|
static int bpf_map_create(enum bpf_map_type type, uint32_t size_key,
|
||||||
uint32_t size_value, uint32_t max_elem,
|
uint32_t size_value, uint32_t max_elem,
|
||||||
uint32_t flags)
|
uint32_t flags, int inner_fd)
|
||||||
{
|
{
|
||||||
union bpf_attr attr = {};
|
union bpf_attr attr = {};
|
||||||
|
|
||||||
attr.map_type = type;
|
attr.map_type = type;
|
||||||
attr.key_size = size_key;
|
attr.key_size = size_key;
|
||||||
attr.value_size = size_value;
|
attr.value_size = inner_fd ? sizeof(int) : size_value;
|
||||||
attr.max_entries = max_elem;
|
attr.max_entries = max_elem;
|
||||||
attr.map_flags = flags;
|
attr.map_flags = flags;
|
||||||
|
attr.inner_map_fd = inner_fd;
|
||||||
|
|
||||||
return bpf(BPF_MAP_CREATE, &attr, sizeof(attr));
|
return bpf(BPF_MAP_CREATE, &attr, sizeof(attr));
|
||||||
}
|
}
|
||||||
|
|
@ -1343,7 +1392,7 @@ retry:
|
||||||
|
|
||||||
static void bpf_map_report(int fd, const char *name,
|
static void bpf_map_report(int fd, const char *name,
|
||||||
const struct bpf_elf_map *map,
|
const struct bpf_elf_map *map,
|
||||||
struct bpf_elf_ctx *ctx)
|
struct bpf_elf_ctx *ctx, int inner_fd)
|
||||||
{
|
{
|
||||||
fprintf(stderr, "Map object \'%s\' %s%s (%d)!\n", name,
|
fprintf(stderr, "Map object \'%s\' %s%s (%d)!\n", name,
|
||||||
fd < 0 ? "rejected: " : "loaded",
|
fd < 0 ? "rejected: " : "loaded",
|
||||||
|
|
@ -1354,15 +1403,91 @@ static void bpf_map_report(int fd, const char *name,
|
||||||
fprintf(stderr, " - Identifier: %u\n", map->id);
|
fprintf(stderr, " - Identifier: %u\n", map->id);
|
||||||
fprintf(stderr, " - Pinning: %u\n", map->pinning);
|
fprintf(stderr, " - Pinning: %u\n", map->pinning);
|
||||||
fprintf(stderr, " - Size key: %u\n", map->size_key);
|
fprintf(stderr, " - Size key: %u\n", map->size_key);
|
||||||
fprintf(stderr, " - Size value: %u\n", map->size_value);
|
fprintf(stderr, " - Size value: %u\n",
|
||||||
|
inner_fd ? (int)sizeof(int) : map->size_value);
|
||||||
fprintf(stderr, " - Max elems: %u\n", map->max_elem);
|
fprintf(stderr, " - Max elems: %u\n", map->max_elem);
|
||||||
fprintf(stderr, " - Flags: %#x\n\n", map->flags);
|
fprintf(stderr, " - Flags: %#x\n\n", map->flags);
|
||||||
}
|
}
|
||||||
|
|
||||||
static int bpf_map_attach(const char *name, const struct bpf_elf_map *map,
|
static int bpf_find_map_id(const struct bpf_elf_ctx *ctx, uint32_t id)
|
||||||
struct bpf_elf_ctx *ctx)
|
|
||||||
{
|
{
|
||||||
int fd, ret;
|
int i;
|
||||||
|
|
||||||
|
for (i = 0; i < ctx->map_num; i++) {
|
||||||
|
if (ctx->maps[i].id != id)
|
||||||
|
continue;
|
||||||
|
if (ctx->map_fds[i] < 0)
|
||||||
|
return -EINVAL;
|
||||||
|
|
||||||
|
return ctx->map_fds[i];
|
||||||
|
}
|
||||||
|
|
||||||
|
return -ENOENT;
|
||||||
|
}
|
||||||
|
|
||||||
|
static int bpf_derive_elf_map_from_fdinfo(int fd, struct bpf_elf_map *map)
|
||||||
|
{
|
||||||
|
char file[PATH_MAX], buff[4096];
|
||||||
|
unsigned int val;
|
||||||
|
FILE *fp;
|
||||||
|
|
||||||
|
snprintf(file, sizeof(file), "/proc/%d/fdinfo/%d", getpid(), fd);
|
||||||
|
|
||||||
|
memset(map, 0, sizeof(*map));
|
||||||
|
|
||||||
|
fp = fopen(file, "r");
|
||||||
|
if (!fp) {
|
||||||
|
fprintf(stderr, "No procfs support?!\n");
|
||||||
|
return -EIO;
|
||||||
|
}
|
||||||
|
|
||||||
|
while (fgets(buff, sizeof(buff), fp)) {
|
||||||
|
if (sscanf(buff, "map_type:\t%u", &val) == 1)
|
||||||
|
map->type = val;
|
||||||
|
else if (sscanf(buff, "key_size:\t%u", &val) == 1)
|
||||||
|
map->size_key = val;
|
||||||
|
else if (sscanf(buff, "value_size:\t%u", &val) == 1)
|
||||||
|
map->size_value = val;
|
||||||
|
else if (sscanf(buff, "max_entries:\t%u", &val) == 1)
|
||||||
|
map->max_elem = val;
|
||||||
|
else if (sscanf(buff, "map_flags:\t%i", &val) == 1)
|
||||||
|
map->flags = val;
|
||||||
|
}
|
||||||
|
|
||||||
|
fclose(fp);
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
static void bpf_report_map_in_map(int outer_fd, int inner_fd, uint32_t idx)
|
||||||
|
{
|
||||||
|
struct bpf_elf_map outer_map;
|
||||||
|
int ret;
|
||||||
|
|
||||||
|
fprintf(stderr, "Cannot insert map into map! ");
|
||||||
|
|
||||||
|
ret = bpf_derive_elf_map_from_fdinfo(outer_fd, &outer_map);
|
||||||
|
if (!ret) {
|
||||||
|
if (idx >= outer_map.max_elem &&
|
||||||
|
outer_map.type == BPF_MAP_TYPE_ARRAY_OF_MAPS) {
|
||||||
|
fprintf(stderr, "Outer map has %u elements, index %u is invalid!\n",
|
||||||
|
outer_map.max_elem, idx);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fprintf(stderr, "Different map specs used for outer and inner map?\n");
|
||||||
|
}
|
||||||
|
|
||||||
|
static bool bpf_is_map_in_map_type(const struct bpf_elf_map *map)
|
||||||
|
{
|
||||||
|
return map->type == BPF_MAP_TYPE_ARRAY_OF_MAPS ||
|
||||||
|
map->type == BPF_MAP_TYPE_HASH_OF_MAPS;
|
||||||
|
}
|
||||||
|
|
||||||
|
static int bpf_map_attach(const char *name, const struct bpf_elf_map *map,
|
||||||
|
struct bpf_elf_ctx *ctx, int *have_map_in_map)
|
||||||
|
{
|
||||||
|
int fd, ret, map_inner_fd = 0;
|
||||||
|
|
||||||
fd = bpf_probe_pinned(name, ctx, map->pinning);
|
fd = bpf_probe_pinned(name, ctx, map->pinning);
|
||||||
if (fd > 0) {
|
if (fd > 0) {
|
||||||
|
|
@ -1381,11 +1506,29 @@ static int bpf_map_attach(const char *name, const struct bpf_elf_map *map,
|
||||||
return fd;
|
return fd;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (have_map_in_map && bpf_is_map_in_map_type(map)) {
|
||||||
|
(*have_map_in_map)++;
|
||||||
|
if (map->inner_id)
|
||||||
|
return 0;
|
||||||
|
fprintf(stderr, "Map \'%s\' cannot be created since no inner map ID defined!\n",
|
||||||
|
name);
|
||||||
|
return -EINVAL;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!have_map_in_map && bpf_is_map_in_map_type(map)) {
|
||||||
|
map_inner_fd = bpf_find_map_id(ctx, map->inner_id);
|
||||||
|
if (map_inner_fd < 0) {
|
||||||
|
fprintf(stderr, "Map \'%s\' cannot be loaded. Inner map with ID %u not found!\n",
|
||||||
|
name, map->inner_id);
|
||||||
|
return -EINVAL;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
errno = 0;
|
errno = 0;
|
||||||
fd = bpf_map_create(map->type, map->size_key, map->size_value,
|
fd = bpf_map_create(map->type, map->size_key, map->size_value,
|
||||||
map->max_elem, map->flags);
|
map->max_elem, map->flags, map_inner_fd);
|
||||||
if (fd < 0 || ctx->verbose) {
|
if (fd < 0 || ctx->verbose) {
|
||||||
bpf_map_report(fd, name, map, ctx);
|
bpf_map_report(fd, name, map, ctx, map_inner_fd);
|
||||||
if (fd < 0)
|
if (fd < 0)
|
||||||
return fd;
|
return fd;
|
||||||
}
|
}
|
||||||
|
|
@ -1430,21 +1573,63 @@ static const char *bpf_map_fetch_name(struct bpf_elf_ctx *ctx, int which)
|
||||||
|
|
||||||
static int bpf_maps_attach_all(struct bpf_elf_ctx *ctx)
|
static int bpf_maps_attach_all(struct bpf_elf_ctx *ctx)
|
||||||
{
|
{
|
||||||
|
int i, j, ret, fd, inner_fd, inner_idx, have_map_in_map = 0;
|
||||||
const char *map_name;
|
const char *map_name;
|
||||||
int i, fd;
|
|
||||||
|
|
||||||
for (i = 0; i < ctx->map_num; i++) {
|
for (i = 0; i < ctx->map_num; i++) {
|
||||||
map_name = bpf_map_fetch_name(ctx, i);
|
map_name = bpf_map_fetch_name(ctx, i);
|
||||||
if (!map_name)
|
if (!map_name)
|
||||||
return -EIO;
|
return -EIO;
|
||||||
|
|
||||||
fd = bpf_map_attach(map_name, &ctx->maps[i], ctx);
|
fd = bpf_map_attach(map_name, &ctx->maps[i], ctx,
|
||||||
|
&have_map_in_map);
|
||||||
|
if (fd < 0)
|
||||||
|
return fd;
|
||||||
|
|
||||||
|
ctx->map_fds[i] = !fd ? -1 : fd;
|
||||||
|
}
|
||||||
|
|
||||||
|
for (i = 0; have_map_in_map && i < ctx->map_num; i++) {
|
||||||
|
if (ctx->map_fds[i] >= 0)
|
||||||
|
continue;
|
||||||
|
|
||||||
|
map_name = bpf_map_fetch_name(ctx, i);
|
||||||
|
if (!map_name)
|
||||||
|
return -EIO;
|
||||||
|
|
||||||
|
fd = bpf_map_attach(map_name, &ctx->maps[i], ctx,
|
||||||
|
NULL);
|
||||||
if (fd < 0)
|
if (fd < 0)
|
||||||
return fd;
|
return fd;
|
||||||
|
|
||||||
ctx->map_fds[i] = fd;
|
ctx->map_fds[i] = fd;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
for (i = 0; have_map_in_map && i < ctx->map_num; i++) {
|
||||||
|
if (!ctx->maps[i].id ||
|
||||||
|
ctx->maps[i].inner_id ||
|
||||||
|
ctx->maps[i].inner_idx == -1)
|
||||||
|
continue;
|
||||||
|
|
||||||
|
inner_fd = ctx->map_fds[i];
|
||||||
|
inner_idx = ctx->maps[i].inner_idx;
|
||||||
|
|
||||||
|
for (j = 0; j < ctx->map_num; j++) {
|
||||||
|
if (!bpf_is_map_in_map_type(&ctx->maps[j]))
|
||||||
|
continue;
|
||||||
|
if (ctx->maps[j].inner_id != ctx->maps[i].id)
|
||||||
|
continue;
|
||||||
|
|
||||||
|
ret = bpf_map_update(ctx->map_fds[j], &inner_idx,
|
||||||
|
&inner_fd, BPF_ANY);
|
||||||
|
if (ret < 0) {
|
||||||
|
bpf_report_map_in_map(ctx->map_fds[j],
|
||||||
|
inner_fd, inner_idx);
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -16,7 +16,6 @@
|
||||||
#include <fcntl.h>
|
#include <fcntl.h>
|
||||||
#include <sys/ioctl.h>
|
#include <sys/ioctl.h>
|
||||||
#include <sys/socket.h>
|
#include <sys/socket.h>
|
||||||
#include <sys/ioctl.h>
|
|
||||||
#include <netinet/in.h>
|
#include <netinet/in.h>
|
||||||
#include <arpa/inet.h>
|
#include <arpa/inet.h>
|
||||||
#include <string.h>
|
#include <string.h>
|
||||||
|
|
|
||||||
|
|
@ -16,7 +16,6 @@
|
||||||
#include <fcntl.h>
|
#include <fcntl.h>
|
||||||
#include <sys/ioctl.h>
|
#include <sys/ioctl.h>
|
||||||
#include <sys/socket.h>
|
#include <sys/socket.h>
|
||||||
#include <sys/ioctl.h>
|
|
||||||
#include <netinet/in.h>
|
#include <netinet/in.h>
|
||||||
#include <arpa/inet.h>
|
#include <arpa/inet.h>
|
||||||
#include <string.h>
|
#include <string.h>
|
||||||
|
|
|
||||||
|
|
@ -16,7 +16,6 @@
|
||||||
#include <fcntl.h>
|
#include <fcntl.h>
|
||||||
#include <sys/ioctl.h>
|
#include <sys/ioctl.h>
|
||||||
#include <sys/socket.h>
|
#include <sys/socket.h>
|
||||||
#include <sys/ioctl.h>
|
|
||||||
#include <netinet/in.h>
|
#include <netinet/in.h>
|
||||||
#include <arpa/inet.h>
|
#include <arpa/inet.h>
|
||||||
#include <string.h>
|
#include <string.h>
|
||||||
|
|
|
||||||
|
|
@ -29,9 +29,9 @@ csum - checksum update action
|
||||||
The
|
The
|
||||||
.B csum
|
.B csum
|
||||||
action triggers checksum recalculation of specified packet headers. It is
|
action triggers checksum recalculation of specified packet headers. It is
|
||||||
commonly used after packet editing using the
|
commonly used to fix incorrect checksums after the
|
||||||
.B pedit
|
.B pedit
|
||||||
action to fix for then incorrect checksums.
|
action has modified the packet content.
|
||||||
.SH OPTIONS
|
.SH OPTIONS
|
||||||
.TP
|
.TP
|
||||||
.I TARGET
|
.I TARGET
|
||||||
|
|
|
||||||
|
|
@ -11,7 +11,6 @@
|
||||||
*/
|
*/
|
||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
#include <stdlib.h>
|
#include <stdlib.h>
|
||||||
#include <stdlib.h>
|
|
||||||
#include <string.h>
|
#include <string.h>
|
||||||
#include <math.h>
|
#include <math.h>
|
||||||
#include <limits.h>
|
#include <limits.h>
|
||||||
|
|
|
||||||
|
|
@ -230,6 +230,9 @@ static int bpf_print_opt(struct filter_util *qu, FILE *f,
|
||||||
b, sizeof(b)));
|
b, sizeof(b)));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (tb[TCA_BPF_ID])
|
||||||
|
bpf_dump_prog_info(f, rta_getattr_u32(tb[TCA_BPF_ID]));
|
||||||
|
|
||||||
if (tb[TCA_BPF_POLICE]) {
|
if (tb[TCA_BPF_POLICE]) {
|
||||||
fprintf(f, "\n");
|
fprintf(f, "\n");
|
||||||
tc_print_police(f, tb[TCA_BPF_POLICE]);
|
tc_print_police(f, tb[TCA_BPF_POLICE]);
|
||||||
|
|
|
||||||
|
|
@ -186,6 +186,9 @@ static int bpf_print_opt(struct action_util *au, FILE *f, struct rtattr *arg)
|
||||||
b, sizeof(b)));
|
b, sizeof(b)));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (tb[TCA_ACT_BPF_ID])
|
||||||
|
bpf_dump_prog_info(f, rta_getattr_u32(tb[TCA_ACT_BPF_ID]));
|
||||||
|
|
||||||
print_action_control(f, "default-action ", parm->action, "\n");
|
print_action_control(f, "default-action ", parm->action, "\n");
|
||||||
fprintf(f, "\tindex %u ref %d bind %d", parm->index, parm->refcnt,
|
fprintf(f, "\tindex %u ref %d bind %d", parm->index, parm->refcnt,
|
||||||
parm->bindcnt);
|
parm->bindcnt);
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue