|
|
|
|
@ -116,6 +116,7 @@ enum bpf_cmd {
|
|
|
|
|
BPF_LINK_GET_FD_BY_ID,
|
|
|
|
|
BPF_LINK_GET_NEXT_ID,
|
|
|
|
|
BPF_ENABLE_STATS,
|
|
|
|
|
BPF_ITER_CREATE,
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
enum bpf_map_type {
|
|
|
|
|
@ -146,6 +147,7 @@ enum bpf_map_type {
|
|
|
|
|
BPF_MAP_TYPE_SK_STORAGE,
|
|
|
|
|
BPF_MAP_TYPE_DEVMAP_HASH,
|
|
|
|
|
BPF_MAP_TYPE_STRUCT_OPS,
|
|
|
|
|
BPF_MAP_TYPE_RINGBUF,
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
/* Note that tracing related programs such as
|
|
|
|
|
@ -218,6 +220,12 @@ enum bpf_attach_type {
|
|
|
|
|
BPF_TRACE_FEXIT,
|
|
|
|
|
BPF_MODIFY_RETURN,
|
|
|
|
|
BPF_LSM_MAC,
|
|
|
|
|
BPF_TRACE_ITER,
|
|
|
|
|
BPF_CGROUP_INET4_GETPEERNAME,
|
|
|
|
|
BPF_CGROUP_INET6_GETPEERNAME,
|
|
|
|
|
BPF_CGROUP_INET4_GETSOCKNAME,
|
|
|
|
|
BPF_CGROUP_INET6_GETSOCKNAME,
|
|
|
|
|
BPF_XDP_DEVMAP,
|
|
|
|
|
__MAX_BPF_ATTACH_TYPE
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
@ -228,6 +236,8 @@ enum bpf_link_type {
|
|
|
|
|
BPF_LINK_TYPE_RAW_TRACEPOINT = 1,
|
|
|
|
|
BPF_LINK_TYPE_TRACING = 2,
|
|
|
|
|
BPF_LINK_TYPE_CGROUP = 3,
|
|
|
|
|
BPF_LINK_TYPE_ITER = 4,
|
|
|
|
|
BPF_LINK_TYPE_NETNS = 5,
|
|
|
|
|
|
|
|
|
|
MAX_BPF_LINK_TYPE,
|
|
|
|
|
};
|
|
|
|
|
@ -612,6 +622,11 @@ union bpf_attr {
|
|
|
|
|
__u32 type;
|
|
|
|
|
} enable_stats;
|
|
|
|
|
|
|
|
|
|
struct { /* struct used by BPF_ITER_CREATE command */
|
|
|
|
|
__u32 link_fd;
|
|
|
|
|
__u32 flags;
|
|
|
|
|
} iter_create;
|
|
|
|
|
|
|
|
|
|
} __attribute__((aligned(8)));
|
|
|
|
|
|
|
|
|
|
/* The description below is an attempt at providing documentation to eBPF
|
|
|
|
|
@ -667,8 +682,8 @@ union bpf_attr {
|
|
|
|
|
* For tracing programs, safely attempt to read *size* bytes from
|
|
|
|
|
* kernel space address *unsafe_ptr* and store the data in *dst*.
|
|
|
|
|
*
|
|
|
|
|
* Generally, use bpf_probe_read_user() or bpf_probe_read_kernel()
|
|
|
|
|
* instead.
|
|
|
|
|
* Generally, use **bpf_probe_read_user**\ () or
|
|
|
|
|
* **bpf_probe_read_kernel**\ () instead.
|
|
|
|
|
* Return
|
|
|
|
|
* 0 on success, or a negative error in case of failure.
|
|
|
|
|
*
|
|
|
|
|
@ -676,7 +691,7 @@ union bpf_attr {
|
|
|
|
|
* Description
|
|
|
|
|
* Return the time elapsed since system boot, in nanoseconds.
|
|
|
|
|
* Does not include time the system was suspended.
|
|
|
|
|
* See: clock_gettime(CLOCK_MONOTONIC)
|
|
|
|
|
* See: **clock_gettime**\ (**CLOCK_MONOTONIC**)
|
|
|
|
|
* Return
|
|
|
|
|
* Current *ktime*.
|
|
|
|
|
*
|
|
|
|
|
@ -1535,11 +1550,11 @@ union bpf_attr {
|
|
|
|
|
* int bpf_probe_read_str(void *dst, u32 size, const void *unsafe_ptr)
|
|
|
|
|
* Description
|
|
|
|
|
* Copy a NUL terminated string from an unsafe kernel address
|
|
|
|
|
* *unsafe_ptr* to *dst*. See bpf_probe_read_kernel_str() for
|
|
|
|
|
* *unsafe_ptr* to *dst*. See **bpf_probe_read_kernel_str**\ () for
|
|
|
|
|
* more details.
|
|
|
|
|
*
|
|
|
|
|
* Generally, use bpf_probe_read_user_str() or bpf_probe_read_kernel_str()
|
|
|
|
|
* instead.
|
|
|
|
|
* Generally, use **bpf_probe_read_user_str**\ () or
|
|
|
|
|
* **bpf_probe_read_kernel_str**\ () instead.
|
|
|
|
|
* Return
|
|
|
|
|
* On success, the strictly positive length of the string,
|
|
|
|
|
* including the trailing NUL character. On error, a negative
|
|
|
|
|
@ -1567,7 +1582,7 @@ union bpf_attr {
|
|
|
|
|
*
|
|
|
|
|
* u64 bpf_get_socket_cookie(struct bpf_sock_ops *ctx)
|
|
|
|
|
* Description
|
|
|
|
|
* Equivalent to bpf_get_socket_cookie() helper that accepts
|
|
|
|
|
* Equivalent to **bpf_get_socket_cookie**\ () helper that accepts
|
|
|
|
|
* *skb*, but gets socket from **struct bpf_sock_ops** context.
|
|
|
|
|
* Return
|
|
|
|
|
* An 8-byte long non-decreasing number.
|
|
|
|
|
@ -1596,6 +1611,7 @@ union bpf_attr {
|
|
|
|
|
* The option value of length *optlen* is pointed by *optval*.
|
|
|
|
|
*
|
|
|
|
|
* *bpf_socket* should be one of the following:
|
|
|
|
|
*
|
|
|
|
|
* * **struct bpf_sock_ops** for **BPF_PROG_TYPE_SOCK_OPS**.
|
|
|
|
|
* * **struct bpf_sock_addr** for **BPF_CGROUP_INET4_CONNECT**
|
|
|
|
|
* and **BPF_CGROUP_INET6_CONNECT**.
|
|
|
|
|
@ -1619,6 +1635,13 @@ union bpf_attr {
|
|
|
|
|
* Grow or shrink the room for data in the packet associated to
|
|
|
|
|
* *skb* by *len_diff*, and according to the selected *mode*.
|
|
|
|
|
*
|
|
|
|
|
* By default, the helper will reset any offloaded checksum
|
|
|
|
|
* indicator of the skb to CHECKSUM_NONE. This can be avoided
|
|
|
|
|
* by the following flag:
|
|
|
|
|
*
|
|
|
|
|
* * **BPF_F_ADJ_ROOM_NO_CSUM_RESET**: Do not reset offloaded
|
|
|
|
|
* checksum data of the skb to CHECKSUM_NONE.
|
|
|
|
|
*
|
|
|
|
|
* There are two supported modes at this time:
|
|
|
|
|
*
|
|
|
|
|
* * **BPF_ADJ_ROOM_MAC**: Adjust room at the mac layer
|
|
|
|
|
@ -1664,12 +1687,12 @@ union bpf_attr {
|
|
|
|
|
*
|
|
|
|
|
* The lower two bits of *flags* are used as the return code if
|
|
|
|
|
* the map lookup fails. This is so that the return value can be
|
|
|
|
|
* one of the XDP program return codes up to XDP_TX, as chosen by
|
|
|
|
|
* the caller. Any higher bits in the *flags* argument must be
|
|
|
|
|
* one of the XDP program return codes up to **XDP_TX**, as chosen
|
|
|
|
|
* by the caller. Any higher bits in the *flags* argument must be
|
|
|
|
|
* unset.
|
|
|
|
|
*
|
|
|
|
|
* See also bpf_redirect(), which only supports redirecting to an
|
|
|
|
|
* ifindex, but doesn't require a map to do so.
|
|
|
|
|
* See also **bpf_redirect**\ (), which only supports redirecting
|
|
|
|
|
* to an ifindex, but doesn't require a map to do so.
|
|
|
|
|
* Return
|
|
|
|
|
* **XDP_REDIRECT** on success, or the value of the two lower bits
|
|
|
|
|
* of the *flags* argument on error.
|
|
|
|
|
@ -1777,7 +1800,7 @@ union bpf_attr {
|
|
|
|
|
* the time running for event since last normalization. The
|
|
|
|
|
* enabled and running times are accumulated since the perf event
|
|
|
|
|
* open. To achieve scaling factor between two invocations of an
|
|
|
|
|
* eBPF program, users can can use CPU id as the key (which is
|
|
|
|
|
* eBPF program, users can use CPU id as the key (which is
|
|
|
|
|
* typical for perf array usage model) to remember the previous
|
|
|
|
|
* value and do the calculation inside the eBPF program.
|
|
|
|
|
* Return
|
|
|
|
|
@ -1804,6 +1827,7 @@ union bpf_attr {
|
|
|
|
|
* *opval* and of length *optlen*.
|
|
|
|
|
*
|
|
|
|
|
* *bpf_socket* should be one of the following:
|
|
|
|
|
*
|
|
|
|
|
* * **struct bpf_sock_ops** for **BPF_PROG_TYPE_SOCK_OPS**.
|
|
|
|
|
* * **struct bpf_sock_addr** for **BPF_CGROUP_INET4_CONNECT**
|
|
|
|
|
* and **BPF_CGROUP_INET6_CONNECT**.
|
|
|
|
|
@ -1825,7 +1849,7 @@ union bpf_attr {
|
|
|
|
|
* The first argument is the context *regs* on which the kprobe
|
|
|
|
|
* works.
|
|
|
|
|
*
|
|
|
|
|
* This helper works by setting setting the PC (program counter)
|
|
|
|
|
* This helper works by setting the PC (program counter)
|
|
|
|
|
* to an override function which is run in place of the original
|
|
|
|
|
* probed function. This means the probed function is not run at
|
|
|
|
|
* all. The replacement function just returns with the required
|
|
|
|
|
@ -1994,18 +2018,19 @@ union bpf_attr {
|
|
|
|
|
*
|
|
|
|
|
* This helper works for IPv4 and IPv6, TCP and UDP sockets. The
|
|
|
|
|
* domain (*addr*\ **->sa_family**) must be **AF_INET** (or
|
|
|
|
|
* **AF_INET6**). Looking for a free port to bind to can be
|
|
|
|
|
* expensive, therefore binding to port is not permitted by the
|
|
|
|
|
* helper: *addr*\ **->sin_port** (or **sin6_port**, respectively)
|
|
|
|
|
* must be set to zero.
|
|
|
|
|
* **AF_INET6**). It's advised to pass zero port (**sin_port**
|
|
|
|
|
* or **sin6_port**) which triggers IP_BIND_ADDRESS_NO_PORT-like
|
|
|
|
|
* behavior and lets the kernel efficiently pick up an unused
|
|
|
|
|
* port as long as 4-tuple is unique. Passing non-zero port might
|
|
|
|
|
* lead to degraded performance.
|
|
|
|
|
* Return
|
|
|
|
|
* 0 on success, or a negative error in case of failure.
|
|
|
|
|
*
|
|
|
|
|
* int bpf_xdp_adjust_tail(struct xdp_buff *xdp_md, int delta)
|
|
|
|
|
* Description
|
|
|
|
|
* Adjust (move) *xdp_md*\ **->data_end** by *delta* bytes. It is
|
|
|
|
|
* only possible to shrink the packet as of this writing,
|
|
|
|
|
* therefore *delta* must be a negative integer.
|
|
|
|
|
* possible to both shrink and grow the packet tail.
|
|
|
|
|
* Shrink done via *delta* being a negative integer.
|
|
|
|
|
*
|
|
|
|
|
* A call to this helper is susceptible to change the underlying
|
|
|
|
|
* packet buffer. Therefore, at load time, all checks on pointers
|
|
|
|
|
@ -2637,7 +2662,6 @@ union bpf_attr {
|
|
|
|
|
*
|
|
|
|
|
* *th* points to the start of the TCP header, while *th_len*
|
|
|
|
|
* contains **sizeof**\ (**struct tcphdr**).
|
|
|
|
|
*
|
|
|
|
|
* Return
|
|
|
|
|
* 0 if *iph* and *th* are a valid SYN cookie ACK, or a negative
|
|
|
|
|
* error otherwise.
|
|
|
|
|
@ -2820,7 +2844,6 @@ union bpf_attr {
|
|
|
|
|
*
|
|
|
|
|
* *th* points to the start of the TCP header, while *th_len*
|
|
|
|
|
* contains the length of the TCP header.
|
|
|
|
|
*
|
|
|
|
|
* Return
|
|
|
|
|
* On success, lower 32 bits hold the generated SYN cookie,
|
|
|
|
|
* followed by 16 bits which hold the MSS value for that cookie,
|
|
|
|
|
@ -2903,7 +2926,7 @@ union bpf_attr {
|
|
|
|
|
* // size, after checking its boundaries.
|
|
|
|
|
* }
|
|
|
|
|
*
|
|
|
|
|
* In comparison, using **bpf_probe_read_user()** helper here
|
|
|
|
|
* In comparison, using **bpf_probe_read_user**\ () helper here
|
|
|
|
|
* instead to read the string would require to estimate the length
|
|
|
|
|
* at compile time, and would often result in copying more memory
|
|
|
|
|
* than necessary.
|
|
|
|
|
@ -2921,14 +2944,14 @@ union bpf_attr {
|
|
|
|
|
* int bpf_probe_read_kernel_str(void *dst, u32 size, const void *unsafe_ptr)
|
|
|
|
|
* Description
|
|
|
|
|
* Copy a NUL terminated string from an unsafe kernel address *unsafe_ptr*
|
|
|
|
|
* to *dst*. Same semantics as with bpf_probe_read_user_str() apply.
|
|
|
|
|
* to *dst*. Same semantics as with **bpf_probe_read_user_str**\ () apply.
|
|
|
|
|
* Return
|
|
|
|
|
* On success, the strictly positive length of the string, including
|
|
|
|
|
* the trailing NUL character. On error, a negative value.
|
|
|
|
|
*
|
|
|
|
|
* int bpf_tcp_send_ack(void *tp, u32 rcv_nxt)
|
|
|
|
|
* Description
|
|
|
|
|
* Send out a tcp-ack. *tp* is the in-kernel struct tcp_sock.
|
|
|
|
|
* Send out a tcp-ack. *tp* is the in-kernel struct **tcp_sock**.
|
|
|
|
|
* *rcv_nxt* is the ack_seq to be sent out.
|
|
|
|
|
* Return
|
|
|
|
|
* 0 on success, or a negative error in case of failure.
|
|
|
|
|
@ -2956,7 +2979,7 @@ union bpf_attr {
|
|
|
|
|
* int bpf_read_branch_records(struct bpf_perf_event_data *ctx, void *buf, u32 size, u64 flags)
|
|
|
|
|
* Description
|
|
|
|
|
* For an eBPF program attached to a perf event, retrieve the
|
|
|
|
|
* branch records (struct perf_branch_entry) associated to *ctx*
|
|
|
|
|
* branch records (**struct perf_branch_entry**) associated to *ctx*
|
|
|
|
|
* and store it in the buffer pointed by *buf* up to size
|
|
|
|
|
* *size* bytes.
|
|
|
|
|
* Return
|
|
|
|
|
@ -2968,7 +2991,7 @@ union bpf_attr {
|
|
|
|
|
* branch entries. If this flag is set, *buf* may be NULL.
|
|
|
|
|
*
|
|
|
|
|
* **-EINVAL** if arguments invalid or **size** not a multiple
|
|
|
|
|
* of sizeof(struct perf_branch_entry).
|
|
|
|
|
* of **sizeof**\ (**struct perf_branch_entry**\ ).
|
|
|
|
|
*
|
|
|
|
|
* **-ENOENT** if architecture does not support branch records.
|
|
|
|
|
*
|
|
|
|
|
@ -2976,8 +2999,8 @@ union bpf_attr {
|
|
|
|
|
* Description
|
|
|
|
|
* Returns 0 on success, values for *pid* and *tgid* as seen from the current
|
|
|
|
|
* *namespace* will be returned in *nsdata*.
|
|
|
|
|
*
|
|
|
|
|
* On failure, the returned value is one of the following:
|
|
|
|
|
* Return
|
|
|
|
|
* 0 on success, or one of the following in case of failure:
|
|
|
|
|
*
|
|
|
|
|
* **-EINVAL** if dev and inum supplied don't match dev_t and inode number
|
|
|
|
|
* with nsfs of current task, or if dev conversion to dev_t lost high bits.
|
|
|
|
|
@ -3016,8 +3039,8 @@ union bpf_attr {
|
|
|
|
|
* a global identifier that can be assumed unique. If *ctx* is
|
|
|
|
|
* NULL, then the helper returns the cookie for the initial
|
|
|
|
|
* network namespace. The cookie itself is very similar to that
|
|
|
|
|
* of bpf_get_socket_cookie() helper, but for network namespaces
|
|
|
|
|
* instead of sockets.
|
|
|
|
|
* of **bpf_get_socket_cookie**\ () helper, but for network
|
|
|
|
|
* namespaces instead of sockets.
|
|
|
|
|
* Return
|
|
|
|
|
* An 8-byte long opaque number.
|
|
|
|
|
*
|
|
|
|
|
@ -3052,22 +3075,183 @@ union bpf_attr {
|
|
|
|
|
*
|
|
|
|
|
* The *flags* argument must be zero.
|
|
|
|
|
* Return
|
|
|
|
|
* 0 on success, or a negative errno in case of failure.
|
|
|
|
|
* 0 on success, or a negative error in case of failure:
|
|
|
|
|
*
|
|
|
|
|
* * **-EINVAL** Unsupported flags specified.
|
|
|
|
|
* * **-ENOENT** Socket is unavailable for assignment.
|
|
|
|
|
* * **-ENETUNREACH** Socket is unreachable (wrong netns).
|
|
|
|
|
* * **-EOPNOTSUPP** Unsupported operation, for example a
|
|
|
|
|
* call from outside of TC ingress.
|
|
|
|
|
* * **-ESOCKTNOSUPPORT** Socket type not supported (reuseport).
|
|
|
|
|
* **-EINVAL** if specified *flags* are not supported.
|
|
|
|
|
*
|
|
|
|
|
* **-ENOENT** if the socket is unavailable for assignment.
|
|
|
|
|
*
|
|
|
|
|
* **-ENETUNREACH** if the socket is unreachable (wrong netns).
|
|
|
|
|
*
|
|
|
|
|
* **-EOPNOTSUPP** if the operation is not supported, for example
|
|
|
|
|
* a call from outside of TC ingress.
|
|
|
|
|
*
|
|
|
|
|
* **-ESOCKTNOSUPPORT** if the socket type is not supported
|
|
|
|
|
* (reuseport).
|
|
|
|
|
*
|
|
|
|
|
* u64 bpf_ktime_get_boot_ns(void)
|
|
|
|
|
* Description
|
|
|
|
|
* Return the time elapsed since system boot, in nanoseconds.
|
|
|
|
|
* Does include the time the system was suspended.
|
|
|
|
|
* See: clock_gettime(CLOCK_BOOTTIME)
|
|
|
|
|
* See: **clock_gettime**\ (**CLOCK_BOOTTIME**)
|
|
|
|
|
* Return
|
|
|
|
|
* Current *ktime*.
|
|
|
|
|
*
|
|
|
|
|
* int bpf_seq_printf(struct seq_file *m, const char *fmt, u32 fmt_size, const void *data, u32 data_len)
|
|
|
|
|
* Description
|
|
|
|
|
* **bpf_seq_printf**\ () uses seq_file **seq_printf**\ () to print
|
|
|
|
|
* out the format string.
|
|
|
|
|
* The *m* represents the seq_file. The *fmt* and *fmt_size* are for
|
|
|
|
|
* the format string itself. The *data* and *data_len* are format string
|
|
|
|
|
* arguments. The *data* are a **u64** array and corresponding format string
|
|
|
|
|
* values are stored in the array. For strings and pointers where pointees
|
|
|
|
|
* are accessed, only the pointer values are stored in the *data* array.
|
|
|
|
|
* The *data_len* is the size of *data* in bytes.
|
|
|
|
|
*
|
|
|
|
|
* Formats **%s** and **%p{i,I}{4,6}** require reading kernel memory.
|
|
|
|
|
* Reading kernel memory may fail due to either invalid address or
|
|
|
|
|
* valid address but requiring a major memory fault. If reading kernel memory
|
|
|
|
|
* fails, the string for **%s** will be an empty string, and the ip
|
|
|
|
|
* address for **%p{i,I}{4,6}** will be 0. Not returning error to
|
|
|
|
|
* bpf program is consistent with what **bpf_trace_printk**\ () does for now.
|
|
|
|
|
* Return
|
|
|
|
|
* 0 on success, or a negative error in case of failure:
|
|
|
|
|
*
|
|
|
|
|
* **-EBUSY** if per-CPU memory copy buffer is busy, can try again
|
|
|
|
|
* by returning 1 from bpf program.
|
|
|
|
|
*
|
|
|
|
|
* **-EINVAL** if arguments are invalid, or if *fmt* is invalid/unsupported.
|
|
|
|
|
*
|
|
|
|
|
* **-E2BIG** if *fmt* contains too many format specifiers.
|
|
|
|
|
*
|
|
|
|
|
* **-EOVERFLOW** if an overflow happened: The same object will be tried again.
|
|
|
|
|
*
|
|
|
|
|
* int bpf_seq_write(struct seq_file *m, const void *data, u32 len)
|
|
|
|
|
* Description
|
|
|
|
|
* **bpf_seq_write**\ () uses seq_file **seq_write**\ () to write the data.
|
|
|
|
|
* The *m* represents the seq_file. The *data* and *len* represent the
|
|
|
|
|
* data to write in bytes.
|
|
|
|
|
* Return
|
|
|
|
|
* 0 on success, or a negative error in case of failure:
|
|
|
|
|
*
|
|
|
|
|
* **-EOVERFLOW** if an overflow happened: The same object will be tried again.
|
|
|
|
|
*
|
|
|
|
|
* u64 bpf_sk_cgroup_id(struct bpf_sock *sk)
|
|
|
|
|
* Description
|
|
|
|
|
* Return the cgroup v2 id of the socket *sk*.
|
|
|
|
|
*
|
|
|
|
|
* *sk* must be a non-**NULL** pointer to a full socket, e.g. one
|
|
|
|
|
* returned from **bpf_sk_lookup_xxx**\ (),
|
|
|
|
|
* **bpf_sk_fullsock**\ (), etc. The format of returned id is
|
|
|
|
|
* same as in **bpf_skb_cgroup_id**\ ().
|
|
|
|
|
*
|
|
|
|
|
* This helper is available only if the kernel was compiled with
|
|
|
|
|
* the **CONFIG_SOCK_CGROUP_DATA** configuration option.
|
|
|
|
|
* Return
|
|
|
|
|
* The id is returned or 0 in case the id could not be retrieved.
|
|
|
|
|
*
|
|
|
|
|
* u64 bpf_sk_ancestor_cgroup_id(struct bpf_sock *sk, int ancestor_level)
|
|
|
|
|
* Description
|
|
|
|
|
* Return id of cgroup v2 that is ancestor of cgroup associated
|
|
|
|
|
* with the *sk* at the *ancestor_level*. The root cgroup is at
|
|
|
|
|
* *ancestor_level* zero and each step down the hierarchy
|
|
|
|
|
* increments the level. If *ancestor_level* == level of cgroup
|
|
|
|
|
* associated with *sk*, then return value will be same as that
|
|
|
|
|
* of **bpf_sk_cgroup_id**\ ().
|
|
|
|
|
*
|
|
|
|
|
* The helper is useful to implement policies based on cgroups
|
|
|
|
|
* that are upper in hierarchy than immediate cgroup associated
|
|
|
|
|
* with *sk*.
|
|
|
|
|
*
|
|
|
|
|
* The format of returned id and helper limitations are same as in
|
|
|
|
|
* **bpf_sk_cgroup_id**\ ().
|
|
|
|
|
* Return
|
|
|
|
|
* The id is returned or 0 in case the id could not be retrieved.
|
|
|
|
|
*
|
|
|
|
|
* int bpf_ringbuf_output(void *ringbuf, void *data, u64 size, u64 flags)
|
|
|
|
|
* Description
|
|
|
|
|
* Copy *size* bytes from *data* into a ring buffer *ringbuf*.
|
|
|
|
|
* If BPF_RB_NO_WAKEUP is specified in *flags*, no notification of
|
|
|
|
|
* new data availability is sent.
|
|
|
|
|
* If BPF_RB_FORCE_WAKEUP is specified in *flags*, notification of
|
|
|
|
|
* new data availability is sent unconditionally.
|
|
|
|
|
* Return
|
|
|
|
|
* 0, on success;
|
|
|
|
|
* < 0, on error.
|
|
|
|
|
*
|
|
|
|
|
* void *bpf_ringbuf_reserve(void *ringbuf, u64 size, u64 flags)
|
|
|
|
|
* Description
|
|
|
|
|
* Reserve *size* bytes of payload in a ring buffer *ringbuf*.
|
|
|
|
|
* Return
|
|
|
|
|
* Valid pointer with *size* bytes of memory available; NULL,
|
|
|
|
|
* otherwise.
|
|
|
|
|
*
|
|
|
|
|
* void bpf_ringbuf_submit(void *data, u64 flags)
|
|
|
|
|
* Description
|
|
|
|
|
* Submit reserved ring buffer sample, pointed to by *data*.
|
|
|
|
|
* If BPF_RB_NO_WAKEUP is specified in *flags*, no notification of
|
|
|
|
|
* new data availability is sent.
|
|
|
|
|
* If BPF_RB_FORCE_WAKEUP is specified in *flags*, notification of
|
|
|
|
|
* new data availability is sent unconditionally.
|
|
|
|
|
* Return
|
|
|
|
|
* Nothing. Always succeeds.
|
|
|
|
|
*
|
|
|
|
|
* void bpf_ringbuf_discard(void *data, u64 flags)
|
|
|
|
|
* Description
|
|
|
|
|
* Discard reserved ring buffer sample, pointed to by *data*.
|
|
|
|
|
* If BPF_RB_NO_WAKEUP is specified in *flags*, no notification of
|
|
|
|
|
* new data availability is sent.
|
|
|
|
|
* If BPF_RB_FORCE_WAKEUP is specified in *flags*, notification of
|
|
|
|
|
* new data availability is sent unconditionally.
|
|
|
|
|
* Return
|
|
|
|
|
* Nothing. Always succeeds.
|
|
|
|
|
*
|
|
|
|
|
* u64 bpf_ringbuf_query(void *ringbuf, u64 flags)
|
|
|
|
|
* Description
|
|
|
|
|
* Query various characteristics of provided ring buffer. What
|
|
|
|
|
* exactly is queried is determined by *flags*:
|
|
|
|
|
* - BPF_RB_AVAIL_DATA - amount of data not yet consumed;
|
|
|
|
|
* - BPF_RB_RING_SIZE - the size of ring buffer;
|
|
|
|
|
* - BPF_RB_CONS_POS - consumer position (can wrap around);
|
|
|
|
|
* - BPF_RB_PROD_POS - producer(s) position (can wrap around);
|
|
|
|
|
* Data returned is just a momentary snapshot of actual values
|
|
|
|
|
* and could be inaccurate, so this facility should be used to
|
|
|
|
|
* power heuristics and for reporting, not to make 100% correct
|
|
|
|
|
* calculation.
|
|
|
|
|
* Return
|
|
|
|
|
* Requested value, or 0, if flags are not recognized.
|
|
|
|
|
*
|
|
|
|
|
* int bpf_csum_level(struct sk_buff *skb, u64 level)
|
|
|
|
|
* Description
|
|
|
|
|
* Change the skb's checksum level by one layer up or down, or
|
|
|
|
|
* reset it entirely to none in order to have the stack perform
|
|
|
|
|
* checksum validation. The level is applicable to the following
|
|
|
|
|
* protocols: TCP, UDP, GRE, SCTP, FCOE. For example, a decap of
|
|
|
|
|
* | ETH | IP | UDP | GUE | IP | TCP | into | ETH | IP | TCP |
|
|
|
|
|
* through **bpf_skb_adjust_room**\ () helper with passing in
|
|
|
|
|
* **BPF_F_ADJ_ROOM_NO_CSUM_RESET** flag would require one call
|
|
|
|
|
* to **bpf_csum_level**\ () with **BPF_CSUM_LEVEL_DEC** since
|
|
|
|
|
* the UDP header is removed. Similarly, an encap of the latter
|
|
|
|
|
* into the former could be accompanied by a helper call to
|
|
|
|
|
* **bpf_csum_level**\ () with **BPF_CSUM_LEVEL_INC** if the
|
|
|
|
|
* skb is still intended to be processed in higher layers of the
|
|
|
|
|
* stack instead of just egressing at tc.
|
|
|
|
|
*
|
|
|
|
|
* There are three supported level settings at this time:
|
|
|
|
|
*
|
|
|
|
|
* * **BPF_CSUM_LEVEL_INC**: Increases skb->csum_level for skbs
|
|
|
|
|
* with CHECKSUM_UNNECESSARY.
|
|
|
|
|
* * **BPF_CSUM_LEVEL_DEC**: Decreases skb->csum_level for skbs
|
|
|
|
|
* with CHECKSUM_UNNECESSARY.
|
|
|
|
|
* * **BPF_CSUM_LEVEL_RESET**: Resets skb->csum_level to 0 and
|
|
|
|
|
* sets CHECKSUM_NONE to force checksum validation by the stack.
|
|
|
|
|
* * **BPF_CSUM_LEVEL_QUERY**: No-op, returns the current
|
|
|
|
|
* skb->csum_level.
|
|
|
|
|
* Return
|
|
|
|
|
* 0 on success, or a negative error in case of failure. In the
|
|
|
|
|
* case of **BPF_CSUM_LEVEL_QUERY**, the current skb->csum_level
|
|
|
|
|
* is returned or the error code -EACCES in case the skb is not
|
|
|
|
|
* subject to CHECKSUM_UNNECESSARY.
|
|
|
|
|
*/
|
|
|
|
|
#define __BPF_FUNC_MAPPER(FN) \
|
|
|
|
|
FN(unspec), \
|
|
|
|
|
@ -3195,7 +3379,17 @@ union bpf_attr {
|
|
|
|
|
FN(get_netns_cookie), \
|
|
|
|
|
FN(get_current_ancestor_cgroup_id), \
|
|
|
|
|
FN(sk_assign), \
|
|
|
|
|
FN(ktime_get_boot_ns),
|
|
|
|
|
FN(ktime_get_boot_ns), \
|
|
|
|
|
FN(seq_printf), \
|
|
|
|
|
FN(seq_write), \
|
|
|
|
|
FN(sk_cgroup_id), \
|
|
|
|
|
FN(sk_ancestor_cgroup_id), \
|
|
|
|
|
FN(ringbuf_output), \
|
|
|
|
|
FN(ringbuf_reserve), \
|
|
|
|
|
FN(ringbuf_submit), \
|
|
|
|
|
FN(ringbuf_discard), \
|
|
|
|
|
FN(ringbuf_query), \
|
|
|
|
|
FN(csum_level),
|
|
|
|
|
|
|
|
|
|
/* integer value in 'imm' field of BPF_CALL instruction selects which helper
|
|
|
|
|
* function eBPF program intends to call
|
|
|
|
|
@ -3272,6 +3466,14 @@ enum {
|
|
|
|
|
BPF_F_CURRENT_NETNS = (-1L),
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
/* BPF_FUNC_csum_level level values. */
|
|
|
|
|
enum {
|
|
|
|
|
BPF_CSUM_LEVEL_QUERY, /* no-op: returns the current skb->csum_level */
|
|
|
|
|
BPF_CSUM_LEVEL_INC, /* increase skb->csum_level for skbs with CHECKSUM_UNNECESSARY */
|
|
|
|
|
BPF_CSUM_LEVEL_DEC, /* decrease skb->csum_level for skbs with CHECKSUM_UNNECESSARY */
|
|
|
|
|
BPF_CSUM_LEVEL_RESET, /* reset skb->csum_level to 0 and set CHECKSUM_NONE so the stack validates the checksum */
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
/* BPF_FUNC_skb_adjust_room flags. */
|
|
|
|
|
enum {
|
|
|
|
|
BPF_F_ADJ_ROOM_FIXED_GSO = (1ULL << 0),
|
|
|
|
|
@ -3279,6 +3481,7 @@ enum {
|
|
|
|
|
BPF_F_ADJ_ROOM_ENCAP_L3_IPV6 = (1ULL << 2),
|
|
|
|
|
BPF_F_ADJ_ROOM_ENCAP_L4_GRE = (1ULL << 3),
|
|
|
|
|
BPF_F_ADJ_ROOM_ENCAP_L4_UDP = (1ULL << 4),
|
|
|
|
|
BPF_F_ADJ_ROOM_NO_CSUM_RESET = (1ULL << 5),
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
enum {
|
|
|
|
|
@ -3305,6 +3508,29 @@ enum {
|
|
|
|
|
BPF_F_GET_BRANCH_RECORDS_SIZE = (1ULL << 0),
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
/* BPF_FUNC_ringbuf_submit, BPF_FUNC_ringbuf_discard, and
|
|
|
|
|
* BPF_FUNC_ringbuf_output flags.
|
|
|
|
|
*/
|
|
|
|
|
enum {
|
|
|
|
|
BPF_RB_NO_WAKEUP = (1ULL << 0), /* no notification of new data availability is sent */
|
|
|
|
|
BPF_RB_FORCE_WAKEUP = (1ULL << 1), /* notification of new data availability is sent unconditionally */
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
/* BPF_FUNC_ringbuf_query flags */
|
|
|
|
|
enum {
|
|
|
|
|
BPF_RB_AVAIL_DATA = 0, /* amount of data not yet consumed */
|
|
|
|
|
BPF_RB_RING_SIZE = 1, /* the size of ring buffer */
|
|
|
|
|
BPF_RB_CONS_POS = 2, /* consumer position (can wrap around) */
|
|
|
|
|
BPF_RB_PROD_POS = 3, /* producer(s) position (can wrap around) */
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
/* BPF ring buffer constants */
|
|
|
|
|
enum {
|
|
|
|
|
BPF_RINGBUF_BUSY_BIT = (1U << 31), /* NOTE(review): presumably marks a record that is reserved but not yet submitted/discarded - confirm */
|
|
|
|
|
BPF_RINGBUF_DISCARD_BIT = (1U << 30), /* NOTE(review): presumably marks a record that was discarded - confirm */
|
|
|
|
|
BPF_RINGBUF_HDR_SZ = 8, /* NOTE(review): presumably the per-record header size in bytes - confirm */
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
/* Mode for BPF_FUNC_skb_adjust_room helper. */
|
|
|
|
|
enum bpf_adj_room_mode {
|
|
|
|
|
BPF_ADJ_ROOM_NET,
|
|
|
|
|
@ -3437,6 +3663,7 @@ struct bpf_sock {
|
|
|
|
|
__u32 dst_ip4;
|
|
|
|
|
__u32 dst_ip6[4];
|
|
|
|
|
__u32 state;
|
|
|
|
|
__s32 rx_queue_mapping;
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
struct bpf_tcp_sock {
|
|
|
|
|
@ -3530,6 +3757,8 @@ struct xdp_md {
|
|
|
|
|
/* Below access go through struct xdp_rxq_info */
|
|
|
|
|
__u32 ingress_ifindex; /* rxq->dev->ifindex */
|
|
|
|
|
__u32 rx_queue_index; /* rxq->queue_index */
|
|
|
|
|
|
|
|
|
|
__u32 egress_ifindex; /* txq->dev->ifindex */
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
enum sk_action {
|
|
|
|
|
@ -3552,6 +3781,8 @@ struct sk_msg_md {
|
|
|
|
|
__u32 remote_port; /* Stored in network byte order */
|
|
|
|
|
__u32 local_port; /* stored in host byte order */
|
|
|
|
|
__u32 size; /* Total size of sk_msg */
|
|
|
|
|
|
|
|
|
|
__bpf_md_ptr(struct bpf_sock *, sk); /* current socket */
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
struct sk_reuseport_md {
|
|
|
|
|
@ -3658,6 +3889,10 @@ struct bpf_link_info {
|
|
|
|
|
__u64 cgroup_id;
|
|
|
|
|
__u32 attach_type;
|
|
|
|
|
} cgroup;
|
|
|
|
|
struct {
|
|
|
|
|
__u32 netns_ino;
|
|
|
|
|
__u32 attach_type;
|
|
|
|
|
} netns;
|
|
|
|
|
};
|
|
|
|
|
} __attribute__((aligned(8)));
|
|
|
|
|
|
|
|
|
|
@ -3673,7 +3908,7 @@ struct bpf_sock_addr {
|
|
|
|
|
__u32 user_ip6[4]; /* Allows 1,2,4,8-byte read and 4,8-byte write.
|
|
|
|
|
* Stored in network byte order.
|
|
|
|
|
*/
|
|
|
|
|
__u32 user_port; /* Allows 4-byte read and write.
|
|
|
|
|
__u32 user_port; /* Allows 1,2,4-byte read and 4-byte write.
|
|
|
|
|
* Stored in network byte order
|
|
|
|
|
*/
|
|
|
|
|
__u32 family; /* Allows 4-byte read, but no write */
|
|
|
|
|
|