Merge branch 'main' into next

Signed-off-by: David Ahern <dsahern@kernel.org>
This commit is contained in:
David Ahern 2021-03-01 00:07:57 +00:00
commit 455c9f5361
30 changed files with 549 additions and 205 deletions

View File

@ -13,6 +13,7 @@ DCBOBJ = dcb.o \
dcb_maxrate.o \
dcb_pfc.o
TARGETS += dcb
LDLIBS += -lm
endif

View File

@ -15,6 +15,9 @@
#include "json_writer.h"
#include "color.h"
/*
 * Output-context predicates shared by the print helpers.
 *
 * _IS_JSON_CONTEXT(type): true when is_json_context() is true and @type has
 *                         PRINT_JSON or PRINT_ANY set.
 * _IS_FP_CONTEXT(type):   true when is_json_context() is false and @type has
 *                         PRINT_FP or PRINT_ANY set.
 *
 * @type is parenthesized so that a compound argument such as "a | b" is
 * masked as a whole ('&' binds tighter than '|').  Note that @type is
 * evaluated twice, so it must not have side effects.
 */
#define _IS_JSON_CONTEXT(type) \
	(is_json_context() && (((type) & PRINT_JSON) || ((type) & PRINT_ANY)))
#define _IS_FP_CONTEXT(type) \
	(!is_json_context() && (((type) & PRINT_FP) || ((type) & PRINT_ANY)))
json_writer_t *get_json_writer(void);
/*

View File

@ -1656,22 +1656,30 @@ union bpf_attr {
* networking traffic statistics as it provides a global socket
* identifier that can be assumed unique.
* Return
* A 8-byte long non-decreasing number on success, or 0 if the
* socket field is missing inside *skb*.
* A 8-byte long unique number on success, or 0 if the socket
* field is missing inside *skb*.
*
* u64 bpf_get_socket_cookie(struct bpf_sock_addr *ctx)
* Description
* Equivalent to bpf_get_socket_cookie() helper that accepts
* *skb*, but gets socket from **struct bpf_sock_addr** context.
* Return
* A 8-byte long non-decreasing number.
* A 8-byte long unique number.
*
* u64 bpf_get_socket_cookie(struct bpf_sock_ops *ctx)
* Description
* Equivalent to **bpf_get_socket_cookie**\ () helper that accepts
* *skb*, but gets socket from **struct bpf_sock_ops** context.
* Return
* A 8-byte long non-decreasing number.
* A 8-byte long unique number.
*
* u64 bpf_get_socket_cookie(struct sock *sk)
* Description
* Equivalent to **bpf_get_socket_cookie**\ () helper that accepts
* *sk*, but gets socket from a BTF **struct sock**. This helper
* also works for sleepable programs.
* Return
* A 8-byte long unique number or 0 if *sk* is NULL.
*
* u32 bpf_get_socket_uid(struct sk_buff *skb)
* Return
@ -2231,6 +2239,9 @@ union bpf_attr {
* * > 0 one of **BPF_FIB_LKUP_RET_** codes explaining why the
* packet is not forwarded or needs assist from full stack
*
* If lookup fails with BPF_FIB_LKUP_RET_FRAG_NEEDED, then the MTU
* was exceeded and output params->mtu_result contains the MTU.
*
* long bpf_sock_hash_update(struct bpf_sock_ops *skops, struct bpf_map *map, void *key, u64 flags)
* Description
* Add an entry to, or update a sockhash *map* referencing sockets.
@ -3836,6 +3847,69 @@ union bpf_attr {
* Return
* A pointer to a struct socket on success or NULL if the file is
* not a socket.
*
* long bpf_check_mtu(void *ctx, u32 ifindex, u32 *mtu_len, s32 len_diff, u64 flags)
* Description
* Check ctx packet size against exceeding MTU of net device (based
* on *ifindex*). This helper will likely be used in combination
* with helpers that adjust/change the packet size.
*
* The argument *len_diff* can be used for querying with a planned
* size change. This allows to check MTU prior to changing packet
* ctx. Providing a *len_diff* adjustment that is larger than the
* actual packet size (resulting in negative packet size) will in
* principle not exceed the MTU, which is why it is not considered
* a failure. Other BPF helpers are needed for performing the
* planned size change; therefore the responsibility for catching
* a negative packet size belongs in those helpers.
*
* Specifying *ifindex* zero means the MTU check is performed
* against the current net device. This is practical if this isn't
* used prior to redirect.
*
* The Linux kernel route table can configure MTUs on a more
* specific per route level, which is not provided by this helper.
* For route level MTU checks use the **bpf_fib_lookup**\ ()
* helper.
*
* *ctx* is either **struct xdp_md** for XDP programs or
* **struct sk_buff** for tc cls_act programs.
*
* The *flags* argument can be a combination of one or more of the
* following values:
*
* **BPF_MTU_CHK_SEGS**
* This flag only works for *ctx* **struct sk_buff**.
* If packet context contains extra packet segment buffers
* (often known as GSO skb), then MTU check is harder to
* perform at this point, because in transmit path it is
* possible for the skb packet to get re-segmented
* (depending on net device features). This could still be
* a MTU violation, so this flag enables performing MTU
* check against segments, with a different violation
* return code to tell it apart. Check cannot use len_diff.
*
* On return *mtu_len* pointer contains the MTU value of the net
* device. Remember the net device configured MTU is the L3 size,
* which is returned here and XDP and TX length operate at L2.
* The helper takes this into account for you, but keep it in mind when
* using the MTU value in your BPF code. On input *mtu_len* must be a valid
* pointer and be initialized (to zero), else verifier will reject
* BPF program.
*
* Return
* * 0 on success, and populate MTU value in *mtu_len* pointer.
*
* * < 0 if any input argument is invalid (*mtu_len* not updated)
*
* MTU violations return positive values, but also populate MTU
* value in *mtu_len* pointer, as this can be needed for
* implementing PMTU handling:
*
* * **BPF_MTU_CHK_RET_FRAG_NEEDED**
* * **BPF_MTU_CHK_RET_SEGS_TOOBIG**
*
*/
#define __BPF_FUNC_MAPPER(FN) \
FN(unspec), \
@ -4001,6 +4075,7 @@ union bpf_attr {
FN(ktime_get_coarse_ns), \
FN(ima_inode_hash), \
FN(sock_from_file), \
FN(check_mtu), \
/* */
/* integer value in 'imm' field of BPF_CALL instruction selects which helper
@ -4501,6 +4576,7 @@ struct bpf_prog_info {
__aligned_u64 prog_tags;
__u64 run_time_ns;
__u64 run_cnt;
__u64 recursion_misses;
} __attribute__((aligned(8)));
struct bpf_map_info {
@ -4981,9 +5057,13 @@ struct bpf_fib_lookup {
__be16 sport;
__be16 dport;
/* total length of packet from network header - used for MTU check */
__u16 tot_len;
union { /* used for MTU check */
/* input to lookup */
__u16 tot_len; /* L3 length from network hdr (iph->tot_len) */
/* output: MTU value */
__u16 mtu_result;
};
/* input: L3 device index for lookup
* output: device index from FIB lookup
*/
@ -5029,6 +5109,17 @@ struct bpf_redir_neigh {
};
};
/* Flag bits for the *flags* argument of bpf_check_mtu(). */
enum bpf_check_mtu_flags {
	BPF_MTU_CHK_SEGS = 1U << 0,
};
/* Non-negative result codes of bpf_check_mtu(); values are spelled out. */
enum bpf_check_mtu_ret {
	BPF_MTU_CHK_RET_SUCCESS = 0,		/* check and lookup successful */
	BPF_MTU_CHK_RET_FRAG_NEEDED = 1,	/* fragmentation required to fwd */
	BPF_MTU_CHK_RET_SEGS_TOOBIG = 2,	/* GSO re-segmentation needed to fwd */
};
enum bpf_task_fd_type {
BPF_FD_TYPE_RAW_TRACEPOINT, /* tp name */
BPF_FD_TYPE_TRACEPOINT, /* tp name */

View File

@ -103,6 +103,8 @@ struct mptcp_info {
__u64 mptcpi_write_seq;
__u64 mptcpi_snd_una;
__u64 mptcpi_rcv_nxt;
__u8 mptcpi_local_addr_used;
__u8 mptcpi_local_addr_max;
};
/*

View File

@ -593,6 +593,7 @@ enum {
TCA_FLOWER_KEY_CT_FLAGS_TRACKED = 1 << 3, /* Conntrack has occurred. */
TCA_FLOWER_KEY_CT_FLAGS_INVALID = 1 << 4, /* Conntrack is invalid. */
TCA_FLOWER_KEY_CT_FLAGS_REPLY = 1 << 5, /* Packet is in the reply direction. */
__TCA_FLOWER_KEY_CT_FLAGS_MAX,
};
enum {

View File

@ -1 +1 @@
static const char version[] = "5.10.0";
static const char version[] = "5.11.0";

View File

@ -922,6 +922,7 @@ int print_linkinfo(struct nlmsghdr *n, void *arg)
const char *name;
unsigned int m_flag = 0;
SPRINT_BUF(b1);
bool truncated_vfs = false;
if (n->nlmsg_type != RTM_NEWLINK && n->nlmsg_type != RTM_DELLINK)
return 0;
@ -1199,15 +1200,18 @@ int print_linkinfo(struct nlmsghdr *n, void *arg)
if ((do_link || show_details) && tb[IFLA_VFINFO_LIST] && tb[IFLA_NUM_VF]) {
struct rtattr *i, *vflist = tb[IFLA_VFINFO_LIST];
int rem = RTA_PAYLOAD(vflist);
int rem = RTA_PAYLOAD(vflist), count = 0;
open_json_array(PRINT_JSON, "vfinfo_list");
for (i = RTA_DATA(vflist); RTA_OK(i, rem); i = RTA_NEXT(i, rem)) {
open_json_object(NULL);
print_vfinfo(fp, ifi, i);
close_json_object();
count++;
}
close_json_array(PRINT_JSON, NULL);
if (count != rta_getattr_u32(tb[IFLA_NUM_VF]))
truncated_vfs = true;
}
if (tb[IFLA_PROP_LIST]) {
@ -1228,6 +1232,9 @@ int print_linkinfo(struct nlmsghdr *n, void *arg)
print_string(PRINT_FP, NULL, "%s", "\n");
fflush(fp);
/* prettier here if stderr and stdout go to the same place */
if (truncated_vfs)
fprintf(stderr, "Truncated VF list: %s\n", name);
return 1;
}

View File

@ -22,9 +22,11 @@ static void print_explain(FILE *f)
" [ srcportmin PORT ]\n"
" [ [no]multiproto ]\n"
"\n"
"Where: PORT := 0-65535\n"
" PROTO := NUMBER | ip | mpls\n"
" SRCPORTMIN := 0-65535\n"
"Where: PORT := UDP_PORT\n"
" PROTO := ETHERTYPE\n"
"\n"
"Note: ETHERTYPE can be given as number or as protocol name (\"ipv4\", \"ipv6\",\n"
" \"mpls_uc\", etc.).\n"
);
}

View File

@ -2071,7 +2071,18 @@ static int iproute_get(int argc, char **argv)
if (addr.bytelen)
addattr_l(&req.n, sizeof(req),
RTA_DST, &addr.data, addr.bytelen);
req.r.rtm_dst_len = addr.bitlen;
if (req.r.rtm_family == AF_INET && addr.bitlen != 32) {
fprintf(stderr,
"Warning: /%u as prefix is invalid, only /32 (or none) is supported.\n",
addr.bitlen);
req.r.rtm_dst_len = 32;
} else if (req.r.rtm_family == AF_INET6 && addr.bitlen != 128) {
fprintf(stderr,
"Warning: /%u as prefix is invalid, only /128 (or none) is supported.\n",
addr.bitlen);
req.r.rtm_dst_len = 128;
} else
req.r.rtm_dst_len = addr.bitlen;
address_found = true;
}
argc--; argv++;

View File

@ -891,13 +891,15 @@ static int parse_encap_seg6local(struct rtattr *rta, size_t len, int *argcp,
NEXT_ARG();
if (table_ok++)
duparg2("table", *argv);
rtnl_rttable_a2n(&table, *argv);
if (rtnl_rttable_a2n(&table, *argv))
invarg("invalid table id\n", *argv);
ret = rta_addattr32(rta, len, SEG6_LOCAL_TABLE, table);
} else if (strcmp(*argv, "vrftable") == 0) {
NEXT_ARG();
if (vrftable_ok++)
duparg2("vrftable", *argv);
rtnl_rttable_a2n(&vrftable, *argv);
if (rtnl_rttable_a2n(&vrftable, *argv))
invarg("invalid vrf table id\n", *argv);
ret = rta_addattr32(rta, len, SEG6_LOCAL_VRFTABLE,
vrftable);
} else if (strcmp(*argv, "nh4") == 0) {

View File

@ -278,8 +278,8 @@ static int vrf_configure_cgroup(const char *path, int ifindex)
*/
prog_fd = prog_load(ifindex);
if (prog_fd < 0) {
fprintf(stderr, "Failed to load BPF prog: '%s'\n",
strerror(errno));
fprintf(stderr, "Failed to load BPF prog: '%s'\n%s",
strerror(errno), bpf_log_buf);
if (errno != EPERM) {
fprintf(stderr,

View File

@ -3,8 +3,8 @@ include ../config.mk
CFLAGS += -fPIC
UTILOBJ = utils.o rt_names.o ll_map.o ll_types.o ll_proto.o ll_addr.o \
inet_proto.o namespace.o json_writer.o json_print.o \
UTILOBJ = utils.o utils_math.o rt_names.o ll_map.o ll_types.o ll_proto.o ll_addr.o \
inet_proto.o namespace.o json_writer.o json_print.o json_print_math.o \
names.o color.o bpf_legacy.o bpf_glue.o exec.o fs.o cg_map.o
ifeq ($(HAVE_ELF),y)

View File

@ -14,7 +14,8 @@ int bpf_program_load(enum bpf_prog_type type, const struct bpf_insn *insns,
size_t size_log)
{
#ifdef HAVE_LIBBPF
return bpf_load_program(type, insns, size_insns, license, 0, log, size_log);
return bpf_load_program(type, insns, size_insns / sizeof(struct bpf_insn),
license, 0, log, size_log);
#else
return bpf_prog_load_dev(type, insns, size_insns, license, 0, log, size_log);
#endif

View File

@ -510,20 +510,14 @@ static int bpf_mnt_fs(const char *target)
/*
 * Make sure the mount target path exists, creating it as a directory with
 * owner-only permissions (S_IRWXU) when it does not.
 *
 * mkdir() is attempted directly and EEXIST is accepted as success, instead
 * of probing with stat() first: that avoids the window between the check
 * and the creation, and a second mkdir() on an already-handled path.
 * NOTE(review): EEXIST does not prove the existing path is a directory;
 * the stat()-based variant did not check that either.
 *
 * Returns 0 when the path was created or already exists.  On any other
 * mkdir() failure the error is reported on stderr and mkdir()'s non-zero
 * return value is passed back.
 */
static int bpf_mnt_check_target(const char *target)
{
	int ret;

	ret = mkdir(target, S_IRWXU);
	if (ret == 0 || errno == EEXIST)
		return 0;

	fprintf(stderr, "mkdir %s failed: %s\n", target, strerror(errno));
	return ret;
}
static int bpf_valid_mntpt(const char *mnt, unsigned long magic)

View File

@ -157,7 +157,8 @@ __u64 get_cgroup2_id(const char *path)
memcpy(cg_id.bytes, fhp->f_handle, sizeof(__u64));
out:
close(mnt_fd);
if (mnt_fd >= 0)
close(mnt_fd);
free(mnt);
return cg_id.id;
@ -179,16 +180,16 @@ char *get_cgroup2_path(__u64 id, bool full)
char *path = NULL;
char fd_path[64];
int link_len;
char *mnt;
char *mnt = NULL;
if (!id) {
fprintf(stderr, "Invalid cgroup2 ID\n");
return NULL;
goto out;
}
mnt = find_cgroup2_mount(false);
if (!mnt)
return NULL;
goto out;
mnt_fd = open(mnt, O_RDONLY);
if (mnt_fd < 0) {
@ -225,8 +226,10 @@ char *get_cgroup2_path(__u64 id, bool full)
"Failed to allocate memory for cgroup2 path\n");
out:
close(fd);
close(mnt_fd);
if (fd >= 0)
close(fd);
if (mnt_fd >= 0)
close(mnt_fd);
free(mnt);
return path;
@ -253,7 +256,7 @@ int make_path(const char *path, mode_t mode)
*delim = '\0';
rc = mkdir(dir, mode);
if (mkdir(dir, mode) != 0 && errno != EEXIST) {
if (rc && errno != EEXIST) {
fprintf(stderr, "mkdir failed for %s: %s\n",
dir, strerror(errno));
goto out;

View File

@ -11,16 +11,12 @@
#include <stdarg.h>
#include <stdio.h>
#include <math.h>
#include "utils.h"
#include "json_print.h"
static json_writer_t *_jw;
/*
 * Output-context predicates keyed on the file-local JSON writer _jw.
 *
 * _IS_JSON_CONTEXT(type): true when _jw is set and @type has PRINT_JSON or
 *                         PRINT_ANY set.
 * _IS_FP_CONTEXT(type):   true when _jw is not set and @type has PRINT_FP
 *                         or PRINT_ANY set.
 *
 * @type is parenthesized so that a compound argument such as "a | b" is
 * masked as a whole ('&' binds tighter than '|').  @type is evaluated
 * twice, so it must not have side effects.
 */
#define _IS_JSON_CONTEXT(type) \
	((((type) & PRINT_JSON) || ((type) & PRINT_ANY)) && _jw)
#define _IS_FP_CONTEXT(type) \
	(!_jw && (((type) & PRINT_FP) || ((type) & PRINT_ANY)))
static void __new_json_obj(int json, bool have_array)
{
if (json) {
@ -342,32 +338,3 @@ int print_color_rate(bool use_iec, enum output_type type, enum color_attr color,
free(buf);
return rc;
}
/*
 * Render the byte count @sz into @buf in a compact form and return @buf.
 *
 * - "<n>Mb" when @sz is at least 1024*1024 and lies within 1024 bytes of a
 *   whole multiple of 1024*1024;
 * - otherwise "<n>Kb" when @sz is at least 1024 and lies within 16 bytes of
 *   a whole multiple of 1024;
 * - otherwise the exact value as "<sz>b".
 *
 * Output is bounded to SPRINT_BSIZE - 1 bytes by snprintf().
 */
char *sprint_size(__u32 sz, char *buf)
{
	const long kib = 1024;
	const long mib = kib * kib;
	const size_t cap = SPRINT_BSIZE - 1;
	const double bytes = sz;

	if (sz >= mib) {
		double whole = rint(bytes / mib);

		if (fabs(mib * whole - sz) < 1024) {
			snprintf(buf, cap, "%gMb", whole);
			return buf;
		}
	}

	if (sz >= kib) {
		double whole = rint(bytes / kib);

		if (fabs(kib * whole - sz) < 16) {
			snprintf(buf, cap, "%gKb", whole);
			return buf;
		}
	}

	snprintf(buf, cap, "%ub", sz);
	return buf;
}
/*
 * Print the size @sz under @key.
 *
 * In a JSON context the raw number is emitted through print_color_uint()
 * with "%u"; otherwise the value is first rendered by sprint_size() and
 * printed as a string using the caller's @fmt.  Returns whatever the
 * underlying print helper returns.
 */
int print_color_size(enum output_type type, enum color_attr color,
		     const char *key, const char *fmt, __u32 sz)
{
	SPRINT_BUF(human);

	if (!_IS_JSON_CONTEXT(type)) {
		sprint_size(sz, human);
		return print_color_string(type, color, key, fmt, human);
	}

	return print_color_uint(type, color, key, "%u", sz);
}

37
lib/json_print_math.c Normal file
View File

@ -0,0 +1,37 @@
// SPDX-License-Identifier: GPL-2.0+
#include <stdarg.h>
#include <stdio.h>
#include <math.h>
#include "utils.h"
#include "json_print.h"
/*
 * Render the byte count @sz into @buf in a compact form and return @buf.
 *
 * - "<n>Mb" when @sz is at least 1024*1024 and lies within 1024 bytes of a
 *   whole multiple of 1024*1024;
 * - otherwise "<n>Kb" when @sz is at least 1024 and lies within 16 bytes of
 *   a whole multiple of 1024;
 * - otherwise the exact value as "<sz>b".
 *
 * Output is bounded to SPRINT_BSIZE - 1 bytes by snprintf().
 */
char *sprint_size(__u32 sz, char *buf)
{
	const long kib = 1024;
	const long mib = kib * kib;
	const size_t cap = SPRINT_BSIZE - 1;
	const double bytes = sz;

	if (sz >= mib) {
		double whole = rint(bytes / mib);

		if (fabs(mib * whole - sz) < 1024) {
			snprintf(buf, cap, "%gMb", whole);
			return buf;
		}
	}

	if (sz >= kib) {
		double whole = rint(bytes / kib);

		if (fabs(kib * whole - sz) < 16) {
			snprintf(buf, cap, "%gKb", whole);
			return buf;
		}
	}

	snprintf(buf, cap, "%ub", sz);
	return buf;
}
/*
 * Print the size @sz under @key.
 *
 * In a JSON context the raw number is emitted through print_color_uint()
 * with "%u"; otherwise the value is first rendered by sprint_size() and
 * printed as a string using the caller's @fmt.  Returns whatever the
 * underlying print helper returns.
 */
int print_color_size(enum output_type type, enum color_attr color,
		     const char *key, const char *fmt, __u32 sz)
{
	SPRINT_BUF(human);

	if (!_IS_JSON_CONTEXT(type)) {
		sprint_size(sz, human);
		return print_color_string(type, color, key, fmt, human);
	}

	return print_color_uint(type, color, key, "%u", sz);
}

View File

@ -122,8 +122,14 @@ int netns_foreach(int (*func)(char *nsname, void *arg), void *arg)
struct dirent *entry;
dir = opendir(NETNS_RUN_DIR);
if (!dir)
if (!dir) {
if (errno == ENOENT)
return 0;
fprintf(stderr, "Failed to open directory %s: %s\n",
NETNS_RUN_DIR, strerror(errno));
return -1;
}
while ((entry = readdir(dir)) != NULL) {
if (strcmp(entry->d_name, ".") == 0)

View File

@ -513,120 +513,6 @@ int get_addr64(__u64 *ap, const char *cp)
return 1;
}
/* See http://physics.nist.gov/cuu/Units/binary.html */
/*
 * Rate unit suffixes and the number of bits per second each stands for.
 * Names are compared with strcasecmp(), i.e. case-insensitively.  The table
 * is terminated by an entry with a NULL name.
 */
static const struct rate_suffix {
	const char *name;
	double scale;
} suffixes[] = {
	{ "bit",	1. },
	{ "Kibit",	1024. },
	{ "kbit",	1000. },
	{ "mibit",	1024.*1024. },
	{ "mbit",	1000000. },
	{ "gibit",	1024.*1024.*1024. },
	{ "gbit",	1000000000. },
	{ "tibit",	1024.*1024.*1024.*1024. },
	{ "tbit",	1000000000000. },
	{ "Bps",	8. },
	{ "KiBps",	8.*1024. },
	{ "KBps",	8000. },
	{ "MiBps",	8.*1024*1024. },
	{ "MBps",	8000000. },
	{ "GiBps",	8.*1024.*1024.*1024. },
	{ "GBps",	8000000000. },
	{ "TiBps",	8.*1024.*1024.*1024.*1024. },
	{ "TBps",	8000000000000. },
	{ NULL }
};

/*
 * Parse "<number>[<suffix>]" into a rate expressed in bytes per second.
 *
 * The number is read with strtod(); the remainder of the string must be
 * empty (the number is then taken as bits per second) or match one entry of
 * suffixes[].
 *
 * Returns 0 and stores the result in *bytes_per_sec on success, -1 when no
 * number could be parsed or the suffix is unknown (*bytes_per_sec is then
 * left untouched).
 */
static int parse_rate_bytes(double *bytes_per_sec, const char *str)
{
	const struct rate_suffix *s;
	char *p;
	double bits = strtod(str, &p);

	if (p == str)
		return -1;

	if (*p) {
		for (s = suffixes; s->name; ++s)
			if (strcasecmp(s->name, p) == 0)
				break;
		if (!s->name)
			return -1; /* unknown suffix */
		bits *= s->scale;
	}

	*bytes_per_sec = bits / 8; /* -> bytes per second */
	return 0;
}

/*
 * Parse a rate string into a 32-bit bytes-per-second value.
 *
 * Fractional results are truncated.  Returns 0 on success, -1 on a parse
 * error or when the value is negative, NaN or does not fit in an
 * unsigned int; *rate is only written on success.
 */
int get_rate(unsigned int *rate, const char *str)
{
	double bps;

	if (parse_rate_bytes(&bps, str))
		return -1;

	/*
	 * Check the range before converting: converting a double whose
	 * integral part is not representable in the target type is undefined.
	 * The test is a negated conjunction so that NaN is rejected as well.
	 * (double)~0U + 1. is UINT_MAX + 1.
	 */
	if (!(bps >= 0. && bps < (double)~0U + 1.))
		return -1;

	*rate = bps;
	return 0;
}

/*
 * Parse a rate string into a 64-bit bytes-per-second value.
 *
 * Fractional results are truncated.  Returns 0 on success, -1 on a parse
 * error or when the value is negative, NaN or does not fit in 64 bits;
 * *rate is only written on success.
 */
int get_rate64(__u64 *rate, const char *str)
{
	double bps;

	if (parse_rate_bytes(&bps, str))
		return -1;

	/* Same range rule as get_rate(); the upper bound is 2^64. */
	if (!(bps >= 0. && bps < 18446744073709551616.))
		return -1;

	*rate = bps;
	return 0;
}
/*
 * Parse a size string "<number>[<unit>]" into a byte count.
 *
 * The number is read with strtod().  Recognized units (case-insensitive):
 *   b             bytes (same as no unit)
 *   k, kb         * 1024
 *   m, mb         * 1024 * 1024
 *   g, gb         * 1024 * 1024 * 1024
 *   kbit          * 1024 / 8
 *   mbit          * 1024 * 1024 / 8
 *   gbit          * 1024 * 1024 * 1024 / 8
 *
 * Fractional results are truncated.  Returns 0 on success, -1 when no
 * number could be parsed, the unit is unknown, or the value is negative,
 * NaN or does not fit in an unsigned int; *size is only written on success.
 */
int get_size(unsigned int *size, const char *str)
{
	double sz;
	char *p;

	sz = strtod(str, &p);
	if (p == str)
		return -1;

	if (*p) {
		if (strcasecmp(p, "kb") == 0 || strcasecmp(p, "k") == 0)
			sz *= 1024;
		else if (strcasecmp(p, "gb") == 0 || strcasecmp(p, "g") == 0)
			sz *= 1024*1024*1024;
		else if (strcasecmp(p, "gbit") == 0)
			sz *= 1024*1024*1024/8;
		else if (strcasecmp(p, "mb") == 0 || strcasecmp(p, "m") == 0)
			sz *= 1024*1024;
		else if (strcasecmp(p, "mbit") == 0)
			sz *= 1024*1024/8;
		else if (strcasecmp(p, "kbit") == 0)
			sz *= 1024/8;
		else if (strcasecmp(p, "b") != 0)
			return -1;
	}

	/*
	 * Check the range before converting: converting a double whose
	 * integral part is not representable in the target type is undefined.
	 * The test is a negated conjunction so that NaN is rejected as well.
	 * (double)~0U + 1. is UINT_MAX + 1.
	 */
	if (!(sz >= 0. && sz < (double)~0U + 1.))
		return -1;

	*size = sz;
	return 0;
}
static void set_address_type(inet_prefix *addr)
{
switch (addr->family) {

123
lib/utils_math.c Normal file
View File

@ -0,0 +1,123 @@
// SPDX-License-Identifier: GPL-2.0+
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <math.h>
#include <asm/types.h>
#include "utils.h"
/* See http://physics.nist.gov/cuu/Units/binary.html */
/*
 * Rate unit suffixes and the number of bits per second each stands for.
 * Names are compared with strcasecmp(), i.e. case-insensitively.  The table
 * is terminated by an entry with a NULL name.
 */
static const struct rate_suffix {
	const char *name;
	double scale;
} suffixes[] = {
	{ "bit",	1. },
	{ "Kibit",	1024. },
	{ "kbit",	1000. },
	{ "mibit",	1024.*1024. },
	{ "mbit",	1000000. },
	{ "gibit",	1024.*1024.*1024. },
	{ "gbit",	1000000000. },
	{ "tibit",	1024.*1024.*1024.*1024. },
	{ "tbit",	1000000000000. },
	{ "Bps",	8. },
	{ "KiBps",	8.*1024. },
	{ "KBps",	8000. },
	{ "MiBps",	8.*1024*1024. },
	{ "MBps",	8000000. },
	{ "GiBps",	8.*1024.*1024.*1024. },
	{ "GBps",	8000000000. },
	{ "TiBps",	8.*1024.*1024.*1024.*1024. },
	{ "TBps",	8000000000000. },
	{ NULL }
};

/*
 * Parse "<number>[<suffix>]" into a rate expressed in bytes per second.
 *
 * The number is read with strtod(); the remainder of the string must be
 * empty (the number is then taken as bits per second) or match one entry of
 * suffixes[].
 *
 * Returns 0 and stores the result in *bytes_per_sec on success, -1 when no
 * number could be parsed or the suffix is unknown (*bytes_per_sec is then
 * left untouched).
 */
static int parse_rate_bytes(double *bytes_per_sec, const char *str)
{
	const struct rate_suffix *s;
	char *p;
	double bits = strtod(str, &p);

	if (p == str)
		return -1;

	if (*p) {
		for (s = suffixes; s->name; ++s)
			if (strcasecmp(s->name, p) == 0)
				break;
		if (!s->name)
			return -1; /* unknown suffix */
		bits *= s->scale;
	}

	*bytes_per_sec = bits / 8; /* -> bytes per second */
	return 0;
}

/*
 * Parse a rate string into a 32-bit bytes-per-second value.
 *
 * Fractional results are truncated.  Returns 0 on success, -1 on a parse
 * error or when the value is negative, NaN or does not fit in an
 * unsigned int; *rate is only written on success.
 */
int get_rate(unsigned int *rate, const char *str)
{
	double bps;

	if (parse_rate_bytes(&bps, str))
		return -1;

	/*
	 * Check the range before converting: converting a double whose
	 * integral part is not representable in the target type is undefined.
	 * The test is a negated conjunction so that NaN is rejected as well.
	 * (double)~0U + 1. is UINT_MAX + 1.
	 */
	if (!(bps >= 0. && bps < (double)~0U + 1.))
		return -1;

	*rate = bps;
	return 0;
}

/*
 * Parse a rate string into a 64-bit bytes-per-second value.
 *
 * Fractional results are truncated.  Returns 0 on success, -1 on a parse
 * error or when the value is negative, NaN or does not fit in 64 bits;
 * *rate is only written on success.
 */
int get_rate64(__u64 *rate, const char *str)
{
	double bps;

	if (parse_rate_bytes(&bps, str))
		return -1;

	/* Same range rule as get_rate(); the upper bound is 2^64. */
	if (!(bps >= 0. && bps < 18446744073709551616.))
		return -1;

	*rate = bps;
	return 0;
}
/*
 * Parse a size string "<number>[<unit>]" into a byte count.
 *
 * The number is read with strtod().  Recognized units (case-insensitive):
 *   b             bytes (same as no unit)
 *   k, kb         * 1024
 *   m, mb         * 1024 * 1024
 *   g, gb         * 1024 * 1024 * 1024
 *   kbit          * 1024 / 8
 *   mbit          * 1024 * 1024 / 8
 *   gbit          * 1024 * 1024 * 1024 / 8
 *
 * Fractional results are truncated.  Returns 0 on success, -1 when no
 * number could be parsed, the unit is unknown, or the value is negative,
 * NaN or does not fit in an unsigned int; *size is only written on success.
 */
int get_size(unsigned int *size, const char *str)
{
	double sz;
	char *p;

	sz = strtod(str, &p);
	if (p == str)
		return -1;

	if (*p) {
		if (strcasecmp(p, "kb") == 0 || strcasecmp(p, "k") == 0)
			sz *= 1024;
		else if (strcasecmp(p, "gb") == 0 || strcasecmp(p, "g") == 0)
			sz *= 1024*1024*1024;
		else if (strcasecmp(p, "gbit") == 0)
			sz *= 1024*1024*1024/8;
		else if (strcasecmp(p, "mb") == 0 || strcasecmp(p, "m") == 0)
			sz *= 1024*1024;
		else if (strcasecmp(p, "mbit") == 0)
			sz *= 1024*1024/8;
		else if (strcasecmp(p, "kbit") == 0)
			sz *= 1024/8;
		else if (strcasecmp(p, "b") != 0)
			return -1;
	}

	/*
	 * Check the range before converting: converting a double whose
	 * integral part is not representable in the target type is undefined.
	 * The test is a negated conjunction so that NaN is rejected as well.
	 * (double)~0U + 1. is UINT_MAX + 1.
	 */
	if (!(sz >= 0. && sz < (double)~0U + 1.))
		return -1;

	*size = sz;
	return 0;
}

View File

@ -397,7 +397,8 @@ bridge FDB.
.TP
.BR "flood on " or " flood off "
Controls whether a given port will flood unicast traffic for which there is no FDB entry. By default this flag is on.
Controls whether unicast traffic for which there is no FDB entry will be
flooded towards this given port. By default this flag is on.
.TP
.B hwmode
@ -413,8 +414,8 @@ switch.
.TP
.BR "mcast_flood on " or " mcast_flood off "
Controls whether a given port will flood multicast traffic for which
there is no MDB entry. By default this flag is on.
Controls whether multicast traffic for which there is no MDB entry will be
flooded towards this given port. By default this flag is on.
.TP
.BR "mcast_to_unicast on " or " mcast_to_unicast off "
@ -514,7 +515,14 @@ the Ethernet MAC address.
the interface to which this address is associated.
.B local
- is a local permanent fdb entry
- is a local permanent fdb entry, which means that the bridge will not forward
frames with this destination MAC address and VLAN ID, but terminate them
locally. This flag is default unless "static" or "dynamic" are explicitly
specified.
.sp
.B permanent
- this is a synonym for "local"
.sp
.B static
@ -526,11 +534,21 @@ the interface to which this address is associated.
.sp
.B self
- the address is associated with the port drivers fdb. Usually hardware.
- the operation is fulfilled directly by the driver for the specified network
device. If the network device belongs to a master like a bridge, then the
bridge is bypassed and not notified of this operation (and if the device does
notify the bridge, it is driver-specific behavior and not mandated by this
flag, check the driver for more details). The "bridge fdb add" command can also
be used on the bridge device itself, and in this case, the added fdb entries
will be locally terminated (not forwarded). In the latter case, the "self" flag
is mandatory. The flag is set by default if "master" is not specified.
.sp
.B master
- the address is associated with master devices fdb. Usually software (default).
- if the specified network device is a port that belongs to a master device
such as a bridge, the operation is fulfilled by the master device's driver,
which may in turn notify the port driver too of the address. If the specified
device is a master itself, such as a bridge, this flag is invalid.
.sp
.B router

View File

@ -1307,9 +1307,9 @@ For a link of type
the following additional arguments are supported:
.BI "ip link add " DEVICE
.BI type " bareudp " dstport " PORT " ethertype " ETHERTYPE"
.BI type " bareudp " dstport " PORT " ethertype " PROTO"
[
.BI srcportmin " SRCPORTMIN "
.BI srcportmin " PORT "
] [
.RB [ no ] multiproto
]
@ -1320,11 +1320,14 @@ the following additional arguments are supported:
- specifies the destination port for the UDP tunnel.
.sp
.BI ethertype " ETHERTYPE"
.BI ethertype " PROTO"
- specifies the ethertype of the L3 protocol being tunnelled.
.B ethertype
can be given as plain Ethernet protocol number or using the protocol name
("ipv4", "ipv6", "mpls_uc", etc.).
.sp
.BI srcportmin " SRCPORTMIN"
.BI srcportmin " PORT"
- selects the lowest value of the UDP tunnel source port range.
.sp
@ -1332,11 +1335,11 @@ the following additional arguments are supported:
- activates support for protocols similar to the one
.RB "specified by " ethertype .
When
.I ETHERTYPE
.B ethertype
is "mpls_uc" (that is, unicast MPLS), this allows the tunnel to also handle
multicast MPLS.
When
.I ETHERTYPE
.B ethertype
is "ipv4", this allows the tunnel to also handle IPv6. This option is disabled
by default.

View File

@ -440,6 +440,113 @@ states except for
- opposite to
.B bucket
.SH EXPRESSION
.B EXPRESSION
allows filtering based on specific criteria.
.B EXPRESSION
consists of a series of predicates combined by boolean operators. The possible operators in increasing
order of precedence are
.B or
(or | or ||),
.B and
(or & or &&), and
.B not
(or !). If no operator is between consecutive predicates, an implicit
.B and
operator is assumed. Subexpressions can be grouped with "(" and ")".
.P
The following predicates are supported:
.TP
.B {dst|src} [=] HOST
Test if the destination or source matches HOST. See HOST SYNTAX for details.
.TP
.B {dport|sport} [OP] [FAMILY:]:PORT
Compare the destination or source port to PORT. OP can be any of "<", "<=", "=", "!=",
">=" and ">", following normal arithmetic rules. FAMILY and PORT are as described in
HOST SYNTAX below.
.TP
.B dev [=|!=] DEVICE
Match based on the device the connection uses. DEVICE can either be a device name or the
index of the interface.
.TP
.B fwmark [=|!=] MASK
Matches based on the fwmark value for the connection. This can either be a specific mark value
or a mark value followed by a "/" and a bitmask of which bits to use in the comparison. For example
"fwmark = 0x01/0x03" would match if the two least significant bits of the fwmark were 0x01.
.TP
.B cgroup [=|!=] PATH
Match if the connection is part of a cgroup at the given path.
.TP
.B autobound
Match if the port or path of the source address was automatically allocated
(rather than explicitly specified).
.P
Most operators have aliases. If no operator is supplied "=" is assumed.
Each of the following groups of operators are all equivalent:
.RS
.IP \(bu 2
= == eq
.IP \(bu
!= ne neq
.IP \(bu
> gt
.IP \(bu
< lt
.IP \(bu
>= ge geq
.IP \(bu
<= le leq
.IP \(bu
! not
.IP \(bu
| || or
.IP \(bu
& && and
.RE
.SH HOST SYNTAX
.P
The general host syntax is [FAMILY:]ADDRESS[:PORT].
.P
FAMILY must be one of the families supported by the -f option. If not given
it defaults to the family given with the -f option, and if that is also
missing, will assume either inet or inet6. Note that all host conditions in the
expression should either all be the same family or be only inet and inet6. If there
is some other mixture of families, the results will probably be unexpected.
.P
The form of ADDRESS and PORT depends on the family used. "*" can be used as
a wildcard for either the address or port. The details for each family are as
follows:
.TP
.B unix
ADDRESS is a glob pattern (see
.BR fnmatch (3))
that will be matched case-insensitively against the unix socket's address. Both path and abstract
names are supported. Unix addresses do not support a port, and "*" cannot be used as a wildcard.
.TP
.B link
ADDRESS is the case-insensitive name of an Ethernet protocol to match. PORT
is either a device name or a device index for the desired link device, as seen
in the output of ip link.
.TP
.B netlink
ADDRESS is a descriptor of the netlink family. Possible values come from
/etc/iproute2/nl_protos. PORT is the port id of the socket, which is usually
the same as the owning process id. The value "kernel" can be used to represent
the kernel (port id of 0).
.TP
.B vsock
ADDRESS is an integer representing the CID address, and PORT is the port.
.TP
.BR inet \ and\ inet6
ADDRESS is an ip address (either v4 or v6 depending on the family) or a DNS
hostname that resolves to an ip address of the required version. An ipv6
address must be enclosed in "[" and "]" to disambiguate the port separator. The
address may additionally have a prefix length given in CIDR notation (a slash
followed by the prefix length in bits). PORT is either the numerical
socket port, or the service name for the port to match.
.SH USAGE EXAMPLES
.TP
.B ss -t -a

View File

@ -92,7 +92,11 @@ in the schedule;
clockid
.br
Specifies the clock to be used by qdisc's internal timer for measuring
time and scheduling events.
time and scheduling events. This argument must be omitted when using the
full-offload feature (flags 0x2), since in that case, the clockid is
implicitly /dev/ptpN (where N is given by
.B ethtool -T eth0 | grep 'PTP Hardware Clock'
), and therefore not necessarily synchronized with the system's CLOCK_TAI.
.TP
sched-entry
@ -115,13 +119,27 @@ before moving to the next entry.
.TP
flags
.br
Specifies different modes for taprio. Currently, only txtime-assist is
supported which can be enabled by setting it to 0x1. In this mode, taprio will
set the transmit timestamp depending on the interval in which the packet needs
to be transmitted. It will then utilize the
This is a bit mask which specifies different modes for taprio.
.RS
.TP
.I 0x1
Enables the txtime-assist feature. In this mode, taprio will set the transmit
timestamp depending on the interval in which the packet needs to be
transmitted. It will then utilize the
.BR etf(8)
qdisc to sort and transmit the packets at the right time. The second example
can be used as a reference to configure this mode.
.TP
.I 0x2
Enables the full-offload feature. In this mode, taprio will pass the gate
control list to the NIC which will execute it cyclically in hardware.
When using full-offload, there is no need to specify the
.B clockid
argument.
The txtime-assist and full-offload features are mutually exclusive, i.e.
setting flags to 0x3 is invalid.
.RE
.TP
txtime-delay
@ -178,5 +196,28 @@ for more information about configuring the ETF qdisc.
offload delta 200000 clockid CLOCK_TAI
.EE
The following is a schedule in full offload mode. The
.B base-time
is 200 ns and the
.B cycle-time
is implicitly calculated as the sum of all
.B sched-entry
durations (i.e. 20 us + 20 us + 60 us = 100 us). Although the base-time is in
the past, the hardware will start executing the schedule at a PTP time equal to
the smallest integer multiple of 100 us, plus 200 ns, that is larger than the
NIC's current PTP time.
.EX
# tc qdisc add dev eth0 parent root taprio \\
num_tc 8 \\
map 0 1 2 3 4 5 6 7 \\
queues 1@0 1@1 1@2 1@3 1@4 1@5 1@6 1@7 \\
base-time 200 \\
sched-entry S 80 20000 \\
sched-entry S a0 20000 \\
sched-entry S df 60000 \\
flags 0x2
.EE
.SH AUTHORS
Vinicius Costa Gomes <vinicius.gomes@intel.com>

View File

@ -3404,7 +3404,7 @@ static int tcpdiag_send(int fd, int protocol, struct filter *f)
struct iovec iov[3];
int iovlen = 1;
if (protocol == IPPROTO_UDP)
if (protocol == IPPROTO_UDP || protocol == IPPROTO_MPTCP)
return -1;
if (protocol == IPPROTO_TCP)
@ -3623,6 +3623,14 @@ static int inet_show_netlink(struct filter *f, FILE *dump_fp, int protocol)
if (preferred_family == PF_INET6)
family = PF_INET6;
/* extended protocol will use INET_DIAG_REQ_PROTOCOL,
* not supported by older kernels. On such kernel
* rtnl_dump will bail with rtnl_dump_error().
* Suppress the error to avoid confusing the user
*/
if (protocol > 255)
rth.flags |= RTNL_HANDLE_F_SUPPRESS_NLERR;
again:
if ((err = sockdiag_send(family, rth.fd, protocol, f)))
goto Exit;

View File

@ -85,6 +85,7 @@ struct rd_cmd {
* Parser interface
*/
bool rd_no_arg(struct rd *rd);
bool rd_is_multiarg(struct rd *rd);
void rd_arg_inc(struct rd *rd);
char *rd_argv(struct rd *rd);

View File

@ -502,6 +502,12 @@ static int stat_get_arg(struct rd *rd, const char *arg)
return -EINVAL;
rd_arg_inc(rd);
if (rd_is_multiarg(rd)) {
pr_err("The parameter %s shouldn't include range\n", arg);
return -EINVAL;
}
value = strtol(rd_argv(rd), &endp, 10);
rd_arg_inc(rd);
@ -523,6 +529,8 @@ static int stat_one_qp_bind(struct rd *rd)
return ret;
lqpn = stat_get_arg(rd, "lqpn");
if (lqpn < 0)
return lqpn;
rd_prepare_msg(rd, RDMA_NLDEV_CMD_STAT_SET,
&seq, (NLM_F_REQUEST | NLM_F_ACK));
@ -537,6 +545,9 @@ static int stat_one_qp_bind(struct rd *rd)
if (rd_argc(rd)) {
cntn = stat_get_arg(rd, "cntn");
if (cntn < 0)
return cntn;
mnl_attr_put_u32(rd->nlh, RDMA_NLDEV_ATTR_STAT_COUNTER_ID,
cntn);
}
@ -607,13 +618,23 @@ static int stat_one_qp_unbind(struct rd *rd)
unsigned int portid;
uint32_t seq;
if (rd_no_arg(rd)) {
stat_help(rd);
return -EINVAL;
}
ret = rd_build_filter(rd, stat_valid_filters);
if (ret)
return ret;
cntn = stat_get_arg(rd, "cntn");
if (cntn < 0)
return cntn;
if (rd_argc(rd)) {
lqpn = stat_get_arg(rd, "lqpn");
if (lqpn < 0)
return lqpn;
return do_stat_qp_unbind_lqpn(rd, cntn, lqpn);
}

View File

@ -47,6 +47,13 @@ bool rd_no_arg(struct rd *rd)
return rd_argc(rd) == 0;
}
/*
 * Tell whether the argument returned by rd_argv() contains a ',' or a '-',
 * the characters this helper uses to recognise a list or range.
 *
 * Returns false when rd_argc() reports no arguments; rd_argv() is not
 * consulted in that case.
 */
bool rd_is_multiarg(struct rd *rd)
{
	return rd_argc(rd) && strpbrk(rd_argv(rd), ",-");
}
/*
* Possible input:output
* dev/port | first port | is_dump_all

View File

@ -427,7 +427,7 @@ static int print_gate_list(struct rtattr *list)
__u32 index = 0, interval = 0;
__u8 gate_state = 0;
__s32 ipv = -1, maxoctets = -1;
char buf[22];
SPRINT_BUF(buf);
parse_rtattr_nested(tb, TCA_GATE_ENTRY_MAX, item);
@ -490,7 +490,7 @@ static int print_gate(struct action_util *au, FILE *f, struct rtattr *arg)
__s64 base_time = 0;
__s64 cycle_time = 0;
__s64 cycle_time_ext = 0;
char buf[22];
SPRINT_BUF(buf);
int prio = -1;
if (arg == NULL)

1
vdpa/.gitignore vendored Normal file
View File

@ -0,0 +1 @@
vdpa