Merge branch 'mptcp' into next

Paolo Abeni  says:

====================

This introduces support for the MPTCP PM netlink interface, allowing admins
to configure several aspects of the MPTCP path manager. The subcommand is
documented with a newly added man-page.

This series also includes support for MPTCP subflow diag.

====================

Signed-off-by: David Ahern <dsahern@gmail.com>
This commit is contained in:
David Ahern 2020-04-29 16:50:25 +00:00
commit d38f2a10dd
7 changed files with 649 additions and 2 deletions

View File

@ -11,7 +11,7 @@ IPOBJ=ip.o ipaddress.o ipaddrlabel.o iproute.o iprule.o ipnetns.o \
iplink_bridge.o iplink_bridge_slave.o ipfou.o iplink_ipvlan.o \
iplink_geneve.o iplink_vrf.o iproute_lwtunnel.o ipmacsec.o ipila.o \
ipvrf.o iplink_xstats.o ipseg6.o iplink_netdevsim.o iplink_rmnet.o \
ipnexthop.o
ipnexthop.o ipmptcp.o
RTMONOBJ=rtmon.o

View File

@ -51,7 +51,7 @@ static void usage(void)
"where OBJECT := { link | address | addrlabel | route | rule | neigh | ntable |\n"
" tunnel | tuntap | maddress | mroute | mrule | monitor | xfrm |\n"
" netns | l2tp | fou | macsec | tcp_metrics | token | netconf | ila |\n"
" vrf | sr | nexthop }\n"
" vrf | sr | nexthop | mptcp }\n"
" OPTIONS := { -V[ersion] | -s[tatistics] | -d[etails] | -r[esolve] |\n"
" -h[uman-readable] | -iec | -j[son] | -p[retty] |\n"
" -f[amily] { inet | inet6 | mpls | bridge | link } |\n"
@ -103,6 +103,7 @@ static const struct cmd {
{ "vrf", do_ipvrf},
{ "sr", do_seg6 },
{ "nexthop", do_ipnh },
{ "mptcp", do_mptcp },
{ "help", do_help },
{ 0 }
};

View File

@ -83,6 +83,7 @@ void vrf_reset(void);
int netns_identify_pid(const char *pidstr, char *name, int len);
int do_seg6(int argc, char **argv);
int do_ipnh(int argc, char **argv);
int do_mptcp(int argc, char **argv);
int iplink_get(char *name, __u32 filt_mask);
int iplink_ifla_xstats(int argc, char **argv);

436
ip/ipmptcp.c Normal file
View File

@ -0,0 +1,436 @@
// SPDX-License-Identifier: GPL-2.0
#include <stdio.h>
#include <string.h>
#include <rt_names.h>
#include <errno.h>
#include <linux/genetlink.h>
#include <linux/mptcp.h>
#include "utils.h"
#include "ip_common.h"
#include "libgenl.h"
#include "json_print.h"
/* Write the "ip mptcp" synopsis to stderr and terminate with exit status -1. */
static void usage(void)
{
	static const char synopsis[] =
		"Usage: ip mptcp endpoint add ADDRESS [ dev NAME ] [ id ID ]\n"
		" [ FLAG-LIST ]\n"
		" ip mptcp endpoint delete id ID\n"
		" ip mptcp endpoint show [ id ID ]\n"
		" ip mptcp endpoint flush\n"
		" ip mptcp limits set [ subflows NR ] [ add_addr_accepted NR ]\n"
		" ip mptcp limits show\n"
		"FLAG-LIST := [ FLAG-LIST ] FLAG\n"
		"FLAG := [ signal | subflow | backup ]\n";

	/* the text contains no conversion specifiers, so it is emitted verbatim */
	fputs(synopsis, stderr);
	exit(-1);
}
/*
 * netlink socket used by every "ip mptcp" sub-command in this file.
 * The fd starts at -1; do_mptcp() passes the handle to genl_init_handle()
 * before any request is sent.
 */
static struct rtnl_handle genl_rth = { .fd = -1 };
/*
 * Generic netlink family id of the MPTCP path manager. Passed by address to
 * genl_init_handle() in do_mptcp() (presumably resolved there from
 * MPTCP_PM_NAME) and compared against nlmsg_type when parsing replies.
 */
static int genl_family = -1;
/* Size handed to GENL_REQUEST() and used as the bound for all addattr*() calls. */
#define MPTCP_BUFLEN 4096
/*
 * Declare a request named _req for path-manager command _cmd with netlink
 * flags _flags. Thin wrapper around GENL_REQUEST() that fixes the buffer
 * size, family id and MPTCP_PM_VER; callers access the message as _req.n.
 */
#define MPTCP_REQUEST(_req, _cmd, _flags) \
GENL_REQUEST(_req, MPTCP_BUFLEN, genl_family, 0, \
MPTCP_PM_VER, _cmd, _flags)
/*
 * Mapping from argument to address flag mask.
 * get_flags() uses it to turn a command-line keyword into a flag bit, and
 * print_mptcp_addr_flags() uses it to name the bits found in a reply.
 */
static const struct {
const char *name;
unsigned long value;
} mptcp_addr_flag_names[] = {
{ "signal", MPTCP_PM_ADDR_FLAG_SIGNAL },
{ "subflow", MPTCP_PM_ADDR_FLAG_SUBFLOW },
{ "backup", MPTCP_PM_ADDR_FLAG_BACKUP },
};
static void print_mptcp_addr_flags(unsigned int flags)
{
unsigned int i;
for (i = 0; i < ARRAY_SIZE(mptcp_addr_flag_names); i++) {
unsigned long mask = mptcp_addr_flag_names[i].value;
if (flags & mask) {
print_string(PRINT_FP, NULL, "%s ",
mptcp_addr_flag_names[i].name);
print_bool(PRINT_JSON,
mptcp_addr_flag_names[i].name, NULL, true);
}
flags &= ~mask;
}
if (flags) {
/* unknown flags */
SPRINT_BUF(b1);
snprintf(b1, sizeof(b1), "%02x", flags);
print_string(PRINT_ANY, "rawflags", "rawflags %s ", b1);
}
}
/*
 * Look @arg up in mptcp_addr_flag_names (exact, case-sensitive match).
 *
 * On a match the corresponding bit is OR-ed into *@flags and 0 is returned;
 * otherwise *@flags is left untouched and -1 is returned.
 */
static int get_flags(const char *arg, __u32 *flags)
{
	unsigned int pos;

	for (pos = 0; pos < ARRAY_SIZE(mptcp_addr_flag_names); pos++) {
		if (strcmp(arg, mptcp_addr_flag_names[pos].name) == 0) {
			*flags |= mptcp_addr_flag_names[pos].value;
			return 0;
		}
	}

	return -1;
}
/*
 * Parse the endpoint arguments in @argv and append them to message @n as a
 * nested MPTCP_PM_ATTR_ADDR attribute.
 *
 * Recognised arguments: flag names (see get_flags()), "id ID", "dev NAME"
 * and an address. When @adding is true an address is mandatory; otherwise
 * an id is mandatory. Problems are reported through invarg()/missarg().
 *
 * Returns 0.
 */
static int mptcp_parse_opt(int argc, char **argv, struct nlmsghdr *n,
			   bool adding)
{
	bool have_addr = false, have_id = false;
	struct rtattr *nest;
	inet_prefix addr;
	__u32 flag_mask = 0;
	__u32 ifindex = 0;
	__u8 endpoint_id = 0;

	ll_init_map(&rth);

	while (argc > 0) {
		if (get_flags(*argv, &flag_mask) == 0) {
			/* a flag keyword; get_flags() already recorded it */
		} else if (matches(*argv, "id") == 0) {
			NEXT_ARG();
			if (get_u8(&endpoint_id, *argv, 0))
				invarg("invalid ID\n", *argv);
			have_id = true;
		} else if (matches(*argv, "dev") == 0) {
			NEXT_ARG();
			if (check_ifname(*argv))
				invarg("invalid interface name\n", *argv);
			ifindex = ll_name_to_index(*argv);
			if (!ifindex)
				invarg("device does not exist\n", *argv);
		} else if (get_addr(&addr, *argv, AF_UNSPEC) == 0) {
			have_addr = true;
		} else {
			invarg("unknown argument", *argv);
		}
		NEXT_ARG_FWD();
	}

	if (adding && !have_addr)
		missarg("ADDRESS");
	if (!adding && !have_id)
		missarg("ID");

	/* everything below is emitted inside one nested address attribute */
	nest = addattr_nest(n, MPTCP_BUFLEN,
			    MPTCP_PM_ATTR_ADDR | NLA_F_NESTED);

	if (have_id)
		addattr8(n, MPTCP_BUFLEN, MPTCP_PM_ADDR_ATTR_ID, endpoint_id);
	if (flag_mask)
		addattr32(n, MPTCP_BUFLEN, MPTCP_PM_ADDR_ATTR_FLAGS, flag_mask);
	if (ifindex)
		addattr32(n, MPTCP_BUFLEN, MPTCP_PM_ADDR_ATTR_IF_IDX, ifindex);

	if (have_addr) {
		/* anything that is not AF_INET is sent as an ADDR6 attribute */
		int addr_attr = MPTCP_PM_ADDR_ATTR_ADDR6;

		if (addr.family == AF_INET)
			addr_attr = MPTCP_PM_ADDR_ATTR_ADDR4;

		addattr16(n, MPTCP_BUFLEN, MPTCP_PM_ADDR_ATTR_FAMILY,
			  addr.family);
		addattr_l(n, MPTCP_BUFLEN, addr_attr, &addr.data,
			  addr.bytelen);
	}

	addattr_nest_end(n, nest);
	return 0;
}
/*
 * Build and send an endpoint add/delete request.
 *
 * @cmd is the path-manager command to issue; the arguments are parsed in
 * "adding" mode only when it is MPTCP_PM_CMD_ADD_ADDR.
 *
 * Returns 0 on success, the non-zero result of mptcp_parse_opt() if parsing
 * fails, or -2 if rtnl_talk() reports an error.
 */
static int mptcp_addr_modify(int argc, char **argv, int cmd)
{
	MPTCP_REQUEST(req, cmd, NLM_F_REQUEST);
	bool adding = (cmd == MPTCP_PM_CMD_ADD_ADDR);
	int err = mptcp_parse_opt(argc, argv, &req.n, adding);

	if (err)
		return err;

	return rtnl_talk(&genl_rth, &req.n, NULL) < 0 ? -2 : 0;
}
/*
 * Print one endpoint described by the nested attribute @addrinfo.
 *
 * Emits, when present: the address, "id", the flags (via
 * print_mptcp_addr_flags()) and "dev". Each endpoint is wrapped in its own
 * JSON object and terminated by a newline in plain-text mode.
 *
 * Returns 0.
 */
static int print_mptcp_addrinfo(struct rtattr *addrinfo)
{
	struct rtattr *tb[MPTCP_PM_ADDR_ATTR_MAX + 1];
	__u16 family = AF_UNSPEC;
	__u8 addr_attr_type;
	const char *ifname;
	unsigned int flags;
	int index;
	__u16 id;

	parse_rtattr_nested(tb, MPTCP_PM_ADDR_ATTR_MAX, addrinfo);

	open_json_object(NULL);

	/*
	 * The family attribute is 16 bits wide (mptcp_parse_opt() sends it
	 * with addattr16()); reading a single byte would return 0 on
	 * big-endian hosts.
	 */
	if (tb[MPTCP_PM_ADDR_ATTR_FAMILY])
		family = rta_getattr_u16(tb[MPTCP_PM_ADDR_ATTR_FAMILY]);

	/* anything that is not AF_INET is looked up as an ADDR6 attribute */
	addr_attr_type = family == AF_INET ? MPTCP_PM_ADDR_ATTR_ADDR4 :
					     MPTCP_PM_ADDR_ATTR_ADDR6;
	if (tb[addr_attr_type]) {
		print_string(PRINT_ANY, "address", "%s ",
			     format_host_rta(family, tb[addr_attr_type]));
	}

	if (tb[MPTCP_PM_ADDR_ATTR_ID]) {
		id = rta_getattr_u8(tb[MPTCP_PM_ADDR_ATTR_ID]);
		print_uint(PRINT_ANY, "id", "id %u ", id);
	}

	if (tb[MPTCP_PM_ADDR_ATTR_FLAGS]) {
		flags = rta_getattr_u32(tb[MPTCP_PM_ADDR_ATTR_FLAGS]);
		print_mptcp_addr_flags(flags);
	}

	if (tb[MPTCP_PM_ADDR_ATTR_IF_IDX]) {
		index = rta_getattr_s32(tb[MPTCP_PM_ADDR_ATTR_IF_IDX]);
		/* index 0 means no device is bound; nothing is printed then */
		ifname = index ? ll_index_to_name(index) : NULL;
		if (ifname)
			print_string(PRINT_ANY, "dev", "dev %s ", ifname);
	}

	close_json_object();

	print_string(PRINT_FP, NULL, "\n", NULL);
	fflush(stdout);
	return 0;
}
/*
 * Callback printing the endpoint carried by the netlink message @n.
 *
 * Messages whose type is not the MPTCP generic netlink family are ignored
 * (return 0). Returns -1 when the message is shorter than the generic
 * netlink header or carries no MPTCP_PM_ATTR_ADDR attribute, otherwise the
 * result of print_mptcp_addrinfo(). @arg is unused.
 */
static int print_mptcp_addr(struct nlmsghdr *n, void *arg)
{
	struct rtattr *attrs[MPTCP_PM_ATTR_MAX + 1];
	struct genlmsghdr *hdr;
	int remaining = n->nlmsg_len;

	if (n->nlmsg_type != genl_family)
		return 0;

	/* payload left once the netlink + generic netlink headers are skipped */
	remaining -= NLMSG_LENGTH(GENL_HDRLEN);
	if (remaining < 0)
		return -1;

	hdr = NLMSG_DATA(n);
	parse_rtattr_flags(attrs, MPTCP_PM_ATTR_MAX,
			   (void *) hdr + GENL_HDRLEN, remaining,
			   NLA_F_NESTED);

	if (!attrs[MPTCP_PM_ATTR_ADDR])
		return -1;

	ll_init_map(&rth);
	return print_mptcp_addrinfo(attrs[MPTCP_PM_ATTR_ADDR]);
}
/*
 * Dump every configured endpoint and print each one with print_mptcp_addr().
 *
 * Exits with status 1 if the dump request cannot be sent. Returns 0 on
 * success or -2 if the dump is interrupted.
 */
static int mptcp_addr_dump(void)
{
	MPTCP_REQUEST(req, MPTCP_PM_CMD_GET_ADDR, NLM_F_REQUEST | NLM_F_DUMP);
	int ret = 0;

	if (rtnl_send(&genl_rth, &req.n, req.n.nlmsg_len) < 0) {
		perror("Cannot send show request");
		exit(1);
	}

	new_json_obj(json);

	if (rtnl_dump_filter(&genl_rth, print_mptcp_addr, stdout) < 0) {
		fprintf(stderr, "Dump terminated\n");
		ret = -2;
	}

	/*
	 * new_json_obj() has to be paired with delete_json_obj() on every
	 * path: close_json_object() would end an object that was never
	 * opened here and leave the JSON document unterminated.
	 */
	delete_json_obj();
	fflush(stdout);
	return ret;
}
static int mptcp_addr_show(int argc, char **argv)
{
MPTCP_REQUEST(req, MPTCP_PM_CMD_GET_ADDR, NLM_F_REQUEST);
struct nlmsghdr *answer;
int ret;
if (!argv)
return mptcp_addr_dump();
ret = mptcp_parse_opt(argc, argv, &req.n, false);
if (ret)
return ret;
if (rtnl_talk(&genl_rth, &req.n, &answer) < 0)
return -2;
return print_mptcp_addr(answer, stdout);
}
/*
 * Send an MPTCP_PM_CMD_FLUSH_ADDRS request.
 *
 * @argc and @argv are accepted for symmetry with the other sub-commands but
 * are not looked at. Returns 0 on success or -2 if rtnl_talk() fails.
 */
static int mptcp_addr_flush(int argc, char **argv)
{
	MPTCP_REQUEST(req, MPTCP_PM_CMD_FLUSH_ADDRS, NLM_F_REQUEST);

	return rtnl_talk(&genl_rth, &req.n, NULL) < 0 ? -2 : 0;
}
/*
 * Parse "subflows NR" / "add_addr_accepted NR" from @argv and append the
 * values that were given to message @n.
 *
 * Returns 1 if at least one limit was specified, 0 otherwise. Invalid
 * values and unknown keywords are reported through invarg().
 */
static int mptcp_parse_limit(int argc, char **argv, struct nlmsghdr *n)
{
	bool have_subflows = false, have_add_addr = false;
	__u32 max_subflows = 0;
	__u32 max_add_addr = 0;

	while (argc > 0) {
		if (matches(*argv, "subflows") == 0) {
			NEXT_ARG();
			if (get_u32(&max_subflows, *argv, 0))
				invarg("invalid subflows\n", *argv);
			have_subflows = true;
		} else if (matches(*argv, "add_addr_accepted") == 0) {
			NEXT_ARG();
			if (get_u32(&max_add_addr, *argv, 0))
				invarg("invalid add_addr_accepted\n", *argv);
			have_add_addr = true;
		} else {
			invarg("unknown limit", *argv);
		}
		NEXT_ARG_FWD();
	}

	/* only the limits named on the command line are sent */
	if (have_add_addr)
		addattr32(n, MPTCP_BUFLEN, MPTCP_PM_ATTR_RCV_ADD_ADDRS,
			  max_add_addr);
	if (have_subflows)
		addattr32(n, MPTCP_BUFLEN, MPTCP_PM_ATTR_SUBFLOWS,
			  max_subflows);

	return have_add_addr || have_subflows;
}
/*
 * Print the limits carried by the netlink message @n.
 *
 * Messages whose type is not the MPTCP generic netlink family are ignored
 * (return 0). Returns -1 when the message is shorter than the generic
 * netlink header, 0 otherwise. @arg is unused.
 */
static int print_mptcp_limit(struct nlmsghdr *n, void *arg)
{
	struct rtattr *tb[MPTCP_PM_ATTR_MAX + 1];
	struct genlmsghdr *ghdr;
	int len = n->nlmsg_len;
	__u32 val;

	if (n->nlmsg_type != genl_family)
		return 0;

	/* payload left once the netlink + generic netlink headers are skipped */
	len -= NLMSG_LENGTH(GENL_HDRLEN);
	if (len < 0)
		return -1;

	ghdr = NLMSG_DATA(n);
	parse_rtattr(tb, MPTCP_PM_ATTR_MAX, (void *) ghdr + GENL_HDRLEN, len);

	open_json_object(NULL);

	/* both limits are unsigned 32-bit values, hence %u rather than %d */
	if (tb[MPTCP_PM_ATTR_RCV_ADD_ADDRS]) {
		val = rta_getattr_u32(tb[MPTCP_PM_ATTR_RCV_ADD_ADDRS]);
		print_uint(PRINT_ANY, "add_addr_accepted",
			   "add_addr_accepted %u ", val);
	}

	if (tb[MPTCP_PM_ATTR_SUBFLOWS]) {
		val = rta_getattr_u32(tb[MPTCP_PM_ATTR_SUBFLOWS]);
		print_uint(PRINT_ANY, "subflows", "subflows %u ", val);
	}

	print_string(PRINT_FP, NULL, "%s", "\n");
	fflush(stdout);
	close_json_object();
	return 0;
}
static int mptcp_limit_get_set(int argc, char **argv, int cmd)
{
bool do_get = cmd == MPTCP_PM_CMD_GET_LIMITS;
MPTCP_REQUEST(req, cmd, NLM_F_REQUEST);
struct nlmsghdr *answer;
int ret;
ret = mptcp_parse_limit(argc, argv, &req.n);
if (ret < 0)
return -1;
if (rtnl_talk(&genl_rth, &req.n, do_get ? &answer : NULL) < 0)
return -2;
if (do_get)
return print_mptcp_limit(answer, stdout);
return 0;
}
/*
 * Entry point of "ip mptcp": dispatch to the endpoint or limits handlers.
 *
 * With no argument or with "help" the usage text is printed (usage() does
 * not return). "endpoint" and "limits" given without a sub-command default
 * to showing the current state. An unrecognised object or sub-command is
 * reported on stderr and the process exits with status -1. The process also
 * exits with status 1 if genl_init_handle() fails.
 */
int do_mptcp(int argc, char **argv)
{
	if (argc == 0 || matches(*argv, "help") == 0)
		usage();

	if (genl_init_handle(&genl_rth, MPTCP_PM_NAME, &genl_family))
		exit(1);

	if (matches(*argv, "endpoint") == 0) {
		NEXT_ARG_FWD();
		if (argc == 0)
			return mptcp_addr_show(0, NULL);

		if (matches(*argv, "add") == 0)
			return mptcp_addr_modify(argc - 1, argv + 1,
						 MPTCP_PM_CMD_ADD_ADDR);
		if (matches(*argv, "delete") == 0)
			return mptcp_addr_modify(argc - 1, argv + 1,
						 MPTCP_PM_CMD_DEL_ADDR);
		if (matches(*argv, "show") == 0)
			return mptcp_addr_show(argc - 1, argv + 1);
		if (matches(*argv, "flush") == 0)
			return mptcp_addr_flush(argc - 1, argv + 1);
		/* unknown endpoint sub-command: reported below */
	} else if (matches(*argv, "limits") == 0) {
		NEXT_ARG_FWD();
		if (argc == 0)
			return mptcp_limit_get_set(0, NULL,
						   MPTCP_PM_CMD_GET_LIMITS);

		if (matches(*argv, "set") == 0)
			return mptcp_limit_get_set(argc - 1, argv + 1,
						   MPTCP_PM_CMD_SET_LIMITS);
		if (matches(*argv, "show") == 0)
			return mptcp_limit_get_set(argc - 1, argv + 1,
						   MPTCP_PM_CMD_GET_LIMITS);
		/* unknown limits sub-command: reported below */
	}

	/* *argv is the first word that could not be matched */
	fprintf(stderr, "Command \"%s\" is unknown, try \"ip mptcp help\".\n",
		*argv);
	exit(-1);
}

142
man/man8/ip-mptcp.8 Normal file
View File

@ -0,0 +1,142 @@
.TH IP\-MPTCP 8 "4 Apr 2020" "iproute2" "Linux"
.SH "NAME"
ip-mptcp \- MPTCP path manager configuration
.SH "SYNOPSIS"
.sp
.ad l
.in +8
.ti -8
.B ip
.RI "[ " OPTIONS " ]"
.B mptcp
.RB "{ "
.B endpoint
.RB " | "
.B limits
.RB " | "
.B help
.RB " }"
.sp
.ti -8
.BR "ip mptcp endpoint add "
.IR IFADDR
.RB "[ " dev
.IR IFNAME " ]"
.RB "[ " id
.I ID
.RB "] [ "
.I FLAG-LIST
.RB "] "
.ti -8
.BR "ip mptcp endpoint delete id "
.I ID
.ti -8
.BR "ip mptcp endpoint show "
.RB "[ " id
.I ID
.RB "]"
.ti -8
.BR "ip mptcp endpoint flush"
.ti -8
.IR FLAG-LIST " := [ " FLAG-LIST " ] " FLAG
.ti -8
.IR FLAG " := ["
.B signal
.RB "|"
.B subflow
.RB "|"
.B backup
.RB "]"
.ti -8
.BR "ip mptcp limits set "
.RB "[ "
.B subflows
.IR SUBFLOW_NR " ]"
.RB "[ "
.B add_addr_accepted
.IR ADD_ADDR_ACCEPTED_NR " ]"
.ti -8
.BR "ip mptcp limits show"
.SH DESCRIPTION
MPTCP is a transport protocol built on top of TCP that allows TCP
connections to use multiple paths to maximize resource usage and increase
redundancy. The ip-mptcp sub-commands allow configuring several aspects of the
MPTCP path manager, which is in charge of subflow creation.
.P
The
.B endpoint
object specifies the IP addresses that will be used and/or announced for
additional subflows:
.TS
l l.
ip mptcp endpoint add add new MPTCP endpoint
ip mptcp endpoint delete delete existing MPTCP endpoint
ip mptcp endpoint show get existing MPTCP endpoint
ip mptcp endpoint flush flush all existing MPTCP endpoints
.TE
.TP
.IR ID
is a unique numeric identifier for the given endpoint
.TP
.BR signal
the endpoint will be announced/signalled to each peer via an ADD_ADDR MPTCP
sub-option
.TP
.BR subflow
if additional subflow creation is allowed by the MPTCP limits, the endpoint
will be used as the source address to create an additional subflow after
the MPTCP connection is established.
.TP
.BR backup
the endpoint will be announced as a backup address, if this is a
.BR signal
endpoint, or the subflow will be created as a backup one if this is a
.BR subflow
endpoint
.sp
.PP
The
.B limits
object specifies the constraints for subflow creations:
.TS
l l.
ip mptcp limits show get current MPTCP subflow creation limits
ip mptcp limits set change the MPTCP subflow creation limits
.TE
.TP
.IR SUBFLOW_NR
specifies the maximum number of additional subflows allowed for each MPTCP
connection. Additional subflows can be created due to: incoming accepted
ADD_ADDR option, local
.BR subflow
endpoints, additional subflows started by the peer.
.TP
.IR ADD_ADDR_ACCEPTED_NR
specifies the maximum number of ADD_ADDR suboptions accepted for each MPTCP
connection. The MPTCP path manager will try to create a new subflow for
each accepted ADD_ADDR option, respecting the
.IR SUBFLOW_NR
limit.
.SH AUTHOR
Original Manpage by Paolo Abeni <pabeni@redhat.com>

View File

@ -261,6 +261,11 @@ the pacing rate and max pacing rate
.TP
.B rcv_space:<rcv_space>
a helper variable for TCP internal auto tuning socket receive buffer
.P
.TP
.B tcp-ulp-mptcp flags:[MmBbJjecv] token:<rem_token(rem_id)/loc_token(loc_id)> seq:<sn> sfseq:<ssn> ssnoff:<off> maplen:<maplen>
MPTCP subflow information
.P
.RE
.TP
.B \-\-tos

View File

@ -53,6 +53,7 @@
#include <linux/tipc_netlink.h>
#include <linux/tipc_sockets_diag.h>
#include <linux/tls.h>
#include <linux/mptcp.h>
/* AF_VSOCK/PF_VSOCK is only provided since glibc 2.18 */
#ifndef PF_VSOCK
@ -2836,6 +2837,59 @@ static void tcp_tls_conf(const char *name, struct rtattr *attr)
}
}
/*
 * Print the MPTCP subflow attributes found in @tb.
 *
 * Output, each item only when the corresponding attribute(s) are present:
 *   flags:<letters>  one letter per known flag bit that is set
 *   token:<rem>(id:<rem_id>)/<loc>(id:<loc_id>)  needs all four attributes
 *   seq:, sfseq:, ssnoff:, maplen:  printed in hexadecimal
 */
static void mptcp_subflow_info(struct rtattr *tb[])
{
	/* emission order of the letters follows the order of this table */
	static const struct {
		u_int32_t bit;
		char letter;
	} flag_letters[] = {
		{ MPTCP_SUBFLOW_FLAG_MCAP_REM,		'M' },
		{ MPTCP_SUBFLOW_FLAG_MCAP_LOC,		'm' },
		{ MPTCP_SUBFLOW_FLAG_JOIN_REM,		'J' },
		{ MPTCP_SUBFLOW_FLAG_JOIN_LOC,		'j' },
		{ MPTCP_SUBFLOW_FLAG_BKUP_REM,		'B' },
		{ MPTCP_SUBFLOW_FLAG_BKUP_LOC,		'b' },
		{ MPTCP_SUBFLOW_FLAG_FULLY_ESTABLISHED,	'e' },
		{ MPTCP_SUBFLOW_FLAG_CONNECTED,		'c' },
		{ MPTCP_SUBFLOW_FLAG_MAPVALID,		'v' },
	};

	if (tb[MPTCP_SUBFLOW_ATTR_FLAGS]) {
		u_int32_t flags = rta_getattr_u32(tb[MPTCP_SUBFLOW_ATTR_FLAGS]);
		char caps[32 + 1] = { 0 };
		size_t used = 0;
		size_t k;

		for (k = 0; k < sizeof(flag_letters) / sizeof(flag_letters[0]); k++) {
			if (flags & flag_letters[k].bit)
				caps[used++] = flag_letters[k].letter;
		}

		/* nothing is printed when no bit at all is set */
		if (flags)
			out(" flags:%s", caps);
	}

	if (tb[MPTCP_SUBFLOW_ATTR_TOKEN_REM] &&
	    tb[MPTCP_SUBFLOW_ATTR_TOKEN_LOC] &&
	    tb[MPTCP_SUBFLOW_ATTR_ID_REM] &&
	    tb[MPTCP_SUBFLOW_ATTR_ID_LOC]) {
		out(" token:%04x(id:%hhu)/%04x(id:%hhu)",
		    rta_getattr_u32(tb[MPTCP_SUBFLOW_ATTR_TOKEN_REM]),
		    rta_getattr_u8(tb[MPTCP_SUBFLOW_ATTR_ID_REM]),
		    rta_getattr_u32(tb[MPTCP_SUBFLOW_ATTR_TOKEN_LOC]),
		    rta_getattr_u8(tb[MPTCP_SUBFLOW_ATTR_ID_LOC]));
	}

	if (tb[MPTCP_SUBFLOW_ATTR_MAP_SEQ]) {
		out(" seq:%llx",
		    rta_getattr_u64(tb[MPTCP_SUBFLOW_ATTR_MAP_SEQ]));
	}

	if (tb[MPTCP_SUBFLOW_ATTR_MAP_SFSEQ]) {
		out(" sfseq:%x",
		    rta_getattr_u32(tb[MPTCP_SUBFLOW_ATTR_MAP_SFSEQ]));
	}

	if (tb[MPTCP_SUBFLOW_ATTR_SSN_OFFSET]) {
		out(" ssnoff:%x",
		    rta_getattr_u32(tb[MPTCP_SUBFLOW_ATTR_SSN_OFFSET]));
	}

	if (tb[MPTCP_SUBFLOW_ATTR_MAP_DATALEN]) {
		out(" maplen:%x",
		    rta_getattr_u32(tb[MPTCP_SUBFLOW_ATTR_MAP_DATALEN]));
	}
}
#define TCPI_HAS_OPT(info, opt) !!(info->tcpi_options & (opt))
static void tcp_show_info(const struct nlmsghdr *nlh, struct inet_diag_msg *r,
@ -3012,6 +3066,14 @@ static void tcp_show_info(const struct nlmsghdr *nlh, struct inet_diag_msg *r,
tcp_tls_conf("rxconf", tlsinfo[TLS_INFO_RXCONF]);
tcp_tls_conf("txconf", tlsinfo[TLS_INFO_TXCONF]);
}
if (ulpinfo[INET_ULP_INFO_MPTCP]) {
struct rtattr *sfinfo[MPTCP_SUBFLOW_ATTR_MAX + 1] =
{ 0 };
parse_rtattr_nested(sfinfo, MPTCP_SUBFLOW_ATTR_MAX,
ulpinfo[INET_ULP_INFO_MPTCP]);
mptcp_subflow_info(sfinfo);
}
}
}