diff --git a/include/libnetlink.h b/include/libnetlink.h index 503b3ec1..1ddba8dc 100644 --- a/include/libnetlink.h +++ b/include/libnetlink.h @@ -45,7 +45,8 @@ int rtnl_open(struct rtnl_handle *rth, unsigned int subscriptions) int rtnl_open_byproto(struct rtnl_handle *rth, unsigned int subscriptions, int protocol) __attribute__((warn_unused_result)); - +int rtnl_add_nl_group(struct rtnl_handle *rth, unsigned int group) + __attribute__((warn_unused_result)); void rtnl_close(struct rtnl_handle *rth); void rtnl_set_strict_dump(struct rtnl_handle *rth); @@ -92,6 +93,10 @@ int rtnl_dump_request(struct rtnl_handle *rth, int type, void *req, int rtnl_dump_request_n(struct rtnl_handle *rth, struct nlmsghdr *n) __attribute__((warn_unused_result)); +int rtnl_nexthopdump_req(struct rtnl_handle *rth, int family, + req_filter_fn_t filter_fn) + __attribute__((warn_unused_result)); + struct rtnl_ctrl_data { int nsid; }; diff --git a/include/uapi/linux/nexthop.h b/include/uapi/linux/nexthop.h new file mode 100644 index 00000000..b56c5b89 --- /dev/null +++ b/include/uapi/linux/nexthop.h @@ -0,0 +1,56 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +#ifndef _LINUX_NEXTHOP_H +#define _LINUX_NEXTHOP_H + +#include + +struct nhmsg { + unsigned char nh_family; + unsigned char nh_scope; /* return only */ + unsigned char nh_protocol; /* Routing protocol that installed nh */ + unsigned char resvd; + unsigned int nh_flags; /* RTNH_F flags */ +}; + +/* entry in a nexthop group */ +struct nexthop_grp { + __u32 id; /* nexthop id - must exist */ + __u8 weight; /* weight of this nexthop */ + __u8 resvd1; + __u16 resvd2; +}; + +enum { + NEXTHOP_GRP_TYPE_MPATH, /* default type if not specified */ + __NEXTHOP_GRP_TYPE_MAX, +}; + +#define NEXTHOP_GRP_TYPE_MAX (__NEXTHOP_GRP_TYPE_MAX - 1) + +enum { + NHA_UNSPEC, + NHA_ID, /* u32; id for nexthop. id == 0 means auto-assign */ + + NHA_GROUP, /* array of nexthop_grp */ + NHA_GROUP_TYPE, /* u16 one of NEXTHOP_GRP_TYPE */ + /* if NHA_GROUP attribute is added, no other attributes can be set */ + + NHA_BLACKHOLE, /* flag; nexthop used to blackhole packets */ + /* if NHA_BLACKHOLE is added, OIF, GATEWAY, ENCAP can not be set */ + + NHA_OIF, /* u32; nexthop device */ + NHA_GATEWAY, /* be32 (IPv4) or in6_addr (IPv6) gw address */ + NHA_ENCAP_TYPE, /* u16; lwt encap type */ + NHA_ENCAP, /* lwt encap data */ + + /* NHA_OIF can be appended to dump request to return only + * nexthops using given device + */ + NHA_GROUPS, /* flag; only return nexthop groups in dump */ + NHA_MASTER, /* u32; only return nexthops with given master dev */ + + __NHA_MAX, +}; + +#define NHA_MAX (__NHA_MAX - 1) +#endif diff --git a/ip/Makefile b/ip/Makefile index 7ce6e91a..5ab78d7d 100644 --- a/ip/Makefile +++ b/ip/Makefile @@ -10,7 +10,8 @@ IPOBJ=ip.o ipaddress.o ipaddrlabel.o iproute.o iprule.o ipnetns.o \ link_iptnl.o link_gre6.o iplink_bond.o iplink_bond_slave.o iplink_hsr.o \ iplink_bridge.o iplink_bridge_slave.o ipfou.o iplink_ipvlan.o \ iplink_geneve.o iplink_vrf.o iproute_lwtunnel.o ipmacsec.o ipila.o \ - ipvrf.o iplink_xstats.o ipseg6.o iplink_netdevsim.o iplink_rmnet.o + ipvrf.o iplink_xstats.o ipseg6.o iplink_netdevsim.o iplink_rmnet.o \ + ipnexthop.o RTMONOBJ=rtmon.o diff --git a/ip/ip.c b/ip/ip.c index b71ae816..b46fd8dd 100644 --- a/ip/ip.c +++ b/ip/ip.c @@ -50,7 +50,7 @@ static void usage(void) "where OBJECT := { link | address | addrlabel | route | rule | neigh | ntable |\n" " tunnel | tuntap | maddress | mroute | mrule | monitor | xfrm |\n" " netns | l2tp | fou | macsec | tcp_metrics | token | netconf | ila |\n" - " vrf | sr }\n" + " vrf | sr | nexthop }\n" " OPTIONS := { -V[ersion] | -s[tatistics] | -d[etails] | -r[esolve] |\n" " -h[uman-readable] | -iec | -j[son] | -p[retty] |\n" " -f[amily] { inet | inet6 | mpls | bridge | link } |\n" @@ -100,6 +100,7 @@ static const struct cmd { { "netconf", do_ipnetconf }, { "vrf", do_ipvrf}, { "sr", do_seg6 }, + { "nexthop", do_ipnh }, { "help", do_help }, { 0 } }; diff --git a/ip/ip_common.h b/ip/ip_common.h index b4aa34a7..cd916ec8 100644 --- a/ip/ip_common.h +++ b/ip/ip_common.h @@ -47,6 +47,7 @@ int print_prefix(struct nlmsghdr *n, void *arg); int print_rule(struct nlmsghdr *n, void *arg); int print_netconf(struct rtnl_ctrl_data *ctrl, struct nlmsghdr *n, void *arg); +int print_nexthop(struct nlmsghdr *n, void *arg); void netns_map_init(void); void netns_nsid_socket_init(void); int print_nsid(struct nlmsghdr *n, void *arg); @@ -80,6 +81,7 @@ int do_ipvrf(int argc, char **argv); void vrf_reset(void); int netns_identify_pid(const char *pidstr, char *name, int len); int do_seg6(int argc, char **argv); +int do_ipnh(int argc, char **argv); int iplink_get(char *name, __u32 filt_mask); int iplink_ifla_xstats(int argc, char **argv); @@ -134,7 +136,8 @@ int bond_parse_xstats(struct link_util *lu, int argc, char **argv); int bond_print_xstats(struct nlmsghdr *n, void *arg); /* iproute_lwtunnel.c */ -int lwt_parse_encap(struct rtattr *rta, size_t len, int *argcp, char ***argvp); +int lwt_parse_encap(struct rtattr *rta, size_t len, int *argcp, char ***argvp, + int encap_attr, int encap_type_attr); void lwt_print_encap(FILE *fp, struct rtattr *encap_type, struct rtattr *encap); /* iplink_xdp.c */ @@ -155,5 +158,8 @@ int name_is_vrf(const char *name); #endif void print_num(FILE *fp, unsigned int width, uint64_t count); - +void print_rt_flags(FILE *fp, unsigned int flags); +void print_rta_if(FILE *fp, const struct rtattr *rta, const char *prefix); +void print_rta_gateway(FILE *fp, unsigned char family, + const struct rtattr *rta); #endif /* _IP_COMMON_H_ */ diff --git a/ip/ipmonitor.c b/ip/ipmonitor.c index 9ecc7fd2..685be52c 100644 --- a/ip/ipmonitor.c +++ b/ip/ipmonitor.c @@ -84,6 +84,12 @@ static int accept_msg(struct rtnl_ctrl_data *ctrl, } } + case RTM_NEWNEXTHOP: + case RTM_DELNEXTHOP: + print_headers(fp, "[NEXTHOP]", ctrl); + print_nexthop(n, arg); + return 0; + case RTM_NEWLINK: case RTM_DELLINK: ll_remember_index(n, NULL); @@ -161,6 +167,7 @@ static int accept_msg(struct rtnl_ctrl_data *ctrl, int do_ipmonitor(int argc, char **argv) { + int lnexthop = 0, nh_set = 1; char *file = NULL; unsigned int groups = 0; int llink = 0; @@ -202,30 +209,42 @@ int do_ipmonitor(int argc, char **argv) } else if (matches(*argv, "link") == 0) { llink = 1; groups = 0; + nh_set = 0; } else if (matches(*argv, "address") == 0) { laddr = 1; groups = 0; + nh_set = 0; } else if (matches(*argv, "route") == 0) { lroute = 1; groups = 0; + nh_set = 0; } else if (matches(*argv, "mroute") == 0) { lmroute = 1; groups = 0; + nh_set = 0; } else if (matches(*argv, "prefix") == 0) { lprefix = 1; groups = 0; + nh_set = 0; } else if (matches(*argv, "neigh") == 0) { lneigh = 1; groups = 0; + nh_set = 0; } else if (matches(*argv, "netconf") == 0) { lnetconf = 1; groups = 0; + nh_set = 0; } else if (matches(*argv, "rule") == 0) { lrule = 1; groups = 0; + nh_set = 0; } else if (matches(*argv, "nsid") == 0) { lnsid = 1; groups = 0; + nh_set = 0; + } else if (matches(*argv, "nexthop") == 0) { + lnexthop = 1; + groups = 0; } else if (strcmp(*argv, "all") == 0) { prefix_banner = 1; } else if (matches(*argv, "all-nsid") == 0) { @@ -297,6 +316,9 @@ int do_ipmonitor(int argc, char **argv) if (lnsid) { groups |= nl_mgrp(RTNLGRP_NSID); } + if (nh_set) + lnexthop = 1; + if (file) { FILE *fp; int err; @@ -313,6 +335,12 @@ int do_ipmonitor(int argc, char **argv) if (rtnl_open(&rth, groups) < 0) exit(1); + + if (lnexthop && rtnl_add_nl_group(&rth, RTNLGRP_NEXTHOP) < 0) { + fprintf(stderr, "Failed to add nexthop group to list\n"); + exit(1); + } + if (listen_all_nsid && rtnl_listen_all_nsid(&rth) < 0) exit(1); diff --git a/ip/ipnexthop.c b/ip/ipnexthop.c new file mode 100644 index 00000000..97f09e74 --- /dev/null +++ b/ip/ipnexthop.c @@ -0,0 +1,558 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * ip nexthop + * + * Copyright (c) 2017-19 David Ahern + */ + +#include +#include +#include +#include +#include + +#include "utils.h" +#include "ip_common.h" + +static struct { + unsigned int flushed; + unsigned int groups; + unsigned int ifindex; + unsigned int master; +} filter; + +enum { + IPNH_LIST, + IPNH_FLUSH, +}; + +#define RTM_NHA(h) ((struct rtattr *)(((char *)(h)) + \ + NLMSG_ALIGN(sizeof(struct nhmsg)))) + +static void usage(void) __attribute__((noreturn)); + +static void usage(void) +{ + fprintf(stderr, + "Usage: ip nexthop { list | flush } SELECTOR\n" + " ip nexthop { add | replace } id ID NH [ protocol ID ]\n" + " ip nexthop { get| del } id ID\n" + "SELECTOR := [ id ID ] [ dev DEV ] [ vrf NAME ] [ master DEV ]\n" + " [ groups ]\n" + "NH := { blackhole | [ via ADDRESS ] [ dev DEV ] [ onlink ]\n" + " [ encap ENCAPTYPE ENCAPHDR ] | group GROUP ] }\n" + "GROUP := [ id[,weight]>//... ]\n" + "ENCAPTYPE := [ mpls ]\n" + "ENCAPHDR := [ MPLSLABEL ]\n"); + exit(-1); +} + +static int nh_dump_filter(struct nlmsghdr *nlh, int reqlen) +{ + int err; + + if (filter.ifindex) { + err = addattr32(nlh, reqlen, NHA_OIF, filter.ifindex); + if (err) + return err; + } + + if (filter.groups) { + addattr_l(nlh, reqlen, NHA_GROUPS, NULL, 0); + if (err) + return err; + } + + if (filter.master) { + addattr32(nlh, reqlen, NHA_MASTER, filter.master); + if (err) + return err; + } + + return 0; +} + +static struct rtnl_handle rth_del = { .fd = -1 }; + +static int delete_nexthop(__u32 id) +{ + struct { + struct nlmsghdr n; + struct nhmsg nhm; + char buf[64]; + } req = { + .n.nlmsg_len = NLMSG_LENGTH(sizeof(struct nhmsg)), + .n.nlmsg_flags = NLM_F_REQUEST, + .n.nlmsg_type = RTM_DELNEXTHOP, + .nhm.nh_family = AF_UNSPEC, + }; + + req.n.nlmsg_seq = ++rth_del.seq; + + addattr32(&req.n, sizeof(req), NHA_ID, id); + + if (rtnl_talk(&rth_del, &req.n, NULL) < 0) + return -1; + return 0; +} + +static int flush_nexthop(struct nlmsghdr *nlh, void *arg) +{ + struct nhmsg *nhm = NLMSG_DATA(nlh); + struct rtattr *tb[NHA_MAX+1]; + __u32 id = 0; + int len; + + len = nlh->nlmsg_len - NLMSG_SPACE(sizeof(*nhm)); + if (len < 0) { + fprintf(stderr, "BUG: wrong nlmsg len %d\n", len); + return -1; + } + + parse_rtattr(tb, NHA_MAX, RTM_NHA(nhm), len); + if (tb[NHA_ID]) + id = rta_getattr_u32(tb[NHA_ID]); + + if (id && !delete_nexthop(id)) + filter.flushed++; + + return 0; +} + +static int ipnh_flush(unsigned int all) +{ + int rc = -2; + + if (all) { + filter.groups = 1; + filter.ifindex = 0; + filter.master = 0; + } + + if (rtnl_open(&rth_del, 0) < 0) { + fprintf(stderr, "Cannot open rtnetlink\n"); + return EXIT_FAILURE; + } +again: + if (rtnl_nexthopdump_req(&rth, preferred_family, nh_dump_filter) < 0) { + perror("Cannot send dump request"); + goto out; + } + + if (rtnl_dump_filter(&rth, flush_nexthop, stdout) < 0) { + fprintf(stderr, "Dump terminated. Failed to flush nexthops\n"); + goto out; + } + + /* if deleting all, then remove groups first */ + if (all && filter.groups) { + filter.groups = 0; + goto again; + } + + rc = 0; +out: + rtnl_close(&rth_del); + if (!filter.flushed) + printf("Nothing to flush\n"); + else + printf("Flushed %d nexthops\n", filter.flushed); + + return rc; +} + +static void print_nh_group(FILE *fp, const struct rtattr *grps_attr) +{ + struct nexthop_grp *nhg = RTA_DATA(grps_attr); + int num = RTA_PAYLOAD(grps_attr) / sizeof(*nhg); + int i; + + if (!num || num * sizeof(*nhg) != RTA_PAYLOAD(grps_attr)) { + fprintf(fp, ""); + return; + } + + open_json_array(PRINT_JSON, "group"); + print_string(PRINT_FP, NULL, "%s", "group "); + for (i = 0; i < num; ++i) { + open_json_object(NULL); + + if (i) + print_string(PRINT_FP, NULL, "%s", "/"); + + print_uint(PRINT_ANY, "id", "%u", nhg[i].id); + if (nhg[i].weight) + print_uint(PRINT_ANY, "weight", ",%u", nhg[i].weight + 1); + + close_json_object(); + } + close_json_array(PRINT_JSON, NULL); +} + +int print_nexthop(struct nlmsghdr *n, void *arg) +{ + struct nhmsg *nhm = NLMSG_DATA(n); + struct rtattr *tb[NHA_MAX+1]; + FILE *fp = (FILE *)arg; + int len; + + SPRINT_BUF(b1); + + if (n->nlmsg_type != RTM_DELNEXTHOP && + n->nlmsg_type != RTM_NEWNEXTHOP) { + fprintf(stderr, "Not a nexthop: %08x %08x %08x\n", + n->nlmsg_len, n->nlmsg_type, n->nlmsg_flags); + return -1; + } + + len = n->nlmsg_len - NLMSG_SPACE(sizeof(*nhm)); + if (len < 0) { + close_json_object(); + fprintf(stderr, "BUG: wrong nlmsg len %d\n", len); + return -1; + } + + parse_rtattr(tb, NHA_MAX, RTM_NHA(nhm), len); + + open_json_object(NULL); + + if (n->nlmsg_type == RTM_DELROUTE) + print_bool(PRINT_ANY, "deleted", "Deleted ", true); + + if (tb[NHA_ID]) + print_uint(PRINT_ANY, "id", "id %u ", + rta_getattr_u32(tb[NHA_ID])); + + if (tb[NHA_GROUP]) + print_nh_group(fp, tb[NHA_GROUP]); + + if (tb[NHA_ENCAP]) + lwt_print_encap(fp, tb[NHA_ENCAP_TYPE], tb[NHA_ENCAP]); + + if (tb[NHA_GATEWAY]) + print_rta_gateway(fp, nhm->nh_family, tb[NHA_GATEWAY]); + + if (tb[NHA_OIF]) + print_rta_if(fp, tb[NHA_OIF], "dev"); + + if (nhm->nh_scope != RT_SCOPE_UNIVERSE || show_details > 0) { + print_string(PRINT_ANY, "scope", "scope %s ", + rtnl_rtscope_n2a(nhm->nh_scope, b1, sizeof(b1))); + } + + if (tb[NHA_BLACKHOLE]) + print_null(PRINT_ANY, "blackhole", "blackhole", NULL); + + if (nhm->nh_protocol != RTPROT_UNSPEC || show_details > 0) { + print_string(PRINT_ANY, "protocol", "proto %s ", + rtnl_rtprot_n2a(nhm->nh_protocol, b1, sizeof(b1))); + } + + if (tb[NHA_OIF]) + print_rt_flags(fp, nhm->nh_flags); + + print_string(PRINT_FP, NULL, "%s", "\n"); + close_json_object(); + fflush(fp); + + return 0; +} + +static int add_nh_group_attr(struct nlmsghdr *n, int maxlen, char *argv) +{ + struct nexthop_grp *grps; + int count = 0, i; + char *sep, *wsep; + + if (*argv != '\0') + count = 1; + + /* separator is '/' */ + sep = strchr(argv, '/'); + while (sep) { + count++; + sep = strchr(sep + 1, '/'); + } + + if (count == 0) + return -1; + + grps = calloc(count, sizeof(*grps)); + if (!grps) + return -1; + + for (i = 0; i < count; ++i) { + sep = strchr(argv, '/'); + if (sep) + *sep = '\0'; + + wsep = strchr(argv, ','); + if (wsep) + *wsep = '\0'; + + if (get_unsigned(&grps[i].id, argv, 0)) + return -1; + if (wsep) { + unsigned int w; + + wsep++; + if (get_unsigned(&w, wsep, 0) || w == 0 || w > 256) + invarg("\"weight\" is invalid\n", wsep); + grps[i].weight = w - 1; + } + + if (!sep) + break; + + argv = sep + 1; + } + + return addattr_l(n, maxlen, NHA_GROUP, grps, count * sizeof(*grps)); +} + +static int ipnh_modify(int cmd, unsigned int flags, int argc, char **argv) +{ + struct { + struct nlmsghdr n; + struct nhmsg nhm; + char buf[1024]; + } req = { + .n.nlmsg_len = NLMSG_LENGTH(sizeof(struct nhmsg)), + .n.nlmsg_flags = NLM_F_REQUEST | flags, + .n.nlmsg_type = cmd, + .nhm.nh_family = preferred_family, + }; + __u32 nh_flags = 0; + + while (argc > 0) { + if (!strcmp(*argv, "id")) { + __u32 id; + + NEXT_ARG(); + if (get_unsigned(&id, *argv, 0)) + invarg("invalid id value", *argv); + addattr32(&req.n, sizeof(req), NHA_ID, id); + } else if (!strcmp(*argv, "dev")) { + int ifindex; + + NEXT_ARG(); + ifindex = ll_name_to_index(*argv); + if (!ifindex) + invarg("Device does not exist\n", *argv); + addattr32(&req.n, sizeof(req), NHA_OIF, ifindex); + if (req.nhm.nh_family == AF_UNSPEC) + req.nhm.nh_family = AF_INET; + } else if (strcmp(*argv, "via") == 0) { + inet_prefix addr; + int family; + + NEXT_ARG(); + family = read_family(*argv); + if (family == AF_UNSPEC) + family = req.nhm.nh_family; + else + NEXT_ARG(); + get_addr(&addr, *argv, family); + if (req.nhm.nh_family == AF_UNSPEC) + req.nhm.nh_family = addr.family; + else if (req.nhm.nh_family != addr.family) + invarg("address family mismatch\n", *argv); + addattr_l(&req.n, sizeof(req), NHA_GATEWAY, + &addr.data, addr.bytelen); + } else if (strcmp(*argv, "encap") == 0) { + char buf[1024]; + struct rtattr *rta = (void *)buf; + + rta->rta_type = NHA_ENCAP; + rta->rta_len = RTA_LENGTH(0); + + lwt_parse_encap(rta, sizeof(buf), &argc, &argv, + NHA_ENCAP, NHA_ENCAP_TYPE); + + if (rta->rta_len > RTA_LENGTH(0)) { + addraw_l(&req.n, 1024, RTA_DATA(rta), + RTA_PAYLOAD(rta)); + } + } else if (!strcmp(*argv, "blackhole")) { + addattr_l(&req.n, sizeof(req), NHA_BLACKHOLE, NULL, 0); + if (req.nhm.nh_family == AF_UNSPEC) + req.nhm.nh_family = AF_INET; + } else if (!strcmp(*argv, "onlink")) { + nh_flags |= RTNH_F_ONLINK; + } else if (!strcmp(*argv, "group")) { + NEXT_ARG(); + + if (add_nh_group_attr(&req.n, sizeof(req), *argv)) + invarg("\"group\" value is invalid\n", *argv); + } else if (matches(*argv, "protocol") == 0) { + __u32 prot; + + NEXT_ARG(); + if (rtnl_rtprot_a2n(&prot, *argv)) + invarg("\"protocol\" value is invalid\n", *argv); + req.nhm.nh_protocol = prot; + } else if (strcmp(*argv, "help") == 0) { + usage(); + } else { + invarg("", *argv); + } + argc--; argv++; + } + + req.nhm.nh_flags = nh_flags; + + if (rtnl_talk(&rth, &req.n, NULL) < 0) + return -2; + + return 0; +} + +static int ipnh_get_id(__u32 id) +{ + struct { + struct nlmsghdr n; + struct nhmsg nhm; + char buf[1024]; + } req = { + .n.nlmsg_len = NLMSG_LENGTH(sizeof(struct nhmsg)), + .n.nlmsg_flags = NLM_F_REQUEST, + .n.nlmsg_type = RTM_GETNEXTHOP, + .nhm.nh_family = preferred_family, + }; + struct nlmsghdr *answer; + + addattr32(&req.n, sizeof(req), NHA_ID, id); + + if (rtnl_talk(&rth, &req.n, &answer) < 0) + return -2; + + new_json_obj(json); + + if (print_nexthop(answer, (void *)stdout) < 0) { + free(answer); + return -1; + } + + delete_json_obj(); + fflush(stdout); + + free(answer); + + return 0; +} + +static int ipnh_list_flush(int argc, char **argv, int action) +{ + unsigned int all = (argc == 0); + + while (argc > 0) { + if (!matches(*argv, "dev")) { + NEXT_ARG(); + filter.ifindex = ll_name_to_index(*argv); + if (!filter.ifindex) + invarg("Device does not exist\n", *argv); + } else if (!matches(*argv, "groups")) { + filter.groups = 1; + } else if (!matches(*argv, "master")) { + NEXT_ARG(); + filter.master = ll_name_to_index(*argv); + if (!filter.master) + invarg("Device does not exist\n", *argv); + } else if (matches(*argv, "vrf") == 0) { + NEXT_ARG(); + if (!name_is_vrf(*argv)) + invarg("Invalid VRF\n", *argv); + filter.master = ll_name_to_index(*argv); + if (!filter.master) + invarg("VRF does not exist\n", *argv); + } else if (!strcmp(*argv, "id")) { + __u32 id; + + NEXT_ARG(); + if (get_unsigned(&id, *argv, 0)) + invarg("invalid id value", *argv); + return ipnh_get_id(id); + } else if (matches(*argv, "help") == 0) { + usage(); + } else { + invarg("", *argv); + } + argc--; argv++; + } + + if (action == IPNH_FLUSH) + return ipnh_flush(all); + + if (rtnl_nexthopdump_req(&rth, preferred_family, nh_dump_filter) < 0) { + perror("Cannot send dump request"); + return -2; + } + + new_json_obj(json); + + if (rtnl_dump_filter(&rth, print_nexthop, stdout) < 0) { + fprintf(stderr, "Dump terminated\n"); + return -2; + } + + delete_json_obj(); + fflush(stdout); + + return 0; +} + +static int ipnh_get(int argc, char **argv) +{ + __u32 id = 0; + + while (argc > 0) { + if (!strcmp(*argv, "id")) { + NEXT_ARG(); + if (get_unsigned(&id, *argv, 0)) + invarg("invalid id value", *argv); + } else { + usage(); + } + argc--; argv++; + } + + if (!id) { + usage(); + return -1; + } + + return ipnh_get_id(id); +} + +int do_ipnh(int argc, char **argv) +{ + if (argc < 1) + return ipnh_list_flush(0, NULL, IPNH_LIST); + + if (!matches(*argv, "add")) + return ipnh_modify(RTM_NEWNEXTHOP, NLM_F_CREATE|NLM_F_EXCL, + argc-1, argv+1); + if (!matches(*argv, "replace")) + return ipnh_modify(RTM_NEWNEXTHOP, NLM_F_CREATE|NLM_F_REPLACE, + argc-1, argv+1); + if (!matches(*argv, "delete")) + return ipnh_modify(RTM_DELNEXTHOP, 0, argc-1, argv+1); + + if (!matches(*argv, "list") || + !matches(*argv, "show") || + !matches(*argv, "lst")) + return ipnh_list_flush(argc-1, argv+1, IPNH_LIST); + + if (!matches(*argv, "get")) + return ipnh_get(argc-1, argv+1); + + if (!matches(*argv, "flush")) + return ipnh_list_flush(argc-1, argv+1, IPNH_FLUSH); + + if (!matches(*argv, "help")) + usage(); + + fprintf(stderr, + "Command \"%s\" is unknown, try \"ip nexthop help\".\n", *argv); + exit(-1); +} diff --git a/ip/iproute.c b/ip/iproute.c index 2b3dcc5d..6b814225 100644 --- a/ip/iproute.c +++ b/ip/iproute.c @@ -80,7 +80,7 @@ static void usage(void) " [ table TABLE_ID ] [ proto RTPROTO ]\n" " [ scope SCOPE ] [ metric METRIC ]\n" " [ ttl-propagate { enabled | disabled } ]\n" - "INFO_SPEC := NH OPTIONS FLAGS [ nexthop NH ]...\n" + "INFO_SPEC := { NH | nhid ID } OPTIONS FLAGS [ nexthop NH ]...\n" "NH := [ encap ENCAPTYPE ENCAPHDR ] [ via [ FAMILY ] ADDRESS ]\n" " [ dev STRING ] [ weight NUMBER ] NHFLAGS\n" "FAMILY := [ inet | inet6 | mpls | bridge | link ]\n" @@ -349,7 +349,7 @@ static void print_rtax_features(FILE *fp, unsigned int features) "features", "%#llx ", of); } -static void print_rt_flags(FILE *fp, unsigned int flags) +void print_rt_flags(FILE *fp, unsigned int flags) { open_json_array(PRINT_JSON, is_json_context() ? "flags" : ""); @@ -394,8 +394,7 @@ static void print_rt_pref(FILE *fp, unsigned int pref) } } -static void print_rta_if(FILE *fp, const struct rtattr *rta, - const char *prefix) +void print_rta_if(FILE *fp, const struct rtattr *rta, const char *prefix) { const char *ifname = ll_index_to_name(rta_getattr_u32(rta)); @@ -532,17 +531,16 @@ static void print_rta_newdst(FILE *fp, const struct rtmsg *r, } } -static void print_rta_gateway(FILE *fp, const struct rtmsg *r, - const struct rtattr *rta) +void print_rta_gateway(FILE *fp, unsigned char family, const struct rtattr *rta) { - const char *gateway = format_host_rta(r->rtm_family, rta); + const char *gateway = format_host_rta(family, rta); if (is_json_context()) print_string(PRINT_JSON, "gateway", NULL, gateway); else { fprintf(fp, "via "); print_color_string(PRINT_FP, - ifa_family_color(r->rtm_family), + ifa_family_color(family), NULL, "%s ", gateway); } } @@ -679,7 +677,8 @@ static void print_rta_multipath(FILE *fp, const struct rtmsg *r, if (tb[RTA_NEWDST]) print_rta_newdst(fp, r, tb[RTA_NEWDST]); if (tb[RTA_GATEWAY]) - print_rta_gateway(fp, r, tb[RTA_GATEWAY]); + print_rta_gateway(fp, r->rtm_family, + tb[RTA_GATEWAY]); if (tb[RTA_VIA]) print_rta_via(fp, tb[RTA_VIA]); if (tb[RTA_FLOW]) @@ -810,6 +809,10 @@ int print_route(struct nlmsghdr *n, void *arg) print_string(PRINT_ANY, "src", "from %s ", b1); } + if (tb[RTA_NH_ID]) + print_uint(PRINT_ANY, "nhid", "nhid %u ", + rta_getattr_u32(tb[RTA_NH_ID])); + if (tb[RTA_NEWDST]) print_rta_newdst(fp, r, tb[RTA_NEWDST]); @@ -822,7 +825,7 @@ int print_route(struct nlmsghdr *n, void *arg) } if (tb[RTA_GATEWAY] && filter.rvia.bitlen != host_len) - print_rta_gateway(fp, r, tb[RTA_GATEWAY]); + print_rta_gateway(fp, r->rtm_family, tb[RTA_GATEWAY]); if (tb[RTA_VIA]) print_rta_via(fp, tb[RTA_VIA]); @@ -997,7 +1000,8 @@ static int parse_one_nh(struct nlmsghdr *n, struct rtmsg *r, } else if (strcmp(*argv, "encap") == 0) { int old_len = rta->rta_len; - if (lwt_parse_encap(rta, len, &argc, &argv)) + if (lwt_parse_encap(rta, len, &argc, &argv, + RTA_ENCAP, RTA_ENCAP_TYPE)) return -1; rtnh->rtnh_len += rta->rta_len - old_len; } else if (strcmp(*argv, "as") == 0) { @@ -1080,6 +1084,7 @@ static int iproute_modify(int cmd, unsigned int flags, int argc, char **argv) int table_ok = 0; int raw = 0; int type_ok = 0; + __u32 nhid = 0; if (cmd != RTM_DELROUTE) { req.r.rtm_protocol = RTPROT_BOOT; @@ -1358,6 +1363,11 @@ static int iproute_modify(int cmd, unsigned int flags, int argc, char **argv) } else if (strcmp(*argv, "nexthop") == 0) { nhs_ok = 1; break; + } else if (!strcmp(*argv, "nhid")) { + NEXT_ARG(); + if (get_u32(&nhid, *argv, 0)) + invarg("\"id\" value is invalid\n", *argv); + addattr32(&req.n, sizeof(req), RTA_NH_ID, nhid); } else if (matches(*argv, "protocol") == 0) { __u32 prot; @@ -1416,7 +1426,8 @@ static int iproute_modify(int cmd, unsigned int flags, int argc, char **argv) rta->rta_type = RTA_ENCAP; rta->rta_len = RTA_LENGTH(0); - lwt_parse_encap(rta, sizeof(buf), &argc, &argv); + lwt_parse_encap(rta, sizeof(buf), &argc, &argv, + RTA_ENCAP, RTA_ENCAP_TYPE); if (rta->rta_len > RTA_LENGTH(0)) addraw_l(&req.n, 1024 @@ -1519,7 +1530,7 @@ static int iproute_modify(int cmd, unsigned int flags, int argc, char **argv) req.r.rtm_type == RTN_UNSPEC) { if (cmd == RTM_DELROUTE) req.r.rtm_scope = RT_SCOPE_NOWHERE; - else if (!gw_ok && !nhs_ok) + else if (!gw_ok && !nhs_ok && !nhid) req.r.rtm_scope = RT_SCOPE_LINK; } } diff --git a/ip/iproute_lwtunnel.c b/ip/iproute_lwtunnel.c index 03217b8f..60f34a32 100644 --- a/ip/iproute_lwtunnel.c +++ b/ip/iproute_lwtunnel.c @@ -1111,7 +1111,8 @@ static int parse_encap_bpf(struct rtattr *rta, size_t len, int *argcp, return 0; } -int lwt_parse_encap(struct rtattr *rta, size_t len, int *argcp, char ***argvp) +int lwt_parse_encap(struct rtattr *rta, size_t len, int *argcp, char ***argvp, + int encap_attr, int encap_type_attr) { struct rtattr *nest; int argc = *argcp; @@ -1131,7 +1132,7 @@ int lwt_parse_encap(struct rtattr *rta, size_t len, int *argcp, char ***argvp) exit(-1); } - nest = rta_nest(rta, len, RTA_ENCAP); + nest = rta_nest(rta, len, encap_attr); switch (type) { case LWTUNNEL_ENCAP_MPLS: ret = parse_encap_mpls(rta, len, &argc, &argv); @@ -1164,7 +1165,7 @@ int lwt_parse_encap(struct rtattr *rta, size_t len, int *argcp, char ***argvp) rta_nest_end(rta, nest); - ret = rta_addattr16(rta, len, RTA_ENCAP_TYPE, type); + ret = rta_addattr16(rta, len, encap_type_attr, type); *argcp = argc; *argvp = argv; diff --git a/lib/libnetlink.c b/lib/libnetlink.c index 0d48a3d4..af2a3bbf 100644 --- a/lib/libnetlink.c +++ b/lib/libnetlink.c @@ -25,6 +25,7 @@ #include #include #include +#include #include "libnetlink.h" @@ -173,6 +174,12 @@ void rtnl_set_strict_dump(struct rtnl_handle *rth) rth->flags |= RTNL_HANDLE_F_STRICT_CHK; } +int rtnl_add_nl_group(struct rtnl_handle *rth, unsigned int group) +{ + return setsockopt(rth->fd, SOL_NETLINK, NETLINK_ADD_MEMBERSHIP, + &group, sizeof(group)); +} + void rtnl_close(struct rtnl_handle *rth) { if (rth->fd >= 0) { @@ -246,6 +253,32 @@ int rtnl_open(struct rtnl_handle *rth, unsigned int subscriptions) return rtnl_open_byproto(rth, subscriptions, NETLINK_ROUTE); } +int rtnl_nexthopdump_req(struct rtnl_handle *rth, int family, + req_filter_fn_t filter_fn) +{ + struct { + struct nlmsghdr nlh; + struct nhmsg nhm; + char buf[128]; + } req = { + .nlh.nlmsg_len = NLMSG_LENGTH(sizeof(struct nhmsg)), + .nlh.nlmsg_type = RTM_GETNEXTHOP, + .nlh.nlmsg_flags = NLM_F_DUMP | NLM_F_REQUEST, + .nlh.nlmsg_seq = rth->dump = ++rth->seq, + .nhm.nh_family = family, + }; + + if (filter_fn) { + int err; + + err = filter_fn(&req.nlh, sizeof(req)); + if (err) + return err; + } + + return send(rth->fd, &req, sizeof(req), 0); +} + int rtnl_addrdump_req(struct rtnl_handle *rth, int family, req_filter_fn_t filter_fn) { @@ -1336,6 +1369,7 @@ struct rtattr *rta_nest(struct rtattr *rta, int maxlen, int type) struct rtattr *nest = RTA_TAIL(rta); rta_addattr_l(rta, maxlen, type, NULL, 0); + nest->rta_type |= NLA_F_NESTED; return nest; } diff --git a/man/man8/ip-nexthop.8 b/man/man8/ip-nexthop.8 new file mode 100644 index 00000000..da87ca3b --- /dev/null +++ b/man/man8/ip-nexthop.8 @@ -0,0 +1,196 @@ +.TH IP\-NEXTHOP 8 "30 May 2019" "iproute2" "Linux" +.SH "NAME" +ip-nexthop \- nexthop object management +.SH "SYNOPSIS" +.sp +.ad l +.in +8 +.ti -8 +.B ip +.RI "[ " ip-OPTIONS " ]" +.B nexthop +.RI " { " COMMAND " | " +.BR help " }" +.sp +.ti -8 + +.ti -8 +.BR "ip nexthop" " { " +.BR show " | " flush " } " +.I SELECTOR + +.ti -8 +.BR "ip nexthop" " { " add " | " replace " } id " +.I ID +.IR NH + +.ti -8 +.BR "ip nexthop" " { " get " | " del " } id " +.I ID + +.ti -8 +.IR SELECTOR " := " +.RB "[ " id +.IR ID " ] [ " +.B dev +.IR DEV " ] [ " +.B vrf +.IR NAME " ] [ " +.B master +.IR DEV " ] [ " +.BR groups " ] " + +.ti -8 +.IR NH " := { " +.BR blackhole " | [ " +.B via +.IR ADDRESS " ] [ " +.B dev +.IR DEV " ] [ " +.BR onlink " ] [ " +.B encap +.IR ENCAP " ] | " +.B group +.IR GROUP " } " + +.ti -8 +.IR ENCAP " := [ " +.IR ENCAP_MPLS " ] " + +.ti -8 +.IR ENCAP_MPLS " := " +.BR mpls " [ " +.IR LABEL " ] [" +.B ttl +.IR TTL " ]" + +.ti -8 +.IR GROUP " := " +.BR id "[," weight "[/...]" + +.SH DESCRIPTION +.B ip nexthop +is used to manipulate entries in the kernel's nexthop tables. +.TP +ip nexthop add id ID +add new nexthop entry +.TP +ip nexthop replace id ID +change the configuration of a nexthop or add new one +.RS +.TP +.BI via " [ FAMILY ] ADDRESS" +the address of the nexthop router, in the address family FAMILY. +Address family must match address family of nexthop instance. +.TP +.BI dev " NAME" +is the output device. +.TP +.B onlink +pretend that the nexthop is directly attached to this link, +even if it does not match any interface prefix. +.TP +.BI encap " ENCAPTYPE ENCAPHDR" +attach tunnel encapsulation attributes to this route. +.sp +.I ENCAPTYPE +is a string specifying the supported encapsulation type. Namely: + +.in +8 +.BI mpls +- encapsulation type MPLS +.sp +.in -8 +.I ENCAPHDR +is a set of encapsulation attributes specific to the +.I ENCAPTYPE. + +.in +8 +.B mpls +.in +2 +.I MPLSLABEL +- mpls label stack with labels separated by +.I "/" +.sp + +.B ttl +.I TTL +- TTL to use for MPLS header or 0 to inherit from IP header +.in -2 + +.TP +.BI group " GROUP" +create a nexthop group. Group specification is id with an optional +weight (id,weight) and a '/' as a separator between entries. +.TP +.B blackhole +create a blackhole nexthop +.RE + +.TP +ip nexthop delete id ID +delete nexthop with given id. +.RE + +.TP +ip nexthop show +show the contents of the nexthop table or the nexthops +selected by some criteria. +.RS +.TP +.BI dev " DEV " +show the nexthops using the given device. +.TP +.BI vrf " NAME " +show the nexthops using devices associated with the vrf name +.TP +.BI master " DEV " +show the nexthops using devices enslaved to given master device +.TP +.BI groups +show only nexthop groups +.RE +.TP +ip nexthop flush +flushes nexthops selected by some criteria. Criteria options are the same +as show. +.RE + +.TP +ip nexthop get id ID +get a single nexthop by id + +.SH EXAMPLES +.PP +ip nexthop ls +.RS 4 +Show all nexthop entries in the kernel. +.RE +.PP +ip nexthop add id 1 via 192.168.1.1 dev eth0 +.RS 4 +Adds an IPv4 nexthop with id 1 using the gateway 192.168.1.1 out device eth0. +.RE +.PP +ip nexthop add id 2 encap mpls 200/300 via 10.1.1.1 dev eth0 +.RS 4 +Adds an IPv4 nexthop with mpls encapsulation attributes attached to it. +.RE +.PP +ip nexthop add id 3 group 1/2 +.RS 4 +Adds a nexthop with id 3. The nexthop is a group using nexthops with ids +1 and 2 at equal weight. +.RE +.PP +ip nexthop add id 4 group 1,5/2,11 +.RS 4 +Adds a nexthop with id 4. The nexthop is a group using nexthops with ids +1 and 2 with nexthop 1 at weight 5 and nexthop 2 at weight 11. +.RE +.SH SEE ALSO +.br +.BR ip (8) + +.SH AUTHOR +Original Manpage by David Ahern diff --git a/man/man8/ip-route.8.in b/man/man8/ip-route.8.in index b9ae6e30..a61b263e 100644 --- a/man/man8/ip-route.8.in +++ b/man/man8/ip-route.8.in @@ -89,7 +89,9 @@ replace " } " .RB "{ " enabled " | " disabled " } ]" .ti -8 -.IR INFO_SPEC " := " "NH OPTIONS FLAGS" " [" +.IR INFO_SPEC " := { " NH " | " +.B nhid +.IR ID " } " "OPTIONS FLAGS" " [" .B nexthop .IR NH " ] ..." @@ -686,6 +688,10 @@ Discovery messages. Namely: - the route has a highest priority .sp +.TP +.BI nhid " ID" +use nexthop object with given id as nexthop specification. +.sp .TP .BI encap " ENCAPTYPE ENCAPHDR" attach tunnel encapsulation attributes to this route. @@ -1154,6 +1160,11 @@ ip -6 route add 2001:db8:1::/64 encap seg6 mode encap segs 2001:db8:42::1,2001:d .RS 4 Adds an IPv6 route with SRv6 encapsulation and two segments attached. .RE +.PP +ip route add 10.1.1.0/30 nhid 10 +.RS 4 +Adds an ipv4 route using nexthop object with id 10. +.RE .SH SEE ALSO .br .BR ip (8)