diff --git a/Makefile b/Makefile index 9dbb29f3..2e91c328 100644 --- a/Makefile +++ b/Makefile @@ -1,15 +1,15 @@ -ROOTDIR=$(DESTDIR) -PREFIX=/usr -LIBDIR=$(PREFIX)/lib -SBINDIR=/sbin -CONFDIR=/etc/iproute2 -DATADIR=$(PREFIX)/share -DOCDIR=$(DATADIR)/doc/iproute2 -MANDIR=$(DATADIR)/man -ARPDDIR=/var/lib/arpd +PREFIX?=/usr +LIBDIR?=$(PREFIX)/lib +SBINDIR?=/sbin +CONFDIR?=/etc/iproute2 +DATADIR?=$(PREFIX)/share +DOCDIR?=$(DATADIR)/doc/iproute2 +MANDIR?=$(DATADIR)/man +ARPDDIR?=/var/lib/arpd +KERNEL_INCLUDE?=/usr/include # Path to db_185.h include -DBM_INCLUDE:=$(ROOTDIR)/usr/include +DBM_INCLUDE:=$(DESTDIR)/usr/include SHARED_LIBS = y @@ -26,6 +26,9 @@ ADDLIB+=dnet_ntop.o dnet_pton.o #options for ipx ADDLIB+=ipx_ntop.o ipx_pton.o +#options for mpls +ADDLIB+=mpls_ntop.o mpls_pton.o + CC = gcc HOSTCC = gcc DEFINES += -D_GNU_SOURCE @@ -33,10 +36,10 @@ CCOPTS = -O2 WFLAGS := -Wall -Wstrict-prototypes -Wmissing-prototypes WFLAGS += -Wmissing-declarations -Wold-style-definition -Wformat=2 -CFLAGS = $(WFLAGS) $(CCOPTS) -I../include $(DEFINES) +CFLAGS := $(WFLAGS) $(CCOPTS) -I../include $(DEFINES) $(CFLAGS) YACCFLAGS = -d -t -v -SUBDIRS=lib ip tc bridge misc netem genl man +SUBDIRS=lib ip tc bridge misc netem genl tipc man LIBNETLINK=../lib/libnetlink.a ../lib/libutil.a LDLIBS += $(LIBNETLINK) diff --git a/README.iproute2+tc b/README.iproute2+tc index 6aa5d184..2a5638da 100644 --- a/README.iproute2+tc +++ b/README.iproute2+tc @@ -72,12 +72,16 @@ ip route add 10.11.12.0/24 dev eth1 via whatever realm 1 etc. The same thing can be made with rules. I still did not test ipchains, but they should work too. + +Setup and code example of BPF classifier and action can be found under +examples/bpf/, which should explain everything for getting started. + + Setup of rsvp and u32 classifiers is more hairy. If you read RSVP specs, you will understand how rsvp classifier works easily. What's about u32... That's example: - #! 
/bin/sh TC=/home/root/tc diff --git a/bridge/fdb.c b/bridge/fdb.c index c01a5020..278e55fd 100644 --- a/bridge/fdb.c +++ b/bridge/fdb.c @@ -131,12 +131,16 @@ int print_fdb(const struct sockaddr_nl *who, struct nlmsghdr *n, void *arg) if (ifindex) { char ifname[IF_NAMESIZE]; - if (if_indextoname(ifindex, ifname)) + if (!tb[NDA_LINK_NETNSID] && + if_indextoname(ifindex, ifname)) fprintf(fp, "via %s ", ifname); else fprintf(fp, "via ifindex %u ", ifindex); } } + if (tb[NDA_LINK_NETNSID]) + fprintf(fp, "link-netnsid %d ", + rta_getattr_u32(tb[NDA_LINK_NETNSID])); if (show_stats && tb[NDA_CACHEINFO]) { struct nda_cacheinfo *ci = RTA_DATA(tb[NDA_CACHEINFO]); @@ -155,7 +159,7 @@ int print_fdb(const struct sockaddr_nl *who, struct nlmsghdr *n, void *arg) if (r->ndm_flags & NTF_ROUTER) fprintf(fp, "router "); if (r->ndm_flags & NTF_EXT_LEARNED) - fprintf(fp, "external "); + fprintf(fp, "offload "); fprintf(fp, "%s\n", state_n2a(r->ndm_state)); return 0; @@ -316,7 +320,7 @@ static int fdb_modify(int cmd, int flags, int argc, char **argv) if (d == NULL || addr == NULL) { fprintf(stderr, "Device and address are required arguments.\n"); - exit(-1); + return -1; } /* Assume self */ @@ -331,7 +335,7 @@ static int fdb_modify(int cmd, int flags, int argc, char **argv) abuf, abuf+1, abuf+2, abuf+3, abuf+4, abuf+5) != 6) { fprintf(stderr, "Invalid mac address %s\n", addr); - exit(-1); + return -1; } addattr_l(&req.n, sizeof(req), NDA_LLADDR, abuf, ETH_ALEN); @@ -358,8 +362,8 @@ static int fdb_modify(int cmd, int flags, int argc, char **argv) return -1; } - if (rtnl_talk(&rth, &req.n, 0, 0, NULL) < 0) - exit(2); + if (rtnl_talk(&rth, &req.n, NULL, 0) < 0) + return -1; return 0; } diff --git a/bridge/link.c b/bridge/link.c index c8555f82..a9b1262d 100644 --- a/bridge/link.c +++ b/bridge/link.c @@ -227,6 +227,7 @@ static void usage(void) fprintf(stderr, " [ learning_sync {on | off} ]\n"); fprintf(stderr, " [ flood {on | off} ]\n"); fprintf(stderr, " [ hwmode {vepa | veb} ]\n"); + 
fprintf(stderr, " [ self ] [ master ]\n"); fprintf(stderr, " bridge link show [dev DEV]\n"); exit(-1); } @@ -283,31 +284,31 @@ static int brlink_modify(int argc, char **argv) } else if (strcmp(*argv, "guard") == 0) { NEXT_ARG(); if (!on_off("guard", &bpdu_guard, *argv)) - exit(-1); + return -1; } else if (strcmp(*argv, "hairpin") == 0) { NEXT_ARG(); if (!on_off("hairping", &hairpin, *argv)) - exit(-1); + return -1; } else if (strcmp(*argv, "fastleave") == 0) { NEXT_ARG(); if (!on_off("fastleave", &fast_leave, *argv)) - exit(-1); + return -1; } else if (strcmp(*argv, "root_block") == 0) { NEXT_ARG(); if (!on_off("root_block", &root_block, *argv)) - exit(-1); + return -1; } else if (strcmp(*argv, "learning") == 0) { NEXT_ARG(); if (!on_off("learning", &learning, *argv)) - exit(-1); + return -1; } else if (strcmp(*argv, "learning_sync") == 0) { NEXT_ARG(); if (!on_off("learning_sync", &learning_sync, *argv)) - exit(-1); + return -1; } else if (strcmp(*argv, "flood") == 0) { NEXT_ARG(); if (!on_off("flood", &flood, *argv)) - exit(-1); + return -1; } else if (strcmp(*argv, "cost") == 0) { NEXT_ARG(); cost = atoi(*argv); @@ -316,7 +317,19 @@ static int brlink_modify(int argc, char **argv) priority = atoi(*argv); } else if (strcmp(*argv, "state") == 0) { NEXT_ARG(); - state = atoi(*argv); + char *endptr; + size_t nstates = sizeof(port_states) / sizeof(*port_states); + state = strtol(*argv, &endptr, 10); + if (!(**argv != '\0' && *endptr == '\0')) { + for (state = 0; state < nstates; state++) + if (strcmp(port_states[state], *argv) == 0) + break; + if (state == nstates) { + fprintf(stderr, + "Error: invalid STP port state\n"); + return -1; + } + } } else if (strcmp(*argv, "hwmode") == 0) { NEXT_ARG(); flags = BRIDGE_FLAGS_SELF; @@ -328,10 +341,12 @@ static int brlink_modify(int argc, char **argv) fprintf(stderr, "Mode argument must be \"vepa\" or " "\"veb\".\n"); - exit(-1); + return -1; } } else if (strcmp(*argv, "self") == 0) { - flags = BRIDGE_FLAGS_SELF; + flags |= 
BRIDGE_FLAGS_SELF; + } else if (strcmp(*argv, "master") == 0) { + flags |= BRIDGE_FLAGS_MASTER; } else { usage(); } @@ -339,14 +354,14 @@ static int brlink_modify(int argc, char **argv) } if (d == NULL) { fprintf(stderr, "Device is a required argument.\n"); - exit(-1); + return -1; } req.ifm.ifi_index = ll_name_to_index(d); if (req.ifm.ifi_index == 0) { fprintf(stderr, "Cannot find bridge device \"%s\"\n", d); - exit(-1); + return -1; } /* Nested PROTINFO attribute. Contains: port flags, cost, priority and @@ -400,8 +415,8 @@ static int brlink_modify(int argc, char **argv) addattr_nest_end(&req.n, nest); } - if (rtnl_talk(&rth, &req.n, 0, 0, NULL) < 0) - exit(2); + if (rtnl_talk(&rth, &req.n, NULL, 0) < 0) + return -1; return 0; } diff --git a/bridge/mdb.c b/bridge/mdb.c index 6c1c938a..9a8ed540 100644 --- a/bridge/mdb.c +++ b/bridge/mdb.c @@ -145,12 +145,12 @@ static int mdb_show(int argc, char **argv) if (rtnl_wilddump_request(&rth, PF_BRIDGE, RTM_GETMDB) < 0) { perror("Cannot send dump request"); - exit(1); + return -1; } if (rtnl_dump_filter(&rth, print_mdb, stdout) < 0) { fprintf(stderr, "Dump terminated\n"); - exit(1); + return -1; } return 0; @@ -198,7 +198,7 @@ static int mdb_modify(int cmd, int flags, int argc, char **argv) if (d == NULL || grp == NULL || p == NULL) { fprintf(stderr, "Device, group address and port name are required arguments.\n"); - exit(-1); + return -1; } req.bpm.ifindex = ll_name_to_index(d); @@ -224,8 +224,8 @@ static int mdb_modify(int cmd, int flags, int argc, char **argv) addattr_l(&req.n, sizeof(req), MDBA_SET_ENTRY, &entry, sizeof(entry)); - if (rtnl_talk(&rth, &req.n, 0, 0, NULL) < 0) - exit(2); + if (rtnl_talk(&rth, &req.n, NULL, 0) < 0) + return -1; return 0; } diff --git a/bridge/monitor.c b/bridge/monitor.c index 9e1ed48c..d8341ec5 100644 --- a/bridge/monitor.c +++ b/bridge/monitor.c @@ -36,6 +36,7 @@ static void usage(void) } static int accept_msg(const struct sockaddr_nl *who, + struct rtnl_ctrl_data *ctrl, struct nlmsghdr 
*n, void *arg) { FILE *fp = arg; diff --git a/bridge/vlan.c b/bridge/vlan.c index 9f6c84ee..ac2f5231 100644 --- a/bridge/vlan.c +++ b/bridge/vlan.c @@ -80,7 +80,7 @@ static int vlan_modify(int cmd, int argc, char **argv) if (d == NULL || vid == -1) { fprintf(stderr, "Device and VLAN ID are required arguments.\n"); - exit(-1); + return -1; } req.ifm.ifi_index = ll_name_to_index(d); @@ -131,8 +131,8 @@ static int vlan_modify(int cmd, int argc, char **argv) addattr_nest_end(&req.n, afspec); - if (rtnl_talk(&rth, &req.n, 0, 0, NULL) < 0) - exit(2); + if (rtnl_talk(&rth, &req.n, NULL, 0) < 0) + return -1; return 0; } diff --git a/configure b/configure index c3dacdba..f1325df8 100755 --- a/configure +++ b/configure @@ -201,7 +201,7 @@ check_setns() { cat >$TMPDIR/setnstest.c < -int main(int argc, char **argv) +int main(int argc, char **argv) { (void)setns(0,0); return 0; @@ -249,6 +249,29 @@ EOF rm -f $TMPDIR/ipsettest.c $TMPDIR/ipsettest } +check_elf() +{ + cat >$TMPDIR/elftest.c < +#include +int main(void) +{ + Elf_Scn *scn; + GElf_Shdr shdr; + return elf_version(EV_CURRENT); +} +EOF + + if $CC -I$INCLUDE -o $TMPDIR/elftest $TMPDIR/elftest.c -lelf >/dev/null 2>&1 + then + echo "TC_CONFIG_ELF:=y" >>Config + echo "yes" + else + echo "no" + fi + rm -f $TMPDIR/elftest.c $TMPDIR/elftest +} + check_selinux() # SELinux is a compile time option in the ss utility { @@ -287,5 +310,8 @@ check_setns echo -n "SELinux support: " check_selinux +echo -n "ELF support: " +check_elf + echo -e "\nDocs" check_docs diff --git a/doc/ip-cref.tex b/doc/ip-cref.tex index e7a79a5d..ea147950 100644 --- a/doc/ip-cref.tex +++ b/doc/ip-cref.tex @@ -1432,6 +1432,17 @@ database. even if it does not match any interface prefix. One application of this option may be found in~\cite{IP-TUNNELS}. +\item \verb|pref PREF| + +--- the IPv6 route preference. +\verb|PREF| PREF is a string specifying the route preference as defined in +RFC4191 for Router Discovery messages. 
Namely: +\begin{itemize} +\item \verb|low| --- the route has a lowest priority. +\item \verb|medium| --- the route has a default priority. +\item \verb|high| --- the route has a highest priority. +\end{itemize} + \end{itemize} diff --git a/examples/bpf/bpf_agent.c b/examples/bpf/bpf_agent.c new file mode 100644 index 00000000..426b8800 --- /dev/null +++ b/examples/bpf/bpf_agent.c @@ -0,0 +1,258 @@ +/* + * eBPF user space agent part + * + * Simple, _self-contained_ user space agent for the eBPF kernel + * ebpf_prog.c program, which gets all map fds passed from tc via unix + * domain socket in one transaction and can thus keep referencing + * them from user space in order to read out (or possibly modify) + * map data. Here, just as a minimal example to display counters. + * + * The agent only uses the bpf(2) syscall API to read or possibly + * write to eBPF maps, it doesn't need to be aware of the low-level + * bytecode parts and/or ELF parsing bits. + * + * ! For more details, see header comment in bpf_prog.c ! + * + * gcc bpf_agent.c -o bpf_agent -Wall -O2 + * + * For example, a more complex user space agent could run on each + * host, reading and writing into eBPF maps used by tc classifier + * and actions. It would thus allow for implementing a distributed + * tc architecture, for example, which would push down central + * policies into eBPF maps, and thus altering run-time behaviour. + * + * -- Happy eBPF hacking! ;) + */ + +#define _GNU_SOURCE + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +/* Just some misc macros as min(), offsetof(), etc. */ +#include "../../include/utils.h" +/* Common code from fd passing. 
*/ +#include "../../include/bpf_scm.h" +/* Common, shared definitions with ebpf_prog.c */ +#include "bpf_shared.h" +/* Mini syscall wrapper */ +#include "bpf_sys.h" + +static void bpf_dump_drops(int fd) +{ + int cpu, max; + + max = sysconf(_SC_NPROCESSORS_ONLN); + + printf(" `- number of drops:"); + for (cpu = 0; cpu < max; cpu++) { + long drops; + + assert(bpf_lookup_elem(fd, &cpu, &drops) == 0); + printf("\tcpu%d: %5ld", cpu, drops); + } + printf("\n"); +} + +static void bpf_dump_queue(int fd) +{ + /* Just for the same of the example. */ + int max_queue = 4, i; + + printf(" | nic queues:"); + for (i = 0; i < max_queue; i++) { + struct count_queue cq; + int ret; + + memset(&cq, 0, sizeof(cq)); + ret = bpf_lookup_elem(fd, &i, &cq); + assert(ret == 0 || (ret < 0 && errno == ENOENT)); + + printf("\tq%d:[pkts: %ld, mis: %ld]", + i, cq.total, cq.mismatch); + } + printf("\n"); +} + +static void bpf_dump_proto(int fd) +{ + uint8_t protos[] = { IPPROTO_TCP, IPPROTO_UDP, IPPROTO_ICMP }; + char *names[] = { "tcp", "udp", "icmp" }; + int i; + + printf(" ` protos:"); + for (i = 0; i < ARRAY_SIZE(protos); i++) { + struct count_tuple ct; + int ret; + + memset(&ct, 0, sizeof(ct)); + ret = bpf_lookup_elem(fd, &protos[i], &ct); + assert(ret == 0 || (ret < 0 && errno == ENOENT)); + + printf("\t%s:[pkts: %ld, bytes: %ld]", + names[i], ct.packets, ct.bytes); + } + printf("\n"); +} + +static void bpf_dump_map_data(int *tfd) +{ + int i; + + for (i = 0; i < 30; i++) { + const int period = 5; + + printf("data, period: %dsec\n", period); + + bpf_dump_drops(tfd[BPF_MAP_ID_DROPS]); + bpf_dump_queue(tfd[BPF_MAP_ID_QUEUE]); + bpf_dump_proto(tfd[BPF_MAP_ID_PROTO]); + + sleep(period); + } +} + +static void bpf_info_loop(int *fds, struct bpf_map_aux *aux) +{ + int i, tfd[BPF_MAP_ID_MAX]; + + printf("ver: %d\nobj: %s\ndev: %lu\nino: %lu\nmaps: %u\n", + aux->uds_ver, aux->obj_name, aux->obj_st.st_dev, + aux->obj_st.st_ino, aux->num_ent); + + for (i = 0; i < aux->num_ent; i++) { + 
printf("map%d:\n", i); + printf(" `- fd: %u\n", fds[i]); + printf(" | serial: %u\n", aux->ent[i].id); + printf(" | type: %u\n", aux->ent[i].type); + printf(" | max elem: %u\n", aux->ent[i].max_elem); + printf(" | size key: %u\n", aux->ent[i].size_key); + printf(" ` size val: %u\n", aux->ent[i].size_value); + + tfd[aux->ent[i].id] = fds[i]; + } + + bpf_dump_map_data(tfd); +} + +static void bpf_map_get_from_env(int *tfd) +{ + char key[64], *val; + int i; + + for (i = 0; i < BPF_MAP_ID_MAX; i++) { + memset(key, 0, sizeof(key)); + snprintf(key, sizeof(key), "BPF_MAP%d", i); + + val = secure_getenv(key); + assert(val != NULL); + + tfd[i] = atoi(val); + } +} + +static int bpf_map_set_recv(int fd, int *fds, struct bpf_map_aux *aux, + unsigned int entries) +{ + struct bpf_map_set_msg msg; + int *cmsg_buf, min_fd, i; + char *amsg_buf, *mmsg_buf; + + cmsg_buf = bpf_map_set_init(&msg, NULL, 0); + amsg_buf = (char *)msg.aux.ent; + mmsg_buf = (char *)&msg.aux; + + for (i = 0; i < entries; i += min_fd) { + struct cmsghdr *cmsg; + int ret; + + min_fd = min(BPF_SCM_MAX_FDS * 1U, entries - i); + + bpf_map_set_init_single(&msg, min_fd); + + ret = recvmsg(fd, &msg.hdr, 0); + if (ret <= 0) + return ret ? 
: -1; + + cmsg = CMSG_FIRSTHDR(&msg.hdr); + if (!cmsg || cmsg->cmsg_type != SCM_RIGHTS) + return -EINVAL; + if (msg.hdr.msg_flags & MSG_CTRUNC) + return -EIO; + + min_fd = (cmsg->cmsg_len - sizeof(*cmsg)) / sizeof(fd); + if (min_fd > entries || min_fd <= 0) + return -1; + + memcpy(&fds[i], cmsg_buf, sizeof(fds[0]) * min_fd); + memcpy(&aux->ent[i], amsg_buf, sizeof(aux->ent[0]) * min_fd); + memcpy(aux, mmsg_buf, offsetof(struct bpf_map_aux, ent)); + + if (i + min_fd == aux->num_ent) + break; + } + + return 0; +} + +int main(int argc, char **argv) +{ + int fds[BPF_SCM_MAX_FDS]; + struct bpf_map_aux aux; + struct sockaddr_un addr; + int fd, ret, i; + + /* When arguments are being passed, we take it as a path + * to a Unix domain socket, otherwise we grab the fds + * from the environment to demonstrate both possibilities. + */ + if (argc == 1) { + int tfd[BPF_MAP_ID_MAX]; + + bpf_map_get_from_env(tfd); + bpf_dump_map_data(tfd); + + return 0; + } + + fd = socket(AF_UNIX, SOCK_DGRAM, 0); + if (fd < 0) { + fprintf(stderr, "Cannot open socket: %s\n", + strerror(errno)); + exit(1); + } + + memset(&addr, 0, sizeof(addr)); + addr.sun_family = AF_UNIX; + strncpy(addr.sun_path, argv[argc - 1], sizeof(addr.sun_path)); + + ret = bind(fd, (struct sockaddr *)&addr, sizeof(addr)); + if (ret < 0) { + fprintf(stderr, "Cannot bind to socket: %s\n", + strerror(errno)); + exit(1); + } + + memset(fds, 0, sizeof(fds)); + memset(&aux, 0, sizeof(aux)); + + ret = bpf_map_set_recv(fd, fds, &aux, BPF_SCM_MAX_FDS); + if (ret >= 0) + bpf_info_loop(fds, &aux); + + for (i = 0; i < aux.num_ent; i++) + close(fds[i]); + + close(fd); + return 0; +} diff --git a/examples/bpf/bpf_funcs.h b/examples/bpf/bpf_funcs.h new file mode 100644 index 00000000..1545fa9d --- /dev/null +++ b/examples/bpf/bpf_funcs.h @@ -0,0 +1,58 @@ +#ifndef __BPF_FUNCS__ +#define __BPF_FUNCS__ + +/* Misc macros. 
*/ +#ifndef __maybe_unused +# define __maybe_unused __attribute__ ((__unused__)) +#endif + +#ifndef __section +# define __section(NAME) __attribute__((section(NAME), used)) +#endif + +#ifndef offsetof +# define offsetof __builtin_offsetof +#endif + +#ifndef htons +# define htons(x) __constant_htons((x)) +#endif + +#ifndef likely +# define likely(x) __builtin_expect(!!(x), 1) +#endif + +#ifndef unlikely +# define unlikely(x) __builtin_expect(!!(x), 0) +#endif + +/* The verifier will translate them to actual function calls. */ +static void *(*bpf_map_lookup_elem)(void *map, void *key) __maybe_unused = + (void *) BPF_FUNC_map_lookup_elem; + +static int (*bpf_map_update_elem)(void *map, void *key, void *value, + unsigned long long flags) __maybe_unused = + (void *) BPF_FUNC_map_update_elem; + +static int (*bpf_map_delete_elem)(void *map, void *key) __maybe_unused = + (void *) BPF_FUNC_map_delete_elem; + +static unsigned int (*get_smp_processor_id)(void) __maybe_unused = + (void *) BPF_FUNC_get_smp_processor_id; + +static unsigned int (*get_prandom_u32)(void) __maybe_unused = + (void *) BPF_FUNC_get_prandom_u32; + +/* LLVM built-in functions that an eBPF C program may use to emit + * BPF_LD_ABS and BPF_LD_IND instructions. + */ +unsigned long long load_byte(void *skb, unsigned long long off) + asm ("llvm.bpf.load.byte"); + +unsigned long long load_half(void *skb, unsigned long long off) + asm ("llvm.bpf.load.half"); + +unsigned long long load_word(void *skb, unsigned long long off) + asm ("llvm.bpf.load.word"); + +#endif /* __BPF_FUNCS__ */ diff --git a/examples/bpf/bpf_prog.c b/examples/bpf/bpf_prog.c new file mode 100644 index 00000000..009febd0 --- /dev/null +++ b/examples/bpf/bpf_prog.c @@ -0,0 +1,496 @@ +/* + * eBPF kernel space program part + * + * Toy eBPF program for demonstration purposes, some parts derived from + * kernel tree's samples/bpf/sockex2_kern.c example. 
+ * + * More background on eBPF, kernel tree: Documentation/networking/filter.txt + * + * Note, this file is rather large, and most classifiers and actions are + * likely smaller to accomplish one specific use-case and are tailored + * for high performance. For performance reasons, you might also have the + * classifier and action already merged inside the classifier. + * + * In order to show various features it serves as a bigger programming + * example, which you should feel free to rip apart and experiment with. + * + * Compilation, configuration example: + * + * Note: as long as the BPF backend in LLVM is still experimental, + * you need to build LLVM with --enable-experimental-targets=BPF + * Also, make sure your 4.1+ kernel is compiled with CONFIG_BPF_SYSCALL=y, + * and you have libelf.h and gelf.h headers and can link tc against -lelf. + * + * In case you need to sync kernel headers, go to your kernel source tree: + * # make headers_install INSTALL_HDR_PATH=/usr/ + * + * $ export PATH=/home/<...>/llvm/Debug+Asserts/bin/:$PATH + * $ clang -O2 -emit-llvm -c bpf_prog.c -o - | llc -march=bpf -filetype=obj -o bpf.o + * $ objdump -h bpf.o + * [...] + * 3 classifier 000007f8 0000000000000000 0000000000000000 00000040 2**3 + * CONTENTS, ALLOC, LOAD, RELOC, READONLY, CODE + * 4 action-mark 00000088 0000000000000000 0000000000000000 00000838 2**3 + * CONTENTS, ALLOC, LOAD, RELOC, READONLY, CODE + * 5 action-rand 00000098 0000000000000000 0000000000000000 000008c0 2**3 + * CONTENTS, ALLOC, LOAD, RELOC, READONLY, CODE + * 6 maps 00000030 0000000000000000 0000000000000000 00000958 2**2 + * CONTENTS, ALLOC, LOAD, DATA + * 7 license 00000004 0000000000000000 0000000000000000 00000988 2**0 + * CONTENTS, ALLOC, LOAD, DATA + * [...] + * # echo 1 > /proc/sys/net/core/bpf_jit_enable + * $ gcc bpf_agent.c -o bpf_agent -Wall -O2 + * # ./bpf_agent /tmp/bpf-uds (e.g. 
on a different terminal) + * # tc filter add dev em1 parent 1: bpf obj bpf.o exp /tmp/bpf-uds flowid 1:1 \ + * action bpf obj bpf.o sec action-mark \ + * action bpf obj bpf.o sec action-rand ok + * # tc filter show dev em1 + * filter parent 1: protocol all pref 49152 bpf + * filter parent 1: protocol all pref 49152 bpf handle 0x1 flowid 1:1 bpf.o:[classifier] + * action order 1: bpf bpf.o:[action-mark] default-action pipe + * index 52 ref 1 bind 1 + * + * action order 2: bpf bpf.o:[action-rand] default-action pipe + * index 53 ref 1 bind 1 + * + * action order 3: gact action pass + * random type none pass val 0 + * index 38 ref 1 bind 1 + * + * The same program can also be installed on ingress side (as opposed to above + * egress configuration), e.g.: + * + * # tc qdisc add dev em1 handle ffff: ingress + * # tc filter add dev em1 parent ffff: bpf obj ... + * + * Notes on BPF agent: + * + * In the above example, the bpf_agent creates the unix domain socket + * natively. "tc exec" can also spawn a shell and hold the sockets there: + * + * # tc exec bpf imp /tmp/bpf-uds + * # tc filter add dev em1 parent 1: bpf obj bpf.o exp /tmp/bpf-uds flowid 1:1 \ + * action bpf obj bpf.o sec action-mark \ + * action bpf obj bpf.o sec action-rand ok + * sh-4.2# (shell spawned from tc exec) + * sh-4.2# bpf_agent + * [...] + * + * This will read out the fds from the environment and produce the same data dump + * as below. This has the advantage that the spawned shell owns the fds + * and thus if the agent is restarted, it can reattach to the same fds, also + * various programs can easily read/modify the data simultaneously from user + * space side. 
+ * + * If the shell is unnecessary, the agent can also just be spawned directly + * via tc exec: + * + * # tc exec bpf imp /tmp/bpf-uds run bpf_agent + * # tc filter add dev em1 parent 1: bpf obj bpf.o exp /tmp/bpf-uds flowid 1:1 \ + * action bpf obj bpf.o sec action-mark \ + * action bpf obj bpf.o sec action-rand ok + * + * BPF agent example output: + * + * ver: 1 + * obj: bpf.o + * dev: 64770 + * ino: 6045133 + * maps: 3 + * map0: + * `- fd: 4 + * | serial: 1 + * | type: 1 + * | max elem: 256 + * | size key: 1 + * ` size val: 16 + * map1: + * `- fd: 5 + * | serial: 2 + * | type: 1 + * | max elem: 1024 + * | size key: 4 + * ` size val: 16 + * map2: + * `- fd: 6 + * | serial: 3 + * | type: 2 + * | max elem: 64 + * | size key: 4 + * ` size val: 8 + * data, period: 5sec + * `- number of drops: cpu0: 0 cpu1: 0 cpu2: 0 cpu3: 0 + * | nic queues: q0:[pkts: 0, mis: 0] q1:[pkts: 0, mis: 0] q2:[pkts: 0, mis: 0] q3:[pkts: 0, mis: 0] + * ` protos: tcp:[pkts: 0, bytes: 0] udp:[pkts: 0, bytes: 0] icmp:[pkts: 0, bytes: 0] + * data, period: 5sec + * `- number of drops: cpu0: 5 cpu1: 0 cpu2: 0 cpu3: 1 + * | nic queues: q0:[pkts: 0, mis: 0] q1:[pkts: 0, mis: 0] q2:[pkts: 24, mis: 14] q3:[pkts: 0, mis: 0] + * ` protos: tcp:[pkts: 13, bytes: 1989] udp:[pkts: 10, bytes: 710] icmp:[pkts: 0, bytes: 0] + * data, period: 5sec + * `- number of drops: cpu0: 5 cpu1: 0 cpu2: 3 cpu3: 3 + * | nic queues: q0:[pkts: 0, mis: 0] q1:[pkts: 0, mis: 0] q2:[pkts: 39, mis: 21] q3:[pkts: 0, mis: 0] + * ` protos: tcp:[pkts: 20, bytes: 3549] udp:[pkts: 18, bytes: 1278] icmp:[pkts: 0, bytes: 0] + * [...] + * + * This now means, the below classifier and action pipeline has been loaded + * as eBPF bytecode into the kernel, the kernel has verified that the + * execution of the bytecode is "safe", and it has JITed the programs + * afterwards, so that upon invocation they're running on native speed. 
tc + * has transferred all map file descriptors to the bpf_agent via IPC and + * even after tc exits, the agent can read out or modify all map data. + * + * Note that the export to the uds is done only once in the classifier and + * not in the action. It's enough to export the (here) shared descriptors + * once. + * + * If you need to disassemble the generated JIT image (echo with 2), the + * kernel tree has under tools/net/ a small helper, you can invoke e.g. + * `bpf_jit_disasm -o`. + * + * Please find in the code below further comments. + * + * -- Happy eBPF hacking! ;) + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* Common, shared definitions with ebpf_agent.c. */ +#include "bpf_shared.h" +/* Selection of BPF helper functions for our example. */ +#include "bpf_funcs.h" + +/* Could be defined here as well, or included from the header. */ +#define TC_ACT_UNSPEC (-1) +#define TC_ACT_OK 0 +#define TC_ACT_RECLASSIFY 1 +#define TC_ACT_SHOT 2 +#define TC_ACT_PIPE 3 +#define TC_ACT_STOLEN 4 +#define TC_ACT_QUEUED 5 +#define TC_ACT_REPEAT 6 + +/* Other, misc stuff. */ +#define IP_MF 0x2000 +#define IP_OFFSET 0x1FFF + +/* eBPF map definitions, all placed in section "maps". */ +struct bpf_elf_map __section("maps") map_proto = { + .type = BPF_MAP_TYPE_HASH, + .id = BPF_MAP_ID_PROTO, + .size_key = sizeof(uint8_t), + .size_value = sizeof(struct count_tuple), + .max_elem = 256, +}; + +struct bpf_elf_map __section("maps") map_queue = { + .type = BPF_MAP_TYPE_HASH, + .id = BPF_MAP_ID_QUEUE, + .size_key = sizeof(uint32_t), + .size_value = sizeof(struct count_queue), + .max_elem = 1024, +}; + +struct bpf_elf_map __section("maps") map_drops = { + .type = BPF_MAP_TYPE_ARRAY, + .id = BPF_MAP_ID_DROPS, + .size_key = sizeof(uint32_t), + .size_value = sizeof(long), + .max_elem = 64, +}; + +/* Helper functions and definitions for the flow dissector used by the + * example classifier. 
This resembles the kernel's flow dissector to + * some extend and is just used as an example to show what's possible + * with eBPF. + */ +struct sockaddr; + +struct vlan_hdr { + __be16 h_vlan_TCI; + __be16 h_vlan_encapsulated_proto; +}; + +struct flow_keys { + __u32 src; + __u32 dst; + union { + __u32 ports; + __u16 port16[2]; + }; + __s32 th_off; + __u8 ip_proto; +}; + +static inline int flow_ports_offset(__u8 ip_proto) +{ + switch (ip_proto) { + case IPPROTO_TCP: + case IPPROTO_UDP: + case IPPROTO_DCCP: + case IPPROTO_ESP: + case IPPROTO_SCTP: + case IPPROTO_UDPLITE: + default: + return 0; + case IPPROTO_AH: + return 4; + } +} + +static inline bool flow_is_frag(struct __sk_buff *skb, int nh_off) +{ + return !!(load_half(skb, nh_off + offsetof(struct iphdr, frag_off)) & + (IP_MF | IP_OFFSET)); +} + +static inline int flow_parse_ipv4(struct __sk_buff *skb, int nh_off, + __u8 *ip_proto, struct flow_keys *flow) +{ + __u8 ip_ver_len; + + if (unlikely(flow_is_frag(skb, nh_off))) + *ip_proto = 0; + else + *ip_proto = load_byte(skb, nh_off + offsetof(struct iphdr, + protocol)); + if (*ip_proto != IPPROTO_GRE) { + flow->src = load_word(skb, nh_off + offsetof(struct iphdr, saddr)); + flow->dst = load_word(skb, nh_off + offsetof(struct iphdr, daddr)); + } + + ip_ver_len = load_byte(skb, nh_off + 0 /* offsetof(struct iphdr, ihl) */); + if (likely(ip_ver_len == 0x45)) + nh_off += 20; + else + nh_off += (ip_ver_len & 0xF) << 2; + + return nh_off; +} + +static inline __u32 flow_addr_hash_ipv6(struct __sk_buff *skb, int off) +{ + __u32 w0 = load_word(skb, off); + __u32 w1 = load_word(skb, off + sizeof(w0)); + __u32 w2 = load_word(skb, off + sizeof(w0) * 2); + __u32 w3 = load_word(skb, off + sizeof(w0) * 3); + + return w0 ^ w1 ^ w2 ^ w3; +} + +static inline int flow_parse_ipv6(struct __sk_buff *skb, int nh_off, + __u8 *ip_proto, struct flow_keys *flow) +{ + *ip_proto = load_byte(skb, nh_off + offsetof(struct ipv6hdr, nexthdr)); + + flow->src = flow_addr_hash_ipv6(skb, nh_off + 
offsetof(struct ipv6hdr, saddr)); + flow->dst = flow_addr_hash_ipv6(skb, nh_off + offsetof(struct ipv6hdr, daddr)); + + return nh_off + sizeof(struct ipv6hdr); +} + +static inline bool flow_dissector(struct __sk_buff *skb, + struct flow_keys *flow) +{ + int poff, nh_off = BPF_LL_OFF + ETH_HLEN; + __be16 proto = skb->protocol; + __u8 ip_proto; + + /* TODO: check for skb->vlan_tci, skb->vlan_proto first */ + if (proto == htons(ETH_P_8021AD)) { + proto = load_half(skb, nh_off + + offsetof(struct vlan_hdr, h_vlan_encapsulated_proto)); + nh_off += sizeof(struct vlan_hdr); + } + if (proto == htons(ETH_P_8021Q)) { + proto = load_half(skb, nh_off + + offsetof(struct vlan_hdr, h_vlan_encapsulated_proto)); + nh_off += sizeof(struct vlan_hdr); + } + + if (likely(proto == htons(ETH_P_IP))) + nh_off = flow_parse_ipv4(skb, nh_off, &ip_proto, flow); + else if (proto == htons(ETH_P_IPV6)) + nh_off = flow_parse_ipv6(skb, nh_off, &ip_proto, flow); + else + return false; + + switch (ip_proto) { + case IPPROTO_GRE: { + struct gre_hdr { + __be16 flags; + __be16 proto; + }; + + __u16 gre_flags = load_half(skb, nh_off + + offsetof(struct gre_hdr, flags)); + __u16 gre_proto = load_half(skb, nh_off + + offsetof(struct gre_hdr, proto)); + + if (gre_flags & (GRE_VERSION | GRE_ROUTING)) + break; + + nh_off += 4; + if (gre_flags & GRE_CSUM) + nh_off += 4; + if (gre_flags & GRE_KEY) + nh_off += 4; + if (gre_flags & GRE_SEQ) + nh_off += 4; + + if (gre_proto == ETH_P_8021Q) { + gre_proto = load_half(skb, nh_off + + offsetof(struct vlan_hdr, + h_vlan_encapsulated_proto)); + nh_off += sizeof(struct vlan_hdr); + } + if (gre_proto == ETH_P_IP) + nh_off = flow_parse_ipv4(skb, nh_off, &ip_proto, flow); + else if (gre_proto == ETH_P_IPV6) + nh_off = flow_parse_ipv6(skb, nh_off, &ip_proto, flow); + else + return false; + break; + } + case IPPROTO_IPIP: + nh_off = flow_parse_ipv4(skb, nh_off, &ip_proto, flow); + break; + case IPPROTO_IPV6: + nh_off = flow_parse_ipv6(skb, nh_off, &ip_proto, flow); + 
default: + break; + } + + nh_off += flow_ports_offset(ip_proto); + + flow->ports = load_word(skb, nh_off); + flow->th_off = nh_off; + flow->ip_proto = ip_proto; + + return true; +} + +static inline void cls_update_proto_map(const struct __sk_buff *skb, + const struct flow_keys *flow) +{ + uint8_t proto = flow->ip_proto; + struct count_tuple *ct, _ct; + + ct = bpf_map_lookup_elem(&map_proto, &proto); + if (likely(ct)) { + __sync_fetch_and_add(&ct->packets, 1); + __sync_fetch_and_add(&ct->bytes, skb->len); + return; + } + + /* No hit yet, we need to create a new entry. */ + _ct.packets = 1; + _ct.bytes = skb->len; + + bpf_map_update_elem(&map_proto, &proto, &_ct, BPF_ANY); +} + +static inline void cls_update_queue_map(const struct __sk_buff *skb) +{ + uint32_t queue = skb->queue_mapping; + struct count_queue *cq, _cq; + bool mismatch; + + mismatch = skb->queue_mapping != get_smp_processor_id(); + + cq = bpf_map_lookup_elem(&map_queue, &queue); + if (likely(cq)) { + __sync_fetch_and_add(&cq->total, 1); + if (mismatch) + __sync_fetch_and_add(&cq->mismatch, 1); + return; + } + + /* No hit yet, we need to create a new entry. */ + _cq.total = 1; + _cq.mismatch = mismatch ? 1 : 0; + + bpf_map_update_elem(&map_queue, &queue, &_cq, BPF_ANY); +} + +/* eBPF program definitions, placed in various sections, which can + * have custom section names. If custom names are in use, it's + * required to point tc to the correct section, e.g. + * + * tc filter add [...] bpf obj cls.o sec cls-tos [...] + * + * in case the program resides in __section("cls-tos"). + * + * Default section for cls_bpf is: "classifier", for act_bpf is: + * "action". Naturally, if for example multiple actions are present + * in the same file, they need to have distinct section names. + * + * It is however not required to have multiple programs sharing + * a file. 
+ */ +__section("classifier") int cls_main(struct __sk_buff *skb) +{ + struct flow_keys flow; + + if (!flow_dissector(skb, &flow)) + return 0; /* No match in cls_bpf. */ + + cls_update_proto_map(skb, &flow); + cls_update_queue_map(skb); + + return flow.ip_proto; +} + +static inline void act_update_drop_map(void) +{ + uint32_t *count, cpu = get_smp_processor_id(); + + count = bpf_map_lookup_elem(&map_drops, &cpu); + if (count) + /* Only this cpu is accessing this element. */ + (*count)++; +} + +__section("action-mark") int act_mark_main(struct __sk_buff *skb) +{ + /* You could also mangle skb data here with the helper function + * BPF_FUNC_skb_store_bytes, etc. Or, alternatively you could + * do that already in the classifier itself as a merged combination + * of classifier'n'action model. + */ + + if (skb->mark == 0xcafe) { + act_update_drop_map(); + return TC_ACT_SHOT; + } + + /* Default configured tc opcode. */ + return TC_ACT_UNSPEC; +} + +__section("action-rand") int act_rand_main(struct __sk_buff *skb) +{ + /* Sorry, we're near event horizon ... */ + if ((get_prandom_u32() & 3) == 0) { + act_update_drop_map(); + return TC_ACT_SHOT; + } + + return TC_ACT_UNSPEC; +} + +/* Last but not least, the file contains a license. Some future helper + * functions may only be available with a GPL license. 
+ */ +char __license[] __section("license") = "GPL"; diff --git a/examples/bpf/bpf_shared.h b/examples/bpf/bpf_shared.h new file mode 100644 index 00000000..46423eca --- /dev/null +++ b/examples/bpf/bpf_shared.h @@ -0,0 +1,26 @@ +#ifndef __BPF_SHARED__ +#define __BPF_SHARED__ + +#include + +#include "../../include/bpf_elf.h" + +enum { + BPF_MAP_ID_PROTO, + BPF_MAP_ID_QUEUE, + BPF_MAP_ID_DROPS, + __BPF_MAP_ID_MAX, +#define BPF_MAP_ID_MAX __BPF_MAP_ID_MAX +}; + +struct count_tuple { + long packets; /* type long for __sync_fetch_and_add() */ + long bytes; +}; + +struct count_queue { + long total; + long mismatch; +}; + +#endif /* __BPF_SHARED__ */ diff --git a/examples/bpf/bpf_sys.h b/examples/bpf/bpf_sys.h new file mode 100644 index 00000000..6e4f09e2 --- /dev/null +++ b/examples/bpf/bpf_sys.h @@ -0,0 +1,23 @@ +#ifndef __BPF_SYS__ +#define __BPF_SYS__ + +#include +#include + +static inline __u64 bpf_ptr_to_u64(const void *ptr) +{ + return (__u64) (unsigned long) ptr; +} + +static inline int bpf_lookup_elem(int fd, void *key, void *value) +{ + union bpf_attr attr = { + .map_fd = fd, + .key = bpf_ptr_to_u64(key), + .value = bpf_ptr_to_u64(value), + }; + + return syscall(__NR_bpf, BPF_MAP_LOOKUP_ELEM, &attr, sizeof(attr)); +} + +#endif /* __BPF_SYS__ */ diff --git a/examples/cbq.init-v0.7.3 b/examples/cbq.init-v0.7.3 index 35a0a05e..1bc0d446 100644 --- a/examples/cbq.init-v0.7.3 +++ b/examples/cbq.init-v0.7.3 @@ -578,14 +578,14 @@ cbq_show () { ### Check configuration and load DEVICES, DEVFIELDS and CLASSLIST from $1 cbq_init () { ### Get a list of configured classes - CLASSLIST=`find $1 \( -type f -or -type l \) -name 'cbq-*' \ - -not -name '*~' -maxdepth 1 -printf "%f\n"| sort` + CLASSLIST=`find $1 -maxdepth 1 \( -type f -or -type l \) -name 'cbq-*' \ + -not -name '*~' -printf "%f\n"| sort` [ -z "$CLASSLIST" ] && cbq_failure "no configuration files found in $1!" 
### Gather all DEVICE fields from $1/cbq-* - DEVFIELDS=`find $1 \( -type f -or -type l \) -name 'cbq-*' \ - -not -name '*~' -maxdepth 1| xargs sed -n 's/#.*//; \ + DEVFIELDS=`find $1 -maxdepth 1 \( -type f -or -type l \) -name 'cbq-*' \ + -not -name '*~' | xargs sed -n 's/#.*//; \ s/[[:space:]]//g; /^DEVICE=[^,]*,[^,]*\(,[^,]*\)\?/ \ { s/.*=//; p; }'| sort -u` [ -z "$DEVFIELDS" ] && diff --git a/genl/ctrl.c b/genl/ctrl.c index 35461290..b7a8878c 100644 --- a/genl/ctrl.c +++ b/genl/ctrl.c @@ -67,7 +67,7 @@ int genl_ctrl_resolve_family(const char *family) addattr_l(nlh, 128, CTRL_ATTR_FAMILY_NAME, family, strlen(family) + 1); - if (rtnl_talk(&rth, nlh, 0, 0, nlh) < 0) { + if (rtnl_talk(&rth, nlh, nlh, sizeof(req)) < 0) { fprintf(stderr, "Error talking to the kernel\n"); goto errout; } @@ -177,8 +177,9 @@ static int print_ctrl_grp(FILE *fp, struct rtattr *arg, __u32 ctrl_ver) /* * The controller sends one nlmsg per family */ -static int print_ctrl(const struct sockaddr_nl *who, struct nlmsghdr *n, - void *arg) +static int print_ctrl(const struct sockaddr_nl *who, + struct rtnl_ctrl_data *ctrl, + struct nlmsghdr *n, void *arg) { struct rtattr *tb[CTRL_ATTR_MAX + 1]; struct genlmsghdr *ghdr = NLMSG_DATA(n); @@ -281,6 +282,12 @@ static int print_ctrl(const struct sockaddr_nl *who, struct nlmsghdr *n, return 0; } +static int print_ctrl2(const struct sockaddr_nl *who, + struct nlmsghdr *n, void *arg) +{ + return print_ctrl(who, NULL, n, arg); +} + static int ctrl_list(int cmd, int argc, char **argv) { struct rtnl_handle rth; @@ -334,12 +341,12 @@ static int ctrl_list(int cmd, int argc, char **argv) goto ctrl_done; } - if (rtnl_talk(&rth, nlh, 0, 0, nlh) < 0) { + if (rtnl_talk(&rth, nlh, nlh, sizeof(req)) < 0) { fprintf(stderr, "Error talking to the kernel\n"); goto ctrl_done; } - if (print_ctrl(NULL, nlh, (void *) stdout) < 0) { + if (print_ctrl2(NULL, nlh, (void *) stdout) < 0) { fprintf(stderr, "Dump terminated\n"); goto ctrl_done; } @@ -355,7 +362,7 @@ static int 
ctrl_list(int cmd, int argc, char **argv) goto ctrl_done; } - rtnl_dump_filter(&rth, print_ctrl, stdout); + rtnl_dump_filter(&rth, print_ctrl2, stdout); } @@ -408,5 +415,5 @@ static int parse_ctrl(struct genl_util *a, int argc, char **argv) struct genl_util ctrl_genl_util = { .name = "ctrl", .parse_genlopt = parse_ctrl, - .print_genlopt = print_ctrl, + .print_genlopt = print_ctrl2, }; diff --git a/include/SNAPSHOT.h b/include/SNAPSHOT.h index 35bda66a..8bd0c561 100644 --- a/include/SNAPSHOT.h +++ b/include/SNAPSHOT.h @@ -1 +1 @@ -static const char SNAPSHOT[] = "150210"; +static const char SNAPSHOT[] = "150413"; diff --git a/include/bpf_elf.h b/include/bpf_elf.h new file mode 100644 index 00000000..4bd6bb00 --- /dev/null +++ b/include/bpf_elf.h @@ -0,0 +1,33 @@ +#ifndef __BPF_ELF__ +#define __BPF_ELF__ + +#include + +/* Note: + * + * Below ELF section names and bpf_elf_map structure definition + * are not (!) kernel ABI. It's rather a "contract" between the + * application and the BPF loader in tc. For compatibility, the + * section names should stay as-is. Introduction of aliases, if + * needed, are a possibility, though. 
+ */ + +/* ELF section names, etc */ +#define ELF_SECTION_LICENSE "license" +#define ELF_SECTION_MAPS "maps" +#define ELF_SECTION_CLASSIFIER "classifier" +#define ELF_SECTION_ACTION "action" + +#define ELF_MAX_MAPS 64 +#define ELF_MAX_LICENSE_LEN 128 + +/* ELF map definition */ +struct bpf_elf_map { + __u32 type; + __u32 size_key; + __u32 size_value; + __u32 max_elem; + __u32 id; +}; + +#endif /* __BPF_ELF__ */ diff --git a/include/bpf_scm.h b/include/bpf_scm.h new file mode 100644 index 00000000..35117d11 --- /dev/null +++ b/include/bpf_scm.h @@ -0,0 +1,75 @@ +#ifndef __BPF_SCM__ +#define __BPF_SCM__ + +#include +#include + +#include "utils.h" +#include "bpf_elf.h" + +#define BPF_SCM_AUX_VER 1 +#define BPF_SCM_MAX_FDS ELF_MAX_MAPS +#define BPF_SCM_MSG_SIZE 1024 + +struct bpf_elf_st { + dev_t st_dev; + ino_t st_ino; +}; + +struct bpf_map_aux { + unsigned short uds_ver; + unsigned short num_ent; + char obj_name[64]; + struct bpf_elf_st obj_st; + struct bpf_elf_map ent[BPF_SCM_MAX_FDS]; +}; + +struct bpf_map_set_msg { + struct msghdr hdr; + struct iovec iov; + char msg_buf[BPF_SCM_MSG_SIZE]; + struct bpf_map_aux aux; +}; + +static inline int *bpf_map_set_init(struct bpf_map_set_msg *msg, + struct sockaddr_un *addr, + unsigned int addr_len) +{ + const unsigned int cmsg_ctl_len = sizeof(int) * BPF_SCM_MAX_FDS; + struct cmsghdr *cmsg; + + msg->iov.iov_base = &msg->aux; + msg->iov.iov_len = sizeof(msg->aux); + + msg->hdr.msg_iov = &msg->iov; + msg->hdr.msg_iovlen = 1; + + msg->hdr.msg_name = (struct sockaddr *)addr; + msg->hdr.msg_namelen = addr_len; + + BUILD_BUG_ON(sizeof(msg->msg_buf) < cmsg_ctl_len); + msg->hdr.msg_control = &msg->msg_buf; + msg->hdr.msg_controllen = cmsg_ctl_len; + + cmsg = CMSG_FIRSTHDR(&msg->hdr); + cmsg->cmsg_len = msg->hdr.msg_controllen; + cmsg->cmsg_level = SOL_SOCKET; + cmsg->cmsg_type = SCM_RIGHTS; + + return (int *)CMSG_DATA(cmsg); +} + +static inline void bpf_map_set_init_single(struct bpf_map_set_msg *msg, + int num) +{ + struct cmsghdr 
*cmsg; + + msg->hdr.msg_controllen = CMSG_LEN(sizeof(int) * num); + msg->iov.iov_len = offsetof(struct bpf_map_aux, ent) + + sizeof(struct bpf_elf_map) * num; + + cmsg = CMSG_FIRSTHDR(&msg->hdr); + cmsg->cmsg_len = msg->hdr.msg_controllen; +} + +#endif /* __BPF_SCM__ */ diff --git a/include/color.h b/include/color.h new file mode 100644 index 00000000..b85003ae --- /dev/null +++ b/include/color.h @@ -0,0 +1,16 @@ +#ifndef __COLOR_H__ +#define __COLOR_H__ 1 + +enum color_attr { + COLOR_IFNAME, + COLOR_MAC, + COLOR_INET, + COLOR_INET6, + COLOR_OPERSTATE_UP, + COLOR_OPERSTATE_DOWN +}; + +void enable_color(void); +int color_fprintf(FILE *fp, enum color_attr attr, const char *fmt, ...); + +#endif diff --git a/include/libnetlink.h b/include/libnetlink.h index d081e542..968034ba 100644 --- a/include/libnetlink.h +++ b/include/libnetlink.h @@ -20,6 +20,8 @@ struct rtnl_handle __u32 dump; int proto; FILE *dump_fp; +#define RTNL_HANDLE_F_LISTEN_ALL_NSID 0x01 + int flags; }; extern int rcvbuf; @@ -41,9 +43,17 @@ extern int rtnl_dump_request(struct rtnl_handle *rth, int type, void *req, int len) __attribute__((warn_unused_result)); +struct rtnl_ctrl_data { + int nsid; +}; + typedef int (*rtnl_filter_t)(const struct sockaddr_nl *, struct nlmsghdr *n, void *); +typedef int (*rtnl_listen_filter_t)(const struct sockaddr_nl *, + struct rtnl_ctrl_data *, + struct nlmsghdr *n, void *); + struct rtnl_dump_filter_arg { rtnl_filter_t filter; @@ -54,8 +64,8 @@ extern int rtnl_dump_filter_l(struct rtnl_handle *rth, const struct rtnl_dump_filter_arg *arg); extern int rtnl_dump_filter(struct rtnl_handle *rth, rtnl_filter_t filter, void *arg); -extern int rtnl_talk(struct rtnl_handle *rtnl, struct nlmsghdr *n, pid_t peer, - unsigned groups, struct nlmsghdr *answer) +extern int rtnl_talk(struct rtnl_handle *rtnl, struct nlmsghdr *n, + struct nlmsghdr *answer, size_t len) __attribute__((warn_unused_result)); extern int rtnl_send(struct rtnl_handle *rth, const void *buf, int) 
__attribute__((warn_unused_result)); @@ -118,9 +128,10 @@ static inline const char *rta_getattr_str(const struct rtattr *rta) return (const char *)RTA_DATA(rta); } -extern int rtnl_listen(struct rtnl_handle *, rtnl_filter_t handler, +extern int rtnl_listen_all_nsid(struct rtnl_handle *); +extern int rtnl_listen(struct rtnl_handle *, rtnl_listen_filter_t handler, void *jarg); -extern int rtnl_from_file(FILE *, rtnl_filter_t handler, +extern int rtnl_from_file(FILE *, rtnl_listen_filter_t handler, void *jarg); #define NLMSG_TAIL(nmsg) \ @@ -158,6 +169,14 @@ extern int rtnl_from_file(FILE *, rtnl_filter_t handler, #define NDTA_PAYLOAD(n) NLMSG_PAYLOAD(n,sizeof(struct ndtmsg)) #endif +#ifndef NETNS_RTA +#define NETNS_RTA(r) \ + ((struct rtattr*)(((char*)(r)) + NLMSG_ALIGN(sizeof(struct rtgenmsg)))) +#endif +#ifndef NETNS_PAYLOAD +#define NETNS_PAYLOAD(n) NLMSG_PAYLOAD(n,sizeof(struct rtgenmsg)) +#endif + /* User defined nlmsg_type which is used mostly for logging netlink * messages from dump file */ #define NLMSG_TSTAMP 15 diff --git a/include/linux/bpf.h b/include/linux/bpf.h new file mode 100644 index 00000000..a67e7fa6 --- /dev/null +++ b/include/linux/bpf.h @@ -0,0 +1,241 @@ +/* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. 
+ */ +#ifndef __LINUX_BPF_H__ +#define __LINUX_BPF_H__ + +#include +#include + +/* Extended instruction set based on top of classic BPF */ + +/* instruction classes */ +#define BPF_ALU64 0x07 /* alu mode in double word width */ + +/* ld/ldx fields */ +#define BPF_DW 0x18 /* double word */ +#define BPF_XADD 0xc0 /* exclusive add */ + +/* alu/jmp fields */ +#define BPF_MOV 0xb0 /* mov reg to reg */ +#define BPF_ARSH 0xc0 /* sign extending arithmetic shift right */ + +/* change endianness of a register */ +#define BPF_END 0xd0 /* flags for endianness conversion: */ +#define BPF_TO_LE 0x00 /* convert to little-endian */ +#define BPF_TO_BE 0x08 /* convert to big-endian */ +#define BPF_FROM_LE BPF_TO_LE +#define BPF_FROM_BE BPF_TO_BE + +#define BPF_JNE 0x50 /* jump != */ +#define BPF_JSGT 0x60 /* SGT is signed '>', GT in x86 */ +#define BPF_JSGE 0x70 /* SGE is signed '>=', GE in x86 */ +#define BPF_CALL 0x80 /* function call */ +#define BPF_EXIT 0x90 /* function return */ + +/* Register numbers */ +enum { + BPF_REG_0 = 0, + BPF_REG_1, + BPF_REG_2, + BPF_REG_3, + BPF_REG_4, + BPF_REG_5, + BPF_REG_6, + BPF_REG_7, + BPF_REG_8, + BPF_REG_9, + BPF_REG_10, + __MAX_BPF_REG, +}; + +/* BPF has 10 general purpose 64-bit registers and stack frame. 
*/ +#define MAX_BPF_REG __MAX_BPF_REG + +struct bpf_insn { + __u8 code; /* opcode */ + __u8 dst_reg:4; /* dest register */ + __u8 src_reg:4; /* source register */ + __s16 off; /* signed offset */ + __s32 imm; /* signed immediate constant */ +}; + +/* BPF syscall commands */ +enum bpf_cmd { + /* create a map with given type and attributes + * fd = bpf(BPF_MAP_CREATE, union bpf_attr *, u32 size) + * returns fd or negative error + * map is deleted when fd is closed + */ + BPF_MAP_CREATE, + + /* lookup key in a given map + * err = bpf(BPF_MAP_LOOKUP_ELEM, union bpf_attr *attr, u32 size) + * Using attr->map_fd, attr->key, attr->value + * returns zero and stores found elem into value + * or negative error + */ + BPF_MAP_LOOKUP_ELEM, + + /* create or update key/value pair in a given map + * err = bpf(BPF_MAP_UPDATE_ELEM, union bpf_attr *attr, u32 size) + * Using attr->map_fd, attr->key, attr->value, attr->flags + * returns zero or negative error + */ + BPF_MAP_UPDATE_ELEM, + + /* find and delete elem by key in a given map + * err = bpf(BPF_MAP_DELETE_ELEM, union bpf_attr *attr, u32 size) + * Using attr->map_fd, attr->key + * returns zero or negative error + */ + BPF_MAP_DELETE_ELEM, + + /* lookup key in a given map and return next key + * err = bpf(BPF_MAP_GET_NEXT_KEY, union bpf_attr *attr, u32 size) + * Using attr->map_fd, attr->key, attr->next_key + * returns zero and stores next key or negative error + */ + BPF_MAP_GET_NEXT_KEY, + + /* verify and load eBPF program + * prog_fd = bpf(BPF_PROG_LOAD, union bpf_attr *attr, u32 size) + * Using attr->prog_type, attr->insns, attr->license + * returns fd or negative error + */ + BPF_PROG_LOAD, +}; + +enum bpf_map_type { + BPF_MAP_TYPE_UNSPEC, + BPF_MAP_TYPE_HASH, + BPF_MAP_TYPE_ARRAY, + BPF_MAP_TYPE_PROG_ARRAY, +}; + +enum bpf_prog_type { + BPF_PROG_TYPE_UNSPEC, + BPF_PROG_TYPE_SOCKET_FILTER, + BPF_PROG_TYPE_KPROBE, + BPF_PROG_TYPE_SCHED_CLS, + BPF_PROG_TYPE_SCHED_ACT, +}; + +#define BPF_PSEUDO_MAP_FD 1 + +/* flags for 
BPF_MAP_UPDATE_ELEM command */ +#define BPF_ANY 0 /* create new element or update existing */ +#define BPF_NOEXIST 1 /* create new element if it didn't exist */ +#define BPF_EXIST 2 /* update existing element */ + +union bpf_attr { + struct { /* anonymous struct used by BPF_MAP_CREATE command */ + __u32 map_type; /* one of enum bpf_map_type */ + __u32 key_size; /* size of key in bytes */ + __u32 value_size; /* size of value in bytes */ + __u32 max_entries; /* max number of entries in a map */ + }; + + struct { /* anonymous struct used by BPF_MAP_*_ELEM commands */ + __u32 map_fd; + __aligned_u64 key; + union { + __aligned_u64 value; + __aligned_u64 next_key; + }; + __u64 flags; + }; + + struct { /* anonymous struct used by BPF_PROG_LOAD command */ + __u32 prog_type; /* one of enum bpf_prog_type */ + __u32 insn_cnt; + __aligned_u64 insns; + __aligned_u64 license; + __u32 log_level; /* verbosity level of verifier */ + __u32 log_size; /* size of user buffer */ + __aligned_u64 log_buf; /* user supplied buffer */ + __u32 kern_version; /* checked when prog_type=kprobe */ + }; +} __attribute__((aligned(8))); + +/* integer value in 'imm' field of BPF_CALL instruction selects which helper + * function eBPF program intends to call + */ +enum bpf_func_id { + BPF_FUNC_unspec, + BPF_FUNC_map_lookup_elem, /* void *map_lookup_elem(&map, &key) */ + BPF_FUNC_map_update_elem, /* int map_update_elem(&map, &key, &value, flags) */ + BPF_FUNC_map_delete_elem, /* int map_delete_elem(&map, &key) */ + BPF_FUNC_probe_read, /* int bpf_probe_read(void *dst, int size, void *src) */ + BPF_FUNC_ktime_get_ns, /* u64 bpf_ktime_get_ns(void) */ + BPF_FUNC_trace_printk, /* int bpf_trace_printk(const char *fmt, int fmt_size, ...) 
*/ + BPF_FUNC_get_prandom_u32, /* u32 prandom_u32(void) */ + BPF_FUNC_get_smp_processor_id, /* u32 raw_smp_processor_id(void) */ + + /** + * skb_store_bytes(skb, offset, from, len, flags) - store bytes into packet + * @skb: pointer to skb + * @offset: offset within packet from skb->mac_header + * @from: pointer where to copy bytes from + * @len: number of bytes to store into packet + * @flags: bit 0 - if true, recompute skb->csum + * other bits - reserved + * Return: 0 on success + */ + BPF_FUNC_skb_store_bytes, + + /** + * l3_csum_replace(skb, offset, from, to, flags) - recompute IP checksum + * @skb: pointer to skb + * @offset: offset within packet where IP checksum is located + * @from: old value of header field + * @to: new value of header field + * @flags: bits 0-3 - size of header field + * other bits - reserved + * Return: 0 on success + */ + BPF_FUNC_l3_csum_replace, + + /** + * l4_csum_replace(skb, offset, from, to, flags) - recompute TCP/UDP checksum + * @skb: pointer to skb + * @offset: offset within packet where TCP/UDP checksum is located + * @from: old value of header field + * @to: new value of header field + * @flags: bits 0-3 - size of header field + * bit 4 - is pseudo header + * other bits - reserved + * Return: 0 on success + */ + BPF_FUNC_l4_csum_replace, + + /** + * bpf_tail_call(ctx, prog_array_map, index) - jump into another BPF program + * @ctx: context pointer passed to next program + * @prog_array_map: pointer to map which type is BPF_MAP_TYPE_PROG_ARRAY + * @index: index inside array that selects specific program to run + * Return: 0 on success + */ + BPF_FUNC_tail_call, + __BPF_FUNC_MAX_ID, +}; + +/* user accessible mirror of in-kernel sk_buff. 
+ * new fields can only be added to the end of this structure + */ +struct __sk_buff { + __u32 len; + __u32 pkt_type; + __u32 mark; + __u32 queue_mapping; + __u32 protocol; + __u32 vlan_present; + __u32 vlan_tci; + __u32 vlan_proto; + __u32 priority; +}; + +#endif /* __LINUX_BPF_H__ */ diff --git a/include/linux/can.h b/include/linux/can.h index d9ba97f3..4af39b08 100644 --- a/include/linux/can.h +++ b/include/linux/can.h @@ -95,11 +95,17 @@ typedef __u32 can_err_mask_t; * @can_dlc: frame payload length in byte (0 .. 8) aka data length code * N.B. the DLC field from ISO 11898-1 Chapter 8.4.2.3 has a 1:1 * mapping of the 'data length code' to the real payload length + * @__pad: padding + * @__res0: reserved / padding + * @__res1: reserved / padding * @data: CAN frame payload (up to 8 byte) */ struct can_frame { canid_t can_id; /* 32 bit CAN_ID + EFF/RTR/ERR flags */ __u8 can_dlc; /* frame payload length in byte (0 .. CAN_MAX_DLEN) */ + __u8 __pad; /* padding */ + __u8 __res0; /* reserved / padding */ + __u8 __res1; /* reserved / padding */ __u8 data[CAN_MAX_DLEN] __attribute__((aligned(8))); }; diff --git a/include/linux/filter.h b/include/linux/filter.h index 8688a985..e4f2f74c 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -77,9 +77,13 @@ struct sock_fprog { /* Required for SO_ATTACH_FILTER. 
*/ #define SKF_AD_VLAN_TAG_PRESENT 48 #define SKF_AD_PAY_OFFSET 52 #define SKF_AD_RANDOM 56 -#define SKF_AD_MAX 60 -#define SKF_NET_OFF (-0x100000) -#define SKF_LL_OFF (-0x200000) +#define SKF_AD_VLAN_TPID 60 +#define SKF_AD_MAX 64 +#define SKF_NET_OFF (-0x100000) +#define SKF_LL_OFF (-0x200000) + +#define BPF_NET_OFF SKF_NET_OFF +#define BPF_LL_OFF SKF_LL_OFF #endif /* __LINUX_FILTER_H__ */ diff --git a/include/linux/fou.h b/include/linux/fou.h index 8e638059..744c3238 100644 --- a/include/linux/fou.h +++ b/include/linux/fou.h @@ -14,6 +14,7 @@ enum { FOU_ATTR_AF, /* u8 */ FOU_ATTR_IPPROTO, /* u8 */ FOU_ATTR_TYPE, /* u8 */ + FOU_ATTR_REMCSUM_NOPARTIAL, /* flag */ __FOU_ATTR_MAX, }; @@ -24,6 +25,7 @@ enum { FOU_CMD_UNSPEC, FOU_CMD_ADD, FOU_CMD_DEL, + FOU_CMD_GET, __FOU_CMD_MAX, }; diff --git a/include/linux/if_addr.h b/include/linux/if_addr.h index cc375e42..26f0ecff 100644 --- a/include/linux/if_addr.h +++ b/include/linux/if_addr.h @@ -50,6 +50,8 @@ enum { #define IFA_F_PERMANENT 0x80 #define IFA_F_MANAGETEMPADDR 0x100 #define IFA_F_NOPREFIXROUTE 0x200 +#define IFA_F_MCAUTOJOIN 0x400 +#define IFA_F_STABLE_PRIVACY 0x800 struct ifa_cacheinfo { __u32 ifa_prefered; diff --git a/include/linux/if_link.h b/include/linux/if_link.h index ac64724c..7720ad34 100644 --- a/include/linux/if_link.h +++ b/include/linux/if_link.h @@ -147,6 +147,7 @@ enum { IFLA_CARRIER_CHANGES, IFLA_PHYS_SWITCH_ID, IFLA_LINK_NETNSID, + IFLA_PHYS_PORT_NAME, __IFLA_MAX }; @@ -213,6 +214,7 @@ enum { enum in6_addr_gen_mode { IN6_ADDR_GEN_MODE_EUI64, IN6_ADDR_GEN_MODE_NONE, + IN6_ADDR_GEN_MODE_STABLE_PRIVACY, }; /* Bridge section */ @@ -222,6 +224,9 @@ enum { IFLA_BR_FORWARD_DELAY, IFLA_BR_HELLO_TIME, IFLA_BR_MAX_AGE, + IFLA_BR_AGEING_TIME, + IFLA_BR_STP_STATE, + IFLA_BR_PRIORITY, __IFLA_BR_MAX, }; @@ -245,6 +250,7 @@ enum { IFLA_BRPORT_UNICAST_FLOOD, /* flood unicast traffic */ IFLA_BRPORT_PROXYARP, /* proxy ARP */ IFLA_BRPORT_LEARNING_SYNC, /* mac learning sync from device */ + 
IFLA_BRPORT_PROXYARP_WIFI, /* proxy ARP for Wi-Fi */ __IFLA_BRPORT_MAX }; #define IFLA_BRPORT_MAX (__IFLA_BRPORT_MAX - 1) @@ -372,6 +378,7 @@ enum { IFLA_VXLAN_REMCSUM_TX, IFLA_VXLAN_REMCSUM_RX, IFLA_VXLAN_GBP, + IFLA_VXLAN_REMCSUM_NOPARTIAL, __IFLA_VXLAN_MAX }; #define IFLA_VXLAN_MAX (__IFLA_VXLAN_MAX - 1) @@ -381,6 +388,15 @@ struct ifla_vxlan_port_range { __be16 high; }; +/* GENEVE section */ +enum { + IFLA_GENEVE_UNSPEC, + IFLA_GENEVE_ID, + IFLA_GENEVE_REMOTE, + __IFLA_GENEVE_MAX +}; +#define IFLA_GENEVE_MAX (__IFLA_GENEVE_MAX - 1) + /* Bonding section */ enum { @@ -408,6 +424,9 @@ enum { IFLA_BOND_AD_LACP_RATE, IFLA_BOND_AD_SELECT, IFLA_BOND_AD_INFO, + IFLA_BOND_AD_ACTOR_SYS_PRIO, + IFLA_BOND_AD_USER_PORT_KEY, + IFLA_BOND_AD_ACTOR_SYSTEM, __IFLA_BOND_MAX, }; @@ -456,6 +475,9 @@ enum { IFLA_VF_SPOOFCHK, /* Spoof Checking on/off switch */ IFLA_VF_LINK_STATE, /* link state enable/disable/auto switch */ IFLA_VF_RATE, /* Min and Max TX Bandwidth Allocation */ + IFLA_VF_RSS_QUERY_EN, /* RSS Redirection Table and Hash Key query + * on/off switch + */ __IFLA_VF_MAX, }; @@ -500,6 +522,11 @@ struct ifla_vf_link_state { __u32 link_state; }; +struct ifla_vf_rss_query_en { + __u32 vf; + __u32 setting; +}; + /* VF ports management section * * Nested layout of set/get msg is: diff --git a/include/linux/inet_diag.h b/include/linux/inet_diag.h index 7438dad7..0fb76bb9 100644 --- a/include/linux/inet_diag.h +++ b/include/linux/inet_diag.h @@ -143,4 +143,8 @@ struct tcp_dctcp_info { __u32 dctcp_ab_tot; }; +union tcp_cc_info { + struct tcpvegas_info vegas; + struct tcp_dctcp_info dctcp; +}; #endif /* _INET_DIAG_H_ */ diff --git a/include/linux/mpls.h b/include/linux/mpls.h new file mode 100644 index 00000000..13c55eb8 --- /dev/null +++ b/include/linux/mpls.h @@ -0,0 +1,44 @@ +#ifndef _MPLS_H +#define _MPLS_H + +#include +#include + +/* Reference: RFC 5462, RFC 3032 + * + * 0 1 2 3 + * 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 + * 
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | Label | TC |S| TTL | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * + * Label: Label Value, 20 bits + * TC: Traffic Class field, 3 bits + * S: Bottom of Stack, 1 bit + * TTL: Time to Live, 8 bits + */ + +struct mpls_label { + __be32 entry; +}; + +#define MPLS_LS_LABEL_MASK 0xFFFFF000 +#define MPLS_LS_LABEL_SHIFT 12 +#define MPLS_LS_TC_MASK 0x00000E00 +#define MPLS_LS_TC_SHIFT 9 +#define MPLS_LS_S_MASK 0x00000100 +#define MPLS_LS_S_SHIFT 8 +#define MPLS_LS_TTL_MASK 0x000000FF +#define MPLS_LS_TTL_SHIFT 0 + +/* Reserved labels */ +#define MPLS_LABEL_IPV4NULL 0 /* RFC3032 */ +#define MPLS_LABEL_RTALERT 1 /* RFC3032 */ +#define MPLS_LABEL_IPV6NULL 2 /* RFC3032 */ +#define MPLS_LABEL_IMPLNULL 3 /* RFC3032 */ +#define MPLS_LABEL_ENTROPY 7 /* RFC6790 */ +#define MPLS_LABEL_GAL 13 /* RFC5586 */ +#define MPLS_LABEL_OAMALERT 14 /* RFC3429 */ +#define MPLS_LABEL_EXTENSION 15 /* RFC7274 */ + +#endif /* _MPLS_H */ diff --git a/include/linux/neighbour.h b/include/linux/neighbour.h index 3873a355..2e35c61b 100644 --- a/include/linux/neighbour.h +++ b/include/linux/neighbour.h @@ -126,6 +126,7 @@ enum { NDTPA_PROXY_QLEN, /* u32 */ NDTPA_LOCKTIME, /* u64, msecs */ NDTPA_QUEUE_LENBYTES, /* u32 */ + NDTPA_MCAST_REPROBES, /* u32 */ __NDTPA_MAX }; #define NDTPA_MAX (__NDTPA_MAX - 1) diff --git a/include/linux/net_namespace.h b/include/linux/net_namespace.h new file mode 100644 index 00000000..9a92b7e1 --- /dev/null +++ b/include/linux/net_namespace.h @@ -0,0 +1,23 @@ +/* Copyright (c) 2015 6WIND S.A. + * Author: Nicolas Dichtel + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. 
+ */ +#ifndef _LINUX_NET_NAMESPACE_H_ +#define _LINUX_NET_NAMESPACE_H_ + +/* Attributes of RTM_NEWNSID/RTM_GETNSID messages */ +enum { + NETNSA_NONE, +#define NETNSA_NSID_NOT_ASSIGNED -1 + NETNSA_NSID, + NETNSA_PID, + NETNSA_FD, + __NETNSA_MAX, +}; + +#define NETNSA_MAX (__NETNSA_MAX - 1) + +#endif /* _LINUX_NET_NAMESPACE_H_ */ diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h index be0bc182..37931eea 100644 --- a/include/linux/netfilter.h +++ b/include/linux/netfilter.h @@ -49,11 +49,17 @@ enum nf_inet_hooks { NF_INET_NUMHOOKS }; +enum nf_dev_hooks { + NF_NETDEV_INGRESS, + NF_NETDEV_NUMHOOKS +}; + enum { NFPROTO_UNSPEC = 0, NFPROTO_INET = 1, NFPROTO_IPV4 = 2, NFPROTO_ARP = 3, + NFPROTO_NETDEV = 5, NFPROTO_BRIDGE = 7, NFPROTO_IPV6 = 10, NFPROTO_DECNET = 12, diff --git a/include/linux/netlink.h b/include/linux/netlink.h index e0a09df1..0c89ddd7 100644 --- a/include/linux/netlink.h +++ b/include/linux/netlink.h @@ -108,6 +108,7 @@ struct nlmsgerr { #define NETLINK_NO_ENOBUFS 5 #define NETLINK_RX_RING 6 #define NETLINK_TX_RING 7 +#define NETLINK_LISTEN_ALL_NSID 8 struct nl_pktinfo { __u32 group; diff --git a/include/linux/pkt_cls.h b/include/linux/pkt_cls.h index 25731dfb..25af89fa 100644 --- a/include/linux/pkt_cls.h +++ b/include/linux/pkt_cls.h @@ -4,75 +4,6 @@ #include #include -/* I think i could have done better macros ; for now this is stolen from - * some arch/mips code - jhs -*/ -#define _TC_MAKE32(x) ((x)) - -#define _TC_MAKEMASK1(n) (_TC_MAKE32(1) << _TC_MAKE32(n)) -#define _TC_MAKEMASK(v,n) (_TC_MAKE32((_TC_MAKE32(1)<<(v))-1) << _TC_MAKE32(n)) -#define _TC_MAKEVALUE(v,n) (_TC_MAKE32(v) << _TC_MAKE32(n)) -#define _TC_GETVALUE(v,n,m) ((_TC_MAKE32(v) & _TC_MAKE32(m)) >> _TC_MAKE32(n)) - -/* verdict bit breakdown - * -bit 0: when set -> this packet has been munged already - -bit 1: when set -> It is ok to munge this packet - -bit 2,3,4,5: Reclassify counter - sort of reverse TTL - if exceeded -assume loop - -bit 6,7: Where this packet was last 
seen -0: Above the transmit example at the socket level -1: on the Ingress -2: on the Egress - -bit 8: when set --> Request not to classify on ingress. - -bits 9,10,11: redirect counter - redirect TTL. Loop avoidance - - * - * */ - -#define TC_MUNGED _TC_MAKEMASK1(0) -#define SET_TC_MUNGED(v) ( TC_MUNGED | (v & ~TC_MUNGED)) -#define CLR_TC_MUNGED(v) ( v & ~TC_MUNGED) - -#define TC_OK2MUNGE _TC_MAKEMASK1(1) -#define SET_TC_OK2MUNGE(v) ( TC_OK2MUNGE | (v & ~TC_OK2MUNGE)) -#define CLR_TC_OK2MUNGE(v) ( v & ~TC_OK2MUNGE) - -#define S_TC_VERD _TC_MAKE32(2) -#define M_TC_VERD _TC_MAKEMASK(4,S_TC_VERD) -#define G_TC_VERD(x) _TC_GETVALUE(x,S_TC_VERD,M_TC_VERD) -#define V_TC_VERD(x) _TC_MAKEVALUE(x,S_TC_VERD) -#define SET_TC_VERD(v,n) ((V_TC_VERD(n)) | (v & ~M_TC_VERD)) - -#define S_TC_FROM _TC_MAKE32(6) -#define M_TC_FROM _TC_MAKEMASK(2,S_TC_FROM) -#define G_TC_FROM(x) _TC_GETVALUE(x,S_TC_FROM,M_TC_FROM) -#define V_TC_FROM(x) _TC_MAKEVALUE(x,S_TC_FROM) -#define SET_TC_FROM(v,n) ((V_TC_FROM(n)) | (v & ~M_TC_FROM)) -#define AT_STACK 0x0 -#define AT_INGRESS 0x1 -#define AT_EGRESS 0x2 - -#define TC_NCLS _TC_MAKEMASK1(8) -#define SET_TC_NCLS(v) ( TC_NCLS | (v & ~TC_NCLS)) -#define CLR_TC_NCLS(v) ( v & ~TC_NCLS) - -#define S_TC_RTTL _TC_MAKE32(9) -#define M_TC_RTTL _TC_MAKEMASK(3,S_TC_RTTL) -#define G_TC_RTTL(x) _TC_GETVALUE(x,S_TC_RTTL,M_TC_RTTL) -#define V_TC_RTTL(x) _TC_MAKEVALUE(x,S_TC_RTTL) -#define SET_TC_RTTL(v,n) ((V_TC_RTTL(n)) | (v & ~M_TC_RTTL)) - -#define S_TC_AT _TC_MAKE32(12) -#define M_TC_AT _TC_MAKEMASK(2,S_TC_AT) -#define G_TC_AT(x) _TC_GETVALUE(x,S_TC_AT,M_TC_AT) -#define V_TC_AT(x) _TC_MAKEVALUE(x,S_TC_AT) -#define SET_TC_AT(v,n) ((V_TC_AT(n)) | (v & ~M_TC_AT)) /* Action attributes */ enum { @@ -93,8 +24,6 @@ enum { #define TCA_ACT_NOUNBIND 0 #define TCA_ACT_REPLACE 1 #define TCA_ACT_NOREPLACE 0 -#define MAX_REC_LOOP 4 -#define MAX_RED_LOOP 4 #define TC_ACT_UNSPEC (-1) #define TC_ACT_OK 0 @@ -397,11 +326,43 @@ enum { TCA_BPF_CLASSID, TCA_BPF_OPS_LEN, 
TCA_BPF_OPS, + TCA_BPF_FD, + TCA_BPF_NAME, __TCA_BPF_MAX, }; #define TCA_BPF_MAX (__TCA_BPF_MAX - 1) +/* Flower classifier */ + +enum { + TCA_FLOWER_UNSPEC, + TCA_FLOWER_CLASSID, + TCA_FLOWER_INDEV, + TCA_FLOWER_ACT, + TCA_FLOWER_KEY_ETH_DST, /* ETH_ALEN */ + TCA_FLOWER_KEY_ETH_DST_MASK, /* ETH_ALEN */ + TCA_FLOWER_KEY_ETH_SRC, /* ETH_ALEN */ + TCA_FLOWER_KEY_ETH_SRC_MASK, /* ETH_ALEN */ + TCA_FLOWER_KEY_ETH_TYPE, /* be16 */ + TCA_FLOWER_KEY_IP_PROTO, /* u8 */ + TCA_FLOWER_KEY_IPV4_SRC, /* be32 */ + TCA_FLOWER_KEY_IPV4_SRC_MASK, /* be32 */ + TCA_FLOWER_KEY_IPV4_DST, /* be32 */ + TCA_FLOWER_KEY_IPV4_DST_MASK, /* be32 */ + TCA_FLOWER_KEY_IPV6_SRC, /* struct in6_addr */ + TCA_FLOWER_KEY_IPV6_SRC_MASK, /* struct in6_addr */ + TCA_FLOWER_KEY_IPV6_DST, /* struct in6_addr */ + TCA_FLOWER_KEY_IPV6_DST_MASK, /* struct in6_addr */ + TCA_FLOWER_KEY_TCP_SRC, /* be16 */ + TCA_FLOWER_KEY_TCP_DST, /* be16 */ + TCA_FLOWER_KEY_UDP_SRC, /* be16 */ + TCA_FLOWER_KEY_UDP_DST, /* be16 */ + __TCA_FLOWER_MAX, +}; + +#define TCA_FLOWER_MAX (__TCA_FLOWER_MAX - 1) + /* Extended Matches */ struct tcf_ematch_tree_hdr { diff --git a/include/linux/pkt_sched.h b/include/linux/pkt_sched.h index d62316ba..8d2530da 100644 --- a/include/linux/pkt_sched.h +++ b/include/linux/pkt_sched.h @@ -268,7 +268,8 @@ enum { TCA_GRED_STAB, TCA_GRED_DPS, TCA_GRED_MAX_P, - __TCA_GRED_MAX, + TCA_GRED_LIMIT, + __TCA_GRED_MAX, }; #define TCA_GRED_MAX (__TCA_GRED_MAX - 1) @@ -679,6 +680,7 @@ enum { TCA_CODEL_LIMIT, TCA_CODEL_INTERVAL, TCA_CODEL_ECN, + TCA_CODEL_CE_THRESHOLD, __TCA_CODEL_MAX }; @@ -695,6 +697,7 @@ struct tc_codel_xstats { __u32 drop_overlimit; /* number of time max qdisc packet limit was hit */ __u32 ecn_mark; /* number of packets we ECN marked instead of dropped */ __u32 dropping; /* are we in dropping state ? 
*/ + __u32 ce_mark; /* number of CE marked packets because of ce_threshold */ }; /* FQ_CODEL */ @@ -707,6 +710,7 @@ enum { TCA_FQ_CODEL_ECN, TCA_FQ_CODEL_FLOWS, TCA_FQ_CODEL_QUANTUM, + TCA_FQ_CODEL_CE_THRESHOLD, __TCA_FQ_CODEL_MAX }; @@ -730,6 +734,7 @@ struct tc_fq_codel_qd_stats { */ __u32 new_flows_len; /* count of flows in new list */ __u32 old_flows_len; /* count of flows in old list */ + __u32 ce_mark; /* packets above ce_threshold */ }; struct tc_fq_codel_cl_stats { @@ -774,6 +779,8 @@ enum { TCA_FQ_FLOW_REFILL_DELAY, /* flow credit refill delay in usec */ + TCA_FQ_ORPHAN_MASK, /* mask applied to orphaned skb hashes */ + __TCA_FQ_MAX }; diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h index 3eb78105..502e507e 100644 --- a/include/linux/rtnetlink.h +++ b/include/linux/rtnetlink.h @@ -134,6 +134,8 @@ enum { RTM_NEWNSID = 88, #define RTM_NEWNSID RTM_NEWNSID + RTM_DELNSID = 89, +#define RTM_DELNSID RTM_DELNSID RTM_GETNSID = 90, #define RTM_GETNSID RTM_GETNSID @@ -303,6 +305,9 @@ enum rtattr_type_t { RTA_TABLE, RTA_MARK, RTA_MFC_STATS, + RTA_VIA, + RTA_NEWDST, + RTA_PREF, __RTA_MAX }; @@ -332,6 +337,7 @@ struct rtnexthop { #define RTNH_F_DEAD 1 /* Nexthop is dead (used by multipath) */ #define RTNH_F_PERVASIVE 2 /* Do recursive gateway lookup */ #define RTNH_F_ONLINK 4 /* Gateway is forced on link */ +#define RTNH_F_OFFLOAD 8 /* offloaded route */ /* Macros to handle hexthops */ @@ -344,6 +350,12 @@ struct rtnexthop { #define RTNH_SPACE(len) RTNH_ALIGN(RTNH_LENGTH(len)) #define RTNH_DATA(rtnh) ((struct rtattr*)(((char*)(rtnh)) + RTNH_LENGTH(0))) +/* RTA_VIA */ +struct rtvia { + __kernel_sa_family_t rtvia_family; + __u8 rtvia_addr[0]; +}; + /* RTM_CACHEINFO */ struct rta_cacheinfo { @@ -621,6 +633,10 @@ enum rtnetlink_groups { #define RTNLGRP_IPV6_NETCONF RTNLGRP_IPV6_NETCONF RTNLGRP_MDB, #define RTNLGRP_MDB RTNLGRP_MDB + RTNLGRP_MPLS_ROUTE, +#define RTNLGRP_MPLS_ROUTE RTNLGRP_MPLS_ROUTE + RTNLGRP_NSID, +#define RTNLGRP_NSID RTNLGRP_NSID 
__RTNLGRP_MAX }; #define RTNLGRP_MAX (__RTNLGRP_MAX - 1) diff --git a/include/linux/tc_act/tc_bpf.h b/include/linux/tc_act/tc_bpf.h index 5288bd77..07f17cc7 100644 --- a/include/linux/tc_act/tc_bpf.h +++ b/include/linux/tc_act/tc_bpf.h @@ -24,6 +24,8 @@ enum { TCA_ACT_BPF_PARMS, TCA_ACT_BPF_OPS_LEN, TCA_ACT_BPF_OPS, + TCA_ACT_BPF_FD, + TCA_ACT_BPF_NAME, __TCA_ACT_BPF_MAX, }; #define TCA_ACT_BPF_MAX (__TCA_ACT_BPF_MAX - 1) diff --git a/include/linux/tc_act/tc_connmark.h b/include/linux/tc_act/tc_connmark.h new file mode 100644 index 00000000..994b0971 --- /dev/null +++ b/include/linux/tc_act/tc_connmark.h @@ -0,0 +1,22 @@ +#ifndef __UAPI_TC_CONNMARK_H +#define __UAPI_TC_CONNMARK_H + +#include +#include + +#define TCA_ACT_CONNMARK 14 + +struct tc_connmark { + tc_gen; + __u16 zone; +}; + +enum { + TCA_CONNMARK_UNSPEC, + TCA_CONNMARK_PARMS, + TCA_CONNMARK_TM, + __TCA_CONNMARK_MAX +}; +#define TCA_CONNMARK_MAX (__TCA_CONNMARK_MAX - 1) + +#endif diff --git a/include/linux/tcp.h b/include/linux/tcp.h index f96e0158..1e9b4a62 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -112,6 +112,9 @@ enum { #define TCP_FASTOPEN 23 /* Enable FastOpen on listeners */ #define TCP_TIMESTAMP 24 #define TCP_NOTSENT_LOWAT 25 /* limit number of unsent bytes in write queue */ +#define TCP_CC_INFO 26 /* Get Congestion Control (optional) info */ +#define TCP_SAVE_SYN 27 /* Record SYN headers for new connections */ +#define TCP_SAVED_SYN 28 /* Get SYN headers recorded for connection */ struct tcp_repair_opt { __u32 opt_code; @@ -189,6 +192,10 @@ struct tcp_info { __u64 tcpi_pacing_rate; __u64 tcpi_max_pacing_rate; + __u64 tcpi_bytes_acked; /* RFC4898 tcpEStatsAppHCThruOctetsAcked */ + __u64 tcpi_bytes_received; /* RFC4898 tcpEStatsAppHCThruOctetsReceived */ + __u32 tcpi_segs_out; /* RFC4898 tcpEStatsPerfSegsOut */ + __u32 tcpi_segs_in; /* RFC4898 tcpEStatsPerfSegsIn */ }; /* for TCP_MD5SIG socket option */ diff --git a/include/linux/tipc_netlink.h b/include/linux/tipc_netlink.h new 
file mode 100644 index 00000000..d4c8f142 --- /dev/null +++ b/include/linux/tipc_netlink.h @@ -0,0 +1,253 @@ +/* + * Copyright (c) 2014, Ericsson AB + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the names of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * Alternatively, this software may be distributed under the terms of the + * GNU General Public License ("GPL") version 2 as published by the Free + * Software Foundation. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#ifndef _LINUX_TIPC_NETLINK_H_ +#define _LINUX_TIPC_NETLINK_H_ + +#define TIPC_GENL_V2_NAME "TIPCv2" +#define TIPC_GENL_V2_VERSION 0x1 + +/* Netlink commands */ +enum { + TIPC_NL_UNSPEC, + TIPC_NL_LEGACY, + TIPC_NL_BEARER_DISABLE, + TIPC_NL_BEARER_ENABLE, + TIPC_NL_BEARER_GET, + TIPC_NL_BEARER_SET, + TIPC_NL_SOCK_GET, + TIPC_NL_PUBL_GET, + TIPC_NL_LINK_GET, + TIPC_NL_LINK_SET, + TIPC_NL_LINK_RESET_STATS, + TIPC_NL_MEDIA_GET, + TIPC_NL_MEDIA_SET, + TIPC_NL_NODE_GET, + TIPC_NL_NET_GET, + TIPC_NL_NET_SET, + TIPC_NL_NAME_TABLE_GET, + + __TIPC_NL_CMD_MAX, + TIPC_NL_CMD_MAX = __TIPC_NL_CMD_MAX - 1 +}; + +/* Top level netlink attributes */ +enum { + TIPC_NLA_UNSPEC, + TIPC_NLA_BEARER, /* nest */ + TIPC_NLA_SOCK, /* nest */ + TIPC_NLA_PUBL, /* nest */ + TIPC_NLA_LINK, /* nest */ + TIPC_NLA_MEDIA, /* nest */ + TIPC_NLA_NODE, /* nest */ + TIPC_NLA_NET, /* nest */ + TIPC_NLA_NAME_TABLE, /* nest */ + + __TIPC_NLA_MAX, + TIPC_NLA_MAX = __TIPC_NLA_MAX - 1 +}; + +/* Bearer info */ +enum { + TIPC_NLA_BEARER_UNSPEC, + TIPC_NLA_BEARER_NAME, /* string */ + TIPC_NLA_BEARER_PROP, /* nest */ + TIPC_NLA_BEARER_DOMAIN, /* u32 */ + TIPC_NLA_BEARER_UDP_OPTS, /* nest */ + + __TIPC_NLA_BEARER_MAX, + TIPC_NLA_BEARER_MAX = __TIPC_NLA_BEARER_MAX - 1 +}; + +enum { + TIPC_NLA_UDP_UNSPEC, + TIPC_NLA_UDP_LOCAL, /* sockaddr_storage */ + TIPC_NLA_UDP_REMOTE, /* sockaddr_storage */ + + __TIPC_NLA_UDP_MAX, + TIPC_NLA_UDP_MAX = __TIPC_NLA_UDP_MAX - 1 +}; +/* Socket info */ +enum { + TIPC_NLA_SOCK_UNSPEC, + TIPC_NLA_SOCK_ADDR, /* u32 */ + TIPC_NLA_SOCK_REF, /* u32 */ + TIPC_NLA_SOCK_CON, /* nest */ + TIPC_NLA_SOCK_HAS_PUBL, /* flag */ + + __TIPC_NLA_SOCK_MAX, + TIPC_NLA_SOCK_MAX = __TIPC_NLA_SOCK_MAX - 1 +}; + +/* Link info */ +enum { + TIPC_NLA_LINK_UNSPEC, + TIPC_NLA_LINK_NAME, /* string */ + TIPC_NLA_LINK_DEST, /* u32 */ + TIPC_NLA_LINK_MTU, /* u32 */ + TIPC_NLA_LINK_BROADCAST, /* flag */ + TIPC_NLA_LINK_UP, /* flag */ + TIPC_NLA_LINK_ACTIVE, /* flag */ + TIPC_NLA_LINK_PROP, /* nest */ + 
TIPC_NLA_LINK_STATS, /* nest */ + TIPC_NLA_LINK_RX, /* u32 */ + TIPC_NLA_LINK_TX, /* u32 */ + + __TIPC_NLA_LINK_MAX, + TIPC_NLA_LINK_MAX = __TIPC_NLA_LINK_MAX - 1 +}; + +/* Media info */ +enum { + TIPC_NLA_MEDIA_UNSPEC, + TIPC_NLA_MEDIA_NAME, /* string */ + TIPC_NLA_MEDIA_PROP, /* nest */ + + __TIPC_NLA_MEDIA_MAX, + TIPC_NLA_MEDIA_MAX = __TIPC_NLA_MEDIA_MAX - 1 +}; + +/* Node info */ +enum { + TIPC_NLA_NODE_UNSPEC, + TIPC_NLA_NODE_ADDR, /* u32 */ + TIPC_NLA_NODE_UP, /* flag */ + + __TIPC_NLA_NODE_MAX, + TIPC_NLA_NODE_MAX = __TIPC_NLA_NODE_MAX - 1 +}; + +/* Net info */ +enum { + TIPC_NLA_NET_UNSPEC, + TIPC_NLA_NET_ID, /* u32 */ + TIPC_NLA_NET_ADDR, /* u32 */ + + __TIPC_NLA_NET_MAX, + TIPC_NLA_NET_MAX = __TIPC_NLA_NET_MAX - 1 +}; + +/* Name table info */ +enum { + TIPC_NLA_NAME_TABLE_UNSPEC, + TIPC_NLA_NAME_TABLE_PUBL, /* nest */ + + __TIPC_NLA_NAME_TABLE_MAX, + TIPC_NLA_NAME_TABLE_MAX = __TIPC_NLA_NAME_TABLE_MAX - 1 +}; + +/* Publication info */ +enum { + TIPC_NLA_PUBL_UNSPEC, + + TIPC_NLA_PUBL_TYPE, /* u32 */ + TIPC_NLA_PUBL_LOWER, /* u32 */ + TIPC_NLA_PUBL_UPPER, /* u32 */ + TIPC_NLA_PUBL_SCOPE, /* u32 */ + TIPC_NLA_PUBL_NODE, /* u32 */ + TIPC_NLA_PUBL_REF, /* u32 */ + TIPC_NLA_PUBL_KEY, /* u32 */ + + __TIPC_NLA_PUBL_MAX, + TIPC_NLA_PUBL_MAX = __TIPC_NLA_PUBL_MAX - 1 +}; + +/* Nest, connection info */ +enum { + TIPC_NLA_CON_UNSPEC, + + TIPC_NLA_CON_FLAG, /* flag */ + TIPC_NLA_CON_NODE, /* u32 */ + TIPC_NLA_CON_SOCK, /* u32 */ + TIPC_NLA_CON_TYPE, /* u32 */ + TIPC_NLA_CON_INST, /* u32 */ + + __TIPC_NLA_CON_MAX, + TIPC_NLA_CON_MAX = __TIPC_NLA_CON_MAX - 1 +}; + +/* Nest, link properties. 
Valid for link, media and bearer */ +enum { + TIPC_NLA_PROP_UNSPEC, + + TIPC_NLA_PROP_PRIO, /* u32 */ + TIPC_NLA_PROP_TOL, /* u32 */ + TIPC_NLA_PROP_WIN, /* u32 */ + + __TIPC_NLA_PROP_MAX, + TIPC_NLA_PROP_MAX = __TIPC_NLA_PROP_MAX - 1 +}; + +/* Nest, statistics info */ +enum { + TIPC_NLA_STATS_UNSPEC, + + TIPC_NLA_STATS_RX_INFO, /* u32 */ + TIPC_NLA_STATS_RX_FRAGMENTS, /* u32 */ + TIPC_NLA_STATS_RX_FRAGMENTED, /* u32 */ + TIPC_NLA_STATS_RX_BUNDLES, /* u32 */ + TIPC_NLA_STATS_RX_BUNDLED, /* u32 */ + TIPC_NLA_STATS_TX_INFO, /* u32 */ + TIPC_NLA_STATS_TX_FRAGMENTS, /* u32 */ + TIPC_NLA_STATS_TX_FRAGMENTED, /* u32 */ + TIPC_NLA_STATS_TX_BUNDLES, /* u32 */ + TIPC_NLA_STATS_TX_BUNDLED, /* u32 */ + TIPC_NLA_STATS_MSG_PROF_TOT, /* u32 */ + TIPC_NLA_STATS_MSG_LEN_CNT, /* u32 */ + TIPC_NLA_STATS_MSG_LEN_TOT, /* u32 */ + TIPC_NLA_STATS_MSG_LEN_P0, /* u32 */ + TIPC_NLA_STATS_MSG_LEN_P1, /* u32 */ + TIPC_NLA_STATS_MSG_LEN_P2, /* u32 */ + TIPC_NLA_STATS_MSG_LEN_P3, /* u32 */ + TIPC_NLA_STATS_MSG_LEN_P4, /* u32 */ + TIPC_NLA_STATS_MSG_LEN_P5, /* u32 */ + TIPC_NLA_STATS_MSG_LEN_P6, /* u32 */ + TIPC_NLA_STATS_RX_STATES, /* u32 */ + TIPC_NLA_STATS_RX_PROBES, /* u32 */ + TIPC_NLA_STATS_RX_NACKS, /* u32 */ + TIPC_NLA_STATS_RX_DEFERRED, /* u32 */ + TIPC_NLA_STATS_TX_STATES, /* u32 */ + TIPC_NLA_STATS_TX_PROBES, /* u32 */ + TIPC_NLA_STATS_TX_NACKS, /* u32 */ + TIPC_NLA_STATS_TX_ACKS, /* u32 */ + TIPC_NLA_STATS_RETRANSMITTED, /* u32 */ + TIPC_NLA_STATS_DUPLICATES, /* u32 */ + TIPC_NLA_STATS_LINK_CONGS, /* u32 */ + TIPC_NLA_STATS_MAX_QUEUE, /* u32 */ + TIPC_NLA_STATS_AVG_QUEUE, /* u32 */ + + __TIPC_NLA_STATS_MAX, + TIPC_NLA_STATS_MAX = __TIPC_NLA_STATS_MAX - 1 +}; + +#endif diff --git a/include/linux/xfrm.h b/include/linux/xfrm.h index 3a1fd329..b8f54510 100644 --- a/include/linux/xfrm.h +++ b/include/linux/xfrm.h @@ -1,6 +1,7 @@ #ifndef _LINUX_XFRM_H #define _LINUX_XFRM_H +#include #include /* All of the structures in this file may not change size as they are @@ -13,6 +14,7 @@ typedef 
union { __be32 a4; __be32 a6[4]; + struct in6_addr in6; } xfrm_address_t; /* Ident of a specific xfrm_state. It is used on input to lookup diff --git a/include/ll_map.h b/include/ll_map.h index 4c78498e..b98a5714 100644 --- a/include/ll_map.h +++ b/include/ll_map.h @@ -10,5 +10,6 @@ extern const char *ll_index_to_name(unsigned idx); extern const char *ll_idx_n2a(unsigned idx, char *buf); extern int ll_index_to_type(unsigned idx); extern int ll_index_to_flags(unsigned idx); +extern unsigned namehash(const char *str); #endif /* __LL_MAP_H__ */ diff --git a/include/names.h b/include/names.h new file mode 100644 index 00000000..6fed5818 --- /dev/null +++ b/include/names.h @@ -0,0 +1,26 @@ +#ifndef DB_NAMES_H_ +#define DB_NAMES_H_ 1 + +#define IDNAME_MAX 256 + +struct db_entry { + struct db_entry *next; + unsigned int id; + char *name; +}; + +struct db_names { + unsigned int size; + struct db_entry *cached; + struct db_entry **hash; + int max; +}; + +struct db_names *db_names_alloc(void); +int db_names_load(struct db_names *db, const char *path); +void db_names_free(struct db_names *db); + +char *id_to_name(struct db_names *db, int id, char *name); +int name_to_id(struct db_names *db, int *id, const char *name); + +#endif diff --git a/include/namespace.h b/include/namespace.h index a2ac7dcc..5add9d26 100644 --- a/include/namespace.h +++ b/include/namespace.h @@ -3,6 +3,7 @@ #include #include +#include #include #include diff --git a/include/rt_names.h b/include/rt_names.h index c0ea4f98..921be060 100644 --- a/include/rt_names.h +++ b/include/rt_names.h @@ -22,7 +22,7 @@ int inet_proto_a2n(const char *buf); const char * ll_type_n2a(int type, char *buf, int len); -const char *ll_addr_n2a(unsigned char *addr, int alen, +const char *ll_addr_n2a(const unsigned char *addr, int alen, int type, char *buf, int blen); int ll_addr_a2n(char *lladdr, int len, const char *arg); diff --git a/include/utils.h b/include/utils.h index 3da22837..2277b745 100644 --- a/include/utils.h +++ 
b/include/utils.h @@ -50,10 +50,11 @@ extern void incomplete_command(void) __attribute__((noreturn)); typedef struct { - __u8 family; - __u8 bytelen; + __u16 flags; + __u16 bytelen; __s16 bitlen; - __u32 flags; + /* These next two fields match rtvia */ + __u16 family; __u32 data[8]; } inet_prefix; @@ -77,6 +78,13 @@ struct ipx_addr { u_int8_t ipx_node[IPX_NODE_LEN]; }; +#ifndef AF_MPLS +# define AF_MPLS 28 +#endif + +/* Maximum number of labels the mpls helpers support */ +#define MPLS_MAX_LABELS 8 + extern __u32 get_addr32(const char *name); extern int get_addr_1(inet_prefix *dst, const char *arg, int family); extern int get_prefix_1(inet_prefix *dst, char *arg, int family); @@ -101,11 +109,17 @@ extern int get_s8(__s8 *val, const char *arg, int base); extern char* hexstring_n2a(const __u8 *str, int len, char *buf, int blen); extern __u8* hexstring_a2n(const char *str, __u8 *buf, int blen); +extern int af_bit_len(int af); +extern int af_byte_len(int af); + extern const char *format_host(int af, int len, const void *addr, char *buf, int buflen); -extern const char *rt_addr_n2a(int af, const void *addr, +extern const char *rt_addr_n2a(int af, int len, const void *addr, char *buf, int buflen); +extern int read_family(const char *name); +extern const char *family_name(int family); + void missarg(const char *) __attribute__((noreturn)); void invarg(const char *, const char *) __attribute__((noreturn)); void duparg(const char *, const char *) __attribute__((noreturn)); @@ -119,6 +133,9 @@ int dnet_pton(int af, const char *src, void *addr); const char *ipx_ntop(int af, const void *addr, char *str, size_t len); int ipx_pton(int af, const char *src, void *addr); +const char *mpls_ntop(int af, const void *addr, char *str, size_t len); +int mpls_pton(int af, const char *src, void *addr); + extern int __iproute2_hz_internal; extern int __get_hz(void); @@ -154,6 +171,25 @@ void print_nlmsg_timestamp(FILE *fp, const struct nlmsghdr *n); #define ARRAY_SIZE(x) (sizeof(x) / 
sizeof((x)[0])) +#define BUILD_BUG_ON(cond) ((void)sizeof(char[1 - 2 * !!(cond)])) + +#ifndef offsetof +# define offsetof(type, member) ((size_t) &((type *)0)->member) +#endif + +#ifndef min +# define min(x, y) ({ \ + typeof(x) _min1 = (x); \ + typeof(y) _min2 = (y); \ + (void) (&_min1 == &_min2); \ + _min1 < _min2 ? _min1 : _min2; }) +#endif + +#ifndef __check_format_string +# define __check_format_string(pos_str, pos_args) \ + __attribute__ ((format (printf, (pos_str), (pos_args)))) +#endif + extern int cmdlineno; extern ssize_t getcmdline(char **line, size_t *len, FILE *in); extern int makeargs(char *line, char *argv[], int maxargs); @@ -167,4 +203,6 @@ int iplink_parse(int argc, char **argv, struct iplink_req *req, extern int do_each_netns(int (*func)(char *nsname, void *arg), void *arg, bool show_label); +char *int_to_str(int val, char *buf); + #endif /* __UTILS_H__ */ diff --git a/ip/Makefile b/ip/Makefile index 2c742f30..77653ecc 100644 --- a/ip/Makefile +++ b/ip/Makefile @@ -6,7 +6,8 @@ IPOBJ=ip.o ipaddress.o ipaddrlabel.o iproute.o iprule.o ipnetns.o \ iplink_macvlan.o iplink_macvtap.o ipl2tp.o link_vti.o link_vti6.o \ iplink_vxlan.o tcp_metrics.o iplink_ipoib.o ipnetconf.o link_ip6tnl.o \ link_iptnl.o link_gre6.o iplink_bond.o iplink_bond_slave.o iplink_hsr.o \ - iplink_bridge.o iplink_bridge_slave.o ipfou.o iplink_ipvlan.o + iplink_bridge.o iplink_bridge_slave.o ipfou.o iplink_ipvlan.o \ + iplink_geneve.o RTMONOBJ=rtmon.o diff --git a/ip/ip.c b/ip/ip.c index da16b15f..c23de740 100644 --- a/ip/ip.c +++ b/ip/ip.c @@ -23,6 +23,7 @@ #include "utils.h" #include "ip_common.h" #include "namespace.h" +#include "color.h" int preferred_family = AF_UNSPEC; int human_readable = 0; @@ -52,11 +53,11 @@ static void usage(void) " netns | l2tp | fou | tcp_metrics | token | netconf }\n" " OPTIONS := { -V[ersion] | -s[tatistics] | -d[etails] | -r[esolve] |\n" " -h[uman-readable] | -iec |\n" -" -f[amily] { inet | inet6 | ipx | dnet | bridge | link } |\n" +" -f[amily] { inet 
| inet6 | ipx | dnet | mpls | bridge | link } |\n" " -4 | -6 | -I | -D | -B | -0 |\n" " -l[oops] { maximum-addr-flush-attempts } |\n" " -o[neline] | -t[imestamp] | -ts[hort] | -b[atch] [filename] |\n" -" -rc[vbuf] [size] | -n[etns] name | -a[ll] }\n"); +" -rc[vbuf] [size] | -n[etns] name | -a[ll] | -c[olor]}\n"); exit(-1); } @@ -190,21 +191,11 @@ int main(int argc, char **argv) argv++; if (argc <= 1) usage(); - if (strcmp(argv[1], "inet") == 0) - preferred_family = AF_INET; - else if (strcmp(argv[1], "inet6") == 0) - preferred_family = AF_INET6; - else if (strcmp(argv[1], "dnet") == 0) - preferred_family = AF_DECnet; - else if (strcmp(argv[1], "link") == 0) - preferred_family = AF_PACKET; - else if (strcmp(argv[1], "ipx") == 0) - preferred_family = AF_IPX; - else if (strcmp(argv[1], "bridge") == 0) - preferred_family = AF_BRIDGE; - else if (strcmp(argv[1], "help") == 0) + if (strcmp(argv[1], "help") == 0) usage(); else + preferred_family = read_family(argv[1]); + if (preferred_family == AF_UNSPEC) invarg("invalid protocol family", argv[1]); } else if (strcmp(opt, "-4") == 0) { preferred_family = AF_INET; @@ -216,6 +207,8 @@ int main(int argc, char **argv) preferred_family = AF_IPX; } else if (strcmp(opt, "-D") == 0) { preferred_family = AF_DECnet; + } else if (strcmp(opt, "-M") == 0) { + preferred_family = AF_MPLS; } else if (strcmp(opt, "-B") == 0) { preferred_family = AF_BRIDGE; } else if (matches(opt, "-human") == 0 || @@ -265,6 +258,8 @@ int main(int argc, char **argv) exit(-1); } rcvbuf = size; + } else if (matches(opt, "-color") == 0) { + enable_color(); } else if (matches(opt, "-help") == 0) { usage(); } else if (matches(opt, "-netns") == 0) { diff --git a/ip/ip_common.h b/ip/ip_common.h index 89a495ea..f120f5b9 100644 --- a/ip/ip_common.h +++ b/ip/ip_common.h @@ -33,7 +33,11 @@ extern int print_prefix(const struct sockaddr_nl *who, extern int print_rule(const struct sockaddr_nl *who, struct nlmsghdr *n, void *arg); extern int print_netconf(const struct 
sockaddr_nl *who, + struct rtnl_ctrl_data *ctrl, struct nlmsghdr *n, void *arg); +extern void netns_map_init(void); +extern int print_nsid(const struct sockaddr_nl *who, + struct nlmsghdr *n, void *arg); extern int do_ipaddr(int argc, char **argv); extern int do_ipaddrlabel(int argc, char **argv); extern int do_iproute(int argc, char **argv); diff --git a/ip/ipaddress.c b/ip/ipaddress.c index 3730424a..ef5f9eab 100644 --- a/ip/ipaddress.c +++ b/ip/ipaddress.c @@ -28,11 +28,13 @@ #include #include #include +#include #include "rt_names.h" #include "utils.h" #include "ll_map.h" #include "ip_common.h" +#include "color.h" enum { IPADD_LIST, @@ -84,7 +86,7 @@ static void usage(void) fprintf(stderr, " [-]tentative | [-]deprecated | [-]dadfailed | temporary |\n"); fprintf(stderr, " CONFFLAG-LIST ]\n"); fprintf(stderr, "CONFFLAG-LIST := [ CONFFLAG-LIST ] CONFFLAG\n"); - fprintf(stderr, "CONFFLAG := [ home | nodad | mngtmpaddr | noprefixroute ]\n"); + fprintf(stderr, "CONFFLAG := [ home | nodad | mngtmpaddr | noprefixroute | autojoin ]\n"); fprintf(stderr, "LIFETIME := [ valid_lft LFT ] [ preferred_lft LFT ]\n"); fprintf(stderr, "LFT := forever | SECONDS\n"); @@ -135,8 +137,15 @@ static void print_operstate(FILE *f, __u8 state) { if (state >= sizeof(oper_states)/sizeof(oper_states[0])) fprintf(f, "state %#x ", state); - else - fprintf(f, "state %s ", oper_states[state]); + else { + fprintf(f, "state "); + if (strcmp(oper_states[state], "UP") == 0) + color_fprintf(f, COLOR_OPERSTATE_UP, "%s ", oper_states[state]); + else if (strcmp(oper_states[state], "DOWN") == 0) + color_fprintf(f, COLOR_OPERSTATE_DOWN, "%s ", oper_states[state]); + else + fprintf(f, "%s ", oper_states[state]); + } } int get_operstate(const char *name) @@ -605,7 +614,8 @@ int print_linkinfo(const struct sockaddr_nl *who, if (n->nlmsg_type == RTM_DELLINK) fprintf(fp, "Deleted "); - fprintf(fp, "%d: %s", ifi->ifi_index, + fprintf(fp, "%d: ", ifi->ifi_index); + color_fprintf(fp, COLOR_IFNAME, "%s", 
tb[IFLA_IFNAME] ? rta_getattr_str(tb[IFLA_IFNAME]) : ""); if (tb[IFLA_LINK]) { @@ -614,9 +624,13 @@ int print_linkinfo(const struct sockaddr_nl *who, if (iflink == 0) fprintf(fp, "@NONE: "); else { - fprintf(fp, "@%s: ", ll_idx_n2a(iflink, b1)); - m_flag = ll_index_to_flags(iflink); - m_flag = !(m_flag & IFF_UP); + if (tb[IFLA_LINK_NETNSID]) + fprintf(fp, "@if%d: ", iflink); + else { + fprintf(fp, "@%s: ", ll_idx_n2a(iflink, b1)); + m_flag = ll_index_to_flags(iflink); + m_flag = !(m_flag & IFF_UP); + } } } else { fprintf(fp, ": "); @@ -661,10 +675,11 @@ int print_linkinfo(const struct sockaddr_nl *who, fprintf(fp, " link/%s ", ll_type_n2a(ifi->ifi_type, b1, sizeof(b1))); if (tb[IFLA_ADDRESS]) { - fprintf(fp, "%s", ll_addr_n2a(RTA_DATA(tb[IFLA_ADDRESS]), - RTA_PAYLOAD(tb[IFLA_ADDRESS]), - ifi->ifi_type, - b1, sizeof(b1))); + color_fprintf(fp, COLOR_MAC, "%s", + ll_addr_n2a(RTA_DATA(tb[IFLA_ADDRESS]), + RTA_PAYLOAD(tb[IFLA_ADDRESS]), + ifi->ifi_type, + b1, sizeof(b1))); } if (tb[IFLA_BROADCAST]) { if (ifi->ifi_flags&IFF_POINTOPOINT) @@ -678,6 +693,15 @@ int print_linkinfo(const struct sockaddr_nl *who, } } + if (tb[IFLA_LINK_NETNSID]) { + int id = *(int*)RTA_DATA(tb[IFLA_LINK_NETNSID]); + + if (id >= 0) + fprintf(fp, " link-netnsid %d", id); + else + fprintf(fp, " link-netnsid unknown"); + } + if (tb[IFLA_PROMISCUITY] && show_details) fprintf(fp, " promiscuity %u ", *(int*)RTA_DATA(tb[IFLA_PROMISCUITY])); @@ -835,10 +859,21 @@ int print_addrinfo(const struct sockaddr_nl *who, struct nlmsghdr *n, fprintf(fp, " family %d ", ifa->ifa_family); if (rta_tb[IFA_LOCAL]) { - fprintf(fp, "%s", format_host(ifa->ifa_family, - RTA_PAYLOAD(rta_tb[IFA_LOCAL]), - RTA_DATA(rta_tb[IFA_LOCAL]), - abuf, sizeof(abuf))); + if (ifa->ifa_family == AF_INET) + color_fprintf(fp, COLOR_INET, "%s", format_host(ifa->ifa_family, + RTA_PAYLOAD(rta_tb[IFA_LOCAL]), + RTA_DATA(rta_tb[IFA_LOCAL]), + abuf, sizeof(abuf))); + else if (ifa->ifa_family == AF_INET6) + color_fprintf(fp, COLOR_INET6, "%s", 
format_host(ifa->ifa_family, + RTA_PAYLOAD(rta_tb[IFA_LOCAL]), + RTA_DATA(rta_tb[IFA_LOCAL]), + abuf, sizeof(abuf))); + else + fprintf(fp, "%s", format_host(ifa->ifa_family, + RTA_PAYLOAD(rta_tb[IFA_LOCAL]), + RTA_DATA(rta_tb[IFA_LOCAL]), + abuf, sizeof(abuf))); if (rta_tb[IFA_ADDRESS] == NULL || memcmp(RTA_DATA(rta_tb[IFA_ADDRESS]), RTA_DATA(rta_tb[IFA_LOCAL]), @@ -901,6 +936,10 @@ int print_addrinfo(const struct sockaddr_nl *who, struct nlmsghdr *n, ifa_flags &= ~IFA_F_NOPREFIXROUTE; fprintf(fp, "noprefixroute "); } + if (ifa_flags & IFA_F_MCAUTOJOIN) { + ifa_flags &= ~IFA_F_MCAUTOJOIN; + fprintf(fp, "autojoin "); + } if (!(ifa_flags & IFA_F_PERMANENT)) { fprintf(fp, "dynamic "); } else @@ -1072,7 +1111,9 @@ static int save_nlmsg(const struct sockaddr_nl *who, struct nlmsghdr *n, return ret == n->nlmsg_len ? 0 : ret; } -static int show_handler(const struct sockaddr_nl *nl, struct nlmsghdr *n, void *arg) +static int show_handler(const struct sockaddr_nl *nl, + struct rtnl_ctrl_data *ctrl, + struct nlmsghdr *n, void *arg) { struct ifaddrmsg *ifa = NLMSG_DATA(n); @@ -1089,7 +1130,9 @@ static int ipaddr_showdump(void) exit(rtnl_from_file(stdin, &show_handler, NULL)); } -static int restore_handler(const struct sockaddr_nl *nl, struct nlmsghdr *n, void *arg) +static int restore_handler(const struct sockaddr_nl *nl, + struct rtnl_ctrl_data *ctrl, + struct nlmsghdr *n, void *arg) { int ret; @@ -1097,7 +1140,7 @@ static int restore_handler(const struct sockaddr_nl *nl, struct nlmsghdr *n, voi ll_init_map(&rth); - ret = rtnl_talk(&rth, n, 0, 0, n); + ret = rtnl_talk(&rth, n, n, sizeof(*n)); if ((ret < 0) && (errno == EEXIST)) ret = 0; @@ -1340,6 +1383,9 @@ static int ipaddr_list_flush_or_save(int argc, char **argv, int action) } else if (strcmp(*argv, "noprefixroute") == 0) { filter.flags |= IFA_F_NOPREFIXROUTE; filter.flagmask |= IFA_F_NOPREFIXROUTE; + } else if (strcmp(*argv, "autojoin") == 0) { + filter.flags |= IFA_F_MCAUTOJOIN; + filter.flagmask |= IFA_F_MCAUTOJOIN; } 
else if (strcmp(*argv, "dadfailed") == 0) { filter.flags |= IFA_F_DADFAILED; filter.flagmask |= IFA_F_DADFAILED; @@ -1544,6 +1590,16 @@ static int default_scope(inet_prefix *lcl) return 0; } +static bool ipaddr_is_multicast(inet_prefix *a) +{ + if (a->family == AF_INET) + return IN_MULTICAST(ntohl(a->data[0])); + else if (a->family == AF_INET6) + return IN6_IS_ADDR_MULTICAST(a->data); + else + return false; +} + static int ipaddr_modify(int cmd, int flags, int argc, char **argv) { struct { @@ -1651,6 +1707,8 @@ static int ipaddr_modify(int cmd, int flags, int argc, char **argv) ifa_flags |= IFA_F_MANAGETEMPADDR; } else if (strcmp(*argv, "noprefixroute") == 0) { ifa_flags |= IFA_F_NOPREFIXROUTE; + } else if (strcmp(*argv, "autojoin") == 0) { + ifa_flags |= IFA_F_MCAUTOJOIN; } else { if (strcmp(*argv, "local") == 0) { NEXT_ARG(); @@ -1741,7 +1799,12 @@ static int ipaddr_modify(int cmd, int flags, int argc, char **argv) sizeof(cinfo)); } - if (rtnl_talk(&rth, &req.n, 0, 0, NULL) < 0) + if ((ifa_flags & IFA_F_MCAUTOJOIN) && !ipaddr_is_multicast(&lcl)) { + fprintf(stderr, "autojoin needs multicast address\n"); + return -1; + } + + if (rtnl_talk(&rth, &req.n, NULL, 0) < 0) return -2; return 0; diff --git a/ip/ipaddrlabel.c b/ip/ipaddrlabel.c index b34dd8b1..a738ded0 100644 --- a/ip/ipaddrlabel.c +++ b/ip/ipaddrlabel.c @@ -182,8 +182,8 @@ static int ipaddrlabel_modify(int cmd, int argc, char **argv) if (req.ifal.ifal_family == AF_UNSPEC) req.ifal.ifal_family = AF_INET6; - if (rtnl_talk(&rth, &req.n, 0, 0, NULL) < 0) - return 2; + if (rtnl_talk(&rth, &req.n, NULL, 0) < 0) + return -2; return 0; } @@ -209,7 +209,7 @@ static int flush_addrlabel(const struct sockaddr_nl *who, struct nlmsghdr *n, vo if (rtnl_open(&rth2, 0) < 0) return -1; - if (rtnl_talk(&rth2, n, 0, 0, NULL) < 0) + if (rtnl_talk(&rth2, n, NULL, 0) < 0) return -2; rtnl_close(&rth2); @@ -232,12 +232,12 @@ static int ipaddrlabel_flush(int argc, char **argv) if (rtnl_wilddump_request(&rth, af, RTM_GETADDRLABEL) < 
0) { perror("Cannot send dump request"); - return 1; + return -1; } if (rtnl_dump_filter(&rth, flush_addrlabel, NULL) < 0) { fprintf(stderr, "Flush terminated\n"); - return 1; + return -1; } return 0; @@ -248,6 +248,7 @@ int do_ipaddrlabel(int argc, char **argv) if (argc < 1) { return ipaddrlabel_list(0, NULL); } else if (matches(argv[0], "list") == 0 || + matches(argv[0], "lst") == 0 || matches(argv[0], "show") == 0) { return ipaddrlabel_list(argc-1, argv+1); } else if (matches(argv[0], "add") == 0) { diff --git a/ip/ipfou.c b/ip/ipfou.c index 26760453..0b83c277 100644 --- a/ip/ipfou.c +++ b/ip/ipfou.c @@ -112,7 +112,7 @@ static int do_add(int argc, char **argv) fou_parse_opt(argc, argv, &req.n, true); - if (rtnl_talk(&genl_rth, &req.n, 0, 0, NULL) < 0) + if (rtnl_talk(&genl_rth, &req.n, NULL, 0) < 0) return -2; return 0; @@ -124,7 +124,7 @@ static int do_del(int argc, char **argv) fou_parse_opt(argc, argv, &req.n, false); - if (rtnl_talk(&genl_rth, &req.n, 0, 0, NULL) < 0) + if (rtnl_talk(&genl_rth, &req.n, NULL, 0) < 0) return -2; return 0; diff --git a/ip/ipl2tp.c b/ip/ipl2tp.c index 5cd86322..2f7c9bf1 100644 --- a/ip/ipl2tp.c +++ b/ip/ipl2tp.c @@ -119,7 +119,7 @@ static int create_tunnel(struct l2tp_parm *p) addattr16(&req.n, 1024, L2TP_ATTR_UDP_DPORT, p->peer_udp_port); } - if (rtnl_talk(&genl_rth, &req.n, 0, 0, NULL) < 0) + if (rtnl_talk(&genl_rth, &req.n, NULL, 0) < 0) return -2; return 0; @@ -132,7 +132,7 @@ static int delete_tunnel(struct l2tp_parm *p) addattr32(&req.n, 128, L2TP_ATTR_CONN_ID, p->tunnel_id); - if (rtnl_talk(&genl_rth, &req.n, 0, 0, NULL) < 0) + if (rtnl_talk(&genl_rth, &req.n, NULL, 0) < 0) return -2; return 0; @@ -166,7 +166,7 @@ static int create_session(struct l2tp_parm *p) if (p->ifname && p->ifname[0]) addattrstrz(&req.n, 1024, L2TP_ATTR_IFNAME, p->ifname); - if (rtnl_talk(&genl_rth, &req.n, 0, 0, NULL) < 0) + if (rtnl_talk(&genl_rth, &req.n, NULL, 0) < 0) return -2; return 0; @@ -179,7 +179,7 @@ static int delete_session(struct 
l2tp_parm *p) addattr32(&req.n, 1024, L2TP_ATTR_CONN_ID, p->tunnel_id); addattr32(&req.n, 1024, L2TP_ATTR_SESSION_ID, p->session_id); - if (rtnl_talk(&genl_rth, &req.n, 0, 0, NULL) < 0) + if (rtnl_talk(&genl_rth, &req.n, NULL, 0) < 0) return -2; return 0; diff --git a/ip/iplink.c b/ip/iplink.c index c93d1dc3..e296e6f6 100644 --- a/ip/iplink.c +++ b/ip/iplink.c @@ -53,9 +53,9 @@ void iplink_usage(void) fprintf(stderr, " [ numtxqueues QUEUE_COUNT ]\n"); fprintf(stderr, " [ numrxqueues QUEUE_COUNT ]\n"); fprintf(stderr, " type TYPE [ ARGS ]\n"); - fprintf(stderr, " ip link delete DEV type TYPE [ ARGS ]\n"); + fprintf(stderr, " ip link delete { DEVICE | dev DEVICE | group DEVGROUP } type TYPE [ ARGS ]\n"); fprintf(stderr, "\n"); - fprintf(stderr, " ip link set { dev DEVICE | group DEVGROUP } [ { up | down } ]\n"); + fprintf(stderr, " ip link set { DEVICE | dev DEVICE | group DEVGROUP } [ { up | down } ]\n"); } else fprintf(stderr, "Usage: ip link set DEVICE [ { up | down } ]\n"); @@ -72,6 +72,7 @@ void iplink_usage(void) fprintf(stderr, " [ mtu MTU ]\n"); fprintf(stderr, " [ netns PID ]\n"); fprintf(stderr, " [ netns NAME ]\n"); + fprintf(stderr, " [ link-netnsid ID ]\n"); fprintf(stderr, " [ alias NAME ]\n"); fprintf(stderr, " [ vf NUM [ mac LLADDR ]\n"); fprintf(stderr, " [ vlan VLANID [ qos VLAN-QOS ] ]\n"); @@ -79,6 +80,7 @@ void iplink_usage(void) fprintf(stderr, " [ rate TXRATE ] ] \n"); fprintf(stderr, " [ spoofchk { on | off} ] ] \n"); + fprintf(stderr, " [ query_rss { on | off} ] ] \n"); fprintf(stderr, " [ state { auto | enable | disable} ] ]\n"); fprintf(stderr, " [ master DEVICE ]\n"); fprintf(stderr, " [ nomaster ]\n"); @@ -91,7 +93,7 @@ void iplink_usage(void) fprintf(stderr, "TYPE := { vlan | veth | vcan | dummy | ifb | macvlan | macvtap |\n"); fprintf(stderr, " bridge | bond | ipoib | ip6tnl | ipip | sit | vxlan |\n"); fprintf(stderr, " gre | gretap | ip6gre | ip6gretap | vti | nlmon |\n"); - fprintf(stderr, " bond_slave | ipvlan }\n"); + 
fprintf(stderr, " bond_slave | ipvlan | geneve }\n"); } exit(-1); } @@ -178,6 +180,7 @@ static int get_addr_gen_mode(const char *mode) static int have_rtnl_newlink = -1; static int accept_msg(const struct sockaddr_nl *who, + struct rtnl_ctrl_data *ctrl, struct nlmsghdr *n, void *arg) { struct nlmsgerr *err = (struct nlmsgerr *)NLMSG_DATA(n); @@ -330,6 +333,18 @@ static int iplink_parse_vf(int vf, int *argcp, char ***argvp, ivs.vf = vf; addattr_l(&req->n, sizeof(*req), IFLA_VF_SPOOFCHK, &ivs, sizeof(ivs)); + } else if (matches(*argv, "query_rss") == 0) { + struct ifla_vf_rss_query_en ivs; + NEXT_ARG(); + if (matches(*argv, "on") == 0) + ivs.setting = 1; + else if (matches(*argv, "off") == 0) + ivs.setting = 0; + else + invarg("Invalid \"query_rss\" value\n", *argv); + ivs.vf = vf; + addattr_l(&req->n, sizeof(*req), IFLA_VF_RSS_QUERY_EN, &ivs, sizeof(ivs)); + } else if (matches(*argv, "state") == 0) { struct ifla_vf_link_state ivl; NEXT_ARG(); @@ -386,6 +401,7 @@ int iplink_parse(int argc, char **argv, struct iplink_req *req, int numtxqueues = -1; int numrxqueues = -1; int dev_index = 0; + int link_netnsid = -1; *group = -1; ret = argc; @@ -588,6 +604,14 @@ int iplink_parse(int argc, char **argv, struct iplink_req *req, addattr8(&req->n, sizeof(*req), IFLA_INET6_ADDR_GEN_MODE, mode); addattr_nest_end(&req->n, afs6); addattr_nest_end(&req->n, afs); + } else if (matches(*argv, "link-netnsid") == 0) { + NEXT_ARG(); + if (link_netnsid != -1) + duparg("link-netnsid", *argv); + if (get_integer(&link_netnsid, *argv, 0)) + invarg("Invalid \"link-netnsid\" value\n", *argv); + addattr32(&req->n, sizeof(*req), IFLA_LINK_NETNSID, + link_netnsid); } else { if (strcmp(*argv, "dev") == 0) { NEXT_ARG(); @@ -651,7 +675,7 @@ static int iplink_modify(int cmd, unsigned int flags, int argc, char **argv) req.i.ifi_index = 0; addattr32(&req.n, sizeof(req), IFLA_GROUP, group); - if (rtnl_talk(&rth, &req.n, 0, 0, NULL) < 0) + if (rtnl_talk(&rth, &req.n, NULL, 0) < 0) exit(2); return 0; } @@ 
-750,7 +774,7 @@ static int iplink_modify(int cmd, unsigned int flags, int argc, char **argv) return -1; } - if (rtnl_talk(&rth, &req.n, 0, 0, NULL) < 0) + if (rtnl_talk(&rth, &req.n, NULL, 0) < 0) exit(2); return 0; @@ -760,7 +784,10 @@ int iplink_get(unsigned int flags, char *name, __u32 filt_mask) { int len; struct iplink_req req; - char answer[16384]; + struct { + struct nlmsghdr n; + char buf[16384]; + } answer; memset(&req, 0, sizeof(req)); @@ -780,10 +807,10 @@ int iplink_get(unsigned int flags, char *name, __u32 filt_mask) } addattr32(&req.n, sizeof(req), IFLA_EXT_MASK, filt_mask); - if (rtnl_talk(&rth, &req.n, 0, 0, (struct nlmsghdr *)answer) < 0) + if (rtnl_talk(&rth, &req.n, &answer.n, sizeof(answer)) < 0) return -2; - print_linkinfo(NULL, (struct nlmsghdr *)answer, stdout); + print_linkinfo(NULL, &answer.n, stdout); return 0; } diff --git a/ip/iplink_bond.c b/ip/iplink_bond.c index 3009ec91..2a9783e4 100644 --- a/ip/iplink_bond.c +++ b/ip/iplink_bond.c @@ -135,6 +135,9 @@ static void print_explain(FILE *f) " [ packets_per_slave PACKETS_PER_SLAVE ]\n" " [ lacp_rate LACP_RATE ]\n" " [ ad_select AD_SELECT ]\n" + " [ ad_user_port_key PORTKEY ]\n" + " [ ad_actor_sys_prio SYSPRIO ]\n" + " [ ad_actor_system LLADDR ]\n" "\n" "BONDMODE := balance-rr|active-backup|balance-xor|broadcast|802.3ad|balance-tlb|balance-alb\n" "ARP_VALIDATE := none|active|backup|all\n" @@ -158,6 +161,7 @@ static int bond_parse_opt(struct link_util *lu, int argc, char **argv, __u8 mode, use_carrier, primary_reselect, fail_over_mac; __u8 xmit_hash_policy, num_peer_notif, all_slaves_active; __u8 lacp_rate, ad_select; + __u16 ad_user_port_key, ad_actor_sys_prio; __u32 miimon, updelay, downdelay, arp_interval, arp_validate; __u32 arp_all_targets, resend_igmp, min_links, lp_interval; __u32 packets_per_slave; @@ -344,6 +348,32 @@ static int bond_parse_opt(struct link_util *lu, int argc, char **argv, } ad_select = get_index(ad_select_tbl, *argv); addattr8(n, 1024, IFLA_BOND_AD_SELECT, 
ad_select); + } else if (matches(*argv, "ad_user_port_key") == 0) { + NEXT_ARG(); + if (get_u16(&ad_user_port_key, *argv, 0)) { + invarg("invalid ad_user_port_key", *argv); + return -1; + } + addattr16(n, 1024, IFLA_BOND_AD_USER_PORT_KEY, + ad_user_port_key); + } else if (matches(*argv, "ad_actor_sys_prio") == 0) { + NEXT_ARG(); + if (get_u16(&ad_actor_sys_prio, *argv, 0)) { + invarg("invalid ad_actor_sys_prio", *argv); + return -1; + } + addattr16(n, 1024, IFLA_BOND_AD_ACTOR_SYS_PRIO, + ad_actor_sys_prio); + } else if (matches(*argv, "ad_actor_system") == 0) { + int len; + char abuf[32]; + + NEXT_ARG(); + len = ll_addr_a2n(abuf, sizeof(abuf), *argv); + if (len < 0) + return -1; + addattr_l(n, 1024, IFLA_BOND_AD_ACTOR_SYSTEM, + abuf, len); } else if (matches(*argv, "help") == 0) { explain(); return -1; @@ -415,6 +445,7 @@ static void bond_print_opt(struct link_util *lu, FILE *f, struct rtattr *tb[]) if (iptb[i]) fprintf(f, "%s", rt_addr_n2a(AF_INET, + RTA_PAYLOAD(iptb[i]), RTA_DATA(iptb[i]), buf, INET_ADDRSTRLEN)); @@ -533,6 +564,25 @@ static void bond_print_opt(struct link_util *lu, FILE *f, struct rtattr *tb[]) ll_addr_n2a(p, ETH_ALEN, 0, b, sizeof(b))); } } + + if (tb[IFLA_BOND_AD_ACTOR_SYS_PRIO]) { + fprintf(f, "ad_actor_sys_prio %u ", + rta_getattr_u16(tb[IFLA_BOND_AD_ACTOR_SYS_PRIO])); + } + + if (tb[IFLA_BOND_AD_USER_PORT_KEY]) { + fprintf(f, "ad_user_port_key %u ", + rta_getattr_u16(tb[IFLA_BOND_AD_USER_PORT_KEY])); + } + + if (tb[IFLA_BOND_AD_ACTOR_SYSTEM]) { + /* We assume the l2 address is an Ethernet MAC address */ + SPRINT_BUF(b1); + fprintf(f, "ad_actor_system %s ", + ll_addr_n2a(RTA_DATA(tb[IFLA_BOND_AD_ACTOR_SYSTEM]), + RTA_PAYLOAD(tb[IFLA_BOND_AD_ACTOR_SYSTEM]), + 1 /*ARPHDR_ETHER*/, b1, sizeof(b1))); + } } static void bond_print_help(struct link_util *lu, int argc, char **argv, diff --git a/ip/iplink_geneve.c b/ip/iplink_geneve.c new file mode 100644 index 00000000..74703e1e --- /dev/null +++ b/ip/iplink_geneve.c @@ -0,0 +1,122 @@ +/* + * 
iplink_geneve.c GENEVE device support + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * Authors: John W. Linville + */ + +#include + +#include "utils.h" +#include "ip_common.h" + +static void print_explain(FILE *f) +{ + fprintf(f, "Usage: ... geneve id VNI remote ADDR\n"); + fprintf(f, "\n"); + fprintf(f, "Where: VNI := 0-16777215\n"); + fprintf(f, " ADDR := IP_ADDRESS\n"); +} + +static void explain(void) +{ + print_explain(stderr); +} + +static int geneve_parse_opt(struct link_util *lu, int argc, char **argv, + struct nlmsghdr *n) +{ + __u32 vni = 0; + int vni_set = 0; + __u32 daddr = 0; + struct in6_addr daddr6 = IN6ADDR_ANY_INIT; + + + while (argc > 0) { + if (!matches(*argv, "id") || + !matches(*argv, "vni")) { + NEXT_ARG(); + if (get_u32(&vni, *argv, 0) || + vni >= 1u << 24) + invarg("invalid id", *argv); + vni_set = 1; + } else if (!matches(*argv, "remote")) { + NEXT_ARG(); + if (!inet_get_addr(*argv, &daddr, &daddr6)) { + fprintf(stderr, "Invalid address \"%s\"\n", *argv); + return -1; + } + if (IN_MULTICAST(ntohl(daddr))) + invarg("invalid remote address", *argv); + } else if (matches(*argv, "help") == 0) { + explain(); + return -1; + } else { + fprintf(stderr, "geneve: unknown command \"%s\"?\n", *argv); + explain(); + return -1; + } + argc--, argv++; + } + + if (!vni_set) { + fprintf(stderr, "geneve: missing virtual network identifier\n"); + return -1; + } + + if (!daddr) { + fprintf(stderr, "geneve: remove link partner not specified\n"); + return -1; + } + if (memcmp(&daddr6, &in6addr_any, sizeof(daddr6)) != 0) { + fprintf(stderr, "geneve: remove link over IPv6 not supported\n"); + return -1; + } + + addattr32(n, 1024, IFLA_GENEVE_ID, vni); + if (daddr) + addattr_l(n, 1024, IFLA_GENEVE_REMOTE, &daddr, 4); + + return 0; +} + +static void 
geneve_print_opt(struct link_util *lu, FILE *f, struct rtattr *tb[]) +{ + __u32 vni; + char s1[1024]; + + if (!tb) + return; + + if (!tb[IFLA_GENEVE_ID] || + RTA_PAYLOAD(tb[IFLA_GENEVE_ID]) < sizeof(__u32)) + return; + + vni = rta_getattr_u32(tb[IFLA_GENEVE_ID]); + fprintf(f, "id %u ", vni); + + if (tb[IFLA_GENEVE_REMOTE]) { + __be32 addr = rta_getattr_u32(tb[IFLA_GENEVE_REMOTE]); + if (addr) + fprintf(f, "remote %s ", + format_host(AF_INET, 4, &addr, s1, sizeof(s1))); + } +} + +static void geneve_print_help(struct link_util *lu, int argc, char **argv, + FILE *f) +{ + print_explain(f); +} + +struct link_util geneve_link_util = { + .id = "geneve", + .maxattr = IFLA_GENEVE_MAX, + .parse_opt = geneve_parse_opt, + .print_opt = geneve_print_opt, + .print_help = geneve_print_help, +}; diff --git a/ip/ipmonitor.c b/ip/ipmonitor.c index 5ec8f418..8bcf8822 100644 --- a/ip/ipmonitor.c +++ b/ip/ipmonitor.c @@ -26,18 +26,36 @@ static void usage(void) __attribute__((noreturn)); int prefix_banner; +int listen_all_nsid; static void usage(void) { - fprintf(stderr, "Usage: ip monitor [ all | LISTofOBJECTS ] [ FILE ]" - "[ label ] [dev DEVICE]\n"); + fprintf(stderr, "Usage: ip monitor [ all | LISTofOBJECTS ] [ FILE ] " + "[ label ] [all-nsid] [dev DEVICE]\n"); fprintf(stderr, "LISTofOBJECTS := link | address | route | mroute | prefix |\n"); - fprintf(stderr, " neigh | netconf\n"); + fprintf(stderr, " neigh | netconf | rule | nsid\n"); fprintf(stderr, "FILE := file FILENAME\n"); exit(-1); } +static void print_headers(FILE *fp, char *label, struct rtnl_ctrl_data *ctrl) +{ + if (timestamp) + print_timestamp(fp); + + if (listen_all_nsid) { + if (ctrl == NULL || ctrl->nsid < 0) + fprintf(fp, "[nsid current]"); + else + fprintf(fp, "[nsid %d]", ctrl->nsid); + } + + if (prefix_banner) + fprintf(fp, "%s", label); +} + static int accept_msg(const struct sockaddr_nl *who, + struct rtnl_ctrl_data *ctrl, struct nlmsghdr *n, void *arg) { FILE *fp = (FILE*)arg; @@ -54,42 +72,31 @@ static int 
accept_msg(const struct sockaddr_nl *who, if (r->rtm_flags & RTM_F_CLONED) return 0; - if (timestamp) - print_timestamp(fp); - if (r->rtm_family == RTNL_FAMILY_IPMR || r->rtm_family == RTNL_FAMILY_IP6MR) { - if (prefix_banner) - fprintf(fp, "[MROUTE]"); + print_headers(fp, "[MROUTE]", ctrl); print_mroute(who, n, arg); return 0; } else { - if (prefix_banner) - fprintf(fp, "[ROUTE]"); + print_headers(fp, "[ROUTE]", ctrl); print_route(who, n, arg); return 0; } } - if (timestamp) - print_timestamp(fp); - if (n->nlmsg_type == RTM_NEWLINK || n->nlmsg_type == RTM_DELLINK) { ll_remember_index(who, n, NULL); - if (prefix_banner) - fprintf(fp, "[LINK]"); + print_headers(fp, "[LINK]", ctrl); print_linkinfo(who, n, arg); return 0; } if (n->nlmsg_type == RTM_NEWADDR || n->nlmsg_type == RTM_DELADDR) { - if (prefix_banner) - fprintf(fp, "[ADDR]"); + print_headers(fp, "[ADDR]", ctrl); print_addrinfo(who, n, arg); return 0; } if (n->nlmsg_type == RTM_NEWADDRLABEL || n->nlmsg_type == RTM_DELADDRLABEL) { - if (prefix_banner) - fprintf(fp, "[ADDRLABEL]"); + print_headers(fp, "[ADDRLABEL]", ctrl); print_addrlabel(who, n, arg); return 0; } @@ -102,33 +109,34 @@ static int accept_msg(const struct sockaddr_nl *who, return 0; } - if (prefix_banner) - fprintf(fp, "[NEIGH]"); + print_headers(fp, "[NEIGH]", ctrl); print_neigh(who, n, arg); return 0; } if (n->nlmsg_type == RTM_NEWPREFIX) { - if (prefix_banner) - fprintf(fp, "[PREFIX]"); + print_headers(fp, "[PREFIX]", ctrl); print_prefix(who, n, arg); return 0; } if (n->nlmsg_type == RTM_NEWRULE || n->nlmsg_type == RTM_DELRULE) { - if (prefix_banner) - fprintf(fp, "[RULE]"); + print_headers(fp, "[RULE]", ctrl); print_rule(who, n, arg); return 0; } if (n->nlmsg_type == RTM_NEWNETCONF) { - if (prefix_banner) - fprintf(fp, "[NETCONF]"); - print_netconf(who, n, arg); + print_headers(fp, "[NETCONF]", ctrl); + print_netconf(who, ctrl, n, arg); return 0; } if (n->nlmsg_type == NLMSG_TSTAMP) { print_nlmsg_timestamp(fp, n); return 0; } + if 
(n->nlmsg_type == RTM_NEWNSID || n->nlmsg_type == RTM_DELNSID) { + print_headers(fp, "[NSID]", ctrl); + print_nsid(who, n, arg); + return 0; + } if (n->nlmsg_type != NLMSG_ERROR && n->nlmsg_type != NLMSG_NOOP && n->nlmsg_type != NLMSG_DONE) { fprintf(fp, "Unknown message: type=0x%08x(%d) flags=0x%08x(%d)" @@ -150,6 +158,8 @@ int do_ipmonitor(int argc, char **argv) int lprefix=0; int lneigh=0; int lnetconf=0; + int lrule=0; + int lnsid=0; int ifindex=0; groups |= nl_mgrp(RTNLGRP_LINK); @@ -157,12 +167,16 @@ int do_ipmonitor(int argc, char **argv) groups |= nl_mgrp(RTNLGRP_IPV6_IFADDR); groups |= nl_mgrp(RTNLGRP_IPV4_ROUTE); groups |= nl_mgrp(RTNLGRP_IPV6_ROUTE); + groups |= nl_mgrp(RTNLGRP_MPLS_ROUTE); groups |= nl_mgrp(RTNLGRP_IPV4_MROUTE); groups |= nl_mgrp(RTNLGRP_IPV6_MROUTE); groups |= nl_mgrp(RTNLGRP_IPV6_PREFIX); groups |= nl_mgrp(RTNLGRP_NEIGH); groups |= nl_mgrp(RTNLGRP_IPV4_NETCONF); groups |= nl_mgrp(RTNLGRP_IPV6_NETCONF); + groups |= nl_mgrp(RTNLGRP_IPV4_RULE); + groups |= nl_mgrp(RTNLGRP_IPV6_RULE); + groups |= nl_mgrp(RTNLGRP_NSID); rtnl_close(&rth); @@ -172,6 +186,8 @@ int do_ipmonitor(int argc, char **argv) file = *argv; } else if (matches(*argv, "label") == 0) { prefix_banner = 1; + } else if (matches(*argv, "all-nsid") == 0) { + listen_all_nsid = 1; } else if (matches(*argv, "link") == 0) { llink=1; groups = 0; @@ -193,6 +209,12 @@ int do_ipmonitor(int argc, char **argv) } else if (matches(*argv, "netconf") == 0) { lnetconf = 1; groups = 0; + } else if (matches(*argv, "rule") == 0) { + lrule = 1; + groups = 0; + } else if (matches(*argv, "nsid") == 0) { + lnsid = 1; + groups = 0; } else if (strcmp(*argv, "all") == 0) { prefix_banner=1; } else if (matches(*argv, "help") == 0) { @@ -229,6 +251,8 @@ int do_ipmonitor(int argc, char **argv) groups |= nl_mgrp(RTNLGRP_IPV4_ROUTE); if (!preferred_family || preferred_family == AF_INET6) groups |= nl_mgrp(RTNLGRP_IPV6_ROUTE); + if (!preferred_family || preferred_family == AF_MPLS) + groups |= 
nl_mgrp(RTNLGRP_MPLS_ROUTE); } if (lmroute) { if (!preferred_family || preferred_family == AF_INET) @@ -249,6 +273,15 @@ int do_ipmonitor(int argc, char **argv) if (!preferred_family || preferred_family == AF_INET6) groups |= nl_mgrp(RTNLGRP_IPV6_NETCONF); } + if (lrule) { + if (!preferred_family || preferred_family == AF_INET) + groups |= nl_mgrp(RTNLGRP_IPV4_RULE); + if (!preferred_family || preferred_family == AF_INET6) + groups |= nl_mgrp(RTNLGRP_IPV6_RULE); + } + if (lnsid) { + groups |= nl_mgrp(RTNLGRP_NSID); + } if (file) { FILE *fp; fp = fopen(file, "r"); @@ -261,7 +294,11 @@ int do_ipmonitor(int argc, char **argv) if (rtnl_open(&rth, groups) < 0) exit(1); + if (listen_all_nsid && rtnl_listen_all_nsid(&rth) < 0) + exit(1); + ll_init_map(&rth); + netns_map_init(); if (rtnl_listen(&rth, accept_msg, stdout) < 0) exit(2); diff --git a/ip/ipmroute.c b/ip/ipmroute.c index b4ed9f15..125a13f8 100644 --- a/ip/ipmroute.c +++ b/ip/ipmroute.c @@ -67,8 +67,7 @@ int print_mroute(const struct sockaddr_nl *who, struct nlmsghdr *n, void *arg) int family; if ((n->nlmsg_type != RTM_NEWROUTE && - n->nlmsg_type != RTM_DELROUTE) || - !(n->nlmsg_flags & NLM_F_MULTI)) { + n->nlmsg_type != RTM_DELROUTE)) { fprintf(stderr, "Not a multicast route: %08x %08x %08x\n", n->nlmsg_len, n->nlmsg_type, n->nlmsg_flags); return 0; @@ -116,6 +115,7 @@ int print_mroute(const struct sockaddr_nl *who, struct nlmsghdr *n, void *arg) if (tb[RTA_SRC]) len = snprintf(obuf, sizeof(obuf), "(%s, ", rt_addr_n2a(family, + RTA_PAYLOAD(tb[RTA_SRC]), RTA_DATA(tb[RTA_SRC]), abuf, sizeof(abuf))); else @@ -123,6 +123,7 @@ int print_mroute(const struct sockaddr_nl *who, struct nlmsghdr *n, void *arg) if (tb[RTA_DST]) snprintf(obuf + len, sizeof(obuf) - len, "%s)", rt_addr_n2a(family, + RTA_PAYLOAD(tb[RTA_DST]), RTA_DATA(tb[RTA_DST]), abuf, sizeof(abuf))); else diff --git a/ip/ipneigh.c b/ip/ipneigh.c index eeec7bdd..a9e23f45 100644 --- a/ip/ipneigh.c +++ b/ip/ipneigh.c @@ -179,7 +179,7 @@ static int 
ipneigh_modify(int cmd, int flags, int argc, char **argv) return -1; } - if (rtnl_talk(&rth, &req.n, 0, 0, NULL) < 0) + if (rtnl_talk(&rth, &req.n, NULL, 0) < 0) exit(2); return 0; diff --git a/ip/ipnetconf.c b/ip/ipnetconf.c index aa31ead0..eca6eeee 100644 --- a/ip/ipnetconf.c +++ b/ip/ipnetconf.c @@ -40,7 +40,8 @@ static void usage(void) #define NETCONF_RTA(r) ((struct rtattr*)(((char*)(r)) + NLMSG_ALIGN(sizeof(struct netconfmsg)))) -int print_netconf(const struct sockaddr_nl *who, struct nlmsghdr *n, void *arg) +int print_netconf(const struct sockaddr_nl *who, struct rtnl_ctrl_data *ctrl, + struct nlmsghdr *n, void *arg) { FILE *fp = (FILE*)arg; struct netconfmsg *ncm = NLMSG_DATA(n); @@ -123,6 +124,12 @@ int print_netconf(const struct sockaddr_nl *who, struct nlmsghdr *n, void *arg) return 0; } +static int print_netconf2(const struct sockaddr_nl *who, + struct nlmsghdr *n, void *arg) +{ + return print_netconf(who, NULL, n, arg); +} + void ipnetconf_reset_filter(int ifindex) { memset(&filter, 0, sizeof(filter)); @@ -177,7 +184,7 @@ dump: perror("Cannot send dump request"); exit(1); } - if (rtnl_dump_filter(&rth, print_netconf, stdout) < 0) { + if (rtnl_dump_filter(&rth, print_netconf2, stdout) < 0) { fprintf(stderr, "Dump terminated\n"); exit(1); } diff --git a/ip/ipnetns.c b/ip/ipnetns.c index e4038ea7..0c28f8a7 100644 --- a/ip/ipnetns.c +++ b/ip/ipnetns.c @@ -14,8 +14,12 @@ #include #include #include +#include + +#include #include "utils.h" +#include "hlist.h" #include "ip_common.h" #include "namespace.h" @@ -23,18 +27,310 @@ static int usage(void) { fprintf(stderr, "Usage: ip netns list\n"); fprintf(stderr, " ip netns add NAME\n"); + fprintf(stderr, " ip netns set NAME NETNSID\n"); fprintf(stderr, " ip [-all] netns delete [NAME]\n"); fprintf(stderr, " ip netns identify [PID]\n"); fprintf(stderr, " ip netns pids NAME\n"); fprintf(stderr, " ip [-all] netns exec [NAME] cmd ...\n"); fprintf(stderr, " ip netns monitor\n"); + fprintf(stderr, " ip netns list-id\n"); 
exit(-1); } +/* This socket is used to get nsid */ +static struct rtnl_handle rtnsh = { .fd = -1 }; + +static int have_rtnl_getnsid = -1; + +static int ipnetns_accept_msg(const struct sockaddr_nl *who, + struct rtnl_ctrl_data *ctrl, + struct nlmsghdr *n, void *arg) +{ + struct nlmsgerr *err = (struct nlmsgerr *)NLMSG_DATA(n); + + if (n->nlmsg_type == NLMSG_ERROR && + (err->error == -EOPNOTSUPP || err->error == -EINVAL)) + have_rtnl_getnsid = 0; + else + have_rtnl_getnsid = 1; + return -1; +} + +static int ipnetns_have_nsid(void) +{ + struct { + struct nlmsghdr n; + struct rtgenmsg g; + char buf[1024]; + } req; + int fd; + + if (have_rtnl_getnsid < 0) { + memset(&req, 0, sizeof(req)); + req.n.nlmsg_len = NLMSG_LENGTH(sizeof(struct rtgenmsg)); + req.n.nlmsg_flags = NLM_F_REQUEST; + req.n.nlmsg_type = RTM_GETNSID; + req.g.rtgen_family = AF_UNSPEC; + + fd = open("/proc/self/ns/net", O_RDONLY); + if (fd < 0) { + perror("open(\"/proc/self/ns/net\")"); + exit(1); + } + + addattr32(&req.n, 1024, NETNSA_FD, fd); + + if (rtnl_send(&rth, &req.n, req.n.nlmsg_len) < 0) { + perror("request send failed"); + exit(1); + } + rtnl_listen(&rth, ipnetns_accept_msg, NULL); + close(fd); + } + + return have_rtnl_getnsid; +} + +static int get_netnsid_from_name(const char *name) +{ + struct { + struct nlmsghdr n; + struct rtgenmsg g; + char buf[1024]; + } req, answer; + struct rtattr *tb[NETNSA_MAX + 1]; + struct rtgenmsg *rthdr; + int len, fd; + + memset(&req, 0, sizeof(req)); + req.n.nlmsg_len = NLMSG_LENGTH(sizeof(struct rtgenmsg)); + req.n.nlmsg_flags = NLM_F_REQUEST; + req.n.nlmsg_type = RTM_GETNSID; + req.g.rtgen_family = AF_UNSPEC; + + fd = netns_get_fd(name); + if (fd < 0) + return fd; + + addattr32(&req.n, 1024, NETNSA_FD, fd); + if (rtnl_talk(&rtnsh, &req.n, &answer.n, sizeof(answer)) < 0) { + close(fd); + return -2; + } + close(fd); + + /* Validate message and parse attributes */ + if (answer.n.nlmsg_type == NLMSG_ERROR) + return -1; + + rthdr = NLMSG_DATA(&answer.n); + len = 
answer.n.nlmsg_len - NLMSG_SPACE(sizeof(*rthdr)); + if (len < 0) + return -1; + + parse_rtattr(tb, NETNSA_MAX, NETNS_RTA(rthdr), len); + + if (tb[NETNSA_NSID]) + return rta_getattr_u32(tb[NETNSA_NSID]); + + return -1; +} + +struct nsid_cache { + struct hlist_node nsid_hash; + struct hlist_node name_hash; + int nsid; + char name[NAME_MAX]; +}; + +#define NSIDMAP_SIZE 128 +#define NSID_HASH_NSID(nsid) (nsid & (NSIDMAP_SIZE - 1)) +#define NSID_HASH_NAME(name) (namehash(name) & (NSIDMAP_SIZE - 1)) + +static struct hlist_head nsid_head[NSIDMAP_SIZE]; +static struct hlist_head name_head[NSIDMAP_SIZE]; + +static struct nsid_cache *netns_map_get_by_nsid(int nsid) +{ + uint32_t h = NSID_HASH_NSID(nsid); + struct hlist_node *n; + + hlist_for_each(n, &nsid_head[h]) { + struct nsid_cache *c = container_of(n, struct nsid_cache, + nsid_hash); + if (c->nsid == nsid) + return c; + } + + return NULL; +} + +static int netns_map_add(int nsid, char *name) +{ + struct nsid_cache *c; + uint32_t h; + + if (netns_map_get_by_nsid(nsid) != NULL) + return -EEXIST; + + c = malloc(sizeof(*c)); + if (c == NULL) { + perror("malloc"); + return -ENOMEM; + } + c->nsid = nsid; + strcpy(c->name, name); + + h = NSID_HASH_NSID(nsid); + hlist_add_head(&c->nsid_hash, &nsid_head[h]); + + h = NSID_HASH_NAME(name); + hlist_add_head(&c->name_hash, &name_head[h]); + + return 0; +} + +static void netns_map_del(struct nsid_cache *c) +{ + hlist_del(&c->name_hash); + hlist_del(&c->nsid_hash); + free(c); +} + +void netns_map_init(void) +{ + static int initialized; + struct dirent *entry; + DIR *dir; + int nsid; + + if (initialized || !ipnetns_have_nsid()) + return; + + if (rtnl_open(&rtnsh, 0) < 0) { + fprintf(stderr, "Cannot open rtnetlink\n"); + exit(1); + } + + dir = opendir(NETNS_RUN_DIR); + if (!dir) + return; + + while ((entry = readdir(dir)) != NULL) { + if (strcmp(entry->d_name, ".") == 0) + continue; + if (strcmp(entry->d_name, "..") == 0) + continue; + nsid = get_netnsid_from_name(entry->d_name); + + if 
(nsid >= 0) + netns_map_add(nsid, entry->d_name); + } + closedir(dir); + initialized = 1; +} + +static int netns_get_name(int nsid, char *name) +{ + struct dirent *entry; + DIR *dir; + int id; + + dir = opendir(NETNS_RUN_DIR); + if (!dir) + return -ENOENT; + + while ((entry = readdir(dir)) != NULL) { + if (strcmp(entry->d_name, ".") == 0) + continue; + if (strcmp(entry->d_name, "..") == 0) + continue; + id = get_netnsid_from_name(entry->d_name); + + if (nsid == id) { + strcpy(name, entry->d_name); + closedir(dir); + return 0; + } + } + closedir(dir); + return -ENOENT; +} + +int print_nsid(const struct sockaddr_nl *who, struct nlmsghdr *n, void *arg) +{ + struct rtgenmsg *rthdr = NLMSG_DATA(n); + struct rtattr *tb[NETNSA_MAX+1]; + int len = n->nlmsg_len; + FILE *fp = (FILE *)arg; + struct nsid_cache *c; + char name[NAME_MAX]; + int nsid; + + if (n->nlmsg_type != RTM_NEWNSID && n->nlmsg_type != RTM_DELNSID) + return 0; + + len -= NLMSG_SPACE(sizeof(*rthdr)); + if (len < 0) { + fprintf(stderr, "BUG: wrong nlmsg len %d in %s\n", len, + __func__); + return -1; + } + + parse_rtattr(tb, NETNSA_MAX, NETNS_RTA(rthdr), len); + if (tb[NETNSA_NSID] == NULL) { + fprintf(stderr, "BUG: NETNSA_NSID is missing %s\n", __func__); + return -1; + } + + if (n->nlmsg_type == RTM_DELNSID) + fprintf(fp, "Deleted "); + + nsid = rta_getattr_u32(tb[NETNSA_NSID]); + fprintf(fp, "nsid %u ", nsid); + + c = netns_map_get_by_nsid(nsid); + if (c != NULL) { + fprintf(fp, "(iproute2 netns name: %s)", c->name); + netns_map_del(c); + } + + /* During 'ip monitor nsid', no chance to have new nsid in cache. 
*/ + if (c == NULL && n->nlmsg_type == RTM_NEWNSID) + if (netns_get_name(nsid, name) == 0) { + fprintf(fp, "(iproute2 netns name: %s)", name); + netns_map_add(nsid, name); + } + + fprintf(fp, "\n"); + fflush(fp); + return 0; +} + +static int netns_list_id(int argc, char **argv) +{ + if (!ipnetns_have_nsid()) { + fprintf(stderr, + "RTM_GETNSID is not supported by the kernel.\n"); + return -ENOTSUP; + } + + if (rtnl_wilddump_request(&rth, AF_UNSPEC, RTM_GETNSID) < 0) { + perror("Cannot send dump request"); + exit(1); + } + if (rtnl_dump_filter(&rth, print_nsid, stdout) < 0) { + fprintf(stderr, "Dump terminated\n"); + exit(1); + } + return 0; +} + static int netns_list(int argc, char **argv) { struct dirent *entry; DIR *dir; + int id; dir = opendir(NETNS_RUN_DIR); if (!dir) @@ -45,7 +341,13 @@ static int netns_list(int argc, char **argv) continue; if (strcmp(entry->d_name, "..") == 0) continue; - printf("%s\n", entry->d_name); + printf("%s", entry->d_name); + if (ipnetns_have_nsid()) { + id = get_netnsid_from_name(entry->d_name); + if (id >= 0) + printf(" (id: %d)", id); + } + printf("\n"); } closedir(dir); return 0; @@ -375,6 +677,61 @@ out_delete: return -1; } +static int set_netnsid_from_name(const char *name, int nsid) +{ + struct { + struct nlmsghdr n; + struct rtgenmsg g; + char buf[1024]; + } req; + int fd, err = 0; + + memset(&req, 0, sizeof(req)); + req.n.nlmsg_len = NLMSG_LENGTH(sizeof(struct rtgenmsg)); + req.n.nlmsg_flags = NLM_F_REQUEST; + req.n.nlmsg_type = RTM_NEWNSID; + req.g.rtgen_family = AF_UNSPEC; + + fd = netns_get_fd(name); + if (fd < 0) + return fd; + + addattr32(&req.n, 1024, NETNSA_FD, fd); + addattr32(&req.n, 1024, NETNSA_NSID, nsid); + if (rtnl_talk(&rth, &req.n, NULL, 0) < 0) + err = -2; + + close(fd); + return err; +} + +static int netns_set(int argc, char **argv) +{ + char netns_path[MAXPATHLEN]; + const char *name; + int netns, nsid; + + if (argc < 1) { + fprintf(stderr, "No netns name specified\n"); + return -1; + } + if (argc < 2) { + 
fprintf(stderr, "No nsid specified\n"); + return -1; + } + name = argv[0]; + nsid = atoi(argv[1]); + + snprintf(netns_path, sizeof(netns_path), "%s/%s", NETNS_RUN_DIR, name); + netns = open(netns_path, O_RDONLY | O_CLOEXEC); + if (netns < 0) { + fprintf(stderr, "Cannot open network namespace \"%s\": %s\n", + name, strerror(errno)); + return -1; + } + + return set_netnsid_from_name(name, nsid); +} static int netns_monitor(int argc, char **argv) { @@ -417,6 +774,8 @@ static int netns_monitor(int argc, char **argv) int do_netns(int argc, char **argv) { + netns_map_init(); + if (argc < 1) return netns_list(0, NULL); @@ -424,12 +783,18 @@ int do_netns(int argc, char **argv) (matches(*argv, "lst") == 0)) return netns_list(argc-1, argv+1); + if ((matches(*argv, "list-id") == 0)) + return netns_list_id(argc-1, argv+1); + if (matches(*argv, "help") == 0) return usage(); if (matches(*argv, "add") == 0) return netns_add(argc-1, argv+1); + if (matches(*argv, "set") == 0) + return netns_set(argc-1, argv+1); + if (matches(*argv, "delete") == 0) return netns_delete(argc-1, argv+1); diff --git a/ip/ipntable.c b/ip/ipntable.c index ea7ca2d2..5e84b951 100644 --- a/ip/ipntable.c +++ b/ip/ipntable.c @@ -313,7 +313,7 @@ static int ipntable_modify(int cmd, int flags, int argc, char **argv) RTA_PAYLOAD(parms_rta)); } - if (rtnl_talk(&rth, &req.n, 0, 0, NULL) < 0) + if (rtnl_talk(&rth, &req.n, NULL, 0) < 0) exit(2); return 0; diff --git a/ip/ipprefix.c b/ip/ipprefix.c index 02c0efce..26b59615 100644 --- a/ip/ipprefix.c +++ b/ip/ipprefix.c @@ -80,7 +80,9 @@ int print_prefix(const struct sockaddr_nl *who, struct nlmsghdr *n, void *arg) pfx = (struct in6_addr *)RTA_DATA(tb[PREFIX_ADDRESS]); memset(abuf, '\0', sizeof(abuf)); - fprintf(fp, "%s", rt_addr_n2a(family, pfx, + fprintf(fp, "%s", rt_addr_n2a(family, + RTA_PAYLOAD(tb[PREFIX_ADDRESS]), + pfx, abuf, sizeof(abuf))); } fprintf(fp, "/%u ", prefix->prefix_len); diff --git a/ip/iproute.c b/ip/iproute.c index 76d8e36c..3795baf1 100644 --- 
a/ip/iproute.c +++ b/ip/iproute.c @@ -23,6 +23,7 @@ #include #include #include +#include #include #include "rt_names.h" @@ -75,19 +76,22 @@ static void usage(void) fprintf(stderr, " [ table TABLE_ID ] [ proto RTPROTO ]\n"); fprintf(stderr, " [ scope SCOPE ] [ metric METRIC ]\n"); fprintf(stderr, "INFO_SPEC := NH OPTIONS FLAGS [ nexthop NH ]...\n"); - fprintf(stderr, "NH := [ via ADDRESS ] [ dev STRING ] [ weight NUMBER ] NHFLAGS\n"); - fprintf(stderr, "OPTIONS := FLAGS [ mtu NUMBER ] [ advmss NUMBER ]\n"); + fprintf(stderr, "NH := [ via [ FAMILY ] ADDRESS ] [ dev STRING ] [ weight NUMBER ] NHFLAGS\n"); + fprintf(stderr, "FAMILY := [ inet | inet6 | ipx | dnet | mpls | bridge | link ]\n"); + fprintf(stderr, "OPTIONS := FLAGS [ mtu NUMBER ] [ advmss NUMBER ] [ as [ to ] ADDRESS ]\n"); fprintf(stderr, " [ rtt TIME ] [ rttvar TIME ] [ reordering NUMBER ]\n"); fprintf(stderr, " [ window NUMBER] [ cwnd NUMBER ] [ initcwnd NUMBER ]\n"); fprintf(stderr, " [ ssthresh NUMBER ] [ realms REALM ] [ src ADDRESS ]\n"); fprintf(stderr, " [ rto_min TIME ] [ hoplimit NUMBER ] [ initrwnd NUMBER ]\n"); fprintf(stderr, " [ features FEATURES ] [ quickack BOOL ] [ congctl NAME ]\n"); + fprintf(stderr, " [ pref PREF ]\n"); fprintf(stderr, "TYPE := [ unicast | local | broadcast | multicast | throw |\n"); fprintf(stderr, " unreachable | prohibit | blackhole | nat ]\n"); fprintf(stderr, "TABLE_ID := [ local | main | default | all | NUMBER ]\n"); fprintf(stderr, "SCOPE := [ host | link | global | NUMBER ]\n"); fprintf(stderr, "NHFLAGS := [ onlink | pervasive ]\n"); fprintf(stderr, "RTPROTO := [ kernel | boot | static | NUMBER ]\n"); + fprintf(stderr, "PREF := [ low | medium | high ]\n"); fprintf(stderr, "TIME := NUMBER[s|ms]\n"); fprintf(stderr, "BOOL := [1|0]\n"); fprintf(stderr, "FEATURES := ecn\n"); @@ -185,8 +189,15 @@ static int filter_nlmsg(struct nlmsghdr *n, struct rtattr **tb, int host_len) (r->rtm_family != filter.msrc.family || (filter.msrc.bitlen >= 0 && filter.msrc.bitlen < 
r->rtm_src_len))) return 0; - if (filter.rvia.family && r->rtm_family != filter.rvia.family) - return 0; + if (filter.rvia.family) { + int family = r->rtm_family; + if (tb[RTA_VIA]) { + struct rtvia *via = RTA_DATA(tb[RTA_VIA]); + family = via->rtvia_family; + } + if (family != filter.rvia.family) + return 0; + } if (filter.rprefsrc.family && r->rtm_family != filter.rprefsrc.family) return 0; @@ -205,6 +216,12 @@ static int filter_nlmsg(struct nlmsghdr *n, struct rtattr **tb, int host_len) via.family = r->rtm_family; if (tb[RTA_GATEWAY]) memcpy(&via.data, RTA_DATA(tb[RTA_GATEWAY]), host_len/8); + if (tb[RTA_VIA]) { + size_t len = RTA_PAYLOAD(tb[RTA_VIA]) - 2; + struct rtvia *rtvia = RTA_DATA(tb[RTA_VIA]); + via.family = rtvia->rtvia_family; + memcpy(&via.data, rtvia->rtvia_addr, len); + } } if (filter.rprefsrc.bitlen>0) { memset(&prefsrc, 0, sizeof(prefsrc)); @@ -268,20 +285,6 @@ static int filter_nlmsg(struct nlmsghdr *n, struct rtattr **tb, int host_len) return 1; } -static int calc_host_len(const struct rtmsg *r) -{ - if (r->rtm_family == AF_INET6) - return 128; - else if (r->rtm_family == AF_INET) - return 32; - else if (r->rtm_family == AF_DECnet) - return 16; - else if (r->rtm_family == AF_IPX) - return 80; - else - return -1; -} - static void print_rtax_features(FILE *fp, unsigned int features) { unsigned int of = features; @@ -302,7 +305,7 @@ int print_route(const struct sockaddr_nl *who, struct nlmsghdr *n, void *arg) int len = n->nlmsg_len; struct rtattr * tb[RTA_MAX+1]; char abuf[256]; - int host_len = -1; + int host_len; __u32 table; SPRINT_BUF(b1); static int hz; @@ -320,7 +323,7 @@ int print_route(const struct sockaddr_nl *who, struct nlmsghdr *n, void *arg) return -1; } - host_len = calc_host_len(r); + host_len = af_bit_len(r->rtm_family); parse_rtattr(tb, RTA_MAX, RTM_RTA(r), len); table = rtm_get_table(r, tb); @@ -353,8 +356,9 @@ int print_route(const struct sockaddr_nl *who, struct nlmsghdr *n, void *arg) if (tb[RTA_DST]) { if (r->rtm_dst_len != 
host_len) { fprintf(fp, "%s/%u ", rt_addr_n2a(r->rtm_family, - RTA_DATA(tb[RTA_DST]), - abuf, sizeof(abuf)), + RTA_PAYLOAD(tb[RTA_DST]), + RTA_DATA(tb[RTA_DST]), + abuf, sizeof(abuf)), r->rtm_dst_len ); } else { @@ -372,8 +376,9 @@ int print_route(const struct sockaddr_nl *who, struct nlmsghdr *n, void *arg) if (tb[RTA_SRC]) { if (r->rtm_src_len != host_len) { fprintf(fp, "from %s/%u ", rt_addr_n2a(r->rtm_family, - RTA_DATA(tb[RTA_SRC]), - abuf, sizeof(abuf)), + RTA_PAYLOAD(tb[RTA_SRC]), + RTA_DATA(tb[RTA_SRC]), + abuf, sizeof(abuf)), r->rtm_src_len ); } else { @@ -386,6 +391,13 @@ int print_route(const struct sockaddr_nl *who, struct nlmsghdr *n, void *arg) } else if (r->rtm_src_len) { fprintf(fp, "from 0/%u ", r->rtm_src_len); } + if (tb[RTA_NEWDST]) { + fprintf(fp, "as to %s ", format_host(r->rtm_family, + RTA_PAYLOAD(tb[RTA_NEWDST]), + RTA_DATA(tb[RTA_NEWDST]), + abuf, sizeof(abuf)) + ); + } if (r->rtm_tos && filter.tosmask != -1) { SPRINT_BUF(b1); fprintf(fp, "tos %s ", rtnl_dsfield_n2a(r->rtm_tos, b1, sizeof(b1))); @@ -398,6 +410,14 @@ int print_route(const struct sockaddr_nl *who, struct nlmsghdr *n, void *arg) RTA_DATA(tb[RTA_GATEWAY]), abuf, sizeof(abuf))); } + if (tb[RTA_VIA]) { + size_t len = RTA_PAYLOAD(tb[RTA_VIA]) - 2; + struct rtvia *via = RTA_DATA(tb[RTA_VIA]); + fprintf(fp, "via %s %s ", + family_name(via->rtvia_family), + format_host(via->rtvia_family, len, via->rtvia_addr, + abuf, sizeof(abuf))); + } if (tb[RTA_OIF] && filter.oifmask != -1) fprintf(fp, "dev %s ", ll_index_to_name(*(int*)RTA_DATA(tb[RTA_OIF]))); @@ -415,6 +435,7 @@ int print_route(const struct sockaddr_nl *who, struct nlmsghdr *n, void *arg) */ fprintf(fp, " src %s ", rt_addr_n2a(r->rtm_family, + RTA_PAYLOAD(tb[RTA_PREFSRC]), RTA_DATA(tb[RTA_PREFSRC]), abuf, sizeof(abuf))); } @@ -426,6 +447,8 @@ int print_route(const struct sockaddr_nl *who, struct nlmsghdr *n, void *arg) fprintf(fp, "onlink "); if (r->rtm_flags & RTNH_F_PERVASIVE) fprintf(fp, "pervasive "); + if (r->rtm_flags & 
RTNH_F_OFFLOAD) + fprintf(fp, "offload "); if (r->rtm_flags & RTM_F_NOTIFY) fprintf(fp, "notify "); if (tb[RTA_MARK]) { @@ -612,6 +635,14 @@ int print_route(const struct sockaddr_nl *who, struct nlmsghdr *n, void *arg) RTA_DATA(tb[RTA_GATEWAY]), abuf, sizeof(abuf))); } + if (tb[RTA_VIA]) { + size_t len = RTA_PAYLOAD(tb[RTA_VIA]) - 2; + struct rtvia *via = RTA_DATA(tb[RTA_VIA]); + fprintf(fp, "via %s %s ", + family_name(via->rtvia_family), + format_host(via->rtvia_family, len, via->rtvia_addr, + abuf, sizeof(abuf))); + } if (tb[RTA_FLOW]) { __u32 to = rta_getattr_u32(tb[RTA_FLOW]); __u32 from = to>>16; @@ -643,6 +674,24 @@ int print_route(const struct sockaddr_nl *who, struct nlmsghdr *n, void *arg) nh = RTNH_NEXT(nh); } } + if (tb[RTA_PREF]) { + unsigned int pref = rta_getattr_u8(tb[RTA_PREF]); + fprintf(fp, " pref "); + + switch (pref) { + case ICMPV6_ROUTER_PREF_LOW: + fprintf(fp, "low"); + break; + case ICMPV6_ROUTER_PREF_MEDIUM: + fprintf(fp, "medium"); + break; + case ICMPV6_ROUTER_PREF_HIGH: + fprintf(fp, "high"); + break; + default: + fprintf(fp, "%u", pref); + } + } fprintf(fp, "\n"); fflush(fp); return 0; @@ -659,12 +708,23 @@ static int parse_one_nh(struct rtmsg *r, struct rtattr *rta, while (++argv, --argc > 0) { if (strcmp(*argv, "via") == 0) { inet_prefix addr; + int family; NEXT_ARG(); - get_addr(&addr, *argv, r->rtm_family); + family = read_family(*argv); + if (family == AF_UNSPEC) + family = r->rtm_family; + else + NEXT_ARG(); + get_addr(&addr, *argv, family); if (r->rtm_family == AF_UNSPEC) r->rtm_family = addr.family; - rta_addattr_l(rta, 4096, RTA_GATEWAY, &addr.data, addr.bytelen); - rtnh->rtnh_len += sizeof(struct rtattr) + addr.bytelen; + if (addr.family == r->rtm_family) { + rta_addattr_l(rta, 4096, RTA_GATEWAY, &addr.data, addr.bytelen); + rtnh->rtnh_len += sizeof(struct rtattr) + addr.bytelen; + } else { + rta_addattr_l(rta, 4096, RTA_VIA, &addr.family, addr.bytelen+2); + rtnh->rtnh_len += sizeof(struct rtattr) + addr.bytelen+2; + } } else 
if (strcmp(*argv, "dev") == 0) { NEXT_ARG(); if ((rtnh->rtnh_ifindex = ll_name_to_index(*argv)) == 0) { @@ -770,14 +830,33 @@ static int iproute_modify(int cmd, unsigned flags, int argc, char **argv) if (req.r.rtm_family == AF_UNSPEC) req.r.rtm_family = addr.family; addattr_l(&req.n, sizeof(req), RTA_PREFSRC, &addr.data, addr.bytelen); - } else if (strcmp(*argv, "via") == 0) { + } else if (strcmp(*argv, "as") == 0) { inet_prefix addr; - gw_ok = 1; NEXT_ARG(); + if (strcmp(*argv, "to") == 0) { + NEXT_ARG(); + } get_addr(&addr, *argv, req.r.rtm_family); if (req.r.rtm_family == AF_UNSPEC) req.r.rtm_family = addr.family; - addattr_l(&req.n, sizeof(req), RTA_GATEWAY, &addr.data, addr.bytelen); + addattr_l(&req.n, sizeof(req), RTA_NEWDST, &addr.data, addr.bytelen); + } else if (strcmp(*argv, "via") == 0) { + inet_prefix addr; + int family; + gw_ok = 1; + NEXT_ARG(); + family = read_family(*argv); + if (family == AF_UNSPEC) + family = req.r.rtm_family; + else + NEXT_ARG(); + get_addr(&addr, *argv, family); + if (req.r.rtm_family == AF_UNSPEC) + req.r.rtm_family = addr.family; + if (addr.family == req.r.rtm_family) + addattr_l(&req.n, sizeof(req), RTA_GATEWAY, &addr.data, addr.bytelen); + else + addattr_l(&req.n, sizeof(req), RTA_VIA, &addr.family, addr.bytelen+2); } else if (strcmp(*argv, "from") == 0) { inet_prefix addr; NEXT_ARG(); @@ -796,7 +875,7 @@ static int iproute_modify(int cmd, unsigned flags, int argc, char **argv) req.r.rtm_tos = tos; } else if (matches(*argv, "metric") == 0 || matches(*argv, "priority") == 0 || - matches(*argv, "preference") == 0) { + strcmp(*argv, "preference") == 0) { __u32 metric; NEXT_ARG(); if (get_u32(&metric, *argv, 0)) @@ -993,6 +1072,18 @@ static int iproute_modify(int cmd, unsigned flags, int argc, char **argv) strcmp(*argv, "oif") == 0) { NEXT_ARG(); d = *argv; + } else if (matches(*argv, "pref") == 0) { + __u8 pref; + NEXT_ARG(); + if (strcmp(*argv, "low") == 0) + pref = ICMPV6_ROUTER_PREF_LOW; + else if (strcmp(*argv, "medium") 
== 0) + pref = ICMPV6_ROUTER_PREF_MEDIUM; + else if (strcmp(*argv, "high") == 0) + pref = ICMPV6_ROUTER_PREF_HIGH; + else if (get_u8(&pref, *argv, 0)) + invarg("\"pref\" value is invalid\n", *argv); + addattr8(&req.n, sizeof(req), RTA_PREF, pref); } else { int type; inet_prefix dst; @@ -1072,8 +1163,8 @@ static int iproute_modify(int cmd, unsigned flags, int argc, char **argv) if (req.r.rtm_family == AF_UNSPEC) req.r.rtm_family = AF_INET; - if (rtnl_talk(&rth, &req.n, 0, 0, NULL) < 0) - exit(2); + if (rtnl_talk(&rth, &req.n, NULL, 0) < 0) + return -2; return 0; } @@ -1134,9 +1225,9 @@ static int save_route(const struct sockaddr_nl *who, struct nlmsghdr *n, int len = n->nlmsg_len; struct rtmsg *r = NLMSG_DATA(n); struct rtattr *tb[RTA_MAX+1]; - int host_len = -1; + int host_len; - host_len = calc_host_len(r); + host_len = af_bit_len(r->rtm_family); len -= NLMSG_LENGTH(sizeof(*r)); parse_rtattr(tb, RTA_MAX, RTM_RTA(r), len); @@ -1262,8 +1353,14 @@ static int iproute_list_flush_or_save(int argc, char **argv, int action) get_unsigned(&mark, *argv, 0); filter.markmask = -1; } else if (strcmp(*argv, "via") == 0) { + int family; NEXT_ARG(); - get_prefix(&filter.rvia, *argv, do_ipv6); + family = read_family(*argv); + if (family == AF_UNSPEC) + family = do_ipv6; + else + NEXT_ARG(); + get_prefix(&filter.rvia, *argv, family); } else if (strcmp(*argv, "src") == 0) { NEXT_ARG(); get_prefix(&filter.rprefsrc, *argv, do_ipv6); @@ -1529,7 +1626,7 @@ static int iproute_get(int argc, char **argv) if (req.r.rtm_family == AF_UNSPEC) req.r.rtm_family = AF_INET; - if (rtnl_talk(&rth, &req.n, 0, 0, &req.n) < 0) + if (rtnl_talk(&rth, &req.n, &req.n, sizeof(req)) < 0) exit(2); if (connected && !from_ok) { @@ -1565,12 +1662,14 @@ static int iproute_get(int argc, char **argv) tb[RTA_OIF]->rta_type = 0; if (tb[RTA_GATEWAY]) tb[RTA_GATEWAY]->rta_type = 0; + if (tb[RTA_VIA]) + tb[RTA_VIA]->rta_type = 0; if (!idev && tb[RTA_IIF]) tb[RTA_IIF]->rta_type = 0; req.n.nlmsg_flags = NLM_F_REQUEST; 
req.n.nlmsg_type = RTM_GETROUTE; - if (rtnl_talk(&rth, &req.n, 0, 0, &req.n) < 0) + if (rtnl_talk(&rth, &req.n, &req.n, sizeof(req)) < 0) exit(2); } @@ -1582,8 +1681,9 @@ static int iproute_get(int argc, char **argv) exit(0); } -static int restore_handler(const struct sockaddr_nl *nl, struct nlmsghdr *n, - void *arg) +static int restore_handler(const struct sockaddr_nl *nl, + struct rtnl_ctrl_data *ctrl, + struct nlmsghdr *n, void *arg) { int ret; @@ -1591,7 +1691,7 @@ static int restore_handler(const struct sockaddr_nl *nl, struct nlmsghdr *n, ll_init_map(&rth); - ret = rtnl_talk(&rth, n, 0, 0, n); + ret = rtnl_talk(&rth, n, n, sizeof(*n)); if ((ret < 0) && (errno == EEXIST)) ret = 0; @@ -1625,7 +1725,9 @@ static int iproute_restore(void) exit(rtnl_from_file(stdin, &restore_handler, NULL)); } -static int show_handler(const struct sockaddr_nl *nl, struct nlmsghdr *n, void *arg) +static int show_handler(const struct sockaddr_nl *nl, + struct rtnl_ctrl_data *ctrl, + struct nlmsghdr *n, void *arg) { print_route(nl, n, stdout); return 0; diff --git a/ip/iprule.c b/ip/iprule.c index 366878e9..714278a2 100644 --- a/ip/iprule.c +++ b/ip/iprule.c @@ -66,14 +66,7 @@ int print_rule(const struct sockaddr_nl *who, struct nlmsghdr *n, void *arg) parse_rtattr(tb, FRA_MAX, RTM_RTA(r), len); - if (r->rtm_family == AF_INET) - host_len = 32; - else if (r->rtm_family == AF_INET6) - host_len = 128; - else if (r->rtm_family == AF_DECnet) - host_len = 16; - else if (r->rtm_family == AF_IPX) - host_len = 80; + host_len = af_bit_len(r->rtm_family); if (n->nlmsg_type == RTM_DELRULE) fprintf(fp, "Deleted "); @@ -89,8 +82,9 @@ int print_rule(const struct sockaddr_nl *who, struct nlmsghdr *n, void *arg) if (tb[FRA_SRC]) { if (r->rtm_src_len != host_len) { fprintf(fp, "from %s/%u ", rt_addr_n2a(r->rtm_family, - RTA_DATA(tb[FRA_SRC]), - abuf, sizeof(abuf)), + RTA_PAYLOAD(tb[FRA_SRC]), + RTA_DATA(tb[FRA_SRC]), + abuf, sizeof(abuf)), r->rtm_src_len ); } else { @@ -109,8 +103,9 @@ int 
print_rule(const struct sockaddr_nl *who, struct nlmsghdr *n, void *arg) if (tb[FRA_DST]) { if (r->rtm_dst_len != host_len) { fprintf(fp, "to %s/%u ", rt_addr_n2a(r->rtm_family, - RTA_DATA(tb[FRA_DST]), - abuf, sizeof(abuf)), + RTA_PAYLOAD(tb[FRA_DST]), + RTA_DATA(tb[FRA_DST]), + abuf, sizeof(abuf)), r->rtm_dst_len ); } else { @@ -385,8 +380,8 @@ static int iprule_modify(int cmd, int argc, char **argv) if (!table_ok && cmd == RTM_NEWRULE) req.r.rtm_table = RT_TABLE_MAIN; - if (rtnl_talk(&rth, &req.n, 0, 0, NULL) < 0) - return 2; + if (rtnl_talk(&rth, &req.n, NULL, 0) < 0) + return -2; return 0; } @@ -412,7 +407,7 @@ static int flush_rule(const struct sockaddr_nl *who, struct nlmsghdr *n, void *a if (rtnl_open(&rth2, 0) < 0) return -1; - if (rtnl_talk(&rth2, n, 0, 0, NULL) < 0) + if (rtnl_talk(&rth2, n, NULL, 0) < 0) return -2; rtnl_close(&rth2); diff --git a/ip/iptoken.c b/ip/iptoken.c index 5689c2ec..a38194c9 100644 --- a/ip/iptoken.c +++ b/ip/iptoken.c @@ -182,7 +182,7 @@ static int iptoken_set(int argc, char **argv) return -1; } - if (rtnl_talk(&rth, &req.n, 0, 0, NULL) < 0) + if (rtnl_talk(&rth, &req.n, NULL, 0) < 0) return -2; return 0; @@ -195,6 +195,7 @@ int do_iptoken(int argc, char **argv) if (argc < 1) { return iptoken_list(0, NULL); } else if (matches(argv[0], "list") == 0 || + matches(argv[0], "lst") == 0 || matches(argv[0], "show") == 0) { return iptoken_list(argc - 1, argv + 1); } else if (matches(argv[0], "set") == 0 || diff --git a/ip/iptunnel.c b/ip/iptunnel.c index caf8a28e..be84b83e 100644 --- a/ip/iptunnel.c +++ b/ip/iptunnel.c @@ -342,8 +342,8 @@ static void print_tunnel(struct ip_tunnel_parm *p) printf("%s: %s/ip remote %s local %s ", p->name, tnl_strproto(p->iph.protocol), - p->iph.daddr ? format_host(AF_INET, 4, &p->iph.daddr, s1, sizeof(s1)) : "any", - p->iph.saddr ? rt_addr_n2a(AF_INET, &p->iph.saddr, s2, sizeof(s2)) : "any"); + p->iph.daddr ? format_host(AF_INET, 4, &p->iph.daddr, s1, sizeof(s1)) : "any", + p->iph.saddr ? 
rt_addr_n2a(AF_INET, 4, &p->iph.saddr, s2, sizeof(s2)) : "any"); if (p->iph.protocol == IPPROTO_IPV6 && (p->i_flags & SIT_ISATAP)) { struct ip_tunnel_prl prl[16]; diff --git a/ip/ipxfrm.c b/ip/ipxfrm.c index 659fa6b6..9c264289 100644 --- a/ip/ipxfrm.c +++ b/ip/ipxfrm.c @@ -288,10 +288,10 @@ void xfrm_id_info_print(xfrm_address_t *saddr, struct xfrm_id *id, fputs(title, fp); memset(abuf, '\0', sizeof(abuf)); - fprintf(fp, "src %s ", rt_addr_n2a(family, + fprintf(fp, "src %s ", rt_addr_n2a(family, sizeof(*saddr), saddr, abuf, sizeof(abuf))); memset(abuf, '\0', sizeof(abuf)); - fprintf(fp, "dst %s", rt_addr_n2a(family, + fprintf(fp, "dst %s", rt_addr_n2a(family, sizeof(id->daddr), &id->daddr, abuf, sizeof(abuf))); fprintf(fp, "%s", _SL_); @@ -455,11 +455,15 @@ void xfrm_selector_print(struct xfrm_selector *sel, __u16 family, fputs(prefix, fp); memset(abuf, '\0', sizeof(abuf)); - fprintf(fp, "src %s/%u ", rt_addr_n2a(f, &sel->saddr, abuf, sizeof(abuf)), + fprintf(fp, "src %s/%u ", + rt_addr_n2a(f, sizeof(sel->saddr), &sel->saddr, + abuf, sizeof(abuf)), sel->prefixlen_s); memset(abuf, '\0', sizeof(abuf)); - fprintf(fp, "dst %s/%u ", rt_addr_n2a(f, &sel->daddr, abuf, sizeof(abuf)), + fprintf(fp, "dst %s/%u ", + rt_addr_n2a(f, sizeof(sel->daddr), &sel->daddr, + abuf, sizeof(abuf)), sel->prefixlen_d); if (sel->proto) @@ -689,7 +693,8 @@ void xfrm_xfrma_print(struct rtattr *tb[], __u16 family, if (tb[XFRMA_MARK]) { struct rtattr *rta = tb[XFRMA_MARK]; struct xfrm_mark *m = (struct xfrm_mark *) RTA_DATA(rta); - fprintf(fp, "\tmark %d/0x%x\n", m->v, m->m); + fprintf(fp, "\tmark %#x/%#x", m->v, m->m); + fprintf(fp, "%s", _SL_); } if (tb[XFRMA_ALG_AUTH] && !tb[XFRMA_ALG_AUTH_TRUNC]) { @@ -754,7 +759,8 @@ void xfrm_xfrma_print(struct rtattr *tb[], __u16 family, memset(abuf, '\0', sizeof(abuf)); fprintf(fp, "addr %s", - rt_addr_n2a(family, &e->encap_oa, abuf, sizeof(abuf))); + rt_addr_n2a(family, sizeof(e->encap_oa), &e->encap_oa, + abuf, sizeof(abuf))); fprintf(fp, "%s", _SL_); 
} @@ -782,7 +788,7 @@ void xfrm_xfrma_print(struct rtattr *tb[], __u16 family, memset(abuf, '\0', sizeof(abuf)); fprintf(fp, "%s", - rt_addr_n2a(family, coa, + rt_addr_n2a(family, sizeof(*coa), coa, abuf, sizeof(abuf))); fprintf(fp, "%s", _SL_); } @@ -1338,6 +1344,7 @@ static int xfrm_selector_upspec_parse(struct xfrm_selector *sel, case IPPROTO_UDP: case IPPROTO_SCTP: case IPPROTO_DCCP: + case IPPROTO_IP: /* to allow shared SA for different protocols */ break; default: fprintf(stderr, "\"sport\" and \"dport\" are invalid with PROTO value \"%s\"\n", strxf_proto(sel->proto)); diff --git a/ip/link_gre.c b/ip/link_gre.c index 1d783876..58f416ca 100644 --- a/ip/link_gre.c +++ b/ip/link_gre.c @@ -53,7 +53,7 @@ static int gre_parse_opt(struct link_util *lu, int argc, char **argv, struct { struct nlmsghdr n; struct ifinfomsg i; - char buf[1024]; + char buf[16384]; } req; struct ifinfomsg *ifi = (struct ifinfomsg *)(n + 1); struct rtattr *tb[IFLA_MAX + 1]; @@ -84,7 +84,7 @@ static int gre_parse_opt(struct link_util *lu, int argc, char **argv, req.i.ifi_family = preferred_family; req.i.ifi_index = ifi->ifi_index; - if (rtnl_talk(&rth, &req.n, 0, 0, &req.n) < 0) { + if (rtnl_talk(&rth, &req.n, &req.n, sizeof(req)) < 0) { get_failed: fprintf(stderr, "Failed to get existing tunnel info.\n"); diff --git a/ip/link_gre6.c b/ip/link_gre6.c index f18919cc..e00ea091 100644 --- a/ip/link_gre6.c +++ b/ip/link_gre6.c @@ -91,7 +91,7 @@ static int gre_parse_opt(struct link_util *lu, int argc, char **argv, req.i.ifi_family = preferred_family; req.i.ifi_index = ifi->ifi_index; - if (rtnl_talk(&rth, &req.n, 0, 0, &req.n) < 0) { + if (rtnl_talk(&rth, &req.n, &req.n, sizeof(req)) < 0) { get_failed: fprintf(stderr, "Failed to get existing tunnel info.\n"); diff --git a/ip/link_ip6tnl.c b/ip/link_ip6tnl.c index 5ed3d5a2..f771c75d 100644 --- a/ip/link_ip6tnl.c +++ b/ip/link_ip6tnl.c @@ -89,7 +89,7 @@ static int ip6tunnel_parse_opt(struct link_util *lu, int argc, char **argv, req.i.ifi_family = 
preferred_family; req.i.ifi_index = ifi->ifi_index; - if (rtnl_talk(&rth, &req.n, 0, 0, &req.n) < 0) { + if (rtnl_talk(&rth, &req.n, &req.n, sizeof(req)) < 0) { get_failed: fprintf(stderr, "Failed to get existing tunnel info.\n"); @@ -285,6 +285,7 @@ static void ip6tunnel_print_opt(struct link_util *lu, FILE *f, struct rtattr *tb if (tb[IFLA_IPTUN_REMOTE]) { fprintf(f, "remote %s ", rt_addr_n2a(AF_INET6, + RTA_PAYLOAD(tb[IFLA_IPTUN_REMOTE]), RTA_DATA(tb[IFLA_IPTUN_REMOTE]), s1, sizeof(s1))); } @@ -292,6 +293,7 @@ static void ip6tunnel_print_opt(struct link_util *lu, FILE *f, struct rtattr *tb if (tb[IFLA_IPTUN_LOCAL]) { fprintf(f, "local %s ", rt_addr_n2a(AF_INET6, + RTA_PAYLOAD(tb[IFLA_IPTUN_LOCAL]), RTA_DATA(tb[IFLA_IPTUN_LOCAL]), s1, sizeof(s1))); } diff --git a/ip/link_iptnl.c b/ip/link_iptnl.c index cab174f9..9d6bc986 100644 --- a/ip/link_iptnl.c +++ b/ip/link_iptnl.c @@ -91,7 +91,7 @@ static int iptunnel_parse_opt(struct link_util *lu, int argc, char **argv, req.i.ifi_family = preferred_family; req.i.ifi_index = ifi->ifi_index; - if (rtnl_talk(&rth, &req.n, 0, 0, &req.n) < 0) { + if (rtnl_talk(&rth, &req.n, &req.n, sizeof(req)) < 0) { get_failed: fprintf(stderr, "Failed to get existing tunnel info.\n"); diff --git a/ip/link_vti.c b/ip/link_vti.c index 59ac4c48..f3fea338 100644 --- a/ip/link_vti.c +++ b/ip/link_vti.c @@ -71,7 +71,7 @@ static int vti_parse_opt(struct link_util *lu, int argc, char **argv, req.i.ifi_family = preferred_family; req.i.ifi_index = ifi->ifi_index; - if (rtnl_talk(&rth, &req.n, 0, 0, &req.n) < 0) { + if (rtnl_talk(&rth, &req.n, &req.n, sizeof(req)) < 0) { get_failed: fprintf(stderr, "Failed to get existing tunnel info.\n"); diff --git a/ip/link_vti6.c b/ip/link_vti6.c index 282896df..c146f791 100644 --- a/ip/link_vti6.c +++ b/ip/link_vti6.c @@ -67,7 +67,7 @@ static int vti6_parse_opt(struct link_util *lu, int argc, char **argv, req.i.ifi_family = preferred_family; req.i.ifi_index = ifi->ifi_index; - if (rtnl_talk(&rth, &req.n, 0, 0, 
&req.n) < 0) { + if (rtnl_talk(&rth, &req.n, &req.n, sizeof(req)) < 0) { get_failed: fprintf(stderr, "Failed to get existing tunnel info.\n"); diff --git a/ip/rtmon.c b/ip/rtmon.c index ff685e53..42b24fb5 100644 --- a/ip/rtmon.c +++ b/ip/rtmon.c @@ -45,8 +45,8 @@ static void write_stamp(FILE *fp) fwrite((void*)n1, 1, NLMSG_ALIGN(n1->nlmsg_len), fp); } -static int dump_msg(const struct sockaddr_nl *who, struct nlmsghdr *n, - void *arg) +static int dump_msg(const struct sockaddr_nl *who, struct rtnl_ctrl_data *ctrl, + struct nlmsghdr *n, void *arg) { FILE *fp = (FILE*)arg; if (!init_phase) @@ -56,6 +56,12 @@ static int dump_msg(const struct sockaddr_nl *who, struct nlmsghdr *n, return 0; } +static int dump_msg2(const struct sockaddr_nl *who, + struct nlmsghdr *n, void *arg) +{ + return dump_msg(who, NULL, n, arg); +} + static void usage(void) { fprintf(stderr, "Usage: rtmon file FILE [ all | LISTofOBJECTS]\n"); @@ -163,7 +169,7 @@ main(int argc, char **argv) write_stamp(fp); - if (rtnl_dump_filter(&rth, dump_msg, fp) < 0) { + if (rtnl_dump_filter(&rth, dump_msg2, fp) < 0) { fprintf(stderr, "Dump terminated\n"); return 1; } diff --git a/ip/tcp_metrics.c b/ip/tcp_metrics.c index bbbb4cc0..bdc503ef 100644 --- a/ip/tcp_metrics.c +++ b/ip/tcp_metrics.c @@ -467,10 +467,10 @@ static int tcpm_do_cmd(int cmd, int argc, char **argv) } if (ack) { - if (rtnl_talk(&grth, &req.n, 0, 0, NULL) < 0) + if (rtnl_talk(&grth, &req.n, NULL, 0) < 0) return -2; } else if (atype >= 0) { - if (rtnl_talk(&grth, &req.n, 0, 0, &req.n) < 0) + if (rtnl_talk(&grth, &req.n, &req.n, sizeof(req)) < 0) return -2; if (process_msg(NULL, &req.n, stdout) < 0) { fprintf(stderr, "Dump terminated\n"); diff --git a/ip/xfrm_monitor.c b/ip/xfrm_monitor.c index 79453e41..ebccb71c 100644 --- a/ip/xfrm_monitor.c +++ b/ip/xfrm_monitor.c @@ -27,16 +27,20 @@ #include #include #include +#include #include + #include "utils.h" #include "xfrm.h" #include "ip_common.h" static void usage(void) __attribute__((noreturn)); 
+int listen_all_nsid; static void usage(void) { - fprintf(stderr, "Usage: ip xfrm monitor [ all | LISTofXFRM-OBJECTS ]\n"); + fprintf(stderr, "Usage: ip xfrm monitor [all-nsid] [ all | OBJECTS | help ]\n"); + fprintf(stderr, "OBJECTS := { acquire | expire | SA | aevent | policy | report }\n"); exit(-1); } @@ -226,7 +230,8 @@ static void xfrm_usersa_print(const struct xfrm_usersa_id *sa_id, __u32 reqid, F buf[0] = 0; fprintf(fp, "dst %s ", - rt_addr_n2a(sa_id->family, &sa_id->daddr, buf, sizeof(buf))); + rt_addr_n2a(sa_id->family, sizeof(sa_id->daddr), &sa_id->daddr, + buf, sizeof(buf))); fprintf(fp, " reqid 0x%x", reqid); @@ -245,7 +250,8 @@ static int xfrm_ae_print(const struct sockaddr_nl *who, xfrm_ae_flags_print(id->flags, arg); fprintf(fp,"\n\t"); memset(abuf, '\0', sizeof(abuf)); - fprintf(fp, "src %s ", rt_addr_n2a(id->sa_id.family, &id->saddr, + fprintf(fp, "src %s ", rt_addr_n2a(id->sa_id.family, + sizeof(id->saddr), &id->saddr, abuf, sizeof(abuf))); xfrm_usersa_print(&id->sa_id, id->reqid, fp); @@ -261,7 +267,7 @@ static void xfrm_print_addr(FILE *fp, int family, xfrm_address_t *a) char buf[256]; buf[0] = 0; - fprintf(fp, "%s", rt_addr_n2a(family, a, buf, sizeof(buf))); + fprintf(fp, "%s", rt_addr_n2a(family, sizeof(*a), a, buf, sizeof(buf))); } static int xfrm_mapping_print(const struct sockaddr_nl *who, @@ -285,6 +291,7 @@ static int xfrm_mapping_print(const struct sockaddr_nl *who, } static int xfrm_accept_msg(const struct sockaddr_nl *who, + struct rtnl_ctrl_data *ctrl, struct nlmsghdr *n, void *arg) { FILE *fp = (FILE*)arg; @@ -292,6 +299,13 @@ static int xfrm_accept_msg(const struct sockaddr_nl *who, if (timestamp) print_timestamp(fp); + if (listen_all_nsid) { + if (ctrl == NULL || ctrl->nsid < 0) + fprintf(fp, "[nsid current]"); + else + fprintf(fp, "[nsid %d]", ctrl->nsid); + } + switch (n->nlmsg_type) { case XFRM_MSG_NEWSA: case XFRM_MSG_DELSA: @@ -354,6 +368,8 @@ int do_xfrm_monitor(int argc, char **argv) if (matches(*argv, "file") == 0) { 
NEXT_ARG(); file = *argv; + } else if (matches(*argv, "all-nsid") == 0) { + listen_all_nsid = 1; } else if (matches(*argv, "acquire") == 0) { lacquire=1; groups = 0; @@ -374,7 +390,7 @@ int do_xfrm_monitor(int argc, char **argv) groups = 0; } else if (matches(*argv, "help") == 0) { usage(); - } else { + } else if (strcmp(*argv, "all")) { fprintf(stderr, "Argument \"%s\" is unknown, try \"ip xfrm monitor help\".\n", *argv); exit(-1); } @@ -406,6 +422,8 @@ int do_xfrm_monitor(int argc, char **argv) if (rtnl_open_byproto(&rth, groups, NETLINK_XFRM) < 0) exit(1); + if (listen_all_nsid && rtnl_listen_all_nsid(&rth) < 0) + exit(1); if (rtnl_listen(&rth, xfrm_accept_msg, (void*)stdout) < 0) exit(2); diff --git a/ip/xfrm_policy.c b/ip/xfrm_policy.c index 2337d352..8f4d1a07 100644 --- a/ip/xfrm_policy.c +++ b/ip/xfrm_policy.c @@ -63,7 +63,8 @@ static void usage(void) fprintf(stderr, " [ index INDEX ] [ ptype PTYPE ] [ action ACTION ] [ priority PRIORITY ]\n"); fprintf(stderr, " [ flag FLAG-LIST ]\n"); fprintf(stderr, "Usage: ip xfrm policy flush [ ptype PTYPE ]\n"); - fprintf(stderr, "Usage: ip xfrm count\n"); + fprintf(stderr, "Usage: ip xfrm policy count\n"); + fprintf(stderr, "Usage: ip xfrm policy set [ hthresh4 LBITS RBITS ] [ hthresh6 LBITS RBITS ]\n"); fprintf(stderr, "SELECTOR := [ src ADDR[/PLEN] ] [ dst ADDR[/PLEN] ] [ dev DEV ] [ UPSPEC ]\n"); fprintf(stderr, "UPSPEC := proto { { "); fprintf(stderr, "%s | ", strxf_proto(IPPROTO_TCP)); @@ -392,7 +393,7 @@ static int xfrm_policy_modify(int cmd, unsigned flags, int argc, char **argv) if (req.xpinfo.sel.family == AF_UNSPEC) req.xpinfo.sel.family = AF_INET; - if (rtnl_talk(&rth, &req.n, 0, 0, NULL) < 0) + if (rtnl_talk(&rth, &req.n, NULL, 0) < 0) exit(2); rtnl_close(&rth); @@ -554,7 +555,7 @@ int xfrm_policy_print(const struct sockaddr_nl *who, struct nlmsghdr *n, } static int xfrm_policy_get_or_delete(int argc, char **argv, int delete, - void *res_nlbuf) + void *res_nlbuf, size_t res_size) { struct rtnl_handle rth; 
struct { @@ -669,7 +670,7 @@ static int xfrm_policy_get_or_delete(int argc, char **argv, int delete, (void *)&ctx, ctx.sctx.len); } - if (rtnl_talk(&rth, &req.n, 0, 0, res_nlbuf) < 0) + if (rtnl_talk(&rth, &req.n, res_nlbuf, res_size) < 0) exit(2); rtnl_close(&rth); @@ -679,7 +680,7 @@ static int xfrm_policy_get_or_delete(int argc, char **argv, int delete, static int xfrm_policy_delete(int argc, char **argv) { - return xfrm_policy_get_or_delete(argc, argv, 1, NULL); + return xfrm_policy_get_or_delete(argc, argv, 1, NULL, 0); } static int xfrm_policy_get(int argc, char **argv) @@ -689,7 +690,7 @@ static int xfrm_policy_get(int argc, char **argv) memset(buf, 0, sizeof(buf)); - xfrm_policy_get_or_delete(argc, argv, 0, n); + xfrm_policy_get_or_delete(argc, argv, 0, n, sizeof(buf)); if (xfrm_policy_print(NULL, n, (void*)stdout) < 0) { fprintf(stderr, "An error :-)\n"); @@ -847,13 +848,23 @@ static int xfrm_policy_list_or_deleteall(int argc, char **argv, int deleteall) xb.rth = &rth; for (i = 0; ; i++) { + struct { + struct nlmsghdr n; + char buf[NLMSG_BUF_SIZE]; + } req = { + .n.nlmsg_len = NLMSG_HDRLEN, + .n.nlmsg_flags = NLM_F_DUMP | NLM_F_REQUEST, + .n.nlmsg_type = XFRM_MSG_GETPOLICY, + .n.nlmsg_seq = rth.dump = ++rth.seq, + }; + xb.offset = 0; xb.nlmsg_count = 0; if (show_stats > 1) fprintf(stderr, "Delete-all round = %d\n", i); - if (rtnl_wilddump_request(&rth, preferred_family, XFRM_MSG_GETPOLICY) < 0) { + if (rtnl_send(&rth, (void *)&req, req.n.nlmsg_len) < 0) { perror("Cannot send dump request"); exit(1); } @@ -879,7 +890,17 @@ static int xfrm_policy_list_or_deleteall(int argc, char **argv, int deleteall) xb.nlmsg_count = 0; } } else { - if (rtnl_wilddump_request(&rth, preferred_family, XFRM_MSG_GETPOLICY) < 0) { + struct { + struct nlmsghdr n; + char buf[NLMSG_BUF_SIZE]; + } req = { + .n.nlmsg_len = NLMSG_HDRLEN, + .n.nlmsg_flags = NLM_F_DUMP | NLM_F_REQUEST, + .n.nlmsg_type = XFRM_MSG_GETPOLICY, + .n.nlmsg_seq = rth.dump = ++rth.seq, + }; + + if 
(rtnl_send(&rth, (void *)&req, req.n.nlmsg_len) < 0) { perror("Cannot send dump request"); exit(1); } @@ -934,7 +955,7 @@ static int print_spdinfo( struct nlmsghdr *n, void *arg) fprintf(fp,")"); } - fprintf(fp,"\n"); + fprintf(fp, "%s", _SL_); } if (show_stats > 1) { struct xfrmu_spdhinfo *sh; @@ -948,13 +969,109 @@ static int print_spdinfo( struct nlmsghdr *n, void *arg) fprintf(fp,"\t SPD buckets:"); fprintf(fp," count %d", sh->spdhcnt); fprintf(fp," Max %d", sh->spdhmcnt); + fprintf(fp, "%s", _SL_); + } + if (tb[XFRMA_SPD_IPV4_HTHRESH]) { + struct xfrmu_spdhthresh *th; + if (RTA_PAYLOAD(tb[XFRMA_SPD_IPV4_HTHRESH]) < sizeof(*th)) { + fprintf(stderr, "SPDinfo: Wrong len %d\n", len); + return -1; + } + th = RTA_DATA(tb[XFRMA_SPD_IPV4_HTHRESH]); + fprintf(fp,"\t SPD IPv4 thresholds:"); + fprintf(fp," local %d", th->lbits); + fprintf(fp," remote %d", th->rbits); + fprintf(fp, "%s", _SL_); + + } + if (tb[XFRMA_SPD_IPV6_HTHRESH]) { + struct xfrmu_spdhthresh *th; + if (RTA_PAYLOAD(tb[XFRMA_SPD_IPV6_HTHRESH]) < sizeof(*th)) { + fprintf(stderr, "SPDinfo: Wrong len %d\n", len); + return -1; + } + th = RTA_DATA(tb[XFRMA_SPD_IPV6_HTHRESH]); + fprintf(fp,"\t SPD IPv6 thresholds:"); + fprintf(fp," local %d", th->lbits); + fprintf(fp," remote %d", th->rbits); + fprintf(fp, "%s", _SL_); } } - fprintf(fp,"\n"); + + if (oneline) + fprintf(fp, "\n"); return 0; } +static int xfrm_spd_setinfo(int argc, char **argv) +{ + struct rtnl_handle rth; + struct { + struct nlmsghdr n; + __u32 flags; + char buf[RTA_BUF_SIZE]; + } req; + + char *thr4 = NULL; + char *thr6 = NULL; + + memset(&req, 0, sizeof(req)); + + req.n.nlmsg_len = NLMSG_LENGTH(sizeof(__u32)); + req.n.nlmsg_flags = NLM_F_REQUEST; + req.n.nlmsg_type = XFRM_MSG_NEWSPDINFO; + req.flags = 0XFFFFFFFF; + + while (argc > 0) { + if (strcmp(*argv, "hthresh4") == 0) { + struct xfrmu_spdhthresh thr; + + if (thr4) + duparg("hthresh4", *argv); + thr4 = *argv; + NEXT_ARG(); + if (get_u8(&thr.lbits, *argv, 0) || thr.lbits > 32) + 
invarg("hthresh4 LBITS value is invalid", *argv); + NEXT_ARG(); + if (get_u8(&thr.rbits, *argv, 0) || thr.rbits > 32) + invarg("hthresh4 RBITS value is invalid", *argv); + + addattr_l(&req.n, sizeof(req), XFRMA_SPD_IPV4_HTHRESH, + (void *)&thr, sizeof(thr)); + } else if (strcmp(*argv, "hthresh6") == 0) { + struct xfrmu_spdhthresh thr; + + if (thr6) + duparg("hthresh6", *argv); + thr6 = *argv; + NEXT_ARG(); + if (get_u8(&thr.lbits, *argv, 0) || thr.lbits > 128) + invarg("hthresh6 LBITS value is invalid", *argv); + NEXT_ARG(); + if (get_u8(&thr.rbits, *argv, 0) || thr.rbits > 128) + invarg("hthresh6 RBITS value is invalid", *argv); + + addattr_l(&req.n, sizeof(req), XFRMA_SPD_IPV6_HTHRESH, + (void *)&thr, sizeof(thr)); + } else { + invarg("unknown", *argv); + } + + argc--; argv++; + } + + if (rtnl_open_byproto(&rth, 0, NETLINK_XFRM) < 0) + exit(1); + + if (rtnl_talk(&rth, &req.n, NULL, 0) < 0) + exit(2); + + rtnl_close(&rth); + + return 0; +} + static int xfrm_spd_getinfo(int argc, char **argv) { struct rtnl_handle rth; @@ -974,7 +1091,7 @@ static int xfrm_spd_getinfo(int argc, char **argv) if (rtnl_open_byproto(&rth, 0, NETLINK_XFRM) < 0) exit(1); - if (rtnl_talk(&rth, &req.n, 0, 0, &req.n) < 0) + if (rtnl_talk(&rth, &req.n, &req.n, sizeof(req)) < 0) exit(2); print_spdinfo(&req.n, (void*)stdout); @@ -1026,7 +1143,7 @@ static int xfrm_policy_flush(int argc, char **argv) if (show_stats > 1) fprintf(stderr, "Flush policy\n"); - if (rtnl_talk(&rth, &req.n, 0, 0, NULL) < 0) + if (rtnl_talk(&rth, &req.n, NULL, 0) < 0) exit(2); rtnl_close(&rth); @@ -1058,6 +1175,8 @@ int do_xfrm_policy(int argc, char **argv) return xfrm_policy_flush(argc-1, argv+1); if (matches(*argv, "count") == 0) return xfrm_spd_getinfo(argc, argv); + if (matches(*argv, "set") == 0) + return xfrm_spd_setinfo(argc-1, argv+1); if (matches(*argv, "help") == 0) usage(); fprintf(stderr, "Command \"%s\" is unknown, try \"ip xfrm policy help\".\n", *argv); diff --git a/ip/xfrm_state.c b/ip/xfrm_state.c index 
2ad3d8d3..d2831d00 100644 --- a/ip/xfrm_state.c +++ b/ip/xfrm_state.c @@ -688,7 +688,7 @@ static int xfrm_state_modify(int cmd, unsigned flags, int argc, char **argv) if (req.xsinfo.family == AF_UNSPEC) req.xsinfo.family = AF_INET; - if (rtnl_talk(&rth, &req.n, 0, 0, NULL) < 0) + if (rtnl_talk(&rth, &req.n, NULL, 0) < 0) exit(2); rtnl_close(&rth); @@ -825,7 +825,7 @@ static int xfrm_state_allocspi(int argc, char **argv) req.xspi.info.family = AF_INET; - if (rtnl_talk(&rth, &req.n, 0, 0, res_n) < 0) + if (rtnl_talk(&rth, &req.n, res_n, sizeof(res_buf)) < 0) exit(2); if (xfrm_state_print(NULL, res_n, (void*)stdout) < 0) { @@ -1015,7 +1015,7 @@ static int xfrm_state_get_or_delete(int argc, char **argv, int delete) req.xsid.family = AF_INET; if (delete) { - if (rtnl_talk(&rth, &req.n, 0, 0, NULL) < 0) + if (rtnl_talk(&rth, &req.n, NULL, 0) < 0) exit(2); } else { char buf[NLMSG_BUF_SIZE]; @@ -1023,7 +1023,7 @@ static int xfrm_state_get_or_delete(int argc, char **argv, int delete) memset(buf, 0, sizeof(buf)); - if (rtnl_talk(&rth, &req.n, 0, 0, res_n) < 0) + if (rtnl_talk(&rth, &req.n, res_n, sizeof(req)) < 0) exit(2); if (xfrm_state_print(NULL, res_n, (void*)stdout) < 0) { @@ -1148,13 +1148,23 @@ static int xfrm_state_list_or_deleteall(int argc, char **argv, int deleteall) xb.rth = &rth; for (i = 0; ; i++) { + struct { + struct nlmsghdr n; + char buf[NLMSG_BUF_SIZE]; + } req = { + .n.nlmsg_len = NLMSG_HDRLEN, + .n.nlmsg_flags = NLM_F_DUMP | NLM_F_REQUEST, + .n.nlmsg_type = XFRM_MSG_GETSA, + .n.nlmsg_seq = rth.dump = ++rth.seq, + }; + xb.offset = 0; xb.nlmsg_count = 0; if (show_stats > 1) fprintf(stderr, "Delete-all round = %d\n", i); - if (rtnl_wilddump_request(&rth, preferred_family, XFRM_MSG_GETSA) < 0) { + if (rtnl_send(&rth, (void *)&req, req.n.nlmsg_len) < 0) { perror("Cannot send dump request"); exit(1); } @@ -1287,7 +1297,7 @@ static int xfrm_sad_getinfo(int argc, char **argv) if (rtnl_open_byproto(&rth, 0, NETLINK_XFRM) < 0) exit(1); - if (rtnl_talk(&rth, 
&req.n, 0, 0, &req.n) < 0) + if (rtnl_talk(&rth, &req.n, &req.n, sizeof(req)) < 0) exit(2); print_sadinfo(&req.n, (void*)stdout); @@ -1341,7 +1351,7 @@ static int xfrm_state_flush(int argc, char **argv) fprintf(stderr, "Flush state with XFRM-PROTO value \"%s\"\n", strxf_xfrmproto(req.xsf.proto)); - if (rtnl_talk(&rth, &req.n, 0, 0, NULL) < 0) + if (rtnl_talk(&rth, &req.n, NULL, 0) < 0) exit(2); rtnl_close(&rth); diff --git a/lib/Makefile b/lib/Makefile index 66f89f1d..1d4045fc 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -6,7 +6,8 @@ endif CFLAGS += -fPIC -UTILOBJ=utils.o rt_names.o ll_types.o ll_proto.o ll_addr.o inet_proto.o namespace.o +UTILOBJ=utils.o rt_names.o ll_types.o ll_proto.o ll_addr.o inet_proto.o namespace.o \ + names.o color.o NLOBJ=libgenl.o ll_map.o libnetlink.o diff --git a/lib/color.c b/lib/color.c new file mode 100644 index 00000000..8c9a48ba --- /dev/null +++ b/lib/color.c @@ -0,0 +1,64 @@ +#include +#include + +#include "color.h" + +enum color { + C_RED, + C_GREEN, + C_YELLOW, + C_BLUE, + C_MAGENTA, + C_CYAN, + C_WHITE, + C_CLEAR +}; + +static const char * const color_codes[] = { + "\e[31m", + "\e[32m", + "\e[33m", + "\e[34m", + "\e[35m", + "\e[36m", + "\e[37m", + "\e[0m", + NULL, +}; + +static enum color attr_colors[] = { + C_CYAN, + C_YELLOW, + C_MAGENTA, + C_BLUE, + C_GREEN, + C_RED +}; + +static int color_is_enabled; + +void enable_color(void) +{ + color_is_enabled = 1; +} + +int color_fprintf(FILE *fp, enum color_attr attr, const char *fmt, ...) 
+{ + int ret = 0; + va_list args; + + va_start(args, fmt); + + if (!color_is_enabled) { + ret = vfprintf(fp, fmt, args); + goto end; + } + + ret += fprintf(fp, "%s", color_codes[attr_colors[attr]]); + ret += vfprintf(fp, fmt, args); + ret += fprintf(fp, "%s", color_codes[C_CLEAR]); + +end: + va_end(args); + return ret; +} diff --git a/lib/libgenl.c b/lib/libgenl.c index ef3e5db6..acb14783 100644 --- a/lib/libgenl.c +++ b/lib/libgenl.c @@ -53,7 +53,7 @@ int genl_resolve_family(struct rtnl_handle *grth, const char *family) addattr_l(&req.n, sizeof(req), CTRL_ATTR_FAMILY_NAME, family, strlen(family) + 1); - if (rtnl_talk(grth, &req.n, 0, 0, &req.n) < 0) { + if (rtnl_talk(grth, &req.n, &req.n, sizeof(req)) < 0) { fprintf(stderr, "Error talking to the kernel\n"); return -2; } diff --git a/lib/libnetlink.c b/lib/libnetlink.c index 77e07ef7..46cac34c 100644 --- a/lib/libnetlink.c +++ b/lib/libnetlink.c @@ -25,6 +25,14 @@ #include "libnetlink.h" +#ifndef SOL_NETLINK +#define SOL_NETLINK 270 +#endif + +#ifndef MIN +#define MIN(a, b) ((a) < (b) ? 
(a) : (b)) +#endif + int rcvbuf = 1024 * 1024; void rtnl_close(struct rtnl_handle *rth) @@ -300,8 +308,8 @@ int rtnl_dump_filter(struct rtnl_handle *rth, return rtnl_dump_filter_l(rth, a); } -int rtnl_talk(struct rtnl_handle *rtnl, struct nlmsghdr *n, pid_t peer, - unsigned groups, struct nlmsghdr *answer) +int rtnl_talk(struct rtnl_handle *rtnl, struct nlmsghdr *n, + struct nlmsghdr *answer, size_t len) { int status; unsigned seq; @@ -317,12 +325,10 @@ int rtnl_talk(struct rtnl_handle *rtnl, struct nlmsghdr *n, pid_t peer, .msg_iov = &iov, .msg_iovlen = 1, }; - char buf[16384]; + char buf[32768]; memset(&nladdr, 0, sizeof(nladdr)); nladdr.nl_family = AF_NETLINK; - nladdr.nl_pid = peer; - nladdr.nl_groups = groups; n->nlmsg_seq = seq = ++rtnl->seq; @@ -330,7 +336,6 @@ int rtnl_talk(struct rtnl_handle *rtnl, struct nlmsghdr *n, pid_t peer, n->nlmsg_flags |= NLM_F_ACK; status = sendmsg(rtnl->fd, &msg, 0); - if (status < 0) { perror("Cannot talk to rtnetlink"); return -1; @@ -339,7 +344,6 @@ int rtnl_talk(struct rtnl_handle *rtnl, struct nlmsghdr *n, pid_t peer, memset(buf,0,sizeof(buf)); iov.iov_base = buf; - while (1) { iov.iov_len = sizeof(buf); status = recvmsg(rtnl->fd, &msg, 0); @@ -372,7 +376,7 @@ int rtnl_talk(struct rtnl_handle *rtnl, struct nlmsghdr *n, pid_t peer, exit(1); } - if (nladdr.nl_pid != peer || + if (nladdr.nl_pid != 0 || h->nlmsg_pid != rtnl->local.nl_pid || h->nlmsg_seq != seq) { /* Don't forget to skip that message. 
*/ @@ -385,20 +389,22 @@ int rtnl_talk(struct rtnl_handle *rtnl, struct nlmsghdr *n, pid_t peer, struct nlmsgerr *err = (struct nlmsgerr*)NLMSG_DATA(h); if (l < sizeof(struct nlmsgerr)) { fprintf(stderr, "ERROR truncated\n"); - } else { - if (!err->error) { - if (answer) - memcpy(answer, h, h->nlmsg_len); - return 0; - } - - fprintf(stderr, "RTNETLINK answers: %s\n", strerror(-err->error)); - errno = -err->error; + } else if (!err->error) { + if (answer) + memcpy(answer, h, + MIN(len, h->nlmsg_len)); + return 0; } + + fprintf(stderr, "RTNETLINK answers: %s\n", + strerror(-err->error)); + errno = -err->error; return -1; } + if (answer) { - memcpy(answer, h, h->nlmsg_len); + memcpy(answer, h, + MIN(len, h->nlmsg_len)); return 0; } @@ -407,10 +413,12 @@ int rtnl_talk(struct rtnl_handle *rtnl, struct nlmsghdr *n, pid_t peer, status -= NLMSG_ALIGN(len); h = (struct nlmsghdr*)((char*)h + NLMSG_ALIGN(len)); } + if (msg.msg_flags & MSG_TRUNC) { fprintf(stderr, "Message truncated\n"); continue; } + if (status) { fprintf(stderr, "!!!Remnant of size %d\n", status); exit(1); @@ -418,8 +426,21 @@ int rtnl_talk(struct rtnl_handle *rtnl, struct nlmsghdr *n, pid_t peer, } } +int rtnl_listen_all_nsid(struct rtnl_handle *rth) +{ + unsigned int on = 1; + + if (setsockopt(rth->fd, SOL_NETLINK, NETLINK_LISTEN_ALL_NSID, &on, + sizeof(on)) < 0) { + perror("NETLINK_LISTEN_ALL_NSID"); + return -1; + } + rth->flags |= RTNL_HANDLE_F_LISTEN_ALL_NSID; + return 0; +} + int rtnl_listen(struct rtnl_handle *rtnl, - rtnl_filter_t handler, + rtnl_listen_filter_t handler, void *jarg) { int status; @@ -433,6 +454,12 @@ int rtnl_listen(struct rtnl_handle *rtnl, .msg_iovlen = 1, }; char buf[16384]; + char cmsgbuf[BUFSIZ]; + + if (rtnl->flags & RTNL_HANDLE_F_LISTEN_ALL_NSID) { + msg.msg_control = &cmsgbuf; + msg.msg_controllen = sizeof(cmsgbuf); + } memset(&nladdr, 0, sizeof(nladdr)); nladdr.nl_family = AF_NETLINK; @@ -441,6 +468,9 @@ int rtnl_listen(struct rtnl_handle *rtnl, iov.iov_base = buf; while 
(1) { + struct rtnl_ctrl_data ctrl; + struct cmsghdr *cmsg; + iov.iov_len = sizeof(buf); status = recvmsg(rtnl->fd, &msg, 0); @@ -461,6 +491,21 @@ int rtnl_listen(struct rtnl_handle *rtnl, fprintf(stderr, "Sender address length == %d\n", msg.msg_namelen); exit(1); } + + if (rtnl->flags & RTNL_HANDLE_F_LISTEN_ALL_NSID) { + memset(&ctrl, 0, sizeof(ctrl)); + ctrl.nsid = -1; + for (cmsg = CMSG_FIRSTHDR(&msg); cmsg; + cmsg = CMSG_NXTHDR(&msg, cmsg)) + if (cmsg->cmsg_level == SOL_NETLINK && + cmsg->cmsg_type == NETLINK_LISTEN_ALL_NSID && + cmsg->cmsg_len == CMSG_LEN(sizeof(int))) { + int *data = (int *)CMSG_DATA(cmsg); + + ctrl.nsid = *data; + } + } + for (h = (struct nlmsghdr*)buf; status >= sizeof(*h); ) { int err; int len = h->nlmsg_len; @@ -475,7 +520,7 @@ int rtnl_listen(struct rtnl_handle *rtnl, exit(1); } - err = handler(&nladdr, h, jarg); + err = handler(&nladdr, &ctrl, h, jarg); if (err < 0) return err; @@ -493,7 +538,7 @@ int rtnl_listen(struct rtnl_handle *rtnl, } } -int rtnl_from_file(FILE *rtnl, rtnl_filter_t handler, +int rtnl_from_file(FILE *rtnl, rtnl_listen_filter_t handler, void *jarg) { int status; @@ -541,7 +586,7 @@ int rtnl_from_file(FILE *rtnl, rtnl_filter_t handler, return -1; } - err = handler(&nladdr, h, jarg); + err = handler(&nladdr, NULL, h, jarg); if (err < 0) return err; } diff --git a/lib/ll_addr.c b/lib/ll_addr.c index c12ab075..2ce9abfb 100644 --- a/lib/ll_addr.c +++ b/lib/ll_addr.c @@ -29,7 +29,7 @@ #include "utils.h" -const char *ll_addr_n2a(unsigned char *addr, int alen, int type, char *buf, int blen) +const char *ll_addr_n2a(const unsigned char *addr, int alen, int type, char *buf, int blen) { int i; int l; diff --git a/lib/ll_map.c b/lib/ll_map.c index db34a2aa..c6f70274 100644 --- a/lib/ll_map.c +++ b/lib/ll_map.c @@ -52,7 +52,7 @@ static struct ll_cache *ll_get_by_index(unsigned index) return NULL; } -static unsigned namehash(const char *str) +unsigned namehash(const char *str) { unsigned hash = 5381; diff --git a/lib/mpls_ntop.c 
b/lib/mpls_ntop.c new file mode 100644 index 00000000..945d6d5e --- /dev/null +++ b/lib/mpls_ntop.c @@ -0,0 +1,48 @@ +#include +#include +#include +#include +#include + +#include "utils.h" + +static const char *mpls_ntop1(const struct mpls_label *addr, char *buf, size_t buflen) +{ + size_t destlen = buflen; + char *dest = buf; + int count; + + for (count = 0; count < MPLS_MAX_LABELS; count++) { + uint32_t entry = ntohl(addr[count].entry); + uint32_t label = (entry & MPLS_LS_LABEL_MASK) >> MPLS_LS_LABEL_SHIFT; + int len = snprintf(dest, destlen, "%u", label); + + /* Is this the end? */ + if (entry & MPLS_LS_S_MASK) + return buf; + + + dest += len; + destlen -= len; + if (destlen) { + *dest = '/'; + dest++; + destlen--; + } + } + errno = -E2BIG; + return NULL; +} + +const char *mpls_ntop(int af, const void *addr, char *buf, size_t buflen) +{ + switch(af) { + case AF_MPLS: + errno = 0; + return mpls_ntop1((struct mpls_label *)addr, buf, buflen); + default: + errno = EAFNOSUPPORT; + } + + return NULL; +} diff --git a/lib/mpls_pton.c b/lib/mpls_pton.c new file mode 100644 index 00000000..bd448cfc --- /dev/null +++ b/lib/mpls_pton.c @@ -0,0 +1,58 @@ +#include +#include +#include +#include +#include + +#include "utils.h" + + +static int mpls_pton1(const char *name, struct mpls_label *addr) +{ + char *endp; + unsigned count; + + for (count = 0; count < MPLS_MAX_LABELS; count++) { + unsigned long label; + + label = strtoul(name, &endp, 0); + /* Fail when the label value is out of range */ + if (label >= (1 << 20)) + return 0; + + if (endp == name) /* no digits */ + return 0; + + addr->entry = htonl(label << MPLS_LS_LABEL_SHIFT); + if (*endp == '\0') { + addr->entry |= htonl(1 << MPLS_LS_S_SHIFT); + return 1; + } + + /* Bad character in the address */ + if (*endp != '/') + return 0; + + name = endp + 1; + addr += 1; + } + /* The address was too long */ + return 0; +} + +int mpls_pton(int af, const char *src, void *addr) +{ + int err; + + switch(af) { + case AF_MPLS: + errno
= 0; + err = mpls_pton1(src, (struct mpls_label *)addr); + break; + default: + errno = EAFNOSUPPORT; + err = -1; + } + + return err; +} diff --git a/lib/names.c b/lib/names.c new file mode 100644 index 00000000..3b5b0b1e --- /dev/null +++ b/lib/names.c @@ -0,0 +1,183 @@ +/* + * names.c db names + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + */ + +#include +#include +#include +#include + +#include "names.h" +#include "utils.h" + +#define MAX_ENTRIES 256 +#define NAME_MAX_LEN 512 + +static int read_id_name(FILE *fp, int *id, char *name) +{ + char buf[NAME_MAX_LEN]; + int min, maj; + + while (fgets(buf, sizeof(buf), fp)) { + char *p = buf; + + while (*p == ' ' || *p == '\t') + p++; + + if (*p == '#' || *p == '\n' || *p == 0) + continue; + + if (sscanf(p, "%x:%x %s\n", &maj, &min, name) == 3) { + *id = (maj << 16) | min; + } else if (sscanf(p, "%x:%x %s #", &maj, &min, name) == 3) { + *id = (maj << 16) | min; + } else if (sscanf(p, "0x%x %s\n", id, name) != 2 && + sscanf(p, "0x%x %s #", id, name) != 2 && + sscanf(p, "%d %s\n", id, name) != 2 && + sscanf(p, "%d %s #", id, name) != 2) { + strcpy(name, p); + return -1; + } + return 1; + } + + return 0; +} + +struct db_names *db_names_alloc(void) +{ + struct db_names *db; + + db = malloc(sizeof(*db)); + if (!db) + return NULL; + + memset(db, 0, sizeof(*db)); + + db->size = MAX_ENTRIES; + db->hash = malloc(sizeof(struct db_entry *) * db->size); + memset(db->hash, 0, sizeof(struct db_entry *) * db->size); + + return db; +} + +int db_names_load(struct db_names *db, const char *path) +{ + struct db_entry *entry; + FILE *fp; + int id; + char namebuf[NAME_MAX_LEN] = {0}; + int ret = -1; + + fp = fopen(path, "r"); + if (!fp) + return -ENOENT; + + while ((ret = read_id_name(fp, &id, &namebuf[0]))) { + if (ret == -1) 
{ + fprintf(stderr, "Database %s is corrupted at %s\n", + path, namebuf); + goto Exit; + } + ret = -1; + + if (id < 0) + continue; + + entry = malloc(sizeof(*entry)); + if (!entry) + goto Exit; + + entry->name = strdup(namebuf); + if (!entry->name) { + free(entry); + goto Exit; + } + + entry->id = id; + entry->next = db->hash[id & (db->size - 1)]; + db->hash[id & (db->size - 1)] = entry; + } + ret = 0; + +Exit: + fclose(fp); + return ret; +} + +void db_names_free(struct db_names *db) +{ + int i; + + if (!db) + return; + + for (i = 0; i < db->size; i++) { + struct db_entry *entry = db->hash[i]; + + while (entry) { + struct db_entry *next = entry->next; + + free(entry->name); + free(entry); + entry = next; + } + } + + free(db->hash); + free(db); +} + +char *id_to_name(struct db_names *db, int id, char *name) +{ + struct db_entry *entry; + + if (!db) + return NULL; + + entry = db->hash[id & (db->size - 1)]; + while (entry && entry->id != id) + entry = entry->next; + + if (entry) { + strncpy(name, entry->name, IDNAME_MAX); + return name; + } + + snprintf(name, IDNAME_MAX, "%d", id); + return NULL; +} + +int name_to_id(struct db_names *db, int *id, const char *name) +{ + struct db_entry *entry; + int i; + + if (!db) + return -1; + + if (db->cached && strcmp(db->cached->name, name) == 0) { + *id = db->cached->id; + return 0; + } + + for (i = 0; i < db->size; i++) { + entry = db->hash[i]; + while (entry && strcmp(entry->name, name)) + entry = entry->next; + + if (entry) { + db->cached = entry; + *id = entry->id; + return 0; + } + } + + return -1; +} diff --git a/lib/utils.c b/lib/utils.c index efebe189..29b4f548 100644 --- a/lib/utils.c +++ b/lib/utils.c @@ -25,11 +25,13 @@ #include #include #include +#include +#include #include #include #include - +#include "rt_names.h" #include "utils.h" #include "namespace.h" @@ -389,7 +391,7 @@ int get_addr_1(inet_prefix *addr, const char *name, int family) if (strcmp(name, "default") == 0 || strcmp(name, "all") == 0 || strcmp(name, 
"any") == 0) { - if (family == AF_DECnet) + if ((family == AF_DECnet) || (family == AF_MPLS)) return -1; addr->family = family; addr->bytelen = (family == AF_INET6 ? 16 : 4); @@ -397,6 +399,18 @@ int get_addr_1(inet_prefix *addr, const char *name, int family) return 0; } + if (family == AF_PACKET) { + int len; + len = ll_addr_a2n((char *)&addr->data, sizeof(addr->data), name); + if (len < 0) + return -1; + + addr->family = AF_PACKET; + addr->bytelen = len; + addr->bitlen = len * 8; + return 0; + } + if (strchr(name, ':')) { addr->family = AF_INET6; if (family != AF_UNSPEC && family != AF_INET6) @@ -419,6 +433,23 @@ int get_addr_1(inet_prefix *addr, const char *name, int family) return 0; } + if (family == AF_MPLS) { + int i; + addr->family = AF_MPLS; + if (mpls_pton(AF_MPLS, name, addr->data) <= 0) + return -1; + addr->bytelen = 4; + addr->bitlen = 20; + /* How many bytes do I need? */ + for (i = 0; i < 8; i++) { + if (ntohl(addr->data[i]) & MPLS_LS_S_MASK) { + addr->bytelen = (i + 1)*4; + break; + } + } + return 0; + } + addr->family = AF_INET; if (family != AF_UNSPEC && family != AF_INET) return -1; @@ -431,6 +462,29 @@ int get_addr_1(inet_prefix *addr, const char *name, int family) return 0; } +int af_bit_len(int af) +{ + switch (af) { + case AF_INET6: + return 128; + case AF_INET: + return 32; + case AF_DECnet: + return 16; + case AF_IPX: + return 80; + case AF_MPLS: + return 20; + } + + return 0; +} + +int af_byte_len(int af) +{ + return af_bit_len(af) / 8; +} + int get_prefix_1(inet_prefix *dst, char *arg, int family) { int err; @@ -442,7 +496,7 @@ int get_prefix_1(inet_prefix *dst, char *arg, int family) if (strcmp(arg, "default") == 0 || strcmp(arg, "any") == 0 || strcmp(arg, "all") == 0) { - if (family == AF_DECnet) + if ((family == AF_DECnet) || (family == AF_MPLS)) return -1; dst->family = family; dst->bytelen = 0; @@ -456,17 +510,8 @@ int get_prefix_1(inet_prefix *dst, char *arg, int family) err = get_addr_1(dst, arg, family); if (err == 0) { - 
switch(dst->family) { - case AF_INET6: - dst->bitlen = 128; - break; - case AF_DECnet: - dst->bitlen = 16; - break; - default: - case AF_INET: - dst->bitlen = 32; - } + dst->bitlen = af_bit_len(dst->family); + if (slash) { if (get_netmask(&plen, slash+1, 0) || plen > dst->bitlen) { @@ -485,10 +530,6 @@ done: int get_addr(inet_prefix *dst, const char *arg, int family) { - if (family == AF_PACKET) { - fprintf(stderr, "Error: \"%s\" may be inet address, but it is not allowed in this context.\n", arg); - exit(1); - } if (get_addr_1(dst, arg, family)) { fprintf(stderr, "Error: an inet address is expected rather than \"%s\".\n", arg); exit(1); @@ -624,12 +665,14 @@ int __get_user_hz(void) return sysconf(_SC_CLK_TCK); } -const char *rt_addr_n2a(int af, const void *addr, char *buf, int buflen) +const char *rt_addr_n2a(int af, int len, const void *addr, char *buf, int buflen) { switch (af) { case AF_INET: case AF_INET6: return inet_ntop(af, addr, buf, buflen); + case AF_MPLS: + return mpls_ntop(af, addr, buf, buflen); case AF_IPX: return ipx_ntop(af, addr, buf, buflen); case AF_DECnet: @@ -638,11 +681,52 @@ const char *rt_addr_n2a(int af, const void *addr, char *buf, int buflen) memcpy(dna.a_addr, addr, 2); return dnet_ntop(af, &dna, buf, buflen); } + case AF_PACKET: + return ll_addr_n2a(addr, len, ARPHRD_VOID, buf, buflen); default: return "???"; } } +int read_family(const char *name) +{ + int family = AF_UNSPEC; + if (strcmp(name, "inet") == 0) + family = AF_INET; + else if (strcmp(name, "inet6") == 0) + family = AF_INET6; + else if (strcmp(name, "dnet") == 0) + family = AF_DECnet; + else if (strcmp(name, "link") == 0) + family = AF_PACKET; + else if (strcmp(name, "ipx") == 0) + family = AF_IPX; + else if (strcmp(name, "mpls") == 0) + family = AF_MPLS; + else if (strcmp(name, "bridge") == 0) + family = AF_BRIDGE; + return family; +} + +const char *family_name(int family) +{ + if (family == AF_INET) + return "inet"; + if (family == AF_INET6) + return "inet6"; + if (family 
== AF_DECnet) + return "dnet"; + if (family == AF_PACKET) + return "link"; + if (family == AF_IPX) + return "ipx"; + if (family == AF_MPLS) + return "mpls"; + if (family == AF_BRIDGE) + return "bridge"; + return "???"; +} + #ifdef RESOLVE_HOSTNAMES struct namerec { @@ -697,7 +781,6 @@ static const char *resolve_address(const void *addr, int len, int af) } #endif - const char *format_host(int af, int len, const void *addr, char *buf, int buflen) { @@ -705,33 +788,14 @@ const char *format_host(int af, int len, const void *addr, if (resolve_hosts) { const char *n; - if (len <= 0) { - switch (af) { - case AF_INET: - len = 4; - break; - case AF_INET6: - len = 16; - break; - case AF_IPX: - len = 10; - break; -#ifdef AF_DECnet - /* I see no reasons why gethostbyname - may not work for DECnet */ - case AF_DECnet: - len = 2; - break; -#endif - default: ; - } - } + len = len <= 0 ? af_byte_len(af) : len; + if (len > 0 && (n = resolve_address(addr, len, af)) != NULL) return n; } #endif - return rt_addr_n2a(af, addr, buf, buflen); + return rt_addr_n2a(af, len, addr, buf, buflen); } @@ -906,3 +970,9 @@ int do_each_netns(int (*func)(char *nsname, void *arg), void *arg, return netns_foreach(on_netns, &nsf); } + +char *int_to_str(int val, char *buf) +{ + sprintf(buf, "%d", val); + return buf; +} diff --git a/man/man3/libnetlink.3 b/man/man3/libnetlink.3 index e999bd68..99be9cc9 100644 --- a/man/man3/libnetlink.3 +++ b/man/man3/libnetlink.3 @@ -33,7 +33,8 @@ int rtnl_talk(struct rtnl_handle *rtnl, struct nlmsghdr *n, pid_t peer, void *jarg) .sp int rtnl_listen(struct rtnl_handle *rtnl, - int (*handler)(struct sockaddr_nl *,struct nlmsghdr *n, void *), + int (*handler)(struct sockaddr_nl *, struct rtnl_ctrl_data *, + struct nlmsghdr *n, void *), void *jarg) .sp int rtnl_from_file(FILE *rtnl, @@ -108,8 +109,8 @@ rtnl_listen Receive netlink data after a request and pass it to .I handler. 
.B handler -is a callback that gets the message source address, the message itself, -and the +is a callback that gets the message source address, ancillary data, the message +itself, and the .B jarg cookie as arguments. It will get called for all received messages. Only one message bundle is received. If there is a message diff --git a/man/man8/Makefile b/man/man8/Makefile index e9989288..152747a3 100644 --- a/man/man8/Makefile +++ b/man/man8/Makefile @@ -6,7 +6,7 @@ MAN8PAGES = $(TARGETS) ip.8 arpd.8 lnstat.8 routel.8 rtacct.8 rtmon.8 ss.8 \ tc-mqprio.8 tc-netem.8 tc-pfifo.8 tc-pfifo_fast.8 tc-prio.8 tc-red.8 \ tc-sfb.8 tc-sfq.8 tc-stab.8 tc-tbf.8 \ bridge.8 rtstat.8 ctstat.8 nstat.8 routef.8 \ - ip-addrlabel.8 ip-l2tp.8 \ + ip-addrlabel.8 ip-fou.8 ip-gue.8 ip-l2tp.8 \ ip-maddress.8 ip-monitor.8 ip-mroute.8 ip-neighbour.8 \ ip-netns.8 ip-ntable.8 ip-rule.8 ip-tunnel.8 ip-xfrm.8 \ ip-tcp_metrics.8 ip-netconf.8 ip-token.8 diff --git a/man/man8/arpd.8 b/man/man8/arpd.8 index fc99b97e..5050a98b 100644 --- a/man/man8/arpd.8 +++ b/man/man8/arpd.8 @@ -35,7 +35,7 @@ Suppress sending broadcast queries by the kernel. This option only makes sense t Specifies the timeout of the negative cache. When resolution fails, arpd suppresses further attempts to resolve for this period. This option only makes sense together with option '-k'. This timeout should not be too much longer than the boot time of a typical host not supporting gratuitous ARP. Default value is 60 seconds. .TP -p