diff --git a/Makefile b/Makefile index 46a5ad9b..d4eed713 100644 --- a/Makefile +++ b/Makefile @@ -30,7 +30,9 @@ CC = gcc HOSTCC = gcc DEFINES += -D_GNU_SOURCE CCOPTS = -O2 -WFLAGS = -Wall -Wstrict-prototypes +WFLAGS := -Wall -Wstrict-prototypes -Werror -Wmissing-prototypes +WFLAGS += -Wmissing-declarations -Wold-style-definition + CFLAGS = $(WFLAGS) $(CCOPTS) -I../include $(DEFINES) YACCFLAGS = -d -t -v diff --git a/README b/README index 99d1aeb4..c7a5118d 100644 --- a/README +++ b/README @@ -4,7 +4,7 @@ Information: http://www.linuxfoundation.org/collaborate/workgroups/networking/iproute2 Download: - http://devresources.linuxfoundation.org/dev/iproute2/download + http://www.kernel.org/pub/linux/utils/net/iproute2/ Repository: git://git.kernel.org/pub/scm/linux/kernel/git/shemminger/iproute2.git @@ -37,7 +37,7 @@ contains whether or not ATM is available, etc. kernel include files. Stephen Hemminger -shemminger@linux-foundation.org +stephen@networkplumber.org Alexey Kuznetsov kuznet@ms2.inr.ac.ru diff --git a/bridge/Makefile b/bridge/Makefile index 9a6743e5..67aceb4d 100644 --- a/bridge/Makefile +++ b/bridge/Makefile @@ -1,4 +1,4 @@ -BROBJ = bridge.o fdb.o monitor.o link.o +BROBJ = bridge.o fdb.o monitor.o link.o mdb.o include ../Config diff --git a/bridge/br_common.h b/bridge/br_common.h index 718ecb9a..10f6ce91 100644 --- a/bridge/br_common.h +++ b/bridge/br_common.h @@ -3,12 +3,15 @@ extern int print_linkinfo(const struct sockaddr_nl *who, void *arg); extern int print_fdb(const struct sockaddr_nl *who, struct nlmsghdr *n, void *arg); +extern int print_mdb(const struct sockaddr_nl *who, + struct nlmsghdr *n, void *arg); extern int do_fdb(int argc, char **argv); +extern int do_mdb(int argc, char **argv); extern int do_monitor(int argc, char **argv); extern int preferred_family; extern int show_stats; -extern int show_detail; +extern int show_details; extern int timestamp; extern struct rtnl_handle rth; diff --git a/bridge/bridge.c b/bridge/bridge.c index 
e2c33b0f..1d59a1e1 100644 --- a/bridge/bridge.c +++ b/bridge/bridge.c @@ -27,7 +27,7 @@ static void usage(void) { fprintf(stderr, "Usage: bridge [ OPTIONS ] OBJECT { COMMAND | help }\n" -"where OBJECT := { fdb | monitor }\n" +"where OBJECT := { fdb | mdb | monitor }\n" " OPTIONS := { -V[ersion] | -s[tatistics] | -d[etails]\n" ); exit(-1); } @@ -43,6 +43,7 @@ static const struct cmd { int (*func)(int argc, char **argv); } cmds[] = { { "fdb", do_fdb }, + { "mdb", do_mdb }, { "monitor", do_monitor }, { "help", do_help }, { 0 } diff --git a/bridge/mdb.c b/bridge/mdb.c new file mode 100644 index 00000000..81d479b9 --- /dev/null +++ b/bridge/mdb.c @@ -0,0 +1,254 @@ +/* + * Get mdb table with netlink + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "libnetlink.h" +#include "br_common.h" +#include "rt_names.h" +#include "utils.h" + +#ifndef MDBA_RTA +#define MDBA_RTA(r) \ + ((struct rtattr*)(((char*)(r)) + NLMSG_ALIGN(sizeof(struct br_port_msg)))) +#endif + +int filter_index; + +static void usage(void) +{ + fprintf(stderr, "Usage: bridge mdb { add | del } dev DEV port PORT grp GROUP [permanent | temp]\n"); + fprintf(stderr, " bridge mdb {show} [ dev DEV ]\n"); + exit(-1); +} + +static void br_print_router_ports(FILE *f, struct rtattr *attr) +{ + uint32_t *port_ifindex; + struct rtattr *i; + int rem; + + rem = RTA_PAYLOAD(attr); + for (i = RTA_DATA(attr); RTA_OK(i, rem); i = RTA_NEXT(i, rem)) { + port_ifindex = RTA_DATA(i); + fprintf(f, "%s ", ll_index_to_name(*port_ifindex)); + } + + fprintf(f, "\n"); +} + +static void print_mdb_entry(FILE *f, int ifindex, struct br_mdb_entry *e) +{ + SPRINT_BUF(abuf); + + if (e->addr.proto == htons(ETH_P_IP)) + fprintf(f, "dev %s port %s grp %s %s\n", ll_index_to_name(ifindex), + ll_index_to_name(e->ifindex), + inet_ntop(AF_INET, &e->addr.u.ip4, abuf, sizeof(abuf)), + (e->state & MDB_PERMANENT) ? 
"permanent" : "temp"); + else + fprintf(f, "dev %s port %s grp %s %s\n", ll_index_to_name(ifindex), + ll_index_to_name(e->ifindex), + inet_ntop(AF_INET6, &e->addr.u.ip6, abuf, sizeof(abuf)), + (e->state & MDB_PERMANENT) ? "permanent" : "temp"); +} + +static void br_print_mdb_entry(FILE *f, int ifindex, struct rtattr *attr) +{ + struct rtattr *i; + int rem; + struct br_mdb_entry *e; + + rem = RTA_PAYLOAD(attr); + for (i = RTA_DATA(attr); RTA_OK(i, rem); i = RTA_NEXT(i, rem)) { + e = RTA_DATA(i); + print_mdb_entry(f, ifindex, e); + } +} + +int print_mdb(const struct sockaddr_nl *who, struct nlmsghdr *n, void *arg) +{ + FILE *fp = arg; + struct br_port_msg *r = NLMSG_DATA(n); + int len = n->nlmsg_len; + struct rtattr * tb[MDBA_MAX+1]; + + if (n->nlmsg_type != RTM_GETMDB && n->nlmsg_type != RTM_NEWMDB && n->nlmsg_type != RTM_DELMDB) { + fprintf(stderr, "Not RTM_GETMDB, RTM_NEWMDB or RTM_DELMDB: %08x %08x %08x\n", + n->nlmsg_len, n->nlmsg_type, n->nlmsg_flags); + + return 0; + } + + len -= NLMSG_LENGTH(sizeof(*r)); + if (len < 0) { + fprintf(stderr, "BUG: wrong nlmsg len %d\n", len); + return -1; + } + + if (filter_index && filter_index != r->ifindex) + return 0; + + parse_rtattr(tb, MDBA_MAX, MDBA_RTA(r), n->nlmsg_len - NLMSG_LENGTH(sizeof(*r))); + + if (tb[MDBA_MDB]) { + struct rtattr *i; + int rem = RTA_PAYLOAD(tb[MDBA_MDB]); + + for (i = RTA_DATA(tb[MDBA_MDB]); RTA_OK(i, rem); i = RTA_NEXT(i, rem)) + br_print_mdb_entry(fp, r->ifindex, i); + } + + if (tb[MDBA_ROUTER]) { + if (show_details) { + fprintf(fp, "router ports on %s: ", ll_index_to_name(r->ifindex)); + br_print_router_ports(fp, tb[MDBA_ROUTER]); + } + } + + return 0; +} + +static int mdb_show(int argc, char **argv) +{ + char *filter_dev = NULL; + + while (argc > 0) { + if (strcmp(*argv, "dev") == 0) { + NEXT_ARG(); + if (filter_dev) + duparg("dev", *argv); + filter_dev = *argv; + } + argc--; argv++; + } + + if (filter_dev) { + filter_index = if_nametoindex(filter_dev); + if (filter_index == 0) { + 
fprintf(stderr, "Cannot find device \"%s\"\n", + filter_dev); + return -1; + } + } + + if (rtnl_wilddump_request(&rth, PF_BRIDGE, RTM_GETMDB) < 0) { + perror("Cannot send dump request"); + exit(1); + } + + if (rtnl_dump_filter(&rth, print_mdb, stdout) < 0) { + fprintf(stderr, "Dump terminated\n"); + exit(1); + } + + return 0; +} + +static int mdb_modify(int cmd, int flags, int argc, char **argv) +{ + struct { + struct nlmsghdr n; + struct br_port_msg bpm; + char buf[1024]; + } req; + struct br_mdb_entry entry; + char *d = NULL, *p = NULL, *grp = NULL; + + memset(&req, 0, sizeof(req)); + memset(&entry, 0, sizeof(entry)); + + req.n.nlmsg_len = NLMSG_LENGTH(sizeof(struct br_port_msg)); + req.n.nlmsg_flags = NLM_F_REQUEST|flags; + req.n.nlmsg_type = cmd; + req.bpm.family = PF_BRIDGE; + + while (argc > 0) { + if (strcmp(*argv, "dev") == 0) { + NEXT_ARG(); + d = *argv; + } else if (strcmp(*argv, "grp") == 0) { + NEXT_ARG(); + grp = *argv; + } else if (strcmp(*argv, "port") == 0) { + NEXT_ARG(); + p = *argv; + } else if (strcmp(*argv, "permanent") == 0) { + if (cmd == RTM_NEWMDB) + entry.state |= MDB_PERMANENT; + } else if (strcmp(*argv, "temp") == 0) { + ;/* nothing */ + } else { + if (matches(*argv, "help") == 0) + usage(); + } + argc--; argv++; + } + + if (d == NULL || grp == NULL || p == NULL) { + fprintf(stderr, "Device, group address and port name are required arguments.\n"); + exit(-1); + } + + req.bpm.ifindex = ll_name_to_index(d); + if (req.bpm.ifindex == 0) { + fprintf(stderr, "Cannot find device \"%s\"\n", d); + return -1; + } + + entry.ifindex = ll_name_to_index(p); + if (entry.ifindex == 0) { + fprintf(stderr, "Cannot find device \"%s\"\n", p); + return -1; + } + + if (!inet_pton(AF_INET, grp, &entry.addr.u.ip4)) { + if (!inet_pton(AF_INET6, grp, &entry.addr.u.ip6)) { + fprintf(stderr, "Invalid address \"%s\"\n", grp); + return -1; + } else + entry.addr.proto = htons(ETH_P_IPV6); + } else + entry.addr.proto = htons(ETH_P_IP); + + addattr_l(&req.n, 
sizeof(req), MDBA_SET_ENTRY, &entry, sizeof(entry)); + + if (rtnl_talk(&rth, &req.n, 0, 0, NULL) < 0) + exit(2); + + return 0; +} + +int do_mdb(int argc, char **argv) +{ + ll_init_map(&rth); + + if (argc > 0) { + if (matches(*argv, "add") == 0) + return mdb_modify(RTM_NEWMDB, NLM_F_CREATE|NLM_F_EXCL, argc-1, argv+1); + if (matches(*argv, "delete") == 0) + return mdb_modify(RTM_DELMDB, 0, argc-1, argv+1); + + if (matches(*argv, "show") == 0 || + matches(*argv, "lst") == 0 || + matches(*argv, "list") == 0) + return mdb_show(argc-1, argv+1); + if (matches(*argv, "help") == 0) + usage(); + } else + return mdb_show(0, NULL); + + fprintf(stderr, "Command \"%s\" is unknown, try \"bridge mdb help\".\n", *argv); + exit(-1); +} diff --git a/bridge/monitor.c b/bridge/monitor.c index 2f606552..e96fcaf7 100644 --- a/bridge/monitor.c +++ b/bridge/monitor.c @@ -31,7 +31,7 @@ int prefix_banner; static void usage(void) { - fprintf(stderr, "Usage: bridge monitor\n"); + fprintf(stderr, "Usage: bridge monitor [file | link | fdb | mdb | all]\n"); exit(-1); } @@ -46,8 +46,8 @@ static int show_mark(FILE *fp, const struct nlmsghdr *n) return 0; } -int accept_msg(const struct sockaddr_nl *who, - struct nlmsghdr *n, void *arg) +static int accept_msg(const struct sockaddr_nl *who, + struct nlmsghdr *n, void *arg) { FILE *fp = arg; @@ -68,6 +68,12 @@ int accept_msg(const struct sockaddr_nl *who, fprintf(fp, "[NEIGH]"); return print_fdb(who, n, arg); + case RTM_NEWMDB: + case RTM_DELMDB: + if (prefix_banner) + fprintf(fp, "[MDB]"); + return print_mdb(who, n, arg); + case 15: return show_mark(fp, n); @@ -84,6 +90,7 @@ int do_monitor(int argc, char **argv) unsigned groups = ~RTMGRP_TC; int llink=0; int lneigh=0; + int lmdb=0; rtnl_close(&rth); @@ -97,6 +104,9 @@ int do_monitor(int argc, char **argv) } else if (matches(*argv, "fdb") == 0) { lneigh = 1; groups = 0; + } else if (matches(*argv, "mdb") == 0) { + lmdb = 1; + groups = 0; } else if (strcmp(*argv, "all") == 0) { groups = ~RTMGRP_TC; 
prefix_banner=1; @@ -116,6 +126,10 @@ int do_monitor(int argc, char **argv) groups |= nl_mgrp(RTNLGRP_NEIGH); } + if (lmdb) { + groups |= nl_mgrp(RTNLGRP_MDB); + } + if (file) { FILE *fp; fp = fopen(file, "r"); diff --git a/configure b/configure index 99121144..da01c19b 100755 --- a/configure +++ b/configure @@ -1,18 +1,25 @@ #! /bin/bash -# This is not an autconf generated configure +# This is not an autoconf generated configure # INCLUDE=${1:-"$PWD/include"} -: ${PKG_CONFIG:=pkg-config} -: ${CC=gcc} -echo "PKG_CONFIG:=${PKG_CONFIG}" >>Config # Make a temp directory in build tree. TMPDIR=$(mktemp -d config.XXXXXX) trap 'status=$?; rm -rf $TMPDIR; exit $status' EXIT HUP INT QUIT TERM +check_toolchain() +{ + : ${PKG_CONFIG:=pkg-config} + : ${AR=ar} + : ${CC=gcc} + echo "PKG_CONFIG:=${PKG_CONFIG}" >>Config + echo "AR:=${AR}" >>Config + echo "CC:=${CC}" >>Config +} + check_atm() { -cat >$TMPDIR/atmtest.c <$TMPDIR/atmtest.c < int main(int argc, char **argv) { struct atm_qos qos; @@ -20,21 +27,22 @@ int main(int argc, char **argv) { return 0; } EOF -$CC -I$INCLUDE -o $TMPDIR/atmtest $TMPDIR/atmtest.c -latm >/dev/null 2>&1 -if [ $? -eq 0 ] -then - echo "TC_CONFIG_ATM:=y" >>Config - echo yes -else - echo no -fi -rm -f $TMPDIR/atmtest.c $TMPDIR/atmtest + + $CC -I$INCLUDE -o $TMPDIR/atmtest $TMPDIR/atmtest.c -latm >/dev/null 2>&1 + if [ $? -eq 0 ] + then + echo "TC_CONFIG_ATM:=y" >>Config + echo yes + else + echo no + fi + rm -f $TMPDIR/atmtest.c $TMPDIR/atmtest } check_xt() { -#check if we have xtables from iptables >= 1.4.5. -cat >$TMPDIR/ipttest.c <= 1.4.5. 
+ cat >$TMPDIR/ipttest.c < #include static struct xtables_globals test_globals = { @@ -51,27 +59,27 @@ int main(int argc, char **argv) xtables_init_all(&test_globals, NFPROTO_IPV4); return 0; } - EOF -if $CC -I$INCLUDE $IPTC -o $TMPDIR/ipttest $TMPDIR/ipttest.c $IPTL $(${PKG_CONFIG} xtables --cflags --libs) -ldl >/dev/null 2>&1 -then + if $CC -I$INCLUDE $IPTC -o $TMPDIR/ipttest $TMPDIR/ipttest.c $IPTL \ + $(${PKG_CONFIG} xtables --cflags --libs) -ldl >/dev/null 2>&1 + then echo "TC_CONFIG_XT:=y" >>Config echo "using xtables" -fi -rm -f $TMPDIR/ipttest.c $TMPDIR/ipttest + fi + rm -f $TMPDIR/ipttest.c $TMPDIR/ipttest } check_xt_old() { -# bail if previous XT checks has already succeded. -if grep TC_CONFIG_XT Config > /dev/null -then + # bail if previous XT checks has already succeded. + if grep -q TC_CONFIG_XT Config + then return -fi + fi -#check if we dont need our internal header .. -cat >$TMPDIR/ipttest.c <$TMPDIR/ipttest.c < char *lib_dir; unsigned int global_option_offset = 0; @@ -91,26 +99,26 @@ int main(int argc, char **argv) { } EOF -$CC -I$INCLUDE $IPTC -o $TMPDIR/ipttest $TMPDIR/ipttest.c $IPTL -ldl >/dev/null 2>&1 -if [ $? -eq 0 ] -then + $CC -I$INCLUDE $IPTC -o $TMPDIR/ipttest $TMPDIR/ipttest.c $IPTL -ldl >/dev/null 2>&1 + if [ $? -eq 0 ] + then echo "TC_CONFIG_XT_OLD:=y" >>Config echo "using old xtables (no need for xt-internal.h)" -fi -rm -f $TMPDIR/ipttest.c $TMPDIR/ipttest + fi + rm -f $TMPDIR/ipttest.c $TMPDIR/ipttest } check_xt_old_internal_h() { -# bail if previous XT checks has already succeded. -if grep TC_CONFIG_XT Config > /dev/null -then + # bail if previous XT checks has already succeded. 
+ if grep -q TC_CONFIG_XT Config + then return -fi + fi -#check if we need our own internal.h -cat >$TMPDIR/ipttest.c <$TMPDIR/ipttest.c < #include "xt-internal.h" char *lib_dir; @@ -131,14 +139,14 @@ int main(int argc, char **argv) { } EOF -$CC -I$INCLUDE $IPTC -o $TMPDIR/ipttest $TMPDIR/ipttest.c $IPTL -ldl >/dev/null 2>&1 + $CC -I$INCLUDE $IPTC -o $TMPDIR/ipttest $TMPDIR/ipttest.c $IPTL -ldl >/dev/null 2>&1 -if [ $? -eq 0 ] -then - echo "using old xtables with xt-internal.h" - echo "TC_CONFIG_XT_OLD_H:=y" >>Config -fi -rm -f $TMPDIR/ipttest.c $TMPDIR/ipttest + if [ $? -eq 0 ] + then + echo "using old xtables with xt-internal.h" + echo "TC_CONFIG_XT_OLD_H:=y" >>Config + fi + rm -f $TMPDIR/ipttest.c $TMPDIR/ipttest } check_ipt() @@ -173,7 +181,7 @@ check_ipt_lib_dir() check_setns() { -cat >$TMPDIR/setnstest.c <$TMPDIR/setnstest.c < int main(int argc, char **argv) { @@ -181,20 +189,20 @@ int main(int argc, char **argv) return 0; } EOF -$CC -I$INCLUDE -o $TMPDIR/setnstest $TMPDIR/setnstest.c >/dev/null 2>&1 -if [ $? -eq 0 ] -then + $CC -I$INCLUDE -o $TMPDIR/setnstest $TMPDIR/setnstest.c >/dev/null 2>&1 + if [ $? 
-eq 0 ] + then echo "IP_CONFIG_SETNS:=y" >>Config echo "yes" -else + else echo "no" -fi -rm -f $TMPDIR/setnstest.c $TMPDIR/setnstest + fi + rm -f $TMPDIR/setnstest.c $TMPDIR/setnstest } check_ipset() { -cat >$TMPDIR/ipsettest.c <$TMPDIR/ipsettest.c < #ifndef IP_SET_INVALID #define IPSET_DIM_MAX 3 @@ -213,17 +221,18 @@ int main(void) #endif EOF -if $CC -I$INCLUDE -o $TMPDIR/ipsettest $TMPDIR/ipsettest.c >/dev/null 2>&1 -then + if $CC -I$INCLUDE -o $TMPDIR/ipsettest $TMPDIR/ipsettest.c >/dev/null 2>&1 + then echo "TC_CONFIG_IPSET:=y" >>Config echo "yes" -else + else echo "no" -fi -rm -f $TMPDIR/ipsettest.c $TMPDIR/ipsettest + fi + rm -f $TMPDIR/ipsettest.c $TMPDIR/ipsettest } echo "# Generated config based on" $INCLUDE >Config +check_toolchain echo "TC schedulers" diff --git a/genl/ctrl.c b/genl/ctrl.c index 6d97c269..7c425788 100644 --- a/genl/ctrl.c +++ b/genl/ctrl.c @@ -112,7 +112,7 @@ errout: return ret; } -void print_ctrl_cmd_flags(FILE *fp, __u32 fl) +static void print_ctrl_cmd_flags(FILE *fp, __u32 fl) { fprintf(fp, "\n\t\tCapabilities (0x%x):\n ", fl); if (!fl) { diff --git a/include/SNAPSHOT.h b/include/SNAPSHOT.h index d4fc137c..b3738737 100644 --- a/include/SNAPSHOT.h +++ b/include/SNAPSHOT.h @@ -1 +1 @@ -static const char SNAPSHOT[] = "121001"; +static const char SNAPSHOT[] = "121211"; diff --git a/include/libnetlink.h b/include/libnetlink.h index 81649afe..41e6ed1a 100644 --- a/include/libnetlink.h +++ b/include/libnetlink.h @@ -1,6 +1,7 @@ #ifndef __LIBNETLINK_H__ #define __LIBNETLINK_H__ 1 +#include #include #include #include @@ -8,6 +9,7 @@ #include #include #include +#include struct rtnl_handle { diff --git a/include/linux/atm.h b/include/linux/atm.h index 2fafdfb9..08e27beb 100644 --- a/include/linux/atm.h +++ b/include/linux/atm.h @@ -8,8 +8,8 @@ * Instead, #include */ -#ifndef _UAPI_LINUX_ATM_H -#define _UAPI_LINUX_ATM_H +#ifndef _LINUX_ATM_H +#define _LINUX_ATM_H /* * BEGIN_xx and END_xx markers are used for automatic generation of @@ -238,4 
+238,4 @@ struct atmif_sioc { typedef unsigned short atm_backend_t; -#endif /* _UAPI_LINUX_ATM_H */ +#endif /* _LINUX_ATM_H */ diff --git a/include/linux/genetlink.h b/include/linux/genetlink.h index c880a417..1f85a276 100644 --- a/include/linux/genetlink.h +++ b/include/linux/genetlink.h @@ -1,5 +1,5 @@ -#ifndef _UAPI__LINUX_GENERIC_NETLINK_H -#define _UAPI__LINUX_GENERIC_NETLINK_H +#ifndef __LINUX_GENERIC_NETLINK_H +#define __LINUX_GENERIC_NETLINK_H #include #include @@ -81,4 +81,4 @@ enum { #define CTRL_ATTR_MCAST_GRP_MAX (__CTRL_ATTR_MCAST_GRP_MAX - 1) -#endif /* _UAPI__LINUX_GENERIC_NETLINK_H */ +#endif /* __LINUX_GENERIC_NETLINK_H */ diff --git a/include/linux/hdlc/ioctl.h b/include/linux/hdlc/ioctl.h index 58397236..04bc0274 100644 --- a/include/linux/hdlc/ioctl.h +++ b/include/linux/hdlc/ioctl.h @@ -34,13 +34,15 @@ #define LMI_CCITT 3 /* ITU-T Annex A */ #define LMI_CISCO 4 /* The "original" LMI, aka Gang of Four */ -typedef struct { +#ifndef __ASSEMBLY__ + +typedef struct { unsigned int clock_rate; /* bits per second */ unsigned int clock_type; /* internal, external, TX-internal etc. */ unsigned short loopback; } sync_serial_settings; /* V.35, V.24, X.21 */ -typedef struct { +typedef struct { unsigned int clock_rate; /* bits per second */ unsigned int clock_type; /* internal, external, TX-internal etc. */ unsigned short loopback; @@ -78,4 +80,5 @@ typedef struct { /* PPP doesn't need any info now - supply length = 0 to ioctl */ +#endif /* __ASSEMBLY__ */ #endif /* __HDLC_IOCTL_H__ */ diff --git a/include/linux/if_arp.h b/include/linux/if_arp.h index 82c7d1bd..6a48d55a 100644 --- a/include/linux/if_arp.h +++ b/include/linux/if_arp.h @@ -20,8 +20,8 @@ * as published by the Free Software Foundation; either version * 2 of the License, or (at your option) any later version. 
*/ -#ifndef _UAPI_LINUX_IF_ARP_H -#define _UAPI_LINUX_IF_ARP_H +#ifndef _LINUX_IF_ARP_H +#define _LINUX_IF_ARP_H #include @@ -156,4 +156,4 @@ struct arphdr { }; -#endif /* _UAPI_LINUX_IF_ARP_H */ +#endif /* _LINUX_IF_ARP_H */ diff --git a/include/linux/if_bridge.h b/include/linux/if_bridge.h new file mode 100644 index 00000000..aac8b8c0 --- /dev/null +++ b/include/linux/if_bridge.h @@ -0,0 +1,185 @@ +/* + * Linux ethernet bridge + * + * Authors: + * Lennert Buytenhek + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#ifndef _LINUX_IF_BRIDGE_H +#define _LINUX_IF_BRIDGE_H + +#include + +#define SYSFS_BRIDGE_ATTR "bridge" +#define SYSFS_BRIDGE_FDB "brforward" +#define SYSFS_BRIDGE_PORT_SUBDIR "brif" +#define SYSFS_BRIDGE_PORT_ATTR "brport" +#define SYSFS_BRIDGE_PORT_LINK "bridge" + +#define BRCTL_VERSION 1 + +#define BRCTL_GET_VERSION 0 +#define BRCTL_GET_BRIDGES 1 +#define BRCTL_ADD_BRIDGE 2 +#define BRCTL_DEL_BRIDGE 3 +#define BRCTL_ADD_IF 4 +#define BRCTL_DEL_IF 5 +#define BRCTL_GET_BRIDGE_INFO 6 +#define BRCTL_GET_PORT_LIST 7 +#define BRCTL_SET_BRIDGE_FORWARD_DELAY 8 +#define BRCTL_SET_BRIDGE_HELLO_TIME 9 +#define BRCTL_SET_BRIDGE_MAX_AGE 10 +#define BRCTL_SET_AGEING_TIME 11 +#define BRCTL_SET_GC_INTERVAL 12 +#define BRCTL_GET_PORT_INFO 13 +#define BRCTL_SET_BRIDGE_STP_STATE 14 +#define BRCTL_SET_BRIDGE_PRIORITY 15 +#define BRCTL_SET_PORT_PRIORITY 16 +#define BRCTL_SET_PATH_COST 17 +#define BRCTL_GET_FDB_ENTRIES 18 + +#define BR_STATE_DISABLED 0 +#define BR_STATE_LISTENING 1 +#define BR_STATE_LEARNING 2 +#define BR_STATE_FORWARDING 3 +#define BR_STATE_BLOCKING 4 + +struct __bridge_info { + __u64 designated_root; + __u64 bridge_id; + __u32 root_path_cost; + __u32 max_age; + __u32 hello_time; + __u32 forward_delay; + __u32 bridge_max_age; + __u32 
bridge_hello_time; + __u32 bridge_forward_delay; + __u8 topology_change; + __u8 topology_change_detected; + __u8 root_port; + __u8 stp_enabled; + __u32 ageing_time; + __u32 gc_interval; + __u32 hello_timer_value; + __u32 tcn_timer_value; + __u32 topology_change_timer_value; + __u32 gc_timer_value; +}; + +struct __port_info { + __u64 designated_root; + __u64 designated_bridge; + __u16 port_id; + __u16 designated_port; + __u32 path_cost; + __u32 designated_cost; + __u8 state; + __u8 top_change_ack; + __u8 config_pending; + __u8 unused0; + __u32 message_age_timer_value; + __u32 forward_delay_timer_value; + __u32 hold_timer_value; +}; + +struct __fdb_entry { + __u8 mac_addr[6]; + __u8 port_no; + __u8 is_local; + __u32 ageing_timer_value; + __u8 port_hi; + __u8 pad0; + __u16 unused; +}; + +/* Bridge Flags */ +#define BRIDGE_FLAGS_MASTER 1 /* Bridge command to/from master */ +#define BRIDGE_FLAGS_SELF 2 /* Bridge command to/from lowerdev */ + +#define BRIDGE_MODE_VEB 0 /* Default loopback mode */ +#define BRIDGE_MODE_VEPA 1 /* 802.1Qbg defined VEPA mode */ + +/* Bridge management nested attributes + * [IFLA_AF_SPEC] = { + * [IFLA_BRIDGE_FLAGS] + * [IFLA_BRIDGE_MODE] + * } + */ +enum { + IFLA_BRIDGE_FLAGS, + IFLA_BRIDGE_MODE, + __IFLA_BRIDGE_MAX, +}; +#define IFLA_BRIDGE_MAX (__IFLA_BRIDGE_MAX - 1) + +/* Bridge multicast database attributes + * [MDBA_MDB] = { + * [MDBA_MDB_ENTRY] = { + * [MDBA_MDB_ENTRY_INFO] + * } + * } + * [MDBA_ROUTER] = { + * [MDBA_ROUTER_PORT] + * } + */ +enum { + MDBA_UNSPEC, + MDBA_MDB, + MDBA_ROUTER, + __MDBA_MAX, +}; +#define MDBA_MAX (__MDBA_MAX - 1) + +enum { + MDBA_MDB_UNSPEC, + MDBA_MDB_ENTRY, + __MDBA_MDB_MAX, +}; +#define MDBA_MDB_MAX (__MDBA_MDB_MAX - 1) + +enum { + MDBA_MDB_ENTRY_UNSPEC, + MDBA_MDB_ENTRY_INFO, + __MDBA_MDB_ENTRY_MAX, +}; +#define MDBA_MDB_ENTRY_MAX (__MDBA_MDB_ENTRY_MAX - 1) + +enum { + MDBA_ROUTER_UNSPEC, + MDBA_ROUTER_PORT, + __MDBA_ROUTER_MAX, +}; +#define MDBA_ROUTER_MAX (__MDBA_ROUTER_MAX - 1) + +struct br_port_msg { 
+ __u8 family; + __u32 ifindex; +}; + +struct br_mdb_entry { + __u32 ifindex; +#define MDB_TEMPORARY 0 +#define MDB_PERMANENT 1 + __u8 state; + struct { + union { + __be32 ip4; + struct in6_addr ip6; + } u; + __be16 proto; + } addr; +}; + +enum { + MDBA_SET_ENTRY_UNSPEC, + MDBA_SET_ENTRY, + __MDBA_SET_ENTRY_MAX, +}; +#define MDBA_SET_ENTRY_MAX (__MDBA_SET_ENTRY_MAX - 1) + +#endif /* _LINUX_IF_BRIDGE_H */ diff --git a/include/linux/if_ether.h b/include/linux/if_ether.h index 0343e1f0..730410a8 100644 --- a/include/linux/if_ether.h +++ b/include/linux/if_ether.h @@ -18,8 +18,8 @@ * 2 of the License, or (at your option) any later version. */ -#ifndef _UAPI_LINUX_IF_ETHER_H -#define _UAPI_LINUX_IF_ETHER_H +#ifndef _LINUX_IF_ETHER_H +#define _LINUX_IF_ETHER_H #include @@ -48,6 +48,7 @@ #define ETH_P_BPQ 0x08FF /* G8BPQ AX.25 Ethernet Packet [ NOT AN OFFICIALLY REGISTERED ID ] */ #define ETH_P_IEEEPUP 0x0a00 /* Xerox IEEE802.3 PUP packet */ #define ETH_P_IEEEPUPAT 0x0a01 /* Xerox IEEE802.3 PUP Addr Trans packet */ +#define ETH_P_BATMAN 0x4305 /* B.A.T.M.A.N.-Advanced packet [ NOT AN OFFICIALLY REGISTERED ID ] */ #define ETH_P_DEC 0x6000 /* DEC Assigned proto */ #define ETH_P_DNA_DL 0x6001 /* DEC DNA Dump/Load */ #define ETH_P_DNA_RC 0x6002 /* DEC DNA Remote Console */ @@ -132,4 +133,4 @@ struct ethhdr { } __attribute__((packed)); -#endif /* _UAPI_LINUX_IF_ETHER_H */ +#endif /* _LINUX_IF_ETHER_H */ diff --git a/include/linux/if_link.h b/include/linux/if_link.h index 012d95a5..8ca3afe7 100644 --- a/include/linux/if_link.h +++ b/include/linux/if_link.h @@ -1,5 +1,5 @@ -#ifndef _UAPI_LINUX_IF_LINK_H -#define _UAPI_LINUX_IF_LINK_H +#ifndef _LINUX_IF_LINK_H +#define _LINUX_IF_LINK_H #include #include @@ -203,6 +203,24 @@ enum { #define IFLA_INET6_MAX (__IFLA_INET6_MAX - 1) +enum { + BRIDGE_MODE_UNSPEC, + BRIDGE_MODE_HAIRPIN, +}; + +enum { + IFLA_BRPORT_UNSPEC, + IFLA_BRPORT_STATE, /* Spanning tree state */ + IFLA_BRPORT_PRIORITY, /* " priority */ + IFLA_BRPORT_COST, /* " cost 
*/ + IFLA_BRPORT_MODE, /* mode (hairpin) */ + IFLA_BRPORT_GUARD, /* bpdu guard */ + IFLA_BRPORT_PROTECT, /* root port protection */ + IFLA_BRPORT_FAST_LEAVE, /* multicast fast leave */ + __IFLA_BRPORT_MAX +}; +#define IFLA_BRPORT_MAX (__IFLA_BRPORT_MAX - 1) + struct ifla_cacheinfo { __u32 max_reasm_len; __u32 tstamp; /* ipv6InterfaceTable updated timestamp */ @@ -283,6 +301,10 @@ enum { IFLA_VXLAN_AGEING, IFLA_VXLAN_LIMIT, IFLA_VXLAN_PORT_RANGE, + IFLA_VXLAN_PROXY, + IFLA_VXLAN_RSC, + IFLA_VXLAN_L2MISS, + IFLA_VXLAN_L3MISS, __IFLA_VXLAN_MAX }; #define IFLA_VXLAN_MAX (__IFLA_VXLAN_MAX - 1) @@ -424,4 +446,4 @@ enum { #define IFLA_IPOIB_MAX (__IFLA_IPOIB_MAX - 1) -#endif /* _UAPI_LINUX_IF_LINK_H */ +#endif /* _LINUX_IF_LINK_H */ diff --git a/include/linux/if_tun.h b/include/linux/if_tun.h index 25a585ce..dffb1927 100644 --- a/include/linux/if_tun.h +++ b/include/linux/if_tun.h @@ -13,8 +13,8 @@ * GNU General Public License for more details. */ -#ifndef _UAPI__IF_TUN_H -#define _UAPI__IF_TUN_H +#ifndef __IF_TUN_H +#define __IF_TUN_H #include #include @@ -31,9 +31,11 @@ #define TUN_FASYNC 0x0010 #define TUN_NOCHECKSUM 0x0020 #define TUN_NO_PI 0x0040 +/* This flag has no real effect */ #define TUN_ONE_QUEUE 0x0080 #define TUN_PERSIST 0x0100 #define TUN_VNET_HDR 0x0200 +#define TUN_TAP_MQ 0x0400 /* Ioctl defines */ #define TUNSETNOCSUM _IOW('T', 200, int) @@ -53,14 +55,19 @@ #define TUNDETACHFILTER _IOW('T', 214, struct sock_fprog) #define TUNGETVNETHDRSZ _IOR('T', 215, int) #define TUNSETVNETHDRSZ _IOW('T', 216, int) +#define TUNSETQUEUE _IOW('T', 217, int) /* TUNSETIFF ifr flags */ #define IFF_TUN 0x0001 #define IFF_TAP 0x0002 #define IFF_NO_PI 0x1000 +/* This flag has no real effect */ #define IFF_ONE_QUEUE 0x2000 #define IFF_VNET_HDR 0x4000 #define IFF_TUN_EXCL 0x8000 +#define IFF_MULTI_QUEUE 0x0100 +#define IFF_ATTACH_QUEUE 0x0200 +#define IFF_DETACH_QUEUE 0x0400 /* Features for GSO (TUNSETOFFLOAD). */ #define TUN_F_CSUM 0x01 /* You can hand me unchecksummed packets. 
*/ @@ -91,4 +98,4 @@ struct tun_filter { __u8 addr[0][ETH_ALEN]; }; -#endif /* _UAPI__IF_TUN_H */ +#endif /* __IF_TUN_H */ diff --git a/include/linux/if_tunnel.h b/include/linux/if_tunnel.h index 5db59425..9f471ca1 100644 --- a/include/linux/if_tunnel.h +++ b/include/linux/if_tunnel.h @@ -1,5 +1,5 @@ -#ifndef _UAPI_IF_TUNNEL_H_ -#define _UAPI_IF_TUNNEL_H_ +#ifndef _IF_TUNNEL_H_ +#define _IF_TUNNEL_H_ #include #include @@ -37,6 +37,26 @@ struct ip_tunnel_parm { struct iphdr iph; }; +enum { + IFLA_IPTUN_UNSPEC, + IFLA_IPTUN_LINK, + IFLA_IPTUN_LOCAL, + IFLA_IPTUN_REMOTE, + IFLA_IPTUN_TTL, + IFLA_IPTUN_TOS, + IFLA_IPTUN_ENCAP_LIMIT, + IFLA_IPTUN_FLOWINFO, + IFLA_IPTUN_FLAGS, + IFLA_IPTUN_PROTO, + IFLA_IPTUN_PMTUDISC, + IFLA_IPTUN_6RD_PREFIX, + IFLA_IPTUN_6RD_RELAY_PREFIX, + IFLA_IPTUN_6RD_PREFIXLEN, + IFLA_IPTUN_6RD_RELAY_PREFIXLEN, + __IFLA_IPTUN_MAX, +}; +#define IFLA_IPTUN_MAX (__IFLA_IPTUN_MAX - 1) + /* SIT-mode i_flags */ #define SIT_ISATAP 0x0001 @@ -93,4 +113,4 @@ enum { }; #define IFLA_VTI_MAX (__IFLA_VTI_MAX - 1) -#endif /* _UAPI_IF_TUNNEL_H_ */ +#endif /* _IF_TUNNEL_H_ */ diff --git a/include/linux/if_vlan.h b/include/linux/if_vlan.h index 0744f8e6..3be1ca61 100644 --- a/include/linux/if_vlan.h +++ b/include/linux/if_vlan.h @@ -10,8 +10,8 @@ * */ -#ifndef _UAPI_LINUX_IF_VLAN_H_ -#define _UAPI_LINUX_IF_VLAN_H_ +#ifndef _LINUX_IF_VLAN_H_ +#define _LINUX_IF_VLAN_H_ /* VLAN IOCTLs are found in sockios.h */ @@ -60,4 +60,4 @@ struct vlan_ioctl_args { short vlan_qos; }; -#endif /* _UAPI_LINUX_IF_VLAN_H_ */ +#endif /* _LINUX_IF_VLAN_H_ */ diff --git a/include/linux/inet_diag.h b/include/linux/inet_diag.h index 8c469af9..e34f2477 100644 --- a/include/linux/inet_diag.h +++ b/include/linux/inet_diag.h @@ -1,5 +1,5 @@ -#ifndef _UAPI_INET_DIAG_H_ -#define _UAPI_INET_DIAG_H_ +#ifndef _INET_DIAG_H_ +#define _INET_DIAG_H_ #include @@ -109,9 +109,10 @@ enum { INET_DIAG_TOS, INET_DIAG_TCLASS, INET_DIAG_SKMEMINFO, + INET_DIAG_SHUTDOWN, }; -#define INET_DIAG_MAX 
INET_DIAG_SKMEMINFO +#define INET_DIAG_MAX INET_DIAG_SHUTDOWN /* INET_DIAG_MEM */ @@ -133,4 +134,4 @@ struct tcpvegas_info { }; -#endif /* _UAPI_INET_DIAG_H_ */ +#endif /* _INET_DIAG_H_ */ diff --git a/include/linux/l2tp.h b/include/linux/l2tp.h index 02a567b3..c4bec823 100644 --- a/include/linux/l2tp.h +++ b/include/linux/l2tp.h @@ -4,8 +4,8 @@ * Author: James Chapman */ -#ifndef _UAPI_LINUX_L2TP_H_ -#define _UAPI_LINUX_L2TP_H_ +#ifndef _LINUX_L2TP_H_ +#define _LINUX_L2TP_H_ #include #include @@ -175,4 +175,4 @@ enum l2tp_seqmode { #define L2TP_GENL_NAME "l2tp" #define L2TP_GENL_VERSION 0x1 -#endif /* _UAPI_LINUX_L2TP_H_ */ +#endif /* _LINUX_L2TP_H_ */ diff --git a/include/linux/netconf.h b/include/linux/netconf.h new file mode 100644 index 00000000..52c44244 --- /dev/null +++ b/include/linux/netconf.h @@ -0,0 +1,24 @@ +#ifndef _LINUX_NETCONF_H_ +#define _LINUX_NETCONF_H_ + +#include +#include + +struct netconfmsg { + __u8 ncm_family; +}; + +enum { + NETCONFA_UNSPEC, + NETCONFA_IFINDEX, + NETCONFA_FORWARDING, + NETCONFA_RP_FILTER, + NETCONFA_MC_FORWARDING, + __NETCONFA_MAX +}; +#define NETCONFA_MAX (__NETCONFA_MAX - 1) + +#define NETCONFA_IFINDEX_ALL -1 +#define NETCONFA_IFINDEX_DEFAULT -2 + +#endif /* _LINUX_NETCONF_H_ */ diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 6b9500bc..adc72607 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -22,8 +22,8 @@ * * Moved to /usr/include/linux for NET3 */ -#ifndef _UAPI_LINUX_NETDEVICE_H -#define _UAPI_LINUX_NETDEVICE_H +#ifndef _LINUX_NETDEVICE_H +#define _LINUX_NETDEVICE_H #include #include @@ -50,4 +50,4 @@ enum { }; -#endif /* _UAPI_LINUX_NETDEVICE_H */ +#endif /* _LINUX_NETDEVICE_H */ diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h index 9961e7f6..f05c3d92 100644 --- a/include/linux/netfilter.h +++ b/include/linux/netfilter.h @@ -1,5 +1,5 @@ -#ifndef _UAPI__LINUX_NETFILTER_H -#define _UAPI__LINUX_NETFILTER_H +#ifndef __LINUX_NETFILTER_H +#define 
__LINUX_NETFILTER_H #include @@ -67,4 +67,4 @@ union nf_inet_addr { struct in6_addr in6; }; -#endif /* _UAPI__LINUX_NETFILTER_H */ +#endif /* __LINUX_NETFILTER_H */ diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h index 92a06f30..41209700 100644 --- a/include/linux/netfilter/x_tables.h +++ b/include/linux/netfilter/x_tables.h @@ -1,5 +1,5 @@ -#ifndef _UAPI_X_TABLES_H -#define _UAPI_X_TABLES_H +#ifndef _X_TABLES_H +#define _X_TABLES_H #include #include @@ -182,4 +182,4 @@ struct xt_counters_info { (pos)->u.match_size)) -#endif /* _UAPI_X_TABLES_H */ +#endif /* _X_TABLES_H */ diff --git a/include/linux/netfilter_ipv4.h b/include/linux/netfilter_ipv4.h index 77a3e439..a5f4dc78 100644 --- a/include/linux/netfilter_ipv4.h +++ b/include/linux/netfilter_ipv4.h @@ -1,8 +1,8 @@ /* IPv4-specific defines for netfilter. * (C)1998 Rusty Russell -- This code is GPL. */ -#ifndef _UAPI__LINUX_IP_NETFILTER_H -#define _UAPI__LINUX_IP_NETFILTER_H +#ifndef __LINUX_IP_NETFILTER_H +#define __LINUX_IP_NETFILTER_H #include @@ -76,4 +76,4 @@ enum nf_ip_hook_priorities { #define SO_ORIGINAL_DST 80 -#endif /* _UAPI__LINUX_IP_NETFILTER_H */ +#endif /* __LINUX_IP_NETFILTER_H */ diff --git a/include/linux/netfilter_ipv4/ip_tables.h b/include/linux/netfilter_ipv4/ip_tables.h index 6ba9b093..38542b4f 100644 --- a/include/linux/netfilter_ipv4/ip_tables.h +++ b/include/linux/netfilter_ipv4/ip_tables.h @@ -12,8 +12,8 @@ * Port numbers are stored in HOST byte order. */ -#ifndef _UAPI_IPTABLES_H -#define _UAPI_IPTABLES_H +#ifndef _IPTABLES_H +#define _IPTABLES_H #include @@ -224,4 +224,4 @@ ipt_get_target(struct ipt_entry *e) /* * Main firewall chains definitions and global var's definitions. 
*/ -#endif /* _UAPI_IPTABLES_H */ +#endif /* _IPTABLES_H */ diff --git a/include/linux/netlink.h b/include/linux/netlink.h index 78d5b8a5..ced0e1a7 100644 --- a/include/linux/netlink.h +++ b/include/linux/netlink.h @@ -1,5 +1,5 @@ -#ifndef _UAPI__LINUX_NETLINK_H -#define _UAPI__LINUX_NETLINK_H +#ifndef __LINUX_NETLINK_H +#define __LINUX_NETLINK_H #include /* for __kernel_sa_family_t */ #include @@ -150,4 +150,4 @@ struct nlattr { #define NLA_HDRLEN ((int) NLA_ALIGN(sizeof(struct nlattr))) -#endif /* _UAPI__LINUX_NETLINK_H */ +#endif /* __LINUX_NETLINK_H */ diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h index 0e3e0c17..87452b4f 100644 --- a/include/linux/rtnetlink.h +++ b/include/linux/rtnetlink.h @@ -1,5 +1,5 @@ -#ifndef _UAPI__LINUX_RTNETLINK_H -#define _UAPI__LINUX_RTNETLINK_H +#ifndef __LINUX_RTNETLINK_H +#define __LINUX_RTNETLINK_H #include #include @@ -120,6 +120,18 @@ enum { RTM_SETDCB, #define RTM_SETDCB RTM_SETDCB + RTM_NEWNETCONF = 80, +#define RTM_NEWNETCONF RTM_NEWNETCONF + RTM_GETNETCONF = 82, +#define RTM_GETNETCONF RTM_GETNETCONF + + RTM_NEWMDB = 84, +#define RTM_NEWMDB RTM_NEWMDB + RTM_DELMDB = 85, +#define RTM_DELMDB RTM_DELMDB + RTM_GETMDB = 86, +#define RTM_GETMDB RTM_GETMDB + __RTM_MAX, #define RTM_MAX (((__RTM_MAX + 3) & ~3) - 1) }; @@ -222,6 +234,7 @@ enum { #define RTPROT_XORP 14 /* XORP */ #define RTPROT_NTK 15 /* Netsukuku */ #define RTPROT_DHCP 16 /* DHCP client */ +#define RTPROT_MROUTED 17 /* Multicast daemon */ /* rtm_scope @@ -283,6 +296,7 @@ enum rtattr_type_t { RTA_MP_ALGO, /* no longer used */ RTA_TABLE, RTA_MARK, + RTA_MFC_STATS, __RTA_MAX }; @@ -403,6 +417,12 @@ struct rta_session { } u; }; +struct rta_mfc_stats { + __u64 mfcs_packets; + __u64 mfcs_bytes; + __u64 mfcs_wrong_if; +}; + /**** * General form of address family dependent message. 
****/ @@ -585,6 +605,12 @@ enum rtnetlink_groups { #define RTNLGRP_PHONET_ROUTE RTNLGRP_PHONET_ROUTE RTNLGRP_DCB, #define RTNLGRP_DCB RTNLGRP_DCB + RTNLGRP_IPV4_NETCONF, +#define RTNLGRP_IPV4_NETCONF RTNLGRP_IPV4_NETCONF + RTNLGRP_IPV6_NETCONF, +#define RTNLGRP_IPV6_NETCONF RTNLGRP_IPV6_NETCONF + RTNLGRP_MDB, +#define RTNLGRP_MDB RTNLGRP_MDB __RTNLGRP_MAX }; #define RTNLGRP_MAX (__RTNLGRP_MAX - 1) @@ -607,4 +633,4 @@ struct tcamsg { -#endif /* _UAPI__LINUX_RTNETLINK_H */ +#endif /* __LINUX_RTNETLINK_H */ diff --git a/include/linux/sock_diag.h b/include/linux/sock_diag.h index b00e29ef..78996e23 100644 --- a/include/linux/sock_diag.h +++ b/include/linux/sock_diag.h @@ -1,5 +1,5 @@ -#ifndef _UAPI__SOCK_DIAG_H__ -#define _UAPI__SOCK_DIAG_H__ +#ifndef __SOCK_DIAG_H__ +#define __SOCK_DIAG_H__ #include @@ -23,4 +23,4 @@ enum { SK_MEMINFO_VARS, }; -#endif /* _UAPI__SOCK_DIAG_H__ */ +#endif /* __SOCK_DIAG_H__ */ diff --git a/include/linux/socket.h b/include/linux/socket.h index 76ab0c68..8c1e5017 100644 --- a/include/linux/socket.h +++ b/include/linux/socket.h @@ -1,5 +1,5 @@ -#ifndef _UAPI_LINUX_SOCKET_H -#define _UAPI_LINUX_SOCKET_H +#ifndef _LINUX_SOCKET_H +#define _LINUX_SOCKET_H /* * Desired design of maximum size and alignment (see RFC2553) @@ -18,4 +18,4 @@ struct __kernel_sockaddr_storage { /* _SS_MAXSIZE value minus size of ss_family */ } __attribute__ ((aligned(_K_SS_ALIGNSIZE))); /* force desired alignment */ -#endif /* _UAPI_LINUX_SOCKET_H */ +#endif /* _LINUX_SOCKET_H */ diff --git a/include/linux/types.h b/include/linux/types.h index 54c5df3d..23ea78fd 100644 --- a/include/linux/types.h +++ b/include/linux/types.h @@ -1,5 +1,5 @@ -#ifndef _UAPI_LINUX_TYPES_H -#define _UAPI_LINUX_TYPES_H +#ifndef _LINUX_TYPES_H +#define _LINUX_TYPES_H #include @@ -48,4 +48,4 @@ typedef __u32 __bitwise __wsum; #define __aligned_le64 __le64 __attribute__((aligned(8))) #endif /* __ASSEMBLY__ */ -#endif /* _UAPI_LINUX_TYPES_H */ +#endif /* _LINUX_TYPES_H */ diff --git 
a/include/linux/unix_diag.h b/include/linux/unix_diag.h index b1d2bf16..b8a24941 100644 --- a/include/linux/unix_diag.h +++ b/include/linux/unix_diag.h @@ -37,6 +37,7 @@ enum { UNIX_DIAG_ICONS, UNIX_DIAG_RQLEN, UNIX_DIAG_MEMINFO, + UNIX_DIAG_SHUTDOWN, UNIX_DIAG_MAX, }; diff --git a/include/rt_names.h b/include/rt_names.h index e5dbd45b..37adbd34 100644 --- a/include/rt_names.h +++ b/include/rt_names.h @@ -3,29 +3,30 @@ #include -char* rtnl_rtprot_n2a(int id, char *buf, int len); -char* rtnl_rtscope_n2a(int id, char *buf, int len); -char* rtnl_rttable_n2a(__u32 id, char *buf, int len); -char* rtnl_rtrealm_n2a(int id, char *buf, int len); -char* rtnl_dsfield_n2a(int id, char *buf, int len); -int rtnl_rtprot_a2n(__u32 *id, char *arg); -int rtnl_rtscope_a2n(__u32 *id, char *arg); -int rtnl_rttable_a2n(__u32 *id, char *arg); -int rtnl_rtrealm_a2n(__u32 *id, char *arg); -int rtnl_dsfield_a2n(__u32 *id, char *arg); -int rtnl_group_a2n(int *id, char *arg); +const char *rtnl_rtprot_n2a(int id, char *buf, int len); +const char *rtnl_rtscope_n2a(int id, char *buf, int len); +const char *rtnl_rttable_n2a(__u32 id, char *buf, int len); +const char *rtnl_rtrealm_n2a(int id, char *buf, int len); +const char *rtnl_dsfield_n2a(int id, char *buf, int len); + +int rtnl_rtprot_a2n(__u32 *id, const char *arg); +int rtnl_rtscope_a2n(__u32 *id, const char *arg); +int rtnl_rttable_a2n(__u32 *id, const char *arg); +int rtnl_rtrealm_a2n(__u32 *id, const char *arg); +int rtnl_dsfield_a2n(__u32 *id, const char *arg); +int rtnl_group_a2n(int *id, const char *arg); const char *inet_proto_n2a(int proto, char *buf, int len); -int inet_proto_a2n(char *buf); +int inet_proto_a2n(const char *buf); const char * ll_type_n2a(int type, char *buf, int len); - -const char *ll_addr_n2a(unsigned char *addr, int alen, int type, char *buf, int blen); -int ll_addr_a2n(char *lladdr, int len, char *arg); +const char *ll_addr_n2a(unsigned char *addr, int alen, + int type, char *buf, int blen); +int 
ll_addr_a2n(char *lladdr, int len, const char *arg); const char * ll_proto_n2a(unsigned short id, char *buf, int len); -int ll_proto_a2n(unsigned short *id, char *buf); +int ll_proto_a2n(unsigned short *id, const char *buf); #endif diff --git a/include/utils.h b/include/utils.h index 496db68e..2bd8c623 100644 --- a/include/utils.h +++ b/include/utils.h @@ -1,6 +1,7 @@ #ifndef __UTILS_H__ #define __UTILS_H__ 1 +#include #include #include #include diff --git a/ip/Makefile b/ip/Makefile index 1676f0ff..2b606d47 100644 --- a/ip/Makefile +++ b/ip/Makefile @@ -4,7 +4,8 @@ IPOBJ=ip.o ipaddress.o ipaddrlabel.o iproute.o iprule.o ipnetns.o \ ipxfrm.o xfrm_state.o xfrm_policy.o xfrm_monitor.o \ iplink_vlan.o link_veth.o link_gre.o iplink_can.o \ iplink_macvlan.o iplink_macvtap.o ipl2tp.o link_vti.o \ - iplink_vxlan.o tcp_metrics.o iplink_ipoib.o + iplink_vxlan.o tcp_metrics.o iplink_ipoib.o ipnetconf.o link_ip6tnl.o \ + link_iptnl.o RTMONOBJ=rtmon.o diff --git a/ip/ip.c b/ip/ip.c index e0f7e609..c3ff6270 100644 --- a/ip/ip.c +++ b/ip/ip.c @@ -57,6 +57,7 @@ static void usage(void) static int do_help(int argc, char **argv) { usage(); + return 0; } static const struct cmd { @@ -85,6 +86,7 @@ static const struct cmd { { "mroute", do_multiroute }, { "mrule", do_multirule }, { "netns", do_netns }, + { "netconf", do_ipnetconf }, { "help", do_help }, { 0 } }; diff --git a/ip/ip6tunnel.c b/ip/ip6tunnel.c index fcc9f33c..216e982f 100644 --- a/ip/ip6tunnel.c +++ b/ip/ip6tunnel.c @@ -128,7 +128,7 @@ static int parse_args(int argc, char **argv, int cmd, struct ip6_tnl_parm *p) strcmp(*argv, "any") == 0) p->proto = 0; else { - fprintf(stderr,"Cannot guess tunnel mode.\n"); + fprintf(stderr,"Unknown tunnel mode \"%s\"\n", *argv); exit(-1); } } else if (strcmp(*argv, "remote") == 0) { @@ -293,7 +293,7 @@ static int do_tunnels_list(struct ip6_tnl_parm *p) buf[sizeof(buf) - 1] = '\0'; if ((ptr = strchr(buf, ':')) == NULL || (*ptr++ = 0, sscanf(buf, "%s", name) != 1)) { - fprintf(stderr, 
"Wrong format of /proc/net/dev. Sorry.\n"); + fprintf(stderr, "Wrong format for /proc/net/dev. Giving up.\n"); goto end; } if (sscanf(ptr, "%ld%ld%ld%ld%ld%ld%ld%*d%ld%ld%ld%ld%ld%ld%ld", @@ -309,7 +309,7 @@ static int do_tunnels_list(struct ip6_tnl_parm *p) continue; type = ll_index_to_type(index); if (type == -1) { - fprintf(stderr, "Failed to get type of [%s]\n", name); + fprintf(stderr, "Failed to get type of \"%s\"\n", name); continue; } if (type != ARPHRD_TUNNEL6) @@ -402,7 +402,7 @@ int do_ip6tunnel(int argc, char **argv) case AF_INET6: break; default: - fprintf(stderr, "Unsupported family:%d\n", preferred_family); + fprintf(stderr, "Unsupported protocol family: %d\n", preferred_family); exit(-1); } diff --git a/ip/ip_common.h b/ip/ip_common.h index 2fd66b74..de568101 100644 --- a/ip/ip_common.h +++ b/ip/ip_common.h @@ -16,15 +16,20 @@ extern int ipaddr_list_link(int argc, char **argv); extern int iproute_monitor(int argc, char **argv); extern void iplink_usage(void) __attribute__((noreturn)); extern void iproute_reset_filter(void); +extern void ipmroute_reset_filter(void); extern void ipaddr_reset_filter(int); extern void ipneigh_reset_filter(void); extern void ipntable_reset_filter(void); extern int print_route(const struct sockaddr_nl *who, struct nlmsghdr *n, void *arg); +extern int print_mroute(const struct sockaddr_nl *who, + struct nlmsghdr *n, void *arg); extern int print_prefix(const struct sockaddr_nl *who, struct nlmsghdr *n, void *arg); extern int print_rule(const struct sockaddr_nl *who, struct nlmsghdr *n, void *arg); +extern int print_netconf(const struct sockaddr_nl *who, + struct nlmsghdr *n, void *arg); extern int do_ipaddr(int argc, char **argv); extern int do_ipaddrlabel(int argc, char **argv); extern int do_iproute(int argc, char **argv); @@ -43,6 +48,7 @@ extern int do_netns(int argc, char **argv); extern int do_xfrm(int argc, char **argv); extern int do_ipl2tp(int argc, char **argv); extern int do_tcp_metrics(int argc, char **argv); 
+extern int do_ipnetconf(int argc, char **argv); static inline int rtm_get_table(struct rtmsg *r, struct rtattr **tb) { diff --git a/ip/ipaddress.c b/ip/ipaddress.c index 5498f468..cff503fe 100644 --- a/ip/ipaddress.c +++ b/ip/ipaddress.c @@ -19,7 +19,7 @@ #include #include #include -#include +#include #include #include #include @@ -89,7 +89,7 @@ static void usage(void) exit(-1); } -void print_link_flags(FILE *fp, unsigned flags, unsigned mdown) +static void print_link_flags(FILE *fp, unsigned flags, unsigned mdown) { fprintf(fp, "<"); if (flags & IFF_UP && !(flags & IFF_RUNNING)) @@ -163,7 +163,7 @@ static void print_queuelen(FILE *f, struct rtattr *tb[IFLA_MAX + 1]) memset(&ifr, 0, sizeof(ifr)); strcpy(ifr.ifr_name, rta_getattr_str(tb[IFLA_IFNAME])); if (ioctl(s, SIOCGIFTXQLEN, &ifr) < 0) { - fprintf(f, "ioctl(SIOCGIFXQLEN) failed: %s\n", strerror(errno)); + fprintf(f, "ioctl(SIOCGIFTXQLEN) failed: %s\n", strerror(errno)); close(s); return; } @@ -461,6 +461,10 @@ int print_linkinfo(const struct sockaddr_nl *who, } } + if (do_link && tb[IFLA_PROMISCUITY] && show_details) + fprintf(fp, " promiscuity %u ", + *(int*)RTA_DATA(tb[IFLA_PROMISCUITY])); + if (do_link && tb[IFLA_LINKINFO] && show_details) print_linktype(fp, tb[IFLA_LINKINFO]); @@ -695,8 +699,8 @@ int print_addrinfo(const struct sockaddr_nl *who, struct nlmsghdr *n, return 0; } -int print_addrinfo_primary(const struct sockaddr_nl *who, struct nlmsghdr *n, - void *arg) +static int print_addrinfo_primary(const struct sockaddr_nl *who, + struct nlmsghdr *n, void *arg) { struct ifaddrmsg *ifa = NLMSG_DATA(n); @@ -706,8 +710,8 @@ int print_addrinfo_primary(const struct sockaddr_nl *who, struct nlmsghdr *n, return print_addrinfo(who, n, arg); } -int print_addrinfo_secondary(const struct sockaddr_nl *who, struct nlmsghdr *n, - void *arg) +static int print_addrinfo_secondary(const struct sockaddr_nl *who, + struct nlmsghdr *n, void *arg) { struct ifaddrmsg *ifa = NLMSG_DATA(n); @@ -781,7 +785,7 @@ static int 
ipadd_save_prep(void) int ret; if (isatty(STDOUT_FILENO)) { - fprintf(stderr, "Not sending binary stream to stdout\n"); + fprintf(stderr, "Not sending a binary stream to stdout\n"); return -1; } diff --git a/ip/ipl2tp.c b/ip/ipl2tp.c index f6e264a8..2d223179 100644 --- a/ip/ipl2tp.c +++ b/ip/ipl2tp.c @@ -494,7 +494,7 @@ static int parse_args(int argc, char **argv, int cmd, struct l2tp_parm *p) } else if (strcmp(*argv, "udp") == 0) { p->encap = L2TP_ENCAPTYPE_UDP; } else { - fprintf(stderr, "Unknown tunnel encapsulation.\n"); + fprintf(stderr, "Unknown tunnel encapsulation \"%s\"\n", *argv); exit(-1); } } else if (strcmp(*argv, "name") == 0) { diff --git a/ip/iplink.c b/ip/iplink.c index 7451aa02..ad336114 100644 --- a/ip/iplink.c +++ b/ip/iplink.c @@ -83,7 +83,8 @@ void iplink_usage(void) if (iplink_have_newlink()) { fprintf(stderr, "\n"); - fprintf(stderr, "TYPE := { vlan | veth | vcan | dummy | ifb | macvlan | can | bridge | ipoib }\n"); + fprintf(stderr, "TYPE := { vlan | veth | vcan | dummy | ifb | macvlan | can |\n"); + fprintf(stderr, " bridge | ipoib | ip6tnl | ipip | sit | vxlan }\n"); } exit(-1); } @@ -93,9 +94,9 @@ static void usage(void) iplink_usage(); } -static int on_off(char *msg) +static int on_off(const char *msg, const char *realval) { - fprintf(stderr, "Error: argument of \"%s\" must be \"on\" or \"off\"\n", msg); + fprintf(stderr, "Error: argument of \"%s\" must be \"on\" or \"off\", not \"%s\"\n", msg, realval); return -1; } @@ -134,7 +135,7 @@ struct link_util *get_link_kind(const char *id) return l; } -int get_link_mode(const char *mode) +static int get_link_mode(const char *mode) { if (strcasecmp(mode, "default") == 0) return IF_LINK_MODE_DEFAULT; @@ -193,7 +194,7 @@ struct iplink_req { char buf[1024]; }; -int iplink_parse_vf(int vf, int *argcp, char ***argvp, +static int iplink_parse_vf(int vf, int *argcp, char ***argvp, struct iplink_req *req) { int len, argc = *argcp; @@ -347,7 +348,7 @@ int iplink_parse(int argc, char **argv, struct 
iplink_req *req, } else if (strcmp(*argv, "off") == 0) { req->i.ifi_flags &= ~IFF_MULTICAST; } else - return on_off("multicast"); + return on_off("multicast", *argv); } else if (strcmp(*argv, "allmulticast") == 0) { NEXT_ARG(); req->i.ifi_change |= IFF_ALLMULTI; @@ -356,7 +357,7 @@ int iplink_parse(int argc, char **argv, struct iplink_req *req, } else if (strcmp(*argv, "off") == 0) { req->i.ifi_flags &= ~IFF_ALLMULTI; } else - return on_off("allmulticast"); + return on_off("allmulticast", *argv); } else if (strcmp(*argv, "promisc") == 0) { NEXT_ARG(); req->i.ifi_change |= IFF_PROMISC; @@ -365,7 +366,7 @@ int iplink_parse(int argc, char **argv, struct iplink_req *req, } else if (strcmp(*argv, "off") == 0) { req->i.ifi_flags &= ~IFF_PROMISC; } else - return on_off("promisc"); + return on_off("promisc", *argv); } else if (strcmp(*argv, "trailers") == 0) { NEXT_ARG(); req->i.ifi_change |= IFF_NOTRAILERS; @@ -374,7 +375,7 @@ int iplink_parse(int argc, char **argv, struct iplink_req *req, } else if (strcmp(*argv, "on") == 0) { req->i.ifi_flags &= ~IFF_NOTRAILERS; } else - return on_off("trailers"); + return on_off("trailers", *argv); } else if (strcmp(*argv, "arp") == 0) { NEXT_ARG(); req->i.ifi_change |= IFF_NOARP; @@ -383,7 +384,7 @@ int iplink_parse(int argc, char **argv, struct iplink_req *req, } else if (strcmp(*argv, "off") == 0) { req->i.ifi_flags |= IFF_NOARP; } else - return on_off("noarp"); + return on_off("noarp", *argv); } else if (strcmp(*argv, "vf") == 0) { struct rtattr *vflist; NEXT_ARG(); @@ -416,7 +417,7 @@ int iplink_parse(int argc, char **argv, struct iplink_req *req, } else if (strcmp(*argv, "off") == 0) { req->i.ifi_flags &= ~IFF_DYNAMIC; } else - return on_off("dynamic"); + return on_off("dynamic", *argv); } else if (matches(*argv, "type") == 0) { NEXT_ARG(); *type = *argv; @@ -851,7 +852,7 @@ static int do_set(int argc, char **argv) } else if (strcmp(*argv, "off") == 0) { flags &= ~IFF_MULTICAST; } else - return on_off("multicast"); + return 
on_off("multicast", *argv); } else if (strcmp(*argv, "allmulticast") == 0) { NEXT_ARG(); mask |= IFF_ALLMULTI; @@ -860,7 +861,7 @@ static int do_set(int argc, char **argv) } else if (strcmp(*argv, "off") == 0) { flags &= ~IFF_ALLMULTI; } else - return on_off("allmulticast"); + return on_off("allmulticast", *argv); } else if (strcmp(*argv, "promisc") == 0) { NEXT_ARG(); mask |= IFF_PROMISC; @@ -869,7 +870,7 @@ static int do_set(int argc, char **argv) } else if (strcmp(*argv, "off") == 0) { flags &= ~IFF_PROMISC; } else - return on_off("promisc"); + return on_off("promisc", *argv); } else if (strcmp(*argv, "trailers") == 0) { NEXT_ARG(); mask |= IFF_NOTRAILERS; @@ -878,7 +879,7 @@ static int do_set(int argc, char **argv) } else if (strcmp(*argv, "on") == 0) { flags &= ~IFF_NOTRAILERS; } else - return on_off("trailers"); + return on_off("trailers", *argv); } else if (strcmp(*argv, "arp") == 0) { NEXT_ARG(); mask |= IFF_NOARP; @@ -887,7 +888,7 @@ static int do_set(int argc, char **argv) } else if (strcmp(*argv, "off") == 0) { flags |= IFF_NOARP; } else - return on_off("noarp"); + return on_off("noarp", *argv); } else if (matches(*argv, "dynamic") == 0) { NEXT_ARG(); mask |= IFF_DYNAMIC; @@ -896,7 +897,7 @@ static int do_set(int argc, char **argv) } else if (strcmp(*argv, "off") == 0) { flags &= ~IFF_DYNAMIC; } else - return on_off("dynamic"); + return on_off("dynamic", *argv); } else { if (strcmp(*argv, "dev") == 0) { NEXT_ARG(); diff --git a/ip/iplink_can.c b/ip/iplink_can.c index c8af4bc2..56743588 100644 --- a/ip/iplink_can.c +++ b/ip/iplink_can.c @@ -68,8 +68,8 @@ static void set_ctrlmode(char* name, char *arg, cm->flags |= flags; } else if (strcmp(arg, "off") != 0) { fprintf(stderr, - "Error: argument of \"%s\" must be \"on\" or \"off\"\n", - name); + "Error: argument of \"%s\" must be \"on\" or \"off\", not \"%s\"\n", + name, arg); exit(-1); } cm->mask |= flags; @@ -169,7 +169,7 @@ static int can_parse_opt(struct link_util *lu, int argc, char **argv, usage(); 
return -1; } else { - fprintf(stderr, "can: what is \"%s\"?\n", *argv); + fprintf(stderr, "can: unknown option \"%s\"\n", *argv); usage(); return -1; } diff --git a/ip/iplink_ipoib.c b/ip/iplink_ipoib.c index ae372bcc..5c1c68c4 100644 --- a/ip/iplink_ipoib.c +++ b/ip/iplink_ipoib.c @@ -66,7 +66,7 @@ static int ipoib_parse_opt(struct link_util *lu, int argc, char **argv, explain(); return -1; } else { - fprintf(stderr, "ipoib: what is \"%s\"?\n", *argv); + fprintf(stderr, "ipoib: unknown option \"%s\"?\n", *argv); explain(); return -1; } diff --git a/ip/iplink_macvlan.c b/ip/iplink_macvlan.c index ed0e34b7..5b4b868b 100644 --- a/ip/iplink_macvlan.c +++ b/ip/iplink_macvlan.c @@ -58,7 +58,7 @@ static int macvlan_parse_opt(struct link_util *lu, int argc, char **argv, explain(); return -1; } else { - fprintf(stderr, "macvlan: what is \"%s\"?\n", *argv); + fprintf(stderr, "macvlan: unknown option \"%s\"?\n", *argv); explain(); return -1; } diff --git a/ip/iplink_macvtap.c b/ip/iplink_macvtap.c index 6c7fe1f1..bea9f0cf 100644 --- a/ip/iplink_macvtap.c +++ b/ip/iplink_macvtap.c @@ -24,10 +24,10 @@ static void explain(void) ); } -static int mode_arg(void) +static int mode_arg(const char *arg) { fprintf(stderr, "Error: argument of \"mode\" must be \"private\", " - "\"vepa\", \"bridge\" or \"passthru\" \n"); + "\"vepa\", \"bridge\" or \"passthru\", not \"%s\"\n", arg); return -1; } @@ -48,14 +48,14 @@ static int macvtap_parse_opt(struct link_util *lu, int argc, char **argv, else if (strcmp(*argv, "passthru") == 0) mode = MACVLAN_MODE_PASSTHRU; else - return mode_arg(); + return mode_arg(*argv); addattr32(n, 1024, IFLA_MACVLAN_MODE, mode); } else if (matches(*argv, "help") == 0) { explain(); return -1; } else { - fprintf(stderr, "macvtap: what is \"%s\"?\n", *argv); + fprintf(stderr, "macvtap: unknown command \"%s\"?\n", *argv); explain(); return -1; } diff --git a/ip/iplink_vlan.c b/ip/iplink_vlan.c index 97af8d6c..26ceb8d9 100644 --- a/ip/iplink_vlan.c +++ b/ip/iplink_vlan.c 
@@ -33,9 +33,9 @@ static void explain(void) ); } -static int on_off(char *msg) +static int on_off(const char *msg, const char *arg) { - fprintf(stderr, "Error: argument of \"%s\" must be \"on\" or \"off\"\n", msg); + fprintf(stderr, "Error: argument of \"%s\" must be \"on\" or \"off\", not \"%s\"\n", msg, arg); return -1; } @@ -93,7 +93,7 @@ static int vlan_parse_opt(struct link_util *lu, int argc, char **argv, else if (strcmp(*argv, "off") == 0) flags.flags &= ~VLAN_FLAG_REORDER_HDR; else - return on_off("reorder_hdr"); + return on_off("reorder_hdr", *argv); } else if (matches(*argv, "gvrp") == 0) { NEXT_ARG(); flags.mask |= VLAN_FLAG_GVRP; @@ -102,7 +102,7 @@ static int vlan_parse_opt(struct link_util *lu, int argc, char **argv, else if (strcmp(*argv, "off") == 0) flags.flags &= ~VLAN_FLAG_GVRP; else - return on_off("gvrp"); + return on_off("gvrp", *argv); } else if (matches(*argv, "loose_binding") == 0) { NEXT_ARG(); flags.mask |= VLAN_FLAG_LOOSE_BINDING; @@ -111,7 +111,7 @@ static int vlan_parse_opt(struct link_util *lu, int argc, char **argv, else if (strcmp(*argv, "off") == 0) flags.flags &= ~VLAN_FLAG_LOOSE_BINDING; else - return on_off("loose_binding"); + return on_off("loose_binding", *argv); } else if (matches(*argv, "ingress-qos-map") == 0) { NEXT_ARG(); if (vlan_parse_qos_map(&argc, &argv, n, @@ -128,7 +128,7 @@ static int vlan_parse_opt(struct link_util *lu, int argc, char **argv, explain(); return -1; } else { - fprintf(stderr, "vlan: what is \"%s\"?\n", *argv); + fprintf(stderr, "vlan: unknown command \"%s\"?\n", *argv); explain(); return -1; } diff --git a/ip/iplink_vxlan.c b/ip/iplink_vxlan.c index ba5c4ab6..10253261 100644 --- a/ip/iplink_vxlan.c +++ b/ip/iplink_vxlan.c @@ -26,6 +26,8 @@ static void explain(void) fprintf(stderr, "Usage: ... 
vxlan id VNI [ group ADDR ] [ local ADDR ]\n"); fprintf(stderr, " [ ttl TTL ] [ tos TOS ] [ dev PHYS_DEV ]\n"); fprintf(stderr, " [ port MIN MAX ] [ [no]learning ]\n"); + fprintf(stderr, " [ [no]proxy ] [ [no]rsc ]\n"); + fprintf(stderr, " [ [no]l2miss ] [ [no]l3miss ]\n"); fprintf(stderr, "\n"); fprintf(stderr, "Where: VNI := 0-16777215\n"); fprintf(stderr, " ADDR := { IP_ADDRESS | any }\n"); @@ -44,6 +46,10 @@ static int vxlan_parse_opt(struct link_util *lu, int argc, char **argv, __u8 tos = 0; __u8 ttl = 0; __u8 learning = 1; + __u8 proxy = 0; + __u8 rsc = 0; + __u8 l2miss = 0; + __u8 l3miss = 0; __u8 noage = 0; __u32 age = 0; __u32 maxaddr = 0; @@ -123,11 +129,27 @@ static int vxlan_parse_opt(struct link_util *lu, int argc, char **argv, learning = 0; } else if (!matches(*argv, "learning")) { learning = 1; + } else if (!matches(*argv, "noproxy")) { + proxy = 0; + } else if (!matches(*argv, "proxy")) { + proxy = 1; + } else if (!matches(*argv, "norsc")) { + rsc = 0; + } else if (!matches(*argv, "rsc")) { + rsc = 1; + } else if (!matches(*argv, "nol2miss")) { + l2miss = 0; + } else if (!matches(*argv, "l2miss")) { + l2miss = 1; + } else if (!matches(*argv, "nol3miss")) { + l3miss = 0; + } else if (!matches(*argv, "l3miss")) { + l3miss = 1; } else if (matches(*argv, "help") == 0) { explain(); return -1; } else { - fprintf(stderr, "vxlan: what is \"%s\"?\n", *argv); + fprintf(stderr, "vxlan: unknown command \"%s\"?\n", *argv); explain(); return -1; } @@ -148,6 +170,10 @@ static int vxlan_parse_opt(struct link_util *lu, int argc, char **argv, addattr8(n, 1024, IFLA_VXLAN_TTL, ttl); addattr8(n, 1024, IFLA_VXLAN_TOS, tos); addattr8(n, 1024, IFLA_VXLAN_LEARNING, learning); + addattr8(n, 1024, IFLA_VXLAN_PROXY, proxy); + addattr8(n, 1024, IFLA_VXLAN_RSC, rsc); + addattr8(n, 1024, IFLA_VXLAN_L2MISS, l2miss); + addattr8(n, 1024, IFLA_VXLAN_L3MISS, l3miss); if (noage) addattr32(n, 1024, IFLA_VXLAN_AGEING, 0); else if (age) @@ -190,7 +216,7 @@ static void 
vxlan_print_opt(struct link_util *lu, FILE *f, struct rtattr *tb[]) if (tb[IFLA_VXLAN_LOCAL]) { __be32 addr = rta_getattr_u32(tb[IFLA_VXLAN_LOCAL]); if (addr) - fprintf(f, "local %s ", + fprintf(f, "local %s ", format_host(AF_INET, 4, &addr, s1, sizeof(s1))); } @@ -208,12 +234,24 @@ static void vxlan_print_opt(struct link_util *lu, FILE *f, struct rtattr *tb[]) const struct ifla_vxlan_port_range *r = RTA_DATA(tb[IFLA_VXLAN_PORT_RANGE]); fprintf(f, "port %u %u ", ntohs(r->low), ntohs(r->high)); - } + } if (tb[IFLA_VXLAN_LEARNING] && !rta_getattr_u8(tb[IFLA_VXLAN_LEARNING])) fputs("nolearning ", f); - + + if (tb[IFLA_VXLAN_PROXY] && rta_getattr_u8(tb[IFLA_VXLAN_PROXY])) + fputs("proxy ", f); + + if (tb[IFLA_VXLAN_RSC] && rta_getattr_u8(tb[IFLA_VXLAN_RSC])) + fputs("rsc ", f); + + if (tb[IFLA_VXLAN_L2MISS] && rta_getattr_u8(tb[IFLA_VXLAN_L2MISS])) + fputs("l2miss ", f); + + if (tb[IFLA_VXLAN_L3MISS] && rta_getattr_u8(tb[IFLA_VXLAN_L3MISS])) + fputs("l3miss ", f); + if (tb[IFLA_VXLAN_TOS] && (tos = rta_getattr_u8(tb[IFLA_VXLAN_TOS]))) { if (tos == 1) diff --git a/ip/ipmaddr.c b/ip/ipmaddr.c index 3ae94780..6e19465f 100644 --- a/ip/ipmaddr.c +++ b/ip/ipmaddr.c @@ -28,6 +28,7 @@ #include "rt_names.h" #include "utils.h" +#include "ip_common.h" static struct { char *dev; @@ -70,7 +71,7 @@ struct ma_info inet_prefix addr; }; -void maddr_ins(struct ma_info **lst, struct ma_info *m) +static void maddr_ins(struct ma_info **lst, struct ma_info *m) { struct ma_info *mp; @@ -82,7 +83,7 @@ void maddr_ins(struct ma_info **lst, struct ma_info *m) *lst = m; } -void read_dev_mcast(struct ma_info **result_p) +static void read_dev_mcast(struct ma_info **result_p) { char buf[256]; FILE *fp = fopen("/proc/net/dev_mcast", "r"); @@ -119,7 +120,7 @@ void read_dev_mcast(struct ma_info **result_p) fclose(fp); } -void read_igmp(struct ma_info **result_p) +static void read_igmp(struct ma_info **result_p) { struct ma_info m; char buf[256]; @@ -158,7 +159,7 @@ void read_igmp(struct ma_info 
**result_p) } -void read_igmp6(struct ma_info **result_p) +static void read_igmp6(struct ma_info **result_p) { char buf[256]; FILE *fp = fopen("/proc/net/igmp6", "r"); @@ -275,7 +276,7 @@ static int multiaddr_list(int argc, char **argv) return 0; } -int multiaddr_modify(int cmd, int argc, char **argv) +static int multiaddr_modify(int cmd, int argc, char **argv) { struct ifreq ifr; int fd; diff --git a/ip/ipmonitor.c b/ip/ipmonitor.c index 4b1d469d..86c473e9 100644 --- a/ip/ipmonitor.c +++ b/ip/ipmonitor.c @@ -29,13 +29,16 @@ int prefix_banner; static void usage(void) { - fprintf(stderr, "Usage: ip monitor [ all | LISTofOBJECTS ]\n"); + fprintf(stderr, "Usage: ip monitor [ all | LISTofOBJECTS ] [ FILE ]\n"); + fprintf(stderr, "LISTofOBJECTS := link | address | route | mroute | prefix |\n"); + fprintf(stderr, " neigh | netconf\n"); + fprintf(stderr, "FILE := file FILENAME\n"); exit(-1); } -int accept_msg(const struct sockaddr_nl *who, - struct nlmsghdr *n, void *arg) +static int accept_msg(const struct sockaddr_nl *who, + struct nlmsghdr *n, void *arg) { FILE *fp = (FILE*)arg; @@ -43,10 +46,26 @@ int accept_msg(const struct sockaddr_nl *who, print_timestamp(fp); if (n->nlmsg_type == RTM_NEWROUTE || n->nlmsg_type == RTM_DELROUTE) { - if (prefix_banner) - fprintf(fp, "[ROUTE]"); - print_route(who, n, arg); - return 0; + struct rtmsg *r = NLMSG_DATA(n); + int len = n->nlmsg_len - NLMSG_LENGTH(sizeof(*r)); + + if (len < 0) { + fprintf(stderr, "BUG: wrong nlmsg len %d\n", len); + return -1; + } + + if (r->rtm_family == RTNL_FAMILY_IPMR || + r->rtm_family == RTNL_FAMILY_IP6MR) { + if (prefix_banner) + fprintf(fp, "[MROUTE]"); + print_mroute(who, n, arg); + return 0; + } else { + if (prefix_banner) + fprintf(fp, "[ROUTE]"); + print_route(who, n, arg); + return 0; + } } if (n->nlmsg_type == RTM_NEWLINK || n->nlmsg_type == RTM_DELLINK) { ll_remember_index(who, n, NULL); @@ -67,7 +86,8 @@ int accept_msg(const struct sockaddr_nl *who, print_addrlabel(who, n, arg); return 0; } - 
if (n->nlmsg_type == RTM_NEWNEIGH || n->nlmsg_type == RTM_DELNEIGH) { + if (n->nlmsg_type == RTM_NEWNEIGH || n->nlmsg_type == RTM_DELNEIGH || + n->nlmsg_type == RTM_GETNEIGH) { if (prefix_banner) fprintf(fp, "[NEIGH]"); print_neigh(who, n, arg); @@ -85,6 +105,12 @@ int accept_msg(const struct sockaddr_nl *who, print_rule(who, n, arg); return 0; } + if (n->nlmsg_type == RTM_NEWNETCONF) { + if (prefix_banner) + fprintf(fp, "[NETCONF]"); + print_netconf(who, n, arg); + return 0; + } if (n->nlmsg_type == 15) { char *tstr; time_t secs = ((__u32*)NLMSG_DATA(n))[0]; @@ -116,12 +142,15 @@ int do_ipmonitor(int argc, char **argv) int llink=0; int laddr=0; int lroute=0; + int lmroute=0; int lprefix=0; int lneigh=0; + int lnetconf=0; rtnl_close(&rth); ipaddr_reset_filter(1); iproute_reset_filter(); + ipmroute_reset_filter(); ipneigh_reset_filter(); while (argc > 0) { @@ -137,12 +166,18 @@ int do_ipmonitor(int argc, char **argv) } else if (matches(*argv, "route") == 0) { lroute=1; groups = 0; + } else if (matches(*argv, "mroute") == 0) { + lmroute=1; + groups = 0; } else if (matches(*argv, "prefix") == 0) { lprefix=1; groups = 0; } else if (matches(*argv, "neigh") == 0) { lneigh = 1; groups = 0; + } else if (matches(*argv, "netconf") == 0) { + lnetconf = 1; + groups = 0; } else if (strcmp(*argv, "all") == 0) { groups = ~RTMGRP_TC; prefix_banner=1; @@ -169,6 +204,12 @@ int do_ipmonitor(int argc, char **argv) if (!preferred_family || preferred_family == AF_INET6) groups |= nl_mgrp(RTNLGRP_IPV6_ROUTE); } + if (lmroute) { + if (!preferred_family || preferred_family == AF_INET) + groups |= nl_mgrp(RTNLGRP_IPV4_MROUTE); + if (!preferred_family || preferred_family == AF_INET6) + groups |= nl_mgrp(RTNLGRP_IPV6_MROUTE); + } if (lprefix) { if (!preferred_family || preferred_family == AF_INET6) groups |= nl_mgrp(RTNLGRP_IPV6_PREFIX); @@ -176,6 +217,12 @@ int do_ipmonitor(int argc, char **argv) if (lneigh) { groups |= nl_mgrp(RTNLGRP_NEIGH); } + if (lnetconf) { + if (!preferred_family || 
preferred_family == AF_INET) + groups |= nl_mgrp(RTNLGRP_IPV4_NETCONF); + if (!preferred_family || preferred_family == AF_INET6) + groups |= nl_mgrp(RTNLGRP_IPV6_NETCONF); + } if (file) { FILE *fp; fp = fopen(file, "r"); diff --git a/ip/ipmroute.c b/ip/ipmroute.c index 945727d3..345576d0 100644 --- a/ip/ipmroute.c +++ b/ip/ipmroute.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include #include @@ -26,167 +27,231 @@ #include #include +#include #include "utils.h" - -char filter_dev[16]; -int filter_family; +#include "ip_common.h" static void usage(void) __attribute__((noreturn)); static void usage(void) { - fprintf(stderr, "Usage: ip mroute show [ PREFIX ] [ from PREFIX ] [ iif DEVICE ]\n"); + fprintf(stderr, "Usage: ip mroute show [ [ to ] PREFIX ] [ from PREFIX ] [ iif DEVICE ]\n"); + fprintf(stderr, " [ table TABLE_ID ]\n"); + fprintf(stderr, "TABLE_ID := [ local | main | default | all | NUMBER ]\n"); #if 0 fprintf(stderr, "Usage: ip mroute [ add | del ] DESTINATION from SOURCE [ iif DEVICE ] [ oif DEVICE ]\n"); #endif exit(-1); } -static char *viftable[32]; - struct rtfilter { + int tb; + int af; + int iif; inet_prefix mdst; inet_prefix msrc; } filter; -static void read_viftable(void) +int print_mroute(const struct sockaddr_nl *who, struct nlmsghdr *n, void *arg) { - char buf[256]; - FILE *fp = fopen("/proc/net/ip_mr_vif", "r"); + FILE *fp = (FILE*)arg; + struct rtmsg *r = NLMSG_DATA(n); + int len = n->nlmsg_len; + struct rtattr * tb[RTA_MAX+1]; + char abuf[256]; + char obuf[256]; + SPRINT_BUF(b1); + __u32 table; + int iif = 0; + int family; - if (!fp) - return; - - if (!fgets(buf, sizeof(buf), fp)) { - fclose(fp); - return; + if ((n->nlmsg_type != RTM_NEWROUTE && + n->nlmsg_type != RTM_DELROUTE) || + !(n->nlmsg_flags & NLM_F_MULTI)) { + fprintf(stderr, "Not a multicast route: %08x %08x %08x\n", + n->nlmsg_len, n->nlmsg_type, n->nlmsg_flags); + return 0; } - while (fgets(buf, sizeof(buf), fp)) { - int vifi; - char dev[256]; - - if 
(sscanf(buf, "%d%s", &vifi, dev) < 2) - continue; - - if (vifi<0 || vifi>31) - continue; - - viftable[vifi] = strdup(dev); + len -= NLMSG_LENGTH(sizeof(*r)); + if (len < 0) { + fprintf(stderr, "BUG: wrong nlmsg len %d\n", len); + return -1; } - fclose(fp); -} - -static void read_mroute_list(FILE *ofp) -{ - char buf[256]; - FILE *fp = fopen("/proc/net/ip_mr_cache", "r"); - - if (!fp) - return; - - if (!fgets(buf, sizeof(buf), fp)) { - fclose(fp); - return; + if (r->rtm_type != RTN_MULTICAST) { + fprintf(stderr, "Not a multicast route (type: %s)\n", + rtnl_rtntype_n2a(r->rtm_type, b1, sizeof(b1))); + return 0; } - while (fgets(buf, sizeof(buf), fp)) { - inet_prefix maddr, msrc; - unsigned pkts, b, w; - int vifi; - char oiflist[256]; - char sbuf[256]; - char mbuf[256]; - char obuf[256]; + parse_rtattr(tb, RTA_MAX, RTM_RTA(r), len); + table = rtm_get_table(r, tb); - oiflist[0] = 0; - if (sscanf(buf, "%x%x%d%u%u%u %[^\n]", - maddr.data, msrc.data, &vifi, - &pkts, &b, &w, oiflist) < 6) - continue; + if (filter.tb > 0 && filter.tb != table) + return 0; - if (vifi!=-1 && (vifi < 0 || vifi>31)) - continue; + if (tb[RTA_IIF]) + iif = *(int*)RTA_DATA(tb[RTA_IIF]); + if (filter.iif && filter.iif != iif) + return 0; - if (filter_dev[0] && (vifi<0 || strcmp(filter_dev, viftable[vifi]))) - continue; - if (filter.mdst.family && inet_addr_match(&maddr, &filter.mdst, filter.mdst.bitlen)) - continue; - if (filter.msrc.family && inet_addr_match(&msrc, &filter.msrc, filter.msrc.bitlen)) - continue; + if (filter.af && filter.af != r->rtm_family) + return 0; - snprintf(obuf, sizeof(obuf), "(%s, %s)", - format_host(AF_INET, 4, &msrc.data[0], sbuf, sizeof(sbuf)), - format_host(AF_INET, 4, &maddr.data[0], mbuf, sizeof(mbuf))); + if (tb[RTA_DST] && + filter.mdst.bitlen > 0 && + inet_addr_match(RTA_DATA(tb[RTA_DST]), &filter.mdst, filter.mdst.bitlen)) + return 0; - fprintf(ofp, "%-32s Iif: ", obuf); + if (tb[RTA_SRC] && + filter.msrc.bitlen > 0 && + inet_addr_match(RTA_DATA(tb[RTA_SRC]), 
&filter.msrc, filter.msrc.bitlen)) + return 0; - if (vifi == -1) - fprintf(ofp, "unresolved "); - else - fprintf(ofp, "%-10s ", viftable[vifi]); + family = r->rtm_family == RTNL_FAMILY_IPMR ? AF_INET : AF_INET6; - if (oiflist[0]) { - char *next = NULL; - char *p = oiflist; - int ovifi, ottl; + if (n->nlmsg_type == RTM_DELROUTE) + fprintf(fp, "Deleted "); - fprintf(ofp, "Oifs: "); + if (tb[RTA_SRC]) + len = snprintf(obuf, sizeof(obuf), + "(%s, ", rt_addr_n2a(family, + RTA_PAYLOAD(tb[RTA_SRC]), + RTA_DATA(tb[RTA_SRC]), + abuf, sizeof(abuf))); + else + len = sprintf(obuf, "(unknown, "); + if (tb[RTA_DST]) + snprintf(obuf + len, sizeof(obuf) - len, + "%s)", rt_addr_n2a(family, RTA_PAYLOAD(tb[RTA_DST]), + RTA_DATA(tb[RTA_DST]), + abuf, sizeof(abuf))); + else + snprintf(obuf + len, sizeof(obuf) - len, "unknown) "); - while (p) { - next = strchr(p, ' '); - if (next) { - *next = 0; - next++; - } - if (sscanf(p, "%d:%d", &ovifi, &ottl)<2) { - p = next; - continue; - } - p = next; + fprintf(fp, "%-32s Iif: ", obuf); + if (iif) + fprintf(fp, "%-10s ", ll_index_to_name(iif)); + else + fprintf(fp, "unresolved "); - fprintf(ofp, "%s", viftable[ovifi]); - if (ottl>1) - fprintf(ofp, "(ttl %d) ", ovifi); - else - fprintf(ofp, " "); + if (tb[RTA_MULTIPATH]) { + struct rtnexthop *nh = RTA_DATA(tb[RTA_MULTIPATH]); + int first = 1; + + len = RTA_PAYLOAD(tb[RTA_MULTIPATH]); + + for (;;) { + if (len < sizeof(*nh)) + break; + if (nh->rtnh_len > len) + break; + + if (first) { + fprintf(fp, "Oifs: "); + first = 0; } + fprintf(fp, "%s", ll_index_to_name(nh->rtnh_ifindex)); + if (nh->rtnh_hops > 1) + fprintf(fp, "(ttl %d) ", nh->rtnh_hops); + else + fprintf(fp, " "); + len -= NLMSG_ALIGN(nh->rtnh_len); + nh = RTNH_NEXT(nh); } - - if (show_stats && b) { - fprintf(ofp, "%s %u packets, %u bytes", _SL_, pkts, b); - if (w) - fprintf(ofp, ", %u arrived on wrong iif.", w); - } - fprintf(ofp, "\n"); } - fclose(fp); + if (show_stats && tb[RTA_MFC_STATS]) { + struct rta_mfc_stats *mfcs = 
RTA_DATA(tb[RTA_MFC_STATS]); + + fprintf(fp, "%s %"PRIu64" packets, %"PRIu64" bytes", _SL_, + (uint64_t)mfcs->mfcs_packets, + (uint64_t)mfcs->mfcs_bytes); + if (mfcs->mfcs_wrong_if) + fprintf(fp, ", %"PRIu64" arrived on wrong iif.", + (uint64_t)mfcs->mfcs_wrong_if); + } + fprintf(fp, "\n"); + fflush(fp); + return 0; } +void ipmroute_reset_filter(void) +{ + memset(&filter, 0, sizeof(filter)); + filter.mdst.bitlen = -1; + filter.msrc.bitlen = -1; +} static int mroute_list(int argc, char **argv) { + char *id = NULL; + int family; + + ipmroute_reset_filter(); + if (preferred_family == AF_UNSPEC) + family = AF_INET; + else + family = AF_INET6; + if (family == AF_INET) { + filter.af = RTNL_FAMILY_IPMR; + filter.tb = RT_TABLE_DEFAULT; /* for backward compatibility */ + } else + filter.af = RTNL_FAMILY_IP6MR; + while (argc > 0) { - if (strcmp(*argv, "iif") == 0) { + if (matches(*argv, "table") == 0) { + __u32 tid; NEXT_ARG(); - strncpy(filter_dev, *argv, sizeof(filter_dev)-1); + if (rtnl_rttable_a2n(&tid, *argv)) { + if (strcmp(*argv, "all") == 0) { + filter.tb = 0; + } else if (strcmp(*argv, "help") == 0) { + usage(); + } else { + invarg("table id value is invalid\n", *argv); + } + } else + filter.tb = tid; + } else if (strcmp(*argv, "iif") == 0) { + NEXT_ARG(); + id = *argv; } else if (matches(*argv, "from") == 0) { NEXT_ARG(); - get_prefix(&filter.msrc, *argv, AF_INET); + get_prefix(&filter.msrc, *argv, family); } else { if (strcmp(*argv, "to") == 0) { NEXT_ARG(); } if (matches(*argv, "help") == 0) usage(); - get_prefix(&filter.mdst, *argv, AF_INET); + get_prefix(&filter.mdst, *argv, family); } - argv++; argc--; + argc--; argv++; } - read_viftable(); - read_mroute_list(stdout); - return 0; + ll_init_map(&rth); + + if (id) { + int idx; + + if ((idx = ll_name_to_index(id)) == 0) { + fprintf(stderr, "Cannot find device \"%s\"\n", id); + return -1; + } + filter.iif = idx; + } + + if (rtnl_wilddump_request(&rth, filter.af, RTM_GETROUTE) < 0) { + perror("Cannot send dump 
request"); + return 1; + } + + if (rtnl_dump_filter(&rth, print_mroute, stdout) < 0) { + fprintf(stderr, "Dump terminated\n"); + exit(1); + } + + exit(0); } int do_multiroute(int argc, char **argv) diff --git a/ip/ipneigh.c b/ip/ipneigh.c index 56e56b2d..d76e035c 100644 --- a/ip/ipneigh.c +++ b/ip/ipneigh.c @@ -52,7 +52,7 @@ static void usage(void) exit(-1); } -int nud_state_a2n(unsigned *state, char *arg) +static int nud_state_a2n(unsigned *state, const char *arg) { if (matches(arg, "permanent") == 0) *state = NUD_PERMANENT; @@ -189,7 +189,8 @@ int print_neigh(const struct sockaddr_nl *who, struct nlmsghdr *n, void *arg) struct rtattr * tb[NDA_MAX+1]; char abuf[256]; - if (n->nlmsg_type != RTM_NEWNEIGH && n->nlmsg_type != RTM_DELNEIGH) { + if (n->nlmsg_type != RTM_NEWNEIGH && n->nlmsg_type != RTM_DELNEIGH && + n->nlmsg_type != RTM_GETNEIGH) { fprintf(stderr, "Not RTM_NEWNEIGH: %08x %08x %08x\n", n->nlmsg_len, n->nlmsg_type, n->nlmsg_flags); @@ -251,6 +252,8 @@ int print_neigh(const struct sockaddr_nl *who, struct nlmsghdr *n, void *arg) if (n->nlmsg_type == RTM_DELNEIGH) fprintf(fp, "delete "); + else if (n->nlmsg_type == RTM_GETNEIGH) + fprintf(fp, "miss "); if (tb[NDA_DST]) { fprintf(fp, "%s ", format_host(r->ndm_family, @@ -310,13 +313,13 @@ int print_neigh(const struct sockaddr_nl *who, struct nlmsghdr *n, void *arg) return 0; } -void ipneigh_reset_filter() +void ipneigh_reset_filter(void) { memset(&filter, 0, sizeof(filter)); filter.state = ~0; } -int do_show_or_flush(int argc, char **argv, int flush) +static int do_show_or_flush(int argc, char **argv, int flush) { char *filter_dev = NULL; int state_given = 0; diff --git a/ip/ipnetconf.c b/ip/ipnetconf.c new file mode 100644 index 00000000..8ceadfe4 --- /dev/null +++ b/ip/ipnetconf.c @@ -0,0 +1,183 @@ +/* + * ipnetconf.c "ip netconf". 
+ * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * Authors: Nicolas Dichtel, + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "rt_names.h" +#include "utils.h" +#include "ip_common.h" + +static struct +{ + int family; + int ifindex; +} filter; + +static void usage(void) __attribute__((noreturn)); + +static void usage(void) +{ + fprintf(stderr, "Usage: ip netconf show [ dev STRING ]\n"); + exit(-1); +} + +#define NETCONF_RTA(r) ((struct rtattr*)(((char*)(r)) + NLMSG_ALIGN(sizeof(struct netconfmsg)))) + +int print_netconf(const struct sockaddr_nl *who, struct nlmsghdr *n, void *arg) +{ + FILE *fp = (FILE*)arg; + struct netconfmsg *ncm = NLMSG_DATA(n); + int len = n->nlmsg_len; + struct rtattr *tb[NETCONFA_MAX+1]; + + if (n->nlmsg_type == NLMSG_ERROR) + return -1; + if (n->nlmsg_type != RTM_NEWNETCONF) { + fprintf(stderr, "Not RTM_NEWNETCONF: %08x %08x %08x\n", + n->nlmsg_len, n->nlmsg_type, n->nlmsg_flags); + + return -1; + } + len -= NLMSG_SPACE(sizeof(*ncm)); + if (len < 0) { + fprintf(stderr, "BUG: wrong nlmsg len %d\n", len); + return -1; + } + + if (filter.family && filter.family != ncm->ncm_family) + return 0; + + parse_rtattr(tb, NETCONFA_MAX, NETCONF_RTA(ncm), + NLMSG_PAYLOAD(n, sizeof(*ncm))); + + switch (ncm->ncm_family) { + case AF_INET: + fprintf(fp, "ipv4 "); + break; + case AF_INET6: + fprintf(fp, "ipv6 "); + break; + default: + fprintf(fp, "unknown "); + break; + } + + if (tb[NETCONFA_IFINDEX]) { + int *ifindex = (int *)RTA_DATA(tb[NETCONFA_IFINDEX]); + + switch (*ifindex) { + case NETCONFA_IFINDEX_ALL: + fprintf(fp, "all "); + break; + case NETCONFA_IFINDEX_DEFAULT: + fprintf(fp, "default "); + break; + default: + fprintf(fp, "dev %s ", ll_index_to_name(*ifindex)); + break; + 
} + } + + if (tb[NETCONFA_FORWARDING]) + fprintf(fp, "forwarding %s ", + *(int *)RTA_DATA(tb[NETCONFA_FORWARDING])?"on":"off"); + if (tb[NETCONFA_RP_FILTER]) { + int rp_filter = *(int *)RTA_DATA(tb[NETCONFA_RP_FILTER]); + + if (rp_filter == 0) + fprintf(fp, "rp_filter off "); + else if (rp_filter == 1) + fprintf(fp, "rp_filter strict "); + else if (rp_filter == 2) + fprintf(fp, "rp_filter loose "); + else + fprintf(fp, "rp_filter unknown mode "); + } + if (tb[NETCONFA_MC_FORWARDING]) + fprintf(fp, "mc_forwarding %d ", + *(int *)RTA_DATA(tb[NETCONFA_MC_FORWARDING])); + + fprintf(fp, "\n"); + fflush(fp); + return 0; +} + +static void ipnetconf_reset_filter(void) +{ + memset(&filter, 0, sizeof(filter)); +} + +static int do_show(int argc, char **argv) +{ + struct { + struct nlmsghdr n; + struct netconfmsg ncm; + char buf[1024]; + } req; + + ipnetconf_reset_filter(); + filter.family = preferred_family; + if (filter.family == AF_UNSPEC) + filter.family = AF_INET; + filter.ifindex = NETCONFA_IFINDEX_ALL; + + while (argc > 0) { + if (strcmp(*argv, "dev") == 0) { + NEXT_ARG(); + filter.ifindex = ll_name_to_index(*argv); + if (filter.ifindex <= 0) { + fprintf(stderr, "Device \"%s\" does not exist.\n", + *argv); + return -1; + } + } + argv++; argc--; + } + + ll_init_map(&rth); + memset(&req, 0, sizeof(req)); + req.n.nlmsg_len = NLMSG_LENGTH(sizeof(struct netconfmsg)); + req.n.nlmsg_flags = NLM_F_REQUEST|NLM_F_ACK; + req.n.nlmsg_type = RTM_GETNETCONF; + req.ncm.ncm_family = filter.family; + addattr_l(&req.n, sizeof(req), NETCONFA_IFINDEX, &filter.ifindex, + sizeof(filter.ifindex)); + + rtnl_send(&rth, &req.n, req.n.nlmsg_len); + rtnl_listen(&rth, print_netconf, stdout); + + return 0; +} + +int do_ipnetconf(int argc, char **argv) +{ + if (argc > 0) { + if (matches(*argv, "show") == 0 || + matches(*argv, "lst") == 0 || + matches(*argv, "list") == 0) + return do_show(argc-1, argv+1); + if (matches(*argv, "help") == 0) + usage(); + } else + return do_show(0, NULL); + + 
fprintf(stderr, "Command \"%s\" is unknown, try \"ip netconf help\".\n", *argv); + exit(-1); +} diff --git a/ip/ipnetns.c b/ip/ipnetns.c index e41a598a..b047b979 100644 --- a/ip/ipnetns.c +++ b/ip/ipnetns.c @@ -13,6 +13,7 @@ #include #include #include +#include #include "utils.h" #include "ip_common.h" @@ -40,17 +41,16 @@ static int setns(int fd, int nstype) } #endif /* HAVE_SETNS */ - -static void usage(void) __attribute__((noreturn)); - -static void usage(void) +static int usage(void) { fprintf(stderr, "Usage: ip netns list\n"); fprintf(stderr, " ip netns add NAME\n"); fprintf(stderr, " ip netns delete NAME\n"); + fprintf(stderr, " ip netns identify PID\n"); + fprintf(stderr, " ip netns pids NAME\n"); fprintf(stderr, " ip netns exec NAME cmd ...\n"); fprintf(stderr, " ip netns monitor\n"); - exit(-1); + return EXIT_FAILURE; } int get_netns_fd(const char *name) @@ -75,7 +75,7 @@ static int netns_list(int argc, char **argv) dir = opendir(NETNS_RUN_DIR); if (!dir) - return 0; + return EXIT_SUCCESS; while ((entry = readdir(dir)) != NULL) { if (strcmp(entry->d_name, ".") == 0) @@ -85,7 +85,7 @@ static int netns_list(int argc, char **argv) printf("%s\n", entry->d_name); } closedir(dir); - return 0; + return EXIT_SUCCESS; } static void bind_etc(const char *name) @@ -127,48 +127,193 @@ static int netns_exec(int argc, char **argv) if (argc < 1) { fprintf(stderr, "No netns name specified\n"); - return -1; + return EXIT_FAILURE; } if (argc < 2) { - fprintf(stderr, "No cmd specified\n"); - return -1; + fprintf(stderr, "No command specified\n"); + return EXIT_FAILURE; } name = argv[0]; cmd = argv[1]; snprintf(net_path, sizeof(net_path), "%s/%s", NETNS_RUN_DIR, name); netns = open(net_path, O_RDONLY); if (netns < 0) { - fprintf(stderr, "Cannot open network namespace: %s\n", - strerror(errno)); - return -1; + fprintf(stderr, "Cannot open network namespace \"%s\": %s\n", + name, strerror(errno)); + return EXIT_FAILURE; } if (setns(netns, CLONE_NEWNET) < 0) { - fprintf(stderr, 
"seting the network namespace failed: %s\n", - strerror(errno)); - return -1; + fprintf(stderr, "seting the network namespace \"%s\" failed: %s\n", + name, strerror(errno)); + return EXIT_FAILURE; } if (unshare(CLONE_NEWNS) < 0) { fprintf(stderr, "unshare failed: %s\n", strerror(errno)); - return -1; + return EXIT_FAILURE; + } + /* Don't let any mounts propogate back to the parent */ + if (mount("", "/", "none", MS_SLAVE | MS_REC, NULL)) { + fprintf(stderr, "\"mount --make-rslave /\" failed: %s\n", + strerror(errno)); + return EXIT_FAILURE; } /* Mount a version of /sys that describes the network namespace */ if (umount2("/sys", MNT_DETACH) < 0) { fprintf(stderr, "umount of /sys failed: %s\n", strerror(errno)); - return -1; + return EXIT_FAILURE; } if (mount(name, "/sys", "sysfs", 0, NULL) < 0) { fprintf(stderr, "mount of /sys failed: %s\n",strerror(errno)); - return -1; + return EXIT_FAILURE; } /* Setup bind mounts for config files in /etc */ bind_etc(name); if (execvp(cmd, argv + 1) < 0) - fprintf(stderr, "exec of %s failed: %s\n", + fprintf(stderr, "exec of \"%s\" failed: %s\n", cmd, strerror(errno)); - exit(-1); + return EXIT_FAILURE; +} + +static int is_pid(const char *str) +{ + int ch; + for (; (ch = *str); str++) { + if (!isdigit(ch)) + return 0; + } + return 1; +} + +static int netns_pids(int argc, char **argv) +{ + const char *name; + char net_path[MAXPATHLEN]; + int netns; + struct stat netst; + DIR *dir; + struct dirent *entry; + + if (argc < 1) { + fprintf(stderr, "No netns name specified\n"); + return EXIT_FAILURE; + } + if (argc > 1) { + fprintf(stderr, "extra arguments specified\n"); + return EXIT_FAILURE; + } + + name = argv[0]; + snprintf(net_path, sizeof(net_path), "%s/%s", NETNS_RUN_DIR, name); + netns = open(net_path, O_RDONLY); + if (netns < 0) { + fprintf(stderr, "Cannot open network namespace: %s\n", + strerror(errno)); + return EXIT_FAILURE; + } + if (fstat(netns, &netst) < 0) { + fprintf(stderr, "Stat of netns failed: %s\n", + 
strerror(errno)); + return EXIT_FAILURE; + } + dir = opendir("/proc/"); + if (!dir) { + fprintf(stderr, "Open of /proc failed: %s\n", + strerror(errno)); + return EXIT_FAILURE; + } + while((entry = readdir(dir))) { + char pid_net_path[MAXPATHLEN]; + struct stat st; + if (!is_pid(entry->d_name)) + continue; + snprintf(pid_net_path, sizeof(pid_net_path), "/proc/%s/ns/net", + entry->d_name); + if (stat(pid_net_path, &st) != 0) + continue; + if ((st.st_dev == netst.st_dev) && + (st.st_ino == netst.st_ino)) { + printf("%s\n", entry->d_name); + } + } + closedir(dir); + return EXIT_SUCCESS; + +} + +static int netns_identify(int argc, char **argv) +{ + const char *pidstr; + char net_path[MAXPATHLEN]; + int netns; + struct stat netst; + DIR *dir; + struct dirent *entry; + + if (argc < 1) { + fprintf(stderr, "No pid specified\n"); + return EXIT_FAILURE; + } + if (argc > 1) { + fprintf(stderr, "extra arguments specified\n"); + return EXIT_FAILURE; + } + pidstr = argv[0]; + + if (!is_pid(pidstr)) { + fprintf(stderr, "Specified string '%s' is not a pid\n", + pidstr); + return EXIT_FAILURE; + } + + snprintf(net_path, sizeof(net_path), "/proc/%s/ns/net", pidstr); + netns = open(net_path, O_RDONLY); + if (netns < 0) { + fprintf(stderr, "Cannot open network namespace: %s\n", + strerror(errno)); + return EXIT_FAILURE; + } + if (fstat(netns, &netst) < 0) { + fprintf(stderr, "Stat of netns failed: %s\n", + strerror(errno)); + return EXIT_FAILURE; + } + dir = opendir(NETNS_RUN_DIR); + if (!dir) { + /* Succeed treat a missing directory as an empty directory */ + if (errno == ENOENT) + return EXIT_SUCCESS; + + fprintf(stderr, "Failed to open directory %s:%s\n", + NETNS_RUN_DIR, strerror(errno)); + return EXIT_FAILURE; + } + + while((entry = readdir(dir))) { + char name_path[MAXPATHLEN]; + struct stat st; + + if (strcmp(entry->d_name, ".") == 0) + continue; + if (strcmp(entry->d_name, "..") == 0) + continue; + + snprintf(name_path, sizeof(name_path), "%s/%s", NETNS_RUN_DIR, + 
entry->d_name); + + if (stat(name_path, &st) != 0) + continue; + + if ((st.st_dev == netst.st_dev) && + (st.st_ino == netst.st_ino)) { + printf("%s\n", entry->d_name); + } + } + closedir(dir); + return EXIT_SUCCESS; + } static int netns_delete(int argc, char **argv) @@ -178,18 +323,18 @@ static int netns_delete(int argc, char **argv) if (argc < 1) { fprintf(stderr, "No netns name specified\n"); - return -1; + return EXIT_FAILURE; } name = argv[0]; snprintf(netns_path, sizeof(netns_path), "%s/%s", NETNS_RUN_DIR, name); umount2(netns_path, MNT_DETACH); if (unlink(netns_path) < 0) { - fprintf(stderr, "Cannot remove %s: %s\n", + fprintf(stderr, "Cannot remove namespace file \"%s\": %s\n", netns_path, strerror(errno)); - return -1; + return EXIT_FAILURE; } - return 0; + return EXIT_SUCCESS; } static int netns_add(int argc, char **argv) @@ -205,10 +350,11 @@ static int netns_add(int argc, char **argv) char netns_path[MAXPATHLEN]; const char *name; int fd; + int made_netns_run_dir_mount = 0; if (argc < 1) { fprintf(stderr, "No netns name specified\n"); - return -1; + return EXIT_FAILURE; } name = argv[0]; @@ -217,17 +363,40 @@ static int netns_add(int argc, char **argv) /* Create the base netns directory if it doesn't exist */ mkdir(NETNS_RUN_DIR, S_IRWXU|S_IRGRP|S_IXGRP|S_IROTH|S_IXOTH); + /* Make it possible for network namespace mounts to propogate between + * mount namespaces. This makes it likely that a unmounting a network + * namespace file in one namespace will unmount the network namespace + * file in all namespaces allowing the network namespace to be freed + * sooner. 
+ */ + while (mount("", NETNS_RUN_DIR, "none", MS_SHARED | MS_REC, NULL)) { + /* Fail unless we need to make the mount point */ + if (errno != EINVAL || made_netns_run_dir_mount) { + fprintf(stderr, "mount --make-shared %s failed: %s\n", + NETNS_RUN_DIR, strerror(errno)); + return EXIT_FAILURE; + } + + /* Upgrade NETNS_RUN_DIR to a mount point */ + if (mount(NETNS_RUN_DIR, NETNS_RUN_DIR, "none", MS_BIND, NULL)) { + fprintf(stderr, "mount --bind %s %s failed: %s\n", + NETNS_RUN_DIR, NETNS_RUN_DIR, strerror(errno)); + return EXIT_FAILURE; + } + made_netns_run_dir_mount = 1; + } + /* Create the filesystem state */ fd = open(netns_path, O_RDONLY|O_CREAT|O_EXCL, 0); if (fd < 0) { - fprintf(stderr, "Could not create %s: %s\n", + fprintf(stderr, "Cannot not create namespace file \"%s\": %s\n", netns_path, strerror(errno)); - return -1; + return EXIT_FAILURE; } close(fd); if (unshare(CLONE_NEWNET) < 0) { - fprintf(stderr, "Failed to create a new network namespace: %s\n", - strerror(errno)); + fprintf(stderr, "Failed to create a new network namespace \"%s\": %s\n", + name, strerror(errno)); goto out_delete; } @@ -237,11 +406,10 @@ static int netns_add(int argc, char **argv) netns_path, strerror(errno)); goto out_delete; } - return 0; + return EXIT_SUCCESS; out_delete: netns_delete(argc, argv); - exit(-1); - return -1; + return EXIT_FAILURE; } @@ -254,19 +422,19 @@ static int netns_monitor(int argc, char **argv) if (fd < 0) { fprintf(stderr, "inotify_init failed: %s\n", strerror(errno)); - return -1; + return EXIT_FAILURE; } if (inotify_add_watch(fd, NETNS_RUN_DIR, IN_CREATE | IN_DELETE) < 0) { fprintf(stderr, "inotify_add_watch failed: %s\n", strerror(errno)); - return -1; + return EXIT_FAILURE; } for(;;) { ssize_t len = read(fd, buf, sizeof(buf)); if (len < 0) { fprintf(stderr, "read failed: %s\n", strerror(errno)); - return -1; + return EXIT_FAILURE; } for (event = (struct inotify_event *)buf; (char *)event < &buf[len]; @@ -277,7 +445,7 @@ static int netns_monitor(int 
argc, char **argv) printf("delete %s\n", event->name); } } - return 0; + return EXIT_SUCCESS; } int do_netns(int argc, char **argv) @@ -290,7 +458,7 @@ int do_netns(int argc, char **argv) return netns_list(argc-1, argv+1); if (matches(*argv, "help") == 0) - usage(); + return usage(); if (matches(*argv, "add") == 0) return netns_add(argc-1, argv+1); @@ -298,6 +466,12 @@ int do_netns(int argc, char **argv) if (matches(*argv, "delete") == 0) return netns_delete(argc-1, argv+1); + if (matches(*argv, "identify") == 0) + return netns_identify(argc-1, argv+1); + + if (matches(*argv, "pids") == 0) + return netns_pids(argc-1, argv+1); + if (matches(*argv, "exec") == 0) return netns_exec(argc-1, argv+1); @@ -305,5 +479,5 @@ int do_netns(int argc, char **argv) return netns_monitor(argc-1, argv+1); fprintf(stderr, "Command \"%s\" is unknown, try \"ip netns help\".\n", *argv); - exit(-1); + return EXIT_FAILURE; } diff --git a/ip/ipntable.c b/ip/ipntable.c index 639f512c..5751114e 100644 --- a/ip/ipntable.c +++ b/ip/ipntable.c @@ -27,6 +27,7 @@ #include #include #include +#include #include #include "utils.h" diff --git a/ip/ipprefix.c b/ip/ipprefix.c index d8327beb..97250778 100644 --- a/ip/ipprefix.c +++ b/ip/ipprefix.c @@ -26,8 +26,11 @@ #include #include #include +#include #include + #include "utils.h" +#include "ip_common.h" /* prefix flags; see kernel's net/ipv6/addrconf.c and include/net/if_inet6.h */ #define IF_PREFIX_ONLINK 0x01 @@ -59,7 +62,7 @@ int print_prefix(const struct sockaddr_nl *who, struct nlmsghdr *n, void *arg) return 0; if (prefix->prefix_family != AF_INET6) { - fprintf(stderr, "wrong family %d\n", prefix->prefix_family); + fprintf(stderr, "incorrect protocol family: %d\n", prefix->prefix_family); return 0; } if (prefix->prefix_type != ND_OPT_PREFIX_INFORMATION) { diff --git a/ip/iproute.c b/ip/iproute.c index c60156f9..2c2a3319 100644 --- a/ip/iproute.c +++ b/ip/iproute.c @@ -92,7 +92,7 @@ static void usage(void) static struct { - int tb; + unsigned int 
tb; int cloned; int flushed; char *flushb; @@ -124,7 +124,7 @@ static int flush_update(void) return 0; } -int filter_nlmsg(struct nlmsghdr *n, struct rtattr **tb, int host_len) +static int filter_nlmsg(struct nlmsghdr *n, struct rtattr **tb, int host_len) { struct rtmsg *r = NLMSG_DATA(n); inet_prefix dst; @@ -263,7 +263,7 @@ int filter_nlmsg(struct nlmsghdr *n, struct rtattr **tb, int host_len) return 1; } -int calc_host_len(struct rtmsg *r) +static int calc_host_len(const struct rtmsg *r) { if (r->rtm_family == AF_INET6) return 128; @@ -625,7 +625,9 @@ int print_route(const struct sockaddr_nl *who, struct nlmsghdr *n, void *arg) } -int parse_one_nh(struct rtmsg *r, struct rtattr *rta, struct rtnexthop *rtnh, int *argcp, char ***argvp) +static int parse_one_nh(struct rtmsg *r, struct rtattr *rta, + struct rtnexthop *rtnh, + int *argcp, char ***argvp) { int argc = *argcp; char **argv = *argvp; @@ -668,7 +670,8 @@ int parse_one_nh(struct rtmsg *r, struct rtattr *rta, struct rtnexthop *rtnh, in return 0; } -int parse_nexthops(struct nlmsghdr *n, struct rtmsg *r, int argc, char **argv) +static int parse_nexthops(struct nlmsghdr *n, struct rtmsg *r, + int argc, char **argv) { char buf[1024]; struct rtattr *rta = (void*)buf; @@ -699,8 +702,7 @@ int parse_nexthops(struct nlmsghdr *n, struct rtmsg *r, int argc, char **argv) return 0; } - -int iproute_modify(int cmd, unsigned flags, int argc, char **argv) +static int iproute_modify(int cmd, unsigned flags, int argc, char **argv) { struct { struct nlmsghdr n; @@ -1070,7 +1072,8 @@ static int iproute_flush_cache(void) static __u32 route_dump_magic = 0x45311224; -int save_route(const struct sockaddr_nl *who, struct nlmsghdr *n, void *arg) +static int save_route(const struct sockaddr_nl *who, struct nlmsghdr *n, + void *arg) { int ret; int len = n->nlmsg_len; @@ -1099,7 +1102,7 @@ static int save_route_prep(void) int ret; if (isatty(STDOUT_FILENO)) { - fprintf(stderr, "Not sending binary stream to stdout\n"); + fprintf(stderr, 
"Not sending a binary stream to stdout\n"); return -1; } @@ -1363,7 +1366,7 @@ static int iproute_list_flush_or_save(int argc, char **argv, int action) } -int iproute_get(int argc, char **argv) +static int iproute_get(int argc, char **argv) { struct { struct nlmsghdr n; @@ -1445,7 +1448,7 @@ int iproute_get(int argc, char **argv) } if (req.r.rtm_dst_len == 0) { - fprintf(stderr, "need at least destination address\n"); + fprintf(stderr, "need at least a destination address\n"); exit(1); } @@ -1528,7 +1531,8 @@ int iproute_get(int argc, char **argv) exit(0); } -int restore_handler(const struct sockaddr_nl *nl, struct nlmsghdr *n, void *arg) +static int restore_handler(const struct sockaddr_nl *nl, struct nlmsghdr *n, + void *arg) { int ret; @@ -1562,7 +1566,7 @@ static int route_dump_check_magic(void) return 0; } -int iproute_restore(void) +static int iproute_restore(void) { if (route_dump_check_magic()) exit(-1); @@ -1584,7 +1588,7 @@ static int iproute_showdump(void) exit(rtnl_from_file(stdin, &show_handler, NULL)); } -void iproute_reset_filter() +void iproute_reset_filter(void) { memset(&filter, 0, sizeof(filter)); filter.mdst.bitlen = -1; diff --git a/ip/iptunnel.c b/ip/iptunnel.c index 0cf6cf8e..4275f260 100644 --- a/ip/iptunnel.c +++ b/ip/iptunnel.c @@ -41,7 +41,7 @@ static void usage(void) fprintf(stderr, "\n"); fprintf(stderr, "Where: NAME := STRING\n"); fprintf(stderr, " ADDR := { IP_ADDRESS | any }\n"); - fprintf(stderr, " TOS := { NUMBER | inherit }\n"); + fprintf(stderr, " TOS := { STRING | 00..ff | inherit | inherit/STRING | inherit/00..ff }\n"); fprintf(stderr, " TTL := { 1..255 | inherit }\n"); fprintf(stderr, " KEY := { DOTTED_QUAD | NUMBER }\n"); exit(-1); @@ -102,7 +102,7 @@ static int parse_args(int argc, char **argv, int cmd, struct ip_tunnel_parm *p) p->iph.protocol = IPPROTO_IPIP; p->i_flags |= VTI_ISVTI; } else { - fprintf(stderr,"Cannot guess tunnel mode.\n"); + fprintf(stderr,"Unknown tunnel mode \"%s\"\n", *argv); exit(-1); } } else if 
(strcmp(*argv, "key") == 0) { @@ -114,7 +114,7 @@ static int parse_args(int argc, char **argv, int cmd, struct ip_tunnel_parm *p) p->i_key = p->o_key = get_addr32(*argv); else { if (get_unsigned(&uval, *argv, 0)<0) { - fprintf(stderr, "invalid value of \"key\"\n"); + fprintf(stderr, "invalid value for \"key\": \"%s\"; it should be an unsigned integer\n", *argv); exit(-1); } p->i_key = p->o_key = htonl(uval); @@ -127,7 +127,7 @@ static int parse_args(int argc, char **argv, int cmd, struct ip_tunnel_parm *p) p->i_key = get_addr32(*argv); else { if (get_unsigned(&uval, *argv, 0)<0) { - fprintf(stderr, "invalid value of \"ikey\"\n"); + fprintf(stderr, "invalid value for \"ikey\": \"%s\"; it should be an unsigned integer\n", *argv); exit(-1); } p->i_key = htonl(uval); @@ -140,7 +140,7 @@ static int parse_args(int argc, char **argv, int cmd, struct ip_tunnel_parm *p) p->o_key = get_addr32(*argv); else { if (get_unsigned(&uval, *argv, 0)<0) { - fprintf(stderr, "invalid value of \"okey\"\n"); + fprintf(stderr, "invalid value for \"okey\": \"%s\"; it should be an unsigned integer\n", *argv); exit(-1); } p->o_key = htonl(uval); @@ -188,14 +188,21 @@ static int parse_args(int argc, char **argv, int cmd, struct ip_tunnel_parm *p) } else if (strcmp(*argv, "tos") == 0 || strcmp(*argv, "tclass") == 0 || matches(*argv, "dsfield") == 0) { + char *dsfield; __u32 uval; NEXT_ARG(); + dsfield = *argv; + strsep(&dsfield, "/"); if (strcmp(*argv, "inherit") != 0) { - if (rtnl_dsfield_a2n(&uval, *argv)) - invarg("bad TOS value", *argv); - p->iph.tos = uval; + dsfield = *argv; + p->iph.tos = 0; } else p->iph.tos = 1; + if (dsfield) { + if (rtnl_dsfield_a2n(&uval, dsfield)) + invarg("bad TOS value", *argv); + p->iph.tos |= uval; + } } else { if (strcmp(*argv, "name") == 0) { NEXT_ARG(); @@ -235,7 +242,7 @@ static int parse_args(int argc, char **argv, int cmd, struct ip_tunnel_parm *p) if (p->iph.protocol == IPPROTO_IPIP || p->iph.protocol == IPPROTO_IPV6) { if ((p->i_flags & GRE_KEY) || 
(p->o_flags & GRE_KEY)) { - fprintf(stderr, "Keys are not allowed with ipip and sit.\n"); + fprintf(stderr, "Keys are not allowed with ipip and sit tunnels\n"); return -1; } } @@ -255,7 +262,7 @@ static int parse_args(int argc, char **argv, int cmd, struct ip_tunnel_parm *p) p->o_flags |= GRE_KEY; } if (IN_MULTICAST(ntohl(p->iph.daddr)) && !p->iph.saddr) { - fprintf(stderr, "Broadcast tunnel requires a source address.\n"); + fprintf(stderr, "A broadcast tunnel requires a source address\n"); return -1; } if (isatap) @@ -437,7 +444,7 @@ static int do_tunnels_list(struct ip_tunnel_parm *p) buf[sizeof(buf) - 1] = 0; if ((ptr = strchr(buf, ':')) == NULL || (*ptr++ = 0, sscanf(buf, "%s", name) != 1)) { - fprintf(stderr, "Wrong format of /proc/net/dev. Sorry.\n"); + fprintf(stderr, "Wrong format for /proc/net/dev. Giving up.\n"); fclose(fp); return -1; } @@ -454,7 +461,7 @@ static int do_tunnels_list(struct ip_tunnel_parm *p) continue; type = ll_index_to_type(index); if (type == -1) { - fprintf(stderr, "Failed to get type of [%s]\n", name); + fprintf(stderr, "Failed to get type of \"%s\"\n", name); continue; } if (type != ARPHRD_TUNNEL && type != ARPHRD_IPGRE && type != ARPHRD_SIT) @@ -551,17 +558,17 @@ static int do_prl(int argc, char **argv) strncpy(medium, *argv, IFNAMSIZ-1); devname++; } else { - fprintf(stderr,"%s: Invalid PRL parameter.\n", *argv); + fprintf(stderr,"Invalid PRL parameter \"%s\"\n", *argv); exit(-1); } if (count > 1) { - fprintf(stderr,"One PRL entry at a time.\n"); + fprintf(stderr,"One PRL entry at a time\n"); exit(-1); } argc--; argv++; } if (devname == 0) { - fprintf(stderr, "Must specify dev.\n"); + fprintf(stderr, "Must specify device\n"); exit(-1); } @@ -601,13 +608,13 @@ static int do_6rd(int argc, char **argv) strncpy(medium, *argv, IFNAMSIZ-1); devname++; } else { - fprintf(stderr,"%s: Invalid 6RD parameter.\n", *argv); + fprintf(stderr,"Invalid 6RD parameter \"%s\"\n", *argv); exit(-1); } argc--; argv++; } if (devname == 0) { - 
fprintf(stderr, "Must specify dev.\n"); + fprintf(stderr, "Must specify device\n"); exit(-1); } @@ -630,7 +637,7 @@ int do_iptunnel(int argc, char **argv) case AF_INET6: return do_ip6tunnel(argc, argv); default: - fprintf(stderr, "Unsupported family:%d\n", preferred_family); + fprintf(stderr, "Unsupported protocol family: %d\n", preferred_family); exit(-1); } @@ -654,6 +661,6 @@ int do_iptunnel(int argc, char **argv) } else return do_show(0, NULL); - fprintf(stderr, "Command \"%s\" is unknown, try \"ip tunnel help\".\n", *argv); + fprintf(stderr, "Command \"%s\" is unknown, try \"ip tunnel help\"\n", *argv); exit(-1); } diff --git a/ip/iptuntap.c b/ip/iptuntap.c index 20914e11..3d9ad7de 100644 --- a/ip/iptuntap.c +++ b/ip/iptuntap.c @@ -128,7 +128,7 @@ static int parse_args(int argc, char **argv, struct ifreq *ifr, uid_t *uid, gid_ } ifr->ifr_flags |= IFF_TAP; } else { - fprintf(stderr,"Cannot guess tunnel mode.\n"); + fprintf(stderr,"Unknown tunnel mode \"%s\"\n", *argv); exit(-1); } } else if (uid && matches(*argv, "user") == 0) { diff --git a/ip/ipxfrm.c b/ip/ipxfrm.c index c7b34206..db519185 100644 --- a/ip/ipxfrm.c +++ b/ip/ipxfrm.c @@ -25,6 +25,7 @@ * Masahide NAKAMURA @USAGI */ +#include #include #include #include @@ -38,6 +39,7 @@ #include "utils.h" #include "xfrm.h" +#include "ip_common.h" #define STRBUF_SIZE (128) #define STRBUF_CAT(buf, str) \ @@ -555,16 +557,13 @@ static inline void xfrm_algo_print(struct xfrm_algo *algo, int type, int len, static void xfrm_aead_print(struct xfrm_algo_aead *algo, int len, FILE *fp, const char *prefix) { - struct { - struct xfrm_algo algo; - char key[algo->alg_key_len / 8]; - } base; + struct xfrm_algo *base_algo = alloca(sizeof(*base_algo) + algo->alg_key_len / 8); - memcpy(base.algo.alg_name, algo->alg_name, sizeof(base.algo.alg_name)); - base.algo.alg_key_len = algo->alg_key_len; - memcpy(base.algo.alg_key, algo->alg_key, algo->alg_key_len / 8); + memcpy(base_algo->alg_name, algo->alg_name, 
sizeof(base_algo->alg_name)); + base_algo->alg_key_len = algo->alg_key_len; + memcpy(base_algo->alg_key, algo->alg_key, algo->alg_key_len / 8); - __xfrm_algo_print(&base.algo, XFRMA_ALG_AEAD, len, fp, prefix, 0); + __xfrm_algo_print(base_algo, XFRMA_ALG_AEAD, len, fp, prefix, 0); fprintf(fp, " %d", algo->alg_icv_len); @@ -574,16 +573,13 @@ static void xfrm_aead_print(struct xfrm_algo_aead *algo, int len, static void xfrm_auth_trunc_print(struct xfrm_algo_auth *algo, int len, FILE *fp, const char *prefix) { - struct { - struct xfrm_algo algo; - char key[algo->alg_key_len / 8]; - } base; + struct xfrm_algo *base_algo = alloca(sizeof(*base_algo) + algo->alg_key_len / 8); - memcpy(base.algo.alg_name, algo->alg_name, sizeof(base.algo.alg_name)); - base.algo.alg_key_len = algo->alg_key_len; - memcpy(base.algo.alg_key, algo->alg_key, algo->alg_key_len / 8); + memcpy(base_algo->alg_name, algo->alg_name, sizeof(base_algo->alg_name)); + base_algo->alg_key_len = algo->alg_key_len; + memcpy(base_algo->alg_key, algo->alg_key, algo->alg_key_len / 8); - __xfrm_algo_print(&base.algo, XFRMA_ALG_AUTH_TRUNC, len, fp, prefix, 0); + __xfrm_algo_print(base_algo, XFRMA_ALG_AUTH_TRUNC, len, fp, prefix, 0); fprintf(fp, " %d", algo->alg_trunc_len); @@ -1236,7 +1232,7 @@ static int xfrm_selector_upspec_parse(struct xfrm_selector *sel, uval = htonl(get_addr32(*argv)); else { if (get_unsigned(&uval, *argv, 0)<0) { - fprintf(stderr, "invalid value of \"key\"\n"); + fprintf(stderr, "invalid value for \"key\"; it should be an unsigned integer\n"); exit(-1); } } diff --git a/ip/link_gre.c b/ip/link_gre.c index 839fb299..7e0b8966 100644 --- a/ip/link_gre.c +++ b/ip/link_gre.c @@ -141,7 +141,7 @@ get_failed: else { if (get_unsigned(&uval, *argv, 0) < 0) { fprintf(stderr, - "Invalid value for \"key\"\n"); + "Invalid value for \"key\": \"%s\"; it should be an unsigned integer\n", *argv); exit(-1); } uval = htonl(uval); @@ -157,7 +157,7 @@ get_failed: uval = get_addr32(*argv); else { if 
(get_unsigned(&uval, *argv, 0)<0) { - fprintf(stderr, "invalid value of \"ikey\"\n"); + fprintf(stderr, "invalid value for \"ikey\": \"%s\"; it should be an unsigned integer\n", *argv); exit(-1); } uval = htonl(uval); @@ -172,7 +172,7 @@ get_failed: uval = get_addr32(*argv); else { if (get_unsigned(&uval, *argv, 0)<0) { - fprintf(stderr, "invalid value of \"okey\"\n"); + fprintf(stderr, "invalid value for \"okey\": \"%s\"; it should be an unsigned integer\n", *argv); exit(-1); } uval = htonl(uval); @@ -247,7 +247,7 @@ get_failed: oflags |= GRE_KEY; } if (IN_MULTICAST(ntohl(daddr)) && !saddr) { - fprintf(stderr, "Broadcast tunnel requires a source address.\n"); + fprintf(stderr, "A broadcast tunnel requires a source address.\n"); return -1; } diff --git a/ip/link_ip6tnl.c b/ip/link_ip6tnl.c new file mode 100644 index 00000000..29473648 --- /dev/null +++ b/ip/link_ip6tnl.c @@ -0,0 +1,344 @@ +/* + * link_ip6tnl.c ip6tnl driver module + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. 
+ * + * Authors: Nicolas Dichtel + * + */ + +#include +#include +#include +#include +#include + +#include +#include +#include +#include "rt_names.h" +#include "utils.h" +#include "ip_common.h" +#include "tunnel.h" + +#define IP6_FLOWINFO_TCLASS htonl(0x0FF00000) +#define IP6_FLOWINFO_FLOWLABEL htonl(0x000FFFFF) + +#define DEFAULT_TNL_HOP_LIMIT (64) + +static void usage(void) __attribute__((noreturn)); +static void usage(void) +{ + fprintf(stderr, "Usage: ip link { add | set | change | replace | del } NAME\n"); + fprintf(stderr, " type ip6tnl [ remote ADDR ] [ local ADDR ]\n"); + fprintf(stderr, " [ dev PHYS_DEV ] [ encaplimit ELIM ]\n"); + fprintf(stderr, " [ hoplimit HLIM ] [ tclass TCLASS ] [ flowlabel FLOWLABEL ]\n"); + fprintf(stderr, " [ dscp inherit ] [ fwmark inherit ]\n"); + fprintf(stderr, "\n"); + fprintf(stderr, "Where: NAME := STRING\n"); + fprintf(stderr, " ADDR := IPV6_ADDRESS\n"); + fprintf(stderr, " ELIM := { none | 0..255 }(default=%d)\n", + IPV6_DEFAULT_TNL_ENCAP_LIMIT); + fprintf(stderr, " HLIM := 0..255 (default=%d)\n", + DEFAULT_TNL_HOP_LIMIT); + fprintf(stderr, " TCLASS := { 0x0..0xff | inherit }\n"); + fprintf(stderr, " FLOWLABEL := { 0x0..0xfffff | inherit }\n"); + exit(-1); +} +/* Parse ip6tnl options from argv and append them to n as IFLA_IPTUN_* attributes; without NLM_F_CREATE the existing tunnel is queried first to seed the values. Returns 0, or -1 if that query fails. */ +static int ip6tunnel_parse_opt(struct link_util *lu, int argc, char **argv, + struct nlmsghdr *n) +{ + struct { + struct nlmsghdr n; + struct ifinfomsg i; + char buf[2048]; + } req; + struct ifinfomsg *ifi = (struct ifinfomsg *)(n + 1); + struct rtattr *tb[IFLA_MAX + 1]; + struct rtattr *linkinfo[IFLA_INFO_MAX+1]; + struct rtattr *iptuninfo[IFLA_IPTUN_MAX + 1]; + int len; + struct in6_addr laddr; + struct in6_addr raddr; + __u8 hop_limit = DEFAULT_TNL_HOP_LIMIT; + __u8 encap_limit = IPV6_DEFAULT_TNL_ENCAP_LIMIT; + __u32 flowinfo = 0; + __u32 flags = 0; + __u32 link = 0; + __u8 proto = 0; + + memset(&laddr, 0, sizeof(laddr)); + memset(&raddr, 0, sizeof(raddr)); + + if (!(n->nlmsg_flags & NLM_F_CREATE)) { + memset(&req, 0, sizeof(req)); + + req.n.nlmsg_len =
NLMSG_LENGTH(sizeof(*ifi)); + req.n.nlmsg_flags = NLM_F_REQUEST; + req.n.nlmsg_type = RTM_GETLINK; + req.i.ifi_family = preferred_family; + req.i.ifi_index = ifi->ifi_index; + + if (rtnl_talk(&rth, &req.n, 0, 0, &req.n) < 0) { +get_failed: + fprintf(stderr, + "Failed to get existing tunnel info.\n"); + return -1; + } + + len = req.n.nlmsg_len; + len -= NLMSG_LENGTH(sizeof(*ifi)); + if (len < 0) + goto get_failed; + + parse_rtattr(tb, IFLA_MAX, IFLA_RTA(&req.i), len); + + if (!tb[IFLA_LINKINFO]) + goto get_failed; + + parse_rtattr_nested(linkinfo, IFLA_INFO_MAX, tb[IFLA_LINKINFO]); + + if (!linkinfo[IFLA_INFO_DATA]) + goto get_failed; + + parse_rtattr_nested(iptuninfo, IFLA_IPTUN_MAX, + linkinfo[IFLA_INFO_DATA]); + + if (iptuninfo[IFLA_IPTUN_LOCAL]) + memcpy(&laddr, RTA_DATA(iptuninfo[IFLA_IPTUN_LOCAL]), + sizeof(laddr)); + + if (iptuninfo[IFLA_IPTUN_REMOTE]) + memcpy(&raddr, RTA_DATA(iptuninfo[IFLA_IPTUN_REMOTE]), + sizeof(raddr)); + + if (iptuninfo[IFLA_IPTUN_TTL]) + hop_limit = rta_getattr_u8(iptuninfo[IFLA_IPTUN_TTL]); + + if (iptuninfo[IFLA_IPTUN_ENCAP_LIMIT]) + encap_limit = rta_getattr_u8(iptuninfo[IFLA_IPTUN_ENCAP_LIMIT]); + + if (iptuninfo[IFLA_IPTUN_FLOWINFO]) + flowinfo = rta_getattr_u32(iptuninfo[IFLA_IPTUN_FLOWINFO]); + + if (iptuninfo[IFLA_IPTUN_FLAGS]) + flags = rta_getattr_u32(iptuninfo[IFLA_IPTUN_FLAGS]); + + if (iptuninfo[IFLA_IPTUN_LINK]) + link = rta_getattr_u32(iptuninfo[IFLA_IPTUN_LINK]); + + if (iptuninfo[IFLA_IPTUN_PROTO]) + proto = rta_getattr_u8(iptuninfo[IFLA_IPTUN_PROTO]); + } + + while (argc > 0) { + if (matches(*argv, "mode") == 0) { + NEXT_ARG(); + if (strcmp(*argv, "ipv6/ipv6") == 0 || + strcmp(*argv, "ip6ip6") == 0) + proto = IPPROTO_IPV6; + else if (strcmp(*argv, "ip/ipv6") == 0 || + strcmp(*argv, "ipv4/ipv6") == 0 || + strcmp(*argv, "ipip6") == 0 || + strcmp(*argv, "ip4ip6") == 0) + proto = IPPROTO_IPIP; + else if (strcmp(*argv, "any/ipv6") == 0 || + strcmp(*argv, "any") == 0) + proto = 0; + else + invarg("Cannot guess tunnel
mode.", *argv); + } else if (strcmp(*argv, "remote") == 0) { + inet_prefix addr; + NEXT_ARG(); + get_prefix(&addr, *argv, preferred_family); + if (addr.family == AF_UNSPEC) + invarg("\"remote\" address family is AF_UNSPEC", *argv); + memcpy(&raddr, addr.data, addr.bytelen); + } else if (strcmp(*argv, "local") == 0) { + inet_prefix addr; + NEXT_ARG(); + get_prefix(&addr, *argv, preferred_family); + if (addr.family == AF_UNSPEC) + invarg("\"local\" address family is AF_UNSPEC", *argv); + memcpy(&laddr, addr.data, addr.bytelen); + } else if (matches(*argv, "dev") == 0) { + NEXT_ARG(); + link = if_nametoindex(*argv); + if (link == 0) + invarg("\"dev\" is invalid", *argv); + } else if (strcmp(*argv, "hoplimit") == 0 || + strcmp(*argv, "ttl") == 0 || + strcmp(*argv, "hlim") == 0) { + __u8 uval; + NEXT_ARG(); + if (get_u8(&uval, *argv, 0)) + invarg("invalid HLIM", *argv); + hop_limit = uval; + } else if (matches(*argv, "encaplimit") == 0) { + NEXT_ARG(); + if (strcmp(*argv, "none") == 0) { + flags |= IP6_TNL_F_IGN_ENCAP_LIMIT; + } else { + __u8 uval; + if (get_u8(&uval, *argv, 0)) /* reject any parse failure, as for HLIM above; uval is unset otherwise */ + invarg("invalid ELIM", *argv); + encap_limit = uval; + flags &= ~IP6_TNL_F_IGN_ENCAP_LIMIT; + } + } else if (strcmp(*argv, "tclass") == 0 || + strcmp(*argv, "tc") == 0 || + strcmp(*argv, "tos") == 0 || + matches(*argv, "dsfield") == 0) { + __u8 uval; + NEXT_ARG(); + flowinfo &= ~IP6_FLOWINFO_TCLASS; + if (strcmp(*argv, "inherit") == 0) + flags |= IP6_TNL_F_USE_ORIG_TCLASS; + else { + if (get_u8(&uval, *argv, 16)) + invarg("invalid TClass", *argv); + flowinfo |= htonl((__u32)uval << 20) & IP6_FLOWINFO_TCLASS; + flags &= ~IP6_TNL_F_USE_ORIG_TCLASS; + } + } else if (strcmp(*argv, "flowlabel") == 0 || + strcmp(*argv, "fl") == 0) { + __u32 uval; + NEXT_ARG(); + flowinfo &= ~IP6_FLOWINFO_FLOWLABEL; + if (strcmp(*argv, "inherit") == 0) + flags |= IP6_TNL_F_USE_ORIG_FLOWLABEL; + else { + if (get_u32(&uval, *argv, 16)) + invarg("invalid Flowlabel", *argv); + if (uval > 0xFFFFF) + invarg("invalid
Flowlabel", *argv); + flowinfo |= htonl(uval) & IP6_FLOWINFO_FLOWLABEL; + flags &= ~IP6_TNL_F_USE_ORIG_FLOWLABEL; + } + } else if (strcmp(*argv, "dscp") == 0) { + NEXT_ARG(); + if (strcmp(*argv, "inherit") != 0) + invarg("not inherit", *argv); + flags |= IP6_TNL_F_RCV_DSCP_COPY; + } else if (strcmp(*argv, "fwmark") == 0) { + NEXT_ARG(); + if (strcmp(*argv, "inherit") != 0) + invarg("not inherit", *argv); + flags |= IP6_TNL_F_USE_ORIG_FWMARK; + } else + usage(); + argc--, argv++; + } + + addattr8(n, 1024, IFLA_IPTUN_PROTO, proto); + addattr_l(n, 1024, IFLA_IPTUN_LOCAL, &laddr, sizeof(laddr)); + addattr_l(n, 1024, IFLA_IPTUN_REMOTE, &raddr, sizeof(raddr)); + addattr8(n, 1024, IFLA_IPTUN_TTL, hop_limit); + addattr8(n, 1024, IFLA_IPTUN_ENCAP_LIMIT, encap_limit); + addattr32(n, 1024, IFLA_IPTUN_FLOWINFO, flowinfo); + addattr32(n, 1024, IFLA_IPTUN_FLAGS, flags); + addattr32(n, 1024, IFLA_IPTUN_LINK, link); + + return 0; +} +/* Print the ip6tnl attributes found in tb to f; a NULL tb prints nothing. All output goes to f. */ +static void ip6tunnel_print_opt(struct link_util *lu, FILE *f, struct rtattr *tb[]) +{ + char s1[256]; + char s2[64]; + int flags = 0; + __u32 flowinfo = 0; + + if (!tb) + return; + + if (tb[IFLA_IPTUN_FLAGS]) + flags = rta_getattr_u32(tb[IFLA_IPTUN_FLAGS]); + + if (tb[IFLA_IPTUN_FLOWINFO]) + flowinfo = rta_getattr_u32(tb[IFLA_IPTUN_FLOWINFO]); + + if (tb[IFLA_IPTUN_PROTO]) { + switch (rta_getattr_u8(tb[IFLA_IPTUN_PROTO])) { + case IPPROTO_IPIP: + fprintf(f, "ipip6 "); + break; + case IPPROTO_IPV6: + fprintf(f, "ip6ip6 "); + break; + case 0: + fprintf(f, "any "); + break; + } + } + + if (tb[IFLA_IPTUN_REMOTE]) { + fprintf(f, "remote %s ", + rt_addr_n2a(AF_INET6, + RTA_PAYLOAD(tb[IFLA_IPTUN_REMOTE]), + RTA_DATA(tb[IFLA_IPTUN_REMOTE]), + s1, sizeof(s1))); + } + + if (tb[IFLA_IPTUN_LOCAL]) { + fprintf(f, "local %s ", + rt_addr_n2a(AF_INET6, + RTA_PAYLOAD(tb[IFLA_IPTUN_LOCAL]), + RTA_DATA(tb[IFLA_IPTUN_LOCAL]), + s1, sizeof(s1))); + } + + if (tb[IFLA_IPTUN_LINK] && rta_getattr_u32(tb[IFLA_IPTUN_LINK])) { + unsigned link =
rta_getattr_u32(tb[IFLA_IPTUN_LINK]); + const char *n = if_indextoname(link, s2); + + if (n) + fprintf(f, "dev %s ", n); + else + fprintf(f, "dev %u ", link); + } + + if (flags & IP6_TNL_F_IGN_ENCAP_LIMIT) + fprintf(f, "encaplimit none "); + else if (tb[IFLA_IPTUN_ENCAP_LIMIT]) + fprintf(f, "encaplimit %u ", + rta_getattr_u8(tb[IFLA_IPTUN_ENCAP_LIMIT])); + + if (tb[IFLA_IPTUN_TTL]) + fprintf(f, "hoplimit %u ", rta_getattr_u8(tb[IFLA_IPTUN_TTL])); + + if (flags & IP6_TNL_F_USE_ORIG_TCLASS) + fprintf(f, "tclass inherit "); + else if (tb[IFLA_IPTUN_FLOWINFO]) { + __u32 val = ntohl(flowinfo & IP6_FLOWINFO_TCLASS); + + fprintf(f, "tclass 0x%02x ", (__u8)(val >> 20)); + } + + if (flags & IP6_TNL_F_USE_ORIG_FLOWLABEL) + fprintf(f, "flowlabel inherit "); + else + fprintf(f, "flowlabel 0x%05x ", ntohl(flowinfo & IP6_FLOWINFO_FLOWLABEL)); + + fprintf(f, "(flowinfo 0x%08x) ", ntohl(flowinfo)); + + if (flags & IP6_TNL_F_RCV_DSCP_COPY) + fprintf(f, "dscp inherit "); + + if (flags & IP6_TNL_F_MIP6_DEV) + fprintf(f, "mip6 "); + + if (flags & IP6_TNL_F_USE_ORIG_FWMARK) + fprintf(f, "fwmark inherit "); +} + +struct link_util ip6tnl_link_util = { + .id = "ip6tnl", + .maxattr = IFLA_IPTUN_MAX, + .parse_opt = ip6tunnel_parse_opt, + .print_opt = ip6tunnel_print_opt, +}; diff --git a/ip/link_iptnl.c b/ip/link_iptnl.c new file mode 100644 index 00000000..b00d8d90 --- /dev/null +++ b/ip/link_iptnl.c @@ -0,0 +1,340 @@ +/* + * link_iptnl.c ipip and sit driver module + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version.
+ * + * Authors: Nicolas Dichtel + * + */ + +#include +#include +#include +#include +#include + +#include +#include +#include "rt_names.h" +#include "utils.h" +#include "ip_common.h" +#include "tunnel.h" + +static void usage(int sit) __attribute__((noreturn)); +static void usage(int sit) +{ + fprintf(stderr, "Usage: ip link { add | set | change | replace | del } NAME\n"); + fprintf(stderr, " type { ipip | sit } [ remote ADDR ] [ local ADDR ]\n"); + fprintf(stderr, " [ ttl TTL ] [ tos TOS ] [ [no]pmtudisc ] [ dev PHYS_DEV ]\n"); + fprintf(stderr, " [ 6rd-prefix ADDR ] [ 6rd-relay_prefix ADDR ] [ 6rd-reset ]\n"); + if (sit) + fprintf(stderr, " [ isatap ]\n"); + fprintf(stderr, "\n"); + fprintf(stderr, "Where: NAME := STRING\n"); + fprintf(stderr, " ADDR := { IP_ADDRESS | any }\n"); + fprintf(stderr, " TOS := { NUMBER | inherit }\n"); + fprintf(stderr, " TTL := { 1..255 | inherit }\n"); + exit(-1); +} +/* Parse ipip/sit options from argv and append them to n as IFLA_IPTUN_* attributes; without NLM_F_CREATE the existing tunnel is queried first to seed the values. Returns 0, or -1 if that query fails. */ +static int iptunnel_parse_opt(struct link_util *lu, int argc, char **argv, + struct nlmsghdr *n) +{ + struct { + struct nlmsghdr n; + struct ifinfomsg i; + char buf[2048]; + } req; + struct ifinfomsg *ifi = (struct ifinfomsg *)(n + 1); + struct rtattr *tb[IFLA_MAX + 1]; + struct rtattr *linkinfo[IFLA_INFO_MAX+1]; + struct rtattr *iptuninfo[IFLA_IPTUN_MAX + 1]; + int len; + __u32 link = 0; + __u32 laddr = 0; + __u32 raddr = 0; + __u8 ttl = 0; + __u8 tos = 0; + __u8 pmtudisc = 1; + __u16 iflags = 0; + struct in6_addr ip6rdprefix; + __u16 ip6rdprefixlen = 0; + __u32 ip6rdrelayprefix = 0; + __u16 ip6rdrelayprefixlen = 0; + + memset(&ip6rdprefix, 0, sizeof(ip6rdprefix)); + + if (!(n->nlmsg_flags & NLM_F_CREATE)) { + memset(&req, 0, sizeof(req)); + + req.n.nlmsg_len = NLMSG_LENGTH(sizeof(*ifi)); + req.n.nlmsg_flags = NLM_F_REQUEST; + req.n.nlmsg_type = RTM_GETLINK; + req.i.ifi_family = preferred_family; + req.i.ifi_index = ifi->ifi_index; + + if (rtnl_talk(&rth, &req.n, 0, 0, &req.n) < 0) { +get_failed: + fprintf(stderr, + "Failed to get existing tunnel info.\n"); + return
-1; + } + + len = req.n.nlmsg_len; + len -= NLMSG_LENGTH(sizeof(*ifi)); + if (len < 0) + goto get_failed; + + parse_rtattr(tb, IFLA_MAX, IFLA_RTA(&req.i), len); + + if (!tb[IFLA_LINKINFO]) + goto get_failed; + + parse_rtattr_nested(linkinfo, IFLA_INFO_MAX, tb[IFLA_LINKINFO]); + + if (!linkinfo[IFLA_INFO_DATA]) + goto get_failed; + + parse_rtattr_nested(iptuninfo, IFLA_IPTUN_MAX, + linkinfo[IFLA_INFO_DATA]); + + if (iptuninfo[IFLA_IPTUN_LOCAL]) + laddr = rta_getattr_u32(iptuninfo[IFLA_IPTUN_LOCAL]); + + if (iptuninfo[IFLA_IPTUN_REMOTE]) + raddr = rta_getattr_u32(iptuninfo[IFLA_IPTUN_REMOTE]); + + if (iptuninfo[IFLA_IPTUN_TTL]) + ttl = rta_getattr_u8(iptuninfo[IFLA_IPTUN_TTL]); + + if (iptuninfo[IFLA_IPTUN_TOS]) + tos = rta_getattr_u8(iptuninfo[IFLA_IPTUN_TOS]); + + if (iptuninfo[IFLA_IPTUN_PMTUDISC]) + pmtudisc = + rta_getattr_u8(iptuninfo[IFLA_IPTUN_PMTUDISC]); + + if (iptuninfo[IFLA_IPTUN_FLAGS]) + iflags = rta_getattr_u16(iptuninfo[IFLA_IPTUN_FLAGS]); + + if (iptuninfo[IFLA_IPTUN_LINK]) + link = rta_getattr_u32(iptuninfo[IFLA_IPTUN_LINK]); + + if (iptuninfo[IFLA_IPTUN_6RD_PREFIX]) + memcpy(&ip6rdprefix, + RTA_DATA(iptuninfo[IFLA_IPTUN_6RD_PREFIX]), + sizeof(ip6rdprefix)); /* copy the whole in6_addr, not just sizeof(__u32) */ + + if (iptuninfo[IFLA_IPTUN_6RD_PREFIXLEN]) + ip6rdprefixlen = + rta_getattr_u16(iptuninfo[IFLA_IPTUN_6RD_PREFIXLEN]); + + if (iptuninfo[IFLA_IPTUN_6RD_RELAY_PREFIX]) + ip6rdrelayprefix = + rta_getattr_u32(iptuninfo[IFLA_IPTUN_6RD_RELAY_PREFIX]); + + if (iptuninfo[IFLA_IPTUN_6RD_RELAY_PREFIXLEN]) + ip6rdrelayprefixlen = + rta_getattr_u16(iptuninfo[IFLA_IPTUN_6RD_RELAY_PREFIXLEN]); + } + + while (argc > 0) { + if (strcmp(*argv, "remote") == 0) { + NEXT_ARG(); + if (strcmp(*argv, "any")) + raddr = get_addr32(*argv); + else + raddr = 0; + } else if (strcmp(*argv, "local") == 0) { + NEXT_ARG(); + if (strcmp(*argv, "any")) + laddr = get_addr32(*argv); + else + laddr = 0; + } else if (matches(*argv, "dev") == 0) { + NEXT_ARG(); + link = if_nametoindex(*argv); + if (link == 0) + invarg("\"dev\" is invalid",
*argv); + } else if (strcmp(*argv, "ttl") == 0 || + strcmp(*argv, "hoplimit") == 0) { + NEXT_ARG(); + if (strcmp(*argv, "inherit") != 0) { + if (get_u8(&ttl, *argv, 0)) + invarg("invalid TTL", *argv); + } else + ttl = 0; + } else if (strcmp(*argv, "tos") == 0 || + strcmp(*argv, "tclass") == 0 || + matches(*argv, "dsfield") == 0) { + __u32 uval; + NEXT_ARG(); + if (strcmp(*argv, "inherit") != 0) { + if (rtnl_dsfield_a2n(&uval, *argv)) + invarg("bad TOS value", *argv); + tos = uval; + } else + tos = 1; + } else if (strcmp(*argv, "nopmtudisc") == 0) { + pmtudisc = 0; + } else if (strcmp(*argv, "pmtudisc") == 0) { + pmtudisc = 1; + } else if (strcmp(lu->id, "sit") == 0 && + strcmp(*argv, "isatap") == 0) { + iflags |= SIT_ISATAP; + } else if (strcmp(*argv, "6rd-prefix") == 0) { + inet_prefix prefix; + NEXT_ARG(); + if (get_prefix(&prefix, *argv, AF_INET6)) + invarg("invalid 6rd_prefix", *argv); + memcpy(&ip6rdprefix, prefix.data, 16); + ip6rdprefixlen = prefix.bitlen; + } else if (strcmp(*argv, "6rd-relay_prefix") == 0) { + inet_prefix prefix; + NEXT_ARG(); + if (get_prefix(&prefix, *argv, AF_INET)) + invarg("invalid 6rd-relay_prefix", *argv); + memcpy(&ip6rdrelayprefix, prefix.data, 4); + ip6rdrelayprefixlen = prefix.bitlen; + } else if (strcmp(*argv, "6rd-reset") == 0) { + inet_prefix prefix; + get_prefix(&prefix, "2002::", AF_INET6); + memcpy(&ip6rdprefix, prefix.data, 16); + ip6rdprefixlen = 16; + ip6rdrelayprefix = 0; + ip6rdrelayprefixlen = 0; + } else + usage(strcmp(lu->id, "sit") == 0); + argc--, argv++; + } + + if (ttl && pmtudisc == 0) { + fprintf(stderr, "ttl != 0 and nopmtudisc are incompatible\n"); + exit(-1); + } + + addattr32(n, 1024, IFLA_IPTUN_LINK, link); + addattr32(n, 1024, IFLA_IPTUN_LOCAL, laddr); + addattr32(n, 1024, IFLA_IPTUN_REMOTE, raddr); + addattr8(n, 1024, IFLA_IPTUN_TTL, ttl); + addattr8(n, 1024, IFLA_IPTUN_TOS, tos); + addattr8(n, 1024, IFLA_IPTUN_PMTUDISC, pmtudisc); + if (strcmp(lu->id, "sit") == 0) { + addattr16(n, 1024,
IFLA_IPTUN_FLAGS, iflags); + if (ip6rdprefixlen) { + addattr_l(n, 1024, IFLA_IPTUN_6RD_PREFIX, + &ip6rdprefix, sizeof(ip6rdprefix)); + addattr16(n, 1024, IFLA_IPTUN_6RD_PREFIXLEN, + ip6rdprefixlen); + addattr32(n, 1024, IFLA_IPTUN_6RD_RELAY_PREFIX, + ip6rdrelayprefix); + addattr16(n, 1024, IFLA_IPTUN_6RD_RELAY_PREFIXLEN, + ip6rdrelayprefixlen); + } + } + + return 0; +} +/* Print the ipip/sit attributes found in tb to f; a NULL tb prints nothing. All output goes to f. */ +static void iptunnel_print_opt(struct link_util *lu, FILE *f, struct rtattr *tb[]) +{ + char s1[1024]; + char s2[64]; + const char *local = "any"; + const char *remote = "any"; + + if (!tb) + return; + + if (tb[IFLA_IPTUN_REMOTE]) { + unsigned addr = rta_getattr_u32(tb[IFLA_IPTUN_REMOTE]); + + if (addr) + remote = format_host(AF_INET, 4, &addr, s1, sizeof(s1)); + } + + fprintf(f, "remote %s ", remote); + + if (tb[IFLA_IPTUN_LOCAL]) { + unsigned addr = rta_getattr_u32(tb[IFLA_IPTUN_LOCAL]); + + if (addr) + local = format_host(AF_INET, 4, &addr, s1, sizeof(s1)); + } + + fprintf(f, "local %s ", local); + + if (tb[IFLA_IPTUN_LINK] && rta_getattr_u32(tb[IFLA_IPTUN_LINK])) { + unsigned link = rta_getattr_u32(tb[IFLA_IPTUN_LINK]); + const char *n = if_indextoname(link, s2); + + if (n) + fprintf(f, "dev %s ", n); + else + fprintf(f, "dev %u ", link); + } + + if (tb[IFLA_IPTUN_TTL] && rta_getattr_u8(tb[IFLA_IPTUN_TTL])) + fprintf(f, "ttl %d ", rta_getattr_u8(tb[IFLA_IPTUN_TTL])); + else + fprintf(f, "ttl inherit "); + + if (tb[IFLA_IPTUN_TOS] && rta_getattr_u8(tb[IFLA_IPTUN_TOS])) { + int tos = rta_getattr_u8(tb[IFLA_IPTUN_TOS]); + + fputs("tos ", f); + if (tos == 1) + fputs("inherit ", f); + else + fprintf(f, "0x%x ", tos); + } + + if (tb[IFLA_IPTUN_PMTUDISC] && rta_getattr_u8(tb[IFLA_IPTUN_PMTUDISC])) + fprintf(f, "pmtudisc "); + else + fprintf(f, "nopmtudisc "); + + if (tb[IFLA_IPTUN_FLAGS]) { + __u16 iflags = rta_getattr_u16(tb[IFLA_IPTUN_FLAGS]); + + if (iflags & SIT_ISATAP) + fprintf(f, "isatap "); + } + + if (tb[IFLA_IPTUN_6RD_PREFIXLEN] && + *(__u16 *)RTA_DATA(tb[IFLA_IPTUN_6RD_PREFIXLEN])) { +
__u16 prefixlen = rta_getattr_u16(tb[IFLA_IPTUN_6RD_PREFIXLEN]); + __u16 relayprefixlen = + rta_getattr_u16(tb[IFLA_IPTUN_6RD_RELAY_PREFIXLEN]); + __u32 relayprefix = + rta_getattr_u32(tb[IFLA_IPTUN_6RD_RELAY_PREFIX]); + + fprintf(f, "6rd-prefix %s/%u ", + inet_ntop(AF_INET6, RTA_DATA(tb[IFLA_IPTUN_6RD_PREFIX]), + s1, sizeof(s1)), + prefixlen); + if (relayprefix) { + fprintf(f, "6rd-relay_prefix %s/%u ", + format_host(AF_INET, 4, &relayprefix, s1, + sizeof(s1)), + relayprefixlen); + } + } +} + +struct link_util ipip_link_util = { + .id = "ipip", + .maxattr = IFLA_IPTUN_MAX, + .parse_opt = iptunnel_parse_opt, + .print_opt = iptunnel_print_opt, +}; + +struct link_util sit_link_util = { + .id = "sit", + .maxattr = IFLA_IPTUN_MAX, + .parse_opt = iptunnel_parse_opt, + .print_opt = iptunnel_print_opt, +}; diff --git a/ip/link_vti.c b/ip/link_vti.c index 385f4355..77a7482c 100644 --- a/ip/link_vti.c +++ b/ip/link_vti.c @@ -117,7 +117,7 @@ get_failed: else { if (get_unsigned(&uval, *argv, 0) < 0) { fprintf(stderr, - "Invalid value for \"key\"\n"); + "Invalid value for \"key\": \"%s\"; it should be an unsigned integer\n", *argv); exit(-1); } uval = htonl(uval); @@ -132,7 +132,7 @@ get_failed: uval = get_addr32(*argv); else { if (get_unsigned(&uval, *argv, 0) < 0) { - fprintf(stderr, "invalid value of \"ikey\"\n"); + fprintf(stderr, "invalid value for \"ikey\": \"%s\"; it should be an unsigned integer\n", *argv); exit(-1); } uval = htonl(uval); @@ -146,7 +146,7 @@ get_failed: uval = get_addr32(*argv); else { if (get_unsigned(&uval, *argv, 0) < 0) { - fprintf(stderr, "invalid value of \"okey\"\n"); + fprintf(stderr, "invalid value for \"okey\": \"%s\"; it should be an unsigned integer\n", *argv); exit(-1); } uval = htonl(uval); @@ -155,7 +155,7 @@ get_failed: } else if (!matches(*argv, "remote")) { NEXT_ARG(); if (!strcmp(*argv, "any")) { - fprintf(stderr, "invalid value of \"remote\"\n"); + fprintf(stderr, "invalid value for \"remote\": \"%s\"\n", *argv); exit(-1); } else { daddr =
get_addr32(*argv); @@ -163,7 +163,7 @@ get_failed: } else if (!matches(*argv, "local")) { NEXT_ARG(); if (!strcmp(*argv, "any")) { - fprintf(stderr, "invalid value of \"local\"\n"); + fprintf(stderr, "invalid value for \"local\": \"%s\"\n", *argv); exit(-1); } else { saddr = get_addr32(*argv); diff --git a/ip/rtmon.c b/ip/rtmon.c index c1416a0b..9227eacc 100644 --- a/ip/rtmon.c +++ b/ip/rtmon.c @@ -56,7 +56,7 @@ static int dump_msg(const struct sockaddr_nl *who, struct nlmsghdr *n, return 0; } -void usage(void) +static void usage(void) { fprintf(stderr, "Usage: rtmon file FILE [ all | LISTofOBJECTS]\n"); fprintf(stderr, "LISTofOBJECTS := [ link ] [ address ] [ route ]\n"); diff --git a/ip/tcp_metrics.c b/ip/tcp_metrics.c index 34e1d8e2..a01e1fb1 100644 --- a/ip/tcp_metrics.c +++ b/ip/tcp_metrics.c @@ -268,7 +268,7 @@ static int tcpm_do_cmd(int cmd, int argc, char **argv) case AF_INET6: break; default: - fprintf(stderr, "Unsupported family:%d\n", preferred_family); + fprintf(stderr, "Unsupported protocol family: %d\n", preferred_family); return -1; } diff --git a/ip/tunnel.c b/ip/tunnel.c index b176d3f0..a6a28466 100644 --- a/ip/tunnel.c +++ b/ip/tunnel.c @@ -74,7 +74,7 @@ int tnl_get_ioctl(const char *basedev, void *p) fd = socket(preferred_family, SOCK_DGRAM, 0); err = ioctl(fd, SIOCGETTUNNEL, &ifr); if (err) - fprintf(stderr, "get tunnel %s failed: %s\n", basedev, + fprintf(stderr, "get tunnel \"%s\" failed: %s\n", basedev, strerror(errno)); close(fd); @@ -95,7 +95,7 @@ int tnl_add_ioctl(int cmd, const char *basedev, const char *name, void *p) fd = socket(preferred_family, SOCK_DGRAM, 0); err = ioctl(fd, cmd, &ifr); if (err) - fprintf(stderr, "add tunnel %s failed: %s\n", ifr.ifr_name, + fprintf(stderr, "add tunnel \"%s\" failed: %s\n", ifr.ifr_name, strerror(errno)); close(fd); return err; @@ -116,7 +116,7 @@ int tnl_del_ioctl(const char *basedev, const char *name, void *p) fd = socket(preferred_family, SOCK_DGRAM, 0); err = ioctl(fd, SIOCDELTUNNEL, &ifr); if 
(err) - fprintf(stderr, "delete tunnel %s failed: %s\n", + fprintf(stderr, "delete tunnel \"%s\" failed: %s\n", ifr.ifr_name, strerror(errno)); close(fd); return err; diff --git a/ip/xfrm_monitor.c b/ip/xfrm_monitor.c index ea17987d..bfc48f1b 100644 --- a/ip/xfrm_monitor.c +++ b/ip/xfrm_monitor.c @@ -206,7 +206,7 @@ static int xfrm_report_print(const struct sockaddr_nl *who, return 0; } -void xfrm_ae_flags_print(__u32 flags, void *arg) +static void xfrm_ae_flags_print(__u32 flags, void *arg) { FILE *fp = (FILE*)arg; fprintf(fp, " (0x%x) ", flags); diff --git a/ip/xfrm_policy.c b/ip/xfrm_policy.c index 8e3aec50..bf263e0d 100644 --- a/ip/xfrm_policy.c +++ b/ip/xfrm_policy.c @@ -896,7 +896,7 @@ static int xfrm_policy_list_or_deleteall(int argc, char **argv, int deleteall) exit(0); } -int print_spdinfo( struct nlmsghdr *n, void *arg) +static int print_spdinfo( struct nlmsghdr *n, void *arg) { FILE *fp = (FILE*)arg; __u32 *f = NLMSG_DATA(n); diff --git a/ip/xfrm_state.c b/ip/xfrm_state.c index 0d98e787..35957bdd 100644 --- a/ip/xfrm_state.c +++ b/ip/xfrm_state.c @@ -1053,7 +1053,7 @@ static int xfrm_state_list_or_deleteall(int argc, char **argv, int deleteall) exit(0); } -int print_sadinfo(struct nlmsghdr *n, void *arg) +static int print_sadinfo(struct nlmsghdr *n, void *arg) { FILE *fp = (FILE*)arg; __u32 *f = NLMSG_DATA(n); diff --git a/lib/Makefile b/lib/Makefile index bfbe6725..a42b8859 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -1,3 +1,5 @@ +include ../Config + CFLAGS += -fPIC UTILOBJ=utils.o rt_names.o ll_types.o ll_proto.o ll_addr.o inet_proto.o diff --git a/lib/inet_proto.c b/lib/inet_proto.c index a55e0e7b..57a83511 100644 --- a/lib/inet_proto.c +++ b/lib/inet_proto.c @@ -20,9 +20,10 @@ #include #include +#include "rt_names.h" #include "utils.h" -char *inet_proto_n2a(int proto, char *buf, int len) +const char *inet_proto_n2a(int proto, char *buf, int len) { static char ncache[16]; static int icache = -1; @@ -42,7 +43,7 @@ char *inet_proto_n2a(int proto, 
char *buf, int len) return buf; } -int inet_proto_a2n(char *buf) +int inet_proto_a2n(const char *buf) { static char ncache[16]; static int icache = -1; diff --git a/lib/ipx_ntop.c b/lib/ipx_ntop.c index 7b6d728d..1e46bc21 100644 --- a/lib/ipx_ntop.c +++ b/lib/ipx_ntop.c @@ -1,5 +1,6 @@ #include #include +#include #include #include "utils.h" diff --git a/lib/ipx_pton.c b/lib/ipx_pton.c index 1a52b7f1..3dca2713 100644 --- a/lib/ipx_pton.c +++ b/lib/ipx_pton.c @@ -1,6 +1,7 @@ #include #include #include +#include #include #include "utils.h" diff --git a/lib/ll_addr.c b/lib/ll_addr.c index f558050e..c12ab075 100644 --- a/lib/ll_addr.c +++ b/lib/ll_addr.c @@ -57,7 +57,7 @@ const char *ll_addr_n2a(unsigned char *addr, int alen, int type, char *buf, int } /*NB: lladdr is char * (rather than u8 *) because sa_data is char * (1003.1g) */ -int ll_addr_a2n(char *lladdr, int len, char *arg) +int ll_addr_a2n(char *lladdr, int len, const char *arg) { if (strchr(arg, '.')) { inet_prefix pfx; diff --git a/lib/ll_proto.c b/lib/ll_proto.c index 3337b146..3aa82525 100644 --- a/lib/ll_proto.c +++ b/lib/ll_proto.c @@ -100,10 +100,10 @@ const char * ll_proto_n2a(unsigned short id, char *buf, int len) return buf; } -int ll_proto_a2n(unsigned short *id, char *buf) +int ll_proto_a2n(unsigned short *id, const char *buf) { int i; - for (i=0; i=256) { - snprintf(buf, len, "%d", id); + snprintf(buf, len, "%u", id); return buf; } if (!rtnl_rtprot_tab[id]) { @@ -149,11 +148,11 @@ char * rtnl_rtprot_n2a(int id, char *buf, int len) } if (rtnl_rtprot_tab[id]) return rtnl_rtprot_tab[id]; - snprintf(buf, len, "%d", id); + snprintf(buf, len, "%u", id); return buf; } -int rtnl_rtprot_a2n(__u32 *id, char *arg) +int rtnl_rtprot_a2n(__u32 *id, const char *arg) { static char *cache = NULL; static unsigned long res; @@ -186,7 +185,6 @@ int rtnl_rtprot_a2n(__u32 *id, char *arg) } - static char * rtnl_rtscope_tab[256] = { "global", }; @@ -204,7 +202,7 @@ static void rtnl_rtscope_initialize(void) 
rtnl_rtscope_tab, 256); } -char * rtnl_rtscope_n2a(int id, char *buf, int len) +const char *rtnl_rtscope_n2a(int id, char *buf, int len) { if (id<0 || id>=256) { snprintf(buf, len, "%d", id); @@ -220,9 +218,9 @@ char * rtnl_rtscope_n2a(int id, char *buf, int len) return buf; } -int rtnl_rtscope_a2n(__u32 *id, char *arg) +int rtnl_rtscope_a2n(__u32 *id, const char *arg) { - static char *cache = NULL; + static const char *cache = NULL; static unsigned long res; char *end; int i; @@ -253,7 +251,6 @@ int rtnl_rtscope_a2n(__u32 *id, char *arg) } - static char * rtnl_rtrealm_tab[256] = { "unknown", }; @@ -267,7 +264,7 @@ static void rtnl_rtrealm_initialize(void) rtnl_rtrealm_tab, 256); } -char * rtnl_rtrealm_n2a(int id, char *buf, int len) +const char *rtnl_rtrealm_n2a(int id, char *buf, int len) { if (id<0 || id>=256) { snprintf(buf, len, "%d", id); @@ -284,7 +281,7 @@ char * rtnl_rtrealm_n2a(int id, char *buf, int len) } -int rtnl_rtrealm_a2n(__u32 *id, char *arg) +int rtnl_rtrealm_a2n(__u32 *id, const char *arg) { static char *cache = NULL; static unsigned long res; @@ -336,7 +333,7 @@ static void rtnl_rttable_initialize(void) rtnl_rttable_hash, 256); } -char * rtnl_rttable_n2a(__u32 id, char *buf, int len) +const char * rtnl_rttable_n2a(__u32 id, char *buf, int len) { struct rtnl_hash_entry *entry; @@ -355,9 +352,9 @@ char * rtnl_rttable_n2a(__u32 id, char *buf, int len) return buf; } -int rtnl_rttable_a2n(__u32 *id, char *arg) +int rtnl_rttable_a2n(__u32 *id, const char *arg) { - static char *cache = NULL; + static const char *cache = NULL; static unsigned long res; struct rtnl_hash_entry *entry; char *end; @@ -404,7 +401,7 @@ static void rtnl_rtdsfield_initialize(void) rtnl_rtdsfield_tab, 256); } -char * rtnl_dsfield_n2a(int id, char *buf, int len) +const char *rtnl_dsfield_n2a(int id, char *buf, int len) { if (id<0 || id>=256) { snprintf(buf, len, "%d", id); @@ -421,7 +418,7 @@ char * rtnl_dsfield_n2a(int id, char *buf, int len) } -int rtnl_dsfield_a2n(__u32 *id, 
char *arg) +int rtnl_dsfield_a2n(__u32 *id, const char *arg) { static char *cache = NULL; static unsigned long res; @@ -469,9 +466,9 @@ static void rtnl_group_initialize(void) rtnl_group_hash, 256); } -int rtnl_group_a2n(int *id, char *arg) +int rtnl_group_a2n(int *id, const char *arg) { - static char *cache = NULL; + static const char *cache = NULL; static unsigned long res; struct rtnl_hash_entry *entry; char *end; diff --git a/lib/utils.c b/lib/utils.c index 7ecaab3c..5bcdbcfb 100644 --- a/lib/utils.c +++ b/lib/utils.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include #include @@ -22,6 +23,7 @@ #include #include #include +#include #include #include #include diff --git a/man/Makefile b/man/Makefile index 67fea056..749faa11 100644 --- a/man/Makefile +++ b/man/Makefile @@ -2,19 +2,13 @@ INSTALL=install INSTALLDIR=install -m 0755 -d INSTALLMAN=install -m 0644 -SUBDIRS = man3 man8 +SUBDIRS = man3 man7 man8 -all: - @for subdir in $(SUBDIRS); do $(MAKE) -C $$subdir; done +all clean install: + @for subdir in $(SUBDIRS); do $(MAKE) -C $$subdir $@ || exit $$?; done distclean: clean -clean: - @for subdir in $(SUBDIRS); do $(MAKE) -C $$subdir clean; done - -install: - @for subdir in $(SUBDIRS); do $(MAKE) -C $$subdir install; done - .PHONY: install clean distclean .EXPORT_ALL_VARIABLES: diff --git a/man/man3/libnetlink.3 b/man/man3/libnetlink.3 index 15a478a7..e999bd68 100644 --- a/man/man3/libnetlink.3 +++ b/man/man3/libnetlink.3 @@ -100,7 +100,7 @@ in case of error. NULL for means to not use a filter. .B junk is used to filter messages not destined to the local socket. -Only one message bundle is received. Unless there is no message +Only one message bundle is received. If there is a message pending, this function does not block. .TP @@ -112,7 +112,7 @@ is a callback that gets the message source address, the message itself, and the .B jarg cookie as arguments. It will get called for all received messages. -Only one message bundle is received. 
Unless there is no message +Only one message bundle is received. If there is a message pending this function does not block. .TP @@ -123,7 +123,7 @@ but reads a netlink message bundle from the file .B file and passes the messages to .B handler -for parsing. The file contains raw data as received from a rtnetlink socket. +for parsing. The file should contain raw data as received from a rtnetlink socket. .PP The following functions are useful to construct custom rtnetlink messages. For simple database dumping with filtering it is better to use the higher level @@ -183,6 +183,8 @@ Initialize the rtnetlink attribute with a variable length data value. .SH BUGS +This library is meant for internal use, use libmnl for new programs. + The functions sometimes use fprintf and exit when a fatal error occurs. This library should be named librtnetlink. diff --git a/man/man7/Makefile b/man/man7/Makefile new file mode 100644 index 00000000..ccfd8398 --- /dev/null +++ b/man/man7/Makefile @@ -0,0 +1,13 @@ +MAN7PAGES = tc-hfsc.7 + +all: + +distclean: clean + +clean: + +install: + $(INSTALLDIR) $(DESTDIR)$(MANDIR)/man7 + $(INSTALLMAN) $(MAN7PAGES) $(DESTDIR)$(MANDIR)/man7 + +.PHONY: install clean distclean diff --git a/man/man7/tc-hfsc.7 b/man/man7/tc-hfsc.7 index d4e63f27..ca049619 100644 --- a/man/man7/tc-hfsc.7 +++ b/man/man7/tc-hfsc.7 @@ -4,13 +4,12 @@ tc-hfcs \- Hierarchical Fair Service Curve . .SH "HISTORY & INTRODUCTION" . -HFSC \- \fBHierarchical Fair Service Curve\fR was first presented at +HFSC (Hierarchical Fair Service Curve) is a network packet scheduling algorithm that was first presented at SIGCOMM'97. Developed as a part of ALTQ (ALTernative Queuing) on NetBSD, found its way quickly to other BSD systems, and then a few years ago became part of the linux kernel. Still, it's not the most popular scheduling algorithm \- -especially if compared to HTB \- and it's not well documented from enduser's -perspective. 
This introduction aims to explain how HFSC works without -going to deep into math side of things (although some if it will be +especially if compared to HTB \- and it's not well documented for the enduser. This introduction aims to explain how HFSC works without using +too much math (although some math will be inevitable). In short HFSC aims to: @@ -30,10 +29,10 @@ service provided during linksharing . The main "selling" point of HFSC is feature \fB(1)\fR, which is achieved by using nonlinear service curves (more about what it actually is later). This is -particularly useful in VoIP or games, where not only guarantee of consistent -bandwidth is important, but initial delay of a data stream as well. Note that +particularly useful in VoIP or games, where not only a guarantee of consistent +bandwidth is important, but also limiting the initial delay of a data stream. Note that it matters only for leaf classes (where the actual queues are) \- thus class -hierarchy is ignored in realtime case. +hierarchy is ignored in the realtime case. Feature \fB(2)\fR is well, obvious \- any algorithm featuring class hierarchy (such as HTB or CBQ) strives to achieve that. HFSC does that well, although @@ -44,8 +43,8 @@ Feature \fB(3)\fR is mentioned due to the nature of the problem. There may be situations where it's either not possible to guarantee service of all curves at the same time, and/or it's impossible to do so fairly. Both will be explained later. Note that this is mainly related to interior (aka aggregate) classes, as -the leafs are already handled by \fB(1)\fR. Still \- it's perfectly possible to -create a leaf class w/o realtime service, and in such case \- the caveats will +the leafs are already handled by \fB(1)\fR. Still, it's perfectly possible to +create a leaf class without realtime service, and in such a case the caveats will naturally extend to leaf classes as well. .SH ABBREVIATIONS @@ -62,21 +61,22 @@ SC \- service curve .SH "BASICS OF HFSC" .
To understand how HFSC works, we must first introduce a service curve. -Overall, it's a nondecreasing function of some time unit, returning amount of -service (allowed or allocated amount of bandwidth) by some specific point in -time. The purpose of it should be subconsciously obvious \- if a class was -allowed to transfer not less than the amount specified by its service curve \- -then service curve is not violated. +Overall, it's a nondecreasing function of some time unit, returning the amount +of +service (an allowed or allocated amount of bandwidth) at some specific point in +time. The purpose of it should be subconsciously obvious: if a class was +allowed to transfer not less than the amount specified by its service curve, +then the service curve is not violated. -Still \- we need more elaborate criterion than just the above (although in -most generic case it can be reduced to it). The criterion has to take two +Still, we need more elaborate criterion than just the above (although in +the most generic case it can be reduced to it). The criterion has to take two things into account: . .RS 4 .IP \(bu 4 idling periods .IP \(bu -ability to "look back", so if during current active period service curve is violated, maybe it +the ability to "look back", so if during current active period the service curve is violated, maybe it isn't if we count excess bandwidth received during earlier active period(s) .RE .PP @@ -102,9 +102,9 @@ as in (a), but with a larger gap .RE . .PP -Consider \fB(a)\fR \- if the service received during both periods meets -\fB(1)\fR, then all is good. But what if it doesn't do so during the 2nd -period ? If the amount of service received during the 1st period is bigger +Consider \fB(a)\fR: if the service received during both periods meets +\fB(1)\fR, then all is well. But what if it doesn't do so during the 2nd +period? 
If the amount of service received during the 1st period is larger than the service curve, then it might compensate for smaller service during the 2nd period \fIand\fR the gap \- if the gap is small enough. @@ -172,42 +172,43 @@ curves and the above "utility" functions. .SH "REALTIME CRITERION" . RT criterion \fIignores class hierarchy\fR and guarantees precise bandwidth and -delay allocation. We say that packet is eligible for sending, when current real -time is bigger than eligible time. From all packets eligible, the one most -suited for sending, is the one with the smallest deadline time. Sounds simply, -but consider following example: +delay allocation. We say that a packet is eligible for sending, when the +current real +time is later than the eligible time of the packet. From all eligible packets, the one most +suited for sending is the one with the shortest deadline time. This sounds +simple, but consider the following example: -Interface 10mbit, two classes, both with two\-piece linear service curves: +Interface 10Mbit, two classes, both with two\-piece linear service curves: .RS 4 .IP \(bu 4 -1st class \- 2mbit for 100ms, then 7mbit (convex \- 1st slope < 2nd slope) +1st class \- 2Mbit for 100ms, then 7Mbit (convex \- 1st slope < 2nd slope) .IP \(bu -2nd class \- 7mbit for 100ms, then 2mbit (concave \- 1st slope > 2nd slope) +2nd class \- 7Mbit for 100ms, then 2Mbit (concave \- 1st slope > 2nd slope) .RE .PP Assume for a moment, that we only use D() for both finding eligible packets, and choosing the most fitting one, thus eligible time would be computed as D^(\-1)(w) and deadline time would be computed as D^(\-1)(w+l). If the 2nd class starts sending packets 1 second after the 1st class, it's of course -impossible to guarantee 14mbit, as the interface capability is only 10mbit. +impossible to guarantee 14Mbit, as the interface capability is only 10Mbit. 
The only workaround in this scenario is to allow the 1st class to send the packets earlier that would normally be allowed. That's where separate E() comes to help. Putting all the math aside (see HFSC paper for details), E() for RT concave service curve is just like D(), but for the RT convex service curve \- it's constructed using \fIonly\fR RT service curve's 2nd slope (in our example -\- 7mbit). + 7Mbit). The effect of such E() \- packets will be sent earlier, and at the same time -D() \fIwill\fR be updated \- so current deadline time calculated from it will -be bigger. Thus, when the 2nd class starts sending packets later, both the 1st -and the 2nd class will be eligible, but the 2nd session's deadline time will be -smaller and its packets will be sent first. When the 1st class becomes idle at -some later point, the 2nd class will be able to "buffer" up again for later -active period of the 1st class. +D() \fIwill\fR be updated \- so the current deadline time calculated from it +will be later. Thus, when the 2nd class starts sending packets later, both +the 1st and the 2nd class will be eligible, but the 2nd session's deadline +time will be smaller and its packets will be sent first. When the 1st class +becomes idle at some later point, the 2nd class will be able to "buffer" up +again for later active period of the 1st class. A short remark \- in a situation, where the total amount of bandwidth -available on the interface is bigger than the allocated total realtime parts -(imagine interface 10 mbit, but 1mbit/2mbit and 2mbit/1mbit classes), the sole +available on the interface is larger than the allocated total realtime parts +(imagine a 10 Mbit interface, but 1Mbit/2Mbit and 2Mbit/1Mbit classes), the sole speed of the interface could suffice to guarantee the times. 
Important part of RT criterion is that apart from updating its D() and E(), @@ -233,18 +234,18 @@ real time and virtual time \- the decision is based solely on direct comparison of virtual times of all active subclasses \- the one with the smallest vt wins and gets scheduled. One immediate conclusion from this fact is that absolute values don't matter \- only ratios between them (so for example, two children -classes with simple linear 1mbit service curves will get the same treatment -from LS criterion's perspective, as if they were 5mbit). The other conclusion +classes with simple linear 1Mbit service curves will get the same treatment +from LS criterion's perspective, as if they were 5Mbit). The other conclusion is, that in perfectly fluid system with linear curves, all virtual times across whole class hierarchy would be equal. -Why is VC defined in term of virtual time (and what is it) ? +Why is VC defined in term of virtual time (and what is it)? Imagine an example: class A with two children \- A1 and A2, both with let's say -10mbit SCs. If A2 is idle, A1 receives all the bandwidth of A (and update its +10Mbit SCs. If A2 is idle, A1 receives all the bandwidth of A (and update its V() in the process). When A2 becomes active, A1's virtual time is already -\fIfar\fR bigger than A2's one. Considering the type of decision made by LS -criterion, A1 would become idle for a lot of time. We can workaround this +\fIfar\fR later than A2's one. Considering the type of decision made by LS +criterion, A1 would become idle for a long time. We can workaround this situation by adjusting virtual time of the class becoming active \- we do that by getting such time "up to date". HFSC uses a mean of the smallest and the biggest virtual time of currently active children fit for sending. 
As it's not @@ -259,20 +260,20 @@ either it's impossible to guarantee service curves and satisfy fairness during certain time periods: .RS 4 -Recall the example from RT section, slightly modified (with 3mbit slopes -instead of 2mbit ones): +Recall the example from RT section, slightly modified (with 3Mbit slopes +instead of 2Mbit ones): .IP \(bu 4 -1st class \- 3mbit for 100ms, then 7mbit (convex \- 1st slope < 2nd slope) +1st class \- 3Mbit for 100ms, then 7Mbit (convex \- 1st slope < 2nd slope) .IP \(bu -2nd class \- 7mbit for 100ms, then 3mbit (concave \- 1st slope > 2nd slope) +2nd class \- 7Mbit for 100ms, then 3Mbit (concave \- 1st slope > 2nd slope) .PP -They sum up nicely to 10mbit \- interface's capacity. But if we wanted to only +They sum up nicely to 10Mbit \- the interface's capacity. But if we wanted to only use LS for guarantees and fairness \- it simply won't work. In LS context, only V() is used for making decision which class to schedule. If the 2nd class becomes active when the 1st one is in its second slope, the fairness will be -preserved \- ratio will be 1:1 (7mbit:7mbit), but LS itself is of course +preserved \- ratio will be 1:1 (7Mbit:7Mbit), but LS itself is of course unable to guarantee the absolute values themselves \- as it would have to go beyond of what the interface is capable of. .RE @@ -287,28 +288,28 @@ This is similar to the above case, but a bit more subtle. We will consider two subtrees, arbitrated by their common (root here) parent: .nf -R (root) -\ 10mbit +R (root) -\ 10Mbit -A \- 7mbit, then 3mbit -A1 \- 5mbit, then 2mbit -A2 \- 2mbit, then 1mbit +A \- 7Mbit, then 3Mbit +A1 \- 5Mbit, then 2Mbit +A2 \- 2Mbit, then 1Mbit -B \- 3mbit, then 7mbit +B \- 3Mbit, then 7Mbit .fi R arbitrates between left subtree (A) and right (B). Assume that A2 and B are constantly backlogged, and at some later point A1 becomes backlogged (when all other classes are in their 2nd linear part). -What happens now ? 
B (choice made by R) will \fIalways\fR get 7 mbit as R is +What happens now? B (choice made by R) will \fIalways\fR get 7 Mbit as R is only (obviously) concerned with the ratio between its direct children. Thus A -subtree gets 3mbit, but its children would want (at the point when A1 became -backlogged) 5mbit + 1mbit. That's of course impossible, as they can only get -3mbit due to interface limitation. +subtree gets 3Mbit, but its children would want (at the point when A1 became +backlogged) 5Mbit + 1Mbit. That's of course impossible, as they can only get +3Mbit due to interface limitation. In the left subtree \- we have the same situation as previously (fair split between A1 and A2, but violated guarantees), but in the whole tree \- there's -no fairness (B got 7mbit, but A1 and A2 have to fit together in 3mbit) and +no fairness (B got 7Mbit, but A1 and A2 have to fit together in 3Mbit) and there's no guarantees for all classes (only B got what it wanted). Even if we violated fairness in the A subtree and set A2's service curve to 0, A1 would still not get the required bandwidth. @@ -317,83 +318,83 @@ still not get the required bandwidth. .SH "UPPERLIMIT CRITERION" . UL criterion is an extensions to LS one, that permits sending packets only -if current real time is bigger than fit\-time ('ft'). So the modified LS +if current real time is later than fit\-time ('ft'). So the modified LS criterion becomes: choose the smallest virtual time from all active children, such that fit\-time < current real time also holds. Fit\-time is calculated from F(), which is based on UL service curve. As you can see, its role is kinda similar to E() used in RT criterion. Also, for obvious reasons \- you can't specify UL service curve without LS one. 
-Main purpose of UL service curve is to limit HFSC to bandwidth available on the +The main purpose of the UL service curve is to limit HFSC to bandwidth available on the upstream router (think adsl home modem/router, and linux server as -nat/firewall/etc. with 100mbit+ connection to mentioned modem/router). +NAT/firewall/etc. with 100Mbit+ connection to mentioned modem/router). Typically, it's used to create a single class directly under root, setting -linear UL service curve to available bandwidth \- and then creating your class -structure from that class downwards. Of course, you're free to add UL service -(linear or not) curve to any class with LS criterion. +a linear UL service curve to available bandwidth \- and then creating your class +structure from that class downwards. Of course, you're free to add a UL service +curve (linear or not) to any class with LS criterion. -Important part about UL service curve is, that whenever at some point in time +An important part about the UL service curve is that whenever at some point in time a class doesn't qualify for linksharing due to its fit\-time, the next time it -does qualify, it will update its virtual time to the smallest virtual time of -all active children fit for linksharing. This way, one of the main things LS +does qualify it will update its virtual time to the smallest virtual time of +all active children fit for linksharing. This way, one of the main things the LS criterion tries to achieve \- equality of all virtual times across whole hierarchy \- is preserved (in perfectly fluid system with only linear curves, all virtual times would be equal). Without that, 'vt' would lag behind other virtual times, and could cause -problems. Consider interface with capacity 10mbit, and following leaf classes +problems. 
Consider an interface with a capacity of 10Mbit, and the following leaf classes (just in case you're skipping this text quickly \- this example shows behavior that \f(BIdoesn't happen\fR): .nf -A \- ls 5.0mbit -B \- ls 2.5mbit -C \- ls 2.5mbit, ul 2.5mbit +A \- ls 5.0Mbit +B \- ls 2.5Mbit +C \- ls 2.5Mbit, ul 2.5Mbit .fi -If B was idle, while A and C were constantly backlogged, they would normally +If B was idle, while A and C were constantly backlogged, A and C would normally (as far as LS criterion is concerned) divide bandwidth in 2:1 ratio. But due -to UL service curve in place, C would get at most 2.5mbit, and A would get the -remaining 7.5mbit. The longer the backlogged period, the more virtual times of +to UL service curve in place, C would get at most 2.5Mbit, and A would get the +remaining 7.5Mbit. The longer the backlogged period, the more the virtual times of A and C would drift apart. If B became backlogged at some later point in time, its virtual time would be set to (A's\~vt\~+\~C's\~vt)/2, thus blocking A from -sending any traffic, until B's virtual time catches up with A. +sending any traffic until B's virtual time catches up with A. . .SH "SEPARATE LS / RT SCs" . -Another difference from original HFSC paper, is that RT and LS SCs can be -specified separately. Moreover \- leaf classes are allowed to have only either -RT SC or LS SC. For interior classes, only LS SCs make sense \- Any RT SC will +Another difference from the original HFSC paper is that RT and LS SCs can be +specified separately. Moreover, leaf classes are allowed to have only either +RT SC or LS SC. For interior classes, only LS SCs make sense: any RT SC will be ignored. . .SH "CORNER CASES" . -Separate service curves for LS and RT criteria can lead to certain traps, +Separate service curves for LS and RT criteria can lead to certain traps that come from "fighting" between ideal linksharing and enforced realtime guarantees. 
Those situations didn't exist in original HFSC paper, where specifying separate LS / RT service curves was not discussed. -Consider interface with capacity 10mbit, with following leaf classes: +Consider an interface with a 10Mbit capacity, with the following leaf classes: .nf -A \- ls 5.0mbit, rt 8mbit -B \- ls 2.5mbit -C \- ls 2.5mbit +A \- ls 5.0Mbit, rt 8Mbit +B \- ls 2.5Mbit +C \- ls 2.5Mbit .fi Imagine A and C are constantly backlogged. As B is idle, A and C would divide bandwidth in 2:1 ratio, considering LS service curve (so in theory \- 6.66 and -3.33). Alas RT criterion takes priority, so A will get 8mbit and LS will be -able to compensate class C for only 2 mbit \- this will cause discrepancy +3.33). Alas RT criterion takes priority, so A will get 8Mbit and LS will be +able to compensate class C for only 2 Mbit \- this will cause discrepancy between virtual times of A and C. -Assume this situation lasts for a lot of time with no idle periods, and +Assume this situation lasts for a long time with no idle periods, and suddenly B becomes active. B's virtual time will be updated to (A's\~vt\~+\~C's\~vt)/2, effectively landing in the middle between A's and C's virtual time. The effect \- B, having no RT guarantees, will be punished and will not be allowed to transfer until C's virtual time catches up. -If the interface had higher capacity \- for example 100mbit, this example +If the interface had a higher capacity, for example 100Mbit, this example would behave perfectly fine though. Let's look a bit closer at the above example \- it "cleverly" invalidates one @@ -401,8 +402,8 @@ of the basic things LS criterion tries to achieve \- equality of all virtual times across class hierarchy. Leaf classes without RT service curves are literally left to their own fate (governed by messed up virtual times). -Also - it doesn't make much sense. 
Class A will always be guaranteed up to -8mbit, and this is more than any absolute bandwidth that could happen from its +Also, it doesn't make much sense. Class A will always be guaranteed up to +8Mbit, and this is more than any absolute bandwidth that could happen from its LS criterion (excluding trivial case of only A being active). If the bandwidth taken by A is smaller than absolute value from LS criterion, the unused part will be automatically assigned to other active classes (as A has idling periods @@ -411,7 +412,7 @@ average, bursts would be handled at the speed defined by RT criterion. Still, if extra speed is needed (e.g. due to latency), non linear service curves should be used in such case. -In the other words - LS criterion is meaningless in the above example. +In other words: the LS criterion is meaningless in the above example. You can quickly "workaround" it by making sure each leaf class has RT service curve assigned (thus guaranteeing all of them will get some bandwidth), but it @@ -422,13 +423,13 @@ happen \fIonly\fR in the first segment, then there's little wrong with "overusing" RT curve a bit: .nf -A \- ls 5.0mbit, rt 9mbit/30ms, then 1mbit -B \- ls 2.5mbit -C \- ls 2.5mbit +A \- ls 5.0Mbit, rt 9Mbit/30ms, then 1Mbit +B \- ls 2.5Mbit +C \- ls 2.5Mbit .fi Here, the vt of A will "spike" in the initial period, but then A will never get more -than 1mbit, until B & C catch up. Then everything will be back to normal. +than 1Mbit until B & C catch up. Then everything will be back to normal. . .SH "LINUX AND TIMER RESOLUTION" . @@ -457,43 +458,43 @@ or aren't available. This is important to keep those settings in mind, as in scenario like: no tickless, no HR timers, frequency set to 100hz \- throttling accuracy would be -at 10ms.
It doesn't automatically mean you would be limited to ~0.8Mbit/s (assuming packets at ~1KB) \- as long as your queues are prepared to cover for -timer inaccuracy. Of course, in case of e.g. locally generated udp traffic \- +timer inaccuracy. Of course, in case of e.g. locally generated UDP traffic \- appropriate socket size is needed as well. Short example to make it more understandable (assume hardcore anti\-schedule settings \- HZ=100, no HR timers, no tickless): .nf tc qdisc add dev eth0 root handle 1:0 hfsc default 1 -tc class add dev eth0 parent 1:0 classid 1:1 hfsc rt m2 10mbit +tc class add dev eth0 parent 1:0 classid 1:1 hfsc rt m2 10Mbit .fi -Assuming packet of ~1KB size and HZ=100, that averages to ~0.8mbit \- anything -beyond it (e.g. the above example with specified rate over 10x bigger) will +Assuming packet of ~1KB size and HZ=100, that averages to ~0.8Mbit \- anything +beyond it (e.g. the above example with specified rate over 10x larger) will require appropriate queuing and cause bursts every ~10 ms. As you can imagine, any HFSC's RT guarantees will be seriously invalidated by that. Aforementioned example is mainly important if you deal with old hardware \- as -it's particularly popular for home server chores. Even then, you can easily +is particularly popular for home server chores. Even then, you can easily set HZ=1000 and have very accurate scheduling for typical adsl speeds. Anything modern (apic or even hpet msi based timers + \&'tickless system') -will provide enough accuracy for superb 1gbit scheduling. For example, on one -of basically cheap dual core AMD boards I have with following settings: +will provide enough accuracy for superb 1Gbit scheduling. 
For example, on one +of my cheap dual-core AMD boards I have the following settings: .nf tc qdisc add dev eth0 parent root handle 1:0 hfsc default 1 -tc class add dev eth0 paretn 1:0 classid 1:1 hfsc rt m2 300mbit +tc class add dev eth0 parent 1:0 classid 1:1 hfsc rt m2 300mbit .fi -And simple: +And a simple: .nf nc \-u dst.host.com 54321 /dev/null .fi -\&...will yield following effects over period of ~10 seconds (taken from +\&...will yield the following effects over a period of ~10 seconds (taken from /proc/interrupts): .nf @@ -502,16 +503,16 @@ nc \-l \-p 54321 >/dev/null .fi That's roughly 31000/s. Now compare it with HZ=1000 setting. The obvious -drawback of it is that cpu load can be rather extensive with servicing that -many timer interrupts. Example with 300mbit RT service curve on 1gbit link is +drawback of it is that cpu load can be rather high with servicing that +many timer interrupts. The example with 300Mbit RT service curve on 1Gbit link is particularly ugly, as it requires a lot of throttling with minuscule delays. -Also note that it's just an example showing capability of current hardware. -The above example (essentially 300mbit TBF emulator) is pointless on internal -interface to begin with \- you will pretty much always want regular LS service -curve there, and in such scenario HFSC simply doesn't throttle at all. +Also note that it's just an example showing the capabilities of current hardware. +The above example (essentially a 300Mbit TBF emulator) is pointless on an internal +interface to begin with: you will pretty much always want a regular LS service +curve there, and in such a scenario HFSC simply doesn't throttle at all. -300mbit RT service curve (selected columns from mpstat \-P ALL 1): +300Mbit RT service curve (selected columns from mpstat \-P ALL 1): .nf 10:56:43 PM CPU %sys %irq %soft %idle @@ -520,28 +521,28 @@ curve there, and in such scenario HFSC simply doesn't throttle at all. 
10:56:44 PM 1 4.95 12.87 6.93 73.27 .fi -So, in rare case you need those speeds with only RT service curve, or with UL -service curve \- remember about drawbacks. +So, in the rare case you need those speeds with only a RT service curve, or with a UL +service curve: remember the drawbacks. . .SH "CAVEAT: RANDOM ONLINE EXAMPLES" . For reasons unknown (though well guessed), many examples you can google love to overuse UL criterion and stuff it in every node possible. This makes no sense and works against what HFSC tries to do (and does pretty damn well). Use UL -where it makes sense - on the uppermost node to match upstream router's uplink -capacity. Or - in special cases, such as testing (limit certain subtree to some -speed) or customers that must never get more than certain speed. In the last -case you can usually achieve the same by just using RT criterion without LS+UL +where it makes sense: on the uppermost node to match upstream router's uplink +capacity. Or in special cases, such as testing (limit certain subtree to some +speed), or customers that must never get more than certain speed. In the last +case you can usually achieve the same by just using a RT criterion without LS+UL on leaf nodes. -As for router case - remember it's good to differentiate between "traffic to +As for the router case - remember it's good to differentiate between "traffic to router" (remote console, web config, etc.) and "outgoing traffic", so for example: .nf tc qdisc add dev eth0 root handle 1:0 hfsc default 0x8002 -tc class add dev eth0 parent 1:0 classid 1:999 hfsc rt m2 50mbit -tc class add dev eth0 parent 1:0 classid 1:1 hfsc ls m2 2mbit ul m2 2mbit +tc class add dev eth0 parent 1:0 classid 1:999 hfsc rt m2 50Mbit +tc class add dev eth0 parent 1:0 classid 1:1 hfsc ls m2 2Mbit ul m2 2Mbit .fi \&... 
so "internet" tree under 1:1 and "router itself" as 1:999 diff --git a/man/man8/Makefile b/man/man8/Makefile index 4bad9d63..d208f3b0 100644 --- a/man/man8/Makefile +++ b/man/man8/Makefile @@ -9,7 +9,7 @@ MAN8PAGES = $(TARGETS) ip.8 arpd.8 lnstat.8 routel.8 rtacct.8 rtmon.8 ss.8 \ ip-addrlabel.8 ip-l2tp.8 \ ip-maddress.8 ip-monitor.8 ip-mroute.8 ip-neighbour.8 \ ip-netns.8 ip-ntable.8 ip-rule.8 ip-tunnel.8 ip-xfrm.8 \ - ip-tcp_metrics.8 + ip-tcp_metrics.8 ip-netconf.8 all: $(TARGETS) diff --git a/man/man8/arpd.8 b/man/man8/arpd.8 index a14044b4..6b9a43a9 100644 --- a/man/man8/arpd.8 +++ b/man/man8/arpd.8 @@ -4,12 +4,12 @@ arpd \- userspace arp daemon. .SH SYNOPSIS -Usage: arpd [ -lkh? ] [ -a N ] [ -b dbase ] [ -B number ] [ -f file ] [-p interval ] [ -n time ] [ -R rate ] [ interfaces ] +Usage: arpd [ -lkh? ] [ -a N ] [ -b dbase ] [ -B number ] [ -f file ] [-p interval ] [ -n time ] [ -R rate ] [ ] .SH DESCRIPTION The .B arpd -daemon collects gratuitous ARP information, saving it on local disk and feeding it to kernel on demand to avoid redundant broadcasting due to limited size of kernel ARP cache. +daemon collects gratuitous ARP information, saving it on local disk and feeding it to the kernel on demand to avoid redundant broadcasting due to limited size of the kernel ARP cache. .SH OPTIONS .TP @@ -17,41 +17,41 @@ daemon collects gratuitous ARP information, saving it on local disk and feeding Print help .TP -l -Dump arpd database to stdout and exit. Output consists of three columns: interface index, IP address and MAC address. Negative entries for dead hosts are also shown, in this case MAC address is replaced by word FAILED followed by colon and time when the fact that host is dead was proven the last time. +Dump the arpd database to stdout and exit. The output consists of three columns: the interface index, the IP address of the interface, and the MAC address of the interface. 
Negative entries for dead hosts are also shown, in this case the MAC address is replaced by the word FAILED followed by a colon and the most recent time when the fact that the host is dead was proven. .TP -f -Read and load arpd database from FILE in text format similar dumped by option -l. Exit after load, probably listing resulting database, if option -l is also given. If FILE is -, stdin is read to get ARP table. +Read and load an arpd database from FILE in a text format similar to that dumped by option -l. Exit after load, possibly listing resulting database, if option -l is also given. If FILE is -, stdin is read to get the ARP table. .TP -b -location of database file. Default location is /var/lib/arpd/arpd.db +the location of the database file. The default location is /var/lib/arpd/arpd.db .TP -a -arpd not only passively listens ARP on wire, but also send brodcast queries itself. NUMBER is number of such queries to make before destination is considered as dead. When arpd is started as kernel helper (i.e. with app_solicit enabled in sysctl or even with option -k) without this option and still did not learn enough information, you can observe 1 second gaps in service. Not fatal, but not good. +With this option, arpd not only passively listens for ARP packets on the interface, but also sends broadcast queries itself. NUMBER is the number of such queries to make before a destination is considered dead. When arpd is started as a kernel helper (i.e. with app_solicit enabled in sysctl or even with option -k) without this option and still did not learn enough information, you can observe 1 second gaps in service. Not fatal, but not good. .TP -k -Suppress sending broadcast queries by the kernel. This option only makes sense together with option -a. +Suppress sending broadcast queries by the kernel. This option only makes sense together with option -a. .TP -n