From 1f01dd89f533b5637c83ddbfd483c165411b5cde Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Fri, 20 Feb 2015 16:58:45 -0800 Subject: [PATCH 001/141] update headers to 3.20-rc1 Add net_namespace.h and update other headers --- include/linux/fou.h | 1 + include/linux/if_link.h | 1 + include/linux/net_namespace.h | 23 +++++++++++++++++++++++ include/linux/pkt_sched.h | 2 ++ 4 files changed, 27 insertions(+) create mode 100644 include/linux/net_namespace.h diff --git a/include/linux/fou.h b/include/linux/fou.h index 8e638059..13a78e41 100644 --- a/include/linux/fou.h +++ b/include/linux/fou.h @@ -14,6 +14,7 @@ enum { FOU_ATTR_AF, /* u8 */ FOU_ATTR_IPPROTO, /* u8 */ FOU_ATTR_TYPE, /* u8 */ + FOU_ATTR_REMCSUM_NOPARTIAL, /* flag */ __FOU_ATTR_MAX, }; diff --git a/include/linux/if_link.h b/include/linux/if_link.h index ac64724c..3450c3fb 100644 --- a/include/linux/if_link.h +++ b/include/linux/if_link.h @@ -372,6 +372,7 @@ enum { IFLA_VXLAN_REMCSUM_TX, IFLA_VXLAN_REMCSUM_RX, IFLA_VXLAN_GBP, + IFLA_VXLAN_REMCSUM_NOPARTIAL, __IFLA_VXLAN_MAX }; #define IFLA_VXLAN_MAX (__IFLA_VXLAN_MAX - 1) diff --git a/include/linux/net_namespace.h b/include/linux/net_namespace.h new file mode 100644 index 00000000..9a92b7e1 --- /dev/null +++ b/include/linux/net_namespace.h @@ -0,0 +1,23 @@ +/* Copyright (c) 2015 6WIND S.A. + * Author: Nicolas Dichtel + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. 
+ */ +#ifndef _LINUX_NET_NAMESPACE_H_ +#define _LINUX_NET_NAMESPACE_H_ + +/* Attributes of RTM_NEWNSID/RTM_GETNSID messages */ +enum { + NETNSA_NONE, +#define NETNSA_NSID_NOT_ASSIGNED -1 + NETNSA_NSID, + NETNSA_PID, + NETNSA_FD, + __NETNSA_MAX, +}; + +#define NETNSA_MAX (__NETNSA_MAX - 1) + +#endif /* _LINUX_NET_NAMESPACE_H_ */ diff --git a/include/linux/pkt_sched.h b/include/linux/pkt_sched.h index d62316ba..534b8471 100644 --- a/include/linux/pkt_sched.h +++ b/include/linux/pkt_sched.h @@ -774,6 +774,8 @@ enum { TCA_FQ_FLOW_REFILL_DELAY, /* flow credit refill delay in usec */ + TCA_FQ_ORPHAN_MASK, /* mask applied to orphaned skb hashes */ + __TCA_FQ_MAX }; From 1527a17ed88fa4d1778f71e066bca1d68d5eba0a Mon Sep 17 00:00:00 2001 From: Vadim Kochan Date: Fri, 13 Feb 2015 13:01:08 +0200 Subject: [PATCH 002/141] ss: Fix filter expression parser Seems expression parser did not work correctly some long time and such simple things did not work too: # ss -a '( sport = :ssh )' Signed-off-by: Vadim Kochan --- misc/ss.c | 51 ++++++++++++++++++++++++++++++++------------------- 1 file changed, 32 insertions(+), 19 deletions(-) diff --git a/misc/ss.c b/misc/ss.c index 0a6a65ee..eb4e0ec0 100644 --- a/misc/ss.c +++ b/misc/ss.c @@ -240,10 +240,11 @@ static void filter_db_set(struct filter *f, int db) static void filter_af_set(struct filter *f, int af) { - f->dbs |= default_afs[af].dbs; - f->states |= default_afs[af].states; - f->families |= 1 << af; - do_default = 0; + f->dbs |= default_afs[af].dbs; + f->states |= default_afs[af].states; + f->families |= 1 << af; + do_default = 0; + preferred_family = af; } static int filter_af_get(struct filter *f, int af) @@ -1316,15 +1317,12 @@ static int xll_name_to_index(const char *dev) void *parse_hostcond(char *addr) { char *port = NULL; - struct aafilter a; + struct aafilter a = { .port = -1 }; struct aafilter *res; - int fam = 0; + int fam = preferred_family; struct filter *f = &current_filter; - memset(&a, 0, sizeof(a)); - a.port = -1; - - if
(filter_af_get(f, AF_UNIX) || strncmp(addr, "unix:", 5) == 0) { + if (fam == AF_UNIX || strncmp(addr, "unix:", 5) == 0) { char *p; a.addr.family = AF_UNIX; if (strncmp(addr, "unix:", 5) == 0) @@ -1336,7 +1334,7 @@ void *parse_hostcond(char *addr) goto out; } - if (filter_af_get(f, AF_PACKET) || strncmp(addr, "link:", 5) == 0) { + if (fam == AF_PACKET || strncmp(addr, "link:", 5) == 0) { a.addr.family = AF_PACKET; a.addr.bitlen = 0; if (strncmp(addr, "link:", 5) == 0) @@ -1362,7 +1360,7 @@ void *parse_hostcond(char *addr) goto out; } - if (filter_af_get(f, AF_NETLINK) || strncmp(addr, "netlink:", 8) == 0) { + if (fam == AF_NETLINK || strncmp(addr, "netlink:", 8) == 0) { a.addr.family = AF_NETLINK; a.addr.bitlen = 0; if (strncmp(addr, "netlink:", 8) == 0) @@ -1388,12 +1386,14 @@ void *parse_hostcond(char *addr) goto out; } - if (filter_af_get(f, AF_INET) || !strncmp(addr, "inet:", 5)) { - addr += 5; + if (fam == AF_INET || !strncmp(addr, "inet:", 5)) { fam = AF_INET; - } else if (filter_af_get(f, AF_INET6) || !strncmp(addr, "inet6:", 6)) { - addr += 6; + if (!strncmp(addr, "inet:", 5)) + addr += 5; + } else if (fam == AF_INET6 || !strncmp(addr, "inet6:", 6)) { fam = AF_INET6; + if (!strncmp(addr, "inet6:", 6)) + addr += 6; } /* URL-like literal [] */ @@ -1461,8 +1461,11 @@ void *parse_hostcond(char *addr) } out: - if (fam) + if (fam != AF_UNSPEC) { + f->families = 0; filter_af_set(f, fam); + filter_merge(f, f, 0); + } res = malloc(sizeof(*res)); if (res) @@ -2212,6 +2215,9 @@ static int tcp_show(struct filter *f, int socktype) char *buf = NULL; int bufsize = 64*1024; + if (!filter_af_get(f, AF_INET) && !filter_af_get(f, AF_INET6)) + return 0; + dg_proto = TCP_PROTO; if (getenv("TCPDIAG_FILE")) @@ -2331,6 +2337,9 @@ static int udp_show(struct filter *f) { FILE *fp = NULL; + if (!filter_af_get(f, AF_INET) && !filter_af_get(f, AF_INET6)) + return 0; + dg_proto = UDP_PROTO; if (!getenv("PROC_NET_UDP") && !getenv("PROC_ROOT") @@ -2367,6 +2376,9 @@ static int 
raw_show(struct filter *f) { FILE *fp = NULL; + if (!filter_af_get(f, AF_INET) && !filter_af_get(f, AF_INET6)) + return 0; + dg_proto = RAW_PROTO; if (f->families&(1< Date: Fri, 13 Feb 2015 22:13:58 +0200 Subject: [PATCH 003/141] ss: Split tcpstap struct to sockstat & tcpstat Signed-off-by: Vadim Kochan --- misc/ss.c | 154 ++++++++++++++++++++++++++++++------------------------ 1 file changed, 86 insertions(+), 68 deletions(-) diff --git a/misc/ss.c b/misc/ss.c index eb4e0ec0..922cf8c6 100644 --- a/misc/ss.c +++ b/misc/ss.c @@ -691,16 +691,7 @@ static const char *sstate_namel[] = { [SS_CLOSING] = "closing", }; -struct dctcpstat -{ - unsigned int ce_state; - unsigned int alpha; - unsigned int ab_ecn; - unsigned int ab_tot; - bool enabled; -}; - -struct tcpstat +struct sockstat { inet_prefix local; inet_prefix remote; @@ -713,6 +704,20 @@ struct tcpstat int refcnt; unsigned int iface; unsigned long long sk; +}; + +struct dctcpstat +{ + unsigned int ce_state; + unsigned int alpha; + unsigned int ab_ecn; + unsigned int ab_tot; + bool enabled; +}; + +struct tcpstat +{ + struct sockstat ss; int timer; int timeout; int probes; @@ -1000,7 +1005,7 @@ static int unix_match(const inet_prefix *a, const inet_prefix *p) return !fnmatch(pattern, addr, 0); } -static int run_ssfilter(struct ssfilter *f, struct tcpstat *s) +static int run_ssfilter(struct ssfilter *f, struct sockstat *s) { switch (f->type) { case SSF_S_AUTO: @@ -1487,7 +1492,7 @@ static char *proto_name(int protocol) return "???"; } -static void inet_stats_print(struct tcpstat *s, int protocol) +static void inet_stats_print(struct sockstat *s, int protocol) { char *buf = NULL; @@ -1501,17 +1506,6 @@ static void inet_stats_print(struct tcpstat *s, int protocol) formatted_print(&s->local, s->lport, s->iface); formatted_print(&s->remote, s->rport, 0); - if (show_options) { - if (s->timer) { - if (s->timer > 4) - s->timer = 5; - printf(" timer:(%s,%s,%d)", - tmr_name[s->timer], - print_ms_timer(s->timeout), - s->retrans); 
- } - } - if (show_proc_ctx || show_sock_ctx) { if (find_entry(s->ino, &buf, (show_proc_ctx & show_sock_ctx) ? @@ -1527,7 +1521,8 @@ static void inet_stats_print(struct tcpstat *s, int protocol) } } -static int proc_parse_inet_addr(char *loc, char *rem, int family, struct tcpstat *s) +static int proc_parse_inet_addr(char *loc, char *rem, int family, struct + sockstat *s) { s->local.family = s->remote.family = family; if (family == AF_INET) { @@ -1658,7 +1653,7 @@ static void tcp_stats_print(struct tcpstat *s) printf(" retrans:%u/%u", s->retrans, s->retrans_total); if (s->lost) printf(" lost:%u", s->lost); - if (s->sacked && s->state != SS_LISTEN) + if (s->sacked && s->ss.state != SS_LISTEN) printf(" sacked:%u", s->sacked); if (s->fackets) printf(" fackets:%u", s->fackets); @@ -1670,6 +1665,18 @@ static void tcp_stats_print(struct tcpstat *s) printf(" rcv_space:%d", s->rcv_space); } +static void tcp_timer_print(struct tcpstat *s) +{ + if (s->timer) { + if (s->timer > 4) + s->timer = 5; + printf(" timer:(%s,%s,%d)", + tmr_name[s->timer], + print_ms_timer(s->timeout), + s->retrans); + } +} + static int tcp_show_line(char *line, const struct filter *f, int family) { int rto = 0, ato = 0; @@ -1686,17 +1693,17 @@ static int tcp_show_line(char *line, const struct filter *f, int family) if (!(f->states & (1 << state))) return 0; - proc_parse_inet_addr(loc, rem, family, &s); + proc_parse_inet_addr(loc, rem, family, &s.ss); - if (f->f && run_ssfilter(f->f, &s) == 0) + if (f->f && run_ssfilter(f->f, &s.ss) == 0) return 0; opt[0] = 0; n = sscanf(data, "%x %x:%x %x:%x %x %d %d %u %d %llx %d %d %d %d %d %[^\n]\n", - &s.state, &s.wq, &s.rq, - &s.timer, &s.timeout, &s.retrans, &s.uid, &s.probes, &s.ino, - &s.refcnt, &s.sk, &rto, &ato, &s.qack, - &s.cwnd, &s.ssthresh, opt); + &s.ss.state, &s.ss.wq, &s.ss.rq, + &s.timer, &s.timeout, &s.retrans, &s.ss.uid, &s.probes, + &s.ss.ino, &s.ss.refcnt, &s.ss.sk, &rto, &ato, &s.qack, &s.cwnd, + &s.ssthresh, opt); if (n < 17) opt[0] = 0; @@ 
-1716,13 +1723,16 @@ static int tcp_show_line(char *line, const struct filter *f, int family) s.ssthresh = s.ssthresh == -1 ? 0 : s.ssthresh; s.rto = s.rto != 3 * hz ? s.rto / hz : 0; - inet_stats_print(&s, IPPROTO_TCP); + inet_stats_print(&s.ss, IPPROTO_TCP); + + if (show_options) + tcp_timer_print(&s); if (show_details) { - if (s.uid) - printf(" uid:%u", (unsigned)s.uid); - printf(" ino:%u", s.ino); - printf(" sk:%llx", s.sk); + if (s.ss.uid) + printf(" uid:%u", (unsigned)s.ss.uid); + printf(" ino:%u", s.ss.ino); + printf(" sk:%llx", s.ss.sk); if (opt[0]) printf(" opt:\"%s\"", opt); } @@ -1807,6 +1817,8 @@ static void tcp_show_info(const struct nlmsghdr *nlh, struct inet_diag_msg *r, double rtt = 0; struct tcpstat s = {}; + s.ss.state = r->idiag_state; + print_skmeminfo(tb, INET_DIAG_SKMEMINFO); if (tb[INET_DIAG_INFO]) { @@ -1916,7 +1928,7 @@ static int inet_show_sock(struct nlmsghdr *nlh, struct filter *f, int protocol) { struct rtattr * tb[INET_DIAG_MAX+1]; struct inet_diag_msg *r = NLMSG_DATA(nlh); - struct tcpstat s = {}; + struct sockstat s = {}; parse_rtattr(tb, INET_DIAG_MAX, (struct rtattr*)(r+1), nlh->nlmsg_len - NLMSG_LENGTH(sizeof(*r))); @@ -1927,9 +1939,6 @@ static int inet_show_sock(struct nlmsghdr *nlh, struct filter *f, int protocol) s.rport = ntohs(r->id.idiag_dport); s.wq = r->idiag_wqueue; s.rq = r->idiag_rqueue; - s.timer = r->idiag_timer; - s.timeout = r->idiag_expires; - s.retrans = r->idiag_retrans; s.ino = r->idiag_inode; s.uid = r->idiag_uid; s.iface = r->id.idiag_if; @@ -1948,6 +1957,15 @@ static int inet_show_sock(struct nlmsghdr *nlh, struct filter *f, int protocol) inet_stats_print(&s, protocol); + if (show_options) { + struct tcpstat t = {}; + + t.timer = r->idiag_timer; + t.timeout = r->idiag_expires; + t.retrans = r->idiag_retrans; + tcp_timer_print(&t); + } + if (show_details) { if (r->idiag_uid) printf(" uid:%u", (unsigned)r->idiag_uid); @@ -2292,7 +2310,7 @@ outerr: static int dgram_show_line(char *line, const struct filter *f, 
int family) { - struct tcpstat s = {}; + struct sockstat s = {}; char *loc, *rem, *data; char opt[256]; int n; @@ -2495,15 +2513,15 @@ static void unix_stats_print(struct unixstat *list, struct filter *f) } if (f->f) { - struct tcpstat tst; - tst.local.family = AF_UNIX; - tst.remote.family = AF_UNIX; - memcpy(tst.local.data, &s->name, sizeof(s->name)); + struct sockstat st; + st.local.family = AF_UNIX; + st.remote.family = AF_UNIX; + memcpy(st.local.data, &s->name, sizeof(s->name)); if (strcmp(peer, "*") == 0) - memset(tst.remote.data, 0, sizeof(peer)); + memset(st.remote.data, 0, sizeof(peer)); else - memcpy(tst.remote.data, &peer, sizeof(peer)); - if (run_ssfilter(f->f, &tst) == 0) + memcpy(st.remote.data, &peer, sizeof(peer)); + if (run_ssfilter(f->f, &st) == 0) continue; } @@ -2728,14 +2746,14 @@ static int packet_stats_print(struct pktstat *s, const struct filter *f) char *buf = NULL; if (f->f) { - struct tcpstat tst; - tst.local.family = AF_PACKET; - tst.remote.family = AF_PACKET; - tst.rport = 0; - tst.lport = s->iface; - tst.local.data[0] = s->prot; - tst.remote.data[0] = 0; - if (run_ssfilter(f->f, &tst) == 0) + struct sockstat st; + st.local.family = AF_PACKET; + st.remote.family = AF_PACKET; + st.rport = 0; + st.lport = s->iface; + st.local.data[0] = s->prot; + st.remote.data[0] = 0; + if (run_ssfilter(f->f, &st) == 0) return 1; } @@ -2911,14 +2929,14 @@ static void netlink_show_one(struct filter *f, SPRINT_BUF(prot_name); if (f->f) { - struct tcpstat tst; - tst.local.family = AF_NETLINK; - tst.remote.family = AF_NETLINK; - tst.rport = -1; - tst.lport = pid; - tst.local.data[0] = prot; - tst.remote.data[0] = 0; - if (run_ssfilter(f->f, &tst) == 0) + struct sockstat st; + st.local.family = AF_NETLINK; + st.remote.family = AF_NETLINK; + st.rport = -1; + st.lport = pid; + st.local.data[0] = prot; + st.remote.data[0] = 0; + if (run_ssfilter(f->f, &st) == 0) return; } @@ -3129,7 +3147,7 @@ static int get_snmp_int(char *proto, char *key, int *result) /* Get 
stats from sockstat */ -struct sockstat +struct ssummary { int socks; int tcp_mem; @@ -3148,7 +3166,7 @@ struct sockstat int frag6_mem; }; -static void get_sockstat_line(char *line, struct sockstat *s) +static void get_sockstat_line(char *line, struct ssummary *s) { char id[256], rem[256]; @@ -3177,7 +3195,7 @@ static void get_sockstat_line(char *line, struct sockstat *s) &s->tcp_orphans, &s->tcp_tws, &s->tcp_total, &s->tcp_mem); } -static int get_sockstat(struct sockstat *s) +static int get_sockstat(struct ssummary *s) { char buf[256]; FILE *fp; @@ -3201,7 +3219,7 @@ static int get_sockstat(struct sockstat *s) static int print_summary(void) { - struct sockstat s; + struct ssummary s; struct snmpstat sn; if (get_sockstat(&s) < 0) From 89f634f9177b5f228d78298e2e42be7a65808af8 Mon Sep 17 00:00:00 2001 From: Vadim Kochan Date: Fri, 13 Feb 2015 22:13:59 +0200 Subject: [PATCH 004/141] ss: Replace pktstat struct by new sockstat struct Signed-off-by: Vadim Kochan --- misc/ss.c | 33 ++++++++++++--------------------- 1 file changed, 12 insertions(+), 21 deletions(-) diff --git a/misc/ss.c b/misc/ss.c index 922cf8c6..4a7cbef1 100644 --- a/misc/ss.c +++ b/misc/ss.c @@ -693,6 +693,8 @@ static const char *sstate_namel[] = { struct sockstat { + uint8_t type; + uint16_t prot; inet_prefix local; inet_prefix remote; int lport; @@ -2731,29 +2733,18 @@ static int unix_show(struct filter *f) return 0; } -struct pktstat { - uint8_t type; - uint16_t prot; - uint32_t iface; - int state; - uint32_t rq; - uid_t uid; - ino_t ino; -}; - -static int packet_stats_print(struct pktstat *s, const struct filter *f) +static int packet_stats_print(struct sockstat *s, const struct filter *f) { char *buf = NULL; if (f->f) { - struct sockstat st; - st.local.family = AF_PACKET; - st.remote.family = AF_PACKET; - st.rport = 0; - st.lport = s->iface; - st.local.data[0] = s->prot; - st.remote.data[0] = 0; - if (run_ssfilter(f->f, &st) == 0) + s->local.family = AF_PACKET; + s->remote.family = AF_PACKET; + 
s->rport = 0; + s->lport = s->iface; + s->local.data[0] = s->prot; + s->remote.data[0] = 0; + if (run_ssfilter(f->f, s) == 0) return 1; } @@ -2802,7 +2793,7 @@ static int packet_show_sock(const struct sockaddr_nl *addr, const struct filter *f = arg; struct packet_diag_msg *r = NLMSG_DATA(nlh); struct rtattr *tb[PACKET_DIAG_MAX+1]; - struct pktstat stat = {}; + struct sockstat stat = {}; parse_rtattr(tb, PACKET_DIAG_MAX, (struct rtattr*)(r+1), nlh->nlmsg_len - NLMSG_LENGTH(sizeof(*r))); @@ -2871,7 +2862,7 @@ static int packet_show_netlink(struct filter *f) static int packet_show_line(char *buf, const struct filter *f, int fam) { unsigned long long sk; - struct pktstat stat = {}; + struct sockstat stat = {}; int type, prot, iface, state, rq, uid, ino; sscanf(buf, "%llx %*d %d %x %d %d %u %u %u", From ec4d0d8a9def3523af615f1b48fb55a247ee921d Mon Sep 17 00:00:00 2001 From: Vadim Kochan Date: Fri, 13 Feb 2015 22:14:00 +0200 Subject: [PATCH 005/141] ss: Replace unixstat struct by new sockstat struct Signed-off-by: Vadim Kochan --- misc/ss.c | 141 +++++++++++++++++++++++++++++------------------------- 1 file changed, 75 insertions(+), 66 deletions(-) diff --git a/misc/ss.c b/misc/ss.c index 4a7cbef1..03692a5d 100644 --- a/misc/ss.c +++ b/misc/ss.c @@ -661,6 +661,18 @@ static int get_slabstat(struct slabstat *s) return 0; } +static inline void sock_addr_set_str(inet_prefix *prefix, char **ptr) +{ + memcpy(prefix->data, ptr, sizeof(char *)); +} + +static inline char *sock_addr_get_str(const inet_prefix *prefix) +{ + char *tmp ; + memcpy(&tmp, prefix->data, sizeof(char *)); + return tmp; +} + static const char *sstate_name[] = { "UNKNOWN", [SS_ESTABLISHED] = "ESTAB", @@ -693,7 +705,8 @@ static const char *sstate_namel[] = { struct sockstat { - uint8_t type; + struct sockstat *next; + unsigned int type; uint16_t prot; inet_prefix local; inet_prefix remote; @@ -997,9 +1010,9 @@ static int inet2_addr_match(const inet_prefix *a, const inet_prefix *p, static int unix_match(const 
inet_prefix *a, const inet_prefix *p) { - char *addr, *pattern; - memcpy(&addr, a->data, sizeof(addr)); - memcpy(&pattern, p->data, sizeof(pattern)); + char *addr = sock_addr_get_str(a); + char *pattern = sock_addr_get_str(p); + if (pattern == NULL) return 1; if (addr == NULL) @@ -1015,8 +1028,7 @@ static int run_ssfilter(struct ssfilter *f, struct sockstat *s) static int low, high=65535; if (s->local.family == AF_UNIX) { - char *p; - memcpy(&p, s->local.data, sizeof(p)); + char *p = sock_addr_get_str(&s->local); return p == NULL || (p[0] == '@' && strlen(p) == 6 && strspn(p+1, "0123456789abcdef") == 5); } @@ -1336,7 +1348,7 @@ void *parse_hostcond(char *addr) addr+=5; p = strdup(addr); a.addr.bitlen = 8*strlen(p); - memcpy(a.addr.data, &p, sizeof(p)); + sock_addr_set_str(&a.addr, &p); fam = AF_UNIX; goto out; } @@ -2427,31 +2439,21 @@ outerr: } while (0); } -struct unixstat -{ - struct unixstat *next; - int ino; - int peer; - char *peer_name; - int rq; - int wq; - int state; - int type; - char *name; -}; - int unix_state_map[] = { SS_CLOSE, SS_SYN_SENT, SS_ESTABLISHED, SS_CLOSING }; -#define MAX_UNIX_REMEMBER (1024*1024/sizeof(struct unixstat)) +#define MAX_UNIX_REMEMBER (1024*1024/sizeof(struct sockstat)) -static void unix_list_free(struct unixstat *list) +static void unix_list_free(struct sockstat *list) { while (list) { - struct unixstat *s = list; + struct sockstat *s = list; + char *name = sock_addr_get_str(&s->local); + list = list->next; - if (s->name) - free(s->name); + + if (name) + free(name); free(s); } } @@ -2475,7 +2477,7 @@ static const char *unix_netid_name(int type) return netid; } -static bool unix_type_skip(struct unixstat *s, struct filter *f) +static bool unix_type_skip(struct sockstat *s, struct filter *f) { if (s->type == SOCK_STREAM && !(f->dbs&(1<next) { - if (!(f->states & (1<state))) + if (!(f->states & (1 << s->state))) continue; if (unix_type_skip(s, f)) continue; - peer = "*"; - if (s->peer_name) - peer = s->peer_name; + local = 
sock_addr_get_str(&s->local); + peer = "*"; + + if (s->rport && use_proc) { + struct sockstat *p; - if (s->peer && !s->peer_name) { - struct unixstat *p; for (p = list; p; p = p->next) { - if (s->peer == p->ino) + if (s->rport == p->lport) break; } + if (!p) { peer = "?"; } else { - peer = p->name ? : "*"; + peer = sock_addr_get_str(&p->local); + peer = peer ? : "*"; } } if (f->f) { - struct sockstat st; - st.local.family = AF_UNIX; - st.remote.family = AF_UNIX; - memcpy(st.local.data, &s->name, sizeof(s->name)); if (strcmp(peer, "*") == 0) - memset(st.remote.data, 0, sizeof(peer)); + memset(s->remote.data, 0, sizeof(char *)); else - memcpy(st.remote.data, &peer, sizeof(peer)); - if (run_ssfilter(f->f, &st) == 0) + sock_addr_set_str(&s->remote, &peer); + + if (run_ssfilter(f->f, s) == 0) continue; } @@ -2534,8 +2541,8 @@ static void unix_stats_print(struct unixstat *list, struct filter *f) printf("%-*s ", state_width, sstate_name[s->state]); printf("%-6d %-6d ", s->rq, s->wq); printf("%*s %-*d %*s %-*d", - addr_width, s->name ? : "*", serv_width, s->ino, - addr_width, peer, serv_width, s->peer); + addr_width, local ? 
: "*", serv_width, + s->lport, addr_width, peer, serv_width, s->rport); char *buf = NULL; if (show_proc_ctx || show_sock_ctx) { @@ -2561,15 +2568,16 @@ static int unix_show_sock(const struct sockaddr_nl *addr, struct nlmsghdr *nlh, struct filter *f = (struct filter *)arg; struct unix_diag_msg *r = NLMSG_DATA(nlh); struct rtattr *tb[UNIX_DIAG_MAX+1]; - char name[128]; - struct unixstat stat = { .name = "*" , .peer_name = "*" }; + char *name = NULL; + struct sockstat stat = {}; parse_rtattr(tb, UNIX_DIAG_MAX, (struct rtattr*)(r+1), nlh->nlmsg_len - NLMSG_LENGTH(sizeof(*r))); stat.type = r->udiag_type; stat.state = r->udiag_state; - stat.ino = r->udiag_ino; + stat.ino = stat.lport = r->udiag_ino; + stat.local.family = stat.remote.family = AF_UNIX; if (unix_type_skip(&stat, f)) return 0; @@ -2582,14 +2590,15 @@ static int unix_show_sock(const struct sockaddr_nl *addr, struct nlmsghdr *nlh, if (tb[UNIX_DIAG_NAME]) { int len = RTA_PAYLOAD(tb[UNIX_DIAG_NAME]); + name = malloc(len + 1); memcpy(name, RTA_DATA(tb[UNIX_DIAG_NAME]), len); name[len] = '\0'; if (name[0] == '\0') name[0] = '@'; - stat.name = &name[0]; + sock_addr_set_str(&stat.local, &name); } if (tb[UNIX_DIAG_PEER]) - stat.peer = rta_getattr_u32(tb[UNIX_DIAG_PEER]); + stat.rport = rta_getattr_u32(tb[UNIX_DIAG_PEER]); unix_stats_print(&stat, f); @@ -2606,6 +2615,9 @@ static int unix_show_sock(const struct sockaddr_nl *addr, struct nlmsghdr *nlh, } if (show_mem || show_details) printf("\n"); + + if (name) + free(name); return 0; } @@ -2652,13 +2664,12 @@ static int unix_show(struct filter *f) char name[128]; int newformat = 0; int cnt; - struct unixstat *list = NULL; + struct sockstat *list = NULL; if (!filter_af_get(f, AF_UNIX)) return 0; - if (!getenv("PROC_NET_UNIX") && !getenv("PROC_ROOT") - && unix_show_netlink(f) == 0) + if (!unix_use_proc() && unix_show_netlink(f) == 0) return 0; if ((fp = net_unix_open()) == NULL) @@ -2670,31 +2681,30 @@ static int unix_show(struct filter *f) cnt = 0; while (fgets(buf, 
sizeof(buf)-1, fp)) { - struct unixstat *u, **insp; + struct sockstat *u, **insp; int flags; if (!(u = malloc(sizeof(*u)))) break; - u->name = NULL; - u->peer_name = NULL; if (sscanf(buf, "%x: %x %x %x %x %x %d %s", - &u->peer, &u->rq, &u->wq, &flags, &u->type, + &u->rport, &u->rq, &u->wq, &flags, &u->type, &u->state, &u->ino, name) < 8) name[0] = 0; - if (flags&(1<<16)) { + u->lport = u->ino; + u->local.family = u->remote.family = AF_UNIX; + + if (flags & (1 << 16)) { u->state = SS_LISTEN; } else { u->state = unix_state_map[u->state-1]; - if (u->type == SOCK_DGRAM && - u->state == SS_CLOSE && - u->peer) + if (u->type == SOCK_DGRAM && u->state == SS_CLOSE && u->rport) u->state = SS_ESTABLISHED; } if (!newformat) { - u->peer = 0; + u->rport = 0; u->rq = 0; u->wq = 0; } @@ -2711,9 +2721,8 @@ static int unix_show(struct filter *f) *insp = u; if (name[0]) { - if ((u->name = malloc(strlen(name)+1)) == NULL) - break; - strcpy(u->name, name); + char *tmp = strdup(name); + sock_addr_set_str(&u->local, &tmp); } if (++cnt > MAX_UNIX_REMEMBER) { unix_stats_print(list, f); From 2d791bc87c1243819521010cd0b4d3cbe0308236 Mon Sep 17 00:00:00 2001 From: Vadim Kochan Date: Fri, 13 Feb 2015 22:14:01 +0200 Subject: [PATCH 006/141] ss: Unify state socket output:netid, state, rq, wq Signed-off-by: Vadim Kochan --- misc/ss.c | 61 +++++++++++++++++++++++++------------------------------ 1 file changed, 28 insertions(+), 33 deletions(-) diff --git a/misc/ss.c b/misc/ss.c index 03692a5d..c370d56d 100644 --- a/misc/ss.c +++ b/misc/ss.c @@ -766,6 +766,16 @@ struct tcpstat struct dctcpstat *dctcp; }; +static void sock_state_print(struct sockstat *s, const char *sock_name) +{ + if (netid_width) + printf("%-*s ", netid_width, sock_name); + if (state_width) + printf("%-*s ", state_width, sstate_name[s->state]); + + printf("%-6d %-6d ", s->rq, s->wq); +} + static const char *tmr_name[] = { "off", "on", @@ -1510,12 +1520,7 @@ static void inet_stats_print(struct sockstat *s, int protocol) { char *buf 
= NULL; - if (netid_width) - printf("%-*s ", netid_width, proto_name(protocol)); - if (state_width) - printf("%-*s ", state_width, sstate_name[s->state]); - - printf("%-6d %-6d ", s->rq, s->wq); + sock_state_print(s, proto_name(protocol)); formatted_print(&s->local, s->lport, s->iface); formatted_print(&s->remote, s->rport, 0); @@ -2534,12 +2539,8 @@ static void unix_stats_print(struct sockstat *list, struct filter *f) continue; } - if (netid_width) - printf("%-*s ", netid_width, - unix_netid_name(s->type)); - if (state_width) - printf("%-*s ", state_width, sstate_name[s->state]); - printf("%-6d %-6d ", s->rq, s->wq); + sock_state_print(s, unix_netid_name(s->type)); + printf("%*s %-*d %*s %-*d", addr_width, local ? : "*", serv_width, s->lport, addr_width, peer, serv_width, s->rport); @@ -2749,21 +2750,13 @@ static int packet_stats_print(struct sockstat *s, const struct filter *f) if (f->f) { s->local.family = AF_PACKET; s->remote.family = AF_PACKET; - s->rport = 0; - s->lport = s->iface; s->local.data[0] = s->prot; - s->remote.data[0] = 0; if (run_ssfilter(f->f, s) == 0) return 1; } - if (netid_width) - printf("%-*s ", netid_width, - s->type == SOCK_RAW ? "p_raw" : "p_dgr"); - if (state_width) - printf("%-*s ", state_width, "UNCONN"); + sock_state_print(s, s->type == SOCK_RAW ? 
"p_raw" : "p_dgr"); - printf("%-6d %-6d ", s->rq, 0); if (s->prot == 3) { printf("%*s:", addr_width, "*"); } else { @@ -2811,9 +2804,10 @@ static int packet_show_sock(const struct sockaddr_nl *addr, if (!tb[PACKET_DIAG_MEMINFO]) return -1; - stat.type = r->pdiag_type; - stat.prot = r->pdiag_num; - stat.ino = r->pdiag_ino; + stat.type = r->pdiag_type; + stat.prot = r->pdiag_num; + stat.ino = r->pdiag_ino; + stat.state = SS_CLOSE; if (tb[PACKET_DIAG_MEMINFO]) { __u32 *skmeminfo = RTA_DATA(tb[PACKET_DIAG_MEMINFO]); @@ -2822,7 +2816,7 @@ static int packet_show_sock(const struct sockaddr_nl *addr, if (tb[PACKET_DIAG_INFO]) { struct packet_diag_info *pinfo = RTA_DATA(tb[PACKET_DIAG_INFO]); - stat.iface = pinfo->pdi_index; + stat.lport = stat.iface = pinfo->pdi_index; } if (packet_stats_print(&stat, f)) @@ -2886,11 +2880,13 @@ static int packet_show_line(char *buf, const struct filter *f, int fam) stat.type = type; stat.prot = prot; - stat.iface = iface; + stat.lport = stat.iface = iface; stat.state = state; stat.rq = rq; stat.uid = uid; stat.ino = ino; + stat.state = SS_CLOSE; + if (packet_stats_print(&stat, f)) return 0; @@ -2926,25 +2922,24 @@ static void netlink_show_one(struct filter *f, int rq, int wq, unsigned long long sk, unsigned long long cb) { + struct sockstat st; SPRINT_BUF(prot_name); + st.state = SS_CLOSE; + st.rq = rq; + st.wq = wq; + if (f->f) { - struct sockstat st; st.local.family = AF_NETLINK; st.remote.family = AF_NETLINK; st.rport = -1; st.lport = pid; st.local.data[0] = prot; - st.remote.data[0] = 0; if (run_ssfilter(f->f, &st) == 0) return; } - if (netid_width) - printf("%-*s ", netid_width, "nl"); - if (state_width) - printf("%-*s ", state_width, "UNCONN"); - printf("%-6d %-6d ", rq, wq); + sock_state_print(&st, "nl"); if (resolve_services) { printf("%*s:", addr_width, nl_proto_n2a(prot, prot_name, From f1b39e1bd6681af208d428e02a43ff1df3b5fb16 Mon Sep 17 00:00:00 2001 From: Vadim Kochan Date: Fri, 13 Feb 2015 22:14:02 +0200 Subject: [PATCH 
007/141] ss: Unify details info output:ino,uid,sk Signed-off-by: Vadim Kochan --- misc/ss.c | 78 +++++++++++++++++++++++++------------------------------ 1 file changed, 35 insertions(+), 43 deletions(-) diff --git a/misc/ss.c b/misc/ss.c index c370d56d..4d80806f 100644 --- a/misc/ss.c +++ b/misc/ss.c @@ -673,6 +673,11 @@ static inline char *sock_addr_get_str(const inet_prefix *prefix) return tmp; } +static unsigned long cookie_sk_get(uint32_t *cookie) +{ + return (((unsigned long)cookie[1] << 31) << 1) | cookie[0]; +} + static const char *sstate_name[] = { "UNKNOWN", [SS_ESTABLISHED] = "ESTAB", @@ -776,6 +781,15 @@ static void sock_state_print(struct sockstat *s, const char *sock_name) printf("%-6d %-6d ", s->rq, s->wq); } +static void sock_details_print(struct sockstat *s) +{ + if (s->uid) + printf(" uid:%u", s->uid); + + printf(" ino:%u", s->ino); + printf(" sk:%llx", s->sk); +} + static const char *tmr_name[] = { "off", "on", @@ -1748,10 +1762,7 @@ static int tcp_show_line(char *line, const struct filter *f, int family) tcp_timer_print(&s); if (show_details) { - if (s.ss.uid) - printf(" uid:%u", (unsigned)s.ss.uid); - printf(" ino:%u", s.ss.ino); - printf(" sk:%llx", s.ss.sk); + sock_details_print(&s.ss); if (opt[0]) printf(" opt:\"%s\"", opt); } @@ -1952,15 +1963,16 @@ static int inet_show_sock(struct nlmsghdr *nlh, struct filter *f, int protocol) parse_rtattr(tb, INET_DIAG_MAX, (struct rtattr*)(r+1), nlh->nlmsg_len - NLMSG_LENGTH(sizeof(*r))); - s.state = r->idiag_state; - s.local.family = s.remote.family = r->idiag_family; - s.lport = ntohs(r->id.idiag_sport); - s.rport = ntohs(r->id.idiag_dport); - s.wq = r->idiag_wqueue; - s.rq = r->idiag_rqueue; - s.ino = r->idiag_inode; - s.uid = r->idiag_uid; - s.iface = r->id.idiag_if; + s.state = r->idiag_state; + s.local.family = s.remote.family = r->idiag_family; + s.lport = ntohs(r->id.idiag_sport); + s.rport = ntohs(r->id.idiag_dport); + s.wq = r->idiag_wqueue; + s.rq = r->idiag_rqueue; + s.ino = r->idiag_inode; + 
s.uid = r->idiag_uid; + s.iface = r->id.idiag_if; + s.sk = cookie_sk_get(&r->id.idiag_cookie[0]); if (s.local.family == AF_INET) { s.local.bytelen = s.remote.bytelen = 4; @@ -1986,13 +1998,7 @@ static int inet_show_sock(struct nlmsghdr *nlh, struct filter *f, int protocol) } if (show_details) { - if (r->idiag_uid) - printf(" uid:%u", (unsigned)r->idiag_uid); - printf(" ino:%u", r->idiag_inode); - printf(" sk:"); - if (r->id.idiag_cookie[1] != 0) - printf("%08x", r->id.idiag_cookie[1]); - printf("%08x", r->id.idiag_cookie[0]); + sock_details_print(&s); if (tb[INET_DIAG_SHUTDOWN]) { unsigned char mask; mask = *(__u8 *)RTA_DATA(tb[INET_DIAG_SHUTDOWN]); @@ -2357,14 +2363,8 @@ static int dgram_show_line(char *line, const struct filter *f, int family) inet_stats_print(&s, IPPROTO_UDP); - if (show_details) { - if (s.uid) - printf(" uid=%u", (unsigned)s.uid); - printf(" ino=%u", s.ino); - printf(" sk=%llx", s.sk); - if (opt[0]) - printf(" opt:\"%s\"", opt); - } + if (show_details && opt[0]) + printf(" opt:\"%s\"", opt); printf("\n"); return 0; @@ -2786,6 +2786,9 @@ static int packet_stats_print(struct sockstat *s, const struct filter *f) } } + if (show_details) + sock_details_print(s); + return 0; } @@ -2808,6 +2811,7 @@ static int packet_show_sock(const struct sockaddr_nl *addr, stat.prot = r->pdiag_num; stat.ino = r->pdiag_ino; stat.state = SS_CLOSE; + stat.sk = cookie_sk_get(&r->pdiag_cookie[0]); if (tb[PACKET_DIAG_MEMINFO]) { __u32 *skmeminfo = RTA_DATA(tb[PACKET_DIAG_MEMINFO]); @@ -2819,21 +2823,12 @@ static int packet_show_sock(const struct sockaddr_nl *addr, stat.lport = stat.iface = pinfo->pdi_index; } + if (tb[PACKET_DIAG_UID]) + stat.uid = *(__u32 *)RTA_DATA(tb[PACKET_DIAG_UID]); + if (packet_stats_print(&stat, f)) return 0; - if (show_details) { - __u32 uid = 0; - - if (tb[PACKET_DIAG_UID]) - uid = *(__u32 *)RTA_DATA(tb[PACKET_DIAG_UID]); - - printf(" ino=%u uid=%u sk=", r->pdiag_ino, uid); - if (r->pdiag_cookie[1] != 0) - printf("%08x", r->pdiag_cookie[1]); - 
printf("%08x", r->pdiag_cookie[0]); - } - if (show_bpf && tb[PACKET_DIAG_FILTER]) { struct sock_filter *fil = RTA_DATA(tb[PACKET_DIAG_FILTER]); @@ -2890,9 +2885,6 @@ static int packet_show_line(char *buf, const struct filter *f, int fam) if (packet_stats_print(&stat, f)) return 0; - if (show_details) { - printf(" ino=%u uid=%u sk=%llx", ino, uid, sk); - } printf("\n"); return 0; } From b217df108ce9ecfa13c6a8ddfd1e3ff69af9faba Mon Sep 17 00:00:00 2001 From: Vadim Kochan Date: Fri, 13 Feb 2015 22:14:03 +0200 Subject: [PATCH 008/141] ss: Unify socket address output by one generic func Signed-off-by: Vadim Kochan --- include/utils.h | 2 + lib/utils.c | 6 +++ misc/ss.c | 126 ++++++++++++++++++++++++++++-------------------- 3 files changed, 81 insertions(+), 53 deletions(-) diff --git a/include/utils.h b/include/utils.h index 3da22837..fec9ef4f 100644 --- a/include/utils.h +++ b/include/utils.h @@ -167,4 +167,6 @@ int iplink_parse(int argc, char **argv, struct iplink_req *req, extern int do_each_netns(int (*func)(char *nsname, void *arg), void *arg, bool show_label); +char *int_to_str(int val, char *buf); + #endif /* __UTILS_H__ */ diff --git a/lib/utils.c b/lib/utils.c index efebe189..e2b05bc0 100644 --- a/lib/utils.c +++ b/lib/utils.c @@ -906,3 +906,9 @@ int do_each_netns(int (*func)(char *nsname, void *arg), void *arg, return netns_foreach(on_netns, &nsf); } + +char *int_to_str(int val, char *buf) +{ + sprintf(buf, "%d", val); + return buf; +} diff --git a/misc/ss.c b/misc/ss.c index 4d80806f..3be5bd1c 100644 --- a/misc/ss.c +++ b/misc/ss.c @@ -790,6 +790,24 @@ static void sock_details_print(struct sockstat *s) printf(" sk:%llx", s->sk); } +static void sock_addr_print_width(int addr_len, const char *addr, char *delim, + int port_len, const char *port, const char *ifname) +{ + if (ifname) { + printf("%*s%%%s%s%-*s ", addr_len, addr, ifname, delim, + port_len, port); + } + else { + printf("%*s%s%-*s ", addr_len, addr, delim, port_len, port); + } +} + +static void 
sock_addr_print(const char *addr, char *delim, const char *port, + const char *ifname) +{ + sock_addr_print_width(addr_width, addr, delim, serv_width, port, ifname); +} + static const char *tmr_name[] = { "off", "on", @@ -972,13 +990,12 @@ static const char *resolve_service(int port) return buf; } -static void formatted_print(const inet_prefix *a, int port, unsigned int ifindex) +static void inet_addr_print(const inet_prefix *a, int port, unsigned int ifindex) { char buf[1024]; const char *ap = buf; - int est_len; - - est_len = addr_width; + int est_len = addr_width; + const char *ifname = NULL; if (a->family == AF_INET) { if (a->data[0] == 0) { @@ -995,14 +1012,14 @@ static void formatted_print(const inet_prefix *a, int port, unsigned int ifindex else est_len = addr_width + ((est_len-addr_width+3)/4)*4; } - if (ifindex) { - const char *ifname = ll_index_to_name(ifindex); - const int len = strlen(ifname) + 1; /* +1 for percent char */ - printf("%*s%%%s:%-*s ", est_len - len, ap, ifname, serv_width, - resolve_service(port)); - } else - printf("%*s:%-*s ", est_len, ap, serv_width, resolve_service(port)); + if (ifindex) { + ifname = ll_index_to_name(ifindex); + est_len -= strlen(ifname) + 1; /* +1 for percent char */ + } + + sock_addr_print_width(est_len, ap, ":", serv_width, resolve_service(port), + ifname); } struct aafilter @@ -1536,8 +1553,8 @@ static void inet_stats_print(struct sockstat *s, int protocol) sock_state_print(s, proto_name(protocol)); - formatted_print(&s->local, s->lport, s->iface); - formatted_print(&s->remote, s->rport, 0); + inet_addr_print(&s->local, s->lport, s->iface); + inet_addr_print(&s->remote, s->rport, 0); if (show_proc_ctx || show_sock_ctx) { if (find_entry(s->ino, &buf, @@ -2502,7 +2519,9 @@ static void unix_stats_print(struct sockstat *list, struct filter *f) { struct sockstat *s; char *local, *peer; + char *ctx_buf = NULL; bool use_proc = unix_use_proc(); + char port_name[30] = {}; for (s = list; s; s = s->next) { if (!(f->states & 
(1 << s->state))) @@ -2541,22 +2560,22 @@ static void unix_stats_print(struct sockstat *list, struct filter *f) sock_state_print(s, unix_netid_name(s->type)); - printf("%*s %-*d %*s %-*d", - addr_width, local ? : "*", serv_width, - s->lport, addr_width, peer, serv_width, s->rport); - char *buf = NULL; + sock_addr_print(local ?: "*", " ", + int_to_str(s->lport, port_name), NULL); + sock_addr_print(peer, " ", int_to_str(s->rport, port_name), + NULL); if (show_proc_ctx || show_sock_ctx) { - if (find_entry(s->ino, &buf, + if (find_entry(s->ino, &ctx_buf, (show_proc_ctx & show_sock_ctx) ? PROC_SOCK_CTX : PROC_CTX) > 0) { - printf(" users:(%s)", buf); - free(buf); + printf(" users:(%s)", ctx_buf); + free(ctx_buf); } } else if (show_users) { - if (find_entry(s->ino, &buf, USERS) > 0) { - printf(" users:(%s)", buf); - free(buf); + if (find_entry(s->ino, &ctx_buf, USERS) > 0) { + printf(" users:(%s)", ctx_buf); + free(ctx_buf); } } printf("\n"); @@ -2746,6 +2765,8 @@ static int unix_show(struct filter *f) static int packet_stats_print(struct sockstat *s, const struct filter *f) { char *buf = NULL; + const char *addr, *port; + char ll_name[16]; if (f->f) { s->local.family = AF_PACKET; @@ -2757,20 +2778,18 @@ static int packet_stats_print(struct sockstat *s, const struct filter *f) sock_state_print(s, s->type == SOCK_RAW ? 
"p_raw" : "p_dgr"); - if (s->prot == 3) { - printf("%*s:", addr_width, "*"); - } else { - char tb[16]; - printf("%*s:", addr_width, - ll_proto_n2a(htons(s->prot), tb, sizeof(tb))); - } - if (s->iface == 0) { - printf("%-*s ", serv_width, "*"); - } else { - printf("%-*s ", serv_width, xll_index_to_name(s->iface)); - } + if (s->prot == 3) + addr = "*"; + else + addr = ll_proto_n2a(htons(s->prot), ll_name, sizeof(ll_name)); - printf("%*s*%-*s", addr_width, "", serv_width, ""); + if (s->iface == 0) + port = "*"; + else + port = xll_index_to_name(s->iface); + + sock_addr_print(addr, ":", port, NULL); + sock_addr_print("", "*", "", NULL); if (show_proc_ctx || show_sock_ctx) { if (find_entry(s->ino, &buf, @@ -2915,7 +2934,9 @@ static void netlink_show_one(struct filter *f, unsigned long long sk, unsigned long long cb) { struct sockstat st; - SPRINT_BUF(prot_name); + SPRINT_BUF(prot_buf) = {}; + const char *prot_name; + char procname[64] = {}; st.state = SS_CLOSE; st.rq = rq; @@ -2933,46 +2954,45 @@ static void netlink_show_one(struct filter *f, sock_state_print(&st, "nl"); - if (resolve_services) { - printf("%*s:", addr_width, nl_proto_n2a(prot, prot_name, - sizeof(prot_name))); - } else { - printf("%*d:", addr_width, prot); - } + if (resolve_services) + prot_name = nl_proto_n2a(prot, prot_buf, sizeof(prot_buf)); + else + prot_name = int_to_str(prot, prot_buf); if (pid == -1) { - printf("%-*s ", serv_width, "*"); + procname[0] = '*'; } else if (resolve_services) { int done = 0; if (!pid) { done = 1; - printf("%-*s ", serv_width, "kernel"); + strncpy(procname, "kernel", 6); } else if (pid > 0) { - char procname[64]; FILE *fp; sprintf(procname, "%s/%d/stat", getenv("PROC_ROOT") ? 
: "/proc", pid); if ((fp = fopen(procname, "r")) != NULL) { if (fscanf(fp, "%*d (%[^)])", procname) == 1) { sprintf(procname+strlen(procname), "/%d", pid); - printf("%-*s ", serv_width, procname); done = 1; } fclose(fp); } } if (!done) - printf("%-*d ", serv_width, pid); + int_to_str(pid, procname); } else { - printf("%-*d ", serv_width, pid); + int_to_str(pid, procname); } + sock_addr_print(prot_name, ":", procname, NULL); + if (state == NETLINK_CONNECTED) { - printf("%*d:%-*d", - addr_width, dst_group, serv_width, dst_pid); + char dst_group_buf[30]; + char dst_pid_buf[30]; + sock_addr_print(int_to_str(dst_group, dst_group_buf), ":", + int_to_str(dst_pid, dst_pid_buf), NULL); } else { - printf("%*s*%-*s", - addr_width, "", serv_width, ""); + sock_addr_print("", "*", "", NULL); } char *pid_context = NULL; From 11ba90fcbddf60ce2c83f9c011fd4676e651acf7 Mon Sep 17 00:00:00 2001 From: Vadim Kochan Date: Fri, 13 Feb 2015 22:14:04 +0200 Subject: [PATCH 009/141] ss: Fixed wrong tcp ato value from netlink Signed-off-by: Vadim Kochan --- misc/ss.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/misc/ss.c b/misc/ss.c index 3be5bd1c..a5c482f6 100644 --- a/misc/ss.c +++ b/misc/ss.c @@ -1906,7 +1906,7 @@ static void tcp_show_info(const struct nlmsghdr *nlh, struct inet_diag_msg *r, s.backoff = info->tcpi_backoff; s.rtt = (double)info->tcpi_rtt / 1000; s.rttvar = (double)info->tcpi_rttvar / 1000; - s.ato = (double)info->tcpi_rttvar / 1000; + s.ato = (double)info->tcpi_ato / 1000; s.mss = info->tcpi_snd_mss; s.rcv_space = info->tcpi_rcv_space; s.rcv_rtt = (double)info->tcpi_rcv_rtt / 1000; From a221d621bb4af414b974ccc40bba26481337d7cf Mon Sep 17 00:00:00 2001 From: Bryton Lee Date: Thu, 12 Feb 2015 14:16:04 +0800 Subject: [PATCH 010/141] prevent the read ahead of /proc/slabinfo in ss Signed-off-by: Bryton Lee --- misc/ss.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/misc/ss.c b/misc/ss.c index a5c482f6..2678033f 100644 --- 
a/misc/ss.c +++ b/misc/ss.c @@ -618,7 +618,7 @@ struct slabstat int skbs; }; -struct slabstat slabstat; +static struct slabstat slabstat; static const char *slabstat_ids[] = { @@ -634,6 +634,10 @@ static int get_slabstat(struct slabstat *s) char buf[256]; FILE *fp; int cnt; + static int slabstat_valid; + + if (slabstat_valid) + return 0; memset(s, 0, sizeof(*s)); @@ -657,6 +661,8 @@ static int get_slabstat(struct slabstat *s) break; } + slabstat_valid = 1; + fclose(fp); return 0; } @@ -2297,6 +2303,8 @@ static int tcp_show(struct filter *f, int socktype) * it is able to give us some memory for snapshot. */ if (1) { + get_slabstat(&slabstat); + int guess = slabstat.socks+slabstat.tcp_syns; if (f->states&(1< Date: Sat, 14 Feb 2015 19:45:04 +0200 Subject: [PATCH 011/141] ip xfrm: Allow to specify "all" option for monitor Just to be aligned with the usage output. Signed-off-by: Vadim Kochan --- ip/xfrm_monitor.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ip/xfrm_monitor.c b/ip/xfrm_monitor.c index 79453e41..8aa6f495 100644 --- a/ip/xfrm_monitor.c +++ b/ip/xfrm_monitor.c @@ -374,7 +374,7 @@ int do_xfrm_monitor(int argc, char **argv) groups = 0; } else if (matches(*argv, "help") == 0) { usage(); - } else { + } else if (strcmp(*argv, "all")) { fprintf(stderr, "Argument \"%s\" is unknown, try \"ip xfrm monitor help\".\n", *argv); exit(-1); } From c16298bea095380bb8ef0ceaf13e674f21d49c61 Mon Sep 17 00:00:00 2001 From: Vadim Kochan Date: Sat, 14 Feb 2015 20:07:44 +0200 Subject: [PATCH 012/141] ip xfrm mon: Add objects list to the usage output Signed-off-by: Vadim Kochan --- ip/xfrm_monitor.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/ip/xfrm_monitor.c b/ip/xfrm_monitor.c index 8aa6f495..50116a7b 100644 --- a/ip/xfrm_monitor.c +++ b/ip/xfrm_monitor.c @@ -36,7 +36,8 @@ static void usage(void) __attribute__((noreturn)); static void usage(void) { - fprintf(stderr, "Usage: ip xfrm monitor [ all | LISTofXFRM-OBJECTS ]\n"); + 
fprintf(stderr, "Usage: ip xfrm monitor [ all | OBJECTS | help ]\n"); + fprintf(stderr, "OBJECTS := { acquire | expire | SA | aevent | policy | report }\n"); exit(-1); } From d182ee1307c7a83b581d8f6d473bbce2004420c0 Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Tue, 17 Feb 2015 17:30:37 +0100 Subject: [PATCH 013/141] ipnetns: allow to get and set netns ids The kernel now provides ids for peer netns. This patch implements a new command 'set' to assign an id. When netns are listed, if an id is assigned, it is now displayed. Example: $ ip netns add foo $ ip netns set foo 1 $ ip netns foo (id: 1) init_net Signed-off-by: Nicolas Dichtel --- include/libnetlink.h | 8 +++ ip/ipnetns.c | 113 ++++++++++++++++++++++++++++++++++++++++++- man/man8/ip-netns.8 | 14 ++++++ 3 files changed, 134 insertions(+), 1 deletion(-) diff --git a/include/libnetlink.h b/include/libnetlink.h index d081e542..898275b8 100644 --- a/include/libnetlink.h +++ b/include/libnetlink.h @@ -158,6 +158,14 @@ extern int rtnl_from_file(FILE *, rtnl_filter_t handler, #define NDTA_PAYLOAD(n) NLMSG_PAYLOAD(n,sizeof(struct ndtmsg)) #endif +#ifndef NETNS_RTA +#define NETNS_RTA(r) \ + ((struct rtattr*)(((char*)(r)) + NLMSG_ALIGN(sizeof(struct rtgenmsg)))) +#endif +#ifndef NETNS_PAYLOAD +#define NETNS_PAYLOAD(n) NLMSG_PAYLOAD(n,sizeof(struct rtgenmsg)) +#endif + /* User defined nlmsg_type which is used mostly for logging netlink * messages from dump file */ #define NLMSG_TSTAMP 15 diff --git a/ip/ipnetns.c b/ip/ipnetns.c index e4038ea7..5a213dcf 100644 --- a/ip/ipnetns.c +++ b/ip/ipnetns.c @@ -15,6 +15,8 @@ #include #include +#include + #include "utils.h" #include "ip_common.h" #include "namespace.h" @@ -23,6 +25,7 @@ static int usage(void) { fprintf(stderr, "Usage: ip netns list\n"); fprintf(stderr, " ip netns add NAME\n"); + fprintf(stderr, " ip netns set NAME NETNSID\n"); fprintf(stderr, " ip [-all] netns delete [NAME]\n"); fprintf(stderr, " ip netns identify [PID]\n"); fprintf(stderr, " ip netns pids 
NAME\n"); @@ -31,10 +34,56 @@ static int usage(void) exit(-1); } +static int get_netnsid_from_name(const char *name) +{ + struct { + struct nlmsghdr n; + struct rtgenmsg g; + char buf[1024]; + } req, answer; + struct rtattr *tb[NETNSA_MAX + 1]; + struct rtgenmsg *rthdr; + int len, fd; + + memset(&req, 0, sizeof(req)); + req.n.nlmsg_len = NLMSG_LENGTH(sizeof(struct rtgenmsg)); + req.n.nlmsg_flags = NLM_F_REQUEST; + req.n.nlmsg_type = RTM_GETNSID; + req.g.rtgen_family = AF_UNSPEC; + + fd = netns_get_fd(name); + if (fd < 0) + return fd; + + addattr32(&req.n, 1024, NETNSA_FD, fd); + if (rtnl_talk(&rth, &req.n, 0, 0, &answer.n) < 0) { + close(fd); + return -2; + } + close(fd); + + /* Validate message and parse attributes */ + if (answer.n.nlmsg_type == NLMSG_ERROR) + return -1; + + rthdr = NLMSG_DATA(&answer.n); + len = answer.n.nlmsg_len - NLMSG_SPACE(sizeof(*rthdr)); + if (len < 0) + return -1; + + parse_rtattr(tb, NETNSA_MAX, NETNS_RTA(rthdr), len); + + if (tb[NETNSA_NSID]) + return rta_getattr_u32(tb[NETNSA_NSID]); + + return -1; +} + static int netns_list(int argc, char **argv) { struct dirent *entry; DIR *dir; + int id; dir = opendir(NETNS_RUN_DIR); if (!dir) @@ -45,7 +94,11 @@ static int netns_list(int argc, char **argv) continue; if (strcmp(entry->d_name, "..") == 0) continue; - printf("%s\n", entry->d_name); + printf("%s", entry->d_name); + id = get_netnsid_from_name(entry->d_name); + if (id >= 0) + printf(" (id: %d)", id); + printf("\n"); } closedir(dir); return 0; @@ -375,6 +428,61 @@ out_delete: return -1; } +static int set_netnsid_from_name(const char *name, int nsid) +{ + struct { + struct nlmsghdr n; + struct rtgenmsg g; + char buf[1024]; + } req; + int fd, err = 0; + + memset(&req, 0, sizeof(req)); + req.n.nlmsg_len = NLMSG_LENGTH(sizeof(struct rtgenmsg)); + req.n.nlmsg_flags = NLM_F_REQUEST; + req.n.nlmsg_type = RTM_NEWNSID; + req.g.rtgen_family = AF_UNSPEC; + + fd = netns_get_fd(name); + if (fd < 0) + return fd; + + addattr32(&req.n, 1024, NETNSA_FD, 
fd); + addattr32(&req.n, 1024, NETNSA_NSID, nsid); + if (rtnl_talk(&rth, &req.n, 0, 0, NULL) < 0) + err = -2; + + close(fd); + return err; +} + +static int netns_set(int argc, char **argv) +{ + char netns_path[MAXPATHLEN]; + const char *name; + int netns, nsid; + + if (argc < 1) { + fprintf(stderr, "No netns name specified\n"); + return -1; + } + if (argc < 2) { + fprintf(stderr, "No nsid specified\n"); + return -1; + } + name = argv[0]; + nsid = atoi(argv[1]); + + snprintf(netns_path, sizeof(netns_path), "%s/%s", NETNS_RUN_DIR, name); + netns = open(netns_path, O_RDONLY | O_CLOEXEC); + if (netns < 0) { + fprintf(stderr, "Cannot open network namespace \"%s\": %s\n", + name, strerror(errno)); + return -1; + } + + return set_netnsid_from_name(name, nsid); +} static int netns_monitor(int argc, char **argv) { @@ -430,6 +538,9 @@ int do_netns(int argc, char **argv) if (matches(*argv, "add") == 0) return netns_add(argc-1, argv+1); + if (matches(*argv, "set") == 0) + return netns_set(argc-1, argv+1); + if (matches(*argv, "delete") == 0) return netns_delete(argc-1, argv+1); diff --git a/man/man8/ip-netns.8 b/man/man8/ip-netns.8 index 8e6999c0..28a95441 100644 --- a/man/man8/ip-netns.8 +++ b/man/man8/ip-netns.8 @@ -23,6 +23,10 @@ ip-netns \- process network namespace management .B ip [-all] netns del .RI "[ " NETNSNAME " ]" +.ti -8 +.BR "ip netns" " { " set " } " +.I NETNSNAME NETNSID + .ti -8 .BR "ip netns identify" .RI "[ " PID " ]" @@ -92,6 +96,16 @@ If .B -all option was specified then all the network namespace names will be removed. +.TP +.B ip netns set NAME NETNSID - assign an id to a peer network namespace +.sp +This command assigns a id to a peer network namespace. This id is valid +only in the current network namespace. +This id will be used by the kernel in some netlink messages. If no id is +assigned when the kernel needs it, it will be automatically assigned by +the kernel. +Once it is assigned, it's not possible to change it. 
+ .TP .B ip netns identify [PID] - Report network namespaces names for process .sp From ccdcbf35f120c754660b3b3f48fa67cc950a6407 Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Tue, 17 Feb 2015 17:30:38 +0100 Subject: [PATCH 014/141] iplink: add support of IFLA_LINK_NETNSID attribute This new attribute is now advertised by the kernel for x-netns interfaces. It's also possible to set it when an interface is created (and thus creating a x-netns interface with one single message). Example: $ ip netns add foo $ ip netns add bar $ ip -n foo netns set bar 15 $ ip -n foo link add ipip1 link-netnsid 15 type ipip remote 10.16.0.121 local 10.16.0.249 $ ip -n foo link ls ipip1 3: ipip1@NONE: mtu 1480 qdisc noop state DOWN mode DEFAULT group default link/ipip 10.16.0.249 peer 10.16.0.121 link-netnsid 15 Signed-off-by: Nicolas Dichtel --- ip/ipaddress.c | 20 +++++++++++++++++--- ip/iplink.c | 10 ++++++++++ 2 files changed, 27 insertions(+), 3 deletions(-) diff --git a/ip/ipaddress.c b/ip/ipaddress.c index 3730424a..99a6ab59 100644 --- a/ip/ipaddress.c +++ b/ip/ipaddress.c @@ -28,6 +28,7 @@ #include #include #include +#include #include "rt_names.h" #include "utils.h" @@ -614,9 +615,13 @@ int print_linkinfo(const struct sockaddr_nl *who, if (iflink == 0) fprintf(fp, "@NONE: "); else { - fprintf(fp, "@%s: ", ll_idx_n2a(iflink, b1)); - m_flag = ll_index_to_flags(iflink); - m_flag = !(m_flag & IFF_UP); + if (tb[IFLA_LINK_NETNSID]) + fprintf(fp, "@if%d: ", iflink); + else { + fprintf(fp, "@%s: ", ll_idx_n2a(iflink, b1)); + m_flag = ll_index_to_flags(iflink); + m_flag = !(m_flag & IFF_UP); + } } } else { fprintf(fp, ": "); @@ -678,6 +683,15 @@ int print_linkinfo(const struct sockaddr_nl *who, } } + if (tb[IFLA_LINK_NETNSID]) { + int id = *(int*)RTA_DATA(tb[IFLA_LINK_NETNSID]); + + if (id >= 0) + fprintf(fp, " link-netnsid %d", id); + else + fprintf(fp, " link-netnsid unknown"); + } + if (tb[IFLA_PROMISCUITY] && show_details) fprintf(fp, " promiscuity %u ", 
*(int*)RTA_DATA(tb[IFLA_PROMISCUITY])); diff --git a/ip/iplink.c b/ip/iplink.c index c93d1dc3..5893ee40 100644 --- a/ip/iplink.c +++ b/ip/iplink.c @@ -72,6 +72,7 @@ void iplink_usage(void) fprintf(stderr, " [ mtu MTU ]\n"); fprintf(stderr, " [ netns PID ]\n"); fprintf(stderr, " [ netns NAME ]\n"); + fprintf(stderr, " [ link-netnsid ID ]\n"); fprintf(stderr, " [ alias NAME ]\n"); fprintf(stderr, " [ vf NUM [ mac LLADDR ]\n"); fprintf(stderr, " [ vlan VLANID [ qos VLAN-QOS ] ]\n"); @@ -386,6 +387,7 @@ int iplink_parse(int argc, char **argv, struct iplink_req *req, int numtxqueues = -1; int numrxqueues = -1; int dev_index = 0; + int link_netnsid = -1; *group = -1; ret = argc; @@ -588,6 +590,14 @@ int iplink_parse(int argc, char **argv, struct iplink_req *req, addattr8(&req->n, sizeof(*req), IFLA_INET6_ADDR_GEN_MODE, mode); addattr_nest_end(&req->n, afs6); addattr_nest_end(&req->n, afs); + } else if (matches(*argv, "link-netnsid") == 0) { + NEXT_ARG(); + if (link_netnsid != -1) + duparg("link-netnsid", *argv); + if (get_integer(&link_netnsid, *argv, 0)) + invarg("Invalid \"link-netnsid\" value\n", *argv); + addattr32(&req->n, sizeof(*req), IFLA_LINK_NETNSID, + link_netnsid); } else { if (strcmp(*argv, "dev") == 0) { NEXT_ARG(); From a4797670d3f38315716231cccf9de2a493feb35f Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Tue, 17 Feb 2015 17:30:39 +0100 Subject: [PATCH 015/141] bridge/fdb: display link netns id When this attribute is set, it means that the i/o part of the related netdevice is in another netns. 
Signed-off-by: Nicolas Dichtel --- bridge/fdb.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/bridge/fdb.c b/bridge/fdb.c index c01a5020..6941edd9 100644 --- a/bridge/fdb.c +++ b/bridge/fdb.c @@ -131,12 +131,16 @@ int print_fdb(const struct sockaddr_nl *who, struct nlmsghdr *n, void *arg) if (ifindex) { char ifname[IF_NAMESIZE]; - if (if_indextoname(ifindex, ifname)) + if (!tb[NDA_LINK_NETNSID] && + if_indextoname(ifindex, ifname)) fprintf(fp, "via %s ", ifname); else fprintf(fp, "via ifindex %u ", ifindex); } } + if (tb[NDA_LINK_NETNSID]) + fprintf(fp, "link-netnsid %d ", + rta_getattr_u32(tb[NDA_LINK_NETNSID])); if (show_stats && tb[NDA_CACHEINFO]) { struct nda_cacheinfo *ci = RTA_DATA(tb[NDA_CACHEINFO]); From 6b8c871dc104576c9f55d87937d6dd445d77f34f Mon Sep 17 00:00:00 2001 From: Alex Pilon Date: Thu, 19 Feb 2015 14:27:46 -0500 Subject: [PATCH 016/141] Allow specifying bridge port STP state by name rather than number. The existing behaviour forces one to memorize the integer constants for STP port states. # bridge link set dev dummy0 state 3 This patch makes it possible to use the lowercased port state name. # bridge link set dev dummy0 state forwarding Invalid non-integer inputs now cause exit with status -1. 
Signed-off-by: Alex Pilon --- bridge/link.c | 14 +++++++++++++- man/man8/bridge.8 | 4 +++- 2 files changed, 16 insertions(+), 2 deletions(-) diff --git a/bridge/link.c b/bridge/link.c index c8555f82..a7bd85f9 100644 --- a/bridge/link.c +++ b/bridge/link.c @@ -316,7 +316,19 @@ static int brlink_modify(int argc, char **argv) priority = atoi(*argv); } else if (strcmp(*argv, "state") == 0) { NEXT_ARG(); - state = atoi(*argv); + char *endptr; + size_t nstates = sizeof(port_states) / sizeof(*port_states); + state = strtol(*argv, &endptr, 10); + if (!(**argv != '\0' && *endptr == '\0')) { + for (state = 0; state < nstates; state++) + if (strcmp(port_states[state], *argv) == 0) + break; + if (state == nstates) { + fprintf(stderr, + "Error: invalid STP port state\n"); + exit(-1); + } + } } else if (strcmp(*argv, "hwmode") == 0) { NEXT_ARG(); flags = BRIDGE_FLAGS_SELF; diff --git a/man/man8/bridge.8 b/man/man8/bridge.8 index e344db28..68ad71e5 100644 --- a/man/man8/bridge.8 +++ b/man/man8/bridge.8 @@ -207,7 +207,9 @@ droot port selectio algorithms. .TP .BI state " STATE " the operation state of the port. This is primarily used by user space STP/RSTP -implementation. The following is a list of valid values: +implementation. One may enter a lowercased port state name, or one of the +numbers below. Negative inputs are ignored, and unrecognized names return an +error. .B 0 - port is DISABLED. Make this port completely inactive. From 29999b0ff23d76e5d93d238a0461c59841afea73 Mon Sep 17 00:00:00 2001 From: Vadim Kochan Date: Sun, 22 Feb 2015 22:23:10 +0200 Subject: [PATCH 017/141] ss: Add filter before printing unix stats from Netlink Without this check, detailed info could be printed even for a socket that the filter should reject. 
Signed-off-by: Vadim Kochan --- misc/ss.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/misc/ss.c b/misc/ss.c index 2678033f..5320d387 100644 --- a/misc/ss.c +++ b/misc/ss.c @@ -2556,7 +2556,7 @@ static void unix_stats_print(struct sockstat *list, struct filter *f) } } - if (f->f) { + if (use_proc && f->f) { if (strcmp(peer, "*") == 0) memset(s->remote.data, 0, sizeof(char *)); else @@ -2628,6 +2628,9 @@ static int unix_show_sock(const struct sockaddr_nl *addr, struct nlmsghdr *nlh, if (tb[UNIX_DIAG_PEER]) stat.rport = rta_getattr_u32(tb[UNIX_DIAG_PEER]); + if (f->f && run_ssfilter(f->f, &stat) == 0) + return 0; + unix_stats_print(&stat, f); if (show_mem) { From 5f24ec0e0658415c4b50578af9aa338a3902b4d8 Mon Sep 17 00:00:00 2001 From: Vadim Kochan Date: Sun, 22 Feb 2015 22:23:11 +0200 Subject: [PATCH 018/141] ss: Skip filtered netlink sockets before detailed info Signed-off-by: Vadim Kochan --- misc/ss.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/misc/ss.c b/misc/ss.c index 5320d387..5113d85c 100644 --- a/misc/ss.c +++ b/misc/ss.c @@ -2938,7 +2938,7 @@ static int packet_show(struct filter *f) return 0; } -static void netlink_show_one(struct filter *f, +static int netlink_show_one(struct filter *f, int prot, int pid, unsigned groups, int state, int dst_pid, unsigned dst_group, int rq, int wq, @@ -2960,7 +2960,7 @@ static void netlink_show_one(struct filter *f, st.lport = pid; st.local.data[0] = prot; if (run_ssfilter(f->f, &st) == 0) - return; + return 1; } sock_state_print(&st, "nl"); @@ -3032,7 +3032,7 @@ static void netlink_show_one(struct filter *f, } printf("\n"); - return; + return 0; } static int netlink_show_sock(const struct sockaddr_nl *addr, @@ -3058,9 +3058,11 @@ static int netlink_show_sock(const struct sockaddr_nl *addr, wq = skmeminfo[SK_MEMINFO_WMEM_ALLOC]; } - netlink_show_one(f, r->ndiag_protocol, r->ndiag_portid, groups, + if (netlink_show_one(f, r->ndiag_protocol, r->ndiag_portid, groups, 
r->ndiag_state, r->ndiag_dst_portid, r->ndiag_dst_group, - rq, wq, 0, 0); + rq, wq, 0, 0)) { + return 0; + } if (show_mem) { printf("\t"); From 2dd5909d9d447ccfb5365007f32152303eea8139 Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Tue, 24 Feb 2015 16:15:00 +0100 Subject: [PATCH 019/141] ip-monitor: allow to monitor ip rules Now done by default or with 'ip monitor rule'. Signed-off-by: Nicolas Dichtel --- ip/ipmonitor.c | 14 +++++++++++++- man/man8/ip-monitor.8 | 2 +- 2 files changed, 14 insertions(+), 2 deletions(-) diff --git a/ip/ipmonitor.c b/ip/ipmonitor.c index 5ec8f418..6b5e6653 100644 --- a/ip/ipmonitor.c +++ b/ip/ipmonitor.c @@ -32,7 +32,7 @@ static void usage(void) fprintf(stderr, "Usage: ip monitor [ all | LISTofOBJECTS ] [ FILE ]" "[ label ] [dev DEVICE]\n"); fprintf(stderr, "LISTofOBJECTS := link | address | route | mroute | prefix |\n"); - fprintf(stderr, " neigh | netconf\n"); + fprintf(stderr, " neigh | netconf | rule\n"); fprintf(stderr, "FILE := file FILENAME\n"); exit(-1); } @@ -150,6 +150,7 @@ int do_ipmonitor(int argc, char **argv) int lprefix=0; int lneigh=0; int lnetconf=0; + int lrule=0; int ifindex=0; groups |= nl_mgrp(RTNLGRP_LINK); @@ -163,6 +164,8 @@ int do_ipmonitor(int argc, char **argv) groups |= nl_mgrp(RTNLGRP_NEIGH); groups |= nl_mgrp(RTNLGRP_IPV4_NETCONF); groups |= nl_mgrp(RTNLGRP_IPV6_NETCONF); + groups |= nl_mgrp(RTNLGRP_IPV4_RULE); + groups |= nl_mgrp(RTNLGRP_IPV6_RULE); rtnl_close(&rth); @@ -193,6 +196,9 @@ int do_ipmonitor(int argc, char **argv) } else if (matches(*argv, "netconf") == 0) { lnetconf = 1; groups = 0; + } else if (matches(*argv, "rule") == 0) { + lrule = 1; + groups = 0; } else if (strcmp(*argv, "all") == 0) { prefix_banner=1; } else if (matches(*argv, "help") == 0) { @@ -249,6 +255,12 @@ int do_ipmonitor(int argc, char **argv) if (!preferred_family || preferred_family == AF_INET6) groups |= nl_mgrp(RTNLGRP_IPV6_NETCONF); } + if (lrule) { + if (!preferred_family || preferred_family == AF_INET) + groups |= 
nl_mgrp(RTNLGRP_IPV4_RULE); + if (!preferred_family || preferred_family == AF_INET6) + groups |= nl_mgrp(RTNLGRP_IPV6_RULE); + } if (file) { FILE *fp; fp = fopen(file, "r"); diff --git a/man/man8/ip-monitor.8 b/man/man8/ip-monitor.8 index 544b6256..5ce8dc73 100644 --- a/man/man8/ip-monitor.8 +++ b/man/man8/ip-monitor.8 @@ -49,7 +49,7 @@ command is the first in the command line and then the object list follows: is the list of object types that we want to monitor. It may contain .BR link ", " address ", " route ", " mroute ", " prefix ", " -.BR neigh " and " netconf "." +.BR neigh ", " netconf " and " rule "." If no .B file argument is given, From c9ae9bae6e097b1ff7e7273baf112b79b91f7e27 Mon Sep 17 00:00:00 2001 From: Lennart Sorensen Date: Tue, 24 Feb 2015 15:29:15 -0500 Subject: [PATCH 020/141] Fix misspelling of defrag in ip-l2tp.8 --- man/man8/ip-l2tp.8 | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/man/man8/ip-l2tp.8 b/man/man8/ip-l2tp.8 index 2efda9f0..1738035f 100644 --- a/man/man8/ip-l2tp.8 +++ b/man/man8/ip-l2tp.8 @@ -356,16 +356,16 @@ the recipient expects to receive ethernet frames exactly as transmitted. In such cases, it is important that frames leaving the tunnel are reassembled back into a single frame before being forwarded on. To do so, enable netfilter connection tracking -(conntrack) or manually load the Linux netfilter degrag modules at +(conntrack) or manually load the Linux netfilter defrag modules at each tunnel endpoint. .PP .nf -site-A:# modprobe nf_degrag_ipv4 +site-A:# modprobe nf_defrag_ipv4 -site-B:# modprobe nf_degrag_ipv4 +site-B:# modprobe nf_defrag_ipv4 .fi .PP -If L2TP is being used over IPv6, use the IPv6 degrag module. +If L2TP is being used over IPv6, use the IPv6 defrag module. 
.SH INTEROPERABILITY .PP Unmanaged (static) L2TPv3 tunnels are supported by some network From b5024ee1eddae64875dc761f509d0d31362788e3 Mon Sep 17 00:00:00 2001 From: Hagen Paul Pfeifer Date: Tue, 24 Feb 2015 22:24:51 +0100 Subject: [PATCH 021/141] ss: group DCTCP socket statistics Keep ss output consistent and format DCTCP socket statistics similar to skmem and timer where a group of logical values are grouped by brackets. This makes parser scripts *and* humans more happy. Current output of 'ss -inetm dst :80': ESTAB 0 0 192.168.11.14:55511 173.194.66.189:443 timer:(keepalive,14sec,0) uid:1000 ino:428768 sk:ffff88020ceb5b00 <-> skmem:(r0,rb372480,t0,tb87040,f0,w0,o0,bl0) ts sack wscale:7,7 rto:250 rtt:49.225/20.837 ato:40 mss:1408 cwnd:10 ce_state 23 alpha 23 ab_ecn 23 ab_tot 23 send 2.3Mbps lastsnd:121026 lastrcv:121026 lastack:30850 pacing_rate 4.6Mbps retrans:0/2 rcv_rtt:40.416 rcv_space:2920 New grouped output: ESTAB 0 0 192.168.11.14:55511 173.194.66.189:443 timer:(keepalive,14sec,0) uid:1000 ino:428768 sk:ffff88020ceb5b00 <-> skmem:(r0,rb372480,t0,tb87040,f0,w0,o0,bl0) ts sack wscale:7,7 rto:250 rtt:49.225/20.837 ato:40 mss:1408 cwnd:10 dctcp:(ce_state:23,alpha:23,ab_ecn:23,ab_tot:23) send 2.3Mbps lastsnd:121026 lastrcv:121026 lastack:30850 pacing_rate 4.6Mbps retrans:0/2 rcv_rtt:40.416 rcv_space:2920 Cc: Stephen Hemminger Acked-by: Daniel Borkmann Acked-by: Florian Westphal Signed-off-by: Hagen Paul Pfeifer --- misc/ss.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/misc/ss.c b/misc/ss.c index 5113d85c..21a4366a 100644 --- a/misc/ss.c +++ b/misc/ss.c @@ -1680,11 +1680,11 @@ static void tcp_stats_print(struct tcpstat *s) if (s->dctcp && s->dctcp->enabled) { struct dctcpstat *dctcp = s->dctcp; - printf(" ce_state %u alpha %u ab_ecn %u ab_tot %u", + printf("dctcp:(ce_state:%u,alpha:%u,ab_ecn:%u,ab_tot:%u)", dctcp->ce_state, dctcp->alpha, dctcp->ab_ecn, dctcp->ab_tot); } else if (s->dctcp) { - printf(" fallback_mode"); + 
printf("dctcp:fallback_mode"); } if (s->send_bps) From 34c8a95cd7c75d79319f699c965e9422237a7d29 Mon Sep 17 00:00:00 2001 From: Vadim Kochan Date: Thu, 26 Feb 2015 03:12:08 +0200 Subject: [PATCH 022/141] man ip-link: Add short info about 'dynamic' flag Signed-off-by: Vadim Kochan --- man/man8/ip-link.8.in | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/man/man8/ip-link.8.in b/man/man8/ip-link.8.in index 057125b5..398348af 100644 --- a/man/man8/ip-link.8.in +++ b/man/man8/ip-link.8.in @@ -625,7 +625,9 @@ flag on the device. .BR "dynamic on " or " dynamic off" change the .B DYNAMIC -flag on the device. +flag on the device. Indicates that address can change when interface goes down (currently +.B NOT +used by the Linux). .TP .BI name " NAME" From 22a98f5140373198ea2a5ca721fea937c6f7b509 Mon Sep 17 00:00:00 2001 From: Roopa Prabhu Date: Thu, 26 Feb 2015 00:12:59 -0800 Subject: [PATCH 023/141] bridge link: add support to specify master This patch adds support to specify 'master' keyword, to target a bridge link command explicitly to the software bridge driver. 
Adds self/master keywords to usage and man page v2: fix usage to say (self and master) and not (self or master) Signed-off-by: Roopa Prabhu --- bridge/link.c | 5 ++++- man/man8/bridge.8 | 12 +++++++++++- 2 files changed, 15 insertions(+), 2 deletions(-) diff --git a/bridge/link.c b/bridge/link.c index a7bd85f9..aa40692d 100644 --- a/bridge/link.c +++ b/bridge/link.c @@ -227,6 +227,7 @@ static void usage(void) fprintf(stderr, " [ learning_sync {on | off} ]\n"); fprintf(stderr, " [ flood {on | off} ]\n"); fprintf(stderr, " [ hwmode {vepa | veb} ]\n"); + fprintf(stderr, " [ self ] [ master ]\n"); fprintf(stderr, " bridge link show [dev DEV]\n"); exit(-1); } @@ -343,7 +344,9 @@ static int brlink_modify(int argc, char **argv) exit(-1); } } else if (strcmp(*argv, "self") == 0) { - flags = BRIDGE_FLAGS_SELF; + flags |= BRIDGE_FLAGS_SELF; + } else if (strcmp(*argv, "master") == 0) { + flags |= BRIDGE_FLAGS_MASTER; } else { usage(); } diff --git a/man/man8/bridge.8 b/man/man8/bridge.8 index 68ad71e5..2005b9f0 100644 --- a/man/man8/bridge.8 +++ b/man/man8/bridge.8 @@ -40,7 +40,8 @@ bridge \- show / manipulate bridge addresses and devices .BR learning " { " on " | " off " } ] [ " .BR learning_sync " { " on " | " off " } ] [ " .BR flood " { " on " | " off " } ] [ " -.BR hwmode " { " vepa " | " veb " } ] " +.BR hwmode " { " vepa " | " veb " } ] [ " +.BR self " ] [ " master " ] " .ti -8 .BR "bridge link" " [ " show " ] [ " @@ -286,6 +287,15 @@ switch. .B veb - bridging happens in hardware. +.TP +.BI self +link setting is configured on specified physical device + +.TP +.BI master +link setting is configured on the software bridge (default) + + .SS bridge link show - list bridge port configuration. This command displays the current bridge port configuration and flags. 
From 409998c5a4eb113e1997712b674e946366a872c6 Mon Sep 17 00:00:00 2001 From: Ebben Aries Date: Fri, 27 Feb 2015 09:27:39 -0700 Subject: [PATCH 024/141] iproute: ip-gue/ip-fou manpages Add missing GUE/FOU manpages to Makefile Signed-off-by: Ebben Aries --- man/man8/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/man/man8/Makefile b/man/man8/Makefile index e9989288..152747a3 100644 --- a/man/man8/Makefile +++ b/man/man8/Makefile @@ -6,7 +6,7 @@ MAN8PAGES = $(TARGETS) ip.8 arpd.8 lnstat.8 routel.8 rtacct.8 rtmon.8 ss.8 \ tc-mqprio.8 tc-netem.8 tc-pfifo.8 tc-pfifo_fast.8 tc-prio.8 tc-red.8 \ tc-sfb.8 tc-sfq.8 tc-stab.8 tc-tbf.8 \ bridge.8 rtstat.8 ctstat.8 nstat.8 routef.8 \ - ip-addrlabel.8 ip-l2tp.8 \ + ip-addrlabel.8 ip-fou.8 ip-gue.8 ip-l2tp.8 \ ip-maddress.8 ip-monitor.8 ip-mroute.8 ip-neighbour.8 \ ip-netns.8 ip-ntable.8 ip-rule.8 ip-tunnel.8 ip-xfrm.8 \ ip-tcp_metrics.8 ip-netconf.8 ip-token.8 From 32caee9fc7cc92091596994ba7482ffa6bf936fc Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Fri, 27 Feb 2015 17:52:36 +0100 Subject: [PATCH 025/141] m_bpf: remove unrelevant help lines Left-overs when copying this over from cls_bpf. ;) Lets remove them. Signed-off-by: Daniel Borkmann Cc: Jiri Pirko --- tc/m_bpf.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/tc/m_bpf.c b/tc/m_bpf.c index 611135ea..3b864f9d 100644 --- a/tc/m_bpf.c +++ b/tc/m_bpf.c @@ -31,8 +31,6 @@ static void explain(void) fprintf(stderr, "Where BPF_BYTECODE := \'s,c t f k,c t f k,c t f k,...\'\n"); fprintf(stderr, " c,t,f,k and s are decimals; s denotes number of 4-tuples\n"); fprintf(stderr, "Where FILE points to a file containing the BPF_BYTECODE string\n"); - fprintf(stderr, "\nACTION_SPEC := ... 
look at individual actions\n"); - fprintf(stderr, "NOTE: CLASSID is parsed as hexadecimal input.\n"); } static void usage(void) From 36324eba3758ad13318d9065aea5eb9b3303d55c Mon Sep 17 00:00:00 2001 From: Vadim Kochan Date: Wed, 25 Feb 2015 15:46:22 +0200 Subject: [PATCH 026/141] man ip-link: Add notice about local netns devices Added some clarification why 'ip link set netns' can not change network namespace for some kind of devices. Signed-off-by: Vadim Kochan --- man/man8/ip-link.8.in | 29 ++++++++++++++++++++++------- 1 file changed, 22 insertions(+), 7 deletions(-) diff --git a/man/man8/ip-link.8.in b/man/man8/ip-link.8.in index 398348af..be4b481f 100644 --- a/man/man8/ip-link.8.in +++ b/man/man8/ip-link.8.in @@ -662,14 +662,29 @@ the interface is .IR "POINTOPOINT" . .TP -.BI netns " PID" -move the device to the network namespace associated with the process -.IR "PID". - -.TP -.BI netns " NETNSNAME" +.BI netns " NETNSNAME " \fR| " PID" move the device to the network namespace associated with name -.IR "NETNSNAME". +.IR "NETNSNAME " or +.RI process " PID". + +Some devices are not allowed to change network namespace: loopback, bridge, +ppp, wireless. These are network namespace local devices. In such case +.B ip +tool will return "Invalid argument" error. It is possible to find out if device is local +to a single network namespace by checking +.B netns-local +flag in the output of the +.BR ethtool ":" + +.in +8 +.B ethtool -k +.I DEVICE +.in -8 + +To change network namespace for wireless devices the +.B iw +tool can be used. But it allows to change network namespace only for physical devices and by process +.IR PID . 
.TP .BI alias " NAME" From 032b4f4d19e1d0880f800a0becfd05025206bb7d Mon Sep 17 00:00:00 2001 From: Vadim Kochan Date: Sat, 7 Mar 2015 15:43:37 +0200 Subject: [PATCH 027/141] man ip-link: Add short description about 'group' Signed-off-by: Vadim Kochan --- man/man8/ip-link.8.in | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/man/man8/ip-link.8.in b/man/man8/ip-link.8.in index be4b481f..9bbf2d30 100644 --- a/man/man8/ip-link.8.in +++ b/man/man8/ip-link.8.in @@ -841,6 +841,12 @@ print human readable rates in IEC units (ie. 1K = 1024). .I "TYPE" specifies which help of link type to dislpay. +.SS +.I GROUP +may be a number or a string from the file +.B /etc/iproute2/group +which can be manually filled. + .SH "EXAMPLES" .PP ip link show From 8ce21c6b93c81a5646533212edfa45f3cbe17ebf Mon Sep 17 00:00:00 2001 From: Vadim Kochan Date: Mon, 9 Mar 2015 01:07:17 +0200 Subject: [PATCH 028/141] man tc: Highlight minor & major, notice they are hex Also added some trivial form of the ID as "major:minor", just for visualisation of explained words. Signed-off-by: Vadim Kochan --- man/man8/tc.8 | 30 ++++++++++++++++++++---------- 1 file changed, 20 insertions(+), 10 deletions(-) diff --git a/man/man8/tc.8 b/man/man8/tc.8 index a6aed0ab..ea4d445f 100644 --- a/man/man8/tc.8 +++ b/man/man8/tc.8 @@ -250,23 +250,33 @@ attached to that class. Check qdisc specific manpages for details, however. All qdiscs, classes and filters have IDs, which can either be specified or be automatically assigned. -IDs consist of a major number and a minor number, separated by a colon. -Both major and minor number are limited to 16 bits. There are two special -values: root is signified by major and minor of all ones, and unspecified -is all zeros. +IDs consist of a +.BR major " number and a " minor +number, separated by a colon - +.BR major ":" minor "." +Both +.BR major " and " minor +are hexadecimal numbers and are limited to 16 bits. 
There are two special +values: root is signified by +.BR major " and " minor +of all ones, and unspecified is all zeros. .TP QDISCS -A qdisc, which potentially can have children, -gets assigned a major number, called a 'handle', leaving the minor +A qdisc, which potentially can have children, gets assigned a +.B major +number, called a 'handle', leaving the +.B minor number namespace available for classes. The handle is expressed as '10:'. -It is customary to explicitly assign a handle to qdiscs expected to have -children. +It is customary to explicitly assign a handle to qdiscs expected to have children. .TP CLASSES -Classes residing under a qdisc share their qdisc major number, but each have -a separate minor number called a 'classid' that has no relation to their +Classes residing under a qdisc share their qdisc +.B major +number, but each have a separate +.B minor +number called a 'classid' that has no relation to their parent classes, only to their parent qdisc. The same naming custom as for qdiscs applies. From ee9b34778c8bf7743b47c15206ae03af1b94d085 Mon Sep 17 00:00:00 2001 From: Vadim Kochan Date: Sun, 15 Mar 2015 17:49:16 +0200 Subject: [PATCH 029/141] man ip-netns: Notice about loose device when do 'del' Signed-off-by: Vadim Kochan --- man/man8/ip-netns.8 | 25 ++++++++++++++++++++++--- 1 file changed, 22 insertions(+), 3 deletions(-) diff --git a/man/man8/ip-netns.8 b/man/man8/ip-netns.8 index 28a95441..d4966758 100644 --- a/man/man8/ip-netns.8 +++ b/man/man8/ip-netns.8 @@ -46,6 +46,9 @@ ip-netns \- process network namespace management A network namespace is logically another copy of the network stack, with its own routes, firewall rules, and network devices. +By default process inherits network namespace from its parent. Initially all +the processes share the same default network namespace from the init process. + By convention a named network namespace is an object at .BR "/var/run/netns/" NAME that can be opened. 
The file descriptor resulting from opening @@ -88,14 +91,30 @@ network namespace and assigns NAME. .sp If NAME is present in /var/run/netns it is umounted and the mount point is removed. If this is the last user of the network namespace the -network namespace will be freed, otherwise the network namespace -persists until it has no more users. ip netns delete may fail if -the mount point is in use in another mount namespace. +network namespace will be freed and all physical devices will be moved to the +default one, otherwise the network namespace persists until it has no more +users. ip netns delete may fail if the mount point is in use in another mount +namespace. If .B -all option was specified then all the network namespace names will be removed. +It is possible to lose the physical device when it was moved to netns and +then this netns was deleted with a running process: + + $ ip netns add net0 + $ ip link set dev eth0 netns net0 + $ ip netns exec net0 SOME_PROCESS_IN_BACKGROUND + $ ip netns del net0 + +and eth0 will appear in the default netns only after SOME_PROCESS_IN_BACKGROUND +will exit or will be killed. 
To prevent this the processes running in net0 +should be killed before deleting the netns: + + $ ip netns pids net0 | xargs kill + $ ip netns del net0 + .TP .B ip netns set NAME NETNSID - assign an id to a peer network namespace .sp From 7871f7dbf0912500e364efdb5abfaaad531591d0 Mon Sep 17 00:00:00 2001 From: Vadim Kochan Date: Fri, 27 Feb 2015 23:54:36 +0200 Subject: [PATCH 030/141] ss: Allow to specify sport/dport without ':' Ugly change but it allows to specify sport/dport w/o ':' # ss dport = 80 and sport = 44862 Signed-off-by: Vadim Kochan --- misc/ss.c | 14 +++++++++----- misc/ssfilter.h | 4 +++- misc/ssfilter.y | 23 +++++++++++++++++------ 3 files changed, 29 insertions(+), 12 deletions(-) diff --git a/misc/ss.c b/misc/ss.c index 21a4366a..196b020c 100644 --- a/misc/ss.c +++ b/misc/ss.c @@ -1380,7 +1380,7 @@ static int xll_name_to_index(const char *dev) return ll_name_to_index(dev); } -void *parse_hostcond(char *addr) +void *parse_hostcond(char *addr, bool is_port) { char *port = NULL; struct aafilter a = { .port = -1 }; @@ -1473,10 +1473,14 @@ void *parse_hostcond(char *addr) } else { port = strrchr(strchr(addr, '/') ? 
: addr, ':'); } + + if (is_port) + port = addr; + if (port && *port) { - if (*port != ':') - return NULL; - *port++ = 0; + if (*port == ':') + *port++ = 0; + if (*port && *port != '*') { if (get_integer(&a.port, port, 0)) { struct servent *se1 = NULL; @@ -1517,7 +1521,7 @@ void *parse_hostcond(char *addr) } } } - if (addr && *addr && *addr != '*') { + if (!is_port && addr && *addr && *addr != '*') { if (get_prefix_1(&a.addr, addr, fam)) { if (get_dns_host(&a, addr, fam)) { fprintf(stderr, "Error: an inet prefix is expected rather than \"%s\".\n", addr); diff --git a/misc/ssfilter.h b/misc/ssfilter.h index 00b92e3d..b20092bc 100644 --- a/misc/ssfilter.h +++ b/misc/ssfilter.h @@ -9,6 +9,8 @@ #define SSF_S_LE 8 #define SSF_S_AUTO 9 +#include + struct ssfilter { int type; @@ -17,5 +19,5 @@ struct ssfilter }; int ssfilter_parse(struct ssfilter **f, int argc, char **argv, FILE *fp); -void *parse_hostcond(char*); +void *parse_hostcond(char *addr, bool is_port); diff --git a/misc/ssfilter.y b/misc/ssfilter.y index 2e9d9626..a258d04b 100644 --- a/misc/ssfilter.y +++ b/misc/ssfilter.y @@ -25,6 +25,7 @@ static char **yy_argv; static int yy_argc; static FILE *yy_fp; static ssfilter_t *yy_ret; +static int tok_type = -1; static int yylex(void); @@ -220,14 +221,22 @@ int yylex(void) return '('; if (strcmp(curtok, ")") == 0) return ')'; - if (strcmp(curtok, "dst") == 0) + if (strcmp(curtok, "dst") == 0) { + tok_type = DCOND; return DCOND; - if (strcmp(curtok, "src") == 0) + } + if (strcmp(curtok, "src") == 0) { + tok_type = SCOND; return SCOND; - if (strcmp(curtok, "dport") == 0) + } + if (strcmp(curtok, "dport") == 0) { + tok_type = DPORT; return DPORT; - if (strcmp(curtok, "sport") == 0) + } + if (strcmp(curtok, "sport") == 0) { + tok_type = SPORT; return SPORT; + } if (strcmp(curtok, ">=") == 0 || strcmp(curtok, "ge") == 0 || strcmp(curtok, "geq") == 0) @@ -250,9 +259,11 @@ int yylex(void) if (strcmp(curtok, "<") == 0 || strcmp(curtok, "lt") == 0) return '<'; - if 
(strcmp(curtok, "autobound") == 0) + if (strcmp(curtok, "autobound") == 0) { + tok_type = AUTOBOUND; return AUTOBOUND; - yylval = (void*)parse_hostcond(curtok); + } + yylval = (void*)parse_hostcond(curtok, tok_type == SPORT || tok_type == DPORT); if (yylval == NULL) { fprintf(stderr, "Cannot parse dst/src address.\n"); exit(1); From 2e7e805d0aac4dd7a3d66951e28c0e3f457781ae Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 10 Mar 2015 09:56:47 -0700 Subject: [PATCH 031/141] ss: better 32bit support Socket cookies are 64bit, even if ss happens to be a 32bit binary, running on a 64 bit host. Signed-off-by: Eric Dumazet --- misc/ss.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/misc/ss.c b/misc/ss.c index 196b020c..954a30bd 100644 --- a/misc/ss.c +++ b/misc/ss.c @@ -679,9 +679,9 @@ static inline char *sock_addr_get_str(const inet_prefix *prefix) return tmp; } -static unsigned long cookie_sk_get(uint32_t *cookie) +static unsigned long long cookie_sk_get(const uint32_t *cookie) { - return (((unsigned long)cookie[1] << 31) << 1) | cookie[0]; + return (((unsigned long long)cookie[1] << 31) << 1) | cookie[0]; } static const char *sstate_name[] = { From f3a2ddc124e09d3990b836e322ed39c76c35546e Mon Sep 17 00:00:00 2001 From: Vadim Kochan Date: Sat, 28 Feb 2015 02:50:24 +0200 Subject: [PATCH 032/141] lib utils: Use helpers to get AF bit/byte len Added funcs to get AF_XXX len in bit/bytes and replace places where switch(AF_XXX) is used for this. 
Signed-off-by: Vadim Kochan --- include/utils.h | 3 +++ ip/iproute.c | 22 ++++--------------- ip/iprule.c | 9 +------- lib/utils.c | 58 +++++++++++++++++++++---------------------------- 4 files changed, 33 insertions(+), 59 deletions(-) diff --git a/include/utils.h b/include/utils.h index fec9ef4f..9151c4f1 100644 --- a/include/utils.h +++ b/include/utils.h @@ -101,6 +101,9 @@ extern int get_s8(__s8 *val, const char *arg, int base); extern char* hexstring_n2a(const __u8 *str, int len, char *buf, int blen); extern __u8* hexstring_a2n(const char *str, __u8 *buf, int blen); +extern int af_bit_len(int af); +extern int af_byte_len(int af); + extern const char *format_host(int af, int len, const void *addr, char *buf, int buflen); extern const char *rt_addr_n2a(int af, const void *addr, diff --git a/ip/iproute.c b/ip/iproute.c index 76d8e36c..b32025ff 100644 --- a/ip/iproute.c +++ b/ip/iproute.c @@ -268,20 +268,6 @@ static int filter_nlmsg(struct nlmsghdr *n, struct rtattr **tb, int host_len) return 1; } -static int calc_host_len(const struct rtmsg *r) -{ - if (r->rtm_family == AF_INET6) - return 128; - else if (r->rtm_family == AF_INET) - return 32; - else if (r->rtm_family == AF_DECnet) - return 16; - else if (r->rtm_family == AF_IPX) - return 80; - else - return -1; -} - static void print_rtax_features(FILE *fp, unsigned int features) { unsigned int of = features; @@ -302,7 +288,7 @@ int print_route(const struct sockaddr_nl *who, struct nlmsghdr *n, void *arg) int len = n->nlmsg_len; struct rtattr * tb[RTA_MAX+1]; char abuf[256]; - int host_len = -1; + int host_len; __u32 table; SPRINT_BUF(b1); static int hz; @@ -320,7 +306,7 @@ int print_route(const struct sockaddr_nl *who, struct nlmsghdr *n, void *arg) return -1; } - host_len = calc_host_len(r); + host_len = af_bit_len(r->rtm_family); parse_rtattr(tb, RTA_MAX, RTM_RTA(r), len); table = rtm_get_table(r, tb); @@ -1134,9 +1120,9 @@ static int save_route(const struct sockaddr_nl *who, struct nlmsghdr *n, int len = 
n->nlmsg_len; struct rtmsg *r = NLMSG_DATA(n); struct rtattr *tb[RTA_MAX+1]; - int host_len = -1; + int host_len; - host_len = calc_host_len(r); + host_len = af_bit_len(r->rtm_family); len -= NLMSG_LENGTH(sizeof(*r)); parse_rtattr(tb, RTA_MAX, RTM_RTA(r), len); diff --git a/ip/iprule.c b/ip/iprule.c index 366878e9..54ed7536 100644 --- a/ip/iprule.c +++ b/ip/iprule.c @@ -66,14 +66,7 @@ int print_rule(const struct sockaddr_nl *who, struct nlmsghdr *n, void *arg) parse_rtattr(tb, FRA_MAX, RTM_RTA(r), len); - if (r->rtm_family == AF_INET) - host_len = 32; - else if (r->rtm_family == AF_INET6) - host_len = 128; - else if (r->rtm_family == AF_DECnet) - host_len = 16; - else if (r->rtm_family == AF_IPX) - host_len = 80; + host_len = af_bit_len(r->rtm_family); if (n->nlmsg_type == RTM_DELRULE) fprintf(fp, "Deleted "); diff --git a/lib/utils.c b/lib/utils.c index e2b05bc0..9cda2681 100644 --- a/lib/utils.c +++ b/lib/utils.c @@ -431,6 +431,27 @@ int get_addr_1(inet_prefix *addr, const char *name, int family) return 0; } +int af_bit_len(int af) +{ + switch (af) { + case AF_INET6: + return 128; + case AF_INET: + return 32; + case AF_DECnet: + return 16; + case AF_IPX: + return 80; + } + + return 0; +} + +int af_byte_len(int af) +{ + return af_bit_len(af) / 8; +} + int get_prefix_1(inet_prefix *dst, char *arg, int family) { int err; @@ -456,17 +477,8 @@ int get_prefix_1(inet_prefix *dst, char *arg, int family) err = get_addr_1(dst, arg, family); if (err == 0) { - switch(dst->family) { - case AF_INET6: - dst->bitlen = 128; - break; - case AF_DECnet: - dst->bitlen = 16; - break; - default: - case AF_INET: - dst->bitlen = 32; - } + dst->bitlen = af_bit_len(family); + if (slash) { if (get_netmask(&plen, slash+1, 0) || plen > dst->bitlen) { @@ -697,7 +709,6 @@ static const char *resolve_address(const void *addr, int len, int af) } #endif - const char *format_host(int af, int len, const void *addr, char *buf, int buflen) { @@ -705,27 +716,8 @@ const char *format_host(int af, int len, 
const void *addr, if (resolve_hosts) { const char *n; - if (len <= 0) { - switch (af) { - case AF_INET: - len = 4; - break; - case AF_INET6: - len = 16; - break; - case AF_IPX: - len = 10; - break; -#ifdef AF_DECnet - /* I see no reasons why gethostbyname - may not work for DECnet */ - case AF_DECnet: - len = 2; - break; -#endif - default: ; - } - } + len = len <= 0 ? af_byte_len(af) : len; + if (len > 0 && (n = resolve_address(addr, len, af)) != NULL) return n; From d116ff34145b00db54a37e2a6282dccd8bc08225 Mon Sep 17 00:00:00 2001 From: Vadim Kochan Date: Sat, 7 Mar 2015 08:30:58 +0200 Subject: [PATCH 033/141] ip netns: Fix rtnl error while print netns list Observed on the Linux 3.18: # ip netns RTNETLINK answers: Operation not supported net0 CC: Nicolas Dichtel Fixes: d182ee1307c7 ("ipnetns: allow to get and set netns ids") Signed-off-by: Vadim Kochan --- configure | 21 ++++++++++++++++++++- ip/Makefile | 4 ++++ ip/ipnetns.c | 7 +++++++ 3 files changed, 31 insertions(+), 1 deletion(-) diff --git a/configure b/configure index c3dacdba..631938e9 100755 --- a/configure +++ b/configure @@ -201,7 +201,7 @@ check_setns() { cat >$TMPDIR/setnstest.c < -int main(int argc, char **argv) +int main(int argc, char **argv) { (void)setns(0,0); return 0; @@ -218,6 +218,23 @@ EOF rm -f $TMPDIR/setnstest.c $TMPDIR/setnstest } +check_netnsid() +{ + cat >$TMPDIR/netnsid.c < +int test_def = RTM_GETNSID; +EOF + $CC -c $TMPDIR/netnsid.c >/dev/null 2>&1 + if [ $? 
-eq 0 ] + then + echo "IP_CONFIG_NETNSID:=y" >> Config + echo "yes" + else + echo "no" + fi + rm -f $TMPDIR/netnsid.c $TMPDIR/netnsid.o +} + check_ipset() { cat >$TMPDIR/ipsettest.c < Date: Tue, 3 Mar 2015 18:41:18 +0200 Subject: [PATCH 034/141] tc class: Show class names from file It is possible to use class names from file /etc/iproute2/cls_names which tc will use when showing class info: # tc/tc -nm class show dev lo class htb 1:10 parent 1:1 leaf 10: prio 0 rate 5Mbit ceil 5Mbit burst 15Kb cburst 1600b class htb 1:1 root rate 6Mbit ceil 6Mbit burst 15Kb cburst 1599b class htb web#1:20 parent 1:1 leaf 20: prio 0 rate 3Mbit ceil 6Mbit burst 15Kb cburst 1599b class htb 1:2 root rate 6Mbit ceil 6Mbit burst 15Kb cburst 1599b class htb 1:30 parent 1:1 leaf 30: prio 0 rate 1Kbit ceil 6Mbit burst 15Kb cburst 1599b class htb voip#1:40 parent 1:2 leaf 40: prio 0 rate 5Mbit ceil 5Mbit burst 15Kb cburst 1600b class htb 1:50 parent 1:2 leaf 50: prio 0 rate 3Mbit ceil 6Mbit burst 15Kb cburst 1599b class htb 1:60 parent 1:2 leaf 60: prio 0 rate 1Kbit ceil 6Mbit burst 15Kb cburst 1599b or to specify via file path: # tc/tc -nm -cf /tmp/cls_names class show dev lo Class names file contains simple "maj:min name" structure: 1:20 web 1:40 voip Signed-off-by: Vadim Kochan --- include/names.h | 25 ++++++++ lib/Makefile | 3 +- lib/names.c | 156 ++++++++++++++++++++++++++++++++++++++++++++++++ tc/tc.c | 25 +++++++- tc/tc_common.h | 1 + tc/tc_util.c | 48 +++++++++++++-- tc/tc_util.h | 3 + 7 files changed, 252 insertions(+), 9 deletions(-) create mode 100644 include/names.h create mode 100644 lib/names.c diff --git a/include/names.h b/include/names.h new file mode 100644 index 00000000..4123d0b0 --- /dev/null +++ b/include/names.h @@ -0,0 +1,25 @@ +#ifndef DB_NAMES_H_ +#define DB_NAMES_H_ 1 + +#define IDNAME_MAX 256 + +struct db_entry { + struct db_entry *next; + unsigned int id; + char *name; +}; + +struct db_names { + unsigned int size; + struct db_entry *cached; + struct db_entry 
**hash; + int max; +}; + +struct db_names *db_names_alloc(const char *path); +void db_names_free(struct db_names *db); + +char *id_to_name(struct db_names *db, int id, char *name); +int name_to_id(struct db_names *db, int *id, const char *name); + +#endif diff --git a/lib/Makefile b/lib/Makefile index 66f89f1d..4c7cbc25 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -6,7 +6,8 @@ endif CFLAGS += -fPIC -UTILOBJ=utils.o rt_names.o ll_types.o ll_proto.o ll_addr.o inet_proto.o namespace.o +UTILOBJ=utils.o rt_names.o ll_types.o ll_proto.o ll_addr.o inet_proto.o namespace.o \ + names.o NLOBJ=libgenl.o ll_map.o libnetlink.o diff --git a/lib/names.c b/lib/names.c new file mode 100644 index 00000000..93933f74 --- /dev/null +++ b/lib/names.c @@ -0,0 +1,156 @@ +/* + * names.c db names + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. 
+ * + */ + +#include +#include +#include + +#include "names.h" + +#define MAX_ENTRIES 256 +#define NAME_MAX_LEN 512 + +static int read_id_name(FILE *fp, int *id, char *name) +{ + char buf[NAME_MAX_LEN]; + int min, maj; + + while (fgets(buf, sizeof(buf), fp)) { + char *p = buf; + + while (*p == ' ' || *p == '\t') + p++; + + if (*p == '#' || *p == '\n' || *p == 0) + continue; + + if (sscanf(p, "%x:%x %s\n", &maj, &min, name) == 3) { + *id = (maj << 16) | min; + } else if (sscanf(p, "%x:%x %s #", &maj, &min, name) == 3) { + *id = (maj << 16) | min; + } else if (sscanf(p, "0x%x %s\n", id, name) != 2 && + sscanf(p, "0x%x %s #", id, name) != 2 && + sscanf(p, "%d %s\n", id, name) != 2 && + sscanf(p, "%d %s #", id, name) != 2) { + strcpy(name, p); + return -1; + } + return 1; + } + + return 0; +} + +struct db_names *db_names_alloc(const char *path) +{ + struct db_names *db; + struct db_entry *entry; + FILE *fp; + int id; + char namebuf[NAME_MAX_LEN] = {0}; + int ret; + + fp = fopen(path, "r"); + if (!fp) { + fprintf(stderr, "Can't open file: %s\n", path); + return NULL; + } + + db = malloc(sizeof(*db)); + memset(db, 0, sizeof(*db)); + + db->size = MAX_ENTRIES; + db->hash = malloc(sizeof(struct db_entry *) * db->size); + memset(db->hash, 0, sizeof(struct db_entry *) * db->size); + + while ((ret = read_id_name(fp, &id, &namebuf[0]))) { + if (ret == -1) { + fprintf(stderr, "Database %s is corrupted at %s\n", + path, namebuf); + fclose(fp); + return NULL; + } + + if (id < 0) + continue; + + entry = malloc(sizeof(*entry)); + entry->id = id; + entry->name = strdup(namebuf); + entry->next = db->hash[id & (db->size - 1)]; + db->hash[id & (db->size - 1)] = entry; + } + + fclose(fp); + return db; +} + +void db_names_free(struct db_names *db) +{ + int i; + + if (!db) + return; + + for (i = 0; i < db->size; i++) { + struct db_entry *entry = db->hash[i]; + + while (entry) { + struct db_entry *next = entry->next; + + free(entry->name); + free(entry); + entry = next; + } + } + + 
free(db->hash); + free(db); +} + +char *id_to_name(struct db_names *db, int id, char *name) +{ + struct db_entry *entry = db->hash[id & (db->size - 1)]; + + while (entry && entry->id != id) + entry = entry->next; + + if (entry) { + strncpy(name, entry->name, IDNAME_MAX); + return name; + } + + snprintf(name, IDNAME_MAX, "%d", id); + return NULL; +} + +int name_to_id(struct db_names *db, int *id, const char *name) +{ + struct db_entry *entry; + int i; + + if (db->cached && strcmp(db->cached->name, name) == 0) { + *id = db->cached->id; + return 0; + } + + for (i = 0; i < db->size; i++) { + entry = db->hash[i]; + while (entry && strcmp(entry->name, name)) + entry = entry->next; + if (entry) { + db->cached = entry; + *id = entry->id; + return 0; + } + } + + return -1; +} diff --git a/tc/tc.c b/tc/tc.c index 93803058..22c3be41 100644 --- a/tc/tc.c +++ b/tc/tc.c @@ -41,6 +41,10 @@ int batch_mode = 0; int resolve_hosts = 0; int use_iec = 0; int force = 0; +bool use_names = false; + +static char *conf_file; + struct rtnl_handle rth; static void *BODY = NULL; /* cached handle dlopen(NULL) */ @@ -188,7 +192,8 @@ static void usage(void) " tc [-force] -batch filename\n" "where OBJECT := { qdisc | class | filter | action | monitor }\n" " OPTIONS := { -s[tatistics] | -d[etails] | -r[aw] | -p[retty] | -b[atch] [filename] | " - "-n[etns] name }\n"); + "-n[etns] name |\n" + " -nm | -nam[es] | { -cf | -conf } path }\n"); } static int do_cmd(int argc, char **argv) @@ -293,7 +298,7 @@ int main(int argc, char **argv) return 0; } else if (matches(argv[1], "-force") == 0) { ++force; - } else if (matches(argv[1], "-batch") == 0) { + } else if (matches(argv[1], "-batch") == 0) { argc--; argv++; if (argc <= 1) usage(); @@ -302,6 +307,13 @@ int main(int argc, char **argv) NEXT_ARG(); if (netns_switch(argv[1])) return -1; + } else if (matches(argv[1], "-names") == 0 || + matches(argv[1], "-nm") == 0) { + use_names = true; + } else if (matches(argv[1], "-cf") == 0 || + matches(argv[1], 
"-conf") == 0) { + NEXT_ARG(); + conf_file = argv[1]; } else { fprintf(stderr, "Option \"%s\" is unknown, try \"tc -help\".\n", argv[1]); return -1; @@ -323,8 +335,17 @@ int main(int argc, char **argv) exit(1); } + if (use_names && cls_names_init(conf_file)) { + ret = -1; + goto Exit; + } + ret = do_cmd(argc-1, argv+1); +Exit: rtnl_close(&rth); + if (use_names) + cls_names_uninit(); + return ret; } diff --git a/tc/tc_common.h b/tc/tc_common.h index ea16f7f7..96a0e20f 100644 --- a/tc/tc_common.h +++ b/tc/tc_common.h @@ -21,3 +21,4 @@ extern int parse_size_table(int *p_argc, char ***p_argv, struct tc_sizespec *s); extern int check_size_table_opts(struct tc_sizespec *s); extern int show_graph; +extern bool use_names; diff --git a/tc/tc_util.c b/tc/tc_util.c index f1fca0a8..feae4394 100644 --- a/tc/tc_util.c +++ b/tc/tc_util.c @@ -23,12 +23,34 @@ #include #include "utils.h" +#include "names.h" #include "tc_util.h" +#include "tc_common.h" #ifndef LIBDIR #define LIBDIR "/usr/lib" #endif +static struct db_names *cls_names = NULL; + +#define NAMES_DB "/etc/iproute2/cls_names" + +int cls_names_init(char *path) +{ + cls_names = db_names_alloc(path ?: NAMES_DB); + if (!cls_names) { + fprintf(stderr, "Error while opening class names file\n"); + return -1; + } + + return 0; +} + +void cls_names_uninit(void) +{ + db_names_free(cls_names); +} + const char *get_tc_lib(void) { const char *lib_dir; @@ -97,20 +119,34 @@ ok: int print_tc_classid(char *buf, int len, __u32 h) { + char handle[40] = {}; + if (h == TC_H_ROOT) - sprintf(buf, "root"); + sprintf(handle, "root"); else if (h == TC_H_UNSPEC) - snprintf(buf, len, "none"); + snprintf(handle, len, "none"); else if (TC_H_MAJ(h) == 0) - snprintf(buf, len, ":%x", TC_H_MIN(h)); + snprintf(handle, len, ":%x", TC_H_MIN(h)); else if (TC_H_MIN(h) == 0) - snprintf(buf, len, "%x:", TC_H_MAJ(h)>>16); + snprintf(handle, len, "%x:", TC_H_MAJ(h) >> 16); else - snprintf(buf, len, "%x:%x", TC_H_MAJ(h)>>16, TC_H_MIN(h)); + snprintf(handle, len, 
"%x:%x", TC_H_MAJ(h) >> 16, TC_H_MIN(h)); + + if (use_names) { + char clname[IDNAME_MAX] = {}; + + if (id_to_name(cls_names, h, clname)) + snprintf(buf, len, "%s#%s", clname, handle); + else + snprintf(buf, len, "%s", handle); + } else { + snprintf(buf, len, "%s", handle); + } + return 0; } -char * sprint_tc_classid(__u32 h, char *buf) +char *sprint_tc_classid(__u32 h, char *buf) { if (print_tc_classid(buf, SPRINT_BSIZE-1, h)) strcpy(buf, "???"); diff --git a/tc/tc_util.h b/tc/tc_util.h index d4183679..1be1b501 100644 --- a/tc/tc_util.h +++ b/tc/tc_util.h @@ -100,4 +100,7 @@ extern int parse_action(int *, char ***, int, struct nlmsghdr *); extern void print_tm(FILE *f, const struct tcf_t *tm); extern int prio_print_opt(struct qdisc_util *qu, FILE *f, struct rtattr *opt); +extern int cls_names_init(char *path); +extern void cls_names_uninit(void); + #endif From 473544d96d56a9450f95e7d2f9322a3a1fb3b59f Mon Sep 17 00:00:00 2001 From: Mark Einon Date: Mon, 16 Mar 2015 09:59:09 +0000 Subject: [PATCH 035/141] ip: Make uniform the use of synonyms list, show and lst Where used in the ip tool, the 'show' option always has the synonyms 'list' and 'lst', except for ip-token and ip-addrlabel, which are missing 'lst'. Add this as a synonym for these commands. 
Signed-off-by: Mark Einon --- ip/ipaddrlabel.c | 1 + ip/iptoken.c | 1 + 2 files changed, 2 insertions(+) diff --git a/ip/ipaddrlabel.c b/ip/ipaddrlabel.c index b34dd8b1..f6a638b5 100644 --- a/ip/ipaddrlabel.c +++ b/ip/ipaddrlabel.c @@ -248,6 +248,7 @@ int do_ipaddrlabel(int argc, char **argv) if (argc < 1) { return ipaddrlabel_list(0, NULL); } else if (matches(argv[0], "list") == 0 || + matches(argv[0], "lst") == 0 || matches(argv[0], "show") == 0) { return ipaddrlabel_list(argc-1, argv+1); } else if (matches(argv[0], "add") == 0) { diff --git a/ip/iptoken.c b/ip/iptoken.c index 5689c2ec..655f1601 100644 --- a/ip/iptoken.c +++ b/ip/iptoken.c @@ -195,6 +195,7 @@ int do_iptoken(int argc, char **argv) if (argc < 1) { return iptoken_list(0, NULL); } else if (matches(argv[0], "list") == 0 || + matches(argv[0], "lst") == 0 || matches(argv[0], "show") == 0) { return iptoken_list(argc - 1, argv + 1); } else if (matches(argv[0], "set") == 0 || From 822e9609e7ae8e7dee27355d89d778f14cc27a2d Mon Sep 17 00:00:00 2001 From: Andy Gospodarek Date: Tue, 17 Mar 2015 11:34:10 -0400 Subject: [PATCH 036/141] bridge: drop reference to unused option embedded from manpage While looking at the manpage, I noticed a reference to 'embedded' that was added by this commit: commit d611682a8c8f28205158e6d3a7d5e2d01db022cc Author: John Fastabend Date: Thu Sep 13 23:50:36 2012 -0700 iproute2: bridge: finish removing replace option in man pages I no longer see any reference to the 'embedded' option in any c- or h-files, so it seems worthwhile to remove. 
Signed-off-by: Andy Gospodarek CC: John Fastabend --- man/man8/bridge.8 | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/man/man8/bridge.8 b/man/man8/bridge.8 index 2005b9f0..18550415 100644 --- a/man/man8/bridge.8 +++ b/man/man8/bridge.8 @@ -54,7 +54,7 @@ bridge \- show / manipulate bridge addresses and devices .B dev .IR DEV " { " .BR local " | " temp " } { " -.BR self " } { " embedded " } { " router " } [ " +.BR self " } { " router " } [ " .B dst .IR IPADDR " ] [ " .B vni @@ -326,10 +326,6 @@ the interface to which this address is associated. - the address is associated with a software fdb (default) .sp -.B embedded -- the address is associated with an offloaded fdb -.sp - .B router - the destination address is associated with a router. Valid if the referenced device is a VXLAN type device and has From 42ecedd4bae534fc688194a795eb4548c6530cda Mon Sep 17 00:00:00 2001 From: Roopa Prabhu Date: Tue, 17 Mar 2015 19:26:32 -0700 Subject: [PATCH 037/141] fix ip -force -batch to continue on errors This patch replaces exits with returns in several iproute2 commands. This fixes `ip -batch -force` to not exit but continue on errors. 
$cat c.txt route del 1.2.3.0/24 dev eth0 route del 1.2.4.0/24 dev eth0 route del 1.2.5.0/24 dev eth0 route add 1.2.3.0/24 dev eth0 $ip -force -batch c.txt RTNETLINK answers: No such process Command failed c.txt:2 RTNETLINK answers: No such process Command failed c.txt:3 Reported-by: Sven-Haegar Koch Signed-off-by: Roopa Prabhu --- bridge/fdb.c | 6 +++--- bridge/link.c | 24 ++++++++++++------------ bridge/mdb.c | 8 ++++---- bridge/vlan.c | 4 ++-- ip/iproute.c | 2 +- 5 files changed, 22 insertions(+), 22 deletions(-) diff --git a/bridge/fdb.c b/bridge/fdb.c index 6941edd9..3c33e228 100644 --- a/bridge/fdb.c +++ b/bridge/fdb.c @@ -320,7 +320,7 @@ static int fdb_modify(int cmd, int flags, int argc, char **argv) if (d == NULL || addr == NULL) { fprintf(stderr, "Device and address are required arguments.\n"); - exit(-1); + return -1; } /* Assume self */ @@ -335,7 +335,7 @@ static int fdb_modify(int cmd, int flags, int argc, char **argv) abuf, abuf+1, abuf+2, abuf+3, abuf+4, abuf+5) != 6) { fprintf(stderr, "Invalid mac address %s\n", addr); - exit(-1); + return -1; } addattr_l(&req.n, sizeof(req), NDA_LLADDR, abuf, ETH_ALEN); @@ -363,7 +363,7 @@ static int fdb_modify(int cmd, int flags, int argc, char **argv) } if (rtnl_talk(&rth, &req.n, 0, 0, NULL) < 0) - exit(2); + return -1; return 0; } diff --git a/bridge/link.c b/bridge/link.c index aa40692d..1af1cf33 100644 --- a/bridge/link.c +++ b/bridge/link.c @@ -284,31 +284,31 @@ static int brlink_modify(int argc, char **argv) } else if (strcmp(*argv, "guard") == 0) { NEXT_ARG(); if (!on_off("guard", &bpdu_guard, *argv)) - exit(-1); + return -1; } else if (strcmp(*argv, "hairpin") == 0) { NEXT_ARG(); if (!on_off("hairping", &hairpin, *argv)) - exit(-1); + return -1; } else if (strcmp(*argv, "fastleave") == 0) { NEXT_ARG(); if (!on_off("fastleave", &fast_leave, *argv)) - exit(-1); + return -1; } else if (strcmp(*argv, "root_block") == 0) { NEXT_ARG(); if (!on_off("root_block", &root_block, *argv)) - exit(-1); + return -1; } 
else if (strcmp(*argv, "learning") == 0) { NEXT_ARG(); if (!on_off("learning", &learning, *argv)) - exit(-1); + return -1; } else if (strcmp(*argv, "learning_sync") == 0) { NEXT_ARG(); if (!on_off("learning_sync", &learning_sync, *argv)) - exit(-1); + return -1; } else if (strcmp(*argv, "flood") == 0) { NEXT_ARG(); if (!on_off("flood", &flood, *argv)) - exit(-1); + return -1; } else if (strcmp(*argv, "cost") == 0) { NEXT_ARG(); cost = atoi(*argv); @@ -327,7 +327,7 @@ static int brlink_modify(int argc, char **argv) if (state == nstates) { fprintf(stderr, "Error: invalid STP port state\n"); - exit(-1); + return -1; } } } else if (strcmp(*argv, "hwmode") == 0) { @@ -341,7 +341,7 @@ static int brlink_modify(int argc, char **argv) fprintf(stderr, "Mode argument must be \"vepa\" or " "\"veb\".\n"); - exit(-1); + return -1; } } else if (strcmp(*argv, "self") == 0) { flags |= BRIDGE_FLAGS_SELF; @@ -354,14 +354,14 @@ static int brlink_modify(int argc, char **argv) } if (d == NULL) { fprintf(stderr, "Device is a required argument.\n"); - exit(-1); + return -1; } req.ifm.ifi_index = ll_name_to_index(d); if (req.ifm.ifi_index == 0) { fprintf(stderr, "Cannot find bridge device \"%s\"\n", d); - exit(-1); + return -1; } /* Nested PROTINFO attribute. 
Contains: port flags, cost, priority and @@ -416,7 +416,7 @@ static int brlink_modify(int argc, char **argv) } if (rtnl_talk(&rth, &req.n, 0, 0, NULL) < 0) - exit(2); + return -1; return 0; } diff --git a/bridge/mdb.c b/bridge/mdb.c index 6c1c938a..a6b28827 100644 --- a/bridge/mdb.c +++ b/bridge/mdb.c @@ -145,12 +145,12 @@ static int mdb_show(int argc, char **argv) if (rtnl_wilddump_request(&rth, PF_BRIDGE, RTM_GETMDB) < 0) { perror("Cannot send dump request"); - exit(1); + return -1; } if (rtnl_dump_filter(&rth, print_mdb, stdout) < 0) { fprintf(stderr, "Dump terminated\n"); - exit(1); + return -1; } return 0; @@ -198,7 +198,7 @@ static int mdb_modify(int cmd, int flags, int argc, char **argv) if (d == NULL || grp == NULL || p == NULL) { fprintf(stderr, "Device, group address and port name are required arguments.\n"); - exit(-1); + return -1; } req.bpm.ifindex = ll_name_to_index(d); @@ -225,7 +225,7 @@ static int mdb_modify(int cmd, int flags, int argc, char **argv) addattr_l(&req.n, sizeof(req), MDBA_SET_ENTRY, &entry, sizeof(entry)); if (rtnl_talk(&rth, &req.n, 0, 0, NULL) < 0) - exit(2); + return -1; return 0; } diff --git a/bridge/vlan.c b/bridge/vlan.c index 9f6c84ee..2ae739cf 100644 --- a/bridge/vlan.c +++ b/bridge/vlan.c @@ -80,7 +80,7 @@ static int vlan_modify(int cmd, int argc, char **argv) if (d == NULL || vid == -1) { fprintf(stderr, "Device and VLAN ID are required arguments.\n"); - exit(-1); + return -1; } req.ifm.ifi_index = ll_name_to_index(d); @@ -132,7 +132,7 @@ static int vlan_modify(int cmd, int argc, char **argv) addattr_nest_end(&req.n, afspec); if (rtnl_talk(&rth, &req.n, 0, 0, NULL) < 0) - exit(2); + return -1; return 0; } diff --git a/ip/iproute.c b/ip/iproute.c index b32025ff..024d401c 100644 --- a/ip/iproute.c +++ b/ip/iproute.c @@ -1059,7 +1059,7 @@ static int iproute_modify(int cmd, unsigned flags, int argc, char **argv) req.r.rtm_family = AF_INET; if (rtnl_talk(&rth, &req.n, 0, 0, NULL) < 0) - exit(2); + return -1; return 0; } From 
6f4cad912082998bc2a44316af5550eaf260605a Mon Sep 17 00:00:00 2001 From: "philipp@redfish-solutions.com" Date: Thu, 19 Mar 2015 13:54:28 -0600 Subject: [PATCH 038/141] xfrm: Fix -o (oneline) being broken in xfrm and correct mark radix Don't insert newline in -o (oneline) mode; print mark as hex. Oneline mode is supposed to force all output to be on oneline and machine-parsable, but this isn't the case for "ip xfrm" as shown: % ip -o xfrm monitor ... src 0.0.0.0/0 dst 0.0.0.0/0 \ dir out priority 2051 ptype main \ mark -1879048191/0xffffffff tmpl src 203.0.130.10 dst 198.51.130.30\ proto esp reqid 16384 mode tunnel\ ... as that's 2 lines, not one. Also, the "mark" is shown in signed decimal, but the mask is in hex. This is confusing: let's use hex for both. Signed-off-by: Philip Prindeville --- ip/ipxfrm.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/ip/ipxfrm.c b/ip/ipxfrm.c index 659fa6b6..95f91a53 100644 --- a/ip/ipxfrm.c +++ b/ip/ipxfrm.c @@ -689,7 +689,8 @@ void xfrm_xfrma_print(struct rtattr *tb[], __u16 family, if (tb[XFRMA_MARK]) { struct rtattr *rta = tb[XFRMA_MARK]; struct xfrm_mark *m = (struct xfrm_mark *) RTA_DATA(rta); - fprintf(fp, "\tmark %d/0x%x\n", m->v, m->m); + fprintf(fp, "\tmark %#x/%#x", m->v, m->m); + fprintf(fp, "%s", _SL_); } if (tb[XFRMA_ALG_AUTH] && !tb[XFRMA_ALG_AUTH_TRUNC]) { From 106ca2779ebc0d6a17ce7fae073aa38cdbdae6bb Mon Sep 17 00:00:00 2001 From: Roopa Prabhu Date: Tue, 17 Mar 2015 19:18:28 -0700 Subject: [PATCH 039/141] lib utils: fix family during af_bit_len calculation commit f3a2ddc124e0 ("lib utils: Use helpers to get AF bit/byte len") used a wrong family or family of zero in the default case during af_bit_len calculation causing ip route commands to fail with below error Error: an inet prefix is expected rather than "10.0.2.14/24". 
Reported-by: Sven-Haegar Koch Signed-off-by: Roopa Prabhu --- lib/utils.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/utils.c b/lib/utils.c index 9cda2681..0d08a868 100644 --- a/lib/utils.c +++ b/lib/utils.c @@ -477,7 +477,7 @@ int get_prefix_1(inet_prefix *dst, char *arg, int family) err = get_addr_1(dst, arg, family); if (err == 0) { - dst->bitlen = af_bit_len(family); + dst->bitlen = af_bit_len(dst->family); if (slash) { if (get_netmask(&plen, slash+1, 0) From d59102975e91b5c3b436df238f66f861765d5cd9 Mon Sep 17 00:00:00 2001 From: Vadim Kochan Date: Thu, 19 Mar 2015 23:09:28 +0200 Subject: [PATCH 040/141] man ip-link: Add ip-netns(8) in 'SEE ALSO' Signed-off-by: Vadim Kochan --- man/man8/ip-link.8.in | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/man/man8/ip-link.8.in b/man/man8/ip-link.8.in index 9bbf2d30..0e3bb5e6 100644 --- a/man/man8/ip-link.8.in +++ b/man/man8/ip-link.8.in @@ -900,7 +900,8 @@ and the outer UDP checksum and remote checksum offload are enabled. .SH SEE ALSO .br -.BR ip (8) +.BR ip (8), +.BR ip-netns (8) .SH AUTHOR Original Manpage by Michail Litvak From 599fc319eb41a2450b1ff4927a5849439ba5c45c Mon Sep 17 00:00:00 2001 From: Vadim Kochan Date: Tue, 24 Mar 2015 11:41:52 +0200 Subject: [PATCH 041/141] man ip-netns: Fix syntax in default ns process, indent's Signed-off-by: Vadim Kochan --- man/man8/ip-netns.8 | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/man/man8/ip-netns.8 b/man/man8/ip-netns.8 index d4966758..d34cdfe3 100644 --- a/man/man8/ip-netns.8 +++ b/man/man8/ip-netns.8 @@ -46,7 +46,7 @@ ip-netns \- process network namespace management A network namespace is logically another copy of the network stack, with its own routes, firewall rules, and network devices. -By default process inherits network namespace from its parent. Initially all +By default a process inherits its network namespace from its parent. 
Initially all the processes share the same default network namespace from the init process. By convention a named network namespace is an object at @@ -103,10 +103,18 @@ option was specified then all the network namespace names will be removed. It is possible to lose the physical device when it was moved to netns and then this netns was deleted with a running process: - $ ip netns add net0 - $ ip link set dev eth0 netns net0 - $ ip netns exec net0 SOME_PROCESS_IN_BACKGROUND - $ ip netns del net0 +.RS 10 +$ ip netns add net0 +.RE +.RS 10 +$ ip link set dev eth0 netns net0 +.RE +.RS 10 +$ ip netns exec net0 SOME_PROCESS_IN_BACKGROUND +.RE +.RS 10 +$ ip netns del net0 +.RE and eth0 will appear in the default netns only after SOME_PROCESS_IN_BACKGROUND will exit or will be killed. To prevent this the processes running in net0 From 51cf36756cd77c27fa07f8be23c7c4e3997fee95 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Wed, 18 Mar 2015 10:13:34 +0100 Subject: [PATCH 042/141] tc: m_bpf: fix next arg selection after tc opcode Next argument after the tc opcode/verdict is optional, using NEXT_ARG() requires to have another argument after that one otherwise tc will bail out. Therefore, we need to advance to the next argument manually as done elsewhere. 
Fixes: 86ab59a6660f ("tc: add support for BPF based actions") Signed-off-by: Daniel Borkmann Acked-by: Jiri Pirko --- tc/m_bpf.c | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/tc/m_bpf.c b/tc/m_bpf.c index 3b864f9d..bc6cc47a 100644 --- a/tc/m_bpf.c +++ b/tc/m_bpf.c @@ -89,20 +89,25 @@ static int parse_bpf(struct action_util *a, int *argc_p, char ***argv_p, if (argc) { if (matches(*argv, "reclassify") == 0) { parm.action = TC_ACT_RECLASSIFY; - NEXT_ARG(); + argc--; + argv++; } else if (matches(*argv, "pipe") == 0) { parm.action = TC_ACT_PIPE; - NEXT_ARG(); + argc--; + argv++; } else if (matches(*argv, "drop") == 0 || matches(*argv, "shot") == 0) { parm.action = TC_ACT_SHOT; - NEXT_ARG(); + argc--; + argv++; } else if (matches(*argv, "continue") == 0) { parm.action = TC_ACT_UNSPEC; - NEXT_ARG(); + argc--; + argv++; } else if (matches(*argv, "pass") == 0) { parm.action = TC_ACT_OK; - NEXT_ARG(); + argc--; + argv++; } } From 61333d24428835c1b9d517993a4e7ef08849dece Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Sun, 15 Mar 2015 12:35:23 -0700 Subject: [PATCH 043/141] update headers files for net-next Use sanitized headers from 4.0.0-rc3 --- include/linux/if_addr.h | 1 + include/linux/if_link.h | 1 + include/linux/pkt_cls.h | 2 ++ include/linux/rtnetlink.h | 12 ++++++++++++ 4 files changed, 16 insertions(+) diff --git a/include/linux/if_addr.h b/include/linux/if_addr.h index cc375e42..2033adcf 100644 --- a/include/linux/if_addr.h +++ b/include/linux/if_addr.h @@ -50,6 +50,7 @@ enum { #define IFA_F_PERMANENT 0x80 #define IFA_F_MANAGETEMPADDR 0x100 #define IFA_F_NOPREFIXROUTE 0x200 +#define IFA_F_MCAUTOJOIN 0x400 struct ifa_cacheinfo { __u32 ifa_prefered; diff --git a/include/linux/if_link.h b/include/linux/if_link.h index 3450c3fb..7c40db4f 100644 --- a/include/linux/if_link.h +++ b/include/linux/if_link.h @@ -245,6 +245,7 @@ enum { IFLA_BRPORT_UNICAST_FLOOD, /* flood unicast traffic */ IFLA_BRPORT_PROXYARP, /* proxy ARP */ 
IFLA_BRPORT_LEARNING_SYNC, /* mac learning sync from device */ + IFLA_BRPORT_PROXYARP_WIFI, /* proxy ARP for Wi-Fi */ __IFLA_BRPORT_MAX }; #define IFLA_BRPORT_MAX (__IFLA_BRPORT_MAX - 1) diff --git a/include/linux/pkt_cls.h b/include/linux/pkt_cls.h index 25731dfb..bf08e76b 100644 --- a/include/linux/pkt_cls.h +++ b/include/linux/pkt_cls.h @@ -397,6 +397,8 @@ enum { TCA_BPF_CLASSID, TCA_BPF_OPS_LEN, TCA_BPF_OPS, + TCA_BPF_FD, + TCA_BPF_NAME, __TCA_BPF_MAX, }; diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h index 3eb78105..28650a31 100644 --- a/include/linux/rtnetlink.h +++ b/include/linux/rtnetlink.h @@ -303,6 +303,9 @@ enum rtattr_type_t { RTA_TABLE, RTA_MARK, RTA_MFC_STATS, + RTA_VIA, + RTA_NEWDST, + RTA_PREF, __RTA_MAX }; @@ -332,6 +335,7 @@ struct rtnexthop { #define RTNH_F_DEAD 1 /* Nexthop is dead (used by multipath) */ #define RTNH_F_PERVASIVE 2 /* Do recursive gateway lookup */ #define RTNH_F_ONLINK 4 /* Gateway is forced on link */ +#define RTNH_F_EXTERNAL 8 /* Route installed externally */ /* Macros to handle hexthops */ @@ -344,6 +348,12 @@ struct rtnexthop { #define RTNH_SPACE(len) RTNH_ALIGN(RTNH_LENGTH(len)) #define RTNH_DATA(rtnh) ((struct rtattr*)(((char*)(rtnh)) + RTNH_LENGTH(0))) +/* RTA_VIA */ +struct rtvia { + __kernel_sa_family_t rtvia_family; + __u8 rtvia_addr[0]; +}; + /* RTM_CACHEINFO */ struct rta_cacheinfo { @@ -621,6 +631,8 @@ enum rtnetlink_groups { #define RTNLGRP_IPV6_NETCONF RTNLGRP_IPV6_NETCONF RTNLGRP_MDB, #define RTNLGRP_MDB RTNLGRP_MDB + RTNLGRP_MPLS_ROUTE, +#define RTNLGRP_MPLS_ROUTE RTNLGRP_MPLS_ROUTE __RTNLGRP_MAX }; #define RTNLGRP_MAX (__RTNLGRP_MAX - 1) From 655444bdadfcf348d4e668476f493e542587446b Mon Sep 17 00:00:00 2001 From: Scott Feldman Date: Sat, 7 Mar 2015 22:15:35 -0800 Subject: [PATCH 044/141] route: label externally offloaded routes On ip route print dump, label externally offloaded routes with "external". Offloaded routes are flagged with RTNH_F_EXTERNAL, a recent addition to net-next.
For example: $ ip route default via 192.168.0.2 dev eth0 11.0.0.0/30 dev swp1 proto kernel scope link src 11.0.0.2 external 11.0.0.4/30 via 11.0.0.1 dev swp1 proto zebra metric 20 external 11.0.0.8/30 dev swp2 proto kernel scope link src 11.0.0.10 external 11.0.0.12/30 via 11.0.0.9 dev swp2 proto zebra metric 20 external 12.0.0.2 proto zebra metric 30 external nexthop via 11.0.0.1 dev swp1 weight 1 nexthop via 11.0.0.9 dev swp2 weight 1 12.0.0.3 via 11.0.0.1 dev swp1 proto zebra metric 20 external 12.0.0.4 via 11.0.0.9 dev swp2 proto zebra metric 20 external 192.168.0.0/24 dev eth0 proto kernel scope link src 192.168.0.15 Signed-off-by: Scott Feldman Reviewed-by: Jiri Pirko --- ip/iproute.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/ip/iproute.c b/ip/iproute.c index 024d401c..35418af3 100644 --- a/ip/iproute.c +++ b/ip/iproute.c @@ -412,6 +412,8 @@ int print_route(const struct sockaddr_nl *who, struct nlmsghdr *n, void *arg) fprintf(fp, "onlink "); if (r->rtm_flags & RTNH_F_PERVASIVE) fprintf(fp, "pervasive "); + if (r->rtm_flags & RTNH_F_EXTERNAL) + fprintf(fp, "external "); if (r->rtm_flags & RTM_F_NOTIFY) fprintf(fp, "notify "); if (tb[RTA_MARK]) { From e31867ac30cf95155d113bbfaedf6b99f98a677f Mon Sep 17 00:00:00 2001 From: Madhu Challa Date: Wed, 4 Mar 2015 10:30:10 -0800 Subject: [PATCH 045/141] ip: enable configuring multicast group autojoin Joining multicast group on ethernet level via "ip maddr" command would not work if we have an Ethernet switch that does igmp snooping since the switch would not replicate multicast packets on ports that did not have IGMP reports for the multicast addresses. Linux vxlan interfaces created via "ip link add vxlan" have the group option that enables them to do the required join. By extending ip address command with option "autojoin" we can get similar functionality for openvswitch vxlan interfaces as well as other tunneling mechanisms that need to receive multicast traffic.
example: ip address add 224.1.1.10/24 dev eth5 autojoin ip address del 224.1.1.10/24 dev eth5 --- ip/ipaddress.c | 26 +++++++++++++++++++++++++- 1 file changed, 25 insertions(+), 1 deletion(-) diff --git a/ip/ipaddress.c b/ip/ipaddress.c index 99a6ab59..e582da03 100644 --- a/ip/ipaddress.c +++ b/ip/ipaddress.c @@ -85,7 +85,7 @@ static void usage(void) fprintf(stderr, " [-]tentative | [-]deprecated | [-]dadfailed | temporary |\n"); fprintf(stderr, " CONFFLAG-LIST ]\n"); fprintf(stderr, "CONFFLAG-LIST := [ CONFFLAG-LIST ] CONFFLAG\n"); - fprintf(stderr, "CONFFLAG := [ home | nodad | mngtmpaddr | noprefixroute ]\n"); + fprintf(stderr, "CONFFLAG := [ home | nodad | mngtmpaddr | noprefixroute | autojoin ]\n"); fprintf(stderr, "LIFETIME := [ valid_lft LFT ] [ preferred_lft LFT ]\n"); fprintf(stderr, "LFT := forever | SECONDS\n"); @@ -915,6 +915,10 @@ int print_addrinfo(const struct sockaddr_nl *who, struct nlmsghdr *n, ifa_flags &= ~IFA_F_NOPREFIXROUTE; fprintf(fp, "noprefixroute "); } + if (ifa_flags & IFA_F_MCAUTOJOIN) { + ifa_flags &= ~IFA_F_MCAUTOJOIN; + fprintf(fp, "autojoin "); + } if (!(ifa_flags & IFA_F_PERMANENT)) { fprintf(fp, "dynamic "); } else @@ -1354,6 +1358,9 @@ static int ipaddr_list_flush_or_save(int argc, char **argv, int action) } else if (strcmp(*argv, "noprefixroute") == 0) { filter.flags |= IFA_F_NOPREFIXROUTE; filter.flagmask |= IFA_F_NOPREFIXROUTE; + } else if (strcmp(*argv, "autojoin") == 0) { + filter.flags |= IFA_F_MCAUTOJOIN; + filter.flagmask |= IFA_F_MCAUTOJOIN; } else if (strcmp(*argv, "dadfailed") == 0) { filter.flags |= IFA_F_DADFAILED; filter.flagmask |= IFA_F_DADFAILED; @@ -1558,6 +1565,16 @@ static int default_scope(inet_prefix *lcl) return 0; } +static bool ipaddr_is_multicast(inet_prefix *a) +{ + if (a->family == AF_INET) + return IN_MULTICAST(ntohl(a->data[0])); + else if (a->family == AF_INET6) + return IN6_IS_ADDR_MULTICAST(a->data); + else + return false; +} + static int ipaddr_modify(int cmd, int flags, int argc, char **argv) { 
struct { @@ -1665,6 +1682,8 @@ static int ipaddr_modify(int cmd, int flags, int argc, char **argv) ifa_flags |= IFA_F_MANAGETEMPADDR; } else if (strcmp(*argv, "noprefixroute") == 0) { ifa_flags |= IFA_F_NOPREFIXROUTE; + } else if (strcmp(*argv, "autojoin") == 0) { + ifa_flags |= IFA_F_MCAUTOJOIN; } else { if (strcmp(*argv, "local") == 0) { NEXT_ARG(); @@ -1755,6 +1774,11 @@ static int ipaddr_modify(int cmd, int flags, int argc, char **argv) sizeof(cinfo)); } + if ((ifa_flags & IFA_F_MCAUTOJOIN) && !ipaddr_is_multicast(&lcl)) { + fprintf(stderr, "autojoin needs multicast address\n"); + return -1; + } + if (rtnl_talk(&rth, &req.n, 0, 0, NULL) < 0) return -2; From b54ac87ef831ae177bc5b29853a17371e943130d Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Mon, 16 Mar 2015 18:10:56 +0100 Subject: [PATCH 046/141] misc: header rebase, add bpf.h Include the bpf.h uapi header file. Signed-off-by: Daniel Borkmann --- include/linux/bpf.h | 183 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 183 insertions(+) create mode 100644 include/linux/bpf.h diff --git a/include/linux/bpf.h b/include/linux/bpf.h new file mode 100644 index 00000000..54e816b1 --- /dev/null +++ b/include/linux/bpf.h @@ -0,0 +1,183 @@ +/* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. 
+ */ +#ifndef __LINUX_BPF_H__ +#define __LINUX_BPF_H__ + +#include +#include + +/* Extended instruction set based on top of classic BPF */ + +/* instruction classes */ +#define BPF_ALU64 0x07 /* alu mode in double word width */ + +/* ld/ldx fields */ +#define BPF_DW 0x18 /* double word */ +#define BPF_XADD 0xc0 /* exclusive add */ + +/* alu/jmp fields */ +#define BPF_MOV 0xb0 /* mov reg to reg */ +#define BPF_ARSH 0xc0 /* sign extending arithmetic shift right */ + +/* change endianness of a register */ +#define BPF_END 0xd0 /* flags for endianness conversion: */ +#define BPF_TO_LE 0x00 /* convert to little-endian */ +#define BPF_TO_BE 0x08 /* convert to big-endian */ +#define BPF_FROM_LE BPF_TO_LE +#define BPF_FROM_BE BPF_TO_BE + +#define BPF_JNE 0x50 /* jump != */ +#define BPF_JSGT 0x60 /* SGT is signed '>', GT in x86 */ +#define BPF_JSGE 0x70 /* SGE is signed '>=', GE in x86 */ +#define BPF_CALL 0x80 /* function call */ +#define BPF_EXIT 0x90 /* function return */ + +/* Register numbers */ +enum { + BPF_REG_0 = 0, + BPF_REG_1, + BPF_REG_2, + BPF_REG_3, + BPF_REG_4, + BPF_REG_5, + BPF_REG_6, + BPF_REG_7, + BPF_REG_8, + BPF_REG_9, + BPF_REG_10, + __MAX_BPF_REG, +}; + +/* BPF has 10 general purpose 64-bit registers and stack frame. 
*/ +#define MAX_BPF_REG __MAX_BPF_REG + +struct bpf_insn { + __u8 code; /* opcode */ + __u8 dst_reg:4; /* dest register */ + __u8 src_reg:4; /* source register */ + __s16 off; /* signed offset */ + __s32 imm; /* signed immediate constant */ +}; + +/* BPF syscall commands */ +enum bpf_cmd { + /* create a map with given type and attributes + * fd = bpf(BPF_MAP_CREATE, union bpf_attr *, u32 size) + * returns fd or negative error + * map is deleted when fd is closed + */ + BPF_MAP_CREATE, + + /* lookup key in a given map + * err = bpf(BPF_MAP_LOOKUP_ELEM, union bpf_attr *attr, u32 size) + * Using attr->map_fd, attr->key, attr->value + * returns zero and stores found elem into value + * or negative error + */ + BPF_MAP_LOOKUP_ELEM, + + /* create or update key/value pair in a given map + * err = bpf(BPF_MAP_UPDATE_ELEM, union bpf_attr *attr, u32 size) + * Using attr->map_fd, attr->key, attr->value, attr->flags + * returns zero or negative error + */ + BPF_MAP_UPDATE_ELEM, + + /* find and delete elem by key in a given map + * err = bpf(BPF_MAP_DELETE_ELEM, union bpf_attr *attr, u32 size) + * Using attr->map_fd, attr->key + * returns zero or negative error + */ + BPF_MAP_DELETE_ELEM, + + /* lookup key in a given map and return next key + * err = bpf(BPF_MAP_GET_NEXT_KEY, union bpf_attr *attr, u32 size) + * Using attr->map_fd, attr->key, attr->next_key + * returns zero and stores next key or negative error + */ + BPF_MAP_GET_NEXT_KEY, + + /* verify and load eBPF program + * prog_fd = bpf(BPF_PROG_LOAD, union bpf_attr *attr, u32 size) + * Using attr->prog_type, attr->insns, attr->license + * returns fd or negative error + */ + BPF_PROG_LOAD, +}; + +enum bpf_map_type { + BPF_MAP_TYPE_UNSPEC, + BPF_MAP_TYPE_HASH, + BPF_MAP_TYPE_ARRAY, +}; + +enum bpf_prog_type { + BPF_PROG_TYPE_UNSPEC, + BPF_PROG_TYPE_SOCKET_FILTER, + BPF_PROG_TYPE_SCHED_CLS, +}; + +#define BPF_PSEUDO_MAP_FD 1 + +/* flags for BPF_MAP_UPDATE_ELEM command */ +#define BPF_ANY 0 /* create new element or update 
existing */ +#define BPF_NOEXIST 1 /* create new element if it didn't exist */ +#define BPF_EXIST 2 /* update existing element */ + +union bpf_attr { + struct { /* anonymous struct used by BPF_MAP_CREATE command */ + __u32 map_type; /* one of enum bpf_map_type */ + __u32 key_size; /* size of key in bytes */ + __u32 value_size; /* size of value in bytes */ + __u32 max_entries; /* max number of entries in a map */ + }; + + struct { /* anonymous struct used by BPF_MAP_*_ELEM commands */ + __u32 map_fd; + __aligned_u64 key; + union { + __aligned_u64 value; + __aligned_u64 next_key; + }; + __u64 flags; + }; + + struct { /* anonymous struct used by BPF_PROG_LOAD command */ + __u32 prog_type; /* one of enum bpf_prog_type */ + __u32 insn_cnt; + __aligned_u64 insns; + __aligned_u64 license; + __u32 log_level; /* verbosity level of verifier */ + __u32 log_size; /* size of user buffer */ + __aligned_u64 log_buf; /* user supplied buffer */ + }; +} __attribute__((aligned(8))); + +/* integer value in 'imm' field of BPF_CALL instruction selects which helper + * function eBPF program intends to call + */ +enum bpf_func_id { + BPF_FUNC_unspec, + BPF_FUNC_map_lookup_elem, /* void *map_lookup_elem(&map, &key) */ + BPF_FUNC_map_update_elem, /* int map_update_elem(&map, &key, &value, flags) */ + BPF_FUNC_map_delete_elem, /* int map_delete_elem(&map, &key) */ + BPF_FUNC_get_prandom_u32, /* u32 prandom_u32(void) */ + BPF_FUNC_get_smp_processor_id, /* u32 raw_smp_processor_id(void) */ + __BPF_FUNC_MAX_ID, +}; + +/* user accessible mirror of in-kernel sk_buff. 
+ * new fields can only be added to the end of this structure + */ +struct __sk_buff { + __u32 len; + __u32 pkt_type; + __u32 mark; + __u32 queue_mapping; +}; + +#endif /* __LINUX_BPF_H__ */ From cbdc3ed88a955e22d3a4fe0a6ed03ac17e339042 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Tue, 24 Mar 2015 15:21:24 -0700 Subject: [PATCH 047/141] update kernel headers to net-next 4.0-rc5 Latest features --- include/linux/bpf.h | 5 +++++ include/linux/filter.h | 3 ++- include/linux/if_addr.h | 1 + include/linux/if_link.h | 5 +++++ include/linux/neighbour.h | 1 + include/linux/tc_act/tc_bpf.h | 2 ++ 6 files changed, 16 insertions(+), 1 deletion(-) diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 54e816b1..77f33a6a 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -119,6 +119,7 @@ enum bpf_prog_type { BPF_PROG_TYPE_UNSPEC, BPF_PROG_TYPE_SOCKET_FILTER, BPF_PROG_TYPE_SCHED_CLS, + BPF_PROG_TYPE_SCHED_ACT, }; #define BPF_PSEUDO_MAP_FD 1 @@ -178,6 +179,10 @@ struct __sk_buff { __u32 pkt_type; __u32 mark; __u32 queue_mapping; + __u32 protocol; + __u32 vlan_present; + __u32 vlan_tci; + __u32 vlan_proto; }; #endif /* __LINUX_BPF_H__ */ diff --git a/include/linux/filter.h b/include/linux/filter.h index 8688a985..344781d7 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -77,7 +77,8 @@ struct sock_fprog { /* Required for SO_ATTACH_FILTER.
*/ #define SKF_AD_VLAN_TAG_PRESENT 48 #define SKF_AD_PAY_OFFSET 52 #define SKF_AD_RANDOM 56 -#define SKF_AD_MAX 60 +#define SKF_AD_VLAN_TPID 60 +#define SKF_AD_MAX 64 #define SKF_NET_OFF (-0x100000) #define SKF_LL_OFF (-0x200000) diff --git a/include/linux/if_addr.h b/include/linux/if_addr.h index 2033adcf..26f0ecff 100644 --- a/include/linux/if_addr.h +++ b/include/linux/if_addr.h @@ -51,6 +51,7 @@ enum { #define IFA_F_MANAGETEMPADDR 0x100 #define IFA_F_NOPREFIXROUTE 0x200 #define IFA_F_MCAUTOJOIN 0x400 +#define IFA_F_STABLE_PRIVACY 0x800 struct ifa_cacheinfo { __u32 ifa_prefered; diff --git a/include/linux/if_link.h b/include/linux/if_link.h index 7c40db4f..6689e8fd 100644 --- a/include/linux/if_link.h +++ b/include/linux/if_link.h @@ -147,6 +147,7 @@ enum { IFLA_CARRIER_CHANGES, IFLA_PHYS_SWITCH_ID, IFLA_LINK_NETNSID, + IFLA_PHYS_PORT_NAME, __IFLA_MAX }; @@ -213,6 +214,7 @@ enum { enum in6_addr_gen_mode { IN6_ADDR_GEN_MODE_EUI64, IN6_ADDR_GEN_MODE_NONE, + IN6_ADDR_GEN_MODE_STABLE_PRIVACY, }; /* Bridge section */ @@ -222,6 +224,9 @@ enum { IFLA_BR_FORWARD_DELAY, IFLA_BR_HELLO_TIME, IFLA_BR_MAX_AGE, + IFLA_BR_AGEING_TIME, + IFLA_BR_STP_STATE, + IFLA_BR_PRIORITY, __IFLA_BR_MAX, }; diff --git a/include/linux/neighbour.h b/include/linux/neighbour.h index 3873a355..2e35c61b 100644 --- a/include/linux/neighbour.h +++ b/include/linux/neighbour.h @@ -126,6 +126,7 @@ enum { NDTPA_PROXY_QLEN, /* u32 */ NDTPA_LOCKTIME, /* u64, msecs */ NDTPA_QUEUE_LENBYTES, /* u32 */ + NDTPA_MCAST_REPROBES, /* u32 */ __NDTPA_MAX }; #define NDTPA_MAX (__NDTPA_MAX - 1) diff --git a/include/linux/tc_act/tc_bpf.h b/include/linux/tc_act/tc_bpf.h index 5288bd77..07f17cc7 100644 --- a/include/linux/tc_act/tc_bpf.h +++ b/include/linux/tc_act/tc_bpf.h @@ -24,6 +24,8 @@ enum { TCA_ACT_BPF_PARMS, TCA_ACT_BPF_OPS_LEN, TCA_ACT_BPF_OPS, + TCA_ACT_BPF_FD, + TCA_ACT_BPF_NAME, __TCA_ACT_BPF_MAX, }; #define TCA_ACT_BPF_MAX (__TCA_ACT_BPF_MAX - 1) From 11c39b5e98a163889fe5e1840e1b2a105bc33680 Mon Sep 17 
00:00:00 2001 From: Daniel Borkmann Date: Mon, 16 Mar 2015 19:37:41 +0100 Subject: [PATCH 048/141] tc: add eBPF support to f_bpf This work adds the tc frontend for kernel commit e2e9b6541dd4 ("cls_bpf: add initial eBPF support for programmable classifiers"). A C-like classifier program (f.e. see e2e9b6541dd4) is being compiled via LLVM's eBPF backend into an ELF file, that is then being passed to tc. tc then loads, if any, eBPF maps and eBPF opcodes (with fixed-up eBPF map file descriptors) out of its dedicated sections, and via bpf(2) into the kernel and then the resulting fd via netlink down to cls_bpf. cls_bpf allows for annotations, currently, I've used the file name for that, so that the user can easily identify his filter when dumping configurations back. Example usage: clang -O2 -emit-llvm -c cls.c -o - | llc -march=bpf -filetype=obj -o cls.o tc filter add dev em1 parent 1: bpf run object-file cls.o classid x:y tc filter show dev em1 [...] filter parent 1: protocol all pref 49152 bpf handle 0x1 flowid x:y cls.o I placed the parser bits derived from Alexei's kernel sample, into tc_bpf.c as my next step is to also add the same support for BPF action, so we can have a fully fledged eBPF classifier and action in tc. 
Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov --- configure | 26 ++++ include/utils.h | 5 + tc/Makefile | 5 + tc/f_bpf.c | 36 +++-- tc/tc_bpf.c | 392 ++++++++++++++++++++++++++++++++++++++++++++++++ tc/tc_bpf.h | 58 ++++++- 6 files changed, 511 insertions(+), 11 deletions(-) diff --git a/configure b/configure index 631938e9..7bec8a95 100755 --- a/configure +++ b/configure @@ -266,6 +266,29 @@ EOF rm -f $TMPDIR/ipsettest.c $TMPDIR/ipsettest } +check_elf() +{ + cat >$TMPDIR/elftest.c < +#include +int main(void) +{ + Elf_Scn *scn; + GElf_Shdr shdr; + return elf_version(EV_CURRENT); +} +EOF + + if $CC -I$INCLUDE -o $TMPDIR/elftest $TMPDIR/elftest.c -lelf >/dev/null 2>&1 + then + echo "TC_CONFIG_ELF:=y" >>Config + echo "yes" + else + echo "no" + fi + rm -f $TMPDIR/elftest.c $TMPDIR/elftest +} + check_selinux() # SELinux is a compile time option in the ss utility { @@ -306,5 +329,8 @@ check_netnsid echo -n "SELinux support: " check_selinux +echo -n "ELF support: " +check_elf + echo -e "\nDocs" check_docs diff --git a/include/utils.h b/include/utils.h index 9151c4f1..59b22804 100644 --- a/include/utils.h +++ b/include/utils.h @@ -157,6 +157,11 @@ void print_nlmsg_timestamp(FILE *fp, const struct nlmsghdr *n); #define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) +#ifndef __check_format_string +# define __check_format_string(pos_str, pos_args) \ + __attribute__ ((format (printf, (pos_str), (pos_args)))) +#endif + extern int cmdlineno; extern ssize_t getcmdline(char **line, size_t *len, FILE *in); extern int makeargs(char *line, char *argv[], int maxargs); diff --git a/tc/Makefile b/tc/Makefile index d831a153..2eff082c 100644 --- a/tc/Makefile +++ b/tc/Makefile @@ -89,6 +89,11 @@ else endif endif +ifeq ($(TC_CONFIG_ELF),y) + CFLAGS += -DHAVE_ELF + LDLIBS += -lelf +endif + TCOBJ += $(TCMODULES) LDLIBS += -L. 
-ltc -lm diff --git a/tc/f_bpf.c b/tc/f_bpf.c index e2af94e3..6d765807 100644 --- a/tc/f_bpf.c +++ b/tc/f_bpf.c @@ -34,13 +34,15 @@ static void explain(void) fprintf(stderr, "\n"); fprintf(stderr, " [inline]: run bytecode BPF_BYTECODE\n"); fprintf(stderr, " [from file]: run bytecode-file FILE\n"); + fprintf(stderr, " [from file]: run object-file FILE\n"); fprintf(stderr, "\n"); fprintf(stderr, " [ action ACTION_SPEC ]\n"); fprintf(stderr, " [ classid CLASSID ]\n"); fprintf(stderr, "\n"); fprintf(stderr, "Where BPF_BYTECODE := \'s,c t f k,c t f k,c t f k,...\'\n"); fprintf(stderr, " c,t,f,k and s are decimals; s denotes number of 4-tuples\n"); - fprintf(stderr, "Where FILE points to a file containing the BPF_BYTECODE string\n"); + fprintf(stderr, "Where FILE points to a file containing the BPF_BYTECODE string,\n"); + fprintf(stderr, "or an ELF file containing eBPF map definitions and bytecode.\n"); fprintf(stderr, "\nACTION_SPEC := ... look at individual actions\n"); fprintf(stderr, "NOTE: CLASSID is parsed as hexadecimal input.\n"); } @@ -71,31 +73,40 @@ static int bpf_parse_opt(struct filter_util *qu, char *handle, while (argc > 0) { if (matches(*argv, "run") == 0) { - bool from_file; + bool from_file = true, ebpf; struct sock_filter bpf_ops[BPF_MAXINSNS]; - __u16 bpf_len; int ret; NEXT_ARG(); if (strcmp(*argv, "bytecode-file") == 0) { - from_file = true; + ebpf = false; } else if (strcmp(*argv, "bytecode") == 0) { from_file = false; + ebpf = false; + } else if (strcmp(*argv, "object-file") == 0) { + ebpf = true; } else { fprintf(stderr, "What is \"%s\"?\n", *argv); explain(); return -1; } NEXT_ARG(); - ret = bpf_parse_ops(argc, argv, bpf_ops, from_file); + ret = ebpf ? bpf_open_object(*argv, BPF_PROG_TYPE_SCHED_CLS) : + bpf_parse_ops(argc, argv, bpf_ops, from_file); if (ret < 0) { - fprintf(stderr, "Illegal \"bytecode\"\n"); + fprintf(stderr, "%s\n", ebpf ? 
+ "Could not load object" : + "Illegal \"bytecode\""); return -1; } - bpf_len = ret; - addattr16(n, MAX_MSG, TCA_BPF_OPS_LEN, bpf_len); - addattr_l(n, MAX_MSG, TCA_BPF_OPS, &bpf_ops, - bpf_len * sizeof(struct sock_filter)); + if (ebpf) { + addattr32(n, MAX_MSG, TCA_BPF_FD, ret); + addattrstrz(n, MAX_MSG, TCA_BPF_NAME, *argv); + } else { + addattr16(n, MAX_MSG, TCA_BPF_OPS_LEN, ret); + addattr_l(n, MAX_MSG, TCA_BPF_OPS, &bpf_ops, + ret * sizeof(struct sock_filter)); + } } else if (matches(*argv, "classid") == 0 || strcmp(*argv, "flowid") == 0) { unsigned handle; @@ -153,6 +164,11 @@ static int bpf_print_opt(struct filter_util *qu, FILE *f, sprint_tc_classid(rta_getattr_u32(tb[TCA_BPF_CLASSID]), b1)); } + if (tb[TCA_BPF_NAME]) + fprintf(f, "%s ", rta_getattr_str(tb[TCA_BPF_NAME])); + else if (tb[TCA_BPF_FD]) + fprintf(f, "pfd %u ", rta_getattr_u32(tb[TCA_BPF_FD])); + if (tb[TCA_BPF_OPS] && tb[TCA_BPF_OPS_LEN]) bpf_print_ops(f, tb[TCA_BPF_OPS], rta_getattr_u16(tb[TCA_BPF_OPS_LEN])); diff --git a/tc/tc_bpf.c b/tc/tc_bpf.c index c6901d6c..3778d6b5 100644 --- a/tc/tc_bpf.c +++ b/tc/tc_bpf.c @@ -8,6 +8,7 @@ * * Authors: Daniel Borkmann * Jiri Pirko + * Alexei Starovoitov */ #include @@ -16,10 +17,19 @@ #include #include #include +#include +#include +#include +#include #include #include #include +#ifdef HAVE_ELF +#include +#include +#endif + #include "utils.h" #include "tc_util.h" #include "tc_bpf.h" @@ -144,3 +154,385 @@ void bpf_print_ops(FILE *f, struct rtattr *bpf_ops, __u16 len) fprintf(f, "%hu %hhu %hhu %u\'\n", ops[i].code, ops[i].jt, ops[i].jf, ops[i].k); } + +#ifdef HAVE_ELF +struct bpf_elf_sec_data { + GElf_Shdr sec_hdr; + char *sec_name; + Elf_Data *sec_data; +}; + +static char bpf_log_buf[8192]; + +static const char *prog_type_section(enum bpf_prog_type type) +{ + switch (type) { + case BPF_PROG_TYPE_SCHED_CLS: + return ELF_SECTION_CLASSIFIER; + /* case BPF_PROG_TYPE_SCHED_ACT: */ + /* return ELF_SECTION_ACTION; */ + default: + return NULL; + } +} + +static 
void bpf_dump_error(const char *format, ...) __check_format_string(1, 2); +static void bpf_dump_error(const char *format, ...) +{ + va_list vl; + + va_start(vl, format); + vfprintf(stderr, format, vl); + va_end(vl); + + fprintf(stderr, "%s", bpf_log_buf); + memset(bpf_log_buf, 0, sizeof(bpf_log_buf)); +} + +static int bpf_create_map(enum bpf_map_type type, unsigned int size_key, + unsigned int size_value, unsigned int max_elem) +{ + union bpf_attr attr = { + .map_type = type, + .key_size = size_key, + .value_size = size_value, + .max_entries = max_elem, + }; + + return bpf(BPF_MAP_CREATE, &attr, sizeof(attr)); +} + +static int bpf_prog_load(enum bpf_prog_type type, const struct bpf_insn *insns, + unsigned int len, const char *license) +{ + union bpf_attr attr = { + .prog_type = type, + .insns = bpf_ptr_to_u64(insns), + .insn_cnt = len / sizeof(struct bpf_insn), + .license = bpf_ptr_to_u64(license), + .log_buf = bpf_ptr_to_u64(bpf_log_buf), + .log_size = sizeof(bpf_log_buf), + .log_level = 1, + }; + + return bpf(BPF_PROG_LOAD, &attr, sizeof(attr)); +} + +static int bpf_prog_attach(enum bpf_prog_type type, const struct bpf_insn *insns, + unsigned int size, const char *license) +{ + int prog_fd = bpf_prog_load(type, insns, size, license); + + if (prog_fd < 0) + bpf_dump_error("BPF program rejected: %s\n", strerror(errno)); + + return prog_fd; +} + +static int bpf_map_attach(enum bpf_map_type type, unsigned int size_key, + unsigned int size_value, unsigned int max_elem) +{ + int map_fd = bpf_create_map(type, size_key, size_value, max_elem); + + if (map_fd < 0) + bpf_dump_error("BPF map rejected: %s\n", strerror(errno)); + + return map_fd; +} + +static void bpf_maps_init(int *map_fds, unsigned int max_fds) +{ + int i; + + for (i = 0; i < max_fds; i++) + map_fds[i] = -1; +} + +static void bpf_maps_destroy(const int *map_fds, unsigned int max_fds) +{ + int i; + + for (i = 0; i < max_fds; i++) { + if (map_fds[i] >= 0) + close(map_fds[i]); + } +} + +static int 
bpf_maps_attach(struct bpf_elf_map *maps, unsigned int num_maps, + int *map_fds, unsigned int max_fds) +{ + int i, ret; + + for (i = 0; i < num_maps && num_maps <= max_fds; i++) { + struct bpf_elf_map *map = &maps[i]; + + ret = bpf_map_attach(map->type, map->size_key, + map->size_value, map->max_elem); + if (ret < 0) + goto err_unwind; + + map_fds[i] = ret; + } + + return 0; + +err_unwind: + bpf_maps_destroy(map_fds, i); + return ret; +} + +static int bpf_fill_section_data(Elf *elf_fd, GElf_Ehdr *elf_hdr, int sec_index, + struct bpf_elf_sec_data *sec_data) +{ + GElf_Shdr sec_hdr; + Elf_Scn *sec_fd; + Elf_Data *sec_edata; + char *sec_name; + + memset(sec_data, 0, sizeof(*sec_data)); + + sec_fd = elf_getscn(elf_fd, sec_index); + if (!sec_fd) + return -EINVAL; + + if (gelf_getshdr(sec_fd, &sec_hdr) != &sec_hdr) + return -EIO; + + sec_name = elf_strptr(elf_fd, elf_hdr->e_shstrndx, + sec_hdr.sh_name); + if (!sec_name || !sec_hdr.sh_size) + return -ENOENT; + + sec_edata = elf_getdata(sec_fd, NULL); + if (!sec_edata || elf_getdata(sec_fd, sec_edata)) + return -EIO; + + memcpy(&sec_data->sec_hdr, &sec_hdr, sizeof(sec_hdr)); + sec_data->sec_name = sec_name; + sec_data->sec_data = sec_edata; + + return 0; +} + +static int bpf_apply_relo_data(struct bpf_elf_sec_data *data_relo, + struct bpf_elf_sec_data *data_insn, + Elf_Data *sym_tab, int *map_fds, int max_fds) +{ + Elf_Data *idata = data_insn->sec_data; + GElf_Shdr *rhdr = &data_relo->sec_hdr; + int relo_ent, relo_num = rhdr->sh_size / rhdr->sh_entsize; + struct bpf_insn *insns = idata->d_buf; + unsigned int num_insns = idata->d_size / sizeof(*insns); + + for (relo_ent = 0; relo_ent < relo_num; relo_ent++) { + unsigned int ioff, fnum; + GElf_Rel relo; + GElf_Sym sym; + + if (gelf_getrel(data_relo->sec_data, relo_ent, &relo) != &relo) + return -EIO; + + ioff = relo.r_offset / sizeof(struct bpf_insn); + if (ioff >= num_insns) + return -EINVAL; + if (insns[ioff].code != (BPF_LD | BPF_IMM | BPF_DW)) + return -EINVAL; + + if 
(gelf_getsym(sym_tab, GELF_R_SYM(relo.r_info), &sym) != &sym) + return -EIO; + + fnum = sym.st_value / sizeof(struct bpf_elf_map); + if (fnum >= max_fds) + return -EINVAL; + + insns[ioff].src_reg = BPF_PSEUDO_MAP_FD; + insns[ioff].imm = map_fds[fnum]; + } + + return 0; +} + +static int bpf_fetch_ancillary(Elf *elf_fd, GElf_Ehdr *elf_hdr, bool *sec_seen, + int *map_fds, unsigned int max_fds, + char *license, unsigned int lic_len, + Elf_Data **sym_tab) +{ + int sec_index, ret = -1; + + for (sec_index = 1; sec_index < elf_hdr->e_shnum; sec_index++) { + struct bpf_elf_sec_data data_anc; + + ret = bpf_fill_section_data(elf_fd, elf_hdr, sec_index, + &data_anc); + if (ret < 0) + continue; + + /* Extract and load eBPF map fds. */ + if (!strcmp(data_anc.sec_name, ELF_SECTION_MAPS)) { + struct bpf_elf_map *maps = data_anc.sec_data->d_buf; + unsigned int maps_num = data_anc.sec_data->d_size / + sizeof(*maps); + + sec_seen[sec_index] = true; + ret = bpf_maps_attach(maps, maps_num, map_fds, + max_fds); + if (ret < 0) + return ret; + } + /* Extract eBPF license. */ + else if (!strcmp(data_anc.sec_name, ELF_SECTION_LICENSE)) { + if (data_anc.sec_data->d_size > lic_len) + return -ENOMEM; + + sec_seen[sec_index] = true; + memcpy(license, data_anc.sec_data->d_buf, + data_anc.sec_data->d_size); + } + /* Extract symbol table for relocations (map fd fixups). */ + else if (data_anc.sec_hdr.sh_type == SHT_SYMTAB) { + sec_seen[sec_index] = true; + *sym_tab = data_anc.sec_data; + } + } + + return ret; +} + +static int bpf_fetch_prog_relo(Elf *elf_fd, GElf_Ehdr *elf_hdr, bool *sec_seen, + enum bpf_prog_type type, char *license, + Elf_Data *sym_tab, int *map_fds, unsigned int max_fds) +{ + int sec_index, prog_fd = -1; + + for (sec_index = 1; sec_index < elf_hdr->e_shnum; sec_index++) { + struct bpf_elf_sec_data data_relo, data_insn; + int ins_index, ret; + + /* Attach eBPF programs with relocation data (maps). 
*/ + ret = bpf_fill_section_data(elf_fd, elf_hdr, sec_index, + &data_relo); + if (ret < 0 || data_relo.sec_hdr.sh_type != SHT_REL) + continue; + + ins_index = data_relo.sec_hdr.sh_info; + + ret = bpf_fill_section_data(elf_fd, elf_hdr, ins_index, + &data_insn); + if (ret < 0) + continue; + if (strcmp(data_insn.sec_name, prog_type_section(type))) + continue; + + sec_seen[sec_index] = true; + sec_seen[ins_index] = true; + + ret = bpf_apply_relo_data(&data_relo, &data_insn, sym_tab, + map_fds, max_fds); + if (ret < 0) + continue; + + prog_fd = bpf_prog_attach(type, data_insn.sec_data->d_buf, + data_insn.sec_data->d_size, license); + if (prog_fd < 0) + continue; + + break; + } + + return prog_fd; +} + +static int bpf_fetch_prog(Elf *elf_fd, GElf_Ehdr *elf_hdr, bool *sec_seen, + enum bpf_prog_type type, char *license) +{ + int sec_index, prog_fd = -1; + + for (sec_index = 1; sec_index < elf_hdr->e_shnum; sec_index++) { + struct bpf_elf_sec_data data_insn; + int ret; + + /* Attach eBPF programs without relocation data. 
*/ + if (sec_seen[sec_index]) + continue; + + ret = bpf_fill_section_data(elf_fd, elf_hdr, sec_index, + &data_insn); + if (ret < 0) + continue; + if (strcmp(data_insn.sec_name, prog_type_section(type))) + continue; + + prog_fd = bpf_prog_attach(type, data_insn.sec_data->d_buf, + data_insn.sec_data->d_size, license); + if (prog_fd < 0) + continue; + + break; + } + + return prog_fd; +} + +int bpf_open_object(const char *path, enum bpf_prog_type type) +{ + int map_fds[ELF_MAX_MAPS], max_fds = ARRAY_SIZE(map_fds); + char license[ELF_MAX_LICENSE_LEN]; + int file_fd, prog_fd = -1, ret; + Elf_Data *sym_tab = NULL; + GElf_Ehdr elf_hdr; + bool *sec_seen; + Elf *elf_fd; + + if (elf_version(EV_CURRENT) == EV_NONE) + return -EINVAL; + + file_fd = open(path, O_RDONLY, 0); + if (file_fd < 0) + return -errno; + + elf_fd = elf_begin(file_fd, ELF_C_READ, NULL); + if (!elf_fd) { + ret = -EINVAL; + goto out; + } + + if (gelf_getehdr(elf_fd, &elf_hdr) != &elf_hdr) { + ret = -EIO; + goto out_elf; + } + + sec_seen = calloc(elf_hdr.e_shnum, sizeof(*sec_seen)); + if (!sec_seen) { + ret = -ENOMEM; + goto out_elf; + } + + memset(license, 0, sizeof(license)); + bpf_maps_init(map_fds, max_fds); + + ret = bpf_fetch_ancillary(elf_fd, &elf_hdr, sec_seen, map_fds, max_fds, + license, sizeof(license), &sym_tab); + if (ret < 0) + goto out_maps; + if (sym_tab) + prog_fd = bpf_fetch_prog_relo(elf_fd, &elf_hdr, sec_seen, type, + license, sym_tab, map_fds, max_fds); + if (prog_fd < 0) + prog_fd = bpf_fetch_prog(elf_fd, &elf_hdr, sec_seen, type, + license); + if (prog_fd < 0) + goto out_maps; +out_sec: + free(sec_seen); +out_elf: + elf_end(elf_fd); +out: + close(file_fd); + return prog_fd; + +out_maps: + bpf_maps_destroy(map_fds, max_fds); + goto out_sec; +} + +#endif /* HAVE_ELF */ diff --git a/tc/tc_bpf.h b/tc/tc_bpf.h index 08cca927..ce647470 100644 --- a/tc/tc_bpf.h +++ b/tc/tc_bpf.h @@ -13,10 +13,42 @@ #ifndef _TC_BPF_H_ #define _TC_BPF_H_ 1 -#include #include #include #include +#include +#include 
+#include +#include +#include + +#include "utils.h" + +/* Note: + * + * Below ELF section names and bpf_elf_map structure definition + * are not (!) kernel ABI. It's rather a "contract" between the + * application and the BPF loader in tc. For compatibility, the + * section names should stay as-is. Introduction of aliases, if + * needed, are a possibility, though. + */ + +/* ELF section names, etc */ +#define ELF_SECTION_LICENSE "license" +#define ELF_SECTION_MAPS "maps" +#define ELF_SECTION_CLASSIFIER "classifier" +#define ELF_SECTION_ACTION "action" + +#define ELF_MAX_MAPS 64 +#define ELF_MAX_LICENSE_LEN 128 + +/* ELF map definition */ +struct bpf_elf_map { + __u32 type; + __u32 size_key; + __u32 size_value; + __u32 max_elem; +}; int bpf_parse_string(char *arg, bool from_file, __u16 *bpf_len, char **bpf_string, bool *need_release, @@ -25,4 +57,28 @@ int bpf_parse_ops(int argc, char **argv, struct sock_filter *bpf_ops, bool from_file); void bpf_print_ops(FILE *f, struct rtattr *bpf_ops, __u16 len); +static inline __u64 bpf_ptr_to_u64(const void *ptr) +{ + return (__u64) (unsigned long) ptr; +} + +#ifdef HAVE_ELF +int bpf_open_object(const char *path, enum bpf_prog_type type); + +static inline int bpf(int cmd, union bpf_attr *attr, unsigned int size) +{ +#ifdef __NR_bpf + return syscall(__NR_bpf, cmd, attr, size); +#else + errno = ENOSYS; + return -1; #endif +} +#else +static inline int bpf_open_object(const char *path, enum bpf_prog_type type) +{ + errno = ENOSYS; + return -1; +} +#endif /* HAVE_ELF */ +#endif /* _TC_BPF_H_ */ From 26dcdf3a91123c6bf748e06d1205d110d95f34db Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Sun, 15 Mar 2015 14:48:32 -0500 Subject: [PATCH 049/141] add a source address length parameter to rt_addr_n2a For some address families (like AF_PACKET) it is helpful to have the length when printing the address. Signed-off-by: "Eric W.
Biederman" --- include/utils.h | 2 +- ip/iplink_bond.c | 1 + ip/ipmroute.c | 2 ++ ip/ipprefix.c | 4 +++- ip/iproute.c | 11 +++++++---- ip/iprule.c | 10 ++++++---- ip/iptunnel.c | 2 +- ip/ipxfrm.c | 17 +++++++++++------ ip/link_ip6tnl.c | 2 ++ ip/xfrm_monitor.c | 8 +++++--- lib/utils.c | 4 ++-- 11 files changed, 41 insertions(+), 22 deletions(-) diff --git a/include/utils.h b/include/utils.h index 59b22804..f9a7e32d 100644 --- a/include/utils.h +++ b/include/utils.h @@ -106,7 +106,7 @@ extern int af_byte_len(int af); extern const char *format_host(int af, int len, const void *addr, char *buf, int buflen); -extern const char *rt_addr_n2a(int af, const void *addr, +extern const char *rt_addr_n2a(int af, int len, const void *addr, char *buf, int buflen); void missarg(const char *) __attribute__((noreturn)); diff --git a/ip/iplink_bond.c b/ip/iplink_bond.c index 3009ec91..a573f92b 100644 --- a/ip/iplink_bond.c +++ b/ip/iplink_bond.c @@ -415,6 +415,7 @@ static void bond_print_opt(struct link_util *lu, FILE *f, struct rtattr *tb[]) if (iptb[i]) fprintf(f, "%s", rt_addr_n2a(AF_INET, + RTA_PAYLOAD(iptb[i]), RTA_DATA(iptb[i]), buf, INET_ADDRSTRLEN)); diff --git a/ip/ipmroute.c b/ip/ipmroute.c index b4ed9f15..13ac8925 100644 --- a/ip/ipmroute.c +++ b/ip/ipmroute.c @@ -116,6 +116,7 @@ int print_mroute(const struct sockaddr_nl *who, struct nlmsghdr *n, void *arg) if (tb[RTA_SRC]) len = snprintf(obuf, sizeof(obuf), "(%s, ", rt_addr_n2a(family, + RTA_PAYLOAD(tb[RTA_SRC]), RTA_DATA(tb[RTA_SRC]), abuf, sizeof(abuf))); else @@ -123,6 +124,7 @@ int print_mroute(const struct sockaddr_nl *who, struct nlmsghdr *n, void *arg) if (tb[RTA_DST]) snprintf(obuf + len, sizeof(obuf) - len, "%s)", rt_addr_n2a(family, + RTA_PAYLOAD(tb[RTA_DST]), RTA_DATA(tb[RTA_DST]), abuf, sizeof(abuf))); else diff --git a/ip/ipprefix.c b/ip/ipprefix.c index 02c0efce..26b59615 100644 --- a/ip/ipprefix.c +++ b/ip/ipprefix.c @@ -80,7 +80,9 @@ int print_prefix(const struct sockaddr_nl *who, struct nlmsghdr *n, void 
*arg) pfx = (struct in6_addr *)RTA_DATA(tb[PREFIX_ADDRESS]); memset(abuf, '\0', sizeof(abuf)); - fprintf(fp, "%s", rt_addr_n2a(family, pfx, + fprintf(fp, "%s", rt_addr_n2a(family, + RTA_PAYLOAD(tb[PREFIX_ADDRESS]), + pfx, abuf, sizeof(abuf))); } fprintf(fp, "/%u ", prefix->prefix_len); diff --git a/ip/iproute.c b/ip/iproute.c index 35418af3..c73a3643 100644 --- a/ip/iproute.c +++ b/ip/iproute.c @@ -339,8 +339,9 @@ int print_route(const struct sockaddr_nl *who, struct nlmsghdr *n, void *arg) if (tb[RTA_DST]) { if (r->rtm_dst_len != host_len) { fprintf(fp, "%s/%u ", rt_addr_n2a(r->rtm_family, - RTA_DATA(tb[RTA_DST]), - abuf, sizeof(abuf)), + RTA_PAYLOAD(tb[RTA_DST]), + RTA_DATA(tb[RTA_DST]), + abuf, sizeof(abuf)), r->rtm_dst_len ); } else { @@ -358,8 +359,9 @@ int print_route(const struct sockaddr_nl *who, struct nlmsghdr *n, void *arg) if (tb[RTA_SRC]) { if (r->rtm_src_len != host_len) { fprintf(fp, "from %s/%u ", rt_addr_n2a(r->rtm_family, - RTA_DATA(tb[RTA_SRC]), - abuf, sizeof(abuf)), + RTA_PAYLOAD(tb[RTA_SRC]), + RTA_DATA(tb[RTA_SRC]), + abuf, sizeof(abuf)), r->rtm_src_len ); } else { @@ -401,6 +403,7 @@ int print_route(const struct sockaddr_nl *who, struct nlmsghdr *n, void *arg) */ fprintf(fp, " src %s ", rt_addr_n2a(r->rtm_family, + RTA_PAYLOAD(tb[RTA_PREFSRC]), RTA_DATA(tb[RTA_PREFSRC]), abuf, sizeof(abuf))); } diff --git a/ip/iprule.c b/ip/iprule.c index 54ed7536..967969c0 100644 --- a/ip/iprule.c +++ b/ip/iprule.c @@ -82,8 +82,9 @@ int print_rule(const struct sockaddr_nl *who, struct nlmsghdr *n, void *arg) if (tb[FRA_SRC]) { if (r->rtm_src_len != host_len) { fprintf(fp, "from %s/%u ", rt_addr_n2a(r->rtm_family, - RTA_DATA(tb[FRA_SRC]), - abuf, sizeof(abuf)), + RTA_PAYLOAD(tb[FRA_SRC]), + RTA_DATA(tb[FRA_SRC]), + abuf, sizeof(abuf)), r->rtm_src_len ); } else { @@ -102,8 +103,9 @@ int print_rule(const struct sockaddr_nl *who, struct nlmsghdr *n, void *arg) if (tb[FRA_DST]) { if (r->rtm_dst_len != host_len) { fprintf(fp, "to %s/%u ", 
rt_addr_n2a(r->rtm_family, - RTA_DATA(tb[FRA_DST]), - abuf, sizeof(abuf)), + RTA_PAYLOAD(tb[FRA_DST]), + RTA_DATA(tb[FRA_DST]), + abuf, sizeof(abuf)), r->rtm_dst_len ); } else { diff --git a/ip/iptunnel.c b/ip/iptunnel.c index caf8a28e..29188c45 100644 --- a/ip/iptunnel.c +++ b/ip/iptunnel.c @@ -343,7 +343,7 @@ static void print_tunnel(struct ip_tunnel_parm *p) p->name, tnl_strproto(p->iph.protocol), p->iph.daddr ? format_host(AF_INET, 4, &p->iph.daddr, s1, sizeof(s1)) : "any", - p->iph.saddr ? rt_addr_n2a(AF_INET, &p->iph.saddr, s2, sizeof(s2)) : "any"); + p->iph.saddr ? rt_addr_n2a(AF_INET, 4, &p->iph.saddr, s2, sizeof(s2)) : "any"); if (p->iph.protocol == IPPROTO_IPV6 && (p->i_flags & SIT_ISATAP)) { struct ip_tunnel_prl prl[16]; diff --git a/ip/ipxfrm.c b/ip/ipxfrm.c index 95f91a53..9aaf58d5 100644 --- a/ip/ipxfrm.c +++ b/ip/ipxfrm.c @@ -288,10 +288,10 @@ void xfrm_id_info_print(xfrm_address_t *saddr, struct xfrm_id *id, fputs(title, fp); memset(abuf, '\0', sizeof(abuf)); - fprintf(fp, "src %s ", rt_addr_n2a(family, + fprintf(fp, "src %s ", rt_addr_n2a(family, sizeof(*saddr), saddr, abuf, sizeof(abuf))); memset(abuf, '\0', sizeof(abuf)); - fprintf(fp, "dst %s", rt_addr_n2a(family, + fprintf(fp, "dst %s", rt_addr_n2a(family, sizeof(id->daddr), &id->daddr, abuf, sizeof(abuf))); fprintf(fp, "%s", _SL_); @@ -455,11 +455,15 @@ void xfrm_selector_print(struct xfrm_selector *sel, __u16 family, fputs(prefix, fp); memset(abuf, '\0', sizeof(abuf)); - fprintf(fp, "src %s/%u ", rt_addr_n2a(f, &sel->saddr, abuf, sizeof(abuf)), + fprintf(fp, "src %s/%u ", + rt_addr_n2a(f, sizeof(sel->saddr), &sel->saddr, + abuf, sizeof(abuf)), sel->prefixlen_s); memset(abuf, '\0', sizeof(abuf)); - fprintf(fp, "dst %s/%u ", rt_addr_n2a(f, &sel->daddr, abuf, sizeof(abuf)), + fprintf(fp, "dst %s/%u ", + rt_addr_n2a(f, sizeof(sel->daddr), &sel->daddr, + abuf, sizeof(abuf)), sel->prefixlen_d); if (sel->proto) @@ -755,7 +759,8 @@ void xfrm_xfrma_print(struct rtattr *tb[], __u16 family, memset(abuf, 
'\0', sizeof(abuf)); fprintf(fp, "addr %s", - rt_addr_n2a(family, &e->encap_oa, abuf, sizeof(abuf))); + rt_addr_n2a(family, sizeof(e->encap_oa), &e->encap_oa, + abuf, sizeof(abuf))); fprintf(fp, "%s", _SL_); } @@ -783,7 +788,7 @@ void xfrm_xfrma_print(struct rtattr *tb[], __u16 family, memset(abuf, '\0', sizeof(abuf)); fprintf(fp, "%s", - rt_addr_n2a(family, coa, + rt_addr_n2a(family, sizeof(*coa), coa, abuf, sizeof(abuf))); fprintf(fp, "%s", _SL_); } diff --git a/ip/link_ip6tnl.c b/ip/link_ip6tnl.c index 5ed3d5a2..cf59a933 100644 --- a/ip/link_ip6tnl.c +++ b/ip/link_ip6tnl.c @@ -285,6 +285,7 @@ static void ip6tunnel_print_opt(struct link_util *lu, FILE *f, struct rtattr *tb if (tb[IFLA_IPTUN_REMOTE]) { fprintf(f, "remote %s ", rt_addr_n2a(AF_INET6, + RTA_PAYLOAD(tb[IFLA_IPTUN_REMOTE]), RTA_DATA(tb[IFLA_IPTUN_REMOTE]), s1, sizeof(s1))); } @@ -292,6 +293,7 @@ static void ip6tunnel_print_opt(struct link_util *lu, FILE *f, struct rtattr *tb if (tb[IFLA_IPTUN_LOCAL]) { fprintf(f, "local %s ", rt_addr_n2a(AF_INET6, + RTA_PAYLOAD(tb[IFLA_IPTUN_LOCAL]), RTA_DATA(tb[IFLA_IPTUN_LOCAL]), s1, sizeof(s1))); } diff --git a/ip/xfrm_monitor.c b/ip/xfrm_monitor.c index 50116a7b..b2b2d6e2 100644 --- a/ip/xfrm_monitor.c +++ b/ip/xfrm_monitor.c @@ -227,7 +227,8 @@ static void xfrm_usersa_print(const struct xfrm_usersa_id *sa_id, __u32 reqid, F buf[0] = 0; fprintf(fp, "dst %s ", - rt_addr_n2a(sa_id->family, &sa_id->daddr, buf, sizeof(buf))); + rt_addr_n2a(sa_id->family, sizeof(sa_id->daddr), &sa_id->daddr, + buf, sizeof(buf))); fprintf(fp, " reqid 0x%x", reqid); @@ -246,7 +247,8 @@ static int xfrm_ae_print(const struct sockaddr_nl *who, xfrm_ae_flags_print(id->flags, arg); fprintf(fp,"\n\t"); memset(abuf, '\0', sizeof(abuf)); - fprintf(fp, "src %s ", rt_addr_n2a(id->sa_id.family, &id->saddr, + fprintf(fp, "src %s ", rt_addr_n2a(id->sa_id.family, + sizeof(id->saddr), &id->saddr, abuf, sizeof(abuf))); xfrm_usersa_print(&id->sa_id, id->reqid, fp); @@ -262,7 +264,7 @@ static void 
xfrm_print_addr(FILE *fp, int family, xfrm_address_t *a) char buf[256]; buf[0] = 0; - fprintf(fp, "%s", rt_addr_n2a(family, a, buf, sizeof(buf))); + fprintf(fp, "%s", rt_addr_n2a(family, sizeof(*a), a, buf, sizeof(buf))); } static int xfrm_mapping_print(const struct sockaddr_nl *who, diff --git a/lib/utils.c b/lib/utils.c index 0d08a868..88ef4bac 100644 --- a/lib/utils.c +++ b/lib/utils.c @@ -636,7 +636,7 @@ int __get_user_hz(void) return sysconf(_SC_CLK_TCK); } -const char *rt_addr_n2a(int af, const void *addr, char *buf, int buflen) +const char *rt_addr_n2a(int af, int len, const void *addr, char *buf, int buflen) { switch (af) { case AF_INET: @@ -723,7 +723,7 @@ const char *format_host(int af, int len, const void *addr, return n; } #endif - return rt_addr_n2a(af, addr, buf, buflen); + return rt_addr_n2a(af, len, addr, buf, buflen); } From 71b4d59b306ce13ef5a3f465d0e0eeb78886e47c Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Sun, 15 Mar 2015 14:49:10 -0500 Subject: [PATCH 050/141] make the addr argument of ll_addr_n2a const This avoids build warnings when AF_PACKET support is added to rt_addr_n2a. Signed-off-by: "Eric W. 
Biederman" --- include/rt_names.h | 2 +- lib/ll_addr.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/include/rt_names.h b/include/rt_names.h index c0ea4f98..921be060 100644 --- a/include/rt_names.h +++ b/include/rt_names.h @@ -22,7 +22,7 @@ int inet_proto_a2n(const char *buf); const char * ll_type_n2a(int type, char *buf, int len); -const char *ll_addr_n2a(unsigned char *addr, int alen, +const char *ll_addr_n2a(const unsigned char *addr, int alen, int type, char *buf, int blen); int ll_addr_a2n(char *lladdr, int len, const char *arg); diff --git a/lib/ll_addr.c b/lib/ll_addr.c index c12ab075..2ce9abfb 100644 --- a/lib/ll_addr.c +++ b/lib/ll_addr.c @@ -29,7 +29,7 @@ #include "utils.h" -const char *ll_addr_n2a(unsigned char *addr, int alen, int type, char *buf, int blen) +const char *ll_addr_n2a(const unsigned char *addr, int alen, int type, char *buf, int blen) { int i; int l; From 0b218ab18dfad3e307f99aed2167a0c74f7ec67a Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Sun, 15 Mar 2015 14:49:35 -0500 Subject: [PATCH 051/141] add support for printing AF_PACKET addresses Signed-off-by: "Eric W. 
Biederman" --- lib/utils.c | 21 ++++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) diff --git a/lib/utils.c b/lib/utils.c index 88ef4bac..df570cb3 100644 --- a/lib/utils.c +++ b/lib/utils.c @@ -25,11 +25,12 @@ #include #include #include +#include #include #include #include - +#include "rt_names.h" #include "utils.h" #include "namespace.h" @@ -397,6 +398,18 @@ int get_addr_1(inet_prefix *addr, const char *name, int family) return 0; } + if (family == AF_PACKET) { + int len; + len = ll_addr_a2n((char *)&addr->data, sizeof(addr->data), name); + if (len < 0) + return -1; + + addr->family = AF_PACKET; + addr->bytelen = len; + addr->bitlen = len * 8; + return 0; + } + if (strchr(name, ':')) { addr->family = AF_INET6; if (family != AF_UNSPEC && family != AF_INET6) @@ -497,10 +510,6 @@ done: int get_addr(inet_prefix *dst, const char *arg, int family) { - if (family == AF_PACKET) { - fprintf(stderr, "Error: \"%s\" may be inet address, but it is not allowed in this context.\n", arg); - exit(1); - } if (get_addr_1(dst, arg, family)) { fprintf(stderr, "Error: an inet address is expected rather than \"%s\".\n", arg); exit(1); @@ -650,6 +659,8 @@ const char *rt_addr_n2a(int af, int len, const void *addr, char *buf, int buflen memcpy(dna.a_addr, addr, 2); return dnet_ntop(af, &dna, buf, buflen); } + case AF_PACKET: + return ll_addr_n2a(addr, len, ARPHRD_VOID, buf, buflen); default: return "???"; } From 45c90d1990cbcecf6b24613f44849d6c44f29cdc Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Sun, 15 Mar 2015 14:50:03 -0500 Subject: [PATCH 052/141] add address family to/from string helper functions. Add the functions family_name and read_family to convert an address family to a string and to convert a string to an address family. Signed-off-by: "Eric W.
Biederman" --- include/utils.h | 3 +++ ip/ip.c | 16 +++------------- lib/utils.c | 35 +++++++++++++++++++++++++++++++++++ 3 files changed, 41 insertions(+), 13 deletions(-) diff --git a/include/utils.h b/include/utils.h index f9a7e32d..79c1da19 100644 --- a/include/utils.h +++ b/include/utils.h @@ -109,6 +109,9 @@ extern const char *format_host(int af, int len, const void *addr, extern const char *rt_addr_n2a(int af, int len, const void *addr, char *buf, int buflen); +extern int read_family(const char *name); +extern const char *family_name(int family); + void missarg(const char *) __attribute__((noreturn)); void invarg(const char *, const char *) __attribute__((noreturn)); void duparg(const char *, const char *) __attribute__((noreturn)); diff --git a/ip/ip.c b/ip/ip.c index da16b15f..85256d8e 100644 --- a/ip/ip.c +++ b/ip/ip.c @@ -190,21 +190,11 @@ int main(int argc, char **argv) argv++; if (argc <= 1) usage(); - if (strcmp(argv[1], "inet") == 0) - preferred_family = AF_INET; - else if (strcmp(argv[1], "inet6") == 0) - preferred_family = AF_INET6; - else if (strcmp(argv[1], "dnet") == 0) - preferred_family = AF_DECnet; - else if (strcmp(argv[1], "link") == 0) - preferred_family = AF_PACKET; - else if (strcmp(argv[1], "ipx") == 0) - preferred_family = AF_IPX; - else if (strcmp(argv[1], "bridge") == 0) - preferred_family = AF_BRIDGE; - else if (strcmp(argv[1], "help") == 0) + if (strcmp(argv[1], "help") == 0) usage(); else + preferred_family = read_family(argv[1]); + if (preferred_family == AF_UNSPEC) invarg("invalid protocol family", argv[1]); } else if (strcmp(opt, "-4") == 0) { preferred_family = AF_INET; diff --git a/lib/utils.c b/lib/utils.c index df570cb3..8aee3980 100644 --- a/lib/utils.c +++ b/lib/utils.c @@ -666,6 +666,41 @@ const char *rt_addr_n2a(int af, int len, const void *addr, char *buf, int buflen } } +int read_family(const char *name) +{ + int family = AF_UNSPEC; + if (strcmp(name, "inet") == 0) + family = AF_INET; + else if (strcmp(name, "inet6") 
== 0) + family = AF_INET6; + else if (strcmp(name, "dnet") == 0) + family = AF_DECnet; + else if (strcmp(name, "link") == 0) + family = AF_PACKET; + else if (strcmp(name, "ipx") == 0) + family = AF_IPX; + else if (strcmp(name, "bridge") == 0) + family = AF_BRIDGE; + return family; +} + +const char *family_name(int family) +{ + if (family == AF_INET) + return "inet"; + if (family == AF_INET6) + return "inet6"; + if (family == AF_DECnet) + return "dnet"; + if (family == AF_PACKET) + return "link"; + if (family == AF_IPX) + return "ipx"; + if (family == AF_BRIDGE) + return "bridge"; + return "???"; +} + #ifdef RESOLVE_HOSTNAMES struct namerec { From 8e8f8de42f86e6a625d089acfa64f032d4bc8d92 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Sun, 15 Mar 2015 14:51:10 -0500 Subject: [PATCH 053/141] misc whitespace cleanup --- ip/iptunnel.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ip/iptunnel.c b/ip/iptunnel.c index 29188c45..be84b83e 100644 --- a/ip/iptunnel.c +++ b/ip/iptunnel.c @@ -342,7 +342,7 @@ static void print_tunnel(struct ip_tunnel_parm *p) printf("%s: %s/ip remote %s local %s ", p->name, tnl_strproto(p->iph.protocol), - p->iph.daddr ? format_host(AF_INET, 4, &p->iph.daddr, s1, sizeof(s1)) : "any", + p->iph.daddr ? format_host(AF_INET, 4, &p->iph.daddr, s1, sizeof(s1)) : "any", p->iph.saddr ? rt_addr_n2a(AF_INET, 4, &p->iph.saddr, s2, sizeof(s2)) : "any"); if (p->iph.protocol == IPPROTO_IPV6 && (p->i_flags & SIT_ISATAP)) { From 93ae283594094f2808891ea544bf71cee645633e Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Sun, 15 Mar 2015 14:52:06 -0500 Subject: [PATCH 054/141] add support for the RTA_VIA attribute Add support for the RTA_VIA attribute that specifies an address family as well as an address for the next hop gateway. To make it easy to pass this, reorder inet_prefix so that its tail is a proper RTA_VIA attribute. Signed-off-by: "Eric W.
Biederman" --- include/utils.h | 7 ++-- ip/iproute.c | 76 +++++++++++++++++++++++++++++++++++++----- man/man8/ip-route.8.in | 18 ++++++---- 3 files changed, 83 insertions(+), 18 deletions(-) diff --git a/include/utils.h b/include/utils.h index 79c1da19..ff4c417d 100644 --- a/include/utils.h +++ b/include/utils.h @@ -50,10 +50,11 @@ extern void incomplete_command(void) __attribute__((noreturn)); typedef struct { - __u8 family; - __u8 bytelen; + __u16 flags; + __u16 bytelen; __s16 bitlen; - __u32 flags; + /* These next two fields match rtvia */ + __u16 family; __u32 data[8]; } inet_prefix; diff --git a/ip/iproute.c b/ip/iproute.c index c73a3643..214aaac9 100644 --- a/ip/iproute.c +++ b/ip/iproute.c @@ -75,7 +75,8 @@ static void usage(void) fprintf(stderr, " [ table TABLE_ID ] [ proto RTPROTO ]\n"); fprintf(stderr, " [ scope SCOPE ] [ metric METRIC ]\n"); fprintf(stderr, "INFO_SPEC := NH OPTIONS FLAGS [ nexthop NH ]...\n"); - fprintf(stderr, "NH := [ via ADDRESS ] [ dev STRING ] [ weight NUMBER ] NHFLAGS\n"); + fprintf(stderr, "NH := [ via [ FAMILY ] ADDRESS ] [ dev STRING ] [ weight NUMBER ] NHFLAGS\n"); + fprintf(stderr, "FAMILY := [ inet | inet6 | ipx | dnet | bridge | link ]"); fprintf(stderr, "OPTIONS := FLAGS [ mtu NUMBER ] [ advmss NUMBER ]\n"); fprintf(stderr, " [ rtt TIME ] [ rttvar TIME ] [ reordering NUMBER ]\n"); fprintf(stderr, " [ window NUMBER] [ cwnd NUMBER ] [ initcwnd NUMBER ]\n"); @@ -185,8 +186,15 @@ static int filter_nlmsg(struct nlmsghdr *n, struct rtattr **tb, int host_len) (r->rtm_family != filter.msrc.family || (filter.msrc.bitlen >= 0 && filter.msrc.bitlen < r->rtm_src_len))) return 0; - if (filter.rvia.family && r->rtm_family != filter.rvia.family) - return 0; + if (filter.rvia.family) { + int family = r->rtm_family; + if (tb[RTA_VIA]) { + struct rtvia *via = RTA_DATA(tb[RTA_VIA]); + family = via->rtvia_family; + } + if (family != filter.rvia.family) + return 0; + } if (filter.rprefsrc.family && r->rtm_family != filter.rprefsrc.family) 
return 0; @@ -205,6 +213,12 @@ static int filter_nlmsg(struct nlmsghdr *n, struct rtattr **tb, int host_len) via.family = r->rtm_family; if (tb[RTA_GATEWAY]) memcpy(&via.data, RTA_DATA(tb[RTA_GATEWAY]), host_len/8); + if (tb[RTA_VIA]) { + size_t len = RTA_PAYLOAD(tb[RTA_VIA]) - 2; + struct rtvia *rtvia = RTA_DATA(tb[RTA_VIA]); + via.family = rtvia->rtvia_family; + memcpy(&via.data, rtvia->rtvia_addr, len); + } } if (filter.rprefsrc.bitlen>0) { memset(&prefsrc, 0, sizeof(prefsrc)); @@ -386,6 +400,14 @@ int print_route(const struct sockaddr_nl *who, struct nlmsghdr *n, void *arg) RTA_DATA(tb[RTA_GATEWAY]), abuf, sizeof(abuf))); } + if (tb[RTA_VIA]) { + size_t len = RTA_PAYLOAD(tb[RTA_VIA]) - 2; + struct rtvia *via = RTA_DATA(tb[RTA_VIA]); + fprintf(fp, "via %s %s ", + family_name(via->rtvia_family), + format_host(via->rtvia_family, len, via->rtvia_addr, + abuf, sizeof(abuf))); + } if (tb[RTA_OIF] && filter.oifmask != -1) fprintf(fp, "dev %s ", ll_index_to_name(*(int*)RTA_DATA(tb[RTA_OIF]))); @@ -603,6 +625,14 @@ int print_route(const struct sockaddr_nl *who, struct nlmsghdr *n, void *arg) RTA_DATA(tb[RTA_GATEWAY]), abuf, sizeof(abuf))); } + if (tb[RTA_VIA]) { + size_t len = RTA_PAYLOAD(tb[RTA_VIA]) - 2; + struct rtvia *via = RTA_DATA(tb[RTA_VIA]); + fprintf(fp, "via %s %s ", + family_name(via->rtvia_family), + format_host(via->rtvia_family, len, via->rtvia_addr, + abuf, sizeof(abuf))); + } if (tb[RTA_FLOW]) { __u32 to = rta_getattr_u32(tb[RTA_FLOW]); __u32 from = to>>16; @@ -650,12 +680,23 @@ static int parse_one_nh(struct rtmsg *r, struct rtattr *rta, while (++argv, --argc > 0) { if (strcmp(*argv, "via") == 0) { inet_prefix addr; + int family; NEXT_ARG(); - get_addr(&addr, *argv, r->rtm_family); + family = read_family(*argv); + if (family == AF_UNSPEC) + family = r->rtm_family; + else + NEXT_ARG(); + get_addr(&addr, *argv, family); if (r->rtm_family == AF_UNSPEC) r->rtm_family = addr.family; - rta_addattr_l(rta, 4096, RTA_GATEWAY, &addr.data, addr.bytelen); - 
rtnh->rtnh_len += sizeof(struct rtattr) + addr.bytelen; + if (addr.family == r->rtm_family) { + rta_addattr_l(rta, 4096, RTA_GATEWAY, &addr.data, addr.bytelen); + rtnh->rtnh_len += sizeof(struct rtattr) + addr.bytelen; + } else { + rta_addattr_l(rta, 4096, RTA_VIA, &addr.family, addr.bytelen+2); + rtnh->rtnh_len += sizeof(struct rtattr) + addr.bytelen+2; + } } else if (strcmp(*argv, "dev") == 0) { NEXT_ARG(); if ((rtnh->rtnh_ifindex = ll_name_to_index(*argv)) == 0) { @@ -763,12 +804,21 @@ static int iproute_modify(int cmd, unsigned flags, int argc, char **argv) addattr_l(&req.n, sizeof(req), RTA_PREFSRC, &addr.data, addr.bytelen); } else if (strcmp(*argv, "via") == 0) { inet_prefix addr; + int family; gw_ok = 1; NEXT_ARG(); - get_addr(&addr, *argv, req.r.rtm_family); + family = read_family(*argv); + if (family == AF_UNSPEC) + family = req.r.rtm_family; + else + NEXT_ARG(); + get_addr(&addr, *argv, family); if (req.r.rtm_family == AF_UNSPEC) req.r.rtm_family = addr.family; - addattr_l(&req.n, sizeof(req), RTA_GATEWAY, &addr.data, addr.bytelen); + if (addr.family == req.r.rtm_family) + addattr_l(&req.n, sizeof(req), RTA_GATEWAY, &addr.data, addr.bytelen); + else + addattr_l(&req.n, sizeof(req), RTA_VIA, &addr.family, addr.bytelen+2); } else if (strcmp(*argv, "from") == 0) { inet_prefix addr; NEXT_ARG(); @@ -1253,8 +1303,14 @@ static int iproute_list_flush_or_save(int argc, char **argv, int action) get_unsigned(&mark, *argv, 0); filter.markmask = -1; } else if (strcmp(*argv, "via") == 0) { + int family; NEXT_ARG(); - get_prefix(&filter.rvia, *argv, do_ipv6); + family = read_family(*argv); + if (family == AF_UNSPEC) + family = do_ipv6; + else + NEXT_ARG(); + get_prefix(&filter.rvia, *argv, family); } else if (strcmp(*argv, "src") == 0) { NEXT_ARG(); get_prefix(&filter.rprefsrc, *argv, do_ipv6); @@ -1556,6 +1612,8 @@ static int iproute_get(int argc, char **argv) tb[RTA_OIF]->rta_type = 0; if (tb[RTA_GATEWAY]) tb[RTA_GATEWAY]->rta_type = 0; + if (tb[RTA_VIA]) + 
tb[RTA_VIA]->rta_type = 0; if (!idev && tb[RTA_IIF]) tb[RTA_IIF]->rta_type = 0; req.n.nlmsg_flags = NLM_F_REQUEST; diff --git a/man/man8/ip-route.8.in b/man/man8/ip-route.8.in index 2b1583d5..906cfea0 100644 --- a/man/man8/ip-route.8.in +++ b/man/man8/ip-route.8.in @@ -81,12 +81,17 @@ replace " } " .ti -8 .IR NH " := [ " .B via -.IR ADDRESS " ] [ " +[ +.IR FAMILY " ] " ADDRESS " ] [ " .B dev .IR STRING " ] [ " .B weight .IR NUMBER " ] " NHFLAGS +.ti -8 +.IR FAMILY " := [ " +.BR inet " | " inet6 " | " ipx " | " dnet " | " bridge " | " link " ]" + .ti -8 .IR OPTIONS " := " FLAGS " [ " .B mtu @@ -333,9 +338,10 @@ table by default. the output device name. .TP -.BI via " ADDRESS" -the address of the nexthop router. Actually, the sense of this field -depends on the route type. For normal +.BI via " [ FAMILY ] ADDRESS" +the address of the nexthop router, in the address family FAMILY. +Actually, the sense of this field depends on the route type. For +normal .B unicast routes it is either the true next hop router or, if it is a direct route installed in BSD compatibility mode, it can be a local address @@ -472,7 +478,7 @@ is a complex value with its own syntax similar to the top level argument lists: .in +8 -.BI via " ADDRESS" +.BI via " [ FAMILY ] ADDRESS" - is the nexthop router. .sp @@ -669,7 +675,7 @@ only list routes of this type. only list routes going via this device. .TP -.BI via " PREFIX" +.BI via " [ FAMILY ] PREFIX" only list routes going via the nexthop routers selected by .IR PREFIX "." From 6f7a9f4dc55974e73522f23c88bc8ec85b0871f5 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Sun, 15 Mar 2015 14:53:11 -0500 Subject: [PATCH 055/141] add support for the RTA_NEWDST attribute. This attribute is like RTA_DST except it specifies the destination address to place on a packet when it leaves the host. For ip based protocols this is destination NAT and not a common part of forwarding. 
For protocols like MPLS label swapping is something that typically happens on every hop. There is likely to be a RTA_NEWSRC at some point so RTA_NEWDST is printed as "as to" and can be specified either as "as to" or just "as" Signed-off-by: "Eric W. Biederman" --- ip/iproute.c | 19 ++++++++++++++++++- man/man8/ip-route.8.in | 5 +++++ 2 files changed, 23 insertions(+), 1 deletion(-) diff --git a/ip/iproute.c b/ip/iproute.c index 214aaac9..500e6b4a 100644 --- a/ip/iproute.c +++ b/ip/iproute.c @@ -77,7 +77,7 @@ static void usage(void) fprintf(stderr, "INFO_SPEC := NH OPTIONS FLAGS [ nexthop NH ]...\n"); fprintf(stderr, "NH := [ via [ FAMILY ] ADDRESS ] [ dev STRING ] [ weight NUMBER ] NHFLAGS\n"); fprintf(stderr, "FAMILY := [ inet | inet6 | ipx | dnet | bridge | link ]"); - fprintf(stderr, "OPTIONS := FLAGS [ mtu NUMBER ] [ advmss NUMBER ]\n"); + fprintf(stderr, "OPTIONS := FLAGS [ mtu NUMBER ] [ advmss NUMBER ] [ as [ to ] ADDRESS ]\n"); fprintf(stderr, " [ rtt TIME ] [ rttvar TIME ] [ reordering NUMBER ]\n"); fprintf(stderr, " [ window NUMBER] [ cwnd NUMBER ] [ initcwnd NUMBER ]\n"); fprintf(stderr, " [ ssthresh NUMBER ] [ realms REALM ] [ src ADDRESS ]\n"); @@ -388,6 +388,13 @@ int print_route(const struct sockaddr_nl *who, struct nlmsghdr *n, void *arg) } else if (r->rtm_src_len) { fprintf(fp, "from 0/%u ", r->rtm_src_len); } + if (tb[RTA_NEWDST]) { + fprintf(fp, "as to %s ", format_host(r->rtm_family, + RTA_PAYLOAD(tb[RTA_NEWDST]), + RTA_DATA(tb[RTA_NEWDST]), + abuf, sizeof(abuf)) + ); + } if (r->rtm_tos && filter.tosmask != -1) { SPRINT_BUF(b1); fprintf(fp, "tos %s ", rtnl_dsfield_n2a(r->rtm_tos, b1, sizeof(b1))); @@ -802,6 +809,16 @@ static int iproute_modify(int cmd, unsigned flags, int argc, char **argv) if (req.r.rtm_family == AF_UNSPEC) req.r.rtm_family = addr.family; addattr_l(&req.n, sizeof(req), RTA_PREFSRC, &addr.data, addr.bytelen); + } else if (strcmp(*argv, "as") == 0) { + inet_prefix addr; + NEXT_ARG(); + if (strcmp(*argv, "to") == 0) { + 
NEXT_ARG(); + } + get_addr(&addr, *argv, req.r.rtm_family); + if (req.r.rtm_family == AF_UNSPEC) + req.r.rtm_family = addr.family; + addattr_l(&req.n, sizeof(req), RTA_NEWDST, &addr.data, addr.bytelen); } else if (strcmp(*argv, "via") == 0) { inet_prefix addr; int family; diff --git a/man/man8/ip-route.8.in b/man/man8/ip-route.8.in index 906cfea0..51123449 100644 --- a/man/man8/ip-route.8.in +++ b/man/man8/ip-route.8.in @@ -98,6 +98,11 @@ replace " } " .IR NUMBER " ] [ " .B advmss .IR NUMBER " ] [ " +.B as +[ +.B to +] +.IR ADDRESS " ] [ " .B rtt .IR TIME " ] [ " .B rttvar From dacc5d4197c1f8ac12938a594f7e4131cb937cb2 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Sun, 15 Mar 2015 14:53:45 -0500 Subject: [PATCH 056/141] add basic mpls support to iproute - Pull in the uapi mpls.h - Update rtnetlink.h to include the mpls rtnetlink notification multicast group. - Define AF_MPLS in utils.h if it is not defined from elsewhere as is done with AF_DECnet The address syntax for multiple mpls labels is a complete invention. When I looked there seemed to be no widespread convention for talking about an mpls label stack in text form. Sometimes people did: "{ Label1, Label2, Label3 }", sometimes people would do: "[ label3, label2, label1 ]", and most of the time label stacks were not explicitly shown at all. The syntax I wound up using, so it would not have spaces and so it would be visually distinct from other kinds of addresses, is: label1/label2/label3 Where label1 is the label at the top of the label stack and label3 is the label at the bottom of the label stack. When there is a single label this matches what seems to be convention with other tools. Just print out the numeric value of the mpls label. The netlink protocol for labels uses the on-the-wire format for a label stack. The ttl and traffic class are expected to be 0. Using the on-the-wire format is common and what happens with other address types.
BGP when passing label stacks also uses this technique with the exception that the ttl byte is not included making each label in a BGP label stack 3 bytes instead of 4. Signed-off-by: "Eric W. Biederman" --- Makefile | 3 +++ include/linux/mpls.h | 34 +++++++++++++++++++++++++ include/utils.h | 10 ++++++++ ip/ip.c | 4 ++- ip/ipmonitor.c | 3 +++ ip/iproute.c | 2 +- lib/mpls_ntop.c | 48 ++++++++++++++++++++++++++++++++++ lib/mpls_pton.c | 58 ++++++++++++++++++++++++++++++++++++++++++ lib/utils.c | 30 ++++++++++++++++++++-- man/man8/ip-route.8.in | 2 +- man/man8/ip.8 | 7 ++++- 11 files changed, 195 insertions(+), 6 deletions(-) create mode 100644 include/linux/mpls.h create mode 100644 lib/mpls_ntop.c create mode 100644 lib/mpls_pton.c diff --git a/Makefile b/Makefile index 9dbb29f3..ca6c2e14 100644 --- a/Makefile +++ b/Makefile @@ -26,6 +26,9 @@ ADDLIB+=dnet_ntop.o dnet_pton.o #options for ipx ADDLIB+=ipx_ntop.o ipx_pton.o +#options for mpls +ADDLIB+=mpls_ntop.o mpls_pton.o + CC = gcc HOSTCC = gcc DEFINES += -D_GNU_SOURCE diff --git a/include/linux/mpls.h b/include/linux/mpls.h new file mode 100644 index 00000000..08939023 --- /dev/null +++ b/include/linux/mpls.h @@ -0,0 +1,34 @@ +#ifndef _MPLS_H +#define _MPLS_H + +#include +#include + +/* Reference: RFC 5462, RFC 3032 + * + * 0 1 2 3 + * 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | Label | TC |S| TTL | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * + * Label: Label Value, 20 bits + * TC: Traffic Class field, 3 bits + * S: Bottom of Stack, 1 bit + * TTL: Time to Live, 8 bits + */ + +struct mpls_label { + __be32 entry; +}; + +#define MPLS_LS_LABEL_MASK 0xFFFFF000 +#define MPLS_LS_LABEL_SHIFT 12 +#define MPLS_LS_TC_MASK 0x00000E00 +#define MPLS_LS_TC_SHIFT 9 +#define MPLS_LS_S_MASK 0x00000100 +#define MPLS_LS_S_SHIFT 8 +#define MPLS_LS_TTL_MASK 0x000000FF +#define MPLS_LS_TTL_SHIFT 0 + +#endif /* 
_MPLS_H */ diff --git a/include/utils.h b/include/utils.h index ff4c417d..c21b59c2 100644 --- a/include/utils.h +++ b/include/utils.h @@ -78,6 +78,13 @@ struct ipx_addr { u_int8_t ipx_node[IPX_NODE_LEN]; }; +#ifndef AF_MPLS +# define AF_MPLS 28 +#endif + +/* Maximum number of labels the mpls helpers support */ +#define MPLS_MAX_LABELS 8 + extern __u32 get_addr32(const char *name); extern int get_addr_1(inet_prefix *dst, const char *arg, int family); extern int get_prefix_1(inet_prefix *dst, char *arg, int family); @@ -126,6 +133,9 @@ int dnet_pton(int af, const char *src, void *addr); const char *ipx_ntop(int af, const void *addr, char *str, size_t len); int ipx_pton(int af, const char *src, void *addr); +const char *mpls_ntop(int af, const void *addr, char *str, size_t len); +int mpls_pton(int af, const char *src, void *addr); + extern int __iproute2_hz_internal; extern int __get_hz(void); diff --git a/ip/ip.c b/ip/ip.c index 85256d8e..f7f214b2 100644 --- a/ip/ip.c +++ b/ip/ip.c @@ -52,7 +52,7 @@ static void usage(void) " netns | l2tp | fou | tcp_metrics | token | netconf }\n" " OPTIONS := { -V[ersion] | -s[tatistics] | -d[etails] | -r[esolve] |\n" " -h[uman-readable] | -iec |\n" -" -f[amily] { inet | inet6 | ipx | dnet | bridge | link } |\n" +" -f[amily] { inet | inet6 | ipx | dnet | mpls | bridge | link } |\n" " -4 | -6 | -I | -D | -B | -0 |\n" " -l[oops] { maximum-addr-flush-attempts } |\n" " -o[neline] | -t[imestamp] | -ts[hort] | -b[atch] [filename] |\n" @@ -206,6 +206,8 @@ int main(int argc, char **argv) preferred_family = AF_IPX; } else if (strcmp(opt, "-D") == 0) { preferred_family = AF_DECnet; + } else if (strcmp(opt, "-M") == 0) { + preferred_family = AF_MPLS; } else if (strcmp(opt, "-B") == 0) { preferred_family = AF_BRIDGE; } else if (matches(opt, "-human") == 0 || diff --git a/ip/ipmonitor.c b/ip/ipmonitor.c index 6b5e6653..7833a263 100644 --- a/ip/ipmonitor.c +++ b/ip/ipmonitor.c @@ -158,6 +158,7 @@ int do_ipmonitor(int argc, char **argv) groups |= 
nl_mgrp(RTNLGRP_IPV6_IFADDR); groups |= nl_mgrp(RTNLGRP_IPV4_ROUTE); groups |= nl_mgrp(RTNLGRP_IPV6_ROUTE); + groups |= nl_mgrp(RTNLGRP_MPLS_ROUTE); groups |= nl_mgrp(RTNLGRP_IPV4_MROUTE); groups |= nl_mgrp(RTNLGRP_IPV6_MROUTE); groups |= nl_mgrp(RTNLGRP_IPV6_PREFIX); @@ -235,6 +236,8 @@ int do_ipmonitor(int argc, char **argv) groups |= nl_mgrp(RTNLGRP_IPV4_ROUTE); if (!preferred_family || preferred_family == AF_INET6) groups |= nl_mgrp(RTNLGRP_IPV6_ROUTE); + if (!preferred_family || preferred_family == AF_MPLS) + groups |= nl_mgrp(RTNLGRP_MPLS_ROUTE); } if (lmroute) { if (!preferred_family || preferred_family == AF_INET) diff --git a/ip/iproute.c b/ip/iproute.c index 500e6b4a..e086b1f5 100644 --- a/ip/iproute.c +++ b/ip/iproute.c @@ -76,7 +76,7 @@ static void usage(void) fprintf(stderr, " [ scope SCOPE ] [ metric METRIC ]\n"); fprintf(stderr, "INFO_SPEC := NH OPTIONS FLAGS [ nexthop NH ]...\n"); fprintf(stderr, "NH := [ via [ FAMILY ] ADDRESS ] [ dev STRING ] [ weight NUMBER ] NHFLAGS\n"); - fprintf(stderr, "FAMILY := [ inet | inet6 | ipx | dnet | bridge | link ]"); + fprintf(stderr, "FAMILY := [ inet | inet6 | ipx | dnet | mpls | bridge | link ]"); fprintf(stderr, "OPTIONS := FLAGS [ mtu NUMBER ] [ advmss NUMBER ] [ as [ to ] ADDRESS ]\n"); fprintf(stderr, " [ rtt TIME ] [ rttvar TIME ] [ reordering NUMBER ]\n"); fprintf(stderr, " [ window NUMBER] [ cwnd NUMBER ] [ initcwnd NUMBER ]\n"); diff --git a/lib/mpls_ntop.c b/lib/mpls_ntop.c new file mode 100644 index 00000000..945d6d5e --- /dev/null +++ b/lib/mpls_ntop.c @@ -0,0 +1,48 @@ +#include +#include +#include +#include +#include + +#include "utils.h" + +static const char *mpls_ntop1(const struct mpls_label *addr, char *buf, size_t buflen) +{ + size_t destlen = buflen; + char *dest = buf; + int count; + + for (count = 0; count < MPLS_MAX_LABELS; count++) { + uint32_t entry = ntohl(addr[count].entry); + uint32_t label = (entry & MPLS_LS_LABEL_MASK) >> MPLS_LS_LABEL_SHIFT; + int len = snprintf(dest, destlen, "%u", 
label); + + /* Is this the end? */ + if (entry & MPLS_LS_S_MASK) + return buf; + + + dest += len; + destlen -= len; + if (destlen) { + *dest = '/'; + dest++; + destlen--; + } + } + errno = E2BIG; + return NULL; +} + +const char *mpls_ntop(int af, const void *addr, char *buf, size_t buflen) +{ + switch(af) { + case AF_MPLS: + errno = 0; + return mpls_ntop1((struct mpls_label *)addr, buf, buflen); + default: + errno = EAFNOSUPPORT; + } + + return NULL; +} diff --git a/lib/mpls_pton.c b/lib/mpls_pton.c new file mode 100644 index 00000000..bd448cfc --- /dev/null +++ b/lib/mpls_pton.c @@ -0,0 +1,58 @@ +#include +#include +#include +#include +#include + +#include "utils.h" + + +static int mpls_pton1(const char *name, struct mpls_label *addr) +{ + char *endp; + unsigned count; + + for (count = 0; count < MPLS_MAX_LABELS; count++) { + unsigned long label; + + label = strtoul(name, &endp, 0); + /* Fail when the label value is out of range */ + if (label >= (1 << 20)) + return 0; + + if (endp == name) /* no digits */ + return 0; + + addr->entry = htonl(label << MPLS_LS_LABEL_SHIFT); + if (*endp == '\0') { + addr->entry |= htonl(1 << MPLS_LS_S_SHIFT); + return 1; + } + + /* Bad character in the address */ + if (*endp != '/') + return 0; + + name = endp + 1; + addr += 1; + } + /* The address was too long */ + return 0; +} + +int mpls_pton(int af, const char *src, void *addr) +{ + int err; + + switch(af) { + case AF_MPLS: + errno = 0; + err = mpls_pton1(src, (struct mpls_label *)addr); + break; + default: + errno = EAFNOSUPPORT; + err = -1; + } + + return err; +} diff --git a/lib/utils.c b/lib/utils.c index 8aee3980..428ad8f9 100644 --- a/lib/utils.c +++ b/lib/utils.c @@ -26,6 +26,7 @@ #include #include #include +#include #include #include #include @@ -390,7 +391,7 @@ int get_addr_1(inet_prefix *addr, const char *name, int family) if (strcmp(name, "default") == 0 || strcmp(name, "all") == 0 || strcmp(name, "any") == 0) { - if (family == AF_DECnet) + if ((family == AF_DECnet)
|| (family == AF_MPLS)) return -1; addr->family = family; addr->bytelen = (family == AF_INET6 ? 16 : 4); @@ -432,6 +433,23 @@ int get_addr_1(inet_prefix *addr, const char *name, int family) return 0; } + if (family == AF_MPLS) { + int i; + addr->family = AF_MPLS; + if (mpls_pton(AF_MPLS, name, addr->data) <= 0) + return -1; + addr->bytelen = 4; + addr->bitlen = 20; + /* How many bytes do I need? */ + for (i = 0; i < 8; i++) { + if (ntohl(addr->data[i]) & MPLS_LS_S_MASK) { + addr->bytelen = (i + 1)*4; + break; + } + } + return 0; + } + addr->family = AF_INET; if (family != AF_UNSPEC && family != AF_INET) return -1; @@ -455,6 +473,8 @@ int af_bit_len(int af) return 16; case AF_IPX: return 80; + case AF_MPLS: + return 20; } return 0; @@ -476,7 +496,7 @@ int get_prefix_1(inet_prefix *dst, char *arg, int family) if (strcmp(arg, "default") == 0 || strcmp(arg, "any") == 0 || strcmp(arg, "all") == 0) { - if (family == AF_DECnet) + if ((family == AF_DECnet) || (family == AF_MPLS)) return -1; dst->family = family; dst->bytelen = 0; @@ -651,6 +671,8 @@ const char *rt_addr_n2a(int af, int len, const void *addr, char *buf, int buflen case AF_INET: case AF_INET6: return inet_ntop(af, addr, buf, buflen); + case AF_MPLS: + return mpls_ntop(af, addr, buf, buflen); case AF_IPX: return ipx_ntop(af, addr, buf, buflen); case AF_DECnet: @@ -679,6 +701,8 @@ int read_family(const char *name) family = AF_PACKET; else if (strcmp(name, "ipx") == 0) family = AF_IPX; + else if (strcmp(name, "mpls") == 0) + family = AF_MPLS; else if (strcmp(name, "bridge") == 0) family = AF_BRIDGE; return family; @@ -696,6 +720,8 @@ const char *family_name(int family) return "link"; if (family == AF_IPX) return "ipx"; + if (family == AF_MPLS) + return "mpls"; if (family == AF_BRIDGE) return "bridge"; return "???"; diff --git a/man/man8/ip-route.8.in b/man/man8/ip-route.8.in index 51123449..1163536d 100644 --- a/man/man8/ip-route.8.in +++ b/man/man8/ip-route.8.in @@ -90,7 +90,7 @@ replace " } " .ti -8 .IR FAMILY
" := [ " -.BR inet " | " inet6 " | " ipx " | " dnet " | " bridge " | " link " ]" +.BR inet " | " inet6 " | " ipx " | " dnet " | " mpls " | " bridge " | " link " ]" .ti -8 .IR OPTIONS " := " FLAGS " [ " diff --git a/man/man8/ip.8 b/man/man8/ip.8 index 016e8c66..1755473e 100644 --- a/man/man8/ip.8 +++ b/man/man8/ip.8 @@ -73,7 +73,7 @@ Zero (0) means loop until all addresses are removed. .TP .BR "\-f" , " \-family " Specifies the protocol family to use. The protocol family identifier can be one of -.BR "inet" , " inet6" , " bridge" , " ipx" , " dnet" +.BR "inet" , " inet6" , " bridge" , " ipx" , " dnet" , " mpls" or .BR link . If this option is not present, @@ -114,6 +114,11 @@ shortcut for shortcut for .BR "\-family ipx" . +.TP +.B \-M +shortcut for +.BR "\-family mpls" . + .TP .B \-0 shortcut for From 194e9b855d05310cb3c400b1ca7fce3deca7c96a Mon Sep 17 00:00:00 2001 From: Lubomir Rintel Date: Mon, 16 Mar 2015 16:01:47 +0100 Subject: [PATCH 057/141] ip: support RFC4191 router preference This allows querying and setting the route preference. It's usually set from the IPv6 Neighbor Discovery Router Advertisement messages. Introduced in "ipv6: expose RFC4191 route preference via rtnetlink", enqueued for Linux 4.1. Signed-off-by: Lubomir Rintel --- doc/ip-cref.tex | 11 +++++++++++ ip/iproute.c | 35 ++++++++++++++++++++++++++++++++++- man/man8/ip-route.8.in | 28 ++++++++++++++++++++++++++++ 3 files changed, 73 insertions(+), 1 deletion(-) diff --git a/doc/ip-cref.tex b/doc/ip-cref.tex index e7a79a5d..ea147950 100644 --- a/doc/ip-cref.tex +++ b/doc/ip-cref.tex @@ -1432,6 +1432,17 @@ database. even if it does not match any interface prefix. One application of this option may be found in~\cite{IP-TUNNELS}. +\item \verb|pref PREF| + +--- the IPv6 route preference. +\verb|PREF| PREF is a string specifying the route preference as defined in +RFC4191 for Router Discovery messages. Namely: +\begin{itemize} +\item \verb|low| --- the route has a lowest priority. 
+\item \verb|medium| --- the route has a default priority. +\item \verb|high| --- the route has a highest priority. +\end{itemize} + \end{itemize} diff --git a/ip/iproute.c b/ip/iproute.c index e086b1f5..132a83a7 100644 --- a/ip/iproute.c +++ b/ip/iproute.c @@ -23,6 +23,7 @@ #include #include #include +#include #include #include "rt_names.h" @@ -83,12 +84,14 @@ static void usage(void) fprintf(stderr, " [ ssthresh NUMBER ] [ realms REALM ] [ src ADDRESS ]\n"); fprintf(stderr, " [ rto_min TIME ] [ hoplimit NUMBER ] [ initrwnd NUMBER ]\n"); fprintf(stderr, " [ features FEATURES ] [ quickack BOOL ] [ congctl NAME ]\n"); + fprintf(stderr, " [ pref PREF ]\n"); fprintf(stderr, "TYPE := [ unicast | local | broadcast | multicast | throw |\n"); fprintf(stderr, " unreachable | prohibit | blackhole | nat ]\n"); fprintf(stderr, "TABLE_ID := [ local | main | default | all | NUMBER ]\n"); fprintf(stderr, "SCOPE := [ host | link | global | NUMBER ]\n"); fprintf(stderr, "NHFLAGS := [ onlink | pervasive ]\n"); fprintf(stderr, "RTPROTO := [ kernel | boot | static | NUMBER ]\n"); + fprintf(stderr, "PREF := [ low | medium | high ]\n"); fprintf(stderr, "TIME := NUMBER[s|ms]\n"); fprintf(stderr, "BOOL := [1|0]\n"); fprintf(stderr, "FEATURES := ecn\n"); @@ -671,6 +674,24 @@ int print_route(const struct sockaddr_nl *who, struct nlmsghdr *n, void *arg) nh = RTNH_NEXT(nh); } } + if (tb[RTA_PREF]) { + unsigned int pref = rta_getattr_u8(tb[RTA_PREF]); + fprintf(fp, " pref "); + + switch (pref) { + case ICMPV6_ROUTER_PREF_LOW: + fprintf(fp, "low"); + break; + case ICMPV6_ROUTER_PREF_MEDIUM: + fprintf(fp, "medium"); + break; + case ICMPV6_ROUTER_PREF_HIGH: + fprintf(fp, "high"); + break; + default: + fprintf(fp, "%u", pref); + } + } fprintf(fp, "\n"); fflush(fp); return 0; @@ -854,7 +875,7 @@ static int iproute_modify(int cmd, unsigned flags, int argc, char **argv) req.r.rtm_tos = tos; } else if (matches(*argv, "metric") == 0 || matches(*argv, "priority") == 0 || - matches(*argv, "preference") 
== 0) { + strcmp(*argv, "preference") == 0) { __u32 metric; NEXT_ARG(); if (get_u32(&metric, *argv, 0)) @@ -1051,6 +1072,18 @@ static int iproute_modify(int cmd, unsigned flags, int argc, char **argv) strcmp(*argv, "oif") == 0) { NEXT_ARG(); d = *argv; + } else if (matches(*argv, "pref") == 0) { + __u8 pref; + NEXT_ARG(); + if (strcmp(*argv, "low") == 0) + pref = ICMPV6_ROUTER_PREF_LOW; + else if (strcmp(*argv, "medium") == 0) + pref = ICMPV6_ROUTER_PREF_MEDIUM; + else if (strcmp(*argv, "high") == 0) + pref = ICMPV6_ROUTER_PREF_HIGH; + else if (get_u8(&pref, *argv, 0)) + invarg("\"pref\" value is invalid\n", *argv); + addattr8(&req.n, sizeof(req), RTA_PREF, pref); } else { int type; inet_prefix dst; diff --git a/man/man8/ip-route.8.in b/man/man8/ip-route.8.in index 1163536d..7fee69f1 100644 --- a/man/man8/ip-route.8.in +++ b/man/man8/ip-route.8.in @@ -129,6 +129,8 @@ replace " } " .IR BOOL " ] [ " .B congctl .IR NAME " ]" +.B pref +.IR PREF " ]" .ti -8 .IR TYPE " := [ " @@ -158,6 +160,10 @@ throw " | " unreachable " | " prohibit " | " blackhole " | " nat " ]" .IR FEATURES " := [ " .BR ecn " | ]" +.ti -8 +.IR PREF " := [ " +.BR low " | " medium " | " high " ]" + .SH DESCRIPTION .B ip route @@ -562,6 +568,28 @@ to assign (or not to assign) protocol tags. .B onlink pretend that the nexthop is directly attached to this link, even if it does not match any interface prefix. + +.TP +.BI pref " PREF" +the IPv6 route preference. +.I PREF +is a string specifying the route preference as defined in RFC4191 for Router +Discovery messages. 
Namely: + +.in +8 +.B low +- the route has the lowest priority +.sp + +.B medium +- the route has the default priority +.sp + +.B high +- the route has the highest priority +.sp + +.in -8 .RE .TP From 8b90a9907ef361a25b55d31c2a8c372f4d632e99 Mon Sep 17 00:00:00 2001 From: Vadim Kochan Date: Wed, 25 Mar 2015 05:14:37 +0200 Subject: [PATCH 058/141] tc class: Ignore if default class name file does not exist If '-nm' is specified, do not fail if there is no default class names file in /etc/iproute2. Changed default class name file cls_names -> tc_cls. Signed-off-by: Vadim Kochan --- include/names.h | 3 ++- lib/names.c | 61 +++++++++++++++++++++++++++++++++++-------------- tc/tc_util.c | 19 +++++++++++---- 3 files changed, 61 insertions(+), 22 deletions(-) diff --git a/include/names.h b/include/names.h index 4123d0b0..6fed5818 100644 --- a/include/names.h +++ b/include/names.h @@ -16,7 +16,8 @@ struct db_names { int max; }; -struct db_names *db_names_alloc(const char *path); +struct db_names *db_names_alloc(void); +int db_names_load(struct db_names *db, const char *path); void db_names_free(struct db_names *db); char *id_to_name(struct db_names *db, int id, char *name); diff --git a/lib/names.c b/lib/names.c index 93933f74..3b5b0b1e 100644 --- a/lib/names.c +++ b/lib/names.c @@ -11,8 +11,10 @@ #include #include #include +#include #include "names.h" +#include "utils.h" #define MAX_ENTRIES 256 #define NAME_MAX_LEN 512 @@ -48,48 +50,65 @@ static int read_id_name(FILE *fp, int *id, char *name) return 0; } -struct db_names *db_names_alloc(const char *path) +struct db_names *db_names_alloc(void) { struct db_names *db; - struct db_entry *entry; - FILE *fp; - int id; - char namebuf[NAME_MAX_LEN] = {0}; - int ret; - - fp = fopen(path, "r"); - if (!fp) { - fprintf(stderr, "Can't open file: %s\n", path); - return NULL; - } db = malloc(sizeof(*db)); + if (!db) + return NULL; + memset(db, 0, sizeof(*db)); db->size = MAX_ENTRIES; db->hash = malloc(sizeof(struct db_entry *) * db->size);
memset(db->hash, 0, sizeof(struct db_entry *) * db->size); + return db; +} + +int db_names_load(struct db_names *db, const char *path) +{ + struct db_entry *entry; + FILE *fp; + int id; + char namebuf[NAME_MAX_LEN] = {0}; + int ret = -1; + + fp = fopen(path, "r"); + if (!fp) + return -ENOENT; + while ((ret = read_id_name(fp, &id, &namebuf[0]))) { if (ret == -1) { fprintf(stderr, "Database %s is corrupted at %s\n", path, namebuf); - fclose(fp); - return NULL; + goto Exit; } + ret = -1; if (id < 0) continue; entry = malloc(sizeof(*entry)); - entry->id = id; + if (!entry) + goto Exit; + entry->name = strdup(namebuf); + if (!entry->name) { + free(entry); + goto Exit; + } + + entry->id = id; entry->next = db->hash[id & (db->size - 1)]; db->hash[id & (db->size - 1)] = entry; } + ret = 0; +Exit: fclose(fp); - return db; + return ret; } void db_names_free(struct db_names *db) @@ -117,8 +136,12 @@ void db_names_free(struct db_names *db) char *id_to_name(struct db_names *db, int id, char *name) { - struct db_entry *entry = db->hash[id & (db->size - 1)]; + struct db_entry *entry; + if (!db) + return NULL; + + entry = db->hash[id & (db->size - 1)]; while (entry && entry->id != id) entry = entry->next; @@ -136,6 +159,9 @@ int name_to_id(struct db_names *db, int *id, const char *name) struct db_entry *entry; int i; + if (!db) + return -1; + if (db->cached && strcmp(db->cached->name, name) == 0) { *id = db->cached->id; return 0; @@ -145,6 +171,7 @@ int name_to_id(struct db_names *db, int *id, const char *name) entry = db->hash[i]; while (entry && strcmp(entry->name, name)) entry = entry->next; + if (entry) { db->cached = entry; *id = entry->id; diff --git a/tc/tc_util.c b/tc/tc_util.c index feae4394..1d3153df 100644 --- a/tc/tc_util.c +++ b/tc/tc_util.c @@ -21,6 +21,7 @@ #include #include #include +#include #include "utils.h" #include "names.h" @@ -33,14 +34,24 @@ static struct db_names *cls_names = NULL; -#define NAMES_DB "/etc/iproute2/cls_names" +#define NAMES_DB 
"/etc/iproute2/tc_cls" int cls_names_init(char *path) { - cls_names = db_names_alloc(path ?: NAMES_DB); - if (!cls_names) { - fprintf(stderr, "Error while opening class names file\n"); + int ret; + + cls_names = db_names_alloc(); + if (!cls_names) return -1; + + ret = db_names_load(cls_names, path ?: NAMES_DB); + if (ret == -ENOENT && path) { + fprintf(stderr, "Can't open class names file: %s\n", path); + return -1; + } + if (ret) { + db_names_free(cls_names); + cls_names = NULL; } return 0; From bbf2f7c66d84baa1d6c889b449b1cd49f08de725 Mon Sep 17 00:00:00 2001 From: Vadim Kochan Date: Thu, 2 Apr 2015 18:08:03 +0300 Subject: [PATCH 059/141] man ip-netns: Fix shifted layout at bottom of 'ip netns del' Signed-off-by: Vadim Kochan --- man/man8/ip-netns.8 | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/man/man8/ip-netns.8 b/man/man8/ip-netns.8 index d34cdfe3..5985be06 100644 --- a/man/man8/ip-netns.8 +++ b/man/man8/ip-netns.8 @@ -116,12 +116,18 @@ $ ip netns exec net0 SOME_PROCESS_IN_BACKGROUND $ ip netns del net0 .RE +.RS and eth0 will appear in the default netns only after SOME_PROCESS_IN_BACKGROUND will exit or will be killed. 
To prevent this the processes running in net0 should be killed before deleting the netns: - $ ip netns pids net0 | xargs kill - $ ip netns del net0 +.RE +.RS 10 +$ ip netns pids net0 | xargs kill +.RE +.RS 10 +$ ip netns del net0 +.RE .TP .B ip netns set NAME NETNSID - assign an id to a peer network namespace From 21107f52b0d611ada2713890e0effdf0cd1d994b Mon Sep 17 00:00:00 2001 From: Vadim Kochan Date: Sat, 4 Apr 2015 17:06:19 +0300 Subject: [PATCH 060/141] ip-link: Align usage at [link-netns ID] line Output of the usage was shifted because of a missing TAB Signed-off-by: Vadim Kochan --- ip/iplink.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ip/iplink.c b/ip/iplink.c index 5893ee40..e6f30e99 100644 --- a/ip/iplink.c +++ b/ip/iplink.c @@ -72,7 +72,7 @@ void iplink_usage(void) fprintf(stderr, " [ mtu MTU ]\n"); fprintf(stderr, " [ netns PID ]\n"); fprintf(stderr, " [ netns NAME ]\n"); - fprintf(stderr, " [ link-netnsid ID ]\n"); + fprintf(stderr, " [ link-netnsid ID ]\n"); fprintf(stderr, " [ alias NAME ]\n"); fprintf(stderr, " [ vf NUM [ mac LLADDR ]\n"); fprintf(stderr, " [ vlan VLANID [ qos VLAN-QOS ] ]\n"); From b6d6b5a1cdd91c878ab2541f38402a44ffb75ee7 Mon Sep 17 00:00:00 2001 From: Vadim Kochan Date: Sat, 4 Apr 2015 19:00:55 +0300 Subject: [PATCH 061/141] man ip-link: Add missing link types - vti,ipvlan,nlmon Signed-off-by: Vadim Kochan --- man/man8/ip-link.8.in | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/man/man8/ip-link.8.in b/man/man8/ip-link.8.in index 0e3bb5e6..642cdb23 100644 --- a/man/man8/ip-link.8.in +++ b/man/man8/ip-link.8.in @@ -72,7 +72,10 @@ ip-link \- network device configuration .BR gre " |" .BR gretap " |" .BR ip6gre " |" -.BR ip6gretap " ]" +.BR ip6gretap " |" +.BR vti " |" +.BR nlmon " |" +.BR ipvlan " ]" .ti -8 .BI "ip link delete " DEVICE @@ -228,6 +231,15 @@ Link types: .sp .BR ip6gretap - Virtual L2 tunnel interface GRE over IPv6 +.sp +.BR vti +- Virtual tunnel interface +.sp
+.BR nlmon +- Netlink monitoring device +.sp +.BR ipvlan +- Interface for L3 (IPv6/IPv4) based VLANs .in -8 .TP From a89d5329d414c601c220505c5173afecb4438dba Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pavel=20=C5=A0imerda?= Date: Tue, 7 Apr 2015 08:41:36 -0700 Subject: [PATCH 062/141] docs: make spacing consistent MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Result of the following command: sed -ri 's/\. /. /g' man/*/* Signed-Off-By: Pavel Å imerda --- man/man8/arpd.8 | 2 +- man/man8/bridge.8 | 60 +++++++++++++++--------------- man/man8/ip-address.8.in | 14 +++---- man/man8/ip-addrlabel.8 | 2 +- man/man8/ip-link.8.in | 12 +++--- man/man8/ip-maddress.8 | 2 +- man/man8/ip-monitor.8 | 4 +- man/man8/ip-mroute.8 | 2 +- man/man8/ip-neighbour.8 | 6 +-- man/man8/ip-netns.8 | 10 ++--- man/man8/ip-route.8.in | 78 +++++++++++++++++++-------------------- man/man8/ip-rule.8 | 26 ++++++------- man/man8/ip-tunnel.8 | 8 ++-- man/man8/ip.8 | 10 ++--- man/man8/rtmon.8 | 2 +- man/man8/tc-cbq-details.8 | 4 +- man/man8/tc-cbq.8 | 4 +- man/man8/tc-choke.8 | 6 +-- man/man8/tc-codel.8 | 4 +- man/man8/tc-drr.8 | 10 ++--- man/man8/tc-fq_codel.8 | 6 +-- man/man8/tc-hfsc.8 | 2 +- man/man8/tc-netem.8 | 2 +- man/man8/tc-pie.8 | 2 +- man/man8/tc-red.8 | 4 +- man/man8/tc-sfb.8 | 26 ++++++------- man/man8/tc-sfq.8 | 2 +- man/man8/tc-tbf.8 | 2 +- 28 files changed, 156 insertions(+), 156 deletions(-) diff --git a/man/man8/arpd.8 b/man/man8/arpd.8 index fc99b97e..5050a98b 100644 --- a/man/man8/arpd.8 +++ b/man/man8/arpd.8 @@ -35,7 +35,7 @@ Suppress sending broadcast queries by the kernel. This option only makes sense t Specifies the timeout of the negative cache. When resolution fails, arpd suppresses further attempts to resolve for this period. This option only makes sense together with option '-k'. This timeout should not be too much longer than the boot time of a typical host not supporting gratuitous ARP. Default value is 60 seconds. .TP -p