From 7dec1bf88bc34e2d0b320f0c23bd1a060c73852b Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Thu, 8 May 2008 09:11:04 -0700 Subject: [PATCH 01/18] Fix bad hash calculation because of signed address The addr[] was being used signed, but this causes hash calculation to overflow. Originally reported as Debian bug 480173. --- lib/utils.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/lib/utils.c b/lib/utils.c index d99deacd..fa4fe3e6 100644 --- a/lib/utils.c +++ b/lib/utils.c @@ -518,13 +518,14 @@ const char *rt_addr_n2a(int af, int len, const void *addr, char *buf, int buflen struct namerec { struct namerec *next; + const char *name; inet_prefix addr; - char *name; }; -static struct namerec *nht[256]; +#define NHASH 257 +static struct namerec *nht[NHASH]; -char *resolve_address(const char *addr, int len, int af) +static const char *resolve_address(const void *addr, int len, int af) { struct namerec *n; struct hostent *h_ent; @@ -539,7 +540,7 @@ char *resolve_address(const char *addr, int len, int af) len = 4; } - hash = addr[len-1] ^ addr[len-2] ^ addr[len-3] ^ addr[len-4]; + hash = *(__u32 *)(addr + len - 4) % NHASH; for (n = nht[hash]; n; n = n->next) { if (n->addr.family == af && @@ -573,7 +574,8 @@ const char *format_host(int af, int len, const void *addr, { #ifdef RESOLVE_HOSTNAMES if (resolve_hosts) { - char *n; + const char *n; + if (len <= 0) { switch (af) { case AF_INET: From 44dcfe820185590f04c9f5f18579d08532fc1a4b Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Fri, 9 May 2008 15:42:34 -0700 Subject: [PATCH 02/18] Change formatting of u32 back to default Don't break scripts that depend on previous offset/value format. Introduce a new -pretty flag for decoding, and (*gasp*) document the formatting arguments. 
--- man/man8/tc.8 | 39 +++++++++++++++++++++++++++++++++++++-- tc/f_u32.c | 4 +++- tc/tc.c | 6 +++++- 3 files changed, 45 insertions(+), 4 deletions(-) diff --git a/man/man8/tc.8 b/man/man8/tc.8 index 53a866f5..8c0880fa 100644 --- a/man/man8/tc.8 +++ b/man/man8/tc.8 @@ -34,16 +34,28 @@ priority filtertype .B flowid flow-id -.B tc [-s | -d ] qdisc show [ dev +.B tc +.RI "[ " FORMAT " ]" +.B qdisc show [ dev DEV .B ] .P -.B tc [-s | -d ] class show dev +.B tc +.RI "[ " FORMAT " ]" +.B class show dev DEV .P .B tc filter show dev DEV +.ti -8 +.IR FORMAT " := {" +\fB\-s\fR[\fItatistics\fR] | +\fB\-d\fR[\fIetails\fR] | +\fB\-r\fR[\fIaw\fR] | +\fB\-p\fR[\fIretty\fR] | +\fB\-i\fR[\fIec\fR] } + .SH DESCRIPTION .B Tc is used to configure Traffic Control in the Linux kernel. Traffic Control consists @@ -326,6 +338,29 @@ link Only available for qdiscs and performs a replace where the node must exist already. +.SH FORMAT +The show command has additional formatting options: + +.TP +.BR "\-s" , " \-stats", " \-statistics" +output more statistics about packet usage. + +.TP +.BR "\-d", " \-details" +output more detailed information about rates and cell sizes. + +.TP +.BR "\-r", " \-raw" +output raw hex values for handles. + +.TP +.BR "\-p", " \-pretty" +decode filter offset and mask values to equivalent filter commands based on TCP/IP. + +.TP +.BR "\-iec" +print rates in IEC units (i.e. 1K = 1024). + .SH HISTORY .B tc diff --git a/tc/f_u32.c b/tc/f_u32.c index 7e4d56ca..aba767d0 100644 --- a/tc/f_u32.c +++ b/tc/f_u32.c @@ -25,6 +25,8 @@ #include "utils.h" #include "tc_util.h" +extern int show_pretty; + static void explain(void) { fprintf(stderr, "Usage: ... u32 [ match SELECTOR ... 
] [ link HTID ]" @@ -793,7 +795,7 @@ static void show_key(FILE *f, const struct tc_u32_key *key) { char abuf[256]; - if (show_raw) + if (!show_pretty) goto raw; switch (key->off) { diff --git a/tc/tc.c b/tc/tc.c index 6126c712..aabeac85 100644 --- a/tc/tc.c +++ b/tc/tc.c @@ -33,6 +33,8 @@ int show_stats = 0; int show_details = 0; int show_raw = 0; +int show_pretty = 0; + int resolve_hosts = 0; int use_iec = 0; int force = 0; @@ -182,7 +184,7 @@ static void usage(void) fprintf(stderr, "Usage: tc [ OPTIONS ] OBJECT { COMMAND | help }\n" " tc [-force] -batch file\n" "where OBJECT := { qdisc | class | filter | action | monitor }\n" - " OPTIONS := { -s[tatistics] | -d[etails] | -r[aw] | -b[atch] [file] }\n"); + " OPTIONS := { -s[tatistics] | -d[etails] | -r[aw] | -p[retty] | -b[atch] [file] }\n"); } static int do_cmd(int argc, char **argv) @@ -273,6 +275,8 @@ int main(int argc, char **argv) ++show_details; } else if (matches(argv[1], "-raw") == 0) { ++show_raw; + } else if (matches(argv[1], "-pretty") == 0) { + ++show_pretty; } else if (matches(argv[1], "-Version") == 0) { printf("tc utility, iproute2-ss%s\n", SNAPSHOT); return 0; From eefcbc72062a235efe391163962f7353dba1af4c Mon Sep 17 00:00:00 2001 From: jamal Date: Sun, 20 Apr 2008 10:47:48 -0400 Subject: [PATCH 03/18] Expose the filter protocol makes protocol accessible .. cheers, jamal [PATCH 2/3] [TC/FILTERS] Expose the filter protocol Expose the filter protocol so it can be used by underlying classifiers when they need it. 
Signed-off-by: Jamal Hadi Salim --- tc/tc_filter.c | 9 +++++---- tc/tc_util.h | 1 + 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/tc/tc_filter.c b/tc/tc_filter.c index 6701f9ae..db44dec4 100644 --- a/tc/tc_filter.c +++ b/tc/tc_filter.c @@ -175,6 +175,7 @@ static __u32 filter_parent; static int filter_ifindex; static __u32 filter_prio; static __u32 filter_protocol; +__u16 f_proto = 0; int print_filter(const struct sockaddr_nl *who, struct nlmsghdr *n, @@ -221,13 +222,13 @@ int print_filter(const struct sockaddr_nl *who, } } if (t->tcm_info) { - __u32 protocol = TC_H_MIN(t->tcm_info); + f_proto = TC_H_MIN(t->tcm_info); __u32 prio = TC_H_MAJ(t->tcm_info)>>16; - if (!filter_protocol || filter_protocol != protocol) { - if (protocol) { + if (!filter_protocol || filter_protocol != f_proto) { + if (f_proto) { SPRINT_BUF(b1); fprintf(fp, "protocol %s ", - ll_proto_n2a(protocol, b1, sizeof(b1))); + ll_proto_n2a(f_proto, b1, sizeof(b1))); } } if (!filter_prio || filter_prio != prio) { diff --git a/tc/tc_util.h b/tc/tc_util.h index 301b5c7f..796da542 100644 --- a/tc/tc_util.h +++ b/tc/tc_util.h @@ -19,6 +19,7 @@ struct qdisc_util int (*print_copt)(struct qdisc_util *qu, FILE *f, struct rtattr *opt); }; +extern __u16 f_proto; struct filter_util { struct filter_util *next; From 1750abe2ba8b695aa39a67cff8ac44b2fb6a2a3d Mon Sep 17 00:00:00 2001 From: jamal Date: Sun, 20 Apr 2008 10:49:24 -0400 Subject: [PATCH 04/18] Infrastructure for pretty printing And last for now .. cheers, jamal [PATCH 3/3] [TC/U32] Infrastructure for pretty printing This patch makes it easy to add pretty printers of different protocols. For starters it makes use of ipv4 and raw printers. Add more later ... 
Signed-off-by: Jamal Hadi Salim --- tc/f_u32.c | 59 ++++++++++++++++++++++++++++++++++++++++-------------- 1 file changed, 44 insertions(+), 15 deletions(-) diff --git a/tc/f_u32.c b/tc/f_u32.c index aba767d0..0bb57ce3 100644 --- a/tc/f_u32.c +++ b/tc/f_u32.c @@ -21,6 +21,7 @@ #include #include #include +#include #include "utils.h" #include "tc_util.h" @@ -791,27 +792,24 @@ static int parse_hashkey(int *argc_p, char ***argv_p, struct tc_u32_sel *sel) return 0; } -static void show_key(FILE *f, const struct tc_u32_key *key) +static void print_ipv4(FILE *f, const struct tc_u32_key *key) { char abuf[256]; - if (!show_pretty) - goto raw; - switch (key->off) { case 0: switch (ntohl(key->mask)) { case 0x0f000000: - fprintf(f, "\n ihl %u", ntohl(key->val) >> 24); + fprintf(f, "\n match IP ihl %u", ntohl(key->val) >> 24); return; case 0x00ff0000: - fprintf(f, "\n dsfield %#x", ntohl(key->val) >> 16); + fprintf(f, "\n match IP dsfield %#x", ntohl(key->val) >> 16); return; } break; case 8: if (ntohl(key->mask) == 0x00ff0000) { - fprintf(f, "\n protocol %u", ntohl(key->val) >> 16); + fprintf(f, "\n match IP protocol %d", ntohl(key->val) >> 16); return; } break; @@ -820,7 +818,7 @@ static void show_key(FILE *f, const struct tc_u32_key *key) int bits = mask2bits(key->mask); if (bits >= 0) { fprintf(f, "\n %s %s/%d", - key->off == 12 ? "src" : "dst", + key->off == 12 ? 
"match IP src" : "match IP dst", inet_ntop(AF_INET, &key->val, abuf, sizeof(abuf)), bits); @@ -832,31 +830,62 @@ static void show_key(FILE *f, const struct tc_u32_key *key) case 20: switch (ntohl(key->mask)) { case 0x0000ffff: - fprintf(f, "\n sport %u", + fprintf(f, "\n match sport %u", ntohl(key->val) & 0xffff); return; case 0xffff0000: - fprintf(f, "\n dport %u", + fprintf(f, "\n match dport %u", ntohl(key->val) >> 16); return; case 0xffffffff: - fprintf(f, "\n sport %u, dport %u", + fprintf(f, "\n match sport %u, match dport %u", ntohl(key->val) & 0xffff, ntohl(key->val) >> 16); return; } + /* XXX: Default print_raw */ } +} -raw: - fprintf(f, "\n match %08x/%08x at %s%d", +static void print_raw(FILE *f, const struct tc_u32_key *key) +{ + fprintf(f, "\n match %08x/%08x at %s%d", (unsigned int)ntohl(key->val), (unsigned int)ntohl(key->mask), key->offmask ? "nexthdr+" : "", key->off); } -static int u32_parse_opt(struct filter_util *qu, char *handle, +static const struct { + __u16 proto; + __u16 pad; + void (*pprinter)(FILE *f, const struct tc_u32_key *key); +} u32_pprinters[] = { + {0, 0, print_raw}, + {ETH_P_IP, 0, print_ipv4}, +}; + +static void show_keys(FILE *f, const struct tc_u32_key *key) +{ + int i = 0; + + if (!show_pretty) + goto show_k; + + for (i = 0; i < sizeof(u32_pprinters) / sizeof(u32_pprinters[0]); i++) { + if (u32_pprinters[i].proto == ntohs(f_proto)) { +show_k: + u32_pprinters[i].pprinter(f, key); + return; + } + } + + i = 0; + goto show_k; +} + +static int u32_parse_opt(struct filter_util *qu, char *handle, int argc, char **argv, struct nlmsghdr *n) { struct { @@ -1131,7 +1160,7 @@ static int u32_print_opt(struct filter_util *qu, FILE *f, struct rtattr *opt, if (sel->nkeys) { int i; for (i=0; inkeys; i++) { - show_key(f, sel->keys + i); + show_keys(f, sel->keys + i); if (show_stats && NULL != pf) fprintf(f, " (success %llu ) ", (unsigned long long) pf->kcnts[i]); From de95ae7ca7d6a290eaab2c137b74f19c78a9a1fe Mon Sep 17 00:00:00 2001 From: 
Herbert Xu Date: Wed, 23 Apr 2008 15:42:32 +0800 Subject: [PATCH 05/18] xfrm: Allow replay setting Hi Stephen: [IP] xfrm: Allow replay setting For certain applications there is a requirement to start the sequence number from a point other than the default. As it is the kernel provides an interface to do that but it isn't available through the ip(8) command. Since we're encouraging people to migrate over to ip(8) for manual keying, it is useful to have this ability there. This patch adds support for setting replay sequence numbers through ip(8). Signed-off-by: Herbert Xu Thanks, -- Visit Openswan at http://www.openswan.org/ Email: Herbert Xu ~{PmV>HI~} Home Page: http://gondor.apana.org.au/~herbert/ PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt -- --- ip/xfrm_state.c | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/ip/xfrm_state.c b/ip/xfrm_state.c index 0e212033..ff309e7d 100644 --- a/ip/xfrm_state.c +++ b/ip/xfrm_state.c @@ -57,7 +57,8 @@ static void usage(void) { fprintf(stderr, "Usage: ip xfrm state { add | update } ID [ XFRM_OPT ] [ mode MODE ]\n"); fprintf(stderr, " [ reqid REQID ] [ seq SEQ ] [ replay-window SIZE ] [ flag FLAG-LIST ]\n"); - fprintf(stderr, " [ encap ENCAP ] [ sel SELECTOR ] [ LIMIT-LIST ]\n"); + fprintf(stderr, " [ encap ENCAP ] [ sel SELECTOR ] [ replay-seq SEQ ]\n"); + fprintf(stderr, " [ replay-oseq SEQ ] [ LIMIT-LIST ]\n"); fprintf(stderr, "Usage: ip xfrm state allocspi ID [ mode MODE ] [ reqid REQID ] [ seq SEQ ]\n"); fprintf(stderr, " [ min SPI max SPI ]\n"); fprintf(stderr, "Usage: ip xfrm state { delete | get } ID\n"); @@ -232,6 +233,7 @@ static int xfrm_state_modify(int cmd, unsigned flags, int argc, char **argv) struct xfrm_usersa_info xsinfo; char buf[RTA_BUF_SIZE]; } req; + struct xfrm_replay_state replay; char *idp = NULL; char *ealgop = NULL; char *aalgop = NULL; @@ -239,6 +241,7 @@ static int xfrm_state_modify(int cmd, unsigned flags, int argc, char **argv) char *coap = NULL; 
memset(&req, 0, sizeof(req)); + memset(&replay, 0, sizeof(replay)); req.n.nlmsg_len = NLMSG_LENGTH(sizeof(req.xsinfo)); req.n.nlmsg_flags = NLM_F_REQUEST|flags; @@ -264,6 +267,14 @@ static int xfrm_state_modify(int cmd, unsigned flags, int argc, char **argv) NEXT_ARG(); if (get_u8(&req.xsinfo.replay_window, *argv, 0)) invarg("\"replay-window\" value is invalid", *argv); + } else if (strcmp(*argv, "replay-seq") == 0) { + NEXT_ARG(); + if (get_u32(&replay.seq, *argv, 0)) + invarg("\"replay-seq\" value is invalid", *argv); + } else if (strcmp(*argv, "replay-oseq") == 0) { + NEXT_ARG(); + if (get_u32(&replay.oseq, *argv, 0)) + invarg("\"replay-oseq\" value is invalid", *argv); } else if (strcmp(*argv, "flag") == 0) { NEXT_ARG(); xfrm_state_flag_parse(&req.xsinfo.flags, &argc, &argv); @@ -386,6 +397,10 @@ static int xfrm_state_modify(int cmd, unsigned flags, int argc, char **argv) argc--; argv++; } + if (replay.seq || replay.oseq) + addattr_l(&req.n, sizeof(req.buf), XFRMA_REPLAY_VAL, + (void *)&replay, sizeof(replay)); + if (!idp) { fprintf(stderr, "Not enough information: \"ID\" is required\n"); exit(1); From 7dfb0366655a136f82c23fb3a6e6f30b482e3f86 Mon Sep 17 00:00:00 2001 From: Andreas Henriksson Date: Thu, 3 Jan 2008 16:48:56 +0100 Subject: [PATCH 06/18] Add routel and routef man page. 
--- Makefile | 1 + man/man8/routel.8 | 32 ++++++++++++++++++++++++++++++++ 2 files changed, 33 insertions(+) create mode 100644 man/man8/routel.8 diff --git a/Makefile b/Makefile index de04176d..723eb5d3 100644 --- a/Makefile +++ b/Makefile @@ -56,6 +56,7 @@ install: all ln -sf lnstat.8 $(DESTDIR)$(MANDIR)/man8/rtstat.8 ln -sf lnstat.8 $(DESTDIR)$(MANDIR)/man8/ctstat.8 ln -sf rtacct.8 $(DESTDIR)$(MANDIR)/man8/nstat.8 + ln -sf routel.8 $(DESTDIR)$(MANDIR)/man8/routef.8 install -m 0755 -d $(DESTDIR)$(MANDIR)/man3 install -m 0644 $(shell find man/man3 -maxdepth 1 -type f) $(DESTDIR)$(MANDIR)/man3 diff --git a/man/man8/routel.8 b/man/man8/routel.8 new file mode 100644 index 00000000..cdf8f55b --- /dev/null +++ b/man/man8/routel.8 @@ -0,0 +1,32 @@ +.TH "ROUTEL" "8" "3 Jan, 2008" "iproute2" "Linux" +.SH "NAME" +.LP +routel \- list routes with pretty output format +.br +routef \- flush routes +.SH "SYNTAX" +.LP +routel [\fItablenr\fP [\fIraw ip args...\fP]] +.br +routef +.SH "DESCRIPTION" +.LP +These programs are a set of helper scripts you can use instead of raw iproute2 commands. +.br +The routel script will list routes in a format that some might consider easier to interpret than the ip route list equivalent. +.br +The routef script does not take any arguments and will simply flush the routing table down the drain. Beware! This means deleting all routes which will make your network unusable! + +.SH "FILES" +.LP +\fI/usr/bin/routef\fP +.br +\fI/usr/bin/routel\fP +.SH "AUTHORS" +.LP +The routel script was written by Stephen R. van den Berg , 1999/04/18 and donated to the public domain. +.br +This manual page was written by Andreas Henriksson , for the Debian GNU/Linux system. +.SH "SEE ALSO" +.LP +ip(8) From 516ffb6b7724e97ca035293dcfd9f94cf6ce3a47 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Thu, 22 May 2008 13:41:40 -0700 Subject: [PATCH 07/18] Use inet_pton to read ipv4 addresses Use the standard POSIX inet_pton to convert from string to IPV4 address. 
This avoids problems where ip parses "127.2" wrong. --- lib/utils.c | 15 ++------------- 1 file changed, 2 insertions(+), 13 deletions(-) diff --git a/lib/utils.c b/lib/utils.c index fa4fe3e6..a88f82b5 100644 --- a/lib/utils.c +++ b/lib/utils.c @@ -246,10 +246,6 @@ int get_s8(__s8 *val, const char *arg, int base) int get_addr_1(inet_prefix *addr, const char *name, int family) { - const char *cp; - unsigned char *ap = (unsigned char*)addr->data; - int i; - memset(addr, 0, sizeof(*addr)); if (strcmp(name, "default") == 0 || @@ -288,17 +284,10 @@ int get_addr_1(inet_prefix *addr, const char *name, int family) addr->family = AF_INET; if (family != AF_UNSPEC && family != AF_INET) return -1; + if (inet_pton(AF_INET, name, addr->data) <= 0) + return -1; addr->bytelen = 4; addr->bitlen = -1; - for (cp=name, i=0; *cp; cp++) { - if (*cp <= '9' && *cp >= '0') { - ap[i] = 10*ap[i] + (*cp-'0'); - continue; - } - if (*cp == '.' && ++i <= 3) - continue; - return -1; - } return 0; } From b6da1afc736b7ab7fcd3b4df4f4486525c1ab7d3 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Thu, 29 May 2008 11:54:19 -0700 Subject: [PATCH 08/18] ematch related bugfix and cleanup Bugfix: use strtoul rather than strtol for bstrtol to handle large key/mask. Deinline larger functions to save space. 
--- tc/m_ematch.c | 77 +++++++++++++++++++++++++++++++++++++++++++++++++ tc/m_ematch.h | 80 ++++----------------------------------------------- 2 files changed, 83 insertions(+), 74 deletions(-) diff --git a/tc/m_ematch.c b/tc/m_ematch.c index da4f22b2..7065fd0b 100644 --- a/tc/m_ematch.c +++ b/tc/m_ematch.c @@ -491,3 +491,80 @@ int print_ematch(FILE *fd, const struct rtattr *rta) return print_ematch_list(fd, hdr, tb[TCA_EMATCH_TREE_LIST]); } + +struct bstr * bstr_alloc(const char *text) +{ + struct bstr *b = calloc(1, sizeof(*b)); + + if (b == NULL) + return NULL; + + b->data = strdup(text); + if (b->data == NULL) { + free(b); + return NULL; + } + + b->len = strlen(text); + + return b; +} + +unsigned long bstrtoul(const struct bstr *b) +{ + char *inv = NULL; + unsigned long l; + char buf[b->len+1]; + + memcpy(buf, b->data, b->len); + buf[b->len] = '\0'; + + l = strtoul(buf, &inv, 0); + if (l == ULONG_MAX || inv == buf) + return ULONG_MAX; + + return l; +} + +void bstr_print(FILE *fd, const struct bstr *b, int ascii) +{ + int i; + char *s = b->data; + + if (ascii) + for (i = 0; i < b->len; i++) + fprintf(fd, "%c", isprint(s[i]) ? s[i] : '.'); + else { + for (i = 0; i < b->len; i++) + fprintf(fd, "%02x", s[i]); + fprintf(fd, "\""); + for (i = 0; i < b->len; i++) + fprintf(fd, "%c", isprint(s[i]) ? s[i] : '.'); + fprintf(fd, "\""); + } +} + +void print_ematch_tree(const struct ematch *tree) +{ + const struct ematch *t; + + for (t = tree; t; t = t->next) { + if (t->inverted) + printf("NOT "); + + if (t->child) { + printf("("); + print_ematch_tree(t->child); + printf(")"); + } else { + struct bstr *b; + for (b = t->args; b; b = b->next) + printf("%s%s", b->data, b->next ? 
" " : ""); + } + + if (t->relation == TCF_EM_REL_AND) + printf(" AND "); + else if (t->relation == TCF_EM_REL_OR) + printf(" OR "); + } +} diff --git a/tc/m_ematch.h b/tc/m_ematch.h index 2b5d2938..5036e9b6 100644 --- a/tc/m_ematch.h +++ b/tc/m_ematch.h @@ -18,23 +18,7 @@ struct bstr struct bstr *next; }; -static inline struct bstr * bstr_alloc(const char *text) -{ - struct bstr *b = calloc(1, sizeof(*b)); - - if (b == NULL) - return NULL; - - b->data = strdup(text); - if (b->data == NULL) { - free(b); - return NULL; - } - - b->len = strlen(text); - - return b; -} +extern struct bstr * bstr_alloc(const char *text); static inline struct bstr * bstr_new(char *data, unsigned int len) { @@ -60,45 +44,15 @@ static inline int bstrcmp(struct bstr *b, const char *text) return d; } -static inline unsigned long bstrtoul(struct bstr *b) -{ - char *inv = NULL; - unsigned long l; - char buf[b->len+1]; - - memcpy(buf, b->data, b->len); - buf[b->len] = '\0'; - - l = strtol(buf, &inv, 0); - if (l == ULONG_MAX || inv == buf) - return LONG_MAX; - - return l; -} - -static inline void bstr_print(FILE *fd, struct bstr *b, int ascii) -{ - int i; - char *s = b->data; - - if (ascii) - for (i = 0; i < b->len; i++) - fprintf(fd, "%c", isprint(s[i]) ? s[i] : '.'); - else { - for (i = 0; i < b->len; i++) - fprintf(fd, "%02x", s[i]); - fprintf(fd, "\""); - for (i = 0; i < b->len; i++) - fprintf(fd, "%c", isprint(s[i]) ? 
s[i] : '.'); - fprintf(fd, "\""); - } -} - static inline struct bstr *bstr_next(struct bstr *b) { return b->next; } +extern unsigned long bstrtoul(const struct bstr *b); +extern void bstr_print(FILE *fd, const struct bstr *b, int ascii); + + struct ematch { struct bstr *args; @@ -123,30 +77,8 @@ static inline struct ematch * new_ematch(struct bstr *args, int inverted) return e; } -static inline void print_ematch_tree(struct ematch *tree) -{ - struct ematch *t; +extern void print_ematch_tree(const struct ematch *tree); - for (t = tree; t; t = t->next) { - if (t->inverted) - printf("NOT "); - - if (t->child) { - printf("("); - print_ematch_tree(t->child); - printf(")"); - } else { - struct bstr *b; - for (b = t->args; b; b = b->next) - printf("%s%s", b->data, b->next ? " " : ""); - } - - if (t->relation == TCF_EM_REL_AND) - printf(" AND "); - else if (t->relation == TCF_EM_REL_OR) - printf(" OR "); - } -} struct ematch_util { From 3ea2fb985f3aa979a2b270d01fa651a5ef814464 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Fri, 20 Jun 2008 11:26:38 -0700 Subject: [PATCH 09/18] Update to 2.6.26-rc6 headers Copy sanitized headers from 2.6.26-rc6 --- include/linux/atm.h | 3 ++- include/linux/if_tunnel.h | 22 +++++++++++++++++++--- include/linux/netfilter.h | 1 + include/linux/netfilter_ipv4.h | 2 -- include/linux/rtnetlink.h | 5 +++-- include/linux/types.h | 7 ------- include/linux/xfrm.h | 11 ++++++----- 7 files changed, 31 insertions(+), 20 deletions(-) diff --git a/include/linux/atm.h b/include/linux/atm.h index 593e6c10..9846c8dd 100644 --- a/include/linux/atm.h +++ b/include/linux/atm.h @@ -19,6 +19,7 @@ #include #include #include +#include /* general ATM constants */ @@ -207,7 +208,7 @@ struct sockaddr_atmsvc { char pub[ATM_E164_LEN+1]; /* public address (E.164) */ /* unused addresses must be bzero'ed */ char lij_type; /* role in LIJ call; one of ATM_LIJ* */ - uint32_t lij_id; /* LIJ call identifier */ + __u32 lij_id; /* LIJ call identifier */ } sas_addr 
__ATM_API_ALIGN; /* SVC address */ }; diff --git a/include/linux/if_tunnel.h b/include/linux/if_tunnel.h index 228eb4eb..d4efe401 100644 --- a/include/linux/if_tunnel.h +++ b/include/linux/if_tunnel.h @@ -7,6 +7,10 @@ #define SIOCADDTUNNEL (SIOCDEVPRIVATE + 1) #define SIOCDELTUNNEL (SIOCDEVPRIVATE + 2) #define SIOCCHGTUNNEL (SIOCDEVPRIVATE + 3) +#define SIOCGETPRL (SIOCDEVPRIVATE + 4) +#define SIOCADDPRL (SIOCDEVPRIVATE + 5) +#define SIOCDELPRL (SIOCDEVPRIVATE + 6) +#define SIOCCHGPRL (SIOCDEVPRIVATE + 7) #define GRE_CSUM __constant_htons(0x8000) #define GRE_ROUTING __constant_htons(0x4000) @@ -17,9 +21,6 @@ #define GRE_FLAGS __constant_htons(0x00F8) #define GRE_VERSION __constant_htons(0x0007) -/* i_flags values for SIT mode */ -#define SIT_ISATAP 0x0001 - struct ip_tunnel_parm { char name[IFNAMSIZ]; @@ -31,4 +32,19 @@ struct ip_tunnel_parm struct iphdr iph; }; +/* SIT-mode i_flags */ +#define SIT_ISATAP 0x0001 + +struct ip_tunnel_prl { + __be32 addr; + __u16 flags; + __u16 __reserved; + __u32 datalen; + __u32 __reserved2; + /* data follows */ +}; + +/* PRL flags */ +#define PRL_DEFAULT 0x0001 + #endif /* _IF_TUNNEL_H_ */ diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h index 4196a511..6ea935f7 100644 --- a/include/linux/netfilter.h +++ b/include/linux/netfilter.h @@ -1,6 +1,7 @@ #ifndef __LINUX_NETFILTER_H #define __LINUX_NETFILTER_H +#include /* Responses from hook functions. 
*/ #define NF_DROP 0 diff --git a/include/linux/netfilter_ipv4.h b/include/linux/netfilter_ipv4.h index 791e50ec..da7e16c7 100644 --- a/include/linux/netfilter_ipv4.h +++ b/include/linux/netfilter_ipv4.h @@ -60,8 +60,6 @@ enum nf_ip_hook_priorities { NF_IP_PRI_FILTER = 0, NF_IP_PRI_NAT_SRC = 100, NF_IP_PRI_SELINUX_LAST = 225, - NF_IP_PRI_CONNTRACK_HELPER = INT_MAX - 2, - NF_IP_PRI_NAT_SEQ_ADJUST = INT_MAX - 1, NF_IP_PRI_CONNTRACK_CONFIRM = INT_MAX, NF_IP_PRI_LAST = INT_MAX, }; diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h index 3b90a974..c1f2d502 100644 --- a/include/linux/rtnetlink.h +++ b/include/linux/rtnetlink.h @@ -246,6 +246,7 @@ enum rt_class_t { RT_TABLE_UNSPEC=0, /* User defined values */ + RT_TABLE_COMPAT=252, RT_TABLE_DEFAULT=253, RT_TABLE_MAIN=254, RT_TABLE_LOCAL=255, @@ -267,10 +268,10 @@ enum rtattr_type_t RTA_PREFSRC, RTA_METRICS, RTA_MULTIPATH, - RTA_PROTOINFO, + RTA_PROTOINFO, /* no longer used */ RTA_FLOW, RTA_CACHEINFO, - RTA_SESSION, + RTA_SESSION, /* no longer used */ RTA_MP_ALGO, /* no longer used */ RTA_TABLE, __RTA_MAX diff --git a/include/linux/types.h b/include/linux/types.h index d9e8c4f2..d17027a9 100644 --- a/include/linux/types.h +++ b/include/linux/types.h @@ -162,11 +162,4 @@ typedef __u16 __bitwise __sum16; typedef __u32 __bitwise __wsum; -struct ustat { - __kernel_daddr_t f_tfree; - __kernel_ino_t f_tinode; - char f_fname[6]; - char f_fpack[6]; -}; - #endif /* _LINUX_TYPES_H */ diff --git a/include/linux/xfrm.h b/include/linux/xfrm.h index 05adf0f2..132ba876 100644 --- a/include/linux/xfrm.h +++ b/include/linux/xfrm.h @@ -97,10 +97,10 @@ struct xfrm_algo { }; struct xfrm_algo_aead { - char alg_name[64]; - int alg_key_len; /* in bits */ - int alg_icv_len; /* in bits */ - char alg_key[0]; + char alg_name[64]; + unsigned int alg_key_len; /* in bits */ + unsigned int alg_icv_len; /* in bits */ + char alg_key[0]; }; struct xfrm_stats { @@ -113,7 +113,8 @@ enum { XFRM_POLICY_TYPE_MAIN = 0, XFRM_POLICY_TYPE_SUB = 1, - 
XFRM_POLICY_TYPE_MAX = 2 + XFRM_POLICY_TYPE_MAX = 2, + XFRM_POLICY_TYPE_ANY = 255 }; enum From 4c1db1310f398eade7dad2e1c295cccbb7222066 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Fri, 20 Jun 2008 12:34:15 -0700 Subject: [PATCH 10/18] use correct hz for rto,ato The function get_hz() returns the psched hz value which is wrong for anything other than tc usage. Should be user hz instead, but kernel is broken (patch sent) and this code doesn't get hit on current systems (netlink is used first). --- misc/ss.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/misc/ss.c b/misc/ss.c index 79193e54..9086db07 100644 --- a/misc/ss.c +++ b/misc/ss.c @@ -1191,10 +1191,11 @@ static int tcp_show_line(char *line, const struct filter *f, int family) } } if (show_tcpinfo) { - if (s.rto && s.rto != 3*get_hz()) - printf(" rto:%g", (double)s.rto/get_hz()); + int hz = get_user_hz(); + if (s.rto && s.rto != 3*hz) + printf(" rto:%g", (double)s.rto/hz); if (s.ato) - printf(" ato:%g", (double)s.ato/get_hz()); + printf(" ato:%g", (double)s.ato/hz); if (s.cwnd != 2) printf(" cwnd:%d", s.cwnd); if (s.ssthresh != -1) From 6a34d291449fc799048d2d56edcbeaedf1483fb1 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Fri, 20 Jun 2008 12:37:42 -0700 Subject: [PATCH 11/18] Neighbor cache timer is in user hz All timer values from kernel are supposed to be in constant units or user hz value. 
--- ip/ipneigh.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/ip/ipneigh.c b/ip/ipneigh.c index 03a17605..fa0e41dc 100644 --- a/ip/ipneigh.c +++ b/ip/ipneigh.c @@ -272,10 +272,9 @@ int print_neigh(const struct sockaddr_nl *who, struct nlmsghdr *n, void *arg) fprintf(fp, " router"); } if (tb[NDA_CACHEINFO] && show_stats) { - static int hz; struct nda_cacheinfo *ci = RTA_DATA(tb[NDA_CACHEINFO]); - if (!hz) - hz = get_hz(); + int hz = get_user_hz(); + if (ci->ndm_refcnt) printf(" ref %d", ci->ndm_refcnt); fprintf(fp, " used %d/%d/%d", ci->ndm_used/hz, From 2d44be19cf180d2ec965680b472b8e7acec78aca Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Fri, 20 Jun 2008 12:40:03 -0700 Subject: [PATCH 12/18] timers are in user hz The kernel timers are exposed in user hz not kernel hz --- misc/ss.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/misc/ss.c b/misc/ss.c index 9086db07..38eed29d 100644 --- a/misc/ss.c +++ b/misc/ss.c @@ -420,7 +420,7 @@ const char *print_ms_timer(int timeout) const char *print_hz_timer(int timeout) { - int hz = get_hz(); + int hz = get_user_hz(); return print_ms_timer(((timeout*1000) + hz-1)/hz); } From ae76106841d764fc64188e78f5c2a2575fba725e Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Mon, 23 Jun 2008 15:59:18 +0200 Subject: [PATCH 13/18] tc: don't set protocol field on filter delete > # tc filter show dev eth1 | grep 4:29:d1 > filter parent 1: protocol ip pref 5 u32 fh 4:29:d1 order 209 key ht 4 > bkt 29 flowid 1:b7aa > > # tc filter del dev eth1 parent 1: pref 5 handle 4:29:d1 u32 > RTNETLINK answers: Invalid argument > We have an error talking to the kernel > > after rollback to package"sys-apps/iproute2-2.6.24.20080108" all > deleted normal... The current iproute version uses "protocol all" by default if it's not specified. This is actually only useful for creating new filters, on deletion an unset protocol is treated as wildcard. 
--- tc/tc_filter.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tc/tc_filter.c b/tc/tc_filter.c index db44dec4..177446ef 100644 --- a/tc/tc_filter.c +++ b/tc/tc_filter.c @@ -54,7 +54,7 @@ int tc_filter_modify(int cmd, unsigned flags, int argc, char **argv) } req; struct filter_util *q = NULL; __u32 prio = 0; - __u32 protocol = ETH_P_ALL; + __u32 protocol = 0; int protocol_set = 0; char *fhandle = NULL; char d[16]; @@ -72,6 +72,9 @@ int tc_filter_modify(int cmd, unsigned flags, int argc, char **argv) req.n.nlmsg_type = cmd; req.t.tcm_family = AF_UNSPEC; + if (cmd == RTM_NEWTFILTER && flags & NLM_F_CREATE) + protocol = ETH_P_ALL; + while (argc > 0) { if (strcmp(*argv, "dev") == 0) { NEXT_ARG(); From e2613dc8605e56dbc53890ebbae263f93610bd41 Mon Sep 17 00:00:00 2001 From: Benjamin Thery Date: Fri, 20 Jun 2008 11:07:35 +0200 Subject: [PATCH 14/18] iproute2: add support for IFLA_NET_NS_PID in ip link Hi Stephen, I resend you this patch once more. This time I updated the documentation too (may be that was the reason why you didn't take it before?). Please tell me if there are other things missing in this patch It applies on iproute2 git tree. Regards, Benjamin Description: ------------ This patch adds support for the IFLA_NET_NS_PID type. It is used to move network devices between network namespaces. The syntax is: ip link set DEVICE netns PID PID is the pid of a process in the target network namespace. (Daniel Lezcano is the original author). Signed-off-by: Daniel Lezcano Signed-off-by: Benjamin Thery Signed-off-by: Stephen Hemminger --- doc/ip-cref.tex | 4 ++++ ip/iplink.c | 9 +++++++++ man/man8/ip.8 | 10 +++++++++- 3 files changed, 22 insertions(+), 1 deletion(-) diff --git a/doc/ip-cref.tex b/doc/ip-cref.tex index cc0e07d3..bb4eb783 100644 --- a/doc/ip-cref.tex +++ b/doc/ip-cref.tex @@ -294,6 +294,10 @@ broadcast address will break networking. Do not use it, if you do not understand what this operation really does. 
\end{NB} +\item \verb|netns PID| + +--- move the device to the network namespace associated with the process PID. + \end{itemize} \vskip 1mm diff --git a/ip/iplink.c b/ip/iplink.c index 95801a6c..c70c84ad 100644 --- a/ip/iplink.c +++ b/ip/iplink.c @@ -49,6 +49,7 @@ void iplink_usage(void) fprintf(stderr, " name NEWNAME |\n"); fprintf(stderr, " address LLADDR | broadcast LLADDR |\n"); fprintf(stderr, " mtu MTU }\n"); + fprintf(stderr, " netns PID }\n"); fprintf(stderr, " ip link show [ DEVICE ]\n"); exit(-1); } @@ -156,6 +157,7 @@ int iplink_parse(int argc, char **argv, struct iplink_req *req, char abuf[32]; int qlen = -1; int mtu = -1; + int netns = -1; ret = argc; @@ -197,6 +199,13 @@ int iplink_parse(int argc, char **argv, struct iplink_req *req, if (get_integer(&mtu, *argv, 0)) invarg("Invalid \"mtu\" value\n", *argv); addattr_l(&req->n, sizeof(*req), IFLA_MTU, &mtu, 4); + } else if (strcmp(*argv, "netns") == 0) { + NEXT_ARG(); + if (netns != -1) + duparg("netns", *argv); + if (get_integer(&netns, *argv, 0)) + invarg("Invalid \"netns\" value\n", *argv); + addattr_l(&req->n, sizeof(*req), IFLA_NET_NS_PID, &netns, 4); } else if (strcmp(*argv, "multicast") == 0) { NEXT_ARG(); req->i.ifi_change |= IFF_MULTICAST; diff --git a/man/man8/ip.8 b/man/man8/ip.8 index da314f39..2097317c 100644 --- a/man/man8/ip.8 +++ b/man/man8/ip.8 @@ -50,7 +50,10 @@ ip \- show / manipulate routing, devices, policy routing and tunnels .IR LLADDR " |" .br .B mtu -.IR MTU " }" +.IR MTU " |" +.br +.B netns +.IR PID " }" .ti -8 .B ip link show @@ -873,6 +876,11 @@ change the link layer broadcast address or the peer address when the interface is .IR "POINTOPOINT" . +.TP +.BI netns " PID" +move the device to the network namespace associated with the process +.IR "PID" . 
+ .PP .B Warning: If multiple parameter changes are requested, From 3cc6232e08e7c914fb9b9032f0f2957f7e015cf6 Mon Sep 17 00:00:00 2001 From: Bertrand Jacquin Date: Sat, 21 Jun 2008 01:51:19 -0400 Subject: [PATCH 15/18] netem: fix cross-compiling failure The programs in netem are compiled and run on the build machine, but they use the CFLAGS that are meant for the target system and oftentimes these are incompatible. Signed-off-by: Mike Frysinger --- netem/Makefile | 1 + 1 file changed, 1 insertion(+) diff --git a/netem/Makefile b/netem/Makefile index 3732160d..2d7d68bb 100644 --- a/netem/Makefile +++ b/netem/Makefile @@ -2,6 +2,7 @@ DISTGEN = maketable normal pareto paretonormal DISTDATA = normal.dist pareto.dist paretonormal.dist experimental.dist HOSTCC ?= $(CC) +CCOPTS = $(CBUILD_CFLAGS) LDLIBS += -lm all: $(DISTGEN) $(DISTDATA) From 4ffc44ca7c1ec7adb980b67ae1e1c8fa772777f1 Mon Sep 17 00:00:00 2001 From: Yu Zhiguo Date: Fri, 20 Jun 2008 09:50:16 +0800 Subject: [PATCH 16/18] Fix generic_proc_open() of command 'nstat' and 'rtacct' Fix a bug in generic_proc_open() so that environment variables (e.g. PROC_NET_SNMP, PROC_NET_RTACCT) can be used to specify the proc file.
Signed-off-by: Yu Zhiguo --- misc/nstat.c | 4 ++-- misc/rtacct.c | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/misc/nstat.c b/misc/nstat.c index 228bce22..80e695fc 100644 --- a/misc/nstat.c +++ b/misc/nstat.c @@ -43,7 +43,7 @@ int npatterns; char info_source[128]; int source_mismatch; -int generic_proc_open(char *env, char *name) +static int generic_proc_open(const char *env, char *name) { char store[128]; char *p = getenv(env); @@ -52,7 +52,7 @@ int generic_proc_open(char *env, char *name) snprintf(store, sizeof(store)-1, "%s/%s", p, name); p = store; } - return open(store, O_RDONLY); + return open(p, O_RDONLY); } int net_netstat_open(void) diff --git a/misc/rtacct.c b/misc/rtacct.c index e6399d00..eb3ea9ec 100644 --- a/misc/rtacct.c +++ b/misc/rtacct.c @@ -43,7 +43,7 @@ int dump_zeros = 0; unsigned long magic_number = 0; double W; -int generic_proc_open(char *env, char *name) +static int generic_proc_open(const char *env, const char *name) { char store[1024]; char *p = getenv(env); @@ -52,7 +52,7 @@ int generic_proc_open(char *env, char *name) snprintf(store, sizeof(store)-1, "%s/%s", p, name); p = store; } - return open(store, O_RDONLY); + return open(p, O_RDONLY); } int net_rtacct_open(void) From f493dc30094d282d6a76ef8f71753a6d48981f1f Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Mon, 30 Jun 2008 10:37:28 -0700 Subject: [PATCH 17/18] Fix array out of bounds problem The current kernel generates 71 possible header fields, but MAX_FIELDS in lnstat is only 64. This leads to referencing outside of the array. To fix, increase size of array and chop off parsing at MAX_FIELDS - 1. 
--- misc/lnstat.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/misc/lnstat.c b/misc/lnstat.c index b56598ab..b04e6ceb 100644 --- a/misc/lnstat.c +++ b/misc/lnstat.c @@ -17,7 +17,7 @@ */ /* Maximum number of fields that can be displayed */ -#define MAX_FIELDS 64 +#define MAX_FIELDS 128 /* Maximum number of header lines */ #define HDR_LINES 10 @@ -121,9 +121,12 @@ static int map_field_params(struct lnstat_file *lnstat_files, if (!fps->params[j].print.width) fps->params[j].print.width = FIELD_WIDTH_DEFAULT; - j++; + + if (++j >= MAX_FIELDS - 1) + goto full; } } + full: fps->num = j; return 1; } From f309d0ae434f177f3cb08fe5c20b436fa9d2c488 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Mon, 30 Jun 2008 11:57:13 -0700 Subject: [PATCH 18/18] Add warning message when MAX_FIELDS reached Don't just silently drop. --- misc/lnstat.c | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/misc/lnstat.c b/misc/lnstat.c index b04e6ceb..32ab6a48 100644 --- a/misc/lnstat.c +++ b/misc/lnstat.c @@ -122,8 +122,13 @@ static int map_field_params(struct lnstat_file *lnstat_files, fps->params[j].print.width = FIELD_WIDTH_DEFAULT; - if (++j >= MAX_FIELDS - 1) + if (++j >= MAX_FIELDS - 1) { + fprintf(stderr, + "WARN: MAX_FIELDS (%d) reached," + " truncating number of keys\n", + MAX_FIELDS); goto full; + } } } full: @@ -272,8 +277,13 @@ int main(int argc, char **argv) for (tok = strtok(tmp, ","); tok; tok = strtok(NULL, ",")) { - if (fp.num >= MAX_FIELDS) + if (fp.num >= MAX_FIELDS) { + fprintf(stderr, + "WARN: too many keys" + " requested: (%d max)\n", + MAX_FIELDS); break; + } fp.params[fp.num++].name = tok; } break;