From 7a53aa592fffbe5984aa8adbe76f5b370b247f05 Mon Sep 17 00:00:00 2001 From: Phil Sutter Date: Wed, 2 Mar 2016 19:19:49 +0100 Subject: [PATCH 01/39] ip: align help text with manpage Although the ip command accepts both "neighbor" and "neighbour" as subcommand, I assume it's sufficient to list it in help text as just "neigh" like ip.8 does. Signed-off-by: Phil Sutter --- ip/ip.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ip/ip.c b/ip/ip.c index eea00b82..123f1813 100644 --- a/ip/ip.c +++ b/ip/ip.c @@ -49,7 +49,7 @@ static void usage(void) fprintf(stderr, "Usage: ip [ OPTIONS ] OBJECT { COMMAND | help }\n" " ip [ -force ] -batch filename\n" -"where OBJECT := { link | address | addrlabel | route | rule | neighbor | ntable |\n" +"where OBJECT := { link | address | addrlabel | route | rule | neigh | ntable |\n" " tunnel | tuntap | maddress | mroute | mrule | monitor | xfrm |\n" " netns | l2tp | fou | tcp_metrics | token | netconf }\n" " OPTIONS := { -V[ersion] | -s[tatistics] | -d[etails] | -r[esolve] |\n" From 27ff1a564bb20f80dca1023b29d5d22fe196e727 Mon Sep 17 00:00:00 2001 From: Phil Sutter Date: Wed, 2 Mar 2016 19:19:50 +0100 Subject: [PATCH 02/39] ipaddrlabel: Improve help text precision Neither 'list' nor 'flush' actions accept parameters, and with given prefix the action keyword is not optional anymore. 
Signed-off-by: Phil Sutter --- ip/ipaddrlabel.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/ip/ipaddrlabel.c b/ip/ipaddrlabel.c index f01bc269..ef093cbe 100644 --- a/ip/ipaddrlabel.c +++ b/ip/ipaddrlabel.c @@ -49,7 +49,8 @@ static void usage(void) __attribute__((noreturn)); static void usage(void) { - fprintf(stderr, "Usage: ip addrlabel [ list | add | del | flush ] prefix PREFIX [ dev DEV ] [ label LABEL ]\n"); + fprintf(stderr, "Usage: ip addrlabel { add | del } prefix PREFIX [ dev DEV ] [ label LABEL ]\n"); + fprintf(stderr, " ip addrlabel [ list | flush | help ]\n"); exit(-1); } From 5c2ea5b8c001fe3e7c85f2c93b9b05bcb035bafa Mon Sep 17 00:00:00 2001 From: Phil Sutter Date: Wed, 2 Mar 2016 19:19:51 +0100 Subject: [PATCH 03/39] iplink: fix help text syntax Get rid of extraneous closing brackets and while here, merge the double netns parameter. Signed-off-by: Phil Sutter --- ip/iplink.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/ip/iplink.c b/ip/iplink.c index 69f50572..33d7c0ad 100644 --- a/ip/iplink.c +++ b/ip/iplink.c @@ -70,17 +70,16 @@ void iplink_usage(void) fprintf(stderr, " [ address LLADDR ]\n"); fprintf(stderr, " [ broadcast LLADDR ]\n"); fprintf(stderr, " [ mtu MTU ]\n"); - fprintf(stderr, " [ netns PID ]\n"); - fprintf(stderr, " [ netns NAME ]\n"); + fprintf(stderr, " [ netns { PID | NAME } ]\n"); fprintf(stderr, " [ link-netnsid ID ]\n"); fprintf(stderr, " [ alias NAME ]\n"); fprintf(stderr, " [ vf NUM [ mac LLADDR ]\n"); fprintf(stderr, " [ vlan VLANID [ qos VLAN-QOS ] ]\n"); - fprintf(stderr, " [ rate TXRATE ] ]\n"); + fprintf(stderr, " [ rate TXRATE ]\n"); - fprintf(stderr, " [ spoofchk { on | off} ] ]\n"); - fprintf(stderr, " [ query_rss { on | off} ] ]\n"); + fprintf(stderr, " [ spoofchk { on | off} ]\n"); + fprintf(stderr, " [ query_rss { on | off} ]\n"); fprintf(stderr, " [ state { auto | enable | disable} ] ]\n"); fprintf(stderr, " [ trust { on | off} ] ]\n"); fprintf(stderr, " [ master 
DEVICE ]\n"); From c339b4cc53f64b79b384524ce0e124965d341579 Mon Sep 17 00:00:00 2001 From: Phil Sutter Date: Wed, 2 Mar 2016 19:19:52 +0100 Subject: [PATCH 04/39] ipneigh: add missing proxy keyword to help text And while we're at it, add whitespace around braces and pipe symbol. Signed-off-by: Phil Sutter --- ip/ipneigh.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ip/ipneigh.c b/ip/ipneigh.c index 92b7cd6f..9b1499b0 100644 --- a/ip/ipneigh.c +++ b/ip/ipneigh.c @@ -49,7 +49,7 @@ static void usage(void) fprintf(stderr, "Usage: ip neigh { add | del | change | replace } { ADDR [ lladdr LLADDR ]\n" " [ nud { permanent | noarp | stale | reachable } ]\n" " | proxy ADDR } [ dev DEV ]\n"); - fprintf(stderr, " ip neigh {show|flush} [ to PREFIX ] [ dev DEV ] [ nud STATE ]\n"); + fprintf(stderr, " ip neigh { show | flush } [ proxy ] [ to PREFIX ] [ dev DEV ] [ nud STATE ]\n"); exit(-1); } From f1fdcfe66a8e197a5015cb81c789061c03629108 Mon Sep 17 00:00:00 2001 From: Phil Sutter Date: Wed, 2 Mar 2016 19:19:53 +0100 Subject: [PATCH 05/39] ipntable: Fix typo in help text Signed-off-by: Phil Sutter --- ip/ipntable.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ip/ipntable.c b/ip/ipntable.c index 6eb84e79..2763570a 100644 --- a/ip/ipntable.c +++ b/ip/ipntable.c @@ -52,7 +52,7 @@ static void usage(void) "PARMS := [ base_reachable MSEC ] [ retrans MSEC ] [ gc_stale MSEC ]\n" " [ delay_probe MSEC ] [ queue LEN ]\n" - " [ app_probs VAL ] [ ucast_probes VAL ] [ mcast_probes VAL ]\n" + " [ app_probes VAL ] [ ucast_probes VAL ] [ mcast_probes VAL ]\n" " [ anycast_delay MSEC ] [ proxy_delay MSEC ] [ proxy_queue LEN ]\n" " [ locktime MSEC ]\n" ); From 070ebbdf754b02e1d7f9e9dbd84e1499332d9063 Mon Sep 17 00:00:00 2001 From: Phil Sutter Date: Wed, 2 Mar 2016 19:19:54 +0100 Subject: [PATCH 06/39] iproute: TYPE keyword is not optional, fix help text accordingly This is a bit pedantic, but brackets ([]) show optional values and since TYPE must not become 
empty, they're not suited to surround the type keyword choices. Use curly braces instead. Also add some missing whitespace to the parameter list above. Signed-off-by: Phil Sutter --- ip/iproute.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/ip/iproute.c b/ip/iproute.c index 051fc12d..5b954478 100644 --- a/ip/iproute.c +++ b/ip/iproute.c @@ -82,13 +82,13 @@ static void usage(void) fprintf(stderr, "FAMILY := [ inet | inet6 | ipx | dnet | mpls | bridge | link ]\n"); fprintf(stderr, "OPTIONS := FLAGS [ mtu NUMBER ] [ advmss NUMBER ] [ as [ to ] ADDRESS ]\n"); fprintf(stderr, " [ rtt TIME ] [ rttvar TIME ] [ reordering NUMBER ]\n"); - fprintf(stderr, " [ window NUMBER] [ cwnd NUMBER ] [ initcwnd NUMBER ]\n"); + fprintf(stderr, " [ window NUMBER ] [ cwnd NUMBER ] [ initcwnd NUMBER ]\n"); fprintf(stderr, " [ ssthresh NUMBER ] [ realms REALM ] [ src ADDRESS ]\n"); fprintf(stderr, " [ rto_min TIME ] [ hoplimit NUMBER ] [ initrwnd NUMBER ]\n"); fprintf(stderr, " [ features FEATURES ] [ quickack BOOL ] [ congctl NAME ]\n"); fprintf(stderr, " [ pref PREF ] [ expires TIME ]\n"); - fprintf(stderr, "TYPE := [ unicast | local | broadcast | multicast | throw |\n"); - fprintf(stderr, " unreachable | prohibit | blackhole | nat ]\n"); + fprintf(stderr, "TYPE := { unicast | local | broadcast | multicast | throw |\n"); + fprintf(stderr, " unreachable | prohibit | blackhole | nat }\n"); fprintf(stderr, "TABLE_ID := [ local | main | default | all | NUMBER ]\n"); fprintf(stderr, "SCOPE := [ host | link | global | NUMBER ]\n"); fprintf(stderr, "NHFLAGS := [ onlink | pervasive ]\n"); From 20f2af78fb85e53c6d41e73f010607b8d0edbd6e Mon Sep 17 00:00:00 2001 From: Phil Sutter Date: Wed, 2 Mar 2016 19:19:55 +0100 Subject: [PATCH 07/39] iprule: add missing nat keyword to help text Signed-off-by: Phil Sutter --- ip/iprule.c | 1 + 1 file changed, 1 insertion(+) diff --git a/ip/iprule.c b/ip/iprule.c index 33b71976..7e3b38b6 100644 --- a/ip/iprule.c +++ b/ip/iprule.c @@ 
-39,6 +39,7 @@ static void usage(void) fprintf(stderr, "SELECTOR := [ not ] [ from PREFIX ] [ to PREFIX ] [ tos TOS ] [ fwmark FWMARK[/MASK] ]\n"); fprintf(stderr, " [ iif STRING ] [ oif STRING ] [ pref NUMBER ]\n"); fprintf(stderr, "ACTION := [ table TABLE_ID ]\n"); + fprintf(stderr, " [ nat ADDRESS ]\n"); fprintf(stderr, " [ realms [SRCREALM/]DSTREALM ]\n"); fprintf(stderr, " [ goto NUMBER ]\n"); fprintf(stderr, " SUPPRESSOR\n"); From 37fdeb585d773b22a193c07b0beef8994c2b741f Mon Sep 17 00:00:00 2001 From: Phil Sutter Date: Wed, 2 Mar 2016 19:19:56 +0100 Subject: [PATCH 08/39] man: ip-address.8: Minor syntax fixes Clarify that the optional '-' prefix of the 'tentative', 'deprecated' and 'dadfailed' keywords has to be put right in front of them, no whitespace is allowed in between. In addition to that, clarify that it is valid to pass both 'valid_lft' and 'preferred_lft' at the same time to 'ip address'. Signed-off-by: Phil Sutter --- man/man8/ip-address.8.in | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/man/man8/ip-address.8.in b/man/man8/ip-address.8.in index 159d9065..ff3fe0b9 100644 --- a/man/man8/ip-address.8.in +++ b/man/man8/ip-address.8.in @@ -58,9 +58,9 @@ ip-address \- protocol address management .ti -8 .IR FLAG " := " -.RB "[ " permanent " | " dynamic " | " secondary " | " primary " | \ -[ - ] " tentative " | [ - ] " deprecated " | [ - ] " dadfailed " | "\ -temporary " | " CONFFLAG-LIST " ]" +.RB "[ " permanent " | " dynamic " | " secondary " | " primary " |" +.RB [ - ] tentative " | [" - ] deprecated " | [" - ] dadfailed " |" +.BR temporary " | " CONFFLAG-LIST " ]" .ti -8 .IR CONFFLAG-LIST " := [ " CONFFLAG-LIST " ] " CONFFLAG @@ -72,7 +72,7 @@ temporary " | " CONFFLAG-LIST " ]" .ti -8 .IR LIFETIME " := [ " .BI valid_lft " LFT" -.RB "| " preferred_lft +.RB "] [ " preferred_lft .IR LFT " ]" .ti -8 From d890144ecf7501c9117355d14d013dd5c98936ef Mon Sep 17 00:00:00 2001 From: Phil Sutter Date: Wed, 2 Mar 2016 19:19:57 +0100 
Subject: [PATCH 09/39] man: ip-link.8: minor font fix We commonly use bold font for terminals and italic for non-terminals. Signed-off-by: Phil Sutter --- man/man8/ip-link.8.in | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/man/man8/ip-link.8.in b/man/man8/ip-link.8.in index 221831e5..2376f165 100644 --- a/man/man8/ip-link.8.in +++ b/man/man8/ip-link.8.in @@ -49,7 +49,7 @@ ip-link \- network device configuration .RB "[ " numrxqueues .IR QUEUE_COUNT " ]" .br -.BR type " TYPE" +.BI type " TYPE" .RI "[ " ARGS " ]" .ti -8 From ca611d6408c9bf17d122923c72d27d032e054cd8 Mon Sep 17 00:00:00 2001 From: Phil Sutter Date: Wed, 2 Mar 2016 19:19:58 +0100 Subject: [PATCH 10/39] man: ip-link.8: Fix and improve synopsis Reflect that it is possible to pass multiple parameters at the same time, also use the same trick the help text uses to emphasize vf specific parameters. Signed-off-by: Phil Sutter --- man/man8/ip-link.8.in | 100 +++++++++++++++++++++++------------------- 1 file changed, 55 insertions(+), 45 deletions(-) diff --git a/man/man8/ip-link.8.in b/man/man8/ip-link.8.in index 2376f165..e402fdd0 100644 --- a/man/man8/ip-link.8.in +++ b/man/man8/ip-link.8.in @@ -92,79 +92,89 @@ ip-link \- network device configuration .BR "ip link set " { .IR DEVICE " | " .BI "group " GROUP -.RB "} { " up " | " down " | " arp " { " on " | " off " } |" +.RB "} [ { " up " | " down " } ]" .br -.BR promisc " { " on " | " off " } |" +.RB "[ " arp " { " on " | " off " } ]" .br -.BR allmulticast " { " on " | " off " } |" +.RB "[ " dynamic " { " on " | " off " } ]" .br -.BR dynamic " { " on " | " off " } |" +.RB "[ " multicast " { " on " | " off " } ]" .br -.BR multicast " { " on " | " off " } |" +.RB "[ " allmulticast " { " on " | " off " } ]" .br -.BR protodown " { " on " | " off " } |" +.RB "[ " promisc " { " on " | " off " } ]" .br -.B txqueuelen -.IR PACKETS " |" +.RB "[ " protodown " { " on " | " off " } ]" .br -.B name -.IR NEWNAME " |" +.RB "[ " trailers " { " on " | " off 
" } ]" .br -.B address -.IR LLADDR " |" -.B broadcast -.IR LLADDR " |" +.RB "[ " txqueuelen +.IR PACKETS " ]" .br -.B mtu -.IR MTU " |" +.RB "[ " name +.IR NEWNAME " ]" .br -.B netns -.IR PID " |" +.RB "[ " address +.IR LLADDR " ]" .br -.B netns -.IR NETNSNAME " |" +.RB "[ " broadcast +.IR LLADDR " ]" .br -.B alias -.IR NAME " |" +.RB "[ " mtu +.IR MTU " ]" .br -.B vf +.RB "[ " netns " {" +.IR PID " | " NETNSNAME " } ]" +.br +.RB "[ " link-netnsid +.IR ID " ]" +.br +.RB "[ " alias +.IR NAME " ]" +.br +.RB "[ " vf .IR NUM " [" .B mac -.IR LLADDR " ] [" -.B vlan +.IR LLADDR " ]" +.br +.in +9 +.RB "[ " vlan .IR VLANID " [ " .B qos -.IR VLAN-QOS " ] ] [" -.B rate -.IR TXRATE " ] [" -.B max_tx_rate -.IR TXRATE " ] [" -.B min_tx_rate -.IR TXRATE " ] [" -.B spoofchk { on | off } ] [ -.B state { auto | enable | disable} ] [ -.B trust { on | off } -] | +.IR VLAN-QOS " ] ]" .br -.B master -.IR DEVICE " |" +.RB "[ " rate +.IR TXRATE " ]" .br -.B nomaster " |" +.RB "[ " max_tx_rate +.IR TXRATE " ]" .br -.B addrgenmode { eui64 | none | stable_secret | random } +.RB "[ " min_tx_rate +.IR TXRATE " ]" .br -.B link-netnsid ID -.BR " }" +.RB "[ " spoofchk " { " on " | " off " } ]" +.br +.RB "[ " state " { " auto " | " enable " | " disable " } ]" +.br +.RB "[ " trust " { " on " | " off " } ] ]" +.br +.in -9 +.RB "[ " master +.IR DEVICE " ]" +.br +.RB "[ " nomaster " ]" +.br +.RB "[ " addrgenmode " { " eui64 " | " none " | " stable_secret " | " random " } ]" .ti -8 .B ip link show .RI "[ " DEVICE " | " .B group -.IR GROUP " | " -.BR up " | " +.IR GROUP " ] [" +.BR up " ] [" .B master -.IR DEVICE " | " +.IR DEVICE " ] [" .B type .IR TYPE " ]" From 03cb9d58bc8282d156ff644ad5af15217855af54 Mon Sep 17 00:00:00 2001 From: Phil Sutter Date: Wed, 2 Mar 2016 19:19:59 +0100 Subject: [PATCH 11/39] man: ip-neighbour: Fix for missing NUD_STATE description Signed-off-by: Phil Sutter --- man/man8/ip-neighbour.8 | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git 
a/man/man8/ip-neighbour.8 b/man/man8/ip-neighbour.8 index c9b0256e..19c6d9d8 100644 --- a/man/man8/ip-neighbour.8 +++ b/man/man8/ip-neighbour.8 @@ -18,7 +18,9 @@ ip-neighbour \- neighbour/arp tables management. .IR ADDR " [ " .B lladdr .IR LLADDR " ] [ " -.BR nud " { " permanent " | " noarp " | " stale " | " reachable " } ] | " proxy +.B nud +.IR STATE " ] |" +.B proxy .IR ADDR " } [ " .B dev .IR DEV " ]" @@ -31,6 +33,9 @@ ip-neighbour \- neighbour/arp tables management. .B nud .IR STATE " ]" +.ti -8 +.IR STATE " := {" +.BR permanent " | " noarp " | " stale " | " reachable " }" .SH DESCRIPTION The @@ -75,7 +80,7 @@ can also be .BR "null" . .TP -.BI nud " NUD_STATE" +.BI nud " STATE" the state of the neighbour entry. .B nud is an abbreviation for 'Neighbour Unreachability Detection'. @@ -147,7 +152,7 @@ list neighbour proxies. only list neighbours which are not currently in use. .TP -.BI nud " NUD_STATE" +.BI nud " STATE" only list neighbour entries in this state. .I NUD_STATE takes values listed below or the special value From 57e1ace02a3e3d212c2d8809d63b729ea59d7427 Mon Sep 17 00:00:00 2001 From: Phil Sutter Date: Wed, 2 Mar 2016 19:20:00 +0100 Subject: [PATCH 12/39] man: ip-netns.8: Clarify synopsis a bit Use brackets to show that 'ip netns' defaults to action 'list', drop superfluous curly braces around 'set' action keyword. 
Signed-off-by: Phil Sutter --- man/man8/ip-netns.8 | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/man/man8/ip-netns.8 b/man/man8/ip-netns.8 index c9b0fbc2..c5310e24 100644 --- a/man/man8/ip-netns.8 +++ b/man/man8/ip-netns.8 @@ -13,7 +13,7 @@ ip-netns \- process network namespace management .BR help " }" .sp .ti -8 -.BR "ip netns" " { " list " } " +.BR "ip netns" " [ " list " ]" .ti -8 .B ip netns add @@ -24,7 +24,7 @@ ip-netns \- process network namespace management .RI "[ " NETNSNAME " ]" .ti -8 -.BR "ip netns" " { " set " } " +.B ip netns set .I NETNSNAME NETNSID .ti -8 From 54beacc33464d51db6c44f606d1be6cd75591c0d Mon Sep 17 00:00:00 2001 From: Phil Sutter Date: Wed, 2 Mar 2016 19:20:01 +0100 Subject: [PATCH 13/39] man: ip-ntable.8: Review synopsis section The first line contained a c'n'p error, incorrectly listing 'ip address' syntax. Since PARAMS is used just once and there are not many other parameters to 'ip ntable change', state them inline and in addition to that clarify the possibility to pass multiple parameters at once. 
Signed-off-by: Phil Sutter --- man/man8/ip-ntable.8 | 39 ++++++++++++++++++++++----------------- 1 file changed, 22 insertions(+), 17 deletions(-) diff --git a/man/man8/ip-ntable.8 b/man/man8/ip-ntable.8 index 462e5896..4f0f2e54 100644 --- a/man/man8/ip-ntable.8 +++ b/man/man8/ip-ntable.8 @@ -8,7 +8,7 @@ ip-ntable - neighbour table configuration .ti -8 .B ip .RI "[ " OPTIONS " ]" -.B address +.B ntable .RI " { " COMMAND " | " .BR help " }" .sp @@ -17,34 +17,39 @@ ip-ntable - neighbour table configuration .BR "ip ntable change name" .IR NAME " [ " .B dev -.IR DEV " ] " PARMS - -.ti -8 -.IR PARMS " := { " +.IR DEV " ] [" .B thresh1 -.IR VAL " | " +.IR VAL " ] [" .B thresh2 -.IR VAL " | " +.IR VAL " ] [" .B thresh3 -.IR VAL " | " +.IR VAL " ] [" .B gc_int -.IR MSEC " | " +.IR MSEC " ] [" .B base_reachable -.IR MSEC " | " +.IR MSEC " ] [" .B retrans -.IR MSEC " | " "gc_stale MSEC " " | " +.IR MSEC " ] [" +.B gc_stale +.IR MSEC " ] [" .B delay_probe -.IR MSEC " | " "queue LEN " " | " +.IR MSEC " ] [" +.B queue +.IR LEN " ] [" .B app_probs -.IR VAL " | " +.IR VAL " ] [" .B ucast_probes -.IR VAL " | " "mcast_probes VAL " " | " +.IR VAL " ] [" +.B mcast_probes +.IR VAL " ] [" .B anycast_delay -.IR MSEC " | " +.IR MSEC " ] [" .B proxy_delay -.IR MSEC " | " "proxy_queue LEN " " | " +.IR MSEC " ] [" +.B proxy_queue +.IR LEN " ] [" .B locktime -.IR MSEC " }" +.IR MSEC " ]" .ti -8 .BR "ip ntable show" " [ " From 582b0fc6cb443ebe71b82654aa6307d55cd1fc1a Mon Sep 17 00:00:00 2001 From: Phil Sutter Date: Wed, 2 Mar 2016 19:20:02 +0100 Subject: [PATCH 14/39] man: ip-rule.8: Review synopsis section Clarify that 'ip rule' defaults to action 'list', that 'flush' and 'save' actions don't accept additional parameters, add missing 'not' and 'goto' keywords and finally fix fonts used in 'fwmark' and 'realms' parameters. 
Signed-off-by: Phil Sutter --- man/man8/ip-rule.8 | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/man/man8/ip-rule.8 b/man/man8/ip-rule.8 index b7008c6a..e9fbb3cf 100644 --- a/man/man8/ip-rule.8 +++ b/man/man8/ip-rule.8 @@ -9,20 +9,26 @@ ip-rule \- routing policy database management .B ip .RI "[ " OPTIONS " ]" .B rule -.RI " { " COMMAND " | " +.RI "{ " COMMAND " | " .BR help " }" .sp .ti -8 .B ip rule -.RB " [ " list " | " add " | " del " | " flush " | " save " ]" +.RB "[ " list " ]" + +.ti -8 +.B ip rule +.RB "{ " add " | " del " }" .I SELECTOR ACTION .ti -8 -.B ip rule " restore " +.B ip rule +.RB "{ " flush " | " save " | " restore " }" .ti -8 .IR SELECTOR " := [ " +.BR not " ] [" .B from .IR PREFIX " ] [ " .B to @@ -30,7 +36,7 @@ ip-rule \- routing policy database management .B tos .IR TOS " ] [ " .B fwmark -.IR FWMARK[/MASK] " ] [ " +.IR FWMARK\fR[\fB/\fIMASK "] ] [ " .B iif .IR STRING " ] [ " .B oif @@ -45,8 +51,9 @@ ip-rule \- routing policy database management .B nat .IR ADDRESS " ] [ " .B realms -.RI "[" SRCREALM "/]" DSTREALM " ]" -.I SUPPRESSOR +.RI "[" SRCREALM "\fB/\fR]" DSTREALM " ] [" +.B goto +.IR NUMBER " ] " SUPPRESSOR .ti -8 .IR SUPPRESSOR " := [ " From 16a124ea2dac186d4c54f3bc166a7be4882855ba Mon Sep 17 00:00:00 2001 From: Phil Sutter Date: Wed, 2 Mar 2016 19:20:03 +0100 Subject: [PATCH 15/39] man: ip-token.8: Review synopsis section Drop unnecessary curly braces around single action keywords, point out that 'dev' parameter to 'ip token get' is optional and clarify that 'ip token' defaults to 'list' action. 
Signed-off-by: Phil Sutter --- man/man8/ip-token.8 | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/man/man8/ip-token.8 b/man/man8/ip-token.8 index 35a3d1e3..260f366a 100644 --- a/man/man8/ip-token.8 +++ b/man/man8/ip-token.8 @@ -7,23 +7,23 @@ ip-token \- tokenized interface identifier support .in +8 .ti -8 .B ip token -.RI " { " COMMAND " | " +.RI "{ " COMMAND " | " .BR help " }" .sp .ti -8 -.BR "ip token" " { " set " } " +.B ip token set .IR TOKEN .B dev .IR DEV .ti -8 -.BR "ip token" " { " get " } " -.B dev -.IR DEV +.B ip token get +.RB "[ " dev +.IR DEV " ]" .ti -8 -.BR "ip token" " { " list " }" +.BR "ip token" " [ " list " ]" .SH "DESCRIPTION" IPv6 tokenized interface identifier support is used for assigning well-known From 5d8cb0900e9fd927a81a91c79434cbf847463078 Mon Sep 17 00:00:00 2001 From: Phil Sutter Date: Wed, 2 Mar 2016 19:20:04 +0100 Subject: [PATCH 16/39] man: ip-tunnel.8: Document missing 6rd action Also drop the non-terminal 'TIME' description as it is not referenced anywhere. 
Signed-off-by: Phil Sutter --- man/man8/ip-tunnel.8 | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/man/man8/ip-tunnel.8 b/man/man8/ip-tunnel.8 index 8b746cb0..4938c740 100644 --- a/man/man8/ip-tunnel.8 +++ b/man/man8/ip-tunnel.8 @@ -11,7 +11,7 @@ ip-tunnel - tunnel configuration .ti -8 .BR "ip " .RI "[ " OPTIONS " ]" -.BR "tunnel" " { " add " | " change " | " del " | " show " | " prl " }" +.BR "tunnel" " { " add " | " change " | " del " | " show " | " prl " | " 6rd " }" .RI "[ " NAME " ]" .br .RB "[ " mode @@ -42,6 +42,12 @@ ip-tunnel - tunnel configuration .B prl-delete .IR ADDR " ]" .br +.RB "[ " 6rd-prefix +.IR ADDR " ] [" +.B 6rd-relay_prefix +.IR ADDR " ] [ +.BR 6rd-reset " ]" +.br .RB "[ [" no "]" pmtudisc " ]" .RB "[ " dev .IR PHYS_DEV " ]" @@ -75,9 +81,6 @@ ip-tunnel - tunnel configuration .ti -8 .IR KEY " := { " DOTTED_QUAD " | " NUMBER " }" -.ti -8 -.IR TIME " := " NUMBER "[s|ms]" - .SH DESCRIPTION .B tunnel objects are tunnels, encapsulating packets in IP packets and then From a7eef7aa70b2d7ea2a78841e1cc188fa416e0f7f Mon Sep 17 00:00:00 2001 From: Phil Sutter Date: Wed, 2 Mar 2016 19:20:05 +0100 Subject: [PATCH 17/39] man: ip-xfrm.8: Document missing parameters Namely, 'extra-flag' of 'ip xfrm state' and 'flag' of 'ip xfrm policy'. 
Signed-off-by: Phil Sutter --- man/man8/ip-xfrm.8 | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/man/man8/ip-xfrm.8 b/man/man8/ip-xfrm.8 index dae07288..11f71047 100644 --- a/man/man8/ip-xfrm.8 +++ b/man/man8/ip-xfrm.8 @@ -57,6 +57,8 @@ ip-xfrm \- transform configuration .IR ADDR "[/" PLEN "] ]" .RB "[ " ctx .IR CTX " ]" +.RB "[ " extra-flag +.IR EXTRA-FLAG-LIST " ]" .ti -8 .B "ip xfrm state allocspi" @@ -195,6 +197,13 @@ ip-xfrm \- transform configuration .RB "{ " espinudp " | " espinudp-nonike " }" .IR SPORT " " DPORT " " OADDR +.ti -8 +.IR EXTRA-FLAG-LIST " := [ " EXTRA-FLAG-LIST " ] " EXTRA-FLAG + +.ti -8 +.IR EXTRA-FLAG " := " +.B dont-encap-dscp + .ti -8 .BR "ip xfrm policy" " { " add " | " update " }" .I SELECTOR @@ -247,6 +256,8 @@ ip-xfrm \- transform configuration .IR ACTION " ]" .RB "[ " priority .IR PRIORITY " ]" +.RB "[ " flag +.IR FLAG-LIST "]" .ti -8 .B "ip xfrm policy flush" From ac0eff58fd826683107e47c9df085b4b4e92ec66 Mon Sep 17 00:00:00 2001 From: Phil Sutter Date: Wed, 2 Mar 2016 19:20:06 +0100 Subject: [PATCH 18/39] man: ip.8: Add missing flags and token subcommand description Signed-off-by: Phil Sutter --- man/man8/ip.8 | 29 ++++++++++++++++++++++++++++- 1 file changed, 28 insertions(+), 1 deletion(-) diff --git a/man/man8/ip.8 b/man/man8/ip.8 index b1f69073..aa2bc68c 100644 --- a/man/man8/ip.8 +++ b/man/man8/ip.8 @@ -21,7 +21,7 @@ ip \- show / manipulate routing, devices, policy routing and tunnels .IR OBJECT " := { " .BR link " | " address " | " addrlabel " | " route " | " rule " | " neigh " | "\ ntable " | " tunnel " | " tuntap " | " maddress " | " mroute " | " mrule " | "\ - monitor " | " xfrm " | " netns " | " l2tp " | " tcp_metrics " }" + monitor " | " xfrm " | " netns " | " l2tp " | " tcp_metrics " | " token " }" .sp .ti -8 @@ -29,10 +29,22 @@ ip \- show / manipulate routing, devices, policy routing and tunnels \fB\-V\fR[\fIersion\fR] | \fB\-h\fR[\fIuman-readable\fR] | \fB\-s\fR[\fItatistics\fR] | 
+\fB\-d\fR[\fIetails\fR] | \fB\-r\fR[\fIesolve\fR] | +\fB\-iec\fR | \fB\-f\fR[\fIamily\fR] { .BR inet " | " inet6 " | " ipx " | " dnet " | " link " } | " +\fB-4\fR | +\fB-6\fR | +\fB-I\fR | +\fB-D\fR | +\fB-B\fR | +\fB-0\fR | +\fB-l\fR[\fIoops\fR] { \fBmaximum-addr-flush-attempts\fR } | \fB\-o\fR[\fIneline\fR] | +\fB\-rc\fR[\fIvbuf\fR] [\fBsize\fR] | +\fB\-t\fR[\fIimestamp\fR] | +\fB\-ts\fR[\fIhort\fR] | \fB\-n\fR[\fIetns\fR] name | \fB\-a\fR[\fIll\fR] | \fB\-c\fR[\fIolor\fR] } @@ -179,6 +191,16 @@ Use color output. .BR "\-t" , " \-timestamp" display current time when using monitor option. +.TP +.BR "\-ts" , " \-tshort" +Like +.BR \-timestamp , +but use shorter format. + +.TP +.BR "\-rc" , " \-rcvbuf" +Set the netlink socket receive buffer size, defaults to 1MB. + .SH IP - COMMAND SYNTAX .SS @@ -240,6 +262,10 @@ display current time when using monitor option. .B tcp_metrics/tcpmetrics - manage TCP Metrics +.TP +.B token +- manage tokenized interface identifiers. + .TP .B tunnel - tunnel over IP. @@ -305,6 +331,7 @@ was written by Alexey N. Kuznetsov and added in Linux 2.2. .BR ip-route (8), .BR ip-rule (8), .BR ip-tcp_metrics (8), +.BR ip-token (8), .BR ip-tunnel (8), .BR ip-xfrm (8) .br From 2227f2a5a28fa0fc3cc4257ccc9528cdc81996ac Mon Sep 17 00:00:00 2001 From: Phil Sutter Date: Wed, 2 Mar 2016 19:20:07 +0100 Subject: [PATCH 19/39] man: ip-l2tp.8: Fix BNF syntax The 'ADDR' part of 'local' and 'remote' parameters is not optional, but may also consist of the word 'any'. While at it, add missing whitespace and fix fonts. 
Signed-off-by: Phil Sutter --- man/man8/ip-l2tp.8 | 24 +++++++++--------------- 1 file changed, 9 insertions(+), 15 deletions(-) diff --git a/man/man8/ip-l2tp.8 b/man/man8/ip-l2tp.8 index 1738035f..5b7041f9 100644 --- a/man/man8/ip-l2tp.8 +++ b/man/man8/ip-l2tp.8 @@ -15,10 +15,7 @@ ip-l2tp - L2TPv3 static unmanaged tunnel configuration .ti -8 .BR "ip l2tp add tunnel" .br -.B remote -.RI "[ " ADDR " ]" -.B local -.RI "[ " ADDR " ]" +.BI remote " ADDR " local " ADDR " .br .B tunnel_id .IR ID @@ -73,24 +70,21 @@ ip-l2tp - L2TPv3 static unmanaged tunnel configuration .IR ID .br .ti -8 -.BR "ip l2tp show tunnel" -.B "[" tunnel_id -.IR ID -.B "]" +.BR "ip l2tp show tunnel" " [ " tunnel_id +.IR ID " ]" .br .ti -8 -.BR "ip l2tp show session" -.B "[" tunnel_id -.IR ID -.B "] [" session_id -.IR ID -.B "]" +.BR "ip l2tp show session" " [ " tunnel_id +.IR ID .B " ] [" +.B session_id +.IR ID " ]" .br .ti -8 .IR NAME " := " .IR STRING .ti -8 -.IR ADDR " := { " IP_ADDRESS " }" +.IR ADDR " := { " IP_ADDRESS " |" +.BR any " }" .ti -8 .IR PORT " := { " NUMBER " }" .ti -8 From e895ae0b31864df9f7dc16254d6dd2084f7bd523 Mon Sep 17 00:00:00 2001 From: Phil Sutter Date: Wed, 2 Mar 2016 19:20:08 +0100 Subject: [PATCH 20/39] man: ip-*.8: drop any reference to generic ip options Listing generic 'ip' options in subcommand man pages is redundant and error-prone, as they won't be kept in sync anyway. Since many other man pages don't list them either, drop references to them in the remaining ones. 
Signed-off-by: Phil Sutter --- man/man8/ip-addrlabel.8 | 14 +------------- man/man8/ip-link.8.in | 15 +-------------- man/man8/ip-monitor.8 | 4 +--- man/man8/ip-mroute.8 | 2 +- 4 files changed, 4 insertions(+), 31 deletions(-) diff --git a/man/man8/ip-addrlabel.8 b/man/man8/ip-addrlabel.8 index 51ef5727..233d6067 100644 --- a/man/man8/ip-addrlabel.8 +++ b/man/man8/ip-addrlabel.8 @@ -6,21 +6,9 @@ ip-addrlabel \- protocol address label management .ad l .in +8 .ti -8 -.B ip -.RI "[ " OPTIONS " ]" -.B addrlabel +.B ip addrlabel .RI " { " COMMAND " | " .BR help " }" -.sp - -.ti -8 -.IR OPTIONS " := { " -\fB\-V\fR[\fIersion\fR] | -\fB\-s\fR[\fItatistics\fR] | -\fB\-r\fR[\fIesolve\fR] | -\fB\-f\fR[\fIamily\fR] { -.BR inet " | " inet6 " | " ipx " | " dnet " | " link " } | " -\fB\-o\fR[\fIneline\fR] } .ti -8 .BR "ip addrlabel" " { " add " | " del " } " prefix diff --git a/man/man8/ip-link.8.in b/man/man8/ip-link.8.in index e402fdd0..c6a9c862 100644 --- a/man/man8/ip-link.8.in +++ b/man/man8/ip-link.8.in @@ -6,24 +6,11 @@ ip-link \- network device configuration .ad l .in +8 .ti -8 -.B ip -.RI "[ " OPTIONS " ]" -.B link +.B ip link .RI " { " COMMAND " | " .BR help " }" .sp -.ti -8 -.IR OPTIONS " := { " -\fB\-V\fR[\fIersion\fR] | -\fB\-h\fR[\fIuman-readable\fR] | -\fB\-s\fR[\fItatistics\fR] | -\fB\-r\fR[\fIesolve\fR] | -\fB\-f\fR[\fIamily\fR] { -.BR inet " | " inet6 " | " ipx " | " dnet " | " link " } | " -\fB\-o\fR[\fIneline\fR] | -\fB\-br\fR[\fIief\fR] } - .ti -8 .BI "ip link add" .RB "[ " link diff --git a/man/man8/ip-monitor.8 b/man/man8/ip-monitor.8 index d2bd381a..86f8f988 100644 --- a/man/man8/ip-monitor.8 +++ b/man/man8/ip-monitor.8 @@ -6,9 +6,7 @@ ip-monitor, rtmon \- state monitoring .ad l .in +8 .ti -8 -.BR "ip " " [ " -.IR ip-OPTIONS " ]" -.BR "monitor" " [ " all " |" +.BR "ip monitor" " [ " all " |" .IR OBJECT-LIST " ] [" .BI file " FILENAME " ] [ diff --git a/man/man8/ip-mroute.8 b/man/man8/ip-mroute.8 index e89b6b2d..b64e30d3 100644 --- a/man/man8/ip-mroute.8 
+++ b/man/man8/ip-mroute.8 @@ -6,7 +6,7 @@ ip-mroute \- multicast routing cache management .ad l .in +8 .ti -8 -.BR "ip " " [ ip-OPTIONS ] " "mroute show" " [ [ " +.BR "ip mroute show" " [ [ " .BR " to " " ] " .IR PREFIX " ] [ " .B from From 1b5440e94fa6392add98458d0790370b86c577a7 Mon Sep 17 00:00:00 2001 From: Phil Sutter Date: Fri, 4 Mar 2016 13:11:36 +0100 Subject: [PATCH 21/39] man: Add a man page for the connmark action Cc: Felix Fietkau Signed-off-by: Phil Sutter --- man/man8/tc-connmark.8 | 55 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 55 insertions(+) create mode 100644 man/man8/tc-connmark.8 diff --git a/man/man8/tc-connmark.8 b/man/man8/tc-connmark.8 new file mode 100644 index 00000000..bb4cf754 --- /dev/null +++ b/man/man8/tc-connmark.8 @@ -0,0 +1,55 @@ +.TH "Connmark retriever action in tc" 8 "11 Jan 2016" "iproute2" "Linux" + +.SH NAME +connmark - netfilter connmark retriever action +.SH SYNOPSIS +.in +8 +.ti -8 +.BR tc " ... " "action connmark " [ " zone" +.IR u16_zone_index " ] [ " BRANCH " ] [" +.BI index " u32_index " +] + +.ti -8 +.IR BRANCH " := { " reclassify " | " pipe " | " drop " | " continue " | " ok " }" +.SH DESCRIPTION +The connmark action is used to restore the connection's mark value into the +packet's fwmark. +.SH OPTIONS +.TP +.BI zone " u16_zone_index" +Specify the conntrack zone when doing conntrack lookups for packets. +.I u16_zone_index +is a 16bit unsigned decimal value. +.TP +.I BRANCH +How to continue after executing this action. +.RS +.TP +.B reclassify +Restarts classification by jumping back to the first filter attached to this +action's parent. +.TP +.B pipe +Continue with the next action, this is the default. +.TP +.B drop +.TQ +.B shot +Packet will be dropped without running further actions. +.TP +.B continue +Continue classification with next filter in line. +.TP +.B pass +Return to calling qdisc for packet processing. This ends the classification +process. 
+.RE +.TP +.BI index " u32_index " +Specify an index for this action in order to be able to identify it in later +commands. +.I u32_index +is a 32bit unsigned decimal value. +.SH SEE ALSO +.BR tc (8) From 438dd1d49d71b5d19f51637c50005f11755558dc Mon Sep 17 00:00:00 2001 From: Phil Sutter Date: Fri, 4 Mar 2016 13:11:37 +0100 Subject: [PATCH 22/39] man: Add a man page for the csum action. Cc: Gregoire Baron Signed-off-by: Phil Sutter --- man/man8/tc-csum.8 | 54 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 54 insertions(+) create mode 100644 man/man8/tc-csum.8 diff --git a/man/man8/tc-csum.8 b/man/man8/tc-csum.8 new file mode 100644 index 00000000..9d00aae3 --- /dev/null +++ b/man/man8/tc-csum.8 @@ -0,0 +1,54 @@ +.TH "Checksum action in tc" 8 "11 Jan 2015" "iproute2" "Linux" + +.SH NAME +csum - checksum update action +.SH SYNOPSIS +.in +8 +.ti -8 +.BR tc " ... " "action csum" +.I UPDATE + +.ti -8 +.IR UPDATE " := " TARGET " [ " UPDATE " ]" + +.ti -8 +.IR TARGET " := { " +.BR ip4h " |" +.BR icmp " |" +.BR igmp " |" +.BR tcp " |" +.BR udp " |" +.BR udplite " |" +.IR SWEETS " }" + +.ti -8 +.IR SWEETS " := { " +.BR and " | " or " | " + " }" +.SH DESCRIPTION +The +.B csum +action triggers checksum recalculation of specified packet headers. It is +commonly used after packet editing using the +.B pedit +action to fix the then-incorrect checksums. +.SH OPTIONS +.TP +.I TARGET +Specify which headers to update: IPv4 header +.RB ( ip4h ), +ICMP header +.RB ( icmp ), +IGMP header +.RB ( igmp ), +TCP header +.RB ( tcp ), +UDP header +.RB ( udp ") or" +UDPLite header +.RB ( udplite ). +.TP +.B SWEETS +These are merely syntactic sugar and ignored internally. 
+.SH SEE ALSO +.BR tc (8), +.BR tc-pedit (8) From 61d74eed701f8a54c9f0c549cbad0722e53482ff Mon Sep 17 00:00:00 2001 From: Phil Sutter Date: Fri, 4 Mar 2016 13:11:38 +0100 Subject: [PATCH 23/39] man: Add a man page for the mirred action Signed-off-by: Phil Sutter --- man/man8/tc-mirred.8 | 89 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 89 insertions(+) create mode 100644 man/man8/tc-mirred.8 diff --git a/man/man8/tc-mirred.8 b/man/man8/tc-mirred.8 new file mode 100644 index 00000000..52d98bc4 --- /dev/null +++ b/man/man8/tc-mirred.8 @@ -0,0 +1,89 @@ +.TH "Mirror/redirect action in tc" 8 "11 Jan 2015" "iproute2" "Linux" + +.SH NAME +mirred - mirror/redirect action +.SH SYNOPSIS +.in +8 +.ti -8 +.BR tc " ... " "action mirred" +.I DIRECTION ACTION +.RB "[ " index +.IR INDEX " ] " +.BI dev " DEVICENAME" + +.ti -8 +.IR DIRECTION " := { " +.BR ingress " | " egress " }" + +.ti -8 +.IR ACTION " := { " +.BR mirror " | " redirect " }" +.SH DESCRIPTION +The +.B mirred +action allows to redirect or mirror packets to another network interface on the +same system. It is typically used in combination with the +.B ifb +pseudo device to create a shared instance where QoS happens, but serves well +for debugging or monitoring purposes, too. +.SH OPTIONS +.TP +.B ingress +.TQ +.B egress +Specify the direction in which the packet shall appear on the destination +interface. Currently only +.B egress +is implemented. +.TP +.B mirror +.TQ +.B redirect +Define whether the packet should be copied +.RB ( mirror ) +or moved +.RB ( redirect ) +to the destination interface. +.TP +.BI index " INDEX" +Assign a unique ID to this action instead of letting the kernel choose one +automatically. +.I INDEX +is a 32bit unsigned integer greater than zero. +.TP +.BI dev " DEVICENAME" +Specify the network interface to redirect or mirror to.
+.SH EXAMPLES +Limit ingress bandwidth on eth0 to 1mbit/s, redirect exceeding traffic to lo for +debugging purposes: + +.RS +.EX +# tc qdisc add dev eth0 handle ffff: ingress +# tc filter add dev eth0 parent ffff: u32 \\ + match u32 0 0 \\ + action police rate 1mbit burst 100k conform-exceed pipe \\ + action mirred egress redirect dev lo +.EE +.RE + +Use an +.B ifb +interface to send ingress traffic on eth0 through an instance of +.BR sfq : + +.RS +.EX +# modprobe ifb +# ip link set ifb0 up +# tc qdisc add dev ifb0 root sfq +# tc qdisc add dev eth0 handle ffff: ingress +# tc filter add dev eth0 parent ffff: u32 \\ + match u32 0 0 \\ + action mirred egress redirect dev ifb0 +.EE +.RE + +.SH SEE ALSO +.BR tc (8), +.BR tc-u32 (8) From ec0bab1e028a9a8178ae18c4fa6ca600dcf167ba Mon Sep 17 00:00:00 2001 From: Phil Sutter Date: Fri, 4 Mar 2016 13:11:39 +0100 Subject: [PATCH 24/39] man: Add a man page for the nat action Cc: Herbert Xu Signed-off-by: Phil Sutter --- man/man8/tc-nat.8 | 78 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 78 insertions(+) create mode 100644 man/man8/tc-nat.8 diff --git a/man/man8/tc-nat.8 b/man/man8/tc-nat.8 new file mode 100644 index 00000000..fdcc052a --- /dev/null +++ b/man/man8/tc-nat.8 @@ -0,0 +1,78 @@ +.TH "NAT action in tc" 8 "12 Jan 2015" "iproute2" "Linux" + +.SH NAME +nat - stateless native address translation action +.SH SYNOPSIS +.in +8 +.ti -8 +.BR tc " ... " "action nat" +.I DIRECTION OLD NEW + +.ti -8 +.IR DIRECTION " := { " +.BR ingress " | " egress " }" + +.ti -8 +.IR OLD " := " IPV4_ADDR_SPEC + +.ti -8 +.IR NEW " := " IPV4_ADDR_SPEC + +.ti -8 +.IR IPV4_ADDR_SPEC " := { " +.BR default " | " any " | " all " | " +\fIin_addr\fR[\fB/\fR{\fIprefix\fR|\fInetmask\fR}] +.SH DESCRIPTION +The +.B nat +action allows to perform NAT without the overhead of conntrack, which is +desirable if the number of flows or addresses to perform NAT on is large. 
This +action is best used in combination with the +.B u32 +filter to allow for efficient lookups of a large number of stateless NAT rules +in constant time. +.SH OPTIONS +.TP +.B ingress +Translate destination addresses, i.e. perform DNAT. +.TP +.B egress +Translate source addresses, i.e. perform SNAT. +.TP +.I OLD +Specifies addresses which should be translated. +.TP +.I NEW +Specifies addresses which +.I OLD +should be translated into. +.SH NOTES +The accepted address format in +.IR OLD " and " NEW +is quite flexible. It may either consist of one of the keywords +.BR default ", " any " or " all , +representing the all-zero IP address, or a combination of IP address and netmask +or prefix length separated by a slash +.RB ( / ) +sign. In any case, the mask (or prefix length) value of +.I OLD +is used for +.I NEW +as well so that a one-to-one mapping of addresses is assured. + +Address translation is done using a combination of binary operations. First, the +original (source or destination) address is matched against the value of +.IR OLD . +If the original address fits, the new address is created by taking the leading +bits from +.I NEW +(defined by the netmask of +.IR OLD ) +and taking the remaining bits from the original address. + +There is rudimentary support for upper layer protocols, namely TCP, UDP and ICMP. +While for the first two only checksum recalculation is performed, the action +also takes care of embedded IP headers in ICMP packets by translating the +respective address therein, too.
+.SH SEE ALSO +.BR tc (8) From 448800026ff7189f297233c6588457a7e9770183 Mon Sep 17 00:00:00 2001 From: Phil Sutter Date: Fri, 4 Mar 2016 13:11:40 +0100 Subject: [PATCH 25/39] man: Add a man page for the pedit action Signed-off-by: Phil Sutter --- man/man8/tc-pedit.8 | 230 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 230 insertions(+) create mode 100644 man/man8/tc-pedit.8 diff --git a/man/man8/tc-pedit.8 b/man/man8/tc-pedit.8 new file mode 100644 index 00000000..c30927ec --- /dev/null +++ b/man/man8/tc-pedit.8 @@ -0,0 +1,230 @@ +.TH "Generic packet editor action in tc" 8 "12 Jan 2015" "iproute2" "Linux" + +.SH NAME +pedit - generic packet editor action +.SH SYNOPSIS +.in +8 +.ti -8 +.BR tc " ... " "action pedit munge " { +.IR RAW_OP " | " LAYERED_OP " } [ " BRANCH " ]" + +.ti -8 +.IR RAW_OP " := " +.BI offset " OFFSET" +.RB "{ " u8 " | " u16 " | " u32 " } [" +.IR AT_SPEC " ] " CMD_SPEC + +.ti -8 +.IR AT_SPEC " := " +.BI at " AT " offmask " MASK " shift " SHIFT" + +.ti -8 +.IR LAYERED_OP " := { " +.BI ip " IPHDR_FIELD" +| +.BI ip6 " IP6HDR_FIELD" +| +.BI udp " UDPHDR_FIELD" +| +.BI tcp " TCPHDR_FIELD" +| +.BI icmp " ICMPHDR_FIELD" +.RI } " CMD_SPEC" + +.ti -8 +.IR IPHDR_FIELD " := { " +.BR src " | " dst " | " tos " | " dsfield " | " ihl " | " protocol " |" +.BR precedence " | " nofrag " | " firstfrag " | " ce " | " df " |" +.BR mf " | " dport " | " sport " | " icmp_type " | " icmp_code " }" + +.ti -8 +.IR CMD_SPEC " := {" +.BR clear " | " invert " | " set +.IR VAL " | " +.BR preserve " } [ " retain +.IR RVAL " ]" + +.ti -8 +.IR BRANCH " := {" +.BR reclassify " | " pipe " | " drop " | " shot " | " continue " | " pass " }" +.SH DESCRIPTION +The +.B pedit +action can be used to change arbitrary packet data. The location of data to +change can either be specified by giving an offset and size as in +.IR RAW_OP , +or for header values by naming the header and field to edit the size is then +chosen automatically based on the header field size. 
Currently this is supported +only for IPv4 headers. +.SH OPTIONS +.TP +.BI offset " OFFSET " "\fR{ \fBu32 \fR| \fBu16 \fR| \fBu8 \fR}" +Specify the offset at which to change data. +.I OFFSET +is a signed integer, its base is automatically chosen (e.g. hex if prefixed by +.B 0x +or octal if prefixed by +.BR 0 ). +The second argument specifies the length of data to change, that is four bytes +.RB ( u32 ), +two bytes +.RB ( u16 ) +or a single byte +.RB ( u8 ). +.TP +.BI at " AT " offmask " MASK " shift " SHIFT" +This is an optional part of +.IR RAW_OP +which allows to have a variable +.I OFFSET +depending on packet data at offset +.IR AT , +which is binary ANDed with +.I MASK +and right-shifted by +.I SHIFT +before adding it to +.IR OFFSET . +.TP +.BI ip " IPHDR_FIELD" +Change an IPv4 header field. The supported keywords for +.I IPHDR_FIELD +are: +.RS +.TP +.B src +.TQ +.B dst +Source or destination IP address, a four-byte value. +.TP +.B tos +.TQ +.B dsfield +.TQ +.B precedence +Type Of Service field, an eight-bit value. +.TP +.B ihl +Change the IP Header Length field, a four-bit value. +.TP +.B protocol +Next-layer Protocol field, an eight-bit value. +.TP +.B nofrag +.TQ +.B firstfrag +.TQ +.B ce +.TQ +.B df +.TQ +.B mf +Change IP header flags. Note that the value to pass to the +.B set +command is not just a bit value, but the full byte including the flags field. +However, only the relevant bits of that value are respected; the rest are ignored. +.TP +.B dport +.TQ +.B sport +Destination or source port numbers, a 16-bit value. Indeed, IPv4 headers don't +contain this information. Instead, this will set an offset which suits at least +TCP and UDP if the IP header is of minimum size (20 bytes). If not, this will do +unexpected things. +.TP +.B icmp_type +.TQ +.B icmp_code +Again, this allows to change data past the actual IP header itself. It assumes +an ICMP header is present immediately following the (minimal sized) IP header.
+If it is not or the latter is bigger than the minimum of 20 bytes, this will do +unexpected things. These fields are eight-bit values. +.RE +.TP +.B clear +Clear the addressed data (i.e., set it to zero). +.TP +.B invert +Swap every bit in the addressed data. +.TP +.BI set " VAL" +Set the addressed data to a specific value. The size of +.I VAL +is defined by either one of the +.BR u32 ", " u16 " or " u8 +keywords in +.IR RAW_OP , +or the size of the addressed header field in +.IR LAYERED_OP . +.TP +.B preserve +Keep the addressed data as is. +.TP +.BI retain " RVAL" +This optional extra part of +.I CMD_SPEC +allows to exclude bits from being changed. +.TP +.I BRANCH +The following keywords allow to control how the tree of qdisc, classes, +filters and actions is further traversed after this action. +.RS +.TP +.B reclassify +Restart with the first filter in the current list. +.TP +.B pipe +Continue with the next action attached to the same filter. +.TP +.B drop +.TQ +.B shot +Drop the packet. +.TP +.B continue +Continue classification with the next filter in line. +.TP +.B pass +Finish classification process and return to calling qdisc for further packet +processing. This is the default. +.RE +.SH EXAMPLES +Being able to edit packet data, one could do all kinds of things, such as e.g. +implementing port redirection. Certainly not the most useful application, but +as an example it should do: + +First, qdiscs need to be set up to attach filters to. For the receive path, a simple +.B ingress +qdisc will do, for transmit path a classful qdisc +.RB ( HTB +in this case) is necessary: + +.RS +.EX +tc qdisc replace dev eth0 root handle 1: htb +tc qdisc add dev eth0 ingress handle ffff: +.EE +.RE + +Finally, a filter with +.B pedit +action can be added for each direction. 
In this case, +.B u32 +is used matching on the port number to redirect from, while +.B pedit +then does the actual rewriting: + +.RS +.EX +tc filter add dev eth0 parent 1: u32 \\ + match ip dport 23 0xffff \\ + action pedit pedit munge ip dport set 22 +tc filter add dev eth0 parent ffff: u32 \\ + match ip sport 22 0xffff \\ + action pedit pedit munge ip sport set 23 +.EE +.RE +.SH SEE ALSO +.BR tc (8), +.BR tc-htb (8), +.BR tc-u32 (8) From d477eea5a6dcb1fe42f8106f2172eaced379eabc Mon Sep 17 00:00:00 2001 From: Phil Sutter Date: Fri, 4 Mar 2016 13:11:41 +0100 Subject: [PATCH 26/39] man: Add a man page for the police action Cc: Alexey Kuznetsov Signed-off-by: Phil Sutter --- man/man8/tc-police.8 | 127 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 127 insertions(+) create mode 100644 man/man8/tc-police.8 diff --git a/man/man8/tc-police.8 b/man/man8/tc-police.8 new file mode 100644 index 00000000..2b1537ec --- /dev/null +++ b/man/man8/tc-police.8 @@ -0,0 +1,127 @@ +.TH "Policing action in tc" 8 "20 Jan 2015" "iproute2" "Linux" + +.SH NAME +police - policing action +.SH SYNOPSIS +.in +8 +.ti -8 +.BR tc " ... " "action police" +.BI rate " RATE " burst +.IR BYTES [\fB/ BYTES "] [" +.B mtu +.IR BYTES [\fB/ BYTES "] ] [" +.BI peakrate " RATE" +] [ +.BI avrate " RATE" +] [ +.BI overhead " BYTES" +] [ +.BI linklayer " TYPE" +] [ +.BI conform-exceed " EXCEEDACT\fR[\fB/\fIEXCEEDACT\fR]" + +.ti -8 +.IR EXCEEDACT " := { " +.BR pipe " | " ok " | " reclassify " | " drop " | " continue " }" +.SH DESCRIPTION +The +.B police +action allows to limit bandwidth of traffic matched by the filter it is +attached to. +.SH OPTIONS +.TP +.BI rate " RATE" +The maximum traffic rate of packets passing this action. Those exceeding it will +be treated as defined by the +.B conform-exceed +option. +.TP +.BI burst " BYTES\fR[\fB/\fIBYTES\fR]" +Set the maximum allowed burst in bytes, optionally followed by a slash ('/') +sign and cell size which must be a power of 2. 
+.TP +.BI mtu " BYTES\fR[\fB/\fIBYTES\fR]" +This is the maximum packet size handled by the policer (larger ones will be +handled like they exceeded the configured rate). Setting this value correctly +will improve the scheduler's precision. +Value formatting is identical to +.B burst +above. Defaults to unlimited. +.TP +.BI peakrate " RATE" +Set the maximum bucket depletion rate, exceeding +.BR rate . +.TP +.BI avrate " RATE" +Make use of an in-kernel bandwidth rate estimator and match the given +.I RATE +against it. +.TP +.BI overhead " BYTES" +Account for protocol overhead of encapsulating output devices when computing +.BR rate " and " peakrate . +.TP +.BI linklayer " TYPE" +Specify the link layer type. +.I TYPE +may be one of +.B ethernet +(the default), +.BR atm " or " adsl +(which are synonyms). It is used to align the precomputed rate tables to ATM +cell sizes, for +.B ethernet +no action is taken. +.TP +.BI conform-exceed " EXCEEDACT\fR[\fB/\fIEXCEEDACT\fR]" +Define how to handle packets which exceed (and, if the second +.I EXCEEDACT +is given, also those who don't), the configured bandwidth limit. Possible values +are: +.RS +.IP continue +Don't do anything, just continue with the next action in line. +.IP drop +Drop the packet immediately. +.IP shot +This is a synonym to +.BR drop . +.IP ok +Accept the packet. This is the default for conforming packets. +.IP pass +This is a synonym to +.BR ok . +.IP reclassify +Treat the packet as non-matching to the filter this action is attached to and +continue with the next filter in line (if any). This is the default for +exceeding packets. +.IP pipe +Pass the packet to the next action in line. +.SH EXAMPLES +A typical application of the police action is to enforce ingress traffic rate +by dropping exceeding packets. Although better done on the sender's side, +especially in scenarios with lack of peer control (e.g. 
with dial-up providers) +this is often the best one can do in order to keep latencies low under high +load. The following establishes input bandwidth policing to 1mbit/s using the +.B ingress +qdisc and +.B u32 +filter: + +.RS +.EX +# tc qdisc add dev eth0 handle ffff: ingress +# tc filter add dev eth0 parent ffff: u32 \\ + match u32 0 0 \\ + police rate 1mbit burst 100k +.EE +.RE + +As an action cannot live on its own, there always has to be a filter involved as a link between qdisc and action. The example above uses +.B u32 +for that, which is configured to effectively match any packet (passing it to the +.B police +action thereby). + +.SH SEE ALSO +.BR tc (8) From ebf9933bb3d5c6a869edb7405f7a4759862409ab Mon Sep 17 00:00:00 2001 From: Phil Sutter Date: Fri, 4 Mar 2016 13:11:42 +0100 Subject: [PATCH 27/39] man: Add a man page for the simple action Signed-off-by: Phil Sutter --- man/man8/tc-simple.8 | 76 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 76 insertions(+) create mode 100644 man/man8/tc-simple.8 diff --git a/man/man8/tc-simple.8 b/man/man8/tc-simple.8 new file mode 100644 index 00000000..2206dc3b --- /dev/null +++ b/man/man8/tc-simple.8 @@ -0,0 +1,76 @@ +.TH "Simple action in tc" 8 "12 Jan 2015" "iproute2" "Linux" + +.SH NAME +simple - basic example action +.SH SYNOPSIS +.in +8 +.ti -8 +.BR tc " ... " "action simple" +.I STRING +.SH DESCRIPTION +This is a pedagogical example rather than an actually useful action. Upon every access, it prints the given +.I STRING +which may be of arbitrary length. +.SH OPTIONS +.TP +.I STRING +The actual string to print. +.SH EXAMPLES +The following example makes the kernel yell "Incoming ICMP!" every time it sees +an incoming ICMP on eth0. Steps are: +.IP 1) 4 +Add an ingress qdisc point to eth0 +.IP 2) 4 +Start a chain on ingress of eth0 that first matches ICMP then invokes the +simple action to shout.
+.IP 3) 4 +display stats and show that no packet has been seen by the action +.IP 4) 4 +Send one ping packet to google (expect to receive a response back) +.IP 5) 4 +grep the logs to see the logged message +.IP 6) 4 +display stats again and observe increment by 1 + +.RE +.EX + hadi@noma1:$ tc qdisc add dev eth0 ingress + hadi@noma1:$tc filter add dev eth0 parent ffff: protocol ip prio 5 \\ + u32 match ip protocol 1 0xff flowid 1:1 action simple "Incoming ICMP" + + hadi@noma1:$ sudo tc -s filter ls dev eth0 parent ffff: + filter protocol ip pref 5 u32 + filter protocol ip pref 5 u32 fh 800: ht divisor 1 + filter protocol ip pref 5 u32 fh 800::800 order 2048 key ht 800 bkt 0 flowid 1:1 + match 00010000/00ff0000 at 8 + action order 1: Simple + index 4 ref 1 bind 1 installed 29 sec used 29 sec + Action statistics: + Sent 0 bytes 0 pkt (dropped 0, overlimits 0 requeues 0) + backlog 0b 0p requeues 0 + + + hadi@noma1$ ping -c 1 www.google.ca + PING www.google.ca (74.125.225.120) 56(84) bytes of data. 
+ 64 bytes from ord08s08-in-f24.1e100.net (74.125.225.120): icmp_req=1 ttl=53 time=31.3 ms + + --- www.google.ca ping statistics --- + 1 packets transmitted, 1 received, 0% packet loss, time 0ms + rtt min/avg/max/mdev = 31.316/31.316/31.316/0.000 ms + + hadi@noma1$ dmesg | grep simple + [135354.473951] simple: Incoming ICMP_1 + + hadi@noma1$ sudo tc/tc -s filter ls dev eth0 parent ffff: + filter protocol ip pref 5 u32 + filter protocol ip pref 5 u32 fh 800: ht divisor 1 + filter protocol ip pref 5 u32 fh 800::800 order 2048 key ht 800 bkt 0 flowid 1:1 + match 00010000/00ff0000 at 8 + action order 1: Simple + index 4 ref 1 bind 1 installed 206 sec used 67 sec + Action statistics: + Sent 84 bytes 1 pkt (dropped 0, overlimits 0 requeues 0) + backlog 0b 0p requeues 0 +.EE +.SH SEE ALSO +.BR tc (8) From ae6cf29be0dc6e1ce237bfe97c12329638c6edac Mon Sep 17 00:00:00 2001 From: Phil Sutter Date: Fri, 4 Mar 2016 13:11:43 +0100 Subject: [PATCH 28/39] man: Add a man page for the skbedit action Cc: Alexander Duyck Signed-off-by: Phil Sutter --- man/man8/tc-skbedit.8 | 45 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 45 insertions(+) create mode 100644 man/man8/tc-skbedit.8 diff --git a/man/man8/tc-skbedit.8 b/man/man8/tc-skbedit.8 new file mode 100644 index 00000000..b585a4d4 --- /dev/null +++ b/man/man8/tc-skbedit.8 @@ -0,0 +1,45 @@ +.TH "SKB editing action in tc" 8 "12 Jan 2015" "iproute2" "Linux" + +.SH NAME +skbedit - SKB editing action +.SH SYNOPSIS +.in +8 +.ti -8 +.BR tc " ... " "action skbedit " [ " queue_mapping +.IR QUEUE_MAPPING " ] [" +.B priority +.IR PRIORITY " ] [" +.B mark +.IR MARK " ]" +.SH DESCRIPTION +The +.B skbedit +action allows to change a packet's associated meta data. It complements the +.B pedit +action, which in turn allows to change parts of the packet data itself. +.SH OPTIONS +.TP +.BI queue_mapping " QUEUE_MAPPING" +Override the packet's transmit queue. Useful when applied to packets transmitted +over MQ-capable network interfaces. 
+.I QUEUE_MAPPING +is an unsigned 16bit value in decimal format. +.TP +.BI priority " PRIORITY" +Override the packet classification decision. +.I PRIORITY +is either +.BR root ", " none +or a hexadecimal major class ID optionally followed by a colon +.RB ( : ) +and a hexadecimal minor class ID. +.TP +.BI mark " MARK" +Change the packet's firewall mark value. +.I MARK +is an unsigned 32bit value in automatically detected format (i.e., prefix with +.RB ' 0x ' +for hexadecimal interpretation, etc.). +.SH SEE ALSO +.BR tc (8), +.BR tc-pedit (8) From 8a1c6d4894b3a4036eeca3b75fd82b0a1f01e35b Mon Sep 17 00:00:00 2001 From: Phil Sutter Date: Fri, 4 Mar 2016 13:11:44 +0100 Subject: [PATCH 29/39] man: Add a man page for the vlan action Cc: Jiri Pirko Signed-off-by: Phil Sutter --- man/man8/tc-vlan.8 | 54 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 54 insertions(+) create mode 100644 man/man8/tc-vlan.8 diff --git a/man/man8/tc-vlan.8 b/man/man8/tc-vlan.8 new file mode 100644 index 00000000..e650b72d --- /dev/null +++ b/man/man8/tc-vlan.8 @@ -0,0 +1,54 @@ +.TH "VLAN manipulation action in tc" 8 "12 Jan 2015" "iproute2" "Linux" + +.SH NAME +vlan - vlan manipulation module +.SH SYNOPSIS +.in +8 +.ti -8 +.BR tc " ... " "action vlan" " { " pop " |" +.IR PUSH " }" + +.ti -8 +.IR PUSH " := " +.BR push " [ " protocol +.IR VLANPROTO " ]" +.BI id " VLANID" +.SH DESCRIPTION +The +.B vlan +action allows to perform 802.1Q en- or decapsulation on a packet, reflected by +the two operation modes +.IR POP " and " PUSH . +The +.I POP +mode is simple, as no further information is required to just drop the +outer-most VLAN encapsulation. The +.I PUSH +mode on the other hand requires at least a +.I VLANID +and allows to optionally choose the +.I VLANPROTO +to use. +.SH OPTIONS +.TP +.B pop +Decapsulation mode, no further arguments allowed. +.TP +.B push +Encapsulation mode. Requires at least +.B id +option. +.TP +.BI id " VLANID" +Specify the VLAN ID to encapsulate into. 
+.I VLANID +is an unsigned 16bit integer, the format is detected automatically (e.g. prefix +with +.RB ' 0x ' +for hexadecimal interpretation, etc.). +.TP +.BI protocol " VLANPROTO" +Choose the VLAN protocol to use. At the time of writing, the kernel accepts only +.BR 802.1Q " or " 802.1ad . +.SH SEE ALSO +.BR tc (8) From fa2c34eff1b89b55b1cdb3c0ed3b29bf8b3d5b40 Mon Sep 17 00:00:00 2001 From: Phil Sutter Date: Fri, 4 Mar 2016 13:11:45 +0100 Subject: [PATCH 30/39] man: Add a man page for the xt action Cc: Jiri Pirko Signed-off-by: Phil Sutter --- man/man8/tc-xt.8 | 42 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 42 insertions(+) create mode 100644 man/man8/tc-xt.8 diff --git a/man/man8/tc-xt.8 b/man/man8/tc-xt.8 new file mode 100644 index 00000000..4fd800cf --- /dev/null +++ b/man/man8/tc-xt.8 @@ -0,0 +1,42 @@ +.TH "iptables action in tc" 8 "3 Mar 2016" "iproute2" "Linux" + +.SH NAME +xt - tc iptables action +.SH SYNOPSIS +.in +8 +.ti -8 +.BR tc " ... " "action xt \-j" +.IR TARGET " [ " TARGET_OPTS " ]" +.SH DESCRIPTION +The +.B xt +action allows to call arbitrary iptables targets for packets matching the filter +this action is attached to. +.SH OPTIONS +.TP +.BI -j " TARGET \fR[\fI TARGET_OPTS \fR]" +Perform a jump to the given iptables target, optionally passing any target +specific options in +.IR TARGET_OPTS . 
+.SH EXAMPLES +The following will attach a +.B u32 +filter to the +.B ingress +qdisc matching ICMP replies and using the +.B xt +action to make the kernel yell 'PONG' each time: + +.RS +.EX +tc qdisc add dev eth0 ingress +tc filter add dev eth0 parent ffff: proto ip u32 \\ + match ip protocol 1 0xff \\ + match ip icmp_type 0 0xff \\ + action xt -j LOG --log-prefix PONG +.EE +.RE +.SH SEE ALSO +.BR tc (8), +.BR tc-u32 (8), +.BR iptables-extensions (8) From bcdd39c5880f39190a63325f1f93e6c9e87c5feb Mon Sep 17 00:00:00 2001 From: Phil Sutter Date: Fri, 4 Mar 2016 13:11:46 +0100 Subject: [PATCH 31/39] man: ship action man pages Signed-off-by: Phil Sutter --- man/man8/Makefile | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/man/man8/Makefile b/man/man8/Makefile index 2f776406..d3fdf66a 100644 --- a/man/man8/Makefile +++ b/man/man8/Makefile @@ -14,7 +14,9 @@ MAN8PAGES = $(TARGETS) ip.8 arpd.8 lnstat.8 routel.8 rtacct.8 rtmon.8 rtpr.8 ss. tipc.8 tipc-bearer.8 tipc-link.8 tipc-media.8 tipc-nametable.8 \ tipc-node.8 tipc-socket.8 \ tc-basic.8 tc-cgroup.8 tc-flow.8 tc-flower.8 tc-fw.8 tc-route.8 \ - tc-tcindex.8 tc-u32.8 + tc-tcindex.8 tc-u32.8 \ + tc-connmark.8 tc-csum.8 tc-mirred.8 tc-nat.8 tc-pedit.8 tc-police.8 \ + tc-simple.8 tc-skbedit.8 tc-vlan.8 tc-xt.8 all: $(TARGETS) From b487954d5baf95b4402456964eaad76dcb54db82 Mon Sep 17 00:00:00 2001 From: Phil Sutter Date: Fri, 4 Mar 2016 12:52:39 +0100 Subject: [PATCH 32/39] man: tc-u32: Minor syntax fix Signed-off-by: Phil Sutter --- man/man8/tc-u32.8 | 1 + 1 file changed, 1 insertion(+) diff --git a/man/man8/tc-u32.8 b/man/man8/tc-u32.8 index 47c8f2d0..691f53c1 100644 --- a/man/man8/tc-u32.8 +++ b/man/man8/tc-u32.8 @@ -370,6 +370,7 @@ then allows to match various header fields: .RS .TP .BI src " ADDR" +.TQ .BI dst " ADDR" Compare Source or Destination Address fields against the value of .IR ADDR . 
From 4853ee528110f61f4d1c1606cfd5dd276bb39cbd Mon Sep 17 00:00:00 2001 From: Phil Sutter Date: Fri, 4 Mar 2016 12:52:38 +0100 Subject: [PATCH 33/39] man: ip-link: Beef up VXLAN csum options a bit Signed-off-by: Phil Sutter --- man/man8/ip-link.8.in | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/man/man8/ip-link.8.in b/man/man8/ip-link.8.in index c6a9c862..2cd93b0f 100644 --- a/man/man8/ip-link.8.in +++ b/man/man8/ip-link.8.in @@ -491,15 +491,15 @@ are entered into the VXLAN device forwarding database. .sp .I [no]udpcsum -- specifies if UDP checksum is filled in +- specifies if UDP checksum is calculated for transmitted packets over IPv4. .sp .I [no]udp6zerocsumtx -- specifies if UDP checksum is filled in +- skip UDP checksum calculation for transmitted packets over IPv6. .sp .I [no]udp6zerocsumrx -- specifies if UDP checksum is received +- allow incoming UDP packets over IPv6 with zero checksum field. .sp .BI ageing " SECONDS" From c024acc6414285a09107648c9c21a377404b9d45 Mon Sep 17 00:00:00 2001 From: Phil Sutter Date: Fri, 4 Mar 2016 12:52:37 +0100 Subject: [PATCH 34/39] tc: pedit: document branch control in help output This seems to have been a hidden feature, though it's very useful and necessary at least when combining multiple pedit actions. Signed-off-by: Phil Sutter --- tc/m_pedit.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tc/m_pedit.c b/tc/m_pedit.c index 4fdd189d..86eb0ca3 100644 --- a/tc/m_pedit.c +++ b/tc/m_pedit.c @@ -35,7 +35,7 @@ static int pedit_debug; static void explain(void) { - fprintf(stderr, "Usage: ... pedit munge \n"); + fprintf(stderr, "Usage: ... 
pedit munge []\n"); fprintf(stderr, "Where: MUNGE := |\n" "\t:= [ATC]\n " @@ -47,6 +47,7 @@ explain(void) "\t\tCMD:= clear | invert | set | retain\n " "\t:= ip | ip6 \n " " \t\t| udp | tcp | icmp \n" + "\t:= reclassify | pipe | drop | continue | pass\n" "For Example usage look at the examples directory\n"); } From 2452c57a5283c85d02a30ba65212b8f423b7a382 Mon Sep 17 00:00:00 2001 From: Phil Sutter Date: Fri, 4 Mar 2016 12:52:40 +0100 Subject: [PATCH 35/39] man: ip-route: Make synopsis consistent with description While the synopsis section contains 'ip route list', it is later described as 'ip route show'. Make this consistent by replacing 'list' with 'show' in synopsis. Signed-off-by: Phil Sutter --- man/man8/ip-route.8.in | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/man/man8/ip-route.8.in b/man/man8/ip-route.8.in index c764bfc8..d7fb8fba 100644 --- a/man/man8/ip-route.8.in +++ b/man/man8/ip-route.8.in @@ -16,7 +16,7 @@ ip-route \- routing table management .ti -8 .BR "ip route" " { " -.BR list " | " flush " } " +.BR show " | " flush " } " .I SELECTOR .ti -8 From 0ce05841d58b42ef39ce7144dc31e77970e5d2e0 Mon Sep 17 00:00:00 2001 From: Phil Sutter Date: Fri, 4 Mar 2016 12:52:41 +0100 Subject: [PATCH 36/39] doc, man: ip-rule: Remove incorrect statement about rule 0 The documentation is wrong here: it is indeed possible to remove policy rule 0 and recreate it afterwards. Therefore remove these statements. Signed-off-by: Phil Sutter --- doc/ip-cref.tex | 3 --- man/man8/ip-rule.8 | 2 -- 2 files changed, 5 deletions(-) diff --git a/doc/ip-cref.tex b/doc/ip-cref.tex index 67094c95..242cc266 100644 --- a/doc/ip-cref.tex +++ b/doc/ip-cref.tex @@ -2049,9 +2049,6 @@ table \verb|local| (ID 255). The \verb|local| table is a special routing table containing high priority control routes for local and broadcast addresses. -Rule 0 is special. It cannot be deleted or overridden. 
- - \item Priority: 32766, Selector: match anything, Action: lookup routing table \verb|main| (ID 254). The \verb|main| table is the normal routing table containing all non-policy diff --git a/man/man8/ip-rule.8 b/man/man8/ip-rule.8 index e9fbb3cf..1774ae3e 100644 --- a/man/man8/ip-rule.8 +++ b/man/man8/ip-rule.8 @@ -118,8 +118,6 @@ The .B local table is a special routing table containing high priority control routes for local and broadcast addresses. -.sp -Rule 0 is special. It cannot be deleted or overridden. .TP 2. From 948acfed23e5e314fe0f4f863da0368f56645d32 Mon Sep 17 00:00:00 2001 From: Phil Sutter Date: Fri, 4 Mar 2016 20:07:20 +0100 Subject: [PATCH 37/39] man: ip-neighbour.8: Document all known nud states Not sure how useful they are in practice, but as 'ip neigh' supports setting them all, they deserve to be described as well. While at it, also add a missing layer of indentation to the subordinate nud state list. Signed-off-by: Phil Sutter --- man/man8/ip-neighbour.8 | 22 +++++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) diff --git a/man/man8/ip-neighbour.8 b/man/man8/ip-neighbour.8 index 19c6d9d8..b292e181 100644 --- a/man/man8/ip-neighbour.8 +++ b/man/man8/ip-neighbour.8 @@ -35,7 +35,8 @@ ip-neighbour \- neighbour/arp tables management. .ti -8 .IR STATE " := {" -.BR permanent " | " noarp " | " stale " | " reachable " }" +.BR permanent " | " noarp " | " stale " | " reachable " | " none " |" +.BR incomplete " | " delay " | " probe " | " failed " }" .SH DESCRIPTION The @@ -86,6 +87,7 @@ the state of the neighbour entry. is an abbreviation for 'Neighbour Unreachability Detection'. The state can take one of the following values: +.RS .TP .B permanent the neighbour entry is valid forever and can be only @@ -105,6 +107,24 @@ This option to .B ip neigh does not change the neighbour state if it was valid and the address is not changed by this command. 
+.TP +.B none +this is a pseudo state used when initially creating a neighbour entry or after +trying to remove it before it becomes free to do so. +.TP +.B incomplete +the neighbour entry has not (yet) been validated/resolved. +.TP +.B delay +neighbor entry validation is currently delayed. +.TP +.B probe +neighbor is being probed. +.TP +.B failed +max number of probes exceeded without success, neighbor validation has +ultimately failed. +.RE .RE .TP From 03a0cf20b451105ab13f0593fe57144bd6b25c3b Mon Sep 17 00:00:00 2001 From: Phil Sutter Date: Fri, 4 Mar 2016 20:07:21 +0100 Subject: [PATCH 38/39] ipneigh: List all nud states in help output To not make the output overly confusing, list them in a definition of the STATE placeholder which is already used in the show/flush syntax but wasn't explained before. Signed-off-by: Phil Sutter --- ip/ipneigh.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/ip/ipneigh.c b/ip/ipneigh.c index 9b1499b0..48cca196 100644 --- a/ip/ipneigh.c +++ b/ip/ipneigh.c @@ -46,10 +46,11 @@ static void usage(void) __attribute__((noreturn)); static void usage(void) { - fprintf(stderr, "Usage: ip neigh { add | del | change | replace } { ADDR [ lladdr LLADDR ]\n" - " [ nud { permanent | noarp | stale | reachable } ]\n" - " | proxy ADDR } [ dev DEV ]\n"); - fprintf(stderr, " ip neigh { show | flush } [ proxy ] [ to PREFIX ] [ dev DEV ] [ nud STATE ]\n"); + fprintf(stderr, "Usage: ip neigh { add | del | change | replace }\n" + " { ADDR [ lladdr LLADDR ] [ nud STATE ] | proxy ADDR } [ dev DEV ]\n"); + fprintf(stderr, " ip neigh { show | flush } [ proxy ] [ to PREFIX ] [ dev DEV ] [ nud STATE ]\n\n"); + fprintf(stderr, "STATE := { permanent | noarp | stale | reachable | none |\n" + " incomplete | delay | probe | failed }\n"); exit(-1); } From 5f4d27d533917ccce4249c1d367aabf606167c47 Mon Sep 17 00:00:00 2001 From: Phil Sutter Date: Fri, 4 Mar 2016 13:11:47 +0100 Subject: [PATCH 39/39] doc: Add my article about tc, filters and 
actions Signed-off-by: Phil Sutter --- .gitignore | 1 + doc/Makefile | 2 +- doc/tc-filters.tex | 529 +++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 531 insertions(+), 1 deletion(-) create mode 100644 doc/tc-filters.tex diff --git a/.gitignore b/.gitignore index 98d83c5d..ef03b174 100644 --- a/.gitignore +++ b/.gitignore @@ -44,3 +44,4 @@ doc/*.ps doc/*.dvi doc/*.html doc/*.pdf +doc/*.out diff --git a/doc/Makefile b/doc/Makefile index e9c0ff79..0c51872a 100644 --- a/doc/Makefile +++ b/doc/Makefile @@ -1,4 +1,4 @@ -PSFILES=ip-cref.ps ip-tunnels.ps api-ip6-flowlabels.ps ss.ps nstat.ps arpd.ps rtstat.ps +PSFILES=ip-cref.ps ip-tunnels.ps api-ip6-flowlabels.ps ss.ps nstat.ps arpd.ps rtstat.ps tc-filters.ps # tc-cref.ps # api-rtnl.tex api-pmtudisc.tex api-news.tex # iki-netdev.ps iki-neighdst.ps diff --git a/doc/tc-filters.tex b/doc/tc-filters.tex new file mode 100644 index 00000000..59127d66 --- /dev/null +++ b/doc/tc-filters.tex @@ -0,0 +1,529 @@ +\documentclass[12pt,twoside]{article} + +\usepackage[hidelinks]{hyperref} % \url +\usepackage{booktabs} % nicer tabulars +\usepackage{fancyvrb} +\usepackage{fullpage} +\usepackage{float} + +\newcommand{\iface}{\textit} +\newcommand{\cmd}{\texttt} +\newcommand{\man}{\textit} +\newcommand{\qdisc}{\texttt} +\newcommand{\filter}{\texttt} + +\begin{document} +\title{QoS in Linux with TC and Filters} +\author{Phil Sutter (phil@nwl.cc)} +\date{January 2016} +\maketitle + +TC, the Traffic Control utility, has been there for a very long time - forever +in my humble perception. It is still (and has ever been if I'm not mistaken) the +only tool to configure QoS in Linux. + +Standard practice when transmitting packets over a medium which may block (due +to congestion, e.g.) is to use a queue which temporarily holds these packets. In +Linux, this queueing approach is where QoS happens: A Queueing Discipline +(qdisc) holds multiple packet queues with different priorities for dequeueing to +the network driver. 
The classification (i.e. deciding which queue a packet +should go into) is typically done based on Type Of Service (IPv4) or Traffic +Class (IPv6) header fields but depending on qdisc implementation, might be +controlled by the user as well. + +Qdiscs come in two flavors, classful or classless. While classless qdiscs are +not as flexible as classful ones, they also require much less customizing. Often +it is enough to just attach them to an interface, without exact knowledge of +what is done internally. Classful qdiscs are the exact opposite: flexible in +application, they are often not even usable without insightful configuration. + +As the name implies, classful qdiscs provide configurable classes to sort +traffic into. In its basic form, this is not much different than, say, the +classless \qdisc{pfifo\_fast} which holds three queues and classifies per +packet upon priority field. Though typically classes go beyond that by +supporting nesting and additional characteristics like e.g. maximum traffic +rate or quantum. + +When it comes to controlling the classification process, filters come into play. +They attach to the parent of a set of classes (i.e. either the qdisc itself or +a parent class) and specify how a packet (or its associated flow) has to look +in order to suit a given class. To overcome this simplification, it is +possible to attach multiple filters to the same parent, which then consults each +of them in a row until the first one accepts the packet. + +Before getting into detail about what filters there are and how to use them, a +simple setup of a qdisc with classes is necessary: +\begin{figure}[H] +\begin{Verbatim} + .-------------------------------------------------------.
+ | | + | HTB | + | | + | .----------------------------------------------------.| + | | || + | | Class 1:1 || + | | || + | | .---------------..---------------..---------------.|| + | | | || || ||| + | | | Class 1:10 || Class 1:20 || Class 1:30 ||| + | | | || || ||| + | | | .------------.|| .------------.|| .------------.||| + | | | | ||| | ||| | |||| + | | | | fq_codel ||| | fq_codel ||| | fq_codel |||| + | | | | ||| | ||| | |||| + | | | '------------'|| '------------'|| '------------'||| + | | '---------------''---------------''---------------'|| + | '----------------------------------------------------'| + '-------------------------------------------------------' +\end{Verbatim} +\end{figure} +\noindent +The following commands establish the basic setup shown: +\begin{Verbatim} +(1) # tc qdisc replace dev eth0 root handle 1: htb default 30 +(2) # tc class add dev eth0 parent 1: classid 1:1 htb rate 95mbit +(3) # alias tclass='tc class add dev eth0 parent 1:1' +(4) # tclass classid 1:10 htb rate 1mbit ceil 20mbit prio 1 +(4) # tclass classid 1:20 htb rate 90mbit ceil 95mbit prio 2 +(4) # tclass classid 1:30 htb rate 1mbit ceil 95mbit prio 3 +(5) # tc qdisc add dev eth0 parent 1:10 fq_codel +(5) # tc qdisc add dev eth0 parent 1:20 fq_codel +(5) # tc qdisc add dev eth0 parent 1:30 fq_codel +\end{Verbatim} +A little explanation for the unfamiliar reader: +\begin{enumerate} +\item Replace the root qdisc of \iface{eth0} by an instance of \qdisc{HTB}. + Specifying the handle is necessary so it can be referenced in consecutive + calls to \cmd{tc}. The default class for unclassified traffic is set to + 30. +\item Create a single top-level class with handle 1:1 which limits the total + bandwidth allowed to 95mbit/s. It is assumed that \iface{eth0} is a 100mbit/s link, + staying a little below that helps to keep the main point of enqueueing in + the qdisc layer instead of the interface hardware queue or at another + bottleneck in the network. 
+\item Define an alias for the common part of the remaining three calls in order + to improve readability. This means all remaining classes are attached to the + common parent class from (2). +\item Create three child classes for different uses: Class 1:10 has highest + priority but is tightly limited in bandwidth - fine for interactive + connections. Class 1:20 has mid priority and high guaranteed bandwidth, for + high priority bulk traffic. Finally, there's the default class 1:30 with + lowest priority, low guaranteed bandwidth and the ability to use the full + link in case it's unused otherwise. This should be fine for uninteresting + traffic not explicitly taken care of. +\item Attach a leaf qdisc to each of the child classes created in (4). Since + \qdisc{HTB} by default attaches \qdisc{pfifo} as leaf qdisc, this step is optional. Still, + the fairness between different flows provided by the classless \qdisc{fq\_codel} is + worth the effort. +\end{enumerate} +More information about the qdiscs and fine-tuning parameters can be found in +\man{tc-htb(8)} and \man{tc-fq\_codel(8)}. + +Without any additional setup done, now all traffic leaving \iface{eth0} is shaped to +95mbit/s and directed through class 1:30. This can be verified by looking at the +\texttt{Sent} field of the class statistics printed via \cmd{tc -s class show dev eth0}: +Only the root class 1:1 and its child 1:30 should show any traffic. + + +\section*{Finally time to start filtering!} + +Let's begin with a simple one, i.e. reestablishing what \qdisc{pfifo\_fast} did +automatically based on TOS/Priority field. Linux internally translates the +header field into the priority field of struct sk\_buff, which +\qdisc{pfifo\_fast} uses for +classification. \man{tc-prio(8)} contains a table listing the priority (and +ultimately, \qdisc{pfifo\_fast} queue index) each TOS value is being translated into.
+Here is a shorter version: +\begin{center} +\begin{tabular}{lll} +TOS Values & Linux Priority (Number) & Queue Index \\ +\midrule +0x0 - 0x6 & Best Effort (0) & 1 \\ +0x8 - 0xe & Bulk (2) & 2 \\ +0x10 - 0x16 & Interactive (6) & 0 \\ +0x18 - 0x1e & Interactive Bulk (4) & 1 \\ +\end{tabular} +\end{center} +Using the \filter{basic} filter, it is possible to match packets based on that skbuff +field, which has the added benefit of being IP version agnostic. Since the +\qdisc{HTB} setup above defaults to class ID 1:30, the Bulk priority can be +ignored. The \filter{basic} filter allows to combine matches, therefore we get along +with only two filters: +\begin{Verbatim} +# tc filter add dev eth0 parent 1: basic \ + match 'meta(priority eq 6)' classid 1:10 +# tc filter add dev eth0 parent 1: basic \ + match 'meta(priority eq 0)' \ + or 'meta(priority eq 4)' classid 1:20 +\end{Verbatim} +A detailed description of the \filter{basic} filter and the ematch syntax it uses can be +found in \man{tc-basic(8)} and \man{tc-ematch(8)}. + +Obviously, this first example cries for optimization. A simple one would be to +just change the default class from 1:30 to 1:20, so filters are only needed for +Bulk and Interactive priorities: +\begin{Verbatim} +# tc filter add dev eth0 parent 1: basic \ + match 'meta(priority eq 6)' classid 1:10 +# tc filter add dev eth0 parent 1: basic \ + match 'meta(priority eq 2)' classid 1:20 +\end{Verbatim} +Given that class IDs are random, choosing them wisely allows for a direct +mapping. 
So first, recreate the qdisc and classes configuration: +\begin{Verbatim} +# tc qdisc replace dev eth0 root handle 1: htb default 10 +# tc class add dev eth0 parent 1: classid 1:1 htb rate 95mbit +# alias tclass='tc class add dev eth0 parent 1:1' +# tclass classid 1:16 htb rate 1mbit ceil 20mbit prio 1 +# tclass classid 1:10 htb rate 90mbit ceil 95mbit prio 2 +# tclass classid 1:12 htb rate 1mbit ceil 95mbit prio 3 +# tc qdisc add dev eth0 parent 1:16 fq_codel +# tc qdisc add dev eth0 parent 1:10 fq_codel +# tc qdisc add dev eth0 parent 1:12 fq_codel +\end{Verbatim} +This is basically identical to above, but with changed leaf class IDs and the +second priority class being the default. Using the \filter{flow} filter with its \texttt{map} +functionality, a single filter command is enough: +\begin{Verbatim} +# tc filter add dev eth0 parent 1: handle 0x1337 flow \ + map key priority baseclass 1:10 +\end{Verbatim} +The \filter{flow} filter now uses the priority value to construct a destination class ID +by adding it to the value of \texttt{baseclass}. While this works for priority values of +0, 2 and 6, it will result in non-existent class ID 1:14 for Interactive Bulk +traffic. In that case, the \qdisc{HTB} default applies so that traffic goes into class +ID 1:10 just as intended. Please note that specifying a handle is a mandatory +requirement by the \filter{flow} filter, although I didn't see where one would use that +later. For more information about \filter{flow}, see \man{tc-flow(8)}. + +While \filter{flow} and \filter{basic} filters are relatively easy to apply and understand, they +are also quite limited to their intended purpose. A more flexible option is +the \filter{u32} filter, which allows to match on arbitrary parts of the packet data - +yet only on that, not any meta data associated to it by the kernel (with the +exception of firewall mark value).
So in order to continue this little +exercise with \filter{u32}, we have to base classification directly upon the actual TOS +value. An intuitive attempt might look like this: +\begin{Verbatim} +# alias tcfilter='tc filter add dev eth0 parent 1:' +# tcfilter u32 match ip dsfield 0x10 0x1e classid 1:16 +# tcfilter u32 match ip dsfield 0x12 0x1e classid 1:16 +# tcfilter u32 match ip dsfield 0x14 0x1e classid 1:16 +# tcfilter u32 match ip dsfield 0x16 0x1e classid 1:16 +# tcfilter u32 match ip dsfield 0x8 0x1e classid 1:12 +# tcfilter u32 match ip dsfield 0xa 0x1e classid 1:12 +# tcfilter u32 match ip dsfield 0xc 0x1e classid 1:12 +# tcfilter u32 match ip dsfield 0xe 0x1e classid 1:12 +\end{Verbatim} +The obvious drawback here is the number of filters needed. And without the +default class, eight more filters would be necessary. This also has performance +implications: A packet with TOS value 0xe will be checked eight times in total +in order to determine its destination class. While there's not much to be done +about the number of filters, at least the performance problem can be eliminated +by using \filter{u32}'s hash table support: +\begin{Verbatim} +# tc filter add dev eth0 parent 1: prio 99 handle 1: u32 divisor 16 +\end{Verbatim} +This creates a hash table with 16 buckets. The table size is arbitrary, but not +random: Since the first bit of the TOS field is not interesting, it can be +ignored and therefore the range of values to consider is just [0;15], i.e. a +number of 16 different values.
The next step is to populate the hash table: +\begin{Verbatim} +# alias tcfilter='tc filter add dev eth0 parent 1: prio 99' +# tcfilter u32 match u8 0 0 ht 1:0: classid 1:10 +# tcfilter u32 match u8 0 0 ht 1:1: classid 1:10 +# tcfilter u32 match u8 0 0 ht 1:2: classid 1:10 +# tcfilter u32 match u8 0 0 ht 1:3: classid 1:10 +# tcfilter u32 match u8 0 0 ht 1:4: classid 1:12 +# tcfilter u32 match u8 0 0 ht 1:5: classid 1:12 +# tcfilter u32 match u8 0 0 ht 1:6: classid 1:12 +# tcfilter u32 match u8 0 0 ht 1:7: classid 1:12 +# tcfilter u32 match u8 0 0 ht 1:8: classid 1:16 +# tcfilter u32 match u8 0 0 ht 1:9: classid 1:16 +# tcfilter u32 match u8 0 0 ht 1:a: classid 1:16 +# tcfilter u32 match u8 0 0 ht 1:b: classid 1:16 +# tcfilter u32 match u8 0 0 ht 1:c: classid 1:10 +# tcfilter u32 match u8 0 0 ht 1:d: classid 1:10 +# tcfilter u32 match u8 0 0 ht 1:e: classid 1:10 +# tcfilter u32 match u8 0 0 ht 1:f: classid 1:10 +\end{Verbatim} +The parameter \texttt{ht} denotes the hash table and bucket the filter should be added +to. Since the first TOS bit is ignored, the TOS value has to be divided by two in +order to get to the bucket it maps to. E.g. a TOS value of 0x10 will therefore +map to bucket 0x8. For the sake of completeness, all possible values are mapped +and therefore a configurable default class is not required. Note that the match +expression used here is not functionally necessary, but syntactically mandatory. Therefore anything that +matches any packet will suffice. Finally, a filter which links to the defined +hash table is needed: +\begin{Verbatim} +# tc filter add dev eth0 parent 1: prio 1 protocol ip u32 \ + link 1: hashkey mask 0x001e0000 match u8 0 0 +\end{Verbatim} +Here again, the actual match statement is not necessary, but syntactically +required. All the magic lies within the \texttt{hashkey} parameter, which defines which +part of the packet should be used directly as hash key.
Here's a drawing of the +first four bytes of the IPv4 header, with the area selected by \texttt{hashkey mask} +highlighted: +\begin{figure}[H] +\begin{Verbatim} + 0 1 2 3 + .-----------------------------------------------------------------. + | | | ######## | | | + | Version| IHL | #DSCP### | ECN| Total Length | + | | | ######## | | | + '-----------------------------------------------------------------' +\end{Verbatim} +\end{figure} +\noindent +This may look confusing at first, but keep in mind that bit- as well as +byte-ordering here is LSB while the mask value is written in MSB we humans use. +Therefore reading the mask is done like so, starting from left: +\begin{enumerate} +\item Skip the first byte (which contains Version and IHL fields). +\item Skip the lowest bit of the second byte (0x1e is even). +\item Mark the four following bits (0x1e is 11110 in binary). +\item Skip the remaining three bits of the second byte as well as the remaining two + bytes. +\end{enumerate} +Before doing the lookup, the kernel right-shifts the masked value by the amount +of zero-bits in \texttt{mask}, which implicitly also does the division by two which the +hash table depends on. With this setup, every packet has to pass exactly two +filters to be classified. Note that this filter is limited to IPv4 packets: Due +to the related Traffic Class field being at a different offset in the packet, it +would not work for IPv6. To use the same setup for IPv6 as well, a second +entry-level filter is necessary: +\begin{Verbatim} +# tc filter add dev eth0 parent 1: prio 2 protocol ipv6 u32 \ + link 1: hashkey mask 0x01e00000 match u8 0 0 +\end{Verbatim} +For illustration purposes, here again is a drawing of the first four bytes of +the IPv6 header, again with masked area highlighted: +\begin{figure}[H] +\begin{Verbatim} + 0 1 2 3 + .-----------------------------------------------------------------. 
+ | | ######## | | + | Version| #Traffic Class| Flow Label | + | | ######## | | + '-----------------------------------------------------------------' +\end{Verbatim} +\end{figure} +\noindent +Reading the mask value is analogous to IPv4 with the added complexity that +Traffic Class spans over two bytes. Yet, for comparison there's a simple trick: +IPv6 has the interesting field shifted by four bits to the left, and the new +mask's value is shifted by the same amount. For further information about +\filter{u32} and what can be done with it, consult its man page +\man{tc-u32(8)}. + +Of course, the kernel provides many more filters than just \filter{basic}, +\filter{flow} and \filter{u32} which have been presented above. As of now, the +remaining ones are: +\begin{description} +\item[bpf] + Filtering using Berkeley Packet Filter programs. The program's return + code determines the packet's destination class ID. + +\item[cgroup] + Filter packets based on control groups. This is only useful for packets + originating from the local host, as control groups only exist in that + scope. + +\item[flower] + An extended variant of the flow filter. + +\item[fw] + Matches on firewall mark values previously assigned to the packet by + netfilter (or a filter action, see below for details). This allows to + export the classification algorithm into netfilter, which is very + convenient if appropriate rules exist on the same system in there + already. + +\item[route] + Filter packets based on matching routing table entry. Basically + equivalent to the \texttt{fw} filter above, to make use of an already existing + extensive routing table setup. + +\item[rsvp, rsvp6] + Implementation of the Resource Reservation Protocol in Linux, to react + upon requests sent by an RSVP daemon. + +\item[tcindex] + Match packets based on tcindex value, which is usually set by the dsmark + qdisc. This is part of an approach to support Differentiated Services in + Linux, which is another topic on its own.
+\end{description} + + +\section*{Filter Actions} + +The tc filter framework provides the infrastructure for another extensible set of +tools as well, namely tc actions. As the name suggests, they allow to do things +with packets (or associated data). (The list of) Actions are part of a given +filter. If it matches, each action it contains is executed in order before +returning the classification result. Since the action has direct access to the +latter, it is in theory possible for an action to react upon or even change the +filtering result - as long as the packet matched, of course. Yet none of the +currently in-tree actions make use of this. + +The Generic Actions framework originally evolved out of the filters' ability to +police traffic to a given maximum bandwidth. One common use case for that is to +limit ingress traffic, dropping packets which exceed the threshold. A classic +setup example is like so: +\begin{Verbatim} +# tc qdisc add dev eth0 handle ffff: ingress +# tc filter add dev eth0 parent ffff: u32 \ + match u32 0 0 \ + police rate 1mbit burst 100k +\end{Verbatim} +The ingress qdisc is not a real one, but merely a point of reference for filters +to attach to which should get applied to incoming traffic. The \filter{u32} filter added +above matches on any packet and therefore limits the total incoming bandwidth to +1mbit/s, allowing bursts of up to 100kbytes. Using the new syntax, the filter +command changes slightly: +\begin{Verbatim} +# tc filter add dev eth0 parent ffff: u32 \ + match u32 0 0 \ + action police rate 1mbit burst 100k +\end{Verbatim} +The important detail is that this syntax allows to define multiple actions. +E.g.
for testing purposes, it is possible to redirect exceeding traffic to the +loopback interface instead of dropping it: +\begin{Verbatim} +# tc filter add dev eth0 parent ffff: u32 \ + match u32 0 0 \ + action police rate 1mbit burst 100k conform-exceed pipe \ + action mirred egress redirect dev lo +\end{Verbatim} +The added parameter \texttt{conform-exceed pipe} tells the police action to allow for +further actions to handle the exceeding packet. + +Apart from \texttt{police} and \texttt{mirred} actions, there are a few more. Here's a full +list of the currently implemented ones: +\begin{description} +\item[bpf] + Apply a Berkeley Packet Filter program to the packet. + +\item[connmark] + Set the packet's firewall mark to that of its connection. This works by + searching the conntrack table for a matching entry. If found, the mark + is restored. + +\item[csum] + Trigger recalculation of packet checksums. The supported protocols are: + IPv4, ICMP, IGMP, TCP, UDP and UDPLite. + +\item[ipt] + Pass the packet to an iptables target. This allows to use iptables + extensions directly instead of having to go the extra mile via setting + an arbitrary firewall mark and matching on that from within netfilter. + +\item[mirred] + Mirror or redirect packets. This is often combined with the ifb pseudo + device to share a common QoS setup between multiple interfaces or even + ingress traffic. + +\item[nat] + Perform stateless Network Address Translation. This is certainly not + complete and therefore inferior to NAT using iptables: Although the + kernel module distinguishes between TCP, UDP and ICMP traffic, it does not + handle typical problematic protocols such as active FTP or SIP. + +\item[pedit] + Generic packet editing. This allows to alter arbitrary bytes of the + packet, either by specifying an offset into the packet or by naming a + packet header and field name to change. Currently, the latter is + implemented only for IPv4.
+ +\item[police] + Apply a bandwidth rate limiting policy. Packets exceeding it are dropped + by default, but may optionally be handled differently. + +\item[simple] + This is rather an example than real action. All it does is print a + user-defined string together with a packet counter. Useful maybe for + debugging when filter statistics are not available or too complicated. + +\item[skbedit] + Edit associated packet data, supports changing queue mapping, priority + field and firewall mark value. + +\item[vlan] + Add/remove a VLAN header to/from the packet. This might serve as + alternative to using 802.1Q pseudo-interfaces in combination with + routing rules when e.g. packets for a given destination need to be + encapsulated. +\end{description} + + +\section*{Intermediate Functional Block} + +The Intermediate Functional Block (\texttt{ifb}) pseudo network interface acts as a QoS +concentrator for multiple different sources of traffic. Packets from or to other +interfaces have to be redirected to it using the \texttt{mirred} action in order to be +handled, regularly routed traffic will be dropped. This way, a single stack of +qdiscs, classes and filters can be shared between multiple interfaces. + +Here's a simple example to feed incoming traffic from multiple interfaces +through a Stochastic Fairness Queue (\qdisc{sfq}): +\begin{Verbatim} +(1) # modprobe ifb +(2) # ip link set ifb0 up +(3) # tc qdisc add dev ifb0 root sfq +\end{Verbatim} +The first step is to load the \texttt{ifb} kernel module (1). By default, this will +create two ifb devices: \iface{ifb0} and \iface{ifb1}. After setting +\iface{ifb0} up in (2), the root +qdisc is replaced by \qdisc{sfq} in (3). Finally, one can start redirecting ingress +traffic to \iface{ifb0}, e.g. 
from \iface{eth0}: +\begin{Verbatim} +# tc qdisc add dev eth0 handle ffff: ingress +# tc filter add dev eth0 parent ffff: u32 \ + match u32 0 0 \ + action mirred egress redirect dev ifb0 +\end{Verbatim} +The same can be done for other interfaces, just replacing \iface{eth0} in the two +commands above. One thing to keep in mind here is the asymmetrical routing this +creates within the host doing the QoS: Incoming packets enter the system via +\iface{ifb0}, while corresponding replies leave directly via \iface{eth0}. This can be observed +using \cmd{tcpdump} on \iface{ifb0}, which shows the input part of the traffic only. What's +more confusing is that \cmd{tcpdump} on \iface{eth0} shows both incoming and outgoing traffic, +but the redirection is still effective - a simple proof is setting +\iface{ifb0} down, +which will interrupt the communication. Obviously \cmd{tcpdump} catches the packets to +dump before they enter the ingress qdisc, which is why it sees them while the +kernel itself doesn't. + + +\section*{Conclusion} + +My personal impression is that although the \cmd{tc} utility is an absolute +necessity for anyone aiming at doing QoS in Linux professionally, there are way +too many loose ends and trip wires present in its environment. Contributing to +this is the fact that much of the non-essential functionality is redundantly +available in netfilter. Another problem which adds weight to the first one is a +general lack of documentation. Of course, there are many HOWTOs and guides on +the internet, but since it's often not clear how up to date these are, I prefer +the usual resources such as man or info pages. Surely nothing one couldn't fix +in hindsight, but quality certainly suffers if the original author of the code +does not or cannot contribute to that.
+ +All that being said, once the steep learning curve has been mastered, the +conglomerate of (classful) qdiscs, filters and actions provides a highly +sophisticated and flexible infrastructure to perform QoS, which plays nicely +along with routing and firewalling setups. + + +\section*{Further Reading} + +A good starting point for novice users and experienced ones diving into unknown +areas is the extensive HOWTO at \url{http://lartc.org}. The iproute2 package ships +some examples (usually in /usr/share/doc/, depending on distribution) as well as +man pages for \cmd{tc} in general, qdiscs and filters. The latter have been added +just recently though, so if your distribution does not ship iproute2 version +4.3.0 yet, these are not in there. Apart from that, the internet is a spring of +HOWTOs and scripts people wrote - though these should be taken with a grain of +salt: The complexity of the matter often leads to copying others' solutions +without much validation, which allows for less optimal or even obsolete +implementations to survive much longer than desired. + +\end{document}