From 1298403e26a051bff7f181b2eb814e9a53232b3b Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Fri, 29 Sep 2017 09:57:19 -0700 Subject: [PATCH 01/28] doc: remove obsolete ip-tunnels documentation This file has not been updated since conversion to git and is really old and outdated. Signed-off-by: Stephen Hemminger --- doc/Makefile | 2 +- doc/ip-tunnels.tex | 469 --------------------------------------------- 2 files changed, 1 insertion(+), 470 deletions(-) delete mode 100644 doc/ip-tunnels.tex diff --git a/doc/Makefile b/doc/Makefile index 0c51872a..ea4c4ae1 100644 --- a/doc/Makefile +++ b/doc/Makefile @@ -1,4 +1,4 @@ -PSFILES=ip-cref.ps ip-tunnels.ps api-ip6-flowlabels.ps ss.ps nstat.ps arpd.ps rtstat.ps tc-filters.ps +PSFILES=ip-cref.ps api-ip6-flowlabels.ps ss.ps nstat.ps arpd.ps rtstat.ps tc-filters.ps # tc-cref.ps # api-rtnl.tex api-pmtudisc.tex api-news.tex # iki-netdev.ps iki-neighdst.ps diff --git a/doc/ip-tunnels.tex b/doc/ip-tunnels.tex deleted file mode 100644 index 0a8c930c..00000000 --- a/doc/ip-tunnels.tex +++ /dev/null @@ -1,469 +0,0 @@ -\documentstyle[12pt,twoside]{article} -\def\TITLE{Tunnels over IP} -\input preamble -\begin{center} -\Large\bf Tunnels over IP in Linux-2.2 -\end{center} - - -\begin{center} -{ \large Alexey~N.~Kuznetsov } \\ -\em Institute for Nuclear Research, Moscow \\ -\verb|kuznet@ms2.inr.ac.ru| \\ -\rm March 17, 1999 -\end{center} - -\vspace{5mm} - -\tableofcontents - - -\section{Instead of introduction: micro-FAQ.} - -\begin{itemize} - -\item -Q: In linux-2.0.36 I used: -\begin{verbatim} - ifconfig tunl1 10.0.0.1 pointopoint 193.233.7.65 -\end{verbatim} -to create tunnel. It does not work in 2.2.0! - -A: You are right, it does not work. The command written above is split to two commands. -\begin{verbatim} - ip tunnel add MY-TUNNEL mode ipip remote 193.233.7.65 -\end{verbatim} -will create tunnel device with name \verb|MY-TUNNEL|. 
Now you may configure -it with: -\begin{verbatim} - ifconfig MY-TUNNEL 10.0.0.1 -\end{verbatim} -Certainly, if you prefer name \verb|tunl1| to \verb|MY-TUNNEL|, -you still may use it. - -\item -Q: In linux-2.0.36 I used: -\begin{verbatim} - ifconfig tunl0 10.0.0.1 - route add -net 10.0.0.0 gw 193.233.7.65 dev tunl0 -\end{verbatim} -to tunnel net 10.0.0.0 via router 193.233.7.65. It does not -work in 2.2.0! Moreover, \verb|route| prints a funny error sort of -``network unreachable'' and after this I found a strange direct route -to 10.0.0.0 via \verb|tunl0| in routing table. - -A: Yes, in 2.2 the rule that {\em normal} gateway must reside on directly -connected network has not any exceptions. You may tell kernel, that -this particular route is {\em abnormal}: -\begin{verbatim} - ifconfig tunl0 10.0.0.1 netmask 255.255.255.255 - ip route add 10.0.0.0/8 via 193.233.7.65 dev tunl0 onlink -\end{verbatim} -Note keyword \verb|onlink|, it is the magic key that orders kernel -not to check for consistency of gateway address. -Probably, after this explanation you have already guessed another method -to cheat kernel: -\begin{verbatim} - ifconfig tunl0 10.0.0.1 netmask 255.255.255.255 - route add -host 193.233.7.65 dev tunl0 - route add -net 10.0.0.0 netmask 255.0.0.0 gw 193.233.7.65 - route del -host 193.233.7.65 dev tunl0 -\end{verbatim} -Well, if you like such tricks, nobody may prohibit you to use them. -Only do not forget -that between \verb|route add| and \verb|route del| host 193.233.7.65 is -unreachable. - -\item -Q: In 2.0.36 I used to load \verb|tunnel| device module and \verb|ipip| module. -I cannot find any \verb|tunnel| in 2.2! - -A: Linux-2.2 has single module \verb|ipip| for both directions of tunneling -and for all IPIP tunnel devices. - -\item -Q: \verb|traceroute| does not work over tunnel! Well, stop... It works, - only skips some number of hops. - -A: Yes. By default tunnel driver copies \verb|ttl| value from -inner packet to outer one. 
It means that path traversed by tunneled -packets to another endpoint is not hidden. If you dislike this, or if you -are going to use some routing protocol expecting that packets -with ttl 1 will reach peering host (f.e.\ RIP, OSPF or EBGP) -and you are not afraid of -tunnel loops, you may append option \verb|ttl 64|, when creating tunnel -with \verb|ip tunnel add|. - -\item -Q: ... Well, list of things, which 2.0 was able to do finishes. - -\end{itemize} - -\paragraph{Summary of differences between 2.2 and 2.0.} - -\begin{itemize} - -\item {\bf In 2.0} you could compile tunnel device into kernel - and got set of 4 devices \verb|tunl0| ... \verb|tunl3| or, - alternatively, compile it as module and load new module - for each new tunnel. Also, module \verb|ipip| was necessary - to receive tunneled packets. - - {\bf 2.2} has {\em one\/} module \verb|ipip|. Loading it you get base - tunnel device \verb|tunl0| and another tunnels may be created with command - \verb|ip tunnel add|. These new devices may have arbitrary names. - - -\item {\bf In 2.0} you set remote tunnel endpoint address with - the command \verb|ifconfig| ... \verb|pointopoint A|. - - {\bf In 2.2} this command has the same semantics on all - the interfaces, namely it sets not tunnel endpoint, - but address of peering host, which is directly reachable - via this tunnel, - rather than via Internet. Actual tunnel endpoint address \verb|A| - should be set with \verb|ip tunnel add ... remote A|. - -\item {\bf In 2.0} you create tunnel routes with the command: -\begin{verbatim} - route add -net 10.0.0.0 gw A dev tunl0 -\end{verbatim} - - {\bf 2.2} interprets this command equally for all device - kinds and gateway is required to be directly reachable via this tunnel, - rather than via Internet. You still may use \verb|ip route add ... onlink| - to override this behaviour. 
- -\end{itemize} - - -\section{Tunnel setup: basics} - -Standard Linux-2.2 kernel supports three flavor of tunnels, -listed in the following table: -\vspace{2mm} - -\begin{tabular}{lll} -\vrule depth 0.8ex width 0pt\relax -Mode & Description & Base device \\ -ipip & IP over IP & tunl0 \\ -sit & IPv6 over IP & sit0 \\ -gre & ANY over GRE over IP & gre0 -\end{tabular} - -\vspace{2mm} - -\noindent All the kinds of tunnels are created with one command: -\begin{verbatim} - ip tunnel add mode [ local ] [ remote ] -\end{verbatim} - -This command creates new tunnel device with name \verb||. -The \verb|| is an arbitrary string. Particularly, -it may be even \verb|eth0|. The rest of parameters set -different tunnel characteristics. - -\begin{itemize} - -\item -\verb|mode | sets tunnel mode. Three modes are available now - \verb|ipip|, \verb|sit| and \verb|gre|. - -\item -\verb|remote | sets remote endpoint of the tunnel to IP - address \verb||. -\item -\verb|local | sets fixed local address for tunneled - packets. It must be an address on another interface of this host. - -\end{itemize} - -\let\thefootnote\oldthefootnote - -Both \verb|remote| and \verb|local| may be omitted. In this case we -say that they are zero or wildcard. Two tunnels of one mode cannot -have the same \verb|remote| and \verb|local|. Particularly it means -that base device or fallback tunnel cannot be replicated.\footnote{ -This restriction is relaxed for keyed GRE tunnels.} - -Tunnels are divided to two classes: {\bf pointopoint} tunnels, which -have some not wildcard \verb|remote| address and deliver all the packets -to this destination, and {\bf NBMA} (i.e. Non-Broadcast Multi-Access) tunnels, -which have no \verb|remote|. Particularly, base devices (f.e.\ \verb|tunl0|) -are NBMA, because they have neither \verb|remote| nor -\verb|local| addresses. - - -After tunnel device is created you should configure it as you did -it with another devices. 
Certainly, the configuration of tunnels has -some features related to the fact that they work over existing Internet -routing infrastructure and simultaneously create new virtual links, -which changes this infrastructure. The danger that not enough careful -tunnel setup will result in formation of tunnel loops, -collapse of routing or flooding network with exponentially -growing number of tunneled fragments is very real. - - -Protocol setup on pointopoint tunnels does not differ of configuration -of another devices. You should set a protocol address with \verb|ifconfig| -and add routes with \verb|route| utility. - -NBMA tunnels are different. To route something via NBMA tunnel -you have to explain to driver, where it should deliver packets to. -The only way to make it is to create special routes with gateway -address pointing to desired endpoint. F.e.\ -\begin{verbatim} - ip route add 10.0.0.0/24 via dev tunl0 onlink -\end{verbatim} -It is important to use option \verb|onlink|, otherwise -kernel will refuse request to create route via gateway not directly -reachable over device \verb|tunl0|. With IPv6 the situation is much simpler: -when you start device \verb|sit0|, it automatically configures itself -with all IPv4 addresses mapped to IPv6 space, so that all IPv4 -Internet is {\em really reachable} via \verb|sit0|! Excellent, the command -\begin{verbatim} - ip route add 3FFE::/16 via ::193.233.7.65 dev sit0 -\end{verbatim} -will route \verb|3FFE::/16| via \verb|sit0|, sending all the packets -destined to this prefix to 193.233.7.65. - -\section{Tunnel setup: options} - -Command \verb|ip tunnel add| has several additional options. -\begin{itemize} - -\item \verb|ttl N| --- set fixed TTL \verb|N| on tunneled packets. - \verb|N| is number in the range 1--255. 0 is special value, - meaning that packets inherit TTL value. - Default value is: \verb|inherit|. - -\item \verb|tos T| --- set fixed tos \verb|T| on tunneled packets. - Default value is: \verb|inherit|. 
- -\item \verb|dev DEV| --- bind tunnel to device \verb|DEV|, so that - tunneled packets will be routed only via this device and will - not be able to escape to another device, when route to endpoint changes. - -\item \verb|nopmtudisc| --- disable Path MTU Discovery on this tunnel. - It is enabled by default. Note that fixed ttl is incompatible - with this option: tunnels with fixed ttl always make pmtu discovery. - -\end{itemize} - -\verb|ipip| and \verb|sit| tunnels have no more options. \verb|gre| -tunnels are more complicated: - -\begin{itemize} - -\item \verb|key K| --- use keyed GRE with key \verb|K|. \verb|K| is - either number or IP address-like dotted quad. - -\item \verb|csum| --- checksum tunneled packets. - -\item \verb|seq| --- serialize packets. -\begin{NB} - I think this option does not - work. At least, I did not test it, did not debug it and - even do not understand, how it is supposed to work and for what - purpose Cisco planned to use it. -\end{NB} - -\end{itemize} - - -Actually, these GRE options can be set separately for input and -output directions by prefixing corresponding keywords with letter -\verb|i| or \verb|o|. F.e.\ \verb|icsum| orders to accept only -packets with correct checksum and \verb|ocsum| means, that -our host will calculate and send checksum. - -Command \verb|ip tunnel add| is not the only operation, -which can be made with tunnels. Certainly, you may get short help page -with: -\begin{verbatim} - ip tunnel help -\end{verbatim} - -Besides that, you may view list of installed tunnels with the help of command: -\begin{verbatim} - ip tunnel ls -\end{verbatim} -Also you may look at statistics: -\begin{verbatim} - ip -s tunnel ls Cisco -\end{verbatim} -where \verb|Cisco| is name of tunnel device. Command -\begin{verbatim} - ip tunnel del Cisco -\end{verbatim} -destroys tunnel \verb|Cisco|. And, finally, -\begin{verbatim} - ip tunnel change Cisco mode sit local ME remote HE ttl 32 -\end{verbatim} -changes its parameters. 
- -\section{Differences 2.2 and 2.0 tunnels revisited.} - -Now we can discuss more subtle differences between tunneling in 2.0 -and 2.2. - -\begin{itemize} - -\item In 2.0 all tunneled packets were received promiscuously -as soon as you loaded module \verb|ipip|. 2.2 tries to select the best -tunnel device and packet looks as received on this. F.e.\ if host -received \verb|ipip| packet from host \verb|D| destined to our -local address \verb|S|, kernel searches for matching tunnels -in order: - -\begin{tabular}{ll} -1 & \verb|remote| is \verb|D| and \verb|local| is \verb|S| \\ -2 & \verb|remote| is \verb|D| and \verb|local| is wildcard \\ -3 & \verb|remote| is wildcard and \verb|local| is \verb|S| \\ -4 & \verb|tunl0| -\end{tabular} - -If tunnel exists, but it is not in \verb|UP| state, the tunnel is ignored. -Note, that if \verb|tunl0| is \verb|UP| it receives all the IPIP packets, -not acknowledged by more specific tunnels. -Be careful, it means that without carefully installed firewall rules -anyone on the Internet may inject to your network any packets with -source addresses indistinguishable from local ones. It is not so bad idea -to design tunnels in the way enforcing maximal route symmetry -and to enable reversed path filter (\verb|rp_filter| sysctl option) on -tunnel devices. - -\item In 2.2 you can monitor and debug tunnels with \verb|tcpdump|. -F.e.\ \verb|tcpdump| \verb|-i Cisco| \verb|-nvv| will dump packets, -which kernel output, via tunnel \verb|Cisco| and the packets received on it -from kernel viewpoint. - -\end{itemize} - - -\section{Linux and Cisco IOS tunnels.} - -Among another tunnels Cisco IOS supports IPIP and GRE. -Essentially, Cisco setup is subset of options, available for Linux. 
-Let us consider the simplest example: - -\begin{verbatim} -interface Tunnel0 - tunnel mode gre ip - tunnel source 10.10.14.1 - tunnel destination 10.10.13.2 -\end{verbatim} - - -This command set translates to: - -\begin{verbatim} - ip tunnel add Tunnel0 \ - mode gre \ - local 10.10.14.1 \ - remote 10.10.13.2 -\end{verbatim} - -Any questions? No questions. - -\section{Interaction IPIP tunnels and DVMRP.} - -DVMRP exploits IPIP tunnels to route multicasts via Internet. -\verb|mrouted| creates -IPIP tunnels listed in its configuration file automatically. -From kernel and user viewpoints there are no differences between -tunnels, created in this way, and tunnels created by \verb|ip tunnel|. -I.e.\ if \verb|mrouted| created some tunnel, it may be used to -route unicast packets, provided appropriate routes are added. -And vice versa, if administrator has already created a tunnel, -it will be reused by \verb|mrouted|, if it requests DVMRP -tunnel with the same local and remote addresses. - -Do not wonder, if your manually configured tunnel is -destroyed, when mrouted exits. - - -\section{Broadcast GRE ``tunnels''.} - -It is possible to set \verb|remote| for GRE tunnel to a multicast -address. Such tunnel becomes {\bf broadcast} tunnel (though word -tunnel is not quite appropriate in this case, it is rather virtual network). -\begin{verbatim} - ip tunnel add Universe local 193.233.7.65 \ - remote 224.66.66.66 ttl 16 - ip addr add 10.0.0.1/16 dev Universe - ip link set Universe up -\end{verbatim} -This tunnel is true broadcast network and broadcast packets are -sent to multicast group 224.66.66.66. By default such tunnel starts -to resolve both IP and IPv6 addresses via ARP/NDISC, so that -if multicast routing is supported in surrounding network, all GRE nodes -will find one another automatically and will form virtual Ethernet-like -broadcast network. If multicast routing does not work, it is unpleasant -but not fatal flaw. 
The tunnel becomes NBMA rather than broadcast network. -You may disable dynamic ARPing by: -\begin{verbatim} - echo 0 > /proc/sys/net/ipv4/neigh/Universe/mcast_solicit -\end{verbatim} -and to add required information to ARP tables manually: -\begin{verbatim} - ip neigh add 10.0.0.2 lladdr 128.6.190.2 dev Universe nud permanent -\end{verbatim} -In this case packets sent to 10.0.0.2 will be encapsulated in GRE -and sent to 128.6.190.2. It is possible to facilitate address resolution -using methods typical for another NBMA networks f.e.\ to start user -level \verb|arpd| daemon, which will maintain database of hosts attached -to GRE virtual network or ask for information -dedicated ARP or NHRP server. - - -Actually, such setup is the most natural for tunneling, -it is really flexible, scalable and easily managable, so that -it is strongly recommended to be used with GRE tunnels instead of ugly -hack with NBMA mode and \verb|onlink| modifier. Unfortunately, -by historical reasons broadcast mode is not supported by IPIP tunnels, -but this probably will change in future. - - - -\section{Traffic control issues.} - -Tunnels are devices, hence all the power of Linux traffic control -applies to them. The simplest (and the most useful in practice) -example is limiting tunnel bandwidth. The following command: -\begin{verbatim} - tc qdisc add dev tunl0 root tbf \ - rate 128Kbit burst 4K limit 10K -\end{verbatim} -will limit tunneled traffic to 128Kbit with maximal burst size of 4K -and queuing not more than 10K. - -However, you should remember, that tunnels are {\em virtual} devices -implemented in software and true queue management is impossible for them -just because they have no queues. Instead, it is better to create classes -on real physical interfaces and to map tunneled packets to them. -In general case of dynamic routing you should create such classes -on all outgoing interfaces, or, alternatively, -to use option \verb|dev DEV| to bind tunnel to a fixed physical device. 
-In the last case packets will be routed only via specified device -and you need to setup corresponding classes only on it. -Though you have to pay for this convenience, -if routing will change, your tunnel will fail. - -Suppose that CBQ class \verb|1:ABC| has been created on device \verb|eth0| -specially for tunnel \verb|Cisco| with endpoints \verb|S| and \verb|D|. -Now you can select IPIP packets with addresses \verb|S| and \verb|D| -with some classifier and map them to class \verb|1:ABC|. F.e.\ -it is easy to make with \verb|rsvp| classifier: -\begin{verbatim} - tc filter add dev eth0 pref 100 proto ip rsvp \ - session D ipproto ipip filter S \ - classid 1:ABC -\end{verbatim} - -If you want to make more detailed classification of sub-flows -transmitted via tunnel, you can build CBQ subtree, -rooted at \verb|1:ABC| and attach to subroot set of rules parsing -IPIP packets more deeply. - -\end{document} From d77ce080d33370d90de8b123cd143e9599dc1ca6 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Fri, 29 Sep 2017 09:58:39 -0700 Subject: [PATCH 02/28] doc: remove outdated ss documentation The current version is well documented on man page. The latex documentation is very old and was never updated. Signed-off-by: Stephen Hemminger --- doc/ss.sgml | 525 ---------------------------------------------------- 1 file changed, 525 deletions(-) delete mode 100644 doc/ss.sgml diff --git a/doc/ss.sgml b/doc/ss.sgml deleted file mode 100644 index 3024b574..00000000 --- a/doc/ss.sgml +++ /dev/null @@ -1,525 +0,0 @@ - - -
- -SS Utility: Quick Intro -<author>Alexey Kuznetsov, <tt/kuznet@ms2.inr.ac.ru/ -<date>some_negative_number, 20 Sep 2001 -<abstract> -<tt/ss/ is one another utility to investigate sockets. -Functionally it is NOT better than <tt/netstat/ combined -with some perl/awk scripts and though it is surely faster -it is not enough to make it much better. :-) -So, stop reading this now and do not waste your time. -Well, certainly, it proposes some functionality, which current -netstat is still not able to do, but surely will soon. -</abstract> - -<sect>Why? - -<p> <tt>/proc</tt> interface is inadequate, unfortunately. -When amount of sockets is enough large, <tt/netstat/ or even -plain <tt>cat /proc/net/tcp/</tt> cause nothing but pains and curses. -In linux-2.4 the desease became worse: even if amount -of sockets is small reading <tt>/proc/net/tcp/</tt> is slow enough. - -This utility presents a new approach, which is supposed to scale -well. I am not going to describe technical details here and -will concentrate on description of the command. -The only important thing to say is that it is not so bad idea -to load module <tt/tcp_diag/, which can be found in directory -<tt/Modules/ of <tt/iproute2/. If you do not make this <tt/ss/ -will work, but it falls back to <tt>/proc</tt> and becomes slow -like <tt/netstat/, well, a bit faster yet (see section "Some numbers"). - -<sect>Old news - -<p> -In the simplest form <tt/ss/ is equivalent to netstat -with some small deviations. - -<itemize> -<item><tt/ss -t -a/ dumps all TCP sockets -<item><tt/ss -u -a/ dumps all UDP sockets -<item><tt/ss -w -a/ dumps all RAW sockets -<item><tt/ss -x -a/ dumps all UNIX sockets -</itemize> - -<p> -Option <tt/-o/ shows TCP timers state. -Option <tt/-e/ shows some extended information. -Etc. etc. etc. Seems, all the options of netstat related to sockets -are supported. Though not AX.25 and other bizarres. :-) -If someone wants, he can make support for decnet and ipx. 
-Some rudimentary support for them is already present in iproute2 libutils, -and I will be glad to see these new members. - -<p> -However, standard functionality is a bit different: - -<p> -The first: without option <tt/-a/ sockets in states -<tt/TIME-WAIT/ and <tt/SYN-RECV/ are skipped too. -It is more reasonable default, I think. - -<p> -The second: format of UNIX sockets is different. It coincides -with tcp/udp. Though standard kernel still does not allow to -see write/read queues and peer address of connected UNIX sockets, -the patch doing this exists. - -<p> -The third: default is to dump only TCP sockets, rather than all of the types. - -<p> -The next: by default it does not resolve numeric host addresses (like <tt/ip/)! -Resolving is enabled with option <tt/-r/. Service names, usually stored -in local files, are resolved by default. Also, if service database -does not contain references to a port, <tt/ss/ queries system -<tt/rpcbind/. RPC services are prefixed with <tt/rpc./ -Resolution of services may be suppressed with option <tt/-n/. - -<p> -It does not accept "long" options (I dislike them, sorry). -So, address family is given with family identifier following -option <tt/-f/ to be algined to iproute2 conventions. -Mostly, it is to allow option parser to parse -addresses correctly, but as side effect it really limits dumping -to sockets supporting only given family. Option <tt/-A/ followed -by list of socket tables to dump is also supported. -Logically, id of socket table is different of _address_ family, which is -another point of incompatibility. So, id is one of -<tt/all/, <tt/tcp/, <tt/udp/, -<tt/raw/, <tt/inet/, <tt/unix/, <tt/packet/, <tt/netlink/. See? -Well, <tt/inet/ is just abbreviation for <tt/tcp|udp|raw/ -and it is not difficult to guess that <tt/packet/ allows -to look at packet sockets. Actually, there are also some other abbreviations, -f.e. <tt/unix_dgram/ selects only datagram UNIX sockets. - -<p> -The next: well, I still do not know. 
:-) - - - - -<sect>Time to talk about new functionality. - -<p>It is builtin filtering of socket lists. - -<sect1> Filtering by state. - -<p> -<tt/ss/ allows to filter socket states, using keywords -<tt/state/ and <tt/exclude/, followed by some state -identifier. - -<p> -State identifier are standard TCP state names (not listed, -they are useless for you if you already do not know them) -or abbreviations: - -<itemize> -<item><tt/all/ - for all the states -<item><tt/bucket/ - for TCP minisockets (<tt/TIME-WAIT|SYN-RECV/) -<item><tt/big/ - all except for minisockets -<item><tt/connected/ - not closed and not listening -<item><tt/synchronized/ - connected and not <tt/SYN-SENT/ -</itemize> - -<p> - F.e. to dump all tcp sockets except <tt/SYN-RECV/: - -<tscreen><verb> - ss exclude SYN-RECV -</verb></tscreen> - -<p> - If neither <tt/state/ nor <tt/exclude/ directives - are present, - state filter defaults to <tt/all/ with option <tt/-a/ - or to <tt/all/, - excluding listening, syn-recv, time-wait and closed sockets. - -<sect1> Filtering by addresses and ports. - -<p> -Option list may contain address/port filter. -It is boolean expression which consists of boolean operation -<tt/or/, <tt/and/, <tt/not/ and predicates. -Actually, all the flavors of names for boolean operations are eaten: -<tt/&/, <tt/&&/, <tt/|/, <tt/||/, <tt/!/, but do not forget -about special sense given to these symbols by unix shells and escape -them correctly, when used from command line. - -<p> -Predicates may be of the folowing kinds: - -<itemize> -<item>A. Address/port match, where address is checked against mask - and port is either wildcard or exact. It is one of: - -<tscreen><verb> - dst prefix:port - src prefix:port - src unix:STRING - src link:protocol:ifindex - src nl:channel:pid -</verb></tscreen> - - Both prefix and port may be absent or replaced with <tt/*/, - which means wildcard. UNIX socket use more powerful scheme - matching to socket names by shell wildcards. 
Also, prefixes - unix: and link: may be omitted, if address family is evident - from context (with option <tt/-x/ or with <tt/-f unix/ - or with <tt/unix/ keyword) - -<p> - F.e. - -<tscreen><verb> - dst 10.0.0.1 - dst 10.0.0.1: - dst 10.0.0.1/32: - dst 10.0.0.1:* -</verb></tscreen> - are equivalent and mean socket connected to - any port on host 10.0.0.1 - -<tscreen><verb> - dst 10.0.0.0/24:22 -</verb></tscreen> - sockets connected to port 22 on network - 10.0.0.0...255. - -<p> - Note that port separated of address with colon, which creates - troubles with IPv6 addresses. Generally, we interpret the last - colon as splitting port. To allow to give IPv6 addresses, - trick like used in IPv6 HTTP URLs may be used: - -<tscreen><verb> - dst [::1] -</verb></tscreen> - are sockets connected to ::1 on any port - -<p> - Another way is <tt/dst ::1/128/. / helps to understand that - colon is part of IPv6 address. - -<p> - Now we can add another alias for <tt/dst 10.0.0.1/: - <tt/dst [10.0.0.1]/. :-) - -<p> Address may be a DNS name. In this case all the addresses are looked - up (in all the address families, if it is not limited by option <tt/-f/ - or special address prefix <tt/inet:/, <tt/inet6/) and resulting - expression is <tt/or/ over all of them. - -<item> B. Port expressions: -<tscreen><verb> - dport >= :1024 - dport != :22 - sport < :32000 -</verb></tscreen> - etc. - - All the relations: <tt/</, <tt/>/, <tt/=/, <tt/>=/, <tt/=/, <tt/==/, - <tt/!=/, <tt/eq/, <tt/ge/, <tt/lt/, <tt/ne/... - Use variant which you like more, but not forget to escape special - characters when typing them in command line. :-) - - Note that port number syntactically coincides to the case A! - You may even add an IP address, but it will not participate - incomparison, except for <tt/==/ and <tt/!=/, which are equivalent - to corresponding predicates of type A. F.e. 
-<p> -<tt/dst 10.0.0.1:22/ - is equivalent to <tt/dport eq 10.0.0.1:22/ - and - <tt/not dst 10.0.0.1:22/ is equivalent to - <tt/dport neq 10.0.0.1:22/ - -<item>C. Keyword <tt/autobound/. It matches to sockets bound automatically - on local system. - -</itemize> - - -<sect> Examples - -<p> -<itemize> -<item>1. List all the tcp sockets in state <tt/FIN-WAIT-1/ for our apache - to network 193.233.7/24 and look at their timers: - -<tscreen><verb> - ss -o state fin-wait-1 \( sport = :http or sport = :https \) \ - dst 193.233.7/24 -</verb></tscreen> - - Oops, forgot to say that missing logical operation is - equivalent to <tt/and/. - -<item> 2. Well, now look at the rest... - -<tscreen><verb> - ss -o excl fin-wait-1 - ss state fin-wait-1 \( sport neq :http and sport neq :https \) \ - or not dst 193.233.7/24 -</verb></tscreen> - - Note that we have to do _two_ calls of ss to do this. - State match is always anded to address/port match. - The reason for this is purely technical: ss does fast skip of - not matching states before parsing addresses and I consider the - ability to skip fastly gobs of time-wait and syn-recv sockets - as more important than logical generality. - -<item> 3. So, let's look at all our sockets using autobound ports: - -<tscreen><verb> - ss -a -A all autobound -</verb></tscreen> - - -<item> 4. And eventually find all the local processes connected - to local X servers: - -<tscreen><verb> - ss -xp dst "/tmp/.X11-unix/*" -</verb></tscreen> - - Pardon, this does not work with current kernel, patching is required. - But we still can look at server side: - -<tscreen><verb> - ss -x src "/tmp/.X11-unix/*" -</verb></tscreen> - -</itemize> - - -<sect> Returning to ground: real manual - -<p> -<sect1> Command arguments - -<p> General format of arguments to <tt/ss/ is: - -<tscreen><verb> - ss [ OPTIONS ] [ STATE-FILTER ] [ ADDRESS-FILTER ] -</verb></tscreen> - -<sect2><tt/OPTIONS/ -<p> <tt/OPTIONS/ is list of single letter options, using common unix -conventions. 
- -<itemize> -<item><tt/-h/ - show help page -<item><tt/-?/ - the same, of course -<item><tt/-v/, <tt/-V/ - print version of <tt/ss/ and exit -<item><tt/-s/ - print summary statistics. This option does not parse -socket lists obtaining summary from various sources. It is useful -when amount of sockets is so huge that parsing <tt>/proc/net/tcp</tt> -is painful. -<item><tt/-D FILE/ - do not display anything, just dump raw information -about TCP sockets to <tt/FILE/ after applying filters. If <tt/FILE/ is <tt/-/ -<tt/stdout/ is used. -<item><tt/-F FILE/ - read continuation of filter from <tt/FILE/. -Each line of <tt/FILE/ is interpreted like single command line option. -If <tt/FILE/ is <tt/-/ <tt/stdin/ is used. -<item><tt/-r/ - try to resolve numeric address/ports -<item><tt/-n/ - do not try to resolve ports -<item><tt/-o/ - show some optional information, f.e. TCP timers -<item><tt/-i/ - show some infomration specific to TCP (RTO, congestion -window, slow start threshould etc.) -<item><tt/-e/ - show even more optional information -<item><tt/-m/ - show extended information on memory used by the socket. -It is available only with <tt/tcp_diag/ enabled. -<item><tt/-p/ - show list of processes owning the socket -<item><tt/-f FAMILY/ - default address family used for parsing addresses. - Also this option limits listing to sockets supporting - given address family. Currently the following families - are supported: <tt/unix/, <tt/inet/, <tt/inet6/, <tt/link/, - <tt/netlink/. -<item><tt/-4/ - alias for <tt/-f inet/ -<item><tt/-6/ - alias for <tt/-f inet6/ -<item><tt/-0/ - alias for <tt/-f link/ -<item><tt/-A LIST-OF-TABLES/ - list of socket tables to dump, separated - by commas. The following identifiers are understood: - <tt/all/, <tt/inet/, <tt/tcp/, <tt/udp/, <tt/raw/, - <tt/unix/, <tt/packet/, <tt/netlink/, <tt/unix_dgram/, - <tt/unix_stream/, <tt/packet_raw/, <tt/packet_dgram/. 
-<item><tt/-x/ - alias for <tt/-A unix/ -<item><tt/-t/ - alias for <tt/-A tcp/ -<item><tt/-u/ - alias for <tt/-A udp/ -<item><tt/-w/ - alias for <tt/-A raw/ -<item><tt/-a/ - show sockets of all the states. By default sockets - in states <tt/LISTEN/, <tt/TIME-WAIT/, <tt/SYN_RECV/ - and <tt/CLOSE/ are skipped. -<item><tt/-l/ - show only sockets in state <tt/LISTEN/ -</itemize> - -<sect2><tt/STATE-FILTER/ - -<p><tt/STATE-FILTER/ allows to construct arbitrary set of -states to match. Its syntax is sequence of keywords <tt/state/ -and <tt/exclude/ followed by identifier of state. -Available identifiers are: - -<p> -<itemize> -<item> All standard TCP states: <tt/established/, <tt/syn-sent/, -<tt/syn-recv/, <tt/fin-wait-1/, <tt/fin-wait-2/, <tt/time-wait/, -<tt/closed/, <tt/close-wait/, <tt/last-ack/, <tt/listen/ and <tt/closing/. - -<item><tt/all/ - for all the states -<item><tt/connected/ - all the states except for <tt/listen/ and <tt/closed/ -<item><tt/synchronized/ - all the <tt/connected/ states except for -<tt/syn-sent/ -<item><tt/bucket/ - states, which are maintained as minisockets, i.e. -<tt/time-wait/ and <tt/syn-recv/. -<item><tt/big/ - opposite to <tt/bucket/ -</itemize> - -<sect2><tt/ADDRESS_FILTER/ - -<p><tt/ADDRESS_FILTER/ is boolean expression with operations <tt/and/, <tt/or/ -and <tt/not/, which can be abbreviated in C style f.e. as <tt/&/, -<tt/&&/. - -<p> -Predicates check socket addresses, both local and remote. -There are the following kinds of predicates: - -<itemize> -<item> <tt/dst ADDRESS_PATTERN/ - matches remote address and port -<item> <tt/src ADDRESS_PATTERN/ - matches local address and port -<item> <tt/dport RELOP PORT/ - compares remote port to a number -<item> <tt/sport RELOP PORT/ - compares local port to a number -<item> <tt/autobound/ - checks that socket is bound to an ephemeral - port -</itemize> - -<p><tt/RELOP/ is some of <tt/<=/, <tt/>=/, <tt/==/ etc. 
-To make this more convenient for use in unix shell, alphabetic -FORTRAN-like notations <tt/le/, <tt/gt/ etc. are accepted as well. - -<p>The format and semantics of <tt/ADDRESS_PATTERN/ depends on address -family. - -<itemize> -<item><tt/inet/ - <tt/ADDRESS_PATTERN/ consists of IP prefix, optionally -followed by colon and port. If prefix or port part is absent or replaced -with <tt/*/, this means wildcard match. -<item><tt/inet6/ - The same as <tt/inet/, only prefix refers to an IPv6 -address. Unlike <tt/inet/ colon becomes ambiguous, so that <tt/ss/ allows -to use scheme, like used in URLs, where address is surrounded with -<tt/[/ ... <tt/]/. -<item><tt/unix/ - <tt/ADDRESS_PATTERN/ is shell-style wildcard. -<item><tt/packet/ - format looks like <tt/inet/, only interface index -stays instead of port and link layer protocol id instead of address. -<item><tt/netlink/ - format looks like <tt/inet/, only socket pid -stays instead of port and netlink channel instead of address. -</itemize> - -<p><tt/PORT/ is syntactically <tt/ADDRESS_PATTERN/ with wildcard -address part. Certainly, it is undefined for UNIX sockets. - -<sect1> Environment variables - -<p> -<tt/ss/ allows to change source of information using various -environment variables: - -<p> -<itemize> -<item> <tt/PROC_SLABINFO/ to override <tt>/proc/slabinfo</tt> -<item> <tt/PROC_NET_TCP/ to override <tt>/proc/net/tcp</tt> -<item> <tt/PROC_NET_UDP/ to override <tt>/proc/net/udp</tt> -<item> etc. -</itemize> - -<p> -Variable <tt/PROC_ROOT/ allows to change root of all the <tt>/proc/</tt> -hierarchy. - -<p> -Variable <tt/TCPDIAG_FILE/ prescribes to open a file instead of -requesting kernel to dump information about TCP sockets. - - -<p> This option is used mainly to investigate bug reports, -when dumps of files usually found in <tt>/proc/</tt> are received -by e-mail. - -<sect1> Output format - -<p>Six columns.
The first is <tt/Netid/, it denotes socket type and -transport protocol, when it is ambiguous: <tt/tcp/, <tt/udp/, <tt/raw/, -<tt/u_str/ is abbreviation for <tt/unix_stream/, <tt/u_dgr/ for UNIX -datagram sockets, <tt/nl/ for netlink, <tt/p_raw/ and <tt/p_dgr/ for -raw and datagram packet sockets. This column is optional, it will -be hidden, if filter selects an unique netid. - -<p> -The second column is <tt/State/. Socket state is displayed here. -The names are standard TCP names, except for <tt/UNCONN/, which -cannot happen for TCP, but normal for not connected sockets -of another types. Again, this column can be hidden. - -<p> -Then two columns (<tt/Recv-Q/ and <tt/Send-Q/) showing amount of data -queued for receive and transmit. - -<p> -And the last two columns display local address and port of the socket -and its peer address, if the socket is connected. - -<p> -If options <tt/-o/, <tt/-e/ or <tt/-p/ were given, options are -displayed not in fixed positions but separated by spaces pairs: -<tt/option:value/. If value is not a single number, it is presented -as list of values, enclosed to <tt/(/ ... <tt/)/ and separated with -commas. F.e. - -<tscreen><verb> - timer:(keepalive,111min,0) -</verb></tscreen> -is typical format for TCP timer (option <tt/-o/). - -<tscreen><verb> - users:((X,113,3)) -</verb></tscreen> -is typical for list of users (option <tt/-p/). - - -<sect>Some numbers - -<p> -Well, let us use <tt/pidentd/ and a tool <tt/ibench/ to measure -its performance. It is 30 requests per second here. Nothing to test, -it is too slow. OK, let us patch pidentd with patch from directory -Patches. After this it handles about 4300 requests per second -and becomes handy tool to pollute socket tables with lots of timewait -buckets. 
- -<p> -So, each test starts from pollution tables with 30000 sockets -and then doing full dump of the table piped to wc and measuring -timings with time: - -<p>Results: - -<itemize> -<item> <tt/netstat -at/ - 15.6 seconds -<item> <tt/ss -atr/, but without <tt/tcp_diag/ - 5.4 seconds -<item> <tt/ss -atr/ with <tt/tcp_diag/ - 0.47 seconds -</itemize> - -No comments. Though one comment is necessary, most of time -without <tt/tcp_diag/ is wasted inside kernel with completely -blocked networking. More than 10 seconds, yes. <tt/tcp_diag/ -does the same work for 100 milliseconds of system time. - -</article> From 760e9830fcd08b064dca54326450a7248883f48d Mon Sep 17 00:00:00 2001 From: Stephen Hemminger <stephen@networkplumber.org> Date: Fri, 29 Sep 2017 10:00:12 -0700 Subject: [PATCH 03/28] doc: remove outdated arpd documentation Signed-off-by: Stephen Hemminger <stephen@networkplumber.org> --- doc/Makefile | 2 +- doc/arpd.sgml | 130 -------------------------------------------------- 2 files changed, 1 insertion(+), 131 deletions(-) delete mode 100644 doc/arpd.sgml diff --git a/doc/Makefile b/doc/Makefile index ea4c4ae1..d3fdc2b6 100644 --- a/doc/Makefile +++ b/doc/Makefile @@ -1,4 +1,4 @@ -PSFILES=ip-cref.ps api-ip6-flowlabels.ps ss.ps nstat.ps arpd.ps rtstat.ps tc-filters.ps +PSFILES=ip-cref.ps api-ip6-flowlabels.ps nstat.ps rtstat.ps tc-filters.ps # tc-cref.ps # api-rtnl.tex api-pmtudisc.tex api-news.tex # iki-netdev.ps iki-neighdst.ps diff --git a/doc/arpd.sgml b/doc/arpd.sgml deleted file mode 100644 index 0ab79c60..00000000 --- a/doc/arpd.sgml +++ /dev/null @@ -1,130 +0,0 @@ -<!doctype linuxdoc system> - -<article> - -<title>ARPD Daemon -<author>Alexey Kuznetsov, <tt/kuznet@ms2.inr.ac.ru/ -<date>some_negative_number, 20 Sep 2001 -<abstract> -<tt/arpd/ is daemon collecting gratuitous ARP information, saving -it on local disk and feeding it to kernel on demand to avoid -redundant broadcasting due to limited size of kernel ARP cache. 
-</abstract> - - -<p><bf/Description/ - -<p>The format of the command is: - -<tscreen><verb> - arpd OPTIONS [ INTERFACE [ INTERFACE ... ] ] -</verb></tscreen> - -<p> <tt/OPTIONS/ are: - -<itemize> - -<item><tt/-l/ - dump <tt/arpd/ database to stdout and exit. Output consists -of three columns: interface index, IP address and MAC address. -Negative entries for dead hosts are also shown, in this case MAC address -is replaced by word <tt/FAILED/ followed by colon and time when the fact -that host is dead was proven the last time. - -<item><tt/-f FILE/ - read and load <tt/arpd/ database from <tt/FILE/ -in text format similar dumped by option <tt/-l/. Exit after load, -probably listing resulting database, if option <tt/-l/ is also given. -If <tt/FILE/ is <tt/-/, <tt/stdin/ is read to get ARP table. - -<item><tt/-b DATABASE/ - location of database file. Default location is -<tt>/var/lib/arpd/arpd.db</tt>. - -<item><tt/-a NUMBER/ - <tt/arpd/ not only passively listens ARP on wire, but -also send broadcast queries itself. <tt/NUMBER/ is number of such queries -to make before destination is considered as dead. When <tt/arpd/ is started -as kernel helper (i.e. with <tt/app_solicit/ enabled in <tt/sysctl/ -or even with option <tt/-k/) without this option and still did not learn enough -information, you can observe 1 second gaps in service. Not fatal, but -not good. - -<item><tt/-k/ - suppress sending broadcast queries by kernel. It takes -sense together with option <tt/-a/. - -<item><tt/-n TIME/ - timeout of negative cache. When resolution fails <tt/arpd/ -suppresses further attempts to resolve for this period. It makes sense -only together with option <tt/-k/. This timeout should not be too much -longer than boot time of a typical host not supporting gratuitous ARP. -Default value is 60 seconds. - -<item><tt/-R RATE/ - maximal steady rate of broadcasts sent by <tt/arpd/ -in packets per second. Default value is 1.
- -<item><tt/-B NUMBER/ - number of broadcasts sent by <tt/arpd/ back to back. -Default value is 3. Together with option <tt/-R/ this option allows -to police broadcasting not to exceed <tt/B+R*T/ over any interval -of time <tt/T/. - -</itemize> - -<p><tt/INTERFACE/ is name of networking interface to watch. -If no interfaces given, <tt/arpd/ monitors all the interfaces. -In this case <tt/arpd/ does not adjust <tt/sysctl/ parameters, -it is supposed user does this himself after <tt/arpd/ is started. - - -<p> Signals - -<p> <tt/arpd/ exits gracefully syncing database and restoring adjusted -<tt/sysctl/ parameters, when receives <tt/SIGINT/ or <tt/SIGTERM/. -<tt/SIGHUP/ syncs database to disk. <tt/SIGUSR1/ sends some statistics -to <tt/syslog/. Effect of another signals is undefined, they may corrupt -database and leave <tt/sysctl/ parameters in an unpredictable state. - -<p> Note - -<p> In order to <tt/arpd/ be able to serve as ARP resolver, kernel must be -compiled with the option <tt/CONFIG_ARPD/ and, in the case when interface list -is not given on command line, variable <tt/app_solicit/ -on interfaces of interest should be set in <tt>/proc/sys/net/ipv4/neigh/*</tt>. -If this is not made <tt/arpd/ still collects gratuitous ARP information -in its database. - -<p> Examples - -<enum> -<item> Start <tt/arpd/ to collect gratuitous ARP, but not messing -with kernel functionality: - -<tscreen><verb> - arpd -b /var/tmp/arpd.db -</verb></tscreen> - -<item> Look at result after some time: - -<tscreen><verb> - killall arpd - arpd -l -b /var/tmp/arpd.db -</verb></tscreen> - -<item> To enable kernel helper, leaving leading role to kernel: - -<tscreen><verb> - arpd -b /var/tmp/arpd.db -a 1 eth0 eth1 -</verb></tscreen> - -<item> Completely replace kernel resolution on interfaces <tt/eth0/ -and <tt/eth1/.
In this case kernel still does unicast probing to -validate entries, but all the broadcast activity is suppressed -and made under authority of <tt/arpd/: - -<tscreen><verb> - arpd -b /var/tmp/arpd.db -a 3 -k eth0 eth1 -</verb></tscreen> - -This is mode which <tt/arpd/ is supposed to work normally. -It is not default just to prevent occasional enabling of too aggressive -mode occasionally. - -</enum> - -</article> - From 3e83c095e81516275d3c5e773cf419608b287155 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger <stephen@networkplumber.org> Date: Fri, 29 Sep 2017 10:01:15 -0700 Subject: [PATCH 04/28] doc: remove outdated nstat/rtstat documentation Signed-off-by: Stephen Hemminger <stephen@networkplumber.org> --- doc/Makefile | 2 +- doc/nstat.sgml | 110 ------------------------------------------------ doc/rtstat.sgml | 52 ----------------------- 3 files changed, 1 insertion(+), 163 deletions(-) delete mode 100644 doc/nstat.sgml delete mode 100644 doc/rtstat.sgml diff --git a/doc/Makefile b/doc/Makefile index d3fdc2b6..38be3d90 100644 --- a/doc/Makefile +++ b/doc/Makefile @@ -1,4 +1,4 @@ -PSFILES=ip-cref.ps api-ip6-flowlabels.ps nstat.ps rtstat.ps tc-filters.ps +PSFILES=ip-cref.ps api-ip6-flowlabels.ps tc-filters.ps # tc-cref.ps # api-rtnl.tex api-pmtudisc.tex api-news.tex # iki-netdev.ps iki-neighdst.ps diff --git a/doc/nstat.sgml b/doc/nstat.sgml deleted file mode 100644 index 48cacc69..00000000 --- a/doc/nstat.sgml +++ /dev/null @@ -1,110 +0,0 @@ -<!doctype linuxdoc system> - -<article> - -<title>NSTAT, IFSTAT and RTACCT Utilities -<author>Alexey Kuznetsov, <tt/kuznet@ms2.inr.ac.ru/ -<date>some_negative_number, 20 Sep 2001 -<abstract> -<tt/nstat/, <tt/ifstat/ and <tt/rtacct/ are simple tools helping -to monitor kernel snmp counters and network interface statistics. -</abstract> - -<p> These utilities are very similar, so that I describe -them simultaneously, using name <tt/Xstat/ in the places which apply -to all of them. 
- -<p>The format of the command is: - -<tscreen><verb> - Xstat [ OPTIONS ] [ PATTERN [ PATTERN ... ] ] -</verb></tscreen> - -<p> -<tt/PATTERN/ is shell style pattern, selecting identifier -of SNMP variables or interfaces to show. Variable is displayed -if one of patterns matches its name. If no patterns are given, -<tt/Xstat/ assumes that user wants to see all the variables. - -<p> <tt/OPTIONS/ is list of single letter options, using common unix -conventions. - -<itemize> -<item><tt/-h/ - show help page -<item><tt/-?/ - the same, of course -<item><tt/-v/, <tt/-V/ - print version of <tt/Xstat/ and exit -<item><tt/-z/ - dump zero counters too. By default they are not shown. -<item><tt/-a/ - dump absolute values of counters. By default <tt/Xstat/ - calculates increments since the previous use. -<item><tt/-s/ - do not update history, so that the next time you will - see counters including values accumulated to the moment - of this measurement too. -<item><tt/-n/ - do not display anything, only update history. -<item><tt/-r/ - reset history. -<item><tt/-d INTERVAL/ - <tt/Xstat/ is run in daemon mode collecting - statistics. <tt/INTERVAL/ is interval between measurements - in seconds. -<item><tt/-t INTERVAL/ - time interval to average rates. Default value - is 60 seconds. -<item><tt/-e/ - display extended information about errors (<tt/ifstat/ only). -</itemize> - -<p> -History is just dump saved in file <tt>/tmp/.Xstat.uUID</tt> -or in file given by environment variables <tt/NSTAT_HISTORY/, -<tt/IFSTAT_HISTORY/ and <tt/RTACCT_HISTORY/. -Each time when you use <tt/Xstat/ values there are updated. -If you use patterns, only the values which you _really_ see -are updated. If you want to skip an uninteresting period, -use option <tt/-n/, or just output to <tt>/dev/null</tt>.
- -<p> -<tt/Xstat/ understands when history is invalidated by system reboot -or source of information switched between different instances -of daemonic <tt/Xstat/ and kernel SNMP tables and does not -use invalid history. - -<p> Beware, <tt/Xstat/ will not produce sane output, -when many processes use it simultaneously. If several processes -under single user need this utility they should use environment -variables to put their history in safe places -or to use it with options <tt/-a -s/. - -<p> -Well, that's all. The utility is very simple, but nevertheless -very handy. - -<p> <bf/Output of XSTAT/ -<p> The first line of output is <tt/#/ followed by identifier -of source of information, it may be word <tt/kernel/, when <tt/Xstat/ -gets information from kernel or some dotted decimal number followed -by parameters, when it obtains information from running <tt/Xstat/ daemon. - -<p>In the case of <tt/nstat/ the rest of output consists of three columns: -SNMP MIB identifier, -its value (or increment since previous measurement) and average -rate of increase of the counter per second. <tt/ifstat/ outputs -interface name followed by pairs of counter and rate of its change. - -<p> <bf/Daemonic Xstat/ -<p> <tt/Xstat/ may be started as daemon by any user. This makes sense -to avoid wrapped counters and to obtain reasonable long counters -for large time. Also <tt/Xstat/ daemon calculates average rates. -For the first goal sampling interval (option <tt/-d/) may be large enough, -f.e. for gigabit rates byte counters overflow not more frequently than -each 40 seconds and you may select interval of 20 seconds. -From the other hand, when <tt/Xstat/ is used for estimating rates -interval should be less than averaging period (option <tt/-t/), otherwise -estimation loses in quality. - -Client <tt/Xstat/, before trying to get information from the kernel, -contacts daemon started by this user, then it tries system wide -daemon, which is supposed to be started by superuser. 
And only if -none of them replied it gets information from kernel. - -<p> <bf/Environment/ -<p> <tt/NSTAT_HISTORY/ - name of history file for <tt/nstat/. -<p> <tt/IFSTAT_HISTORY/ - name of history file for <tt/ifstat/. -<p> <tt/RTACCT_HISTORY/ - name of history file for <tt/rtacct/. - -</article> diff --git a/doc/rtstat.sgml b/doc/rtstat.sgml deleted file mode 100644 index 07391c39..00000000 --- a/doc/rtstat.sgml +++ /dev/null @@ -1,52 +0,0 @@ -<!doctype linuxdoc system> - -<article> - -<title>RTACCT Utility -<author>Robert Olsson -<date>some_negative_number, 20 Dec 2001 - -<p> -Here is some code for monitoring the route cache. For systems handling high -network load, servers, routers, firewalls etc the route cache and its garbage -collection is crucial. Linux has a solid implementation. - -<p> -The kernel patch (not required since linux-2.4.7) adds statistics counters -from route cache process into -/proc/net/rt_cache_stat. A companion user mode program presents the statistics -in a vmstat or iostat manner. The ratio between cache hits and misses gives -the flow length. - -<p> -Hopefully it can help understanding performance and DoS and other related -issues. - -<p> An URL where newer versions of this utility can be (probably) found -is ftp://robur.slu.se/pub/Linux/net-development/rt_cache_stat/ - - -<p><bf/Description/ - -<p>The format of the command is: - -<tscreen><verb> - rtstat [ OPTIONS ] -</verb></tscreen> - -<p> <tt/OPTIONS/ are: - -<itemize> - -<item><tt/-h/, <tt/-help/ - show help page and version of the utility. - -<item><tt/-i INTERVAL/ - interval between snapshots, default value is -2 seconds. - -<item><tt/-s NUMBER/ - whether to print header line. 0 inhibits header line, -1 prescribes to print it once and 2 (this is default setting) forces header -line each 20 lines. 
- -</itemize> - -</article> From fd1aa86741f4350a9b8e39744a97a87cd77b59c6 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger <stephen@networkplumber.org> Date: Fri, 29 Sep 2017 10:02:31 -0700 Subject: [PATCH 05/28] ignore generated Config file Signed-off-by: Stephen Hemminger <stephen@networkplumber.org> --- .gitignore | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.gitignore b/.gitignore index 308aec6b..f8c3dfca 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,5 @@ +# locally generated +Config static-syms.h config.* *.o From bbf2a3634ed7199eac40d3605dad283e6ca5cf56 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger <stephen@networkplumber.org> Date: Fri, 29 Sep 2017 10:05:09 -0700 Subject: [PATCH 06/28] doc: remove outdated tc-filters documentation Signed-off-by: Stephen Hemminger <stephen@networkplumber.org> --- doc/tc-filters.tex | 514 --------------------------------------------- 1 file changed, 514 deletions(-) delete mode 100644 doc/tc-filters.tex diff --git a/doc/tc-filters.tex b/doc/tc-filters.tex deleted file mode 100644 index 54cc0c99..00000000 --- a/doc/tc-filters.tex +++ /dev/null @@ -1,514 +0,0 @@ -\documentclass[12pt,twoside]{article} - -\usepackage[hidelinks]{hyperref} % \url -\usepackage{booktabs} % nicer tabulars -\usepackage{fancyvrb} -\usepackage{fullpage} -\usepackage{float} - -\newcommand{\iface}{\textit} -\newcommand{\cmd}{\texttt} -\newcommand{\man}{\textit} -\newcommand{\qdisc}{\texttt} -\newcommand{\filter}{\texttt} - -\begin{document} -\title{QoS in Linux with TC and Filters} -\author{Phil Sutter (phil@nwl.cc)} -\date{January 2016} -\maketitle - -Standard practice when transmitting packets over a medium which may block (due -to congestion, e.g.) is to use a queue which temporarily holds these packets. In -Linux, this queueing approach is where QoS happens: A Queueing Discipline -(qdisc) holds multiple packet queues with different priorities for dequeueing to -the network driver. The classification (i.e. 
deciding which queue a packet -should go into) is typically done based on Type Of Service (IPv4) or Traffic -Class (IPv6) header fields but depending on qdisc implementation, might be -controlled by the user as well. - -Qdiscs come in two flavors, classful or classless. While classless qdiscs are -not as flexible as classful ones, they also require much less customizing. Often -it is enough to just attach them to an interface, without exact knowledge of -what is done internally. Classful qdiscs are the exact opposite: flexible in -application, they are often not even usable without insightful configuration. - -As the name implies, classful qdiscs provide configurable classes to sort -traffic into. In its basic form, this is not much different than, say, the -classless \qdisc{pfifo\_fast} which holds three queues and classifies per -packet upon priority field. Though typically classes go beyond that by -supporting nesting and additional characteristics like e.g. maximum traffic -rate or quantum. - -When it comes to controlling the classification process, filters come into play. -They attach to the parent of a set of classes (i.e. either the qdisc itself or -a parent class) and specify how a packet (or its associated flow) has to look -like in order to suit a given class. To overcome this simplification, it is -possible to attach multiple filters to the same parent, which then consults each -of them in row until the first one accepts the packet. - -Before getting into detail about what filters there are and how to use them, a -simple setup of a qdisc with classes is necessary: -\begin{figure}[H] -\begin{Verbatim} - .-------------------------------------------------------.
- | | - | HTB | - | | - | .----------------------------------------------------.| - | | || - | | Class 1:1 || - | | || - | | .---------------..---------------..---------------.|| - | | | || || ||| - | | | Class 1:10 || Class 1:20 || Class 1:30 ||| - | | | || || ||| - | | | .------------.|| .------------.|| .------------.||| - | | | | ||| | ||| | |||| - | | | | fq_codel ||| | fq_codel ||| | fq_codel |||| - | | | | ||| | ||| | |||| - | | | '------------'|| '------------'|| '------------'||| - | | '---------------''---------------''---------------'|| - | '----------------------------------------------------'| - '-------------------------------------------------------' -\end{Verbatim} -\end{figure} -\noindent -The following commands establish the basic setup shown: -\begin{Verbatim} -(1) # tc qdisc replace dev eth0 root handle 1: htb default 30 -(2) # tc class add dev eth0 parent 1: classid 1:1 htb rate 95mbit -(3) # alias tclass='tc class add dev eth0 parent 1:1' -(4) # tclass classid 1:10 htb rate 1mbit ceil 20mbit prio 1 -(4) # tclass classid 1:20 htb rate 90mbit ceil 95mbit prio 2 -(4) # tclass classid 1:30 htb rate 1mbit ceil 95mbit prio 3 -(5) # tc qdisc add dev eth0 parent 1:10 fq_codel -(5) # tc qdisc add dev eth0 parent 1:20 fq_codel -(5) # tc qdisc add dev eth0 parent 1:30 fq_codel -\end{Verbatim} -A little explanation for the unfamiliar reader: -\begin{enumerate} -\item Replace the root qdisc of \iface{eth0} by an instance of \qdisc{HTB}. - Specifying the handle is necessary so it can be referenced in consecutive - calls to \cmd{tc}. The default class for unclassified traffic is set to - 30. -\item Create a single top-level class with handle 1:1 which limits the total - bandwidth allowed to 95mbit/s. It is assumed that \iface{eth0} is a 100mbit/s link, - staying a little below that helps to keep the main point of enqueueing in - the qdisc layer instead of the interface hardware queue or at another - bottleneck in the network. 
-\item Define an alias for the common part of the remaining three calls in order - to improve readability. This means all remaining classes are attached to the - common parent class from (2). -\item Create three child classes for different uses: Class 1:10 has highest - priority but is tightly limited in bandwidth - fine for interactive - connections. Class 1:20 has mid priority and high guaranteed bandwidth, for - high priority bulk traffic. Finally, there's the default class 1:30 with - lowest priority, low guaranteed bandwidth and the ability to use the full - link in case it's unused otherwise. This should be fine for uninteresting - traffic not explicitly taken care of. -\item Attach a leaf qdisc to each of the child classes created in (4). Since - \qdisc{HTB} by default attaches \qdisc{pfifo} as leaf qdisc, this step is optional. Still, - the fairness between different flows provided by the classless \qdisc{fq\_codel} is - worth the effort. -\end{enumerate} -More information about the qdiscs and fine-tuning parameters can be found in -\man{tc-htb(8)} and \man{tc-fq\_codel(8)}. - -Without any additional setup done, now all traffic leaving \iface{eth0} is shaped to -95mbit/s and directed through class 1:30. This can be verified by looking at the -\texttt{Sent} field of the class statistics printed via \cmd{tc -s class show dev eth0}: -Only the root class 1:1 and its child 1:30 should show any traffic. - - -\section*{Finally time to start filtering!} - -Let's begin with a simple one, i.e. reestablishing what \qdisc{pfifo\_fast} did -automatically based on TOS/Priority field. Linux internally translates the -header field into the priority field of struct skbuff, which -\qdisc{pfifo\_fast} uses for -classification. \man{tc-prio(8)} contains a table listing the priority (and -ultimately, \qdisc{pfifo\_fast} queue index) each TOS value is being translated into.
-Here is a shorter version: -\begin{center} -\begin{tabular}{lll} -TOS Values & Linux Priority (Number) & Queue Index \\ -\midrule -0x0 - 0x6 & Best Effort (0) & 1 \\ -0x8 - 0xe & Bulk (2) & 2 \\ -0x10 - 0x16 & Interactive (6) & 0 \\ -0x18 - 0x1e & Interactive Bulk (4) & 1 \\ -\end{tabular} -\end{center} -Using the \filter{basic} filter, it is possible to match packets based on that skbuff -field, which has the added benefit of being IP version agnostic. Since the -\qdisc{HTB} setup above defaults to class ID 1:30, the Bulk priority can be -ignored. The \filter{basic} filter allows to combine matches, therefore we get along -with only two filters: -\begin{Verbatim} -# tc filter add dev eth0 parent 1: basic \ - match 'meta(priority eq 6)' classid 1:10 -# tc filter add dev eth0 parent 1: basic \ - match 'meta(priority eq 0)' \ - or 'meta(priority eq 4)' classid 1:20 -\end{Verbatim} -A detailed description of the \filter{basic} filter and the ematch syntax it uses can be -found in \man{tc-basic(8)} and \man{tc-ematch(8)}. - -Obviously, this first example cries for optimization. A simple one would be to -just change the default class from 1:30 to 1:20, so filters are only needed for -Bulk and Interactive priorities: -\begin{Verbatim} -# tc filter add dev eth0 parent 1: basic \ - match 'meta(priority eq 6)' classid 1:10 -# tc filter add dev eth0 parent 1: basic \ - match 'meta(priority eq 2)' classid 1:20 -\end{Verbatim} -Given that class IDs are random, choosing them wisely allows for a direct -mapping. 
So first, recreate the qdisc and classes configuration: -\begin{Verbatim} -# tc qdisc replace dev eth0 root handle 1: htb default 10 -# tc class add dev eth0 parent 1: classid 1:1 htb rate 95mbit -# alias tclass='tc class add dev eth0 parent 1:1' -# tclass classid 1:16 htb rate 1mbit ceil 20mbit prio 1 -# tclass classid 1:10 htb rate 90mbit ceil 95mbit prio 2 -# tclass classid 1:12 htb rate 1mbit ceil 95mbit prio 3 -# tc qdisc add dev eth0 parent 1:16 fq_codel -# tc qdisc add dev eth0 parent 1:10 fq_codel -# tc qdisc add dev eth0 parent 1:12 fq_codel -\end{Verbatim} -This is basically identical to above, but with changed leaf class IDs and the -second priority class being the default. Using the \filter{flow} filter with its \texttt{map} -functionality, a single filter command is enough: -\begin{Verbatim} -# tc filter add dev eth0 parent 1: handle 0x1337 flow \ - map key priority baseclass 1:10 -\end{Verbatim} -The \filter{flow} filter now uses the priority value to construct a destination class ID -by adding it to the value of \texttt{baseclass}. While this works for priority values of -0, 2 and 6, it will result in non-existent class ID 1:14 for Interactive Bulk -traffic. In that case, the \qdisc{HTB} default applies so that traffic goes into class -ID 1:10 just as intended. Please note that specifying a handle is a mandatory -requirement by the \filter{flow} filter, although I didn't see where one would use that -later. For more information about \filter{flow}, see \man{tc-flow(8)}. - -While \filter{flow} and \filter{basic} filters are relatively easy to apply and understand, they -are as well quite limited to their intended purpose. A more flexible option is -the \filter{u32} filter, which allows to match on arbitrary parts of the packet data - -yet only on that, not any meta data associated to it by the kernel (with the -exception of firewall mark value).
So in order to continue this little -exercise with \filter{u32}, we have to base classification directly upon the actual TOS -value. An intuitive attempt might look like this: -\begin{Verbatim} -# alias tcfilter='tc filter add dev eth0 parent 1:' -# tcfilter u32 match ip dsfield 0x10 0x1e classid 1:16 -# tcfilter u32 match ip dsfield 0x12 0x1e classid 1:16 -# tcfilter u32 match ip dsfield 0x14 0x1e classid 1:16 -# tcfilter u32 match ip dsfield 0x16 0x1e classid 1:16 -# tcfilter u32 match ip dsfield 0x8 0x1e classid 1:12 -# tcfilter u32 match ip dsfield 0xa 0x1e classid 1:12 -# tcfilter u32 match ip dsfield 0xc 0x1e classid 1:12 -# tcfilter u32 match ip dsfield 0xe 0x1e classid 1:12 -\end{Verbatim} -The obvious drawback here is the amount of filters needed. And without the -default class, eight more filters would be necessary. This also has performance -implications: A packet with TOS value 0xe will be checked eight times in total -in order to determine its destination class. While there's not much to be done -about the number of filters, at least the performance problem can be eliminated -by using \filter{u32}'s hash table support: -\begin{Verbatim} -# tc filter add dev eth0 parent 1: prio 99 handle 1: u32 divisor 16 -\end{Verbatim} -This creates a hash table with 16 buckets. The table size is arbitrary, but not -random: Since the first bit of the TOS field is not interesting, it can be -ignored and therefore the range of values to consider is just [0;15], i.e. a -number of 16 different values.
The next step is to populate the hash table: -\begin{Verbatim} -# alias tcfilter='tc filter add dev eth0 parent 1: prio 99' -# tcfilter u32 match u8 0 0 ht 1:0: classid 1:16 -# tcfilter u32 match u8 0 0 ht 1:1: classid 1:16 -# tcfilter u32 match u8 0 0 ht 1:2: classid 1:16 -# tcfilter u32 match u8 0 0 ht 1:3: classid 1:16 -# tcfilter u32 match u8 0 0 ht 1:4: classid 1:12 -# tcfilter u32 match u8 0 0 ht 1:5: classid 1:12 -# tcfilter u32 match u8 0 0 ht 1:6: classid 1:12 -# tcfilter u32 match u8 0 0 ht 1:7: classid 1:12 -# tcfilter u32 match u8 0 0 ht 1:8: classid 1:16 -# tcfilter u32 match u8 0 0 ht 1:9: classid 1:16 -# tcfilter u32 match u8 0 0 ht 1:a: classid 1:16 -# tcfilter u32 match u8 0 0 ht 1:b: classid 1:16 -# tcfilter u32 match u8 0 0 ht 1:c: classid 1:10 -# tcfilter u32 match u8 0 0 ht 1:d: classid 1:10 -# tcfilter u32 match u8 0 0 ht 1:e: classid 1:10 -# tcfilter u32 match u8 0 0 ht 1:f: classid 1:10 -\end{Verbatim} -The parameter \texttt{ht} denotes the hash table and bucket the filter should be added -to. Since the first TOS bit is ignored, its value has to be divided by two in -order to get to the bucket it maps to. E.g. a TOS value of 0x10 will therefore -map to bucket 0x8. For the sake of completeness, all possible values are mapped -and therefore a configurable default class is not required. Note that the used -match expression is not semantically necessary, but syntactically mandatory. Therefore anything that -matches any packet will suffice. Finally, a filter which links to the defined -hash table is needed: -\begin{Verbatim} -# tc filter add dev eth0 parent 1: prio 1 protocol ip u32 \ - link 1: hashkey mask 0x001e0000 match u8 0 0 -\end{Verbatim} -Here again, the actual match statement is not necessary, but syntactically -required. All the magic lies within the \texttt{hashkey} parameter, which defines which -part of the packet should be used directly as hash key.
Here's a drawing of the -first four bytes of the IPv4 header, with the area selected by \texttt{hashkey mask} -highlighted: -\begin{figure}[H] -\begin{Verbatim} - 0 1 2 3 - .-----------------------------------------------------------------. - | | | ######## | | | - | Version| IHL | #DSCP### | ECN| Total Length | - | | | ######## | | | - '-----------------------------------------------------------------' -\end{Verbatim} -\end{figure} -\noindent -This may look confusing at first, but keep in mind that bit- as well as -byte-ordering here is LSB while the mask value is written in MSB we humans use. -Therefore reading the mask is done like so, starting from left: -\begin{enumerate} -\item Skip the first byte (which contains Version and IHL fields). -\item Skip the lowest bit of the second byte (0x1e is even). -\item Mark the four following bits (0x1e is 11110 in binary). -\item Skip the remaining three bits of the second byte as well as the remaining two - bytes. -\end{enumerate} -Before doing the lookup, the kernel right-shifts the masked value by the amount -of zero-bits in \texttt{mask}, which implicitly also does the division by two which the -hash table depends on. With this setup, every packet has to pass exactly two -filters to be classified. Note that this filter is limited to IPv4 packets: Due -to the related Traffic Class field being at a different offset in the packet, it -would not work for IPv6. To use the same setup for IPv6 as well, a second -entry-level filter is necessary: -\begin{Verbatim} -# tc filter add dev eth0 parent 1: prio 2 protocol ipv6 u32 \ - link 1: hashkey mask 0x01e00000 match u8 0 0 -\end{Verbatim} -For illustration purposes, here again is a drawing of the first four bytes of -the IPv6 header, again with masked area highlighted: -\begin{figure}[H] -\begin{Verbatim} - 0 1 2 3 - .-----------------------------------------------------------------. 
- | | ######## | | - | Version| #Traffic Class| Flow Label | - | | ######## | | - '-----------------------------------------------------------------' -\end{Verbatim} -\end{figure} -\noindent -Reading the mask value is analogous to IPv4 with the added complexity that -Traffic Class spans over two bytes. Yet, for comparison there's a simple trick: -IPv6 has the interesting field shifted by four bits to the left, and the new -mask's value is shifted by the same amount. For further information about -\filter{u32} and what can be done with it, consult it's man page -\man{tc-u32(8)}. - -Of course, the kernel provides many more filters than just \filter{basic}, -\filter{flow} and \filter{u32} which have been presented above. As of now, the -remaining ones are: -\begin{description} -\item[bpf] - Filtering using Berkeley Packet Filter programs. The program's return - code determines the packet's destination class ID. - -\item[cgroup] - Filter packets based on control groups. This is only useful for packets - originating from the local host, as control groups only exist in that - scope. - -\item[flower] - An extended variant of the flow filter. - -\item[fw] - Matches on firewall mark values previously assigned to the packet by - netfilter (or a filter action, see below for details). This allows to - export the classification algorithm into netfilter, which is very - convenient if appropriate rules exist on the same system in there - already. - -\item[route] - Filter packets based on matching routing table entry. Basically - equivalent to the \texttt{fw} filter above, to make use of an already existing - extensive routing table setup. - -\item[rsvp, rsvp6] - Implementation of the Resource Reservation Protocol in Linux, to react - upon requests sent by an RSVP daemon. - -\item[tcindex] - Match packets based on tcindex value, which is usually set by the dsmark - qdisc. This is part of an approach to support Differentiated Services in - Linux, which is another topic on it's own. 
-\end{description} - - -\section*{Filter Actions} - -The tc filter framework provides the infrastructure to another extensible set of -tools as well, namely tc actions. As the name suggests, they allow to do things -with packets (or associated data). (The list of) Actions are part of a given -filter. If it matches, each action it contains is executed in order before -returning the classification result. Since the action has direct access to the -latter, it is in theory possible for an action to react upon or even change the -filtering result - as long as the packet matched, of course. Yet none of the -currently in-tree actions make use of this. - -The Generic Actions framework originally evolved out of the filters' ability to -police traffic to a given maximum bandwidth. One common use case for that is to -limit ingress traffic, dropping packets which exceed the threshold. A classic -setup example is like so: -\begin{Verbatim} -# tc qdisc add dev eth0 handle ffff: ingress -# tc filter add dev eth0 parent ffff: u32 \ - match u32 0 0 - police rate 1mbit burst 100k -\end{Verbatim} -The ingress qdisc is not a real one, but merely a point of reference for filters -to attach to which should get applied to incoming traffic. The \filter{u32} filter added -above matches on any packet and therefore limits the total incoming bandwidth to -1mbit/s, allowing bursts of up to 100kbytes. Using the new syntax, the filter -command changes slightly: -\begin{Verbatim} -# tc filter add dev eth0 parent ffff: u32 \ - match u32 0 0 \ - action police rate 1mbit burst 100k -\end{Verbatim} -The important detail is that this syntax allows to define multiple actions. -E.g. 
for testing purposes, it is possible to redirect exceeding traffic to the -loopback interface instead of dropping it: -\begin{Verbatim} -# tc filter add dev eth0 parent ffff: u32 \ - match u32 0 0 \ - action police rate 1mbit burst 100k conform-exceed pipe \ - action mirred egress redirect dev lo -\end{Verbatim} -The added parameter \texttt{conform-exceed pipe} tells the police action to allow for -further actions to handle the exceeding packet. - -Apart from \texttt{police} and \texttt{mirred} actions, there are a few more. Here's a full -list of the currently implemented ones: -\begin{description} -\item[bpf] - Apply a Berkeley Packet Filter program to the packet. - -\item[connmark] - Set the packet's firewall mark to that of it's connection. This works by - searching the conntrack table for a matching entry. If found, the mark - is restored. - -\item[csum] - Trigger recalculation of packet checksums. The supported protocols are: - IPv4, ICMP, IGMP, TCP, UDP and UDPLite. - -\item[ipt] - Pass the packet to an iptables target. This allows to use iptables - extensions directly instead of having to go the extra mile via setting - an arbitrary firewall mark and matching on that from within netfilter. - -\item[mirred] - Mirror or redirect packets. This is often combined with the ifb pseudo - device to share a common QoS setup between multiple interfaces or even - ingress traffic. - -\item[nat] - Perform stateless Native Address Translation. This is certainly not - complete and therefore inferior to NAT using iptables: Although the - kernel module decides between TCP, UDP and ICMP traffic, it does not - handle typical problematic protocols such as active FTP or SIP. - -\item[pedit] - Generic packet editing. This allows to alter arbitrary bytes of the - packet, either by specifying an offset into the packet or by naming a - packet header and field name to change. Currently, the latter is - implemented only for IPv4 yet. 
- -\item[police] - Apply a bandwidth rate limiting policy. Packets exceeding it are dropped - by default, but may optionally be handled differently. - -\item[simple] - This is rather an example than real action. All it does is print a - user-defined string together with a packet counter. Useful maybe for - debugging when filter statistics are not available or too complicated. - -\item[skbedit] - Edit associated packet data, supports changing queue mapping, priority - field and firewall mark value. - -\item[vlan] - Add/remove a VLAN header to/from the packet. This might serve as - alternative to using 802.1Q pseudo-interfaces in combination with - routing rules when e.g. packets for a given destination need to be - encapsulated. -\end{description} - - -\section*{Intermediate Functional Block} - -The Intermediate Functional Block (\texttt{ifb}) pseudo network interface acts as a QoS -concentrator for multiple different sources of traffic. Packets from or to other -interfaces have to be redirected to it using the \texttt{mirred} action in order to be -handled, regularly routed traffic will be dropped. This way, a single stack of -qdiscs, classes and filters can be shared between multiple interfaces. - -Here's a simple example to feed incoming traffic from multiple interfaces -through a Stochastic Fairness Queue (\qdisc{sfq}): -\begin{Verbatim} -(1) # modprobe ifb -(2) # ip link set ifb0 up -(3) # tc qdisc add dev ifb0 root sfq -\end{Verbatim} -The first step is to load the \texttt{ifb} kernel module (1). By default, this will -create two ifb devices: \iface{ifb0} and \iface{ifb1}. After setting -\iface{ifb0} up in (2), the root -qdisc is replaced by \qdisc{sfq} in (3). Finally, one can start redirecting ingress -traffic to \iface{ifb0}, e.g. 
from \iface{eth0}: -\begin{Verbatim} -# tc qdisc add dev eth0 handle ffff: ingress -# tc filter add dev eth0 parent ffff: u32 \ - match u32 0 0 \ - action mirred egress redirect dev ifb0 -\end{Verbatim} -The same can be done for other interfaces, just replacing \iface{eth0} in the two -commands above. One thing to keep in mind here is the asymmetrical routing this -creates within the host doing the QoS: Incoming packets enter the system via -\iface{ifb0}, while corresponding replies leave directly via \iface{eth0}. This can be observed -using \cmd{tcpdump} on \iface{ifb0}, which shows the input part of the traffic only. What's -more confusing is that \cmd{tcpdump} on \iface{eth0} shows both incoming and outgoing traffic, -but the redirection is still effective - a simple prove is setting -\iface{ifb0} down, -which will interrupt the communication. Obviously \cmd{tcpdump} catches the packets to -dump before they enter the ingress qdisc, which is why it sees them while the -kernel itself doesn't. - - -\section*{Conclusion} - -Once the steep learning curve has been mastered, the conglomerate of (classful) -qdiscs, filters and actions provides a highly sophisticated and flexible -infrastructure to perform QoS, which plays nicely along with routing and -firewalling setups. - - -\section*{Further Reading} - -A good starting point for novice users and experienced ones diving into unknown -areas is the extensive HOWTO at \url{http://lartc.org}. The iproute2 package ships -some examples (usually in /usr/share/doc/, depending on distribution) as well as -man pages for \cmd{tc} in general, qdiscs and filters. The latter have been added -just recently though, so if your distribution does not ship iproute2 version -4.3.0 yet, these are not in there. 
Apart from that, the internet is a spring of -HOWTOs and scripts people wrote - though these should be taken with a grain of -salt: The complexity of the matter often leads to copying others' solutions -without much validation, which allows for less optimal or even obsolete -implementations to survive much longer than desired. - -\end{document} From a4cda980bb21d140222f8cb918913d5bbfbc7083 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger <stephen@networkplumber.org> Date: Fri, 29 Sep 2017 10:06:50 -0700 Subject: [PATCH 07/28] doc: remove outdated IPv6 flow label document Not updated since Linux 2.2 Signed-off-by: Stephen Hemminger <stephen@networkplumber.org> --- doc/Makefile | 6 +- doc/Plan | 16 -- doc/api-ip6-flowlabels.tex | 429 ------------------------------------- 3 files changed, 1 insertion(+), 450 deletions(-) delete mode 100644 doc/Plan delete mode 100644 doc/api-ip6-flowlabels.tex diff --git a/doc/Makefile b/doc/Makefile index 38be3d90..3bfcdc2b 100644 --- a/doc/Makefile +++ b/doc/Makefile @@ -1,8 +1,4 @@ -PSFILES=ip-cref.ps api-ip6-flowlabels.ps tc-filters.ps -# tc-cref.ps -# api-rtnl.tex api-pmtudisc.tex api-news.tex -# iki-netdev.ps iki-neighdst.ps - +PSFILES=ip-cref.ps LATEX=latex DVIPS=dvips diff --git a/doc/Plan b/doc/Plan deleted file mode 100644 index 55f478ea..00000000 --- a/doc/Plan +++ /dev/null @@ -1,16 +0,0 @@ -Partially finished work. - -1. User Reference manuals. -1.1 IP Command reference (ip-cref.tex, published) -1.2 TC Command reference (tc-cref.tex) -1.3 IP tunnels (ip-tunnels.tex, published) - -2. Linux-2.2 Networking API -2.1 RTNETLINK (api-rtnl.tex) -2.2 Path MTU Discovery (api-pmtudisc.tex) -2.3 IPv6 Flow Labels (api-ip6-flowlabels.tex, published) -2.4 Miscellaneous extensions (api-misc.tex) - -3. Linux-2.2 Networking Intra-Kernel Interfaces -3.1 NetDev --- Networking Devices and netdev... (iki-netdev.tex) -3.2 Neighbour cache and destination cache. 
(iki-neighdst.tex) diff --git a/doc/api-ip6-flowlabels.tex b/doc/api-ip6-flowlabels.tex deleted file mode 100644 index aa34e947..00000000 --- a/doc/api-ip6-flowlabels.tex +++ /dev/null @@ -1,429 +0,0 @@ -\documentstyle[12pt,twoside]{article} -\def\TITLE{IPv6 Flow Labels} -\input preamble -\begin{center} -\Large\bf IPv6 Flow Labels in Linux-2.2. -\end{center} - - -\begin{center} -{ \large Alexey~N.~Kuznetsov } \\ -\em Institute for Nuclear Research, Moscow \\ -\verb|kuznet@ms2.inr.ac.ru| \\ -\rm April 11, 1999 -\end{center} - -\vspace{5mm} - -\tableofcontents - -\section{Introduction.} - -Every IPv6 packet carries 28 bits of flow information. RFC2460 splits -these bits to two fields: 8 bits of traffic class (or DS field, if you -prefer this term) and 20 bits of flow label. Currently there exist -no well-defined API to manage IPv6 flow information. In this document -I describe an attempt to design the API for Linux-2.2 IPv6 stack. - -\vskip 1mm - -The API must solve the following tasks: - -\begin{enumerate} - -\item To allow user to set traffic class bits. - -\item To allow user to read traffic class bits of received packets. -This feature is not so useful as the first one, however it will be -necessary f.e.\ to implement ECN [RFC2481] for datagram oriented services -or to implement receiver side of SRP or another end-to-end protocol -using traffic class bits. - -\item To assign flow labels to packets sent by user. - -\item To get flow labels of received packets. I do not know -any applications of this feature, but it is possible that receiver will -want to use flow labels to distinguish sub-flows. - -\item To allocate flow labels in the way, compliant to RFC2460. Namely: - -\begin{itemize} -\item -Flow labels must be uniformly distributed (pseudo-)random numbers, -so that any subset of 20 bits can be used as hash key. 
- -\item -Flows with coinciding source address and flow label must have identical -destination address and not-fragmentable extensions headers (i.e.\ -hop by hop options and all the headers up to and including routing header, -if it is present.) - -\begin{NB} -There is a hole in specs: some hop-by-hop options can be -defined only on per-packet base (f.e.\ jumbo payload option). -Essentially, it means that such options cannot present in packets -with flow labels. -\end{NB} -\begin{NB} -NB notes here and below reflect only my personal opinion, -they should be read with smile or should not be read at all :-). -\end{NB} - - -\item -Flow labels have finite lifetime and source is not allowed to reuse -flow label for another flow within the maximal lifetime has expired, -so that intermediate nodes will be able to invalidate flow state before -the label is taken over by another flow. -Flow state, including lifetime, is propagated along datagram path -by some application specific methods -(f.e.\ in RSVP PATH messages or in some hop-by-hop option). - - -\end{itemize} - -\end{enumerate} - -\section{Sending/receiving flow information.} - -\paragraph{Discussion.} -\addcontentsline{toc}{subsection}{Discussion} -It was proposed (Where? I do not remember any explicit statement) -to solve the first four tasks using -\verb|sin6_flowinfo| field added to \verb|struct| \verb|sockaddr_in6| -(see RFC2553). - -\begin{NB} - This method is difficult to consider as reasonable, because it - puts additional overhead to all the services, despite of only - very small subset of them (none, to be more exact) really use it. - It contradicts both to IETF spirit and the letter. Before RFC2553 - one justification existed, IPv6 address alignment left 4 byte - hole in \verb|sockaddr_in6| in any case. Now it has no justification. -\end{NB} - -We have two problems with this method. 
The first one is common for all OSes: -if \verb|recvmsg()| initializes \verb|sin6_flowinfo| to flow info -of received packet, we loose one very important property of BSD socket API, -namely, we are not allowed to use received address for reply directly -and have to mangle it, even if we are not interested in flowinfo subtleties. - -\begin{NB} - RFC2553 adds new requirement: to clear \verb|sin6_flowinfo|. - Certainly, it is not solution but rather attempt to force applications - to make unnecessary work. Well, as usually, one mistake in design - is followed by attempts to patch the hole and more mistakes... -\end{NB} - -Another problem is Linux specific. Historically Linux IPv6 did not -initialize \verb|sin6_flowinfo| at all, so that, if kernel does not -support flow labels, this field is not zero, but a random number. -Some applications also did not take care about it. - -\begin{NB} -Following RFC2553 such applications can be considered as broken, -but I still think that they are right: clearing all the address -before filling known fields is robust but stupid solution. -Useless wasting CPU cycles and -memory bandwidth is not a good idea. Such patches are acceptable -as temporary hacks, but not as standard of the future. -\end{NB} - - -\paragraph{Implementation.} -\addcontentsline{toc}{subsection}{Implementation} -By default Linux IPv6 does not read \verb|sin6_flowinfo| field -assuming that common applications are not obliged to initialize it -and are permitted to consider it as pure alignment padding. -In order to tell kernel that application -is aware of this field, it is necessary to set socket option -\verb|IPV6_FLOWINFO_SEND|. - -\begin{verbatim} - int on = 1; - setsockopt(sock, SOL_IPV6, IPV6_FLOWINFO_SEND, - (void*)&on, sizeof(on)); -\end{verbatim} - -Linux kernel never fills \verb|sin6_flowinfo| field, when passing -message to user space, though the kernels which support flow labels -initialize it to zero. 
If user wants to get received flowinfo, he -will set option \verb|IPV6_FLOWINFO| and after this he will receive -flowinfo as ancillary data object of type \verb|IPV6_FLOWINFO| -(cf.\ RFC2292). - -\begin{verbatim} - int on = 1; - setsockopt(sock, SOL_IPV6, IPV6_FLOWINFO, (void*)&on, sizeof(on)); -\end{verbatim} - -Flowinfo received and latched by a connected TCP socket also may be fetched -with \verb|getsockopt()| \verb|IPV6_PKTOPTIONS| together with -another optional information. - -Besides that, in the spirit of RFC2292 the option \verb|IPV6_FLOWINFO| -may be used as alternative way to send flowinfo with \verb|sendmsg()| or -to latch it with \verb|IPV6_PKTOPTIONS|. - -\paragraph{Note about IPv6 options and destination address.} -\addcontentsline{toc}{subsection}{IPv6 options and destination address} -If \verb|sin6_flowinfo| does contain not zero flow label, -destination address in \verb|sin6_addr| and non-fragmentable -extension headers are ignored. Instead, kernel uses the values -cached at flow setup (see below). However, for connected sockets -kernel prefers the values set at connection time. - -\paragraph{Example.} -\addcontentsline{toc}{subsection}{Example} -After setting socket option \verb|IPV6_FLOWINFO| -flowlabel and DS field are received as ancillary data object -of type \verb|IPV6_FLOWINFO| and level \verb|SOL_IPV6|. 
-In the cases when it is convenient to use \verb|recvfrom(2)|, -it is possible to replace library variant with your own one, -sort of: - -\begin{verbatim} -#include <sys/socket.h> -#include <netinet/in6.h> - -size_t recvfrom(int fd, char *buf, size_t len, int flags, - struct sockaddr *addr, int *addrlen) -{ - size_t cc; - char cbuf[128]; - struct cmsghdr *c; - struct iovec iov = { buf, len }; - struct msghdr msg = { addr, *addrlen, - &iov, 1, - cbuf, sizeof(cbuf), - 0 }; - - cc = recvmsg(fd, &msg, flags); - if (cc < 0) - return cc; - ((struct sockaddr_in6*)addr)->sin6_flowinfo = 0; - *addrlen = msg.msg_namelen; - for (c=CMSG_FIRSTHDR(&msg); c; c = CMSG_NEXTHDR(&msg, c)) { - if (c->cmsg_level != SOL_IPV6 || - c->cmsg_type != IPV6_FLOWINFO) - continue; - ((struct sockaddr_in6*)addr)->sin6_flowinfo = *(__u32*)CMSG_DATA(c); - } - return cc; -} -\end{verbatim} - - - -\section{Flow label management.} - -\paragraph{Discussion.} -\addcontentsline{toc}{subsection}{Discussion} -Requirements of RFC2460 are pretty tough. Particularly, lifetimes -longer than boot time require to store allocated labels at stable -storage, so that the full implementation necessarily includes user space flow -label manager. There are at least three different approaches: - -\begin{enumerate} -\item {\bf ``Cooperative''. } We could leave flow label allocation wholly -to user space. When user needs label he requests manager directly. The approach -is valid, but as any ``cooperative'' approach it suffers of security problems. - -\begin{NB} -One idea is to disallow not privileged user to allocate flow -labels, but instead to pass the socket to manager via \verb|SCM_RIGHTS| -control message, so that it will allocate label and assign it to socket -itself. Hmm... the idea is interesting. -\end{NB} - -\item {\bf ``Indirect''.} Kernel redirects requests to user level daemon -and does not install label until the daemon acknowledged the request. 
-The approach is the most promising, it is especially pleasant to recognize -parallel with IPsec API [RFC2367,Craig]. Actually, it may share API with -IPsec. - -\item {\bf ``Stupid''.} To allocate labels in kernel space. It is the simplest -method, but it suffers of two serious flaws: the first, -we cannot lease labels with lifetimes longer than boot time, the second, -it is sensitive to DoS attacks. Kernel have to remember all the obsolete -labels until their expiration and malicious user may fastly eat all the -flow label space. - -\end{enumerate} - -Certainly, I choose the most ``stupid'' method. It is the cheapest one -for implementor (i.e.\ me), and taking into account that flow labels -still have no serious applications it is not useful to work on more -advanced API, especially, taking into account that eventually we -will get it for no fee together with IPsec. - - -\paragraph{Implementation.} -\addcontentsline{toc}{subsection}{Implementation} -Socket option \verb|IPV6_FLOWLABEL_MGR| allows to -request flow label manager to allocate new flow label, to reuse -already allocated one or to delete old flow label. -Its argument is \verb|struct| \verb|in6_flowlabel_req|: - -\begin{verbatim} -struct in6_flowlabel_req -{ - struct in6_addr flr_dst; - __u32 flr_label; - __u8 flr_action; - __u8 flr_share; - __u16 flr_flags; - __u16 flr_expires; - __u16 flr_linger; - __u32 __flr_reserved; - /* Options in format of IPV6_PKTOPTIONS */ -}; -\end{verbatim} - -\begin{itemize} - -\item \verb|dst| is IPv6 destination address associated with the label. - -\item \verb|label| is flow label value in network byte order. If it is zero, -kernel will allocate new pseudo-random number. Otherwise, kernel will try -to lease flow label ordered by user. In this case, it is user task to provide -necessary flow label randomness. - -\item \verb|action| is requested operation. 
Currently, only three operations -are defined: - -\begin{verbatim} -#define IPV6_FL_A_GET 0 /* Get flow label */ -#define IPV6_FL_A_PUT 1 /* Release flow label */ -#define IPV6_FL_A_RENEW 2 /* Update expire time */ -\end{verbatim} - -\item \verb|flags| are optional modifiers. Currently -only \verb|IPV6_FL_A_GET| has modifiers: - -\begin{verbatim} -#define IPV6_FL_F_CREATE 1 /* Allowed to create new label */ -#define IPV6_FL_F_EXCL 2 /* Do not create new label */ -\end{verbatim} - - -\item \verb|share| defines who is allowed to reuse the same flow label. - -\begin{verbatim} -#define IPV6_FL_S_NONE 0 /* Not defined */ -#define IPV6_FL_S_EXCL 1 /* Label is private */ -#define IPV6_FL_S_PROCESS 2 /* May be reused by this process */ -#define IPV6_FL_S_USER 3 /* May be reused by this user */ -#define IPV6_FL_S_ANY 255 /* Anyone may reuse it */ -\end{verbatim} - -\item \verb|linger| is time in seconds. After the last user releases flow -label, it will not be reused with different destination and options at least -during this time. If \verb|share| is not \verb|IPV6_FL_S_EXCL| the label -still can be shared by another sockets. Current implementation does not allow -unprivileged user to set linger longer than 60 sec. - -\item \verb|expires| is time in seconds. Flow label will be kept at least -for this time, but it will not be destroyed before user released it explicitly -or closed all the sockets using it. Current implementation does not allow -unprivileged user to set timeout longer than 60 sec. Proviledged applications -MAY set longer lifetimes, but in this case they MUST save allocated -labels at stable storage and restore them back after reboot before the first -application allocates new flow. - -\end{itemize} - -This structure is followed by optional extension headers associated -with this flow label in format of \verb|IPV6_PKTOPTIONS|. Only -\verb|IPV6_HOPOPTS|, \verb|IPV6_RTHDR| and, if \verb|IPV6_RTHDR| presents, -\verb|IPV6_DSTOPTS| are allowed. 
- -\paragraph{Example.} -\addcontentsline{toc}{subsection}{Example} - The function \verb|get_flow_label| allocates -private flow label. - -\begin{verbatim} -int get_flow_label(int fd, struct sockaddr_in6 *dst, __u32 fl) -{ - int on = 1; - struct in6_flowlabel_req freq; - - memset(&freq, 0, sizeof(freq)); - freq.flr_label = htonl(fl); - freq.flr_action = IPV6_FL_A_GET; - freq.flr_flags = IPV6_FL_F_CREATE | IPV6_FL_F_EXCL; - freq.flr_share = IPV6_FL_S_EXCL; - memcpy(&freq.flr_dst, &dst->sin6_addr, 16); - if (setsockopt(fd, SOL_IPV6, IPV6_FLOWLABEL_MGR, - &freq, sizeof(freq)) == -1) { - perror ("can't lease flowlabel"); - return -1; - } - dst->sin6_flowinfo |= freq.flr_label; - - if (setsockopt(fd, SOL_IPV6, IPV6_FLOWINFO_SEND, - &on, sizeof(on)) == -1) { - perror ("can't send flowinfo"); - - freq.flr_action = IPV6_FL_A_PUT; - setsockopt(fd, SOL_IPV6, IPV6_FLOWLABEL_MGR, - &freq, sizeof(freq)); - return -1; - } - return 0; -} -\end{verbatim} - -A bit more complicated example using routing header can be found -in \verb|ping6| utility (\verb|iputils| package). Linux rsvpd backend -contains an example of using operation \verb|IPV6_FL_A_RENEW|. - -\paragraph{Listing flow labels.} -\addcontentsline{toc}{subsection}{Listing flow labels} -List of currently allocated -flow labels may be read from \verb|/proc/net/ip6_flowlabel|. - -\begin{verbatim} -Label S Owner Users Linger Expires Dst Opt -A1BE5 1 0 0 6 3 3ffe2400000000010a0020fffe71fb30 0 -\end{verbatim} - -\begin{itemize} -\item \verb|Label| is hexadecimal flow label value. -\item \verb|S| is sharing style. -\item \verb|Owner| is ID of creator, it is zero, pid or uid, depending on - sharing style. -\item \verb|Users| is number of applications using the label now. -\item \verb|Linger| is \verb|linger| of this label in seconds. -\item \verb|Expires| is time until expiration of the label in seconds. It may - be negative, if the label is in use. -\item \verb|Dst| is IPv6 destination address. 
-\item \verb|Opt| is length of options, associated with the label. Option - data are not accessible. -\end{itemize} - - -\paragraph{Flow labels and RSVP.} -\addcontentsline{toc}{subsection}{Flow labels and RSVP} -RSVP daemon supports IPv6 flow labels -without any modifications to standard ISI RAPI. Sender must allocate -flow label, fill corresponding sender template and submit it to local rsvp -daemon. rsvpd will check the label and start to announce it in PATH -messages. Rsvpd on sender node will renew the flow label, so that it will not -be reused before path state expires and all the intermediate -routers and receiver purge flow state. - -\verb|rtap| utility is modified to parse flow labels. F.e.\ if user allocated -flow label \verb|0xA1234|, he may write: - -\begin{verbatim} -RTAP> sender 3ffe:2400::1/FL0xA1234 <Tspec> -\end{verbatim} - -Receiver makes reservation with command: -\begin{verbatim} -RTAP> reserve ff 3ffe:2400::1/FL0xA1234 <Flowspec> -\end{verbatim} - -\end{document} From 429f314ef7b2405dde8168dd47e5ec410eb742d6 Mon Sep 17 00:00:00 2001 From: Julien Fortin <julien@cumulusnetworks.com> Date: Tue, 26 Sep 2017 16:45:39 -0700 Subject: [PATCH 08/28] lib: json_print: rework 'new_json_obj' drop FILE* argument MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit As Stephen Hemminger mentioned on the last submission the new_json_obj function is always called with fp == stdout, so right now, there's no need for this extra argument. The background for the rework is the following: The ip monitor didn't call `new_json_obj` (even in a non-JSON context), so the static FILE* _fp variable wasn't initialized, thus raising a SIGSEGV in ipaddress.c. This patch should fix this issue for good, new paths won't have to call `new_json_obj`.
How to reproduce: $ ip -t mon label link (gdb) bt .#0 _IO_vfprintf_internal (s=s@entry=0x0, format=format@entry=0x45460d “%d: “, ap=ap@entry=0x7fffffff7f18) at vfprintf.c:1278 .#1 0x0000000000451310 in color_fprintf (fp=0x0, attr=<optimized out>, fmt=0x45460d “%d: “) at color.c:108 .#2 0x000000000044a856 in print_color_int (t=t@entry=PRINT_ANY, color=color@entry=4294967295, key=key@entry=0x4545fc “ifindex”, fmt=fmt@entry=0x45460d “%d: “, value=<optimized out>) at ip_print.c:132 .#3 0x000000000040ccd2 in print_int (value=<optimized out>, fmt=0x45460d “%d: “, key=0x4545fc “ifindex”, t=PRINT_ANY) at ip_common.h:189 .#4 print_linkinfo (who=<optimized out>, n=0x7fffffffa380, arg=0x7ffff77a82a0 <_IO_2_1_stdout_>) at ipaddress.c:1107 .#5 0x0000000000422e13 in accept_msg (who=0x7fffffff8320, ctrl=0x7fffffff8310, n=0x7fffffffa380, arg=0x7ffff77a82a0 <_IO_2_1_stdout_>) at ipmonitor.c:89 .#6 0x000000000044c58f in rtnl_listen (rtnl=0x672160 <rth>, handler=handler@entry=0x422c70 <accept_msg>, jarg=0x7ffff77a82a0 <_IO_2_1_stdout_>) at libnetlink.c:761 .#7 0x00000000004233db in do_ipmonitor (argc=<optimized out>, argv=0x7fffffffe5a0) at ipmonitor.c:310 .#8 0x0000000000408f74 in do_cmd (argv0=0x7fffffffe7f5 “mon”, argc=3, argv=0x7fffffffe588) at ip.c:116 .#9 0x0000000000408a94 in main (argc=4, argv=0x7fffffffe580) at ip.c:311 Fixes: 6377572f ("ip: ip_print: add new API to print JSON or regular format output") Reported-by: David Ahern <dsa@cumulusnetworks.com> Signed-off-by: Julien Fortin <julien@cumulusnetworks.com> --- include/json_print.h | 4 +--- ip/ipaddress.c | 4 ++-- lib/json_print.c | 31 ++++++++++--------------------- 3 files changed, 13 insertions(+), 26 deletions(-) diff --git a/include/json_print.h b/include/json_print.h index 44cf5ac5..b6ce1f9f 100644 --- a/include/json_print.h +++ b/include/json_print.h @@ -29,13 +29,11 @@ enum output_type { PRINT_ANY = 4, }; -void new_json_obj(int json, FILE *fp); +void new_json_obj(int json); void delete_json_obj(void); bool 
is_json_context(void); -void set_current_fp(FILE *fp); - void fflush_fp(void); void open_json_object(const char *str); diff --git a/ip/ipaddress.c b/ip/ipaddress.c index b8bc387a..9e9a7e0a 100644 --- a/ip/ipaddress.c +++ b/ip/ipaddress.c @@ -1815,7 +1815,7 @@ static int ipaddr_showdump(void) if (ipadd_dump_check_magic()) exit(-1); - new_json_obj(json, stdout); + new_json_obj(json); open_json_object(NULL); open_json_array(PRINT_JSON, "addr_info"); @@ -2176,7 +2176,7 @@ static int ipaddr_list_flush_or_save(int argc, char **argv, int action) * Initialize a json_writer and open an array object * if -json was specified. */ - new_json_obj(json, stdout); + new_json_obj(json); /* * If only filter_dev present and none of the other diff --git a/lib/json_print.c b/lib/json_print.c index 93b4119d..aa527af6 100644 --- a/lib/json_print.c +++ b/lib/json_print.c @@ -16,15 +16,14 @@ #include "json_print.h" static json_writer_t *_jw; -static FILE *_fp; #define _IS_JSON_CONTEXT(type) ((type & PRINT_JSON || type & PRINT_ANY) && _jw) #define _IS_FP_CONTEXT(type) (!_jw && (type & PRINT_FP || type & PRINT_ANY)) -void new_json_obj(int json, FILE *fp) +void new_json_obj(int json) { if (json) { - _jw = jsonw_new(fp); + _jw = jsonw_new(stdout); if (!_jw) { perror("json object"); exit(1); @@ -32,7 +31,6 @@ void new_json_obj(int json, FILE *fp) jsonw_pretty(_jw, true); jsonw_start_array(_jw); } - set_current_fp(fp); } void delete_json_obj(void) @@ -48,15 +46,6 @@ bool is_json_context(void) return _jw != NULL; } -void set_current_fp(FILE *fp) -{ - if (!fp) { - fprintf(stderr, "Error: invalid file pointer.\n"); - exit(1); - } - _fp = fp; -} - json_writer_t *get_json_writer(void) { return _jw; @@ -89,7 +78,7 @@ void open_json_array(enum output_type type, const char *str) jsonw_name(_jw, str); jsonw_start_array(_jw); } else if (_IS_FP_CONTEXT(type)) { - fprintf(_fp, "%s", str); + printf("%s", str); } } @@ -103,7 +92,7 @@ void close_json_array(enum output_type type, const char *str) 
jsonw_end_array(_jw); jsonw_pretty(_jw, true); } else if (_IS_FP_CONTEXT(type)) { - fprintf(_fp, "%s", str); + printf("%s", str); } } @@ -124,7 +113,7 @@ void close_json_array(enum output_type type, const char *str) else \ jsonw_##type_name##_field(_jw, key, value); \ } else if (_IS_FP_CONTEXT(t)) { \ - color_fprintf(_fp, color, fmt, value); \ + color_fprintf(stdout, color, fmt, value); \ } \ } _PRINT_FUNC(int, int); @@ -147,7 +136,7 @@ void print_color_string(enum output_type type, else jsonw_string_field(_jw, key, value); } else if (_IS_FP_CONTEXT(type)) { - color_fprintf(_fp, color, fmt, value); + color_fprintf(stdout, color, fmt, value); } } @@ -168,7 +157,7 @@ void print_color_bool(enum output_type type, else jsonw_bool(_jw, value); } else if (_IS_FP_CONTEXT(type)) { - color_fprintf(_fp, color, fmt, value ? "true" : "false"); + color_fprintf(stdout, color, fmt, value ? "true" : "false"); } } @@ -187,7 +176,7 @@ void print_color_0xhex(enum output_type type, snprintf(b1, sizeof(b1), "%#x", hex); print_string(PRINT_JSON, key, NULL, b1); } else if (_IS_FP_CONTEXT(type)) { - color_fprintf(_fp, color, fmt, hex); + color_fprintf(stdout, color, fmt, hex); } } @@ -206,7 +195,7 @@ void print_color_hex(enum output_type type, else jsonw_string(_jw, b1); } else if (_IS_FP_CONTEXT(type)) { - color_fprintf(_fp, color, fmt, hex); + color_fprintf(stdout, color, fmt, hex); } } @@ -226,6 +215,6 @@ void print_color_null(enum output_type type, else jsonw_null(_jw); } else if (_IS_FP_CONTEXT(type)) { - color_fprintf(_fp, color, fmt, value); + color_fprintf(stdout, color, fmt, value); } } From b2fd7a0e6efa7b85a041b5cb9ea6fc1a6a798fd3 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger <stephen@networkplumber.org> Date: Fri, 29 Sep 2017 10:50:13 -0700 Subject: [PATCH 09/28] doc: drop old ip command documentation The old IP cross reference manual was very out of date, barely updated since 1999. The correct documentation is in the man pages. 
Signed-off-by: Stephen Hemminger <stephen@networkplumber.org> --- Makefile | 4 +- doc/Makefile | 69 - doc/SNAPSHOT.tex | 1 - doc/do-psnup | 16 - doc/ip-cref.tex | 3453 ---------------------------------------------- doc/preamble.tex | 26 - 6 files changed, 2 insertions(+), 3567 deletions(-) delete mode 100644 doc/Makefile delete mode 100644 doc/SNAPSHOT.tex delete mode 100644 doc/do-psnup delete mode 100644 doc/ip-cref.tex delete mode 100644 doc/preamble.tex diff --git a/Makefile b/Makefile index 7a691dea..75c0e570 100644 --- a/Makefile +++ b/Makefile @@ -73,7 +73,7 @@ install: all $(DESTDIR)$(DOCDIR)/examples install -m 0644 $(shell find examples/diffserv -maxdepth 1 -type f) \ $(DESTDIR)$(DOCDIR)/examples/diffserv - @for i in $(SUBDIRS) doc; do $(MAKE) -C $$i install; done + @for i in $(SUBDIRS); do $(MAKE) -C $$i install; done install -m 0644 $(shell find etc/iproute2 -maxdepth 1 -type f) $(DESTDIR)$(CONFDIR) install -m 0755 -d $(DESTDIR)$(BASH_COMPDIR) install -m 0644 bash-completion/tc $(DESTDIR)$(BASH_COMPDIR) @@ -84,7 +84,7 @@ snapshot: > include/SNAPSHOT.h clean: - @for i in $(SUBDIRS) doc; \ + @for i in $(SUBDIRS); \ do $(MAKE) $(MFLAGS) -C $$i clean; done clobber: diff --git a/doc/Makefile b/doc/Makefile deleted file mode 100644 index 3bfcdc2b..00000000 --- a/doc/Makefile +++ /dev/null @@ -1,69 +0,0 @@ -PSFILES=ip-cref.ps - -LATEX=latex -DVIPS=dvips -SGML2DVI=sgml2latex -SGML2HTML=sgml2html -s 0 -LPR=lpr -Zsduplex -SHELL=bash -PAGESIZE=a4 -PAGESPERPAGE=2 - -HTMLFILES=$(subst .sgml,.html,$(shell echo *.sgml)) -DVIFILES=$(subst .ps,.dvi,$(PSFILES)) -PDFFILES=$(subst .ps,.pdf,$(PSFILES)) - - -all: pstwocol - -pstwocol: $(PSFILES) - -html: $(HTMLFILES) - -dvi: $(DVIFILES) - -pdf: $(PDFFILES) - -print: $(PSFILES) - $(LPR) $(PSFILES) - -%.tex: %.sgml - $(SGML2DVI) --output=tex $< - -%.dvi: %.sgml - $(SGML2DVI) --output=dvi $< - -%.dvi: %.tex - @set -e; pass=2; echo "Running LaTeX $<"; \ - while [ `$(LATEX) $< </dev/null 2>&1 | \ - grep -c '^\(LaTeX Warning: 
Label(s) may\|No file \|! Emergency stop\)'` -ge 1 ]; do \ - if [ $$pass -gt 3 ]; then \ - echo "Seems, something is wrong. Try by hands." ; exit 1 ; \ - fi; \ - echo "Re-running LaTeX $<, $${pass}d pass"; pass=$$[$$pass + 1]; \ - done - -%.pdf: %.tex - @set -e; pass=2; echo "Running pdfLaTeX $<"; \ - while [ `pdflatex $< </dev/null 2>&1 | \ - grep -c '^\(LaTeX Warning: Label(s) may\|No file \|! Emergency stop\)'` -ge 1 ]; do \ - if [ $$pass -gt 3 ]; then \ - echo "Seems, something is wrong. Try by hands." ; exit 1 ; \ - fi; \ - echo "Re-running pdfLaTeX $<, $${pass}d pass"; pass=$$[$$pass + 1]; \ - done -#%.pdf: %.ps -# ps2pdf $< - -%.ps: %.dvi - $(DVIPS) $< -o $@ - -%.html: %.sgml - $(SGML2HTML) $< - -install: - install -m 0644 $(shell echo *.tex) $(DESTDIR)$(DOCDIR) - install -m 0644 $(shell echo *.sgml) $(DESTDIR)$(DOCDIR) - -clean: - rm -f *.aux *.log *.toc $(PSFILES) $(DVIFILES) *.html *.pdf diff --git a/doc/SNAPSHOT.tex b/doc/SNAPSHOT.tex deleted file mode 100644 index 7ed02984..00000000 --- a/doc/SNAPSHOT.tex +++ /dev/null @@ -1 +0,0 @@ -\def\Draft{020116} diff --git a/doc/do-psnup b/doc/do-psnup deleted file mode 100644 index 2dce848e..00000000 --- a/doc/do-psnup +++ /dev/null @@ -1,16 +0,0 @@ -#! /bin/bash -# $1 = Temporary file . "string" -# $2 = File to process . "string" -# $3 = Page size . ie: a4 , letter ... "string" -# $4 = Number of pages to fit on a single sheet . "numeric" - -if type psnup >&/dev/null; then - echo "psnup -$4 -p$3 $1 $2" - psnup -$4 -p$3 $1 $2 -elif type psmulti >&/dev/null; then - echo "psmulti $1 > $2" - psmulti $1 > $2 -else - echo "cp $1 $2" - cp $1 $2 -fi diff --git a/doc/ip-cref.tex b/doc/ip-cref.tex deleted file mode 100644 index 179baa2f..00000000 --- a/doc/ip-cref.tex +++ /dev/null @@ -1,3453 +0,0 @@ -\documentstyle[12pt,twoside]{article} -\def\TITLE{IP Command Reference} -\input preamble -\begin{center} -\Large\bf IP Command Reference. 
-\end{center} - - -\begin{center} -{ \large Alexey~N.~Kuznetsov } \\ -\em Institute for Nuclear Research, Moscow \\ -\verb|kuznet@ms2.inr.ac.ru| \\ -\rm April 14, 1999 -\end{center} - -\vspace{5mm} - -\tableofcontents - -\newpage - -\section{About this document} - -This document presents a comprehensive description of the \verb|ip| utility -from the \verb|iproute2| package. It is not a tutorial or user's guide. -It is a {\em dictionary\/}, not explaining terms, -but translating them into other terms, which may also be unknown to the reader. -However, the document is self-contained and the reader, provided they have a -basic networking background, will find enough information -and examples to understand and configure Linux-2.2 IP and IPv6 -networking. - -This document is split into sections explaining \verb|ip| commands -and options, decrypting \verb|ip| output and containing a few examples. -More voluminous examples and some topics, which require more elaborate -discussion, are in the appendix. - -The paragraphs beginning with NB contain side notes, warnings about -bugs and design drawbacks. They may be skipped at the first reading. - -\section{{\tt ip} --- command syntax} - -The generic form of an \verb|ip| command is: -\begin{verbatim} -ip [ OPTIONS ] OBJECT [ COMMAND [ ARGUMENTS ]] -\end{verbatim} -where \verb|OPTIONS| is a set of optional modifiers affecting the -general behaviour of the \verb|ip| utility or changing its output. All options -begin with the character \verb|'-'| and may be used in either long or abbreviated -forms. Currently, the following options are available: - -\begin{itemize} -\item \verb|-V|, \verb|-Version| - ---- print the version of the \verb|ip| utility and exit. - - -\item \verb|-s|, \verb|-stats|, \verb|-statistics| - ---- output more information. If the option -appears twice or more, the amount of information increases. -As a rule, the information is statistics or some time values. 
- -\item \verb|-d|, \verb|-details| - ---- output more detailed information. - -\item \verb|-f|, \verb|-family| followed by a protocol family -identifier: \verb|inet|, \verb|inet6| or \verb|link|. - ---- enforce the protocol family to use. If the option is not present, -the protocol family is guessed from other arguments. If the rest of the command -line does not give enough information to guess the family, \verb|ip| falls back to the default -one, usually \verb|inet| or \verb|any|. \verb|link| is a special family -identifier meaning that no networking protocol is involved. - -\item \verb|-4| - ---- shortcut for \verb|-family inet|. - -\item \verb|-6| - ---- shortcut for \verb|-family inet6|. - -\item \verb|-0| - ---- shortcut for \verb|-family link|. - - -\item \verb|-o|, \verb|-oneline| - ---- output each record on a single line, replacing line feeds -with the \verb|'\'| character. This is convenient when you want to -count records with \verb|wc| or to \verb|grep| the output. The trivial -script \verb|rtpr| converts the output back into readable form. - -\item \verb|-r|, \verb|-resolve| - ---- use the system's name resolver to print DNS names instead of -host addresses. - -\begin{NB} - Do not use this option when reporting bugs or asking for advice. -\end{NB} -\begin{NB} - \verb|ip| never uses DNS to resolve names to addresses. -\end{NB} - -\item \verb|-b|, \verb|-batch FILE| - ---- read commands from provided file or standart input and invoke them. -First failure will cause termination of \verb|ip|. -In batch \verb|FILE| everything which begins with \verb|#| symbol is -ignored and can be used for comments. 
-\paragraph{Example:} -\begin{verbatim} -kuznet@kaiser $ cat /tmp/ip_batch.ip -# This is a comment -tuntap add mode tap tap1 # This is an another comment -link set up dev tap1 -addr add 10.0.0.1/24 dev tap1 -kuznet@kaiser $ sudo ip -b /tmp/ip_batch.ip -\end{verbatim} -or from standart input: -\begin{verbatim} -kuznet@kaiser $ cat /tmp/ip_batch.ip | sudo ip -b - -\end{verbatim} - -\item \verb|-force| - ---- don't terminate ip on errors in batch mode. -If there were any errors during execution of the commands, -the application return code will be non zero. - -\item \verb|-l|, \verb|-loops COUNT| - ---- specify maximum number of loops the 'ip addr flush' logic will attempt -before giving up. The default is 10. Zero (0) means loop until all -addresses are removed. - -\end{itemize} - -\verb|OBJECT| is the object to manage or to get information about. -The object types currently understood by \verb|ip| are: - -\begin{itemize} -\item \verb|link| --- network device -\item \verb|address| --- protocol (IP or IPv6) address on a device -\item \verb|neighbour| --- ARP or NDISC cache entry -\item \verb|route| --- routing table entry -\item \verb|rule| --- rule in routing policy database -\item \verb|maddress| --- multicast address -\item \verb|mroute| --- multicast routing cache entry -\item \verb|tunnel| --- tunnel over IP -\end{itemize} - -Again, the names of all objects may be written in full or -abbreviated form, f.e.\ \verb|address| is abbreviated as \verb|addr| -or just \verb|a|. - -\verb|COMMAND| specifies the action to perform on the object. -The set of possible actions depends on the object type. -As a rule, it is possible to \verb|add|, \verb|delete| and -\verb|show| (or \verb|list|) objects, but some objects -do not allow all of these operations or have some additional commands. -The \verb|help| command is available for all objects. It prints -out a list of available commands and argument syntax conventions. - -If no command is given, some default command is assumed. 
-Usually it is \verb|list| or, if the objects of this class -cannot be listed, \verb|help|. - -\verb|ARGUMENTS| is a list of arguments to the command. -The arguments depend on the command and object. There are two types of arguments: -{\em flags\/}, consisting of a single keyword, and {\em parameters\/}, -consisting of a keyword followed by a value. For convenience, -each command has some {\em default parameter\/} -which may be omitted. F.e.\ parameter \verb|dev| is the default -for the {\tt ip link} command, so {\tt ip link ls eth0} is equivalent -to {\tt ip link ls dev eth0}. -In the command descriptions below such parameters -are distinguished with the marker: ``(default)''. - -Almost all keywords may be abbreviated with several first (or even single) -letters. The shortcuts are convenient when \verb|ip| is used interactively, -but they are not recommended in scripts or when reporting bugs -or asking for advice. ``Officially'' allowed abbreviations are listed -in the document body. - - - -\section{{\tt ip} --- error messages} - -\verb|ip| may fail for one of the following reasons: - -\begin{itemize} -\item -A syntax error on the command line: an unknown keyword, incorrectly formatted -IP address {\em et al\/}. In this case \verb|ip| prints an error message -and exits. As a rule, the error message will contain information -about the reason for the failure. Sometimes it also prints a help page. - -\item -The arguments did not pass verification for self-consistency. - -\item -\verb|ip| failed to compile a kernel request from the arguments -because the user didn't give enough information. - -\item -The kernel returned an error to some syscall. In this case \verb|ip| -prints the error message, as it is output with \verb|perror(3)|, -prefixed with a comment and a syscall identifier. - -\item -The kernel returned an error to some RTNETLINK request. 
-In this case \verb|ip| prints the error message, as it is output -with \verb|perror(3)| prefixed with ``RTNETLINK answers:''. - -\end{itemize} - -All the operations are atomic, i.e.\ -if the \verb|ip| utility fails, it does not change anything -in the system. One harmful exception is \verb|ip link| command -(Sec.\ref{IP-LINK}, p.\pageref{IP-LINK}), -which may change only some of the device parameters given -on command line. - -It is difficult to list all the error messages (especially -syntax errors). However, as a rule, their meaning is clear -from the context of the command. - -The most common mistakes are: - -\begin{enumerate} -\item Netlink is not configured in the kernel. The message is: -\begin{verbatim} -Cannot open netlink socket: Invalid value -\end{verbatim} - -\item RTNETLINK is not configured in the kernel. In this case -one of the following messages may be printed, depending on the command: -\begin{verbatim} -Cannot talk to rtnetlink: Connection refused -Cannot send dump request: Connection refused -\end{verbatim} - -\item The \verb|CONFIG_IP_MULTIPLE_TABLES| option was not selected -when configuring the kernel. In this case any attempt to use the -\verb|ip| \verb|rule| command will fail, f.e. -\begin{verbatim} -kuznet@kaiser $ ip rule list -RTNETLINK error: Invalid argument -dump terminated -\end{verbatim} - -\end{enumerate} - - -\section{{\tt ip link} --- network device configuration} -\label{IP-LINK} - -\paragraph{Object:} A \verb|link| is a network device and the corresponding -commands display and change the state of devices. - -\paragraph{Commands:} \verb|set| and \verb|show| (or \verb|list|). - -\subsection{{\tt ip link set} --- change device attributes} - -\paragraph{Abbreviations:} \verb|set|, \verb|s|. - -\paragraph{Arguments:} - -\begin{itemize} -\item \verb|dev NAME| (default) - ---- \verb|NAME| specifies the network device on which to operate. 
- -\item \verb|up| and \verb|down| - ---- change the state of the device to \verb|UP| or \verb|DOWN|. - -\item \verb|arp on| or \verb|arp off| - ---- change the \verb|NOARP| flag on the device. - -\begin{NB} -This operation is {\em not allowed\/} if the device is in state \verb|UP|. -Though neither the \verb|ip| utility nor the kernel check for this condition. -You can get unpredictable results changing this flag while the -device is running. -\end{NB} - -\item \verb|multicast on| or \verb|multicast off| - ---- change the \verb|MULTICAST| flag on the device. - -\item \verb|dynamic on| or \verb|dynamic off| - ---- change the \verb|DYNAMIC| flag on the device. - -\item \verb|name NAME| - ---- change the name of the device. This operation is not -recommended if the device is running or has some addresses -already configured. - -\item \verb|txqueuelen NUMBER| or \verb|txqlen NUMBER| - ---- change the transmit queue length of the device. - -\item \verb|mtu NUMBER| - ---- change the MTU of the device. - -\item \verb|address LLADDRESS| - ---- change the station address of the interface. - -\item \verb|broadcast LLADDRESS|, \verb|brd LLADDRESS| or \verb|peer LLADDRESS| - ---- change the link layer broadcast address or the peer address when -the interface is \verb|POINTOPOINT|. - -\vskip 1mm -\begin{NB} -For most devices (f.e.\ for Ethernet) changing the link layer -broadcast address will break networking. -Do not use it, if you do not understand what this operation really does. -\end{NB} - -\item \verb|netns PID| - ---- move the device to the network namespace associated with the process PID. - -\end{itemize} - -\vskip 1mm -\begin{NB} -The \verb|PROMISC| and \verb|ALLMULTI| flags are considered -obsolete and should not be changed administratively, though -the {\tt ip} utility will allow that. -\end{NB} - -\paragraph{Warning:} If multiple parameter changes are requested, -\verb|ip| aborts immediately after any of the changes have failed. 
-This is the only case when \verb|ip| can move the system to -an unpredictable state. The solution is to avoid changing -several parameters with one {\tt ip link set} call. - -\paragraph{Examples:} -\begin{itemize} -\item \verb|ip link set dummy address 00:00:00:00:00:01| - ---- change the station address of the interface \verb|dummy|. - -\item \verb|ip link set dummy up| - ---- start the interface \verb|dummy|. - -\end{itemize} - - -\subsection{{\tt ip link show} --- display device attributes} -\label{IP-LINK-SHOW} - -\paragraph{Abbreviations:} \verb|show|, \verb|list|, \verb|lst|, \verb|sh|, \verb|ls|, -\verb|l|. - -\paragraph{Arguments:} -\begin{itemize} -\item \verb|dev NAME| (default) - ---- \verb|NAME| specifies the network device to show. -If this argument is omitted all devices are listed. - -\item \verb|up| - ---- only display running interfaces. - -\end{itemize} - - -\paragraph{Output format:} - -\begin{verbatim} -kuznet@alisa:~ $ ip link ls eth0 -3: eth0: <BROADCAST,MULTICAST,UP> mtu 1500 qdisc cbq qlen 100 - link/ether 00:a0:cc:66:18:78 brd ff:ff:ff:ff:ff:ff -kuznet@alisa:~ $ ip link ls sit0 -5: sit0@NONE: <NOARP,UP> mtu 1480 qdisc noqueue - link/sit 0.0.0.0 brd 0.0.0.0 -kuznet@alisa:~ $ ip link ls dummy -2: dummy: <BROADCAST,NOARP> mtu 1500 qdisc noop - link/ether 00:00:00:00:00:00 brd ff:ff:ff:ff:ff:ff -kuznet@alisa:~ $ -\end{verbatim} - - -The number before each colon is an {\em interface index\/} or {\em ifindex\/}. -This number uniquely identifies the interface. This is followed by the {\em interface name\/} -(\verb|eth0|, \verb|sit0| etc.). The interface name is also -unique at every given moment. However, the interface may disappear from the -list (f.e.\ when the corresponding driver module is unloaded) and another -one with the same name may be created later. Besides that, -the administrator may change the name of any device with -\verb|ip| \verb|link| \verb|set| \verb|name| -to make it more intelligible. 
- -The interface name may have another name or \verb|NONE| appended -after the \verb|@| sign. This means that this device is bound to some other -device, -i.e.\ packets send through it are encapsulated and sent via the ``master'' -device. If the name is \verb|NONE|, the master is unknown. - -Then we see the interface {\em mtu\/} (``maximal transfer unit''). This determines -the maximal size of data which can be sent as a single packet over this interface. - -{\em qdisc\/} (``queuing discipline'') shows the queuing algorithm used -on the interface. Particularly, \verb|noqueue| means that this interface -does not queue anything and \verb|noop| means that the interface is in blackhole -mode i.e.\ all packets sent to it are immediately discarded. -{\em qlen\/} is the default transmit queue length of the device measured -in packets. - -The interface flags are summarized in the angle brackets. - -\begin{itemize} -\item \verb|UP| --- the device is turned on. It is ready to accept -packets for transmission and it may inject into the kernel packets received -from other nodes on the network. - -\item \verb|LOOPBACK| --- the interface does not communicate with other -hosts. All packets sent through it will be returned -and nothing but bounced packets can be received. - -\item \verb|BROADCAST| --- the device has the facility to send packets -to all hosts sharing the same link. A typical example is an Ethernet link. - -\item \verb|POINTOPOINT| --- the link has only two ends with one node -attached to each end. All packets sent to this link will reach the peer -and all packets received by us came from this single peer. - -If neither \verb|LOOPBACK| nor \verb|BROADCAST| nor \verb|POINTOPOINT| -are set, the interface is assumed to be NMBA (Non-Broadcast Multi-Access). -This is the most generic type of device and the most complicated one, because -the host attached to a NBMA link has no means to send to anyone -without additionally configured information. 
- -\item \verb|MULTICAST| --- is an advisory flag indicating that the interface -is aware of multicasting i.e.\ sending packets to some subset of neighbouring -nodes. Broadcasting is a particular case of multicasting, where the multicast -group consists of all nodes on the link. It is important to emphasize -that software {\em must not\/} interpret the absence of this flag as the inability -to use multicasting on this interface. Any \verb|POINTOPOINT| and -\verb|BROADCAST| link is multicasting by definition, because we have -direct access to all the neighbours and, hence, to any part of them. -Certainly, the use of high bandwidth multicast transfers is not recommended -on broadcast-only links because of high expense, but it is not strictly -prohibited. - -\item \verb|PROMISC| --- the device listens to and feeds to the kernel all -traffic on the link even if it is not destined for us, not broadcasted -and not destined for a multicast group of which we are member. Usually -this mode exists only on broadcast links and is used by bridges and for network -monitoring. - -\item \verb|ALLMULTI| --- the device receives all multicast packets -wandering on the link. This mode is used by multicast routers. - -\item \verb|NOARP| --- this flag is different from the other ones. It has -no invariant value and its interpretation depends on the network protocols -involved. As a rule, it indicates that the device needs no address -resolution and that the software or hardware knows how to deliver packets -without any help from the protocol stacks. - -\item \verb|DYNAMIC| --- is an advisory flag indicating that the interface is -dynamically created and destroyed. - -\item \verb|SLAVE| --- this interface is bonded to some other interfaces -to share link capacities. 
- -\end{itemize} - -\vskip 1mm -\begin{NB} -There are other flags but they are either obsolete (\verb|NOTRAILERS|) -or not implemented (\verb|DEBUG|) or specific to some devices -(\verb|MASTER|, \verb|AUTOMEDIA| and \verb|PORTSEL|). We do not discuss -them here. -\end{NB} - - -The second line contains information on the link layer addresses -associated with the device. The first word (\verb|ether|, \verb|sit|) -defines the interface hardware type. This type determines the format and semantics -of the addresses and is logically part of the address. -The default format of the station address and the broadcast address -(or the peer address for pointopoint links) is a -sequence of hexadecimal bytes separated by colons, but some link -types may have their natural address format, f.e.\ addresses -of tunnels over IP are printed as dotted-quad IP addresses. - -\vskip 1mm -\begin{NB} - NBMA links have no well-defined broadcast or peer address, - however this field may contain useful information, f.e.\ - about the address of broadcast relay or about the address of the ARP server. -\end{NB} -\begin{NB} -Multicast addresses are not shown by this command, see -\verb|ip maddr ls| in~Sec.\ref{IP-MADDR} (p.\pageref{IP-MADDR} of this -document). -\end{NB} - - -\paragraph{Statistics:} With the \verb|-statistics| option, \verb|ip| also -prints interface statistics: - -\begin{verbatim} -kuznet@alisa:~ $ ip -s link ls eth0 -3: eth0: <BROADCAST,MULTICAST,UP> mtu 1500 qdisc cbq qlen 100 - link/ether 00:a0:cc:66:18:78 brd ff:ff:ff:ff:ff:ff - RX: bytes packets errors dropped overrun mcast - 2449949362 2786187 0 0 0 0 - TX: bytes packets errors dropped carrier collsns - 178558497 1783945 332 0 332 35172 -kuznet@alisa:~ $ -\end{verbatim} -\verb|RX:| and \verb|TX:| lines summarize receiver and transmitter -statistics. They contain: -\begin{itemize} -\item \verb|bytes| --- the total number of bytes received or transmitted -on the interface. 
This number wraps when the maximal length of the data type -natural for the architecture is exceeded, so continuous monitoring requires -a user level daemon snapping it periodically. -\item \verb|packets| --- the total number of packets received or transmitted -on the interface. -\item \verb|errors| --- the total number of receiver or transmitter errors. -\item \verb|dropped| --- the total number of packets dropped due to lack -of resources. -\item \verb|overrun| --- the total number of receiver overruns resulting -in dropped packets. As a rule, if the interface is overrun, it means -serious problems in the kernel or that your machine is too slow -for this interface. -\item \verb|mcast| --- the total number of received multicast packets. This option -is only supported by a few devices. -\item \verb|carrier| --- total number of link media failures f.e.\ because -of lost carrier. -\item \verb|collsns| --- the total number of collision events -on Ethernet-like media. This number may have a different sense on other -link types. -\item \verb|compressed| --- the total number of compressed packets. This is -available only for links using VJ header compression. -\end{itemize} - - -If the \verb|-s| option is entered twice or more, -\verb|ip| prints more detailed statistics on receiver -and transmitter errors. - -\begin{verbatim} -kuznet@alisa:~ $ ip -s -s link ls eth0 -3: eth0: <BROADCAST,MULTICAST,UP> mtu 1500 qdisc cbq qlen 100 - link/ether 00:a0:cc:66:18:78 brd ff:ff:ff:ff:ff:ff - RX: bytes packets errors dropped overrun mcast - 2449949362 2786187 0 0 0 0 - RX errors: length crc frame fifo missed - 0 0 0 0 0 - TX: bytes packets errors dropped carrier collsns - 178558497 1783945 332 0 332 35172 - TX errors: aborted fifo window heartbeat - 0 0 0 332 -kuznet@alisa:~ $ -\end{verbatim} -These error names are pure Ethernetisms. Other devices -may have non zero values in these fields but they may be -interpreted differently. 
- - -\section{{\tt ip address} --- protocol address management} - -\paragraph{Abbreviations:} \verb|address|, \verb|addr|, \verb|a|. - -\paragraph{Object:} The \verb|address| is a protocol (IP or IPv6) address attached -to a network device. Each device must have at least one address -to use the corresponding protocol. It is possible to have several -different addresses attached to one device. These addresses are not -discriminated, so that the term {\em alias\/} is not quite appropriate -for them and we do not use it in this document. - -The \verb|ip addr| command displays addresses and their properties, -adds new addresses and deletes old ones. - -\paragraph{Commands:} \verb|add|, \verb|delete|, \verb|flush| and \verb|show| -(or \verb|list|). - - -\subsection{{\tt ip address add} --- add a new protocol address} -\label{IP-ADDR-ADD} - -\paragraph{Abbreviations:} \verb|add|, \verb|a|. - -\paragraph{Arguments:} - -\begin{itemize} -\item \verb|dev NAME| - -\noindent--- the name of the device to add the address to. - -\item \verb|local ADDRESS| (default) - ---- the address of the interface. The format of the address depends -on the protocol. It is a dotted quad for IP and a sequence of hexadecimal halfwords -separated by colons for IPv6. The \verb|ADDRESS| may be followed by -a slash and a decimal number which encodes the network prefix length. - - -\item \verb|peer ADDRESS| - ---- the address of the remote endpoint for pointopoint interfaces. -Again, the \verb|ADDRESS| may be followed by a slash and a decimal number, -encoding the network prefix length. If a peer address is specified, -the local address {\em cannot\/} have a prefix length. The network prefix is associated -with the peer rather than with the local address. - - -\item \verb|broadcast ADDRESS| - ---- the broadcast address on the interface. - -It is possible to use the special symbols \verb|'+'| and \verb|'-'| -instead of the broadcast address. 
In this case, the broadcast address -is derived by setting/resetting the host bits of the interface prefix. - -\vskip 1mm -\begin{NB} -Unlike \verb|ifconfig|, the \verb|ip| utility {\em does not\/} set any broadcast -address unless explicitly requested. -\end{NB} - - -\item \verb|label NAME| - ---- Each address may be tagged with a label string. -In order to preserve compatibility with Linux-2.0 net aliases, -this string must coincide with the name of the device or must be prefixed -with the device name followed by colon. - - -\item \verb|scope SCOPE_VALUE| - ---- the scope of the area where this address is valid. -The available scopes are listed in file \verb|/etc/iproute2/rt_scopes|. -Predefined scope values are: - - \begin{itemize} - \item \verb|global| --- the address is globally valid. - \item \verb|site| --- (IPv6 only) the address is site local, - i.e.\ it is valid inside this site. - \item \verb|link| --- the address is link local, i.e.\ - it is valid only on this device. - \item \verb|host| --- the address is valid only inside this host. - \end{itemize} - -Appendix~\ref{ADDR-SEL} (p.\pageref{ADDR-SEL} of this document) -contains more details on address scopes. - -\end{itemize} - -\paragraph{Examples:} -\begin{itemize} -\item \verb|ip addr add 127.0.0.1/8 dev lo brd + scope host| - ---- add the usual loopback address to the loopback device. - -\item \verb|ip addr add 10.0.0.1/24 brd + dev eth0 label eth0:Alias| - ---- add the address 10.0.0.1 with prefix length 24 (i.e.\ netmask -\verb|255.255.255.0|), standard broadcast and label \verb|eth0:Alias| -to the interface \verb|eth0|. -\end{itemize} - - -\subsection{{\tt ip address delete} --- delete a protocol address} - -\paragraph{Abbreviations:} \verb|delete|, \verb|del|, \verb|d|. - -\paragraph{Arguments:} coincide with the arguments of \verb|ip addr add|. -The device name is a required argument. The rest are optional. -If no arguments are given, the first address is deleted. 
- -\paragraph{Examples:} -\begin{itemize} -\item \verb|ip addr del 127.0.0.1/8 dev lo| - ---- deletes the loopback address from the loopback device. -It would be best not to repeat this experiment. - -\item Disable IP on the interface \verb|eth0|: -\begin{verbatim} - while ip -f inet addr del dev eth0; do - : nothing - done -\end{verbatim} -Another method to disable IP on an interface using {\tt ip addr flush} -may be found in sec.\ref{IP-ADDR-FLUSH}, p.\pageref{IP-ADDR-FLUSH}. - -\end{itemize} - - -\subsection{{\tt ip address show} --- display protocol addresses} - -\paragraph{Abbreviations:} \verb|show|, \verb|list|, \verb|lst|, \verb|sh|, \verb|ls|, -\verb|l|. - -\paragraph{Arguments:} - -\begin{itemize} -\item \verb|dev NAME| (default) - ---- the name of the device. - -\item \verb|scope SCOPE_VAL| - ---- only list addresses with this scope. - -\item \verb|to PREFIX| - ---- only list addresses matching this prefix. - -\item \verb|label PATTERN| - ---- only list addresses with labels matching the \verb|PATTERN|. -\verb|PATTERN| is a usual shell style pattern. - - -\item \verb|dynamic| and \verb|permanent| - ---- (IPv6 only) only list addresses installed due to stateless -address configuration or only list permanent (not dynamic) addresses. - -\item \verb|tentative| - ---- (IPv6 only) only list addresses which did not pass duplicate -address detection. - -\item \verb|deprecated| - ---- (IPv6 only) only list deprecated addresses. - - -\item \verb|primary| and \verb|secondary| - ---- only list primary (or secondary) addresses. 
- -\end{itemize} - - -\paragraph{Output format:} - -\begin{verbatim} -kuznet@alisa:~ $ ip addr ls eth0 -3: eth0: <BROADCAST,MULTICAST,UP> mtu 1500 qdisc cbq qlen 100 - link/ether 00:a0:cc:66:18:78 brd ff:ff:ff:ff:ff:ff - inet 193.233.7.90/24 brd 193.233.7.255 scope global eth0 - inet6 3ffe:2400:0:1:2a0:ccff:fe66:1878/64 scope global dynamic - valid_lft forever preferred_lft 604746sec - inet6 fe80::2a0:ccff:fe66:1878/10 scope link -kuznet@alisa:~ $ -\end{verbatim} - -The first two lines coincide with the output of \verb|ip link ls|. -It is natural to interpret link layer addresses -as addresses of the protocol family \verb|AF_PACKET|. - -Then the list of IP and IPv6 addresses follows, accompanied by -additional address attributes: scope value (see Sec.\ref{IP-ADDR-ADD}, -p.\pageref{IP-ADDR-ADD} above), flags and the address label. - -Address flags are set by the kernel and cannot be changed -administratively. Currently, the following flags are defined: - -\begin{enumerate} -\item \verb|secondary| - ---- the address is not used when selecting the default source address -of outgoing packets (Cf.\ Appendix~\ref{ADDR-SEL}, p.\pageref{ADDR-SEL}.). -An IP address becomes secondary if another address with the same -prefix bits already exists. The first address is primary. -It is the leader of the group of all secondary addresses. When the leader -is deleted, all secondaries are purged too. -There is a tweak in \verb|/proc/sys/net/ipv4/conf/<dev>/promote_secondaries| -which activate secondaries promotion when a primary is deleted. -To permanently enable this feature on all devices add -\verb|net.ipv4.conf.all.promote_secondaries=1| to \verb|/etc/sysctl.conf|. -This tweak is available in linux 2.6.15 and later. - - -\item \verb|dynamic| - ---- the address was created due to stateless autoconfiguration~\cite{RFC-ADDRCONF}. -In this case the output also contains information on times, when -the address is still valid. 
After \verb|preferred_lft| expires the address is -moved to the deprecated state. After \verb|valid_lft| expires the address -is finally invalidated. - -\item \verb|deprecated| - ---- the address is deprecated, i.e.\ it is still valid, but cannot -be used by newly created connections. - -\item \verb|tentative| - ---- the address is not used because duplicate address detection~\cite{RFC-ADDRCONF} -is still not complete or failed. - -\end{enumerate} - - -\subsection{{\tt ip address flush} --- flush protocol addresses} -\label{IP-ADDR-FLUSH} - -\paragraph{Abbreviations:} \verb|flush|, \verb|f|. - -\paragraph{Description:}This command flushes the protocol addresses -selected by some criteria. - -\paragraph{Arguments:} This command has the same arguments as \verb|show|. -The difference is that it does not run when no arguments are given. - -\paragraph{Warning:} This command (and other \verb|flush| commands -described below) is pretty dangerous. If you make a mistake, it will -not forgive it, but will cruelly purge all the addresses. - -\paragraph{Statistics:} With the \verb|-statistics| option, the command -becomes verbose. It prints out the number of deleted addresses and the number -of rounds made to flush the address list. If this option is given -twice, \verb|ip addr flush| also dumps all the deleted addresses -in the format described in the previous subsection. 
- -\paragraph{Example:} Delete all the addresses from the private network -10.0.0.0/8: -\begin{verbatim} -netadm@amber:~ # ip -s -s a f to 10/8 -2: dummy inet 10.7.7.7/16 brd 10.7.255.255 scope global dummy -3: eth0 inet 10.10.7.7/16 brd 10.10.255.255 scope global eth0 -4: eth1 inet 10.8.7.7/16 brd 10.8.255.255 scope global eth1 - -*** Round 1, deleting 3 addresses *** -*** Flush is complete after 1 round *** -netadm@amber:~ # -\end{verbatim} -Another instructive example is disabling IP on all the Ethernets: -\begin{verbatim} -netadm@amber:~ # ip -4 addr flush label "eth*" -\end{verbatim} -And the last example shows how to flush all the IPv6 addresses -acquired by the host from stateless address autoconfiguration -after you enabled forwarding or disabled autoconfiguration. -\begin{verbatim} -netadm@amber:~ # ip -6 addr flush dynamic -\end{verbatim} - - - -\section{{\tt ip neighbour} --- neighbour/arp tables management} - -\paragraph{Abbreviations:} \verb|neighbour|, \verb|neighbor|, \verb|neigh|, -\verb|n|. - -\paragraph{Object:} \verb|neighbour| objects establish bindings between protocol -addresses and link layer addresses for hosts sharing the same link. -Neighbour entries are organized into tables. The IPv4 neighbour table -is known by another name --- the ARP table. - -The corresponding commands display neighbour bindings -and their properties, add new neighbour entries and delete old ones. - -\paragraph{Commands:} \verb|add|, \verb|change|, \verb|replace|, -\verb|delete|, \verb|flush| and \verb|show| (or \verb|list|). - -\paragraph{See also:} Appendix~\ref{PROXY-NEIGH}, p.\pageref{PROXY-NEIGH} -describes how to manage proxy ARP/NDISC with the \verb|ip| utility. 
- - -\subsection{{\tt ip neighbour add} --- add a new neighbour entry\\ - {\tt ip neighbour change} --- change an existing entry\\ - {\tt ip neighbour replace} --- add a new entry or change an existing one} - -\paragraph{Abbreviations:} \verb|add|, \verb|a|; \verb|change|, \verb|chg|; -\verb|replace|, \verb|repl|. - -\paragraph{Description:} These commands create new neighbour records -or update existing ones. - -\paragraph{Arguments:} - -\begin{itemize} -\item \verb|to ADDRESS| (default) - ---- the protocol address of the neighbour. It is either an IPv4 or IPv6 address. - -\item \verb|dev NAME| - ---- the interface to which this neighbour is attached. - - -\item \verb|lladdr LLADDRESS| - ---- the link layer address of the neighbour. \verb|LLADDRESS| can also be -\verb|null|. - -\item \verb|nud NUD_STATE| - ---- the state of the neighbour entry. \verb|nud| is an abbreviation for ``Neighbour -Unreachability Detection''. The state can take one of the following values: - -\begin{enumerate} -\item \verb|permanent| --- the neighbour entry is valid forever and can only be removed -administratively. -\item \verb|noarp| --- the neighbour entry is valid. No attempts to validate -this entry will be made but it can be removed when its lifetime expires. -\item \verb|reachable| --- the neighbour entry is valid until the reachability -timeout expires. -\item \verb|stale| --- the neighbour entry is valid but suspicious. -This option to \verb|ip neigh| does not change the neighbour state if -it was valid and the address is not changed by this command. -\end{enumerate} - -\end{itemize} - -\paragraph{Examples:} -\begin{itemize} -\item \verb|ip neigh add 10.0.0.3 lladdr 0:0:0:0:0:1 dev eth0 nud perm| - ---- add a permanent ARP entry for the neighbour 10.0.0.3 on the device \verb|eth0|. - -\item \verb|ip neigh chg 10.0.0.3 dev eth0 nud reachable| - ---- change its state to \verb|reachable|.
-\end{itemize} - - -\subsection{{\tt ip neighbour delete} --- delete a neighbour entry} - -\paragraph{Abbreviations:} \verb|delete|, \verb|del|, \verb|d|. - -\paragraph{Description:} This command invalidates a neighbour entry. - -\paragraph{Arguments:} The arguments are the same as with \verb|ip neigh add|, -except that \verb|lladdr| and \verb|nud| are ignored. - - -\paragraph{Example:} -\begin{itemize} -\item \verb|ip neigh del 10.0.0.3 dev eth0| - ---- invalidate an ARP entry for the neighbour 10.0.0.3 on the device \verb|eth0|. - -\end{itemize} - -\begin{NB} - The deleted neighbour entry will not disappear from the tables - immediately. If it is in use it cannot be deleted until the last - client releases it. Otherwise it will be destroyed during - the next garbage collection. -\end{NB} - - -\paragraph{Warning:} Attempts to delete or manually change -a \verb|noarp| entry created by the kernel may result in unpredictable behaviour. -Particularly, the kernel may try to resolve this address even -on a \verb|NOARP| interface or if the address is multicast or broadcast. - - -\subsection{{\tt ip neighbour show} --- list neighbour entries} - -\paragraph{Abbreviations:} \verb|show|, \verb|list|, \verb|sh|, \verb|ls|. - -\paragraph{Description:}This command displays neighbour tables. - -\paragraph{Arguments:} - -\begin{itemize} - -\item \verb|to ADDRESS| (default) - ---- the prefix selecting the neighbours to list. - -\item \verb|dev NAME| - ---- only list the neighbours attached to this device. - -\item \verb|unused| - ---- only list neighbours which are not currently in use. - -\item \verb|nud NUD_STATE| - ---- only list neighbour entries in this state. \verb|NUD_STATE| takes -values listed below or the special value \verb|all| which means all states. -This option may occur more than once. If this option is absent, \verb|ip| -lists all entries except for \verb|none| and \verb|noarp|.
- -\end{itemize} - - -\paragraph{Output format:} - -\begin{verbatim} -kuznet@alisa:~ $ ip neigh ls -:: dev lo lladdr 00:00:00:00:00:00 nud noarp -fe80::200:cff:fe76:3f85 dev eth0 lladdr 00:00:0c:76:3f:85 router \ - nud stale -0.0.0.0 dev lo lladdr 00:00:00:00:00:00 nud noarp -193.233.7.254 dev eth0 lladdr 00:00:0c:76:3f:85 nud reachable -193.233.7.85 dev eth0 lladdr 00:e0:1e:63:39:00 nud stale -kuznet@alisa:~ $ -\end{verbatim} - -The first word of each line is the protocol address of the neighbour. -Then the device name follows. The rest of the line describes the contents of -the neighbour entry identified by the pair (device, address). - -\verb|lladdr| is the link layer address of the neighbour. - -\verb|nud| is the state of the ``neighbour unreachability detection'' machine -for this entry. The detailed description of the neighbour -state machine can be found in~\cite{RFC-NDISC}. Here is the full list -of the states with short descriptions: - -\begin{enumerate} -\item\verb|none| --- the state of the neighbour is void. -\item\verb|incomplete| --- the neighbour is in the process of resolution. -\item\verb|reachable| --- the neighbour is valid and apparently reachable. -\item\verb|stale| --- the neighbour is valid, but is probably already -unreachable, so the kernel will try to check it at the first transmission. -\item\verb|delay| --- a packet has been sent to the stale neighbour and the kernel is waiting -for confirmation. -\item\verb|probe| --- the delay timer expired but no confirmation was received. -The kernel has started to probe the neighbour with ARP/NDISC messages. -\item\verb|failed| --- resolution has failed. -\item\verb|noarp| --- the neighbour is valid. No attempts to check the entry -will be made. -\item\verb|permanent| --- it is a \verb|noarp| entry, but only the administrator -may remove the entry from the neighbour table. -\end{enumerate} - -The link layer address is valid in all states except for \verb|none|, -\verb|failed| and \verb|incomplete|. 
- -IPv6 neighbours can be marked with the additional flag \verb|router| -which means that the neighbour introduced itself as an IPv6 router~\cite{RFC-NDISC}. - -\paragraph{Statistics:} The \verb|-statistics| option displays some usage -statistics, f.e.\ - -\begin{verbatim} -kuznet@alisa:~ $ ip -s n ls 193.233.7.254 -193.233.7.254 dev eth0 lladdr 00:00:0c:76:3f:85 ref 5 used 12/13/20 \ - nud reachable -kuznet@alisa:~ $ -\end{verbatim} - -Here \verb|ref| is the number of users of this entry -and \verb|used| is a triplet of time intervals in seconds -separated by slashes. In this case they show that: - -\begin{enumerate} -\item the entry was used 12 seconds ago. -\item the entry was confirmed 13 seconds ago. -\item the entry was updated 20 seconds ago. -\end{enumerate} - -\subsection{{\tt ip neighbour flush} --- flush neighbour entries} - -\paragraph{Abbreviations:} \verb|flush|, \verb|f|. - -\paragraph{Description:}This command flushes neighbour tables, selecting -entries to flush by some criteria. - -\paragraph{Arguments:} This command has the same arguments as \verb|show|. -The differences are that it does not run when no arguments are given, -and that the default neighbour states to be flushed do not include -\verb|permanent| and \verb|noarp|. - - -\paragraph{Statistics:} With the \verb|-statistics| option, the command -becomes verbose. It prints out the number of deleted neighbours and the number -of rounds made to flush the neighbour table. If the option is given -twice, \verb|ip neigh flush| also dumps all the deleted neighbours -in the format described in the previous subsection. 
- -\paragraph{Example:} -\begin{verbatim} -netadm@alisa:~ # ip -s -s n f 193.233.7.254 -193.233.7.254 dev eth0 lladdr 00:00:0c:76:3f:85 ref 5 used 12/13/20 \ - nud reachable - -*** Round 1, deleting 1 entries *** -*** Flush is complete after 1 round *** -netadm@alisa:~ # -\end{verbatim} - - -\section{{\tt ip route} --- routing table management} -\label{IP-ROUTE} - -\paragraph{Abbreviations:} \verb|route|, \verb|ro|, \verb|r|. - -\paragraph{Object:} \verb|route| entries in the kernel routing tables keep -information about paths to other networked nodes. - -Each route entry has a {\em key\/} consisting of a {\em prefix\/} -(i.e.\ a pair containing a network address and the length of its mask) and, -optionally, the TOS value. An IP packet matches the route if the highest -bits of its destination address are equal to the route prefix at least -up to the prefix length and if the TOS of the route is zero or equal to -the TOS of the packet. - -If several routes match the packet, the following pruning rules -are used to select the best one (see~\cite{RFC1812}): -\begin{enumerate} -\item The longest matching prefix is selected. All shorter ones -are dropped. - -\item If the TOS of some route with the longest prefix is equal to the TOS -of the packet, the routes with different TOS are dropped. - -If no exact TOS match was found and routes with TOS=0 exist, -the rest of routes are pruned. - -Otherwise, the route lookup fails. - -\item If several routes remain after the previous steps, then -the routes with the best preference values are selected. - -\item If we still have several routes, then the {\em first\/} of them -is selected. - -\begin{NB} - Note the ambiguity of the last step. Unfortunately, Linux - historically allows such a bizarre situation. The sense of the -word ``first'' depends on the order of route additions and it is practically -impossible to maintain a bundle of such routes in this order. 
-\end{NB} - -For simplicity we will limit ourselves to the case where such a situation -is impossible and routes are uniquely identified by the triplet -\{prefix, tos, preference\}. Actually, it is impossible to create -non-unique routes with \verb|ip| commands described in this section. - -One useful exception to this rule is the default route on non-forwarding -hosts. It is ``officially'' allowed to have several fallback routes -when several routers are present on directly connected networks. -In this case, Linux-2.2 makes ``dead gateway detection''~\cite{RFC1122} -controlled by neighbour unreachability detection and by advice -from transport protocols to select a working router, so the order -of the routes is not essential. However, in this case, -fiddling with default routes manually is not recommended. Use the Router Discovery -protocol (see Appendix~\ref{EXAMPLE-SETUP}, p.\pageref{EXAMPLE-SETUP}) -instead. Actually, Linux-2.2 IPv6 does not give user level applications -any access to default routes. -\end{enumerate} - -Certainly, the steps above are not performed exactly -in this sequence. Instead, the routing table in the kernel is kept -in some data structure to achieve the final result -with minimal cost. However, not depending on a particular -routing algorithm implemented in the kernel, we can summarize -the statements above as: a route is identified by the triplet -\{prefix, tos, preference\}. This {\em key\/} lets us locate -the route in the routing table. - -\paragraph{Route attributes:} Each route key refers to a routing -information record containing -the data required to deliver IP packets (f.e.\ output device and -next hop router) and some optional attributes (f.e. the path MTU or -the preferred source address when communicating with this destination). -These attributes are described in the following subsection. 
- -\paragraph{Route types:} \label{IP-ROUTE-TYPES} -It is important that the set -of required and optional attributes depend on the route {\em type\/}. -The most important route type -is \verb|unicast|. It describes real paths to other hosts. -As a rule, common routing tables contain only such routes. However, -there are other types of routes with different semantics. The -full list of types understood by Linux-2.2 is: -\begin{itemize} -\item \verb|unicast| --- the route entry describes real paths to the -destinations covered by the route prefix. -\item \verb|unreachable| --- these destinations are unreachable. Packets -are discarded and the ICMP message {\em host unreachable\/} is generated. -The local senders get an \verb|EHOSTUNREACH| error. -\item \verb|blackhole| --- these destinations are unreachable. Packets -are discarded silently. The local senders get an \verb|EINVAL| error. -\item \verb|prohibit| --- these destinations are unreachable. Packets -are discarded and the ICMP message {\em communication administratively -prohibited\/} is generated. The local senders get an \verb|EACCES| error. -\item \verb|local| --- the destinations are assigned to this -host. The packets are looped back and delivered locally. -\item \verb|broadcast| --- the destinations are broadcast addresses. -The packets are sent as link broadcasts. -\item \verb|throw| --- a special control route used together with policy -rules (see sec.\ref{IP-RULE}, p.\pageref{IP-RULE}). If such a route is selected, lookup -in this table is terminated pretending that no route was found. -Without policy routing it is equivalent to the absence of the route in the routing -table. The packets are dropped and the ICMP message {\em net unreachable\/} -is generated. The local senders get an \verb|ENETUNREACH| error. -\item \verb|nat| --- a special NAT route. 
Destinations covered by the prefix -are considered to be dummy (or external) addresses which require translation -to real (or internal) ones before forwarding. The addresses to translate to -are selected with the attribute \verb|via|. More about NAT is -in Appendix~\ref{ROUTE-NAT}, p.\pageref{ROUTE-NAT}. -\item \verb|anycast| --- ({\em not implemented\/}) the destinations are -{\em anycast\/} addresses assigned to this host. They are mainly equivalent -to \verb|local| with one difference: such addresses are invalid when used -as the source address of any packet. -\item \verb|multicast| --- a special type used for multicast routing. -It is not present in normal routing tables. -\end{itemize} - -\paragraph{Route tables:} Linux-2.2 can pack routes into several routing -tables identified by a number in the range from 1 to 255 or by -name from the file \verb|/etc/iproute2/rt_tables|. By default all normal -routes are inserted into the \verb|main| table (ID 254) and the kernel only uses -this table when calculating routes. - -Actually, one other table always exists, which is invisible but -even more important. It is the \verb|local| table (ID 255). This table -consists of routes for local and broadcast addresses. The kernel maintains -this table automatically and the administrator usually need not modify it -or even look at it. - -The multiple routing tables enter the game when {\em policy routing\/} -is used. See sec.\ref{IP-RULE}, p.\pageref{IP-RULE}. -In this case, the table identifier effectively becomes -one more parameter, which should be added to the triplet -\{prefix, tos, preference\} to uniquely identify the route. - - -\subsection{{\tt ip route add} --- add a new route\\ - {\tt ip route change} --- change a route\\ - {\tt ip route replace} --- change a route or add a new one} -\label{IP-ROUTE-ADD} - -\paragraph{Abbreviations:} \verb|add|, \verb|a|; \verb|change|, \verb|chg|; - \verb|replace|, \verb|repl|. 
- - -\paragraph{Arguments:} -\begin{itemize} -\item \verb|to PREFIX| or \verb|to TYPE PREFIX| (default) - ---- the destination prefix of the route. If \verb|TYPE| is omitted, -\verb|ip| assumes type \verb|unicast|. Other values of \verb|TYPE| -are listed above. \verb|PREFIX| is an IP or IPv6 address optionally followed -by a slash and the prefix length. If the length of the prefix is missing, -\verb|ip| assumes a full-length host route. There is also a special -\verb|PREFIX| --- \verb|default| --- which is equivalent to IP \verb|0/0| or -to IPv6 \verb|::/0|. - -\item \verb|tos TOS| or \verb|dsfield TOS| - ---- the Type Of Service (TOS) key. This key has no associated mask and -the longest match is understood as: First, compare the TOS -of the route and of the packet. If they are not equal, then the packet -may still match a route with a zero TOS. \verb|TOS| is either an 8 bit hexadecimal -number or an identifier from {\tt /etc/iproute2/rt\_dsfield}. - - -\item \verb|metric NUMBER| or \verb|preference NUMBER| - ---- the preference value of the route. \verb|NUMBER| is an arbitrary 32bit number. - -\item \verb|table TABLEID| - ---- the table to add this route to. -\verb|TABLEID| may be a number or a string from the file -\verb|/etc/iproute2/rt_tables|. If this parameter is omitted, -\verb|ip| assumes the \verb|main| table, with the exception of -\verb|local|, \verb|broadcast| and \verb|nat| routes, which are -put into the \verb|local| table by default. - -\item \verb|dev NAME| - ---- the output device name. - -\item \verb|via ADDRESS| - ---- the address of the nexthop router. Actually, the sense of this field depends -on the route type. For normal \verb|unicast| routes it is either the true nexthop -router or, if it is a direct route installed in BSD compatibility mode, -it can be a local address of the interface. -For NAT routes it is the first address of the block of translated IP destinations. 
- -\item \verb|src ADDRESS| - ---- the source address to prefer when sending to the destinations -covered by the route prefix. - -\item \verb|realm REALMID| - ---- the realm to which this route is assigned. -\verb|REALMID| may be a number or a string from the file -\verb|/etc/iproute2/rt_realms|. Sec.\ref{RT-REALMS} (p.\pageref{RT-REALMS}) -contains more information on realms. - -\item \verb|mtu MTU| or \verb|mtu lock MTU| - ---- the MTU along the path to the destination. If the modifier \verb|lock| is -not used, the MTU may be updated by the kernel due to Path MTU Discovery. -If the modifier \verb|lock| is used, no path MTU discovery will be tried, -all packets will be sent without the DF bit in the IPv4 case -or fragmented to MTU for IPv6. - -\item \verb|window NUMBER| - ---- the maximal window for TCP to advertise to these destinations, -measured in bytes. It limits maximal data bursts that our TCP -peers are allowed to send to us. - -\item \verb|rtt NUMBER| - ---- the initial RTT (``Round Trip Time'') estimate. - - -\item \verb|rttvar NUMBER| - ---- \threeonly the initial RTT variance estimate. - - -\item \verb|ssthresh NUMBER| - ---- \threeonly an estimate for the initial slow start threshold. - - -\item \verb|cwnd NUMBER| - ---- \threeonly the clamp for congestion window. It is ignored if the \verb|lock| - flag is not used. - - -\item \verb|advmss NUMBER| - ---- \threeonly the MSS (``Maximal Segment Size'') to advertise to these - destinations when establishing TCP connections. If it is not given, - Linux uses a default value calculated from the first hop device MTU. - -\begin{NB} - If the path to these destinations is asymmetric, this guess may be wrong. -\end{NB} - -\item \verb|reordering NUMBER| - ---- \threeonly Maximal reordering on the path to this destination. - If it is not given, Linux uses the value selected with \verb|sysctl| - variable \verb|net/ipv4/tcp_reordering|.
- -\item \verb|hoplimit NUMBER| - ---- [2.5.74+ only] Maximum number of hops on the path to this destination. - The default is the value selected with the \verb|sysctl| variable - \verb|net/ipv4/ip_default_ttl|. - -\item \verb|initcwnd NUMBER| ---- [2.5.70+ only] Initial congestion window size for connections to - this destination. Actual window size is this value multiplied by the - MSS (``Maximal Segment Size'') for the same connection. The default is - zero, meaning to use the values specified in~\cite{RFC2414}. - -\item \verb|initrwnd NUMBER| - ---- [2.6.33+ only] Initial receive window size for connections to - this destination. The actual window size is this value multiplied - by the MSS (``Maximal Segment Size'') of the connection. The default - value is zero, meaning to use the Slow Start value. - -\item \verb|nexthop NEXTHOP| - ---- the nexthop of a multipath route. \verb|NEXTHOP| is a complex value -with its own syntax similar to the top level argument lists: -\begin{itemize} -\item \verb|via ADDRESS| is the nexthop router. -\item \verb|dev NAME| is the output device. -\item \verb|weight NUMBER| is a weight for this element of a multipath -route reflecting its relative bandwidth or quality. -\end{itemize} - -\item \verb|scope SCOPE_VAL| - ---- the scope of the destinations covered by the route prefix. -\verb|SCOPE_VAL| may be a number or a string from the file -\verb|/etc/iproute2/rt_scopes|. -If this parameter is omitted, -\verb|ip| assumes scope \verb|global| for all gatewayed \verb|unicast| -routes, scope \verb|link| for direct \verb|unicast| and \verb|broadcast| routes -and scope \verb|host| for \verb|local| routes. - -\item \verb|protocol RTPROTO| - ---- the routing protocol identifier of this route. -\verb|RTPROTO| may be a number or a string from the file -\verb|/etc/iproute2/rt_protos|.
If the routing protocol ID is -not given, \verb|ip| assumes protocol \verb|boot| (i.e.\ -it assumes the route was added by someone who doesn't -understand what they are doing). Several protocol values have a fixed interpretation. -Namely: -\begin{itemize} -\item \verb|redirect| --- the route was installed due to an ICMP redirect. -\item \verb|kernel| --- the route was installed by the kernel during -autoconfiguration. -\item \verb|boot| --- the route was installed during the bootup sequence. -If a routing daemon starts, it will purge all of them. -\item \verb|static| --- the route was installed by the administrator -to override dynamic routing. Routing daemon will respect them -and, probably, even advertise them to its peers. -\item \verb|ra| --- the route was installed by Router Discovery protocol. -\end{itemize} -The rest of the values are not reserved and the administrator is free -to assign (or not to assign) protocol tags. At least, routing -daemons should take care of setting some unique protocol values, -f.e.\ as they are assigned in \verb|rtnetlink.h| or in \verb|rt_protos| -database. - - -\item \verb|onlink| - ---- pretend that the nexthop is directly attached to this link, -even if it does not match any interface prefix. One application of this -option may be found in~\cite{IP-TUNNELS}. - -\item \verb|pref PREF| - ---- the IPv6 route preference. -\verb|PREF| is a string specifying the route preference as defined in -RFC4191 for Router Discovery messages. Namely: -\begin{itemize} -\item \verb|low| --- the route has the lowest priority. -\item \verb|medium| --- the route has the default priority. -\item \verb|high| --- the route has the highest priority. -\end{itemize} - -\end{itemize} - - -\begin{NB} - Actually there are more commands: \verb|prepend| does the same - thing as classic \verb|route add|, i.e.\ adds a route, even if another - route to the same destination exists. Its opposite case is \verb|append|, - which adds the route to the end of the list.
Avoid these - features. -\end{NB} -\begin{NB} - More sad news, IPv6 only understands the \verb|append| command correctly. - All the others are translated into \verb|append| commands. Certainly, - this will change in the future. -\end{NB} - -\paragraph{Examples:} -\begin{itemize} -\item add a plain route to network 10.0.0/24 via gateway 193.233.7.65 -\begin{verbatim} - ip route add 10.0.0/24 via 193.233.7.65 -\end{verbatim} -\item change it to a direct route via the \verb|dummy| device -\begin{verbatim} - ip ro chg 10.0.0/24 dev dummy -\end{verbatim} -\item add a default multipath route splitting the load between \verb|ppp0| -and \verb|ppp1| -\begin{verbatim} - ip route add default scope global nexthop dev ppp0 \ - nexthop dev ppp1 -\end{verbatim} -Note the scope value. It is not necessary but it informs the kernel -that this route is gatewayed rather than direct. Actually, if you -know the addresses of remote endpoints it would be better to use the -\verb|via| parameter. -\item announce that the address 192.203.80.144 is not a real one, but -should be translated to 193.233.7.83 before forwarding -\begin{verbatim} - ip route add nat 192.203.80.144 via 193.233.7.83 -\end{verbatim} -Backward translation is setup with policy rules described -in the following section (sec.\ref{IP-RULE}, p.\pageref{IP-RULE}). -\end{itemize} - -\subsection{{\tt ip route delete} --- delete a route} - -\paragraph{Abbreviations:} \verb|delete|, \verb|del|, \verb|d|. - -\paragraph{Arguments:} \verb|ip route del| has the same arguments as -\verb|ip route add|, but their semantics are a bit different. - -Key values (\verb|to|, \verb|tos|, \verb|preference| and \verb|table|) -select the route to delete. If optional attributes are present, \verb|ip| -verifies that they coincide with the attributes of the route to delete. -If no route with the given key and attributes was found, \verb|ip route del| -fails. 
-\begin{NB} -Linux-2.0 had the option to delete a route selected only by prefix address, -ignoring its length (i.e.\ netmask). This option no longer exists -because it was ambiguous. However, look at {\tt ip route flush} -(sec.\ref{IP-ROUTE-FLUSH}, p.\pageref{IP-ROUTE-FLUSH}) which -provides similar and even richer functionality. -\end{NB} - -\paragraph{Example:} -\begin{itemize} -\item delete the multipath route created by the command in previous subsection -\begin{verbatim} - ip route del default scope global nexthop dev ppp0 \ - nexthop dev ppp1 -\end{verbatim} -\end{itemize} - - - -\subsection{{\tt ip route show} --- list routes} - -\paragraph{Abbreviations:} \verb|show|, \verb|list|, \verb|sh|, \verb|ls|, \verb|l|. - -\paragraph{Description:} the command displays the contents of the routing tables -or the route(s) selected by some criteria. - - -\paragraph{Arguments:} -\begin{itemize} -\item \verb|to SELECTOR| (default) - ---- only select routes from the given range of destinations. \verb|SELECTOR| -consists of an optional modifier (\verb|root|, \verb|match| or \verb|exact|) -and a prefix. \verb|root PREFIX| selects routes with prefixes not shorter -than \verb|PREFIX|. F.e.\ \verb|root 0/0| selects the entire routing table. -\verb|match PREFIX| selects routes with prefixes not longer than -\verb|PREFIX|. F.e.\ \verb|match 10.0/16| selects \verb|10.0/16|, -\verb|10/8| and \verb|0/0|, but it does not select \verb|10.1/16| and -\verb|10.0.0/24|. And \verb|exact PREFIX| (or just \verb|PREFIX|) -selects routes with this exact prefix. If neither of these options -are present, \verb|ip| assumes \verb|root 0/0| i.e.\ it lists the entire table. - - -\item \verb|tos TOS| or \verb|dsfield TOS| - - --- only select routes with the given TOS. - - -\item \verb|table TABLEID| - - --- show the routes from this table(s). The default setting is to show -\verb|table| \verb|main|. 
\verb|TABLEID| may either be the ID of a real table -or one of the special values: - \begin{itemize} - \item \verb|all| --- list all of the tables. - \item \verb|cache| --- dump the routing cache. - \end{itemize} -\begin{NB} - IPv6 has a single table. However, splitting it into \verb|main|, \verb|local| - and \verb|cache| is emulated by the \verb|ip| utility. -\end{NB} - -\item \verb|cloned| or \verb|cached| - ---- list cloned routes i.e.\ routes which were dynamically forked from -other routes because some route attribute (f.e.\ MTU) was updated. -Actually, it is equivalent to \verb|table cache|. - -\item \verb|from SELECTOR| - ---- the same syntax as for \verb|to|, but it binds the source address range -rather than destinations. Note that the \verb|from| option only works with -cloned routes. - -\item \verb|protocol RTPROTO| - ---- only list routes of this protocol. - - -\item \verb|scope SCOPE_VAL| - ---- only list routes with this scope. - -\item \verb|type TYPE| - ---- only list routes of this type. - -\item \verb|dev NAME| - ---- only list routes going via this device. - -\item \verb|via PREFIX| - ---- only list routes going via the nexthop routers selected by \verb|PREFIX|. - -\item \verb|src PREFIX| - ---- only list routes with preferred source addresses selected -by \verb|PREFIX|. - -\item \verb|realm REALMID| or \verb|realms FROMREALM/TOREALM| - ---- only list routes with these realms. 
- -\end{itemize} - -\paragraph{Examples:} Let us count routes of protocol \verb|gated/bgp| -on a router: -\begin{verbatim} -kuznet@amber:~ $ ip ro ls proto gated/bgp | wc - 1413 9891 79010 -kuznet@amber:~ $ -\end{verbatim} -To count the size of the routing cache, we have to use the \verb|-o| option -because cached attributes can take more than one line of output: -\begin{verbatim} -kuznet@amber:~ $ ip -o ro ls cloned | wc - 159 2543 18707 -kuznet@amber:~ $ -\end{verbatim} - - -\paragraph{Output format:} The output of this command consists -of per route records separated by line feeds. -However, some records may consist -of more than one line: particularly, this is the case when the route -is cloned or you requested additional statistics. If the -\verb|-o| option was given, then line feeds separating lines inside -records are replaced with the backslash sign. - -The output has the same syntax as arguments given to {\tt ip route add}, -so that it can be understood easily. F.e.\ -\begin{verbatim} -kuznet@amber:~ $ ip ro ls 193.233.7/24 -193.233.7.0/24 dev eth0 proto gated/conn scope link \ - src 193.233.7.65 realms inr.ac -kuznet@amber:~ $ -\end{verbatim} - -If you list cloned entries, the output contains other attributes which -are evaluated during route calculation and updated during route -lifetime. An example of the output is: -\begin{verbatim} -kuznet@amber:~ $ ip ro ls 193.233.7.82 tab cache -193.233.7.82 from 193.233.7.82 dev eth0 src 193.233.7.65 \ - realms inr.ac/inr.ac - cache <src-direct,redirect> mtu 1500 rtt 300 iif eth0 -193.233.7.82 dev eth0 src 193.233.7.65 realms inr.ac - cache mtu 1500 rtt 300 -kuznet@amber:~ $ -\end{verbatim} -\begin{NB} - \label{NB-strange-route} - The route looks a bit strange, doesn't it? Did you notice that - it is a path from 193.233.7.82 back to 193.233.7.82? Well, you will - see in the section on \verb|ip route get| (p.\pageref{NB-nature-of-strangeness}) - how it appeared.
-\end{NB} -The second line, starting with the word \verb|cache|, shows -additional attributes which normal routes do not possess. -Cached flags are summarized in angle brackets: -\begin{itemize} -\item \verb|local| --- packets are delivered locally. -It stands for loopback unicast routes, for broadcast routes -and for multicast routes, if this host is a member of the corresponding -group. - -\item \verb|reject| --- the path is bad. Any attempt to use it results -in an error. See attribute \verb|error| below (p.\pageref{IP-ROUTE-GET-error}). - -\item \verb|mc| --- the destination is multicast. - -\item \verb|brd| --- the destination is broadcast. - -\item \verb|src-direct| --- the source is on a directly connected -interface. - -\item \verb|redirected| --- the route was created by an ICMP Redirect. - -\item \verb|redirect| --- packets going via this route will -trigger an ICMP redirect. - -\item \verb|fastroute| --- the route is eligible to be used for fastroute. - -\item \verb|equalize| --- make packet by packet randomization -along this path. - -\item \verb|dst-nat| --- the destination address requires translation. - -\item \verb|src-nat| --- the source address requires translation. - -\item \verb|masq| --- the source address requires masquerading. -This feature disappeared in linux-2.4. - -\item \verb|notify| --- ({\em not implemented}) change/deletion -of this route will trigger RTNETLINK notification. -\end{itemize} - -Then some optional attributes follow: -\begin{itemize} -\item \verb|error| --- on \verb|reject| routes it is error code -returned to local senders when they try to use this route. -These error codes are translated into ICMP error codes, sent to remote -senders, according to the rules described above in the subsection -devoted to route types (p.\pageref{IP-ROUTE-TYPES}). -\label{IP-ROUTE-GET-error} - -\item \verb|expires| --- this entry will expire after this timeout. 
- -\item \verb|iif| --- the packets for this path are expected to arrive -on this interface. -\end{itemize} - -\paragraph{Statistics:} With the \verb|-statistics| option, more -information about this route is shown: -\begin{itemize} -\item \verb|users| --- the number of users of this entry. -\item \verb|age| --- shows when this route was last used. -\item \verb|used| --- the number of lookups of this route since its creation. -\end{itemize} - -\subsection{{\tt ip route save} -- save routing tables} -\label{IP-ROUTE-SAVE} - -\paragraph{Description:} this command saves the contents of the routing -tables or the route(s) selected by some criteria to standard output. - -\paragraph{Arguments:} \verb|ip route save| has the same arguments as -\verb|ip route show|. - -\paragraph{Example:} This saves all the routes to the {\tt saved\_routes} -file: -\begin{verbatim} -dan@caffeine:~ # ip route save > saved_routes -\end{verbatim} - -\paragraph{Output format:} The format of the data stream provided by -\verb|ip route save| is that of \verb|rtnetlink|. See -\verb|rtnetlink(7)| for more information. - -\subsection{{\tt ip route restore} -- restore routing tables} -\label{IP-ROUTE-RESTORE} - -\paragraph{Description:} this command restores the contents of the routing -tables according to a data stream as provided by \verb|ip route save| via -standard input. Note that any routes already in the table are left unchanged. -Any routes in the input stream that already exist in the tables are ignored. - -\paragraph{Arguments:} This command takes no arguments. - -\paragraph{Example:} This restores all routes that were saved to the -{\tt saved\_routes} file: - -\begin{verbatim} -dan@caffeine:~ # ip route restore < saved_routes -\end{verbatim} - -\subsection{{\tt ip route flush} --- flush routing tables} -\label{IP-ROUTE-FLUSH} - -\paragraph{Abbreviations:} \verb|flush|, \verb|f|. - -\paragraph{Description:} this command flushes routes selected -by some criteria. 
- -\paragraph{Arguments:} the arguments have the same syntax and semantics -as the arguments of \verb|ip route show|, but routing tables are not -listed but purged. The only difference is the default action: \verb|show| -dumps all the IP main routing table but \verb|flush| prints the helper page. -The reason for this difference does not require any explanation, does it? - - -\paragraph{Statistics:} With the \verb|-statistics| option, the command -becomes verbose. It prints out the number of deleted routes and the number -of rounds made to flush the routing table. If the option is given -twice, \verb|ip route flush| also dumps all the deleted routes -in the format described in the previous subsection. - -\paragraph{Examples:} The first example flushes all the -gatewayed routes from the main table (f.e.\ after a routing daemon crash). -\begin{verbatim} -netadm@amber:~ # ip -4 ro flush scope global type unicast -\end{verbatim} -This option deserves to be put into a scriptlet \verb|routef|. -\begin{NB} -This option was described in the \verb|route(8)| man page borrowed -from BSD, but was never implemented in Linux. 
-\end{NB} - -The second example flushes all IPv6 cloned routes: -\begin{verbatim} -netadm@amber:~ # ip -6 -s -s ro flush cache -3ffe:2400::220:afff:fef4:c5d1 via 3ffe:2400::220:afff:fef4:c5d1 \ - dev eth0 metric 0 - cache used 2 age 12sec mtu 1500 rtt 300 -3ffe:2400::280:adff:feb7:8034 via 3ffe:2400::280:adff:feb7:8034 \ - dev eth0 metric 0 - cache used 2 age 15sec mtu 1500 rtt 300 -3ffe:2400::280:c8ff:fe59:5bcc via 3ffe:2400::280:c8ff:fe59:5bcc \ - dev eth0 metric 0 - cache users 1 used 1 age 23sec mtu 1500 rtt 300 -3ffe:2400:0:1:2a0:ccff:fe66:1878 via 3ffe:2400:0:1:2a0:ccff:fe66:1878 \ - dev eth1 metric 0 - cache used 2 age 20sec mtu 1500 rtt 300 -3ffe:2400:0:1:a00:20ff:fe71:fb30 via 3ffe:2400:0:1:a00:20ff:fe71:fb30 \ - dev eth1 metric 0 - cache used 2 age 33sec mtu 1500 rtt 300 -ff02::1 via ff02::1 dev eth1 metric 0 - cache users 1 used 1 age 45sec mtu 1500 rtt 300 - -*** Round 1, deleting 6 entries *** -*** Flush is complete after 1 round *** -netadm@amber:~ # ip -6 -s -s ro flush cache -Nothing to flush. -netadm@amber:~ # -\end{verbatim} - -The third example flushes BGP routing tables after a \verb|gated| -death. -\begin{verbatim} -netadm@amber:~ # ip ro ls proto gated/bgp | wc - 1408 9856 78730 -netadm@amber:~ # ip -s ro f proto gated/bgp - -*** Round 1, deleting 1408 entries *** -*** Flush is complete after 1 round *** -netadm@amber:~ # ip ro f proto gated/bgp -Nothing to flush. -netadm@amber:~ # ip ro ls proto gated/bgp -netadm@amber:~ # -\end{verbatim} - - -\subsection{{\tt ip route get} --- get a single route} -\label{IP-ROUTE-GET} - -\paragraph{Abbreviations:} \verb|get|, \verb|g|. - -\paragraph{Description:} this command gets a single route to a destination -and prints its contents exactly as the kernel sees it. - -\paragraph{Arguments:} -\begin{itemize} -\item \verb|to ADDRESS| (default) - ---- the destination address. - -\item \verb|from ADDRESS| - ---- the source address. - -\item \verb|tos TOS| or \verb|dsfield TOS| - ---- the Type Of Service. 
- -\item \verb|iif NAME| - ---- the device from which this packet is expected to arrive. - -\item \verb|oif NAME| - ---- force the output device on which this packet will be routed. - -\item \verb|connected| - ---- if no source address (option \verb|from|) was given, relookup -the route with the source set to the preferred address received from the first lookup. -If policy routing is used, it may be a different route. - -\end{itemize} - -Note that this operation is not equivalent to \verb|ip route show|. -\verb|show| shows existing routes. \verb|get| resolves them and -creates new clones if necessary. Essentially, \verb|get| -is equivalent to sending a packet along this path. -If the \verb|iif| argument is not given, the kernel creates a route -to output packets towards the requested destination. -This is equivalent to pinging the destination -with a subsequent {\tt ip route ls cache}, however, no packets are -actually sent. With the \verb|iif| argument, the kernel pretends -that a packet arrived from this interface and searches for -a path to forward the packet. - -\paragraph{Output format:} This command outputs routes in the same -format as \verb|ip route ls|. 
- -\paragraph{Examples:} -\begin{itemize} -\item Find a route to output packets to 193.233.7.82: -\begin{verbatim} -kuznet@amber:~ $ ip route get 193.233.7.82 -193.233.7.82 dev eth0 src 193.233.7.65 realms inr.ac - cache mtu 1500 rtt 300 -kuznet@amber:~ $ -\end{verbatim} - -\item Find a route to forward packets arriving on \verb|eth0| -from 193.233.7.82 and destined for 193.233.7.82: -\begin{verbatim} -kuznet@amber:~ $ ip r g 193.233.7.82 from 193.233.7.82 iif eth0 -193.233.7.82 from 193.233.7.82 dev eth0 src 193.233.7.65 \ - realms inr.ac/inr.ac - cache <src-direct,redirect> mtu 1500 rtt 300 iif eth0 -kuznet@amber:~ $ -\end{verbatim} -\begin{NB} - \label{NB-nature-of-strangeness} - This is the command that created the funny route from 193.233.7.82 - looped back to 193.233.7.82 (cf.\ NB on~p.\pageref{NB-strange-route}). - Note the \verb|redirect| flag on it. -\end{NB} - -\item Find a multicast route for packets arriving on \verb|eth0| -from host 193.233.7.82 and destined for multicast group 224.2.127.254 -(it is assumed that a multicast routing daemon is running. -In this case, it is \verb|pimd|) -\begin{verbatim} -kuznet@amber:~ $ ip r g 224.2.127.254 from 193.233.7.82 iif eth0 -multicast 224.2.127.254 from 193.233.7.82 dev lo \ - src 193.233.7.65 realms inr.ac/cosmos - cache <mc> iif eth0 Oifs: eth1 pimreg -kuznet@amber:~ $ -\end{verbatim} -This route differs from the ones seen before. It contains a ``normal'' part -and a ``multicast'' part. The normal part is used to deliver (or not to -deliver) the packet to local IP listeners. In this case the router -is not a member -of this group, so that route has no \verb|local| flag and only -forwards packets. The output device for such entries is always loopback. -The multicast part consists of an additional \verb|Oifs:| list showing -the output interfaces. -\end{itemize} - - -It is time for a more complicated example. 
Let us add an invalid -gatewayed route for a destination which is really directly connected: -\begin{verbatim} -netadm@alisa:~ # ip route add 193.233.7.98 via 193.233.7.254 -netadm@alisa:~ # ip route get 193.233.7.98 -193.233.7.98 via 193.233.7.254 dev eth0 src 193.233.7.90 - cache mtu 1500 rtt 3072 -netadm@alisa:~ # -\end{verbatim} -and probe it with ping: -\begin{verbatim} -netadm@alisa:~ # ping -n 193.233.7.98 -PING 193.233.7.98 (193.233.7.98) from 193.233.7.90 : 56 data bytes -From 193.233.7.254: Redirect Host(New nexthop: 193.233.7.98) -64 bytes from 193.233.7.98: icmp_seq=0 ttl=255 time=3.5 ms -From 193.233.7.254: Redirect Host(New nexthop: 193.233.7.98) -64 bytes from 193.233.7.98: icmp_seq=1 ttl=255 time=2.2 ms -64 bytes from 193.233.7.98: icmp_seq=2 ttl=255 time=0.4 ms -64 bytes from 193.233.7.98: icmp_seq=3 ttl=255 time=0.4 ms -64 bytes from 193.233.7.98: icmp_seq=4 ttl=255 time=0.4 ms -^C ---- 193.233.7.98 ping statistics --- -5 packets transmitted, 5 packets received, 0% packet loss -round-trip min/avg/max = 0.4/1.3/3.5 ms -netadm@alisa:~ # -\end{verbatim} -What happened? Router 193.233.7.254 understood that we have a much -better path to the destination and sent us an ICMP redirect message. -We may retry \verb|ip route get| to see what we have in the routing -tables now: -\begin{verbatim} -netadm@alisa:~ # ip route get 193.233.7.98 -193.233.7.98 dev eth0 src 193.233.7.90 - cache <redirected> mtu 1500 rtt 3072 -netadm@alisa:~ # -\end{verbatim} - - - -\section{{\tt ip rule} --- routing policy database management} -\label{IP-RULE} - -\paragraph{Abbreviations:} \verb|rule|, \verb|ru|. - -\paragraph{Object:} \verb|rule|s in the routing policy database control -the route selection algorithm. - -Classic routing algorithms used in the Internet make routing decisions -based only on the destination address of packets (and in theory, -but not in practice, on the TOS field). 
The seminal review of classic -routing algorithms and their modifications can be found in~\cite{RFC1812}. - -In some circumstances we want to route packets differently depending not only -on destination addresses, but also on other packet fields: source address, -IP protocol, transport protocol ports or even packet payload. -This task is called ``policy routing''. - -\begin{NB} - ``policy routing'' $\neq$ ``routing policy''. - -\noindent ``policy routing'' $=$ ``cunning routing''. - -\noindent ``routing policy'' $=$ ``routing tactics'' or ``routing plan''. -\end{NB} - -To solve this task, the conventional destination based routing table, ordered -according to the longest match rule, is replaced with a ``routing policy -database'' (or RPDB), which selects routes -by executing some set of rules. The rules may have lots of keys of different -natures and therefore they have no natural ordering, but one imposed -by the administrator. Linux-2.2 RPDB is a linear list of rules -ordered by numeric priority value. -RPDB explicitly allows matching a few packet fields: - -\begin{itemize} -\item packet source address. -\item packet destination address. -\item TOS. -\item incoming interface (which is packet metadata, rather than a packet field). -\end{itemize} - -Matching IP protocols and transport ports is also possible, -indirectly, via \verb|ipchains|, by exploiting their ability -to mark some classes of packets with \verb|fwmark|. Therefore, -\verb|fwmark| is also included in the set of keys checked by rules. - -Each policy routing rule consists of a {\em selector\/} and an {\em action\/} -predicate. The RPDB is scanned in the order of increasing priority. The selector -of each rule is applied to \{source address, destination address, incoming -interface, tos, fwmark\} and, if the selector matches the packet, -the action is performed. The action predicate may return with success. 
-In this case, it will either give a route or failure indication -and the RPDB lookup is terminated. Otherwise, the RPDB program -continues on the next rule. - -What is the action, semantically? The natural action is to select the -nexthop and the output device. This is what -Cisco IOS~\cite{IOS} does. Let us call it ``match \& set''. -The Linux-2.2 approach is more flexible. The action includes -lookups in destination-based routing tables and selecting -a route from these tables according to the classic longest match algorithm. -The ``match \& set'' approach is the simplest case of the Linux one. It is realized -when a second level routing table contains a single default route. -Recall that Linux-2.2 supports multiple tables -managed with the \verb|ip route| command, described in the previous section. - -At startup time the kernel configures the default RPDB consisting of three -rules: - -\begin{enumerate} -\item Priority: 0, Selector: match anything, Action: lookup routing -table \verb|local| (ID 255). -The \verb|local| table is a special routing table containing -high priority control routes for local and broadcast addresses. - -\item Priority: 32766, Selector: match anything, Action: lookup routing -table \verb|main| (ID 254). -The \verb|main| table is the normal routing table containing all non-policy -routes. This rule may be deleted and/or overridden with other -ones by the administrator. - -\item Priority: 32767, Selector: match anything, Action: lookup routing -table \verb|default| (ID 253). -The \verb|default| table is empty. It is reserved for some -post-processing if no previous default rules selected the packet. -This rule may also be deleted. - -\end{enumerate} - -Do not confuse routing tables with rules: rules point to routing tables, -several rules may refer to one routing table and some routing tables -may have no rules pointing to them. 
If the administrator deletes all the rules -referring to a table, the table is not used, but it still exists -and will disappear only after all the routes contained in it are deleted. - - -\paragraph{Rule attributes:} Each RPDB entry has additional -attributes. F.e.\ each rule has a pointer to some routing -table. NAT and masquerading rules have an attribute to select new IP -address to translate/masquerade. Besides that, rules have some -optional attributes, which routes have, namely \verb|realms|. -These values do not override those contained in the routing tables. They -are only used if the route did not select any attributes. - - -\paragraph{Rule types:} The RPDB may contain rules of the following -types: -\begin{itemize} -\item \verb|unicast| --- the rule prescribes to return the route found -in the routing table referenced by the rule. -\item \verb|blackhole| --- the rule prescribes to silently drop the packet. -\item \verb|unreachable| --- the rule prescribes to generate a ``Network -is unreachable'' error. -\item \verb|prohibit| --- the rule prescribes to generate -``Communication is administratively prohibited'' error. -\item \verb|nat| --- the rule prescribes to translate the source address -of the IP packet into some other value. More about NAT is -in Appendix~\ref{ROUTE-NAT}, p.\pageref{ROUTE-NAT}. -\end{itemize} - - -\paragraph{Commands:} \verb|add|, \verb|delete| and \verb|show| -(or \verb|list|). - -\subsection{{\tt ip rule add} --- insert a new rule\\ - {\tt ip rule delete} --- delete a rule} -\label{IP-RULE-ADD} - -\paragraph{Abbreviations:} \verb|add|, \verb|a|; \verb|delete|, \verb|del|, - \verb|d|. - -\paragraph{Arguments:} - -\begin{itemize} -\item \verb|type TYPE| (default) - ---- the type of this rule. The list of valid types was given in the previous -subsection. - -\item \verb|from PREFIX| - ---- select the source prefix to match. - -\item \verb|to PREFIX| - ---- select the destination prefix to match. 
- -\item \verb|iif NAME| - ---- select the incoming device to match. If the interface is loopback, -the rule only matches packets originating from this host. This means that you -may create separate routing tables for forwarded and local packets and, -hence, completely segregate them. - -\item \verb|tos TOS| or \verb|dsfield TOS| - ---- select the TOS value to match. - -\item \verb|fwmark MARK| - ---- select the \verb|fwmark| value to match. - -\item \verb|priority PREFERENCE| - ---- the priority of this rule. Each rule should have an explicitly -set {\em unique\/} priority value. -\begin{NB} - Really, for historical reasons \verb|ip rule add| does not require a - priority value and allows them to be non-unique. - If the user does not supply a priority, it is selected by the kernel. - If the user creates a rule with a priority value that - already exists, the kernel does not reject the request. It adds - the new rule before all old rules of the same priority. - - It is a mistake in design, no more. And it will be fixed one day, - so do not rely on this feature. Use explicit priorities. -\end{NB} - - -\item \verb|table TABLEID| - ---- the routing table identifier to lookup if the rule selector matches. - -\item \verb|realms FROM/TO| - ---- Realms to select if the rule matched and the routing table lookup -succeeded. Realm \verb|TO| is only used if the route did not select -any realm. - -\item \verb|nat ADDRESS| - ---- The base of the IP address block to translate (for source addresses). -The \verb|ADDRESS| may be either the start of the block of NAT addresses -(selected by NAT routes) or in linux-2.2 a local host address (or even zero). -In the last case the router does not translate the packets, -but masquerades them to this address; this feature disappeared in 2.4. -More about NAT is in Appendix~\ref{ROUTE-NAT}, -p.\pageref{ROUTE-NAT}. - -\end{itemize} - -\paragraph{Warning:} Changes to the RPDB made with these commands -do not become active immediately.
It is assumed that after -a script finishes a batch of updates, it flushes the routing cache -with \verb|ip route flush cache|. - -\paragraph{Examples:} -\begin{itemize} -\item Route packets with source addresses from 192.203.80/24 -according to routing table \verb|inr.ruhep|: -\begin{verbatim} -ip ru add from 192.203.80.0/24 table inr.ruhep prio 220 -\end{verbatim} - -\item Translate packet source address 193.233.7.83 into 192.203.80.144 -and route it according to table \#1 (actually, it is \verb|inr.ruhep|): -\begin{verbatim} -ip ru add from 193.233.7.83 nat 192.203.80.144 table 1 prio 320 -\end{verbatim} - -\item Delete the unused default rule: -\begin{verbatim} -ip ru del prio 32767 -\end{verbatim} - -\end{itemize} - - - -\subsection{{\tt ip rule show} --- list rules} -\label{IP-RULE-SHOW} - -\paragraph{Abbreviations:} \verb|show|, \verb|list|, \verb|sh|, \verb|ls|, \verb|l|. - - -\paragraph{Arguments:} Good news, this is one command that has no arguments. - -\paragraph{Output format:} - -\begin{verbatim} -kuznet@amber:~ $ ip ru ls -0: from all lookup local -200: from 192.203.80.0/24 to 193.233.7.0/24 lookup main -210: from 192.203.80.0/24 to 192.203.80.0/24 lookup main -220: from 192.203.80.0/24 lookup inr.ruhep realms inr.ruhep/radio-msu -300: from 193.233.7.83 to 193.233.7.0/24 lookup main -310: from 193.233.7.83 to 192.203.80.0/24 lookup main -320: from 193.233.7.83 lookup inr.ruhep map-to 192.203.80.144 -32766: from all lookup main -kuznet@amber:~ $ -\end{verbatim} - -In the first column is the rule priority value followed -by a colon. Then the selectors follow. Each key is prefixed -with the same keyword that was used to create the rule. - -The keyword \verb|lookup| is followed by a routing table identifier, -as it is recorded in the file \verb|/etc/iproute2/rt_tables|. - -If the rule does NAT (f.e.\ rule \#320), it is shown by the keyword -\verb|map-to| followed by the start of the block of addresses to map. - -The sense of this example is pretty simple. 
The prefixes -192.203.80.0/24 and 193.233.7.0/24 form the internal network, but -they are routed differently when the packets leave it. -Besides that, the host 193.233.7.83 is translated into -another prefix to look like 192.203.80.144 when talking -to the outer world. - -\subsection{{\tt ip rule save} -- save rules tables} -\label{IP-RULE-SAVE} - -\paragraph{Description:} this command saves the contents of the rules -tables or the rule(s) selected by some criteria to standard output. - -\paragraph{Arguments:} \verb|ip rule save| has the same arguments as -\verb|ip rule show|. - -\paragraph{Example:} This saves all the rules to the {\tt saved\_rules} -file: -\begin{verbatim} -dan@caffeine:~ # ip rule save > saved_rules -\end{verbatim} - -\paragraph{Output format:} The format of the data stream provided by -\verb|ip rule save| is that of \verb|rtnetlink|. See -\verb|rtnetlink(7)| for more information. - -\subsection{{\tt ip rule restore} -- restore rules tables} -\label{IP-RULE-RESTORE} - -\paragraph{Description:} this command restores the contents of the rules -tables according to a data stream as provided by \verb|ip rule save| via -standard input. Note that any rules already in the table are left unchanged, -and duplicates are not ignored. - -\paragraph{Arguments:} This command takes no arguments. - -\paragraph{Example:} This restores all rules that were saved to the -{\tt saved\_rules} file: - -\begin{verbatim} -dan@caffeine:~ # ip rule restore < saved_rules -\end{verbatim} - - - -\section{{\tt ip maddress} --- multicast addresses management} -\label{IP-MADDR} - -\paragraph{Object:} \verb|maddress| objects are multicast addresses. - -\paragraph{Commands:} \verb|add|, \verb|delete|, \verb|show| (or \verb|list|). - -\subsection{{\tt ip maddress show} --- list multicast addresses} - -\paragraph{Abbreviations:} \verb|show|, \verb|list|, \verb|sh|, \verb|ls|, \verb|l|. 
- -\paragraph{Arguments:} - -\begin{itemize} - -\item \verb|dev NAME| (default) - ---- the device name. - -\end{itemize} - -\paragraph{Output format:} - -\begin{verbatim} -kuznet@alisa:~ $ ip maddr ls dummy -2: dummy - link 33:33:00:00:00:01 - link 01:00:5e:00:00:01 - inet 224.0.0.1 users 2 - inet6 ff02::1 -kuznet@alisa:~ $ -\end{verbatim} - -The first line of the output shows the interface index and its name. -Then the multicast address list follows. Each line starts with the -protocol identifier. The word \verb|link| denotes a link layer -multicast address. - -If a multicast address has more than one user, the number -of users is shown after the \verb|users| keyword. - -One additional feature not present in the example above -is the \verb|static| flag, which indicates that the address was joined -with \verb|ip maddr add|. See the following subsection. - - - -\subsection{{\tt ip maddress add} --- add a multicast address\\ - {\tt ip maddress delete} --- delete a multicast address} - -\paragraph{Abbreviations:} \verb|add|, \verb|a|; \verb|delete|, \verb|del|, \verb|d|. - -\paragraph{Description:} these commands attach/detach -a static link layer multicast address to listen on the interface. -Note that it is impossible to join protocol multicast groups -statically. This command only manages link layer addresses. - - -\paragraph{Arguments:} - -\begin{itemize} -\item \verb|address LLADDRESS| (default) - ---- the link layer multicast address. - -\item \verb|dev NAME| - ---- the device to join/leave this multicast address. - -\end{itemize} - - -\paragraph{Example:} Let us continue with the example from the previous subsection.
- -\begin{verbatim} -netadm@alisa:~ # ip maddr add 33:33:00:00:00:01 dev dummy -netadm@alisa:~ # ip -0 maddr ls dummy -2: dummy - link 33:33:00:00:00:01 users 2 static - link 01:00:5e:00:00:01 -netadm@alisa:~ # ip maddr del 33:33:00:00:00:01 dev dummy -\end{verbatim} - -\begin{NB} - Neither \verb|ip| nor the kernel check for multicast address validity. - Particularly, this means that you can try to load a unicast address - instead of a multicast address. Most drivers will ignore such addresses, - but several (f.e.\ Tulip) will intern it to their on-board filter. - The effects may be strange. Namely, the addresses become additional - local link addresses and, if you loaded the address of another host - to the router, wait for duplicated packets on the wire. - It is not a bug, but rather a hole in the API and intra-kernel interfaces. - This feature is really more useful for traffic monitoring, but using it - with Linux-2.2 you {\em have to\/} be sure that the host is not - a router and, especially, that it is not a transparent proxy or masquerading - agent. -\end{NB} - - - -\section{{\tt ip mroute} --- multicast routing cache management} -\label{IP-MROUTE} - -\paragraph{Abbreviations:} \verb|mroute|, \verb|mr|. - -\paragraph{Object:} \verb|mroute| objects are multicast routing cache -entries created by a user level mrouting daemon -(f.e.\ \verb|pimd| or \verb|mrouted|). - -Due to the limitations of the current interface to the multicast routing -engine, it is impossible to change \verb|mroute| objects administratively, -so we may only display them. This limitation will be removed -in the future. - -\paragraph{Commands:} \verb|show| (or \verb|list|). - - -\subsection{{\tt ip mroute show} --- list mroute cache entries} - -\paragraph{Abbreviations:} \verb|show|, \verb|list|, \verb|sh|, \verb|ls|, \verb|l|. - -\paragraph{Arguments:} - -\begin{itemize} -\item \verb|to PREFIX| (default) - ---- the prefix selecting the destination multicast addresses to list. 
- - -\item \verb|iif NAME| - ---- the interface on which multicast packets are received. - - -\item \verb|from PREFIX| - ---- the prefix selecting the IP source addresses of the multicast route. - - -\end{itemize} - -\paragraph{Output format:} - -\begin{verbatim} -kuznet@amber:~ $ ip mroute ls -(193.232.127.6, 224.0.1.39) Iif: unresolved -(193.232.244.34, 224.0.1.40) Iif: unresolved -(193.233.7.65, 224.66.66.66) Iif: eth0 Oifs: pimreg -kuznet@amber:~ $ -\end{verbatim} - -Each line shows one (S,G) entry in the multicast routing cache, -where S is the source address and G is the multicast group. \verb|Iif| is -the interface on which multicast packets are expected to arrive. -If the word \verb|unresolved| is there instead of the interface name, -it means that the routing daemon still hasn't resolved this entry. -The keyword \verb|oifs| is followed by a list of output interfaces, separated -by spaces. If a multicast routing entry is created with non-trivial -TTL scope, administrative distances are appended to the device names -in the \verb|oifs| list. - -\paragraph{Statistics:} The \verb|-statistics| option also prints the -number of packets and bytes forwarded along this route and -the number of packets that arrived on the wrong interface, if this number is not zero. - -\begin{verbatim} -kuznet@amber:~ $ ip -s mr ls 224.66/16 -(193.233.7.65, 224.66.66.66) Iif: eth0 Oifs: pimreg - 9383 packets, 300256 bytes -kuznet@amber:~ $ -\end{verbatim} - - -\section{{\tt ip tunnel} --- tunnel configuration} -\label{IP-TUNNEL} - -\paragraph{Abbreviations:} \verb|tunnel|, \verb|tunl|. - -\paragraph{Object:} \verb|tunnel| objects are tunnels, encapsulating -packets in IPv4 packets and then sending them over the IP infrastructure. - -\paragraph{Commands:} \verb|add|, \verb|delete|, \verb|change|, \verb|show| -(or \verb|list|). - -\paragraph{See also:} A more informal discussion of tunneling -over IP and the \verb|ip tunnel| command can be found in~\cite{IP-TUNNELS}. 
- -\subsection{{\tt ip tunnel add} --- add a new tunnel\\ - {\tt ip tunnel change} --- change an existing tunnel\\ - {\tt ip tunnel delete} --- destroy a tunnel} - -\paragraph{Abbreviations:} \verb|add|, \verb|a|; \verb|change|, \verb|chg|; -\verb|delete|, \verb|del|, \verb|d|. - - -\paragraph{Arguments:} - -\begin{itemize} - -\item \verb|name NAME| (default) - ---- select the tunnel device name. - -\item \verb|mode MODE| - ---- set the tunnel mode. Three modes are currently available: - \verb|ipip|, \verb|sit| and \verb|gre|. - -\item \verb|remote ADDRESS| - ---- set the remote endpoint of the tunnel. - -\item \verb|local ADDRESS| - ---- set the fixed local address for tunneled packets. -It must be an address on another interface of this host. - -\item \verb|ttl N| - ---- set a fixed TTL \verb|N| on tunneled packets. - \verb|N| is a number in the range 1--255. 0 is a special value - meaning that packets inherit the TTL value. - The default value is: \verb|inherit|. - -\item \verb|tos T| or \verb|dsfield T| - ---- set a fixed TOS \verb|T| on tunneled packets. - The default value is: \verb|inherit|. - - - -\item \verb|dev NAME| - ---- bind the tunnel to the device \verb|NAME| so that - tunneled packets will only be routed via this device and will - not be able to escape to another device when the route to endpoint changes. - -\item \verb|nopmtudisc| - ---- disable Path MTU Discovery on this tunnel. - It is enabled by default. Note that a fixed ttl is incompatible - with this option: tunnelling with a fixed ttl always makes pmtu discovery. - -\item \verb|ignore-df| - ---- (only GRE tunnels) enable IPv4 DF flag suppression on this tunnel. - It is disabled by default. Enabling this option will cause IPv4 - payloads to be handled like any other GRE payload, - regardless of the DF flag. - -\item \verb|key K|, \verb|ikey K|, \verb|okey K| - ---- (only GRE tunnels) use keyed GRE with key \verb|K|. \verb|K| is - either a number or an IP address-like dotted quad.
- The \verb|key| parameter sets the key to use in both directions. - The \verb|ikey| and \verb|okey| parameters set different keys for input and output. - - -\item \verb|csum|, \verb|icsum|, \verb|ocsum| - ---- (only GRE tunnels) generate/require checksums for tunneled packets. - The \verb|ocsum| flag calculates checksums for outgoing packets. - The \verb|icsum| flag requires that all input packets have the correct - checksum. The \verb|csum| flag is equivalent to the combination - ``\verb|icsum| \verb|ocsum|''. - -\item \verb|seq|, \verb|iseq|, \verb|oseq| - ---- (only GRE tunnels) serialize packets. - The \verb|oseq| flag enables sequencing of outgoing packets. - The \verb|iseq| flag requires that all input packets are serialized. - The \verb|seq| flag is equivalent to the combination ``\verb|iseq| \verb|oseq|''. - -\begin{NB} - I think this option does not - work. At least, I did not test it, did not debug it and - do not even understand how it is supposed to work or for what - purpose Cisco planned to use it. Do not use it. -\end{NB} - - -\end{itemize} - -\paragraph{Example:} Create a pointopoint IPv6 tunnel with maximal TTL of 32. -\begin{verbatim} -netadm@amber:~ # ip tunl add Cisco mode sit remote 192.31.7.104 \ - local 192.203.80.142 ttl 32 -\end{verbatim} - -\subsection{{\tt ip tunnel show} --- list tunnels} - -\paragraph{Abbreviations:} \verb|show|, \verb|list|, \verb|sh|, \verb|ls|, \verb|l|. - - -\paragraph{Arguments:} None. - -\paragraph{Output format:} -\begin{verbatim} -kuznet@amber:~ $ ip tunl ls Cisco -Cisco: ipv6/ip remote 192.31.7.104 local 192.203.80.142 ttl 32 -kuznet@amber:~ $ -\end{verbatim} -The line starts with the tunnel device name followed by a colon. -Then the tunnel mode follows. The parameters of the tunnel are listed -with the same keywords that were used when creating the tunnel. 
- -\paragraph{Statistics:} - -\begin{verbatim} -kuznet@amber:~ $ ip -s tunl ls Cisco -Cisco: ipv6/ip remote 192.31.7.104 local 192.203.80.142 ttl 32 -RX: Packets Bytes Errors CsumErrs OutOfSeq Mcasts - 12566 1707516 0 0 0 0 -TX: Packets Bytes Errors DeadLoop NoRoute NoBufs - 13445 1879677 0 0 0 0 -kuznet@amber:~ $ -\end{verbatim} -Essentially, these numbers are the same as the numbers -printed with {\tt ip -s link show} -(sec.\ref{IP-LINK-SHOW}, p.\pageref{IP-LINK-SHOW}) but the tags are different -to reflect that they are tunnel specific. -\begin{itemize} -\item \verb|CsumErrs| --- the total number of packets dropped -because of checksum failures for a GRE tunnel with checksumming enabled. -\item \verb|OutOfSeq| --- the total number of packets dropped -because they arrived out of sequence for a GRE tunnel with -serialization enabled. -\item \verb|Mcasts| --- the total number of multicast packets -received on a broadcast GRE tunnel. -\item \verb|DeadLoop| --- the total number of packets which were not -transmitted because the tunnel is looped back to itself. -\item \verb|NoRoute| --- the total number of packets which were not -transmitted because there is no IP route to the remote endpoint. -\item \verb|NoBufs| --- the total number of packets which were not -transmitted because the kernel failed to allocate a buffer. -\end{itemize} - - -\section{{\tt ip monitor} and {\tt rtmon} --- state monitoring} -\label{IP-MONITOR} - -The \verb|ip| utility can monitor the state of devices, addresses -and routes continuously. This option has a slightly different format. -Namely, -the \verb|monitor| command is the first in the command line and then -the object list follows: -\begin{verbatim} - ip monitor [ file FILE ] [ all | OBJECT-LIST ] [ label ] -\end{verbatim} -\verb|OBJECT-LIST| is the list of object types that we want to -monitor. It may contain \verb|link|, \verb|address| and \verb|route|. 
-Specifying \verb|label| indicates that output lines should be labelled -with the type of object being printed --- this happens by default if -\verb|all| is specified. If no \verb|file| argument is given, -\verb|ip| opens RTNETLINK, listens on it and dumps state changes in -the format described in previous sections. - -If a file name is given, it does not listen on RTNETLINK, -but opens the file containing RTNETLINK messages saved in binary format -and dumps them. Such a history file can be generated with the -\verb|rtmon| utility. This utility has a command line syntax similar to -\verb|ip monitor|. -Ideally, \verb|rtmon| should be started before -the first network configuration command is issued. F.e.\ if -you insert: -\begin{verbatim} - rtmon file /var/log/rtmon.log -\end{verbatim} -in a startup script, you will be able to view the full history -later. - -Certainly, it is possible to start \verb|rtmon| at any time. -It prepends the history with the state snapshot dumped at the moment -of starting. - - -\section{Route realms and policy propagation, {\tt rtacct}} -\label{RT-REALMS} - -On routers using OSPF ASE or, especially, the BGP protocol, routing -tables may be huge. If we want to classify or to account for the packets -per route, we will have to keep lots of information. Even worse, if we -want to distinguish the packets not only by their destination, but -also by their source, the task gets quadratic complexity and its solution -is physically impossible. - -One approach to propagating the policy from routing protocols -to the forwarding engine has been proposed in~\cite{IOS-BGP-PP}. -Essentially, Cisco Policy Propagation via BGP is based on the fact -that dedicated routers all have the RIB (Routing Information Base) -close to the forwarding engine, so policy routing rules can -check all the route attributes, including ASPATH information -and community strings. 
- -The Linux architecture, splitting the RIB (maintained by a user level -daemon) and the kernel based FIB (Forwarding Information Base), -does not allow such a simple approach. - -It is to our fortune because there is another solution -which allows even more flexible policy and richer semantics. - -Namely, routes can be clustered together in user space, based on their -attributes. F.e.\ a BGP router knows route ASPATH, its community; -an OSPF router knows the route tag or its area. The administrator, when adding -routes manually, also knows their nature. Providing that the number of such -aggregates (we call them {\em realms\/}) is low, the task of full -classification both by source and destination becomes quite manageable. - -So each route may be assigned to a realm. It is assumed that -this identification is made by a routing daemon, but static routes -can also be handled manually with \verb|ip route| (see sec.\ref{IP-ROUTE}, -p.\pageref{IP-ROUTE}). -\begin{NB} - There is a patch to \verb|gated|, allowing classification of routes - to realms with all the set of policy rules implemented in \verb|gated|: - by prefix, by ASPATH, by origin, by tag etc. -\end{NB} - -To facilitate the construction (f.e.\ in case the routing -daemon is not aware of realms), missing realms may be completed -with routing policy rules, see sec.~\ref{IP-RULE}, p.\pageref{IP-RULE}. - -For each packet the kernel calculates a tuple of realms: source realm -and destination realm, using the following algorithm: - -\begin{enumerate} -\item If the route has a realm, the destination realm of the packet is set to it. -\item If the rule has a source realm, the source realm of the packet is set to it. -If the destination realm was not inherited from the route and the rule has a destination realm, -it is also set. -\item If at least one of the realms is still unknown, the kernel finds -the reversed route to the source of the packet. 
-\item If the source realm is still unknown, get it from the reversed route. -\item If one of the realms is still unknown, swap the realms of reversed -routes and apply step 2 again. -\end{enumerate} - -After this procedure is completed we know what realm the packet -arrived from and the realm where it is going to propagate to. -If some of the realms are unknown, they are initialized to zero -(or realm \verb|unknown|). - -The main application of realms is the TC \verb|route| classifier~\cite{TC-CREF}, -where they are used to help assign packets to traffic classes, -to account, police and schedule them according to this -classification. - -A much simpler but still very useful application is incoming packet -accounting by realms. The kernel gathers a packet statistics summary -which can be viewed with the \verb|rtacct| utility. -\begin{verbatim} -kuznet@amber:~ $ rtacct russia -Realm BytesTo PktsTo BytesFrom PktsFrom -russia 20576778 169176 47080168 153805 -kuznet@amber:~ $ -\end{verbatim} -This shows that this router received 153805 packets from -the realm \verb|russia| and forwarded 169176 packets to \verb|russia|. -The realm \verb|russia| consists of routes with ASPATHs not leaving -Russia. - -Note that locally originating packets are not accounted here, -\verb|rtacct| shows incoming packets only. Using the \verb|route| -classifier (see~\cite{TC-CREF}) you can get even more detailed -accounting information about outgoing packets, optionally -summarizing traffic not only by source or destination, but -by any pair of source and destination realms. - - -\begin{thebibliography}{99} -\addcontentsline{toc}{section}{References} -\bibitem{RFC-NDISC} T.~Narten, E.~Nordmark, W.~Simpson. -``Neighbor Discovery for IP Version 6 (IPv6)'', RFC-2461. - -\bibitem{RFC-ADDRCONF} S.~Thomson, T.~Narten. -``IPv6 Stateless Address Autoconfiguration'', RFC-2462. - -\bibitem{RFC1812} F.~Baker. -``Requirements for IP Version 4 Routers'', RFC-1812. - -\bibitem{RFC1122} R.~T.~Braden. 
-``Requirements for Internet hosts --- communication layers'', RFC-1122. - -\bibitem{IOS} ``Cisco IOS Release 12.0 Network Protocols -Command Reference, Part 1'' and -``Cisco IOS Release 12.0 Quality of Service Solutions -Configuration Guide: Configuring Policy-Based Routing'',\\ -http://www.cisco.com/univercd/cc/td/doc/product/software/ios120. - -\bibitem{IP-TUNNELS} A.~N.~Kuznetsov. -``Tunnels over IP in Linux-2.2'', \\ -In: {\tt ftp://ftp.inr.ac.ru/ip-routing/iproute2-current.tar.gz}. - -\bibitem{TC-CREF} A.~N.~Kuznetsov. ``TC Command Reference'',\\ -In: {\tt ftp://ftp.inr.ac.ru/ip-routing/iproute2-current.tar.gz}. - -\bibitem{IOS-BGP-PP} ``Cisco IOS Release 12.0 Quality of Service Solutions -Configuration Guide: Configuring QoS Policy Propagation via -Border Gateway Protocol'',\\ -http://www.cisco.com/univercd/cc/td/doc/product/software/ios120. - -\bibitem{RFC-DHCP} R.~Droms. -``Dynamic Host Configuration Protocol.'', RFC-2131 - -\bibitem{RFC2414} M.~Allman, S.~Floyd, C.~Partridge. -``Increasing TCP's Initial Window'', RFC-2414. - -\end{thebibliography} - - - - -\appendix -\addcontentsline{toc}{section}{Appendix} - -\section{Source address selection} -\label{ADDR-SEL} - -When a host creates an IP packet, it must select some source -address. Correct source address selection is a critical procedure, -because it gives the receiver the information needed to deliver a -reply. If the source is selected incorrectly, in the best case, -the backward path may appear different to the forward one which -is harmful for performance. In the worst case, when the addresses -are administratively scoped, the reply may be lost entirely. - -Linux-2.2 selects source addresses using the following algorithm: - -\begin{itemize} -\item -The application may select a source address explicitly with \verb|bind(2)| -syscall or supplying it to \verb|sendmsg(2)| via the ancillary data object -\verb|IP_PKTINFO|. 
In this case the kernel only checks the validity -of the address and never tries to ``improve'' an incorrect user choice, -generating an error instead. -\begin{NB} - Never say ``Never''. The sysctl option \verb|ip_dynaddr| breaks - this axiom. It has been made deliberately with the purpose - of automatically reselecting the address on hosts with dynamic dial-out interfaces. - However, this hack {\em must not\/} be used on multihomed hosts - and especially on routers: it would break them. -\end{NB} - - -\item Otherwise, IP routing tables can contain an explicit source -address hint for this destination. The hint is set with the \verb|src| parameter -to the \verb|ip route| command, sec.\ref{IP-ROUTE}, p.\pageref{IP-ROUTE}. - - -\item Otherwise, the kernel searches through the list of addresses -attached to the interface through which the packets will be routed. -The search strategies are different for IP and IPv6. Namely: - -\begin{itemize} -\item IPv6 searches for the first valid, not deprecated address -with the same scope as the destination. - -\item IP searches for the first valid address with a scope wider -than the scope of the destination but it prefers addresses -which fall to the same subnet as the nexthop of the route -to the destination. Unlike IPv6, the scopes of IPv4 destinations -are not encoded in their addresses but are supplied -in routing tables instead (the \verb|scope| parameter to the \verb|ip route| command, -sec.\ref{IP-ROUTE}, p.\pageref{IP-ROUTE}). - -\end{itemize} - - -\item Otherwise, if the scope of the destination is \verb|link| or \verb|host|, -the algorithm fails and returns a zero source address. - -\item Otherwise, all interfaces are scanned to search for an address -with an appropriate scope. The loopback device \verb|lo| is always the first -in the search list, so that if an address with global scope (not 127.0.0.1!) -is configured on loopback, it is always preferred. 
- -\end{itemize} - - -\section{Proxy ARP/NDISC} -\label{PROXY-NEIGH} - -Routers may answer ARP/NDISC solicitations on behalf of other hosts. -In Linux-2.2 proxy ARP on an interface may be enabled -by setting the kernel \verb|sysctl| variable -\verb|/proc/sys/net/ipv4/conf/<dev>/proxy_arp| to 1. After this, the router -starts to answer ARP requests on the interface \verb|<dev>|, provided -the route to the requested destination does {\em not\/} go back via the same -device. - -The variable \verb|/proc/sys/net/ipv4/conf/all/proxy_arp| enables proxy -ARP on all the IP devices. - -However, this approach fails in the case of IPv6 because the router -must join the solicited node multicast address to listen for the corresponding -NDISC queries. It means that proxy NDISC is possible only on a per destination -basis. - -Logically, proxy ARP/NDISC is not a kernel task. It can easily be implemented -in user space. However, similar functionality was present in BSD kernels -and in Linux-2.0, so we have to preserve it at least to the extent that -is standardized in BSD. -\begin{NB} - Linux-2.0 ARP had a feature called {\em subnet\/} proxy ARP. - It is replaced with the sysctl flag in Linux-2.2. -\end{NB} - - -The \verb|ip| utility provides a way to manage proxy ARP/NDISC -with the \verb|ip neigh| command, namely: -\begin{verbatim} - ip neigh add proxy ADDRESS [ dev NAME ] -\end{verbatim} -adds a new proxy ARP/NDISC record and -\begin{verbatim} - ip neigh del proxy ADDRESS [ dev NAME ] -\end{verbatim} -deletes it. - -If the name of the device is not given, the router will answer solicitations -for address \verb|ADDRESS| on all devices, otherwise it will only serve -the device \verb|NAME|. Even if the proxy entry is created with -\verb|ip neigh|, the router {\em will not\/} answer a query if the route -to the destination goes back via the interface from which the solicitation -was received. 
- -It is important to emphasize that proxy entries have {\em no\/} -parameters other than these (IP/IPv6 address and optional device). -Particularly, the entry does not store any link layer address. -It always advertises the station address of the interface -on which it sends advertisements (i.e. it's own station address). - -\section{Route NAT status} -\label{ROUTE-NAT} - -NAT (or ``Network Address Translation'') remaps some parts -of the IP address space into other ones. Linux-2.2 route NAT is supposed -to be used to facilitate policy routing by rewriting addresses -to other routing domains or to help while renumbering sites -to another prefix. - -\paragraph{What it is not:} -It is necessary to emphasize that {\em it is not supposed\/} -to be used to compress address space or to split load. -This is not missing functionality but a design principle. -Route NAT is {\em stateless\/}. It does not hold any state -about translated sessions. This means that it handles any number -of sessions flawlessly. But it also means that it is {\em static\/}. -It cannot detect the moment when the last TCP client stops -using an address. For the same reason, it will not help to split -load between several servers. -\begin{NB} -It is a pretty commonly held belief that it is useful to split load between -several servers with NAT. This is a mistake. All you get from this -is the requirement that the router keep the state of all the TCP connections -going via it. Well, if the router is so powerful, run apache on it. 8) -\end{NB} - -The second feature: it does not touch packet payload, -does not try to ``improve'' broken protocols by looking -through its data and mangling it. It mangles IP addresses, -only IP addresses and nothing but IP addresses. -This also, is not missing any functionality. - -To resume: if you need to compress address space or keep -active FTP clients happy, your choice is not route NAT but masquerading, -port forwarding, NAPT etc. 
-\begin{NB} -By the way, you may also want to look at -http://www.suse.com/\~mha/HyperNews/get/linux-ip-nat.html -\end{NB} - - -\paragraph{How it works.} -Some part of the address space is reserved for dummy addresses -which will look for all the world like some host addresses -inside your network. No other hosts may use these addresses, -however other routers may also be configured to translate them. -\begin{NB} -A great advantage of route NAT is that it may be used not -only in stub networks but in environments with arbitrarily complicated -structure. It does not firewall, it {\em forwards.} -\end{NB} -These addresses are selected by the \verb|ip route| command -(sec.\ref{IP-ROUTE-ADD}, p.\pageref{IP-ROUTE-ADD}). F.e.\ -\begin{verbatim} - ip route add nat 192.203.80.144 via 193.233.7.83 -\end{verbatim} -states that the single address 192.203.80.144 is a dummy NAT address. -For all the world it looks like a host address inside our network. -For neighbouring hosts and routers it looks like the local address -of the translating router. The router answers ARP for it, advertises -this address as routed via it, {\em et al\/}. When the router -receives a packet destined for 192.203.80.144, it replaces -this address with 193.233.7.83 which is the address of some real -host and forwards the packet. If you need to remap -blocks of addresses, you may use a command like: -\begin{verbatim} - ip route add nat 192.203.80.192/26 via 193.233.7.64 -\end{verbatim} -This command will map a block of 63 addresses 192.203.80.192-255 to -193.233.7.64-127. - -When an internal host (193.233.7.83 in the example above) -sends something to the outer world and these packets are forwarded -by our router, it should translate the source address 193.233.7.83 -into 192.203.80.144. 
This task is solved by setting a special -policy rule (sec.\ref{IP-RULE-ADD}, p.\pageref{IP-RULE-ADD}): -\begin{verbatim} - ip rule add prio 320 from 193.233.7.83 nat 192.203.80.144 -\end{verbatim} -This rule says that the source address 193.233.7.83 -should be translated into 192.203.80.144 before forwarding. -It is important that the address after the \verb|nat| keyword -is some NAT address, declared by {\tt ip route add nat}. -If it is just a random address the router will not map to it. -\begin{NB} -The exception is when the address is a local address of this -router (or 0.0.0.0) and masquerading is configured in the linux-2.2 -kernel. In this case the router will masquerade the packets as this address. -If 0.0.0.0 is selected, the result is equivalent to one -obtained with firewalling rules. Otherwise, you have the way -to order Linux to masquerade to this fixed address. -NAT mechanism used in linux-2.4 is more flexible than -masquerading, so that this feature has lost meaning and disabled. -\end{NB} - -If the network has non-trivial internal structure, it is -useful and even necessary to add rules disabling translation -when a packet does not leave this network. Let us return to the -example from sec.\ref{IP-RULE-SHOW} (p.\pageref{IP-RULE-SHOW}). -\begin{verbatim} -300: from 193.233.7.83 to 193.233.7.0/24 lookup main -310: from 193.233.7.83 to 192.203.80.0/24 lookup main -320: from 193.233.7.83 lookup inr.ruhep map-to 192.203.80.144 -\end{verbatim} -This block of rules causes normal forwarding when -packets from 193.233.7.83 do not leave networks 193.233.7/24 -and 192.203.80/24. Also, if the \verb|inr.ruhep| table does not -contain a route to the destination (which means that the routing -domain owning addresses from 192.203.80/24 is dead), no translation -will occur. Otherwise, the packets are translated. 
- -\paragraph{How to only translate selected ports:} -If you only want to translate selected ports (f.e.\ http) -and leave the rest intact, you may use \verb|ipchains| -to \verb|fwmark| a class of packets. -Suppose you did and all the packets from 193.233.7.83 -destined for port 80 are marked with marker 0x1234 in input fwchain. -In this case you may replace rule \#320 with: -\begin{verbatim} -320: from 193.233.7.83 fwmark 1234 lookup main map-to 192.203.80.144 -\end{verbatim} -and translation will only be enabled for outgoing http requests. - -\section{Example: minimal host setup} -\label{EXAMPLE-SETUP} - -The following script gives an example of a fault safe -setup of IP (and IPv6, if it is compiled into the kernel) -in the common case of a node attached to a single broadcast -network. A more advanced script, which may be used both on multihomed -hosts and on routers, is described in the following -section. - -The utilities used in the script may be found in the -directory ftp://ftp.inr.ac.ru/ip-routing/: -\begin{enumerate} -\item \verb|ip| --- package \verb|iproute2|. -\item \verb|arping| --- package \verb|iputils|. -\item \verb|rdisc| --- package \verb|iputils|. -\end{enumerate} -\begin{NB} -It also refers to a DHCP client, \verb|dhcpcd|. I should refrain from -recommending a good DHCP client to use. All that I can -say is that ISC \verb|dhcp-2.0b1pl6| patched with the patch that -can be found in the \verb|dhcp.bootp.rarp| subdirectory of -the same ftp site {\em does\/} work, -at least on Ethernet and Token Ring. -\end{NB} - -\begin{verbatim} -#! /bin/bash -\end{verbatim} -\begin{flushleft} -\# {\bf Usage: \verb|ifone ADDRESS[/PREFIX-LENGTH] [DEVICE]|}\\ -\# {\bf Parameters:}\\ -\# \$1 --- Static IP address, optionally followed by prefix length.\\ -\# \$2 --- Device name. If it is missing, \verb|eth0| is asssumed.\\ -\# F.e. 
\verb|ifone 193.233.7.90| -\end{flushleft} -\begin{verbatim} -dev=$2 -: ${dev:=eth0} -ipaddr= -\end{verbatim} -\# Parse IP address, splitting prefix length. -\begin{verbatim} -if [ "$1" != "" ]; then - ipaddr=${1%/*} - if [ "$1" != "$ipaddr" ]; then - pfxlen=${1#*/} - fi - : ${pfxlen:=24} -fi -pfx="${ipaddr}/${pfxlen}" -\end{verbatim} - -\begin{flushleft} -\# {\bf Step 0} --- enable loopback.\\ -\#\\ -\# This step is necessary on any networked box before attempt\\ -\# to configure any other device.\\ -\end{flushleft} -\begin{verbatim} -ip link set up dev lo -ip addr add 127.0.0.1/8 dev lo brd + scope host -\end{verbatim} -\begin{flushleft} -\# IPv6 autoconfigure themself on loopback.\\ -\#\\ -\# If user gave loopback as device, we add the address as alias and exit. -\end{flushleft} -\begin{verbatim} -if [ "$dev" = "lo" ]; then - if [ "$ipaddr" != "" -a "$ipaddr" != "127.0.0.1" ]; then - ip address add $ipaddr dev $dev - exit $? - fi - exit 0 -fi -\end{verbatim} - -\noindent\# {\bf Step 1} --- enable device \verb|$dev| - -\begin{verbatim} -if ! ip link set up dev $dev ; then - echo "Cannot enable interface $dev. Aborting." 1>&2 - exit 1 -fi -\end{verbatim} -\begin{flushleft} -\# The interface is \verb|UP|. IPv6 started stateless autoconfiguration itself,\\ -\# and its configuration finishes here. However,\\ -\# IP still needs some static preconfigured address. -\end{flushleft} -\begin{verbatim} -if [ "$ipaddr" = "" ]; then - echo "No address for $dev is configured, trying DHCP..." 1>&2 - dhcpcd - exit $? -fi -\end{verbatim} - -\begin{flushleft} -\# {\bf Step 2} --- IP Duplicate Address Detection~\cite{RFC-DHCP}.\\ -\# Send two probes and wait for result for 3 seconds.\\ -\# If the interface opens slower f.e.\ due to long media detection,\\ -\# you want to increase the timeout.\\ -\end{flushleft} -\begin{verbatim} -if ! arping -q -c 2 -w 3 -D -I $dev $ipaddr ; then - echo "Address $ipaddr is busy, trying DHCP..." 1>&2 - dhcpcd - exit $? 
-fi -\end{verbatim} -\begin{flushleft} -\# OK, the address is unique, we may add it on the interface.\\ -\#\\ -\# {\bf Step 3} --- Configure the address on the interface. -\end{flushleft} - -\begin{verbatim} -if ! ip address add $pfx brd + dev $dev; then - echo "Failed to add $pfx on $dev, trying DHCP..." 1>&2 - dhcpcd - exit $? -fi -\end{verbatim} - -\noindent\# {\bf Step 4} --- Announce our presence on the link. -\begin{verbatim} -arping -A -c 1 -I $dev $ipaddr -noarp=$? -( sleep 2; - arping -U -c 1 -I $dev $ipaddr ) >& /dev/null </dev/null & -\end{verbatim} - -\begin{flushleft} -\# {\bf Step 5} (optional) --- Add some control routes.\\ -\#\\ -\# 1. Prohibit link local multicast addresses.\\ -\# 2. Prohibit link local (alias, limited) broadcast.\\ -\# 3. Add default multicast route. -\end{flushleft} -\begin{verbatim} -ip route add unreachable 224.0.0.0/24 -ip route add unreachable 255.255.255.255 -if [ `ip link ls $dev | grep -c MULTICAST` -ge 1 ]; then - ip route add 224.0.0.0/4 dev $dev scope global -fi -\end{verbatim} - -\begin{flushleft} -\# {\bf Step 6} --- Add fallback default route with huge metric.\\ -\# If a proxy ARP server is present on the interface, we will be\\ -\# able to talk to all the Internet without further configuration.\\ -\# It is not so cheap though and we still hope that this route\\ -\# will be overridden by more correct one by rdisc.\\ -\# Do not make this step if the device is not ARPable,\\ -\# because dead nexthop detection does not work on them. -\end{flushleft} -\begin{verbatim} -if [ "$noarp" = "0" ]; then - ip ro add default dev $dev metric 30000 scope global -fi -\end{verbatim} - -\begin{flushleft} -\# {\bf Step 7} --- Restart router discovery and exit. 
-\end{flushleft} -\begin{verbatim} -killall -HUP rdisc || rdisc -fs -exit 0 -\end{verbatim} - - -\section{Example: {\protect\tt ifcfg} --- interface address management} -\label{EXAMPLE-IFCFG} - -This is a simplistic script replacing one option of \verb|ifconfig|, -namely, IP address management. It not only adds -addresses, but also carries out Duplicate Address Detection~\cite{RFC-DHCP}, -sends unsolicited ARP to update the caches of other hosts sharing -the interface, adds some control routes and restarts Router Discovery -when it is necessary. - -I strongly recommend using it {\em instead\/} of \verb|ifconfig| both -on hosts and on routers. - -\begin{verbatim} -#! /bin/bash -\end{verbatim} -\begin{flushleft} -\# {\bf Usage: \verb?ifcfg DEVICE[:ALIAS] [add|del] ADDRESS[/LENGTH] [PEER]?}\\ -\# {\bf Parameters:}\\ -\# ---Device name. It may have alias suffix, separated by colon.\\ -\# ---Command: add, delete or stop.\\ -\# ---IP address, optionally followed by prefix length.\\ -\# ---Optional peer address for pointopoint interfaces.\\ -\# F.e. \verb|ifcfg eth0 193.233.7.90/24| - -\noindent\# This function determines, whether it is router or host.\\ -\# It returns 0, if the host is apparently not router. 
-\end{flushleft} -\begin{verbatim} -CheckForwarding () { - local sbase fwd - sbase=/proc/sys/net/ipv4/conf - fwd=0 - if [ -d $sbase ]; then - for dir in $sbase/*/forwarding; do - fwd=$[$fwd + `cat $dir`] - done - else - fwd=2 - fi - return $fwd -} -\end{verbatim} -\begin{flushleft} -\# This function restarts Router Discovery.\\ -\end{flushleft} -\begin{verbatim} -RestartRDISC () { - killall -HUP rdisc || rdisc -fs -} -\end{verbatim} -\begin{flushleft} -\# Calculate ABC "natural" mask length\\ -\# Arg: \$1 = dotquad address -\end{flushleft} -\begin{verbatim} -ABCMaskLen () { - local class; - class=${1%%.*} - if [ $class -eq 0 -o $class -ge 224 ]; then return 0 - elif [ $class -ge 192 ]; then return 24 - elif [ $class -ge 128 ]; then return 16 - else return 8 ; fi -} -\end{verbatim} - - -\begin{flushleft} -\# {\bf MAIN()}\\ -\#\\ -\# Strip alias suffix separated by colon. -\end{flushleft} -\begin{verbatim} -label="label $1" -ldev=$1 -dev=${1%:*} -if [ "$dev" = "" -o "$1" = "help" ]; then - echo "Usage: ifcfg DEV [[add|del [ADDR[/LEN]] [PEER] | stop]" 1>&2 - echo " add - add new address" 1>&2 - echo " del - delete address" 1>&2 - echo " stop - completely disable IP" 1>&2 - exit 1 -fi -shift - -CheckForwarding -fwd=$? -\end{verbatim} -\begin{flushleft} -\# Parse command. If it is ``stop'', flush and exit. -\end{flushleft} -\begin{verbatim} -deleting=0 -case "$1" in -add) shift ;; -stop) - if [ "$ldev" != "$dev" ]; then - echo "Cannot stop alias $ldev" 1>&2 - exit 1; - fi - ip -4 addr flush dev $dev $label || exit 1 - if [ $fwd -eq 0 ]; then RestartRDISC; fi - exit 0 ;; -del*) - deleting=1; shift ;; -*) -esac -\end{verbatim} -\begin{flushleft} -\# Parse prefix, split prefix length, separated by slash. -\end{flushleft} -\begin{verbatim} -ipaddr= -pfxlen= -if [ "$1" != "" ]; then - ipaddr=${1%/*} - if [ "$1" != "$ipaddr" ]; then - pfxlen=${1#*/} - fi - if [ "$ipaddr" = "" ]; then - echo "$1 is bad IP address." 
1>&2 - exit 1 - fi -fi -shift -\end{verbatim} -\begin{flushleft} -\# If peer address is present, prefix length is 32.\\ -\# Otherwise, if prefix length was not given, guess it. -\end{flushleft} -\begin{verbatim} -peer=$1 -if [ "$peer" != "" ]; then - if [ "$pfxlen" != "" -a "$pfxlen" != "32" ]; then - echo "Peer address with non-trivial netmask." 1>&2 - exit 1 - fi - pfx="$ipaddr peer $peer" -else - if [ "$pfxlen" = "" ]; then - ABCMaskLen $ipaddr - pfxlen=$? - fi - pfx="$ipaddr/$pfxlen" -fi -if [ "$ldev" = "$dev" -a "$ipaddr" != "" ]; then - label= -fi -\end{verbatim} -\begin{flushleft} -\# If deletion was requested, delete the address and restart RDISC -\end{flushleft} -\begin{verbatim} -if [ $deleting -ne 0 ]; then - ip addr del $pfx dev $dev $label || exit 1 - if [ $fwd -eq 0 ]; then RestartRDISC; fi - exit 0 -fi -\end{verbatim} -\begin{flushleft} -\# Start interface initialization.\\ -\#\\ -\# {\bf Step 0} --- enable device \verb|$dev| -\end{flushleft} -\begin{verbatim} -if ! ip link set up dev $dev ; then - echo "Error: cannot enable interface $dev." 1>&2 - exit 1 -fi -if [ "$ipaddr" = "" ]; then exit 0; fi -\end{verbatim} -\begin{flushleft} -\# {\bf Step 1} --- IP Duplicate Address Detection~\cite{RFC-DHCP}.\\ -\# Send two probes and wait for result for 3 seconds.\\ -\# If the interface opens slower f.e.\ due to long media detection,\\ -\# you want to increase the timeout.\\ -\end{flushleft} -\begin{verbatim} -if ! arping -q -c 2 -w 3 -D -I $dev $ipaddr ; then - echo "Error: some host already uses address $ipaddr on $dev." 1>&2 - exit 1 -fi -\end{verbatim} -\begin{flushleft} -\# OK, the address is unique. We may add it to the interface.\\ -\#\\ -\# {\bf Step 2} --- Configure the address on the interface. -\end{flushleft} -\begin{verbatim} -if ! ip address add $pfx brd + dev $dev $label; then - echo "Error: failed to add $pfx on $dev." 
1>&2 - exit 1 -fi -\end{verbatim} -\noindent\# {\bf Step 3} --- Announce our presence on the link -\begin{verbatim} -arping -q -A -c 1 -I $dev $ipaddr -noarp=$? -( sleep 2 ; - arping -q -U -c 1 -I $dev $ipaddr ) >& /dev/null </dev/null & -\end{verbatim} -\begin{flushleft} -\# {\bf Step 4} (optional) --- Add some control routes.\\ -\#\\ -\# 1. Prohibit link local multicast addresses.\\ -\# 2. Prohibit link local (alias, limited) broadcast.\\ -\# 3. Add default multicast route. -\end{flushleft} -\begin{verbatim} -ip route add unreachable 224.0.0.0/24 >& /dev/null -ip route add unreachable 255.255.255.255 >& /dev/null -if [ `ip link ls $dev | grep -c MULTICAST` -ge 1 ]; then - ip route add 224.0.0.0/4 dev $dev scope global >& /dev/null -fi -\end{verbatim} -\begin{flushleft} -\# {\bf Step 5} --- Add fallback default route with huge metric.\\ -\# If a proxy ARP server is present on the interface, we will be\\ -\# able to talk to all the Internet without further configuration.\\ -\# Do not make this step on router or if the device is not ARPable.\\ -\# because dead nexthop detection does not work on them. 
-\end{flushleft} -\begin{verbatim} -if [ $fwd -eq 0 ]; then - if [ $noarp -eq 0 ]; then - ip ro append default dev $dev metric 30000 scope global - elif [ "$peer" != "" ]; then - if ping -q -c 2 -w 4 $peer ; then - ip ro append default via $peer dev $dev metric 30001 - fi - fi - RestartRDISC -fi - -exit 0 -\end{verbatim} -\begin{flushleft} -\# End of {\bf MAIN()} -\end{flushleft} - - -\end{document} diff --git a/doc/preamble.tex b/doc/preamble.tex deleted file mode 100644 index 80ca5087..00000000 --- a/doc/preamble.tex +++ /dev/null @@ -1,26 +0,0 @@ -\textwidth 6.0in -\textheight 8.5in - -\input SNAPSHOT - -\pagestyle{myheadings} -\markboth{\protect\TITLE}{} -\markright{{\protect\sc iproute2-ss\Draft}} - -% To print it in compact form: both sides on one sheet (psnup -2) -\evensidemargin=\oddsidemargin - -\newenvironment{NB}{\bgroup \vskip 1mm\leftskip 1cm \footnotesize \noindent NB. -}{\par\egroup \vskip 1mm} - -\def\threeonly{[2.3.15+ only] } - -\begin{document} - -\makeatletter -\renewcommand{\@oddhead}{{\protect\sc iproute2-ss\Draft} \hfill \protect\arabic{page}} -\makeatother -\let\oldthefootnote\thefootnote -\def\thefootnote{} -\footnotetext{Copyright \copyright~1999 A.N.Kuznetsov} - From e4139268ba9608fff5915a9852c72f7ef69690a3 Mon Sep 17 00:00:00 2001 From: Phil Sutter <phil@nwl.cc> Date: Thu, 28 Sep 2017 19:33:56 +0200 Subject: [PATCH 10/28] ip-route: Fix for listing routes with RTAX_LOCK attribute This fixes a corner-case for routes with a certain metric locked to zero: | ip route add 192.168.7.0/24 dev eth0 window 0 | ip route add 192.168.7.0/24 dev eth0 window lock 0 Since the kernel doesn't dump the attribute if it is zero, both routes added above would appear as if they were equal although they are not. Fix this by taking mxlock value for the given metric into account before skipping it if it is not present. 
Reported-by: Thomas Haller <thaller@redhat.com> Signed-off-by: Phil Sutter <phil@nwl.cc> --- ip/iproute.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ip/iproute.c b/ip/iproute.c index a8733f45..e81bc05e 100644 --- a/ip/iproute.c +++ b/ip/iproute.c @@ -574,10 +574,10 @@ int print_route(const struct sockaddr_nl *who, struct nlmsghdr *n, void *arg) for (i = 2; i <= RTAX_MAX; i++) { __u32 val = 0U; - if (mxrta[i] == NULL) + if (mxrta[i] == NULL && !(mxlock & (1 << i))) continue; - if (i != RTAX_CC_ALGO) + if (mxrta[i] != NULL && i != RTAX_CC_ALGO) val = rta_getattr_u32(mxrta[i]); if (i == RTAX_HOPLIMIT && (int)val == -1) From 73451259daaa84185bd151461252590ba67cdee0 Mon Sep 17 00:00:00 2001 From: Yulia Kartseva <hex@fb.com> Date: Sat, 30 Sep 2017 20:18:40 -0700 Subject: [PATCH 11/28] tc: fix ipv6 filter selector attribute for some prefix lengths Wrong TCA_U32_SEL attribute packing if prefixLen AND 0x1f equals 0x1f. These are /31, /63, /95 and /127 prefix lengths. Example: ip6 dst face:b00f::/31 filter parent b: protocol ipv6 pref 2307 u32 filter parent b: protocol ipv6 pref 2307 u32 fh 800: ht divisor 1 filter parent b: protocol ipv6 pref 2307 u32 fh 800::800 order 2048 key ht 800 bkt 0 match faceb00f/ffffffff at 24 v2: previous patch was made with a wrong repo Signed-off-by: Yulia Kartseva <hex@fb.com> --- tc/f_u32.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tc/f_u32.c b/tc/f_u32.c index 5815be9c..14b95889 100644 --- a/tc/f_u32.c +++ b/tc/f_u32.c @@ -385,8 +385,7 @@ static int parse_ip6_addr(int *argc_p, char ***argv_p, plen = addr.bitlen; for (i = 0; i < plen; i += 32) { - /* if (((i + 31) & ~0x1F) <= plen) { */ - if (i + 31 <= plen) { + if (i + 31 < plen) { res = pack_key(sel, addr.data[i / 32], 0xFFFFFFFF, off + 4 * (i / 32), offmask); if (res < 0) From 4c0939a29e2c1739f0141c87ecd7940825734a22 Mon Sep 17 00:00:00 2001 From: Michal Kubecek <mkubecek@suse.cz> Date: Fri, 29 Sep 2017 13:41:05 +0200 Subject: [PATCH 
12/28] ip xfrm: use correct key length for netlink message When SA is added manually using "ip xfrm state add", xfrm_state_modify() uses alg_key_len field of struct xfrm_algo for the length of key passed to kernel in the netlink message. However alg_key_len is bit length of the key while we need byte length here. This is usually harmless as kernel ignores the excess data but when the bit length of the key exceeds 512 (XFRM_ALGO_KEY_BUF_SIZE), it can result in buffer overflow. We can simply divide by 8 here as the only place setting alg_key_len is in xfrm_algo_parse() where it is always set to a multiple of 8 (and there are already multiple places using "algo->alg_key_len / 8"). Signed-off-by: Michal Kubecek <mkubecek@suse.cz> --- ip/xfrm_state.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ip/xfrm_state.c b/ip/xfrm_state.c index 4483fb8f..99fdec23 100644 --- a/ip/xfrm_state.c +++ b/ip/xfrm_state.c @@ -539,7 +539,7 @@ static int xfrm_state_modify(int cmd, unsigned int flags, int argc, char **argv) xfrm_algo_parse((void *)&alg, type, name, key, buf, sizeof(alg.buf)); - len += alg.u.alg.alg_key_len; + len += alg.u.alg.alg_key_len / 8; addattr_l(&req.n, sizeof(req.buf), type, (void *)&alg, len); From 26111ab1dba820421ccaf283ac097a79b95023a2 Mon Sep 17 00:00:00 2001 From: Phil Sutter <phil@nwl.cc> Date: Mon, 2 Oct 2017 13:46:35 +0200 Subject: [PATCH 13/28] ip{6, }tunnel: Avoid copying user-supplied interface name around In both files' parse_args() functions as well as in iptunnel's do_prl() and do_6rd() functions, a user-supplied 'dev' parameter is uselessly copied into a temporary buffer before passing it to ll_name_to_index() or copying into a struct ifreq. Avoid this by just caching the argv pointer value until the later lookup/strcpy. 
Signed-off-by: Phil Sutter <phil@nwl.cc> --- ip/ip6tunnel.c | 6 +++--- ip/iptunnel.c | 22 +++++++++------------- 2 files changed, 12 insertions(+), 16 deletions(-) diff --git a/ip/ip6tunnel.c b/ip/ip6tunnel.c index b4a7def1..c12d700e 100644 --- a/ip/ip6tunnel.c +++ b/ip/ip6tunnel.c @@ -136,7 +136,7 @@ static void print_tunnel(struct ip6_tnl_parm2 *p) static int parse_args(int argc, char **argv, int cmd, struct ip6_tnl_parm2 *p) { int count = 0; - char medium[IFNAMSIZ] = {}; + const char *medium = NULL; while (argc > 0) { if (strcmp(*argv, "mode") == 0) { @@ -180,7 +180,7 @@ static int parse_args(int argc, char **argv, int cmd, struct ip6_tnl_parm2 *p) memcpy(&p->laddr, &laddr.data, sizeof(p->laddr)); } else if (strcmp(*argv, "dev") == 0) { NEXT_ARG(); - strncpy(medium, *argv, IFNAMSIZ - 1); + medium = *argv; } else if (strcmp(*argv, "encaplimit") == 0) { NEXT_ARG(); if (strcmp(*argv, "none") == 0) { @@ -285,7 +285,7 @@ static int parse_args(int argc, char **argv, int cmd, struct ip6_tnl_parm2 *p) count++; argc--; argv++; } - if (medium[0]) { + if (medium) { p->link = ll_name_to_index(medium); if (p->link == 0) { fprintf(stderr, "Cannot find device \"%s\"\n", medium); diff --git a/ip/iptunnel.c b/ip/iptunnel.c index 105d0f55..0acfd079 100644 --- a/ip/iptunnel.c +++ b/ip/iptunnel.c @@ -60,7 +60,7 @@ static void set_tunnel_proto(struct ip_tunnel_parm *p, int proto) static int parse_args(int argc, char **argv, int cmd, struct ip_tunnel_parm *p) { int count = 0; - char medium[IFNAMSIZ] = {}; + const char *medium = NULL; int isatap = 0; memset(p, 0, sizeof(*p)); @@ -139,7 +139,7 @@ static int parse_args(int argc, char **argv, int cmd, struct ip_tunnel_parm *p) p->iph.saddr = htonl(INADDR_ANY); } else if (strcmp(*argv, "dev") == 0) { NEXT_ARG(); - strncpy(medium, *argv, IFNAMSIZ - 1); + medium = *argv; } else if (strcmp(*argv, "ttl") == 0 || strcmp(*argv, "hoplimit") == 0 || strcmp(*argv, "hlim") == 0) { @@ -216,7 +216,7 @@ static int parse_args(int argc, char **argv, int 
cmd, struct ip_tunnel_parm *p) } } - if (medium[0]) { + if (medium) { p->link = ll_name_to_index(medium); if (p->link == 0) { fprintf(stderr, "Cannot find device \"%s\"\n", medium); @@ -465,9 +465,8 @@ static int do_prl(int argc, char **argv) { struct ip_tunnel_prl p = {}; int count = 0; - int devname = 0; int cmd = 0; - char medium[IFNAMSIZ] = {}; + const char *medium = NULL; while (argc > 0) { if (strcmp(*argv, "prl-default") == 0) { @@ -488,8 +487,7 @@ static int do_prl(int argc, char **argv) count++; } else if (strcmp(*argv, "dev") == 0) { NEXT_ARG(); - strncpy(medium, *argv, IFNAMSIZ-1); - devname++; + medium = *argv; } else { fprintf(stderr, "Invalid PRL parameter \"%s\"\n", *argv); @@ -502,7 +500,7 @@ static int do_prl(int argc, char **argv) } argc--; argv++; } - if (devname == 0) { + if (!medium) { fprintf(stderr, "Must specify device\n"); exit(-1); } @@ -513,9 +511,8 @@ static int do_prl(int argc, char **argv) static int do_6rd(int argc, char **argv) { struct ip_tunnel_6rd ip6rd = {}; - int devname = 0; int cmd = 0; - char medium[IFNAMSIZ] = {}; + const char *medium = NULL; inet_prefix prefix; while (argc > 0) { @@ -537,8 +534,7 @@ static int do_6rd(int argc, char **argv) cmd = SIOCDEL6RD; } else if (strcmp(*argv, "dev") == 0) { NEXT_ARG(); - strncpy(medium, *argv, IFNAMSIZ-1); - devname++; + medium = *argv; } else { fprintf(stderr, "Invalid 6RD parameter \"%s\"\n", *argv); @@ -546,7 +542,7 @@ static int do_6rd(int argc, char **argv) } argc--; argv++; } - if (devname == 0) { + if (!medium) { fprintf(stderr, "Must specify device\n"); exit(-1); } From ee474849c85116ec36e387882447f737ac3fdefb Mon Sep 17 00:00:00 2001 From: Phil Sutter <phil@nwl.cc> Date: Mon, 2 Oct 2017 13:46:36 +0200 Subject: [PATCH 14/28] tc: flower: No need to cache indev arg Since addattrstrz() will copy the provided string into the attribute payload, there is no need to cache the data. 
Signed-off-by: Phil Sutter <phil@nwl.cc> --- tc/f_flower.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/tc/f_flower.c b/tc/f_flower.c index 934832e2..99e62a38 100644 --- a/tc/f_flower.c +++ b/tc/f_flower.c @@ -629,11 +629,8 @@ static int flower_parse_opt(struct filter_util *qu, char *handle, } else if (matches(*argv, "skip_sw") == 0) { flags |= TCA_CLS_FLAGS_SKIP_SW; } else if (matches(*argv, "indev") == 0) { - char ifname[IFNAMSIZ] = {}; - NEXT_ARG(); - strncpy(ifname, *argv, sizeof(ifname) - 1); - addattrstrz(n, MAX_MSG, TCA_FLOWER_INDEV, ifname); + addattrstrz(n, MAX_MSG, TCA_FLOWER_INDEV, *argv); } else if (matches(*argv, "vlan_id") == 0) { __u16 vid; From 625df645b703dc858d54784c35beff64464afae2 Mon Sep 17 00:00:00 2001 From: Phil Sutter <phil@nwl.cc> Date: Mon, 2 Oct 2017 13:46:37 +0200 Subject: [PATCH 15/28] Check user supplied interface name lengths The original problem was that something like: | strncpy(ifr.ifr_name, *argv, IFNAMSIZ); might leave ifr.ifr_name unterminated if length of *argv exceeds IFNAMSIZ. In order to fix this, I thought about replacing all those cases with (equivalent) calls to snprintf() or even introducing strlcpy(). But as Ulrich Drepper correctly pointed out when rejecting the latter from being added to glibc, truncating a string without notifying the user is not to be considered good practice. So let's exercise what he suggested and reject empty, overlong or otherwise invalid interface names right from the start - this way calls to strncpy() like shown above become safe and the user has a chance to reconsider what he was trying to do. Note that this doesn't add calls to check_ifname() to all places where user supplied interface name is parsed. In many cases, the interface must exist already and is therefore looked up using ll_name_to_index(), so if_nametoindex() will perform the necessary checks already.
Signed-off-by: Phil Sutter <phil@nwl.cc> --- include/utils.h | 2 ++ ip/ip6tunnel.c | 3 ++- ip/ipl2tp.c | 4 +++- ip/iplink.c | 31 ++++++++++++------------------- ip/ipmaddr.c | 3 ++- ip/iprule.c | 10 ++++++++-- ip/iptunnel.c | 7 ++++++- ip/iptuntap.c | 6 ++++-- lib/utils.c | 29 +++++++++++++++++++++++++++++ misc/arpd.c | 3 ++- tc/f_flower.c | 2 ++ 11 files changed, 72 insertions(+), 28 deletions(-) diff --git a/include/utils.h b/include/utils.h index c9ed230b..76addb32 100644 --- a/include/utils.h +++ b/include/utils.h @@ -133,6 +133,8 @@ void missarg(const char *) __attribute__((noreturn)); void invarg(const char *, const char *) __attribute__((noreturn)); void duparg(const char *, const char *) __attribute__((noreturn)); void duparg2(const char *, const char *) __attribute__((noreturn)); +int check_ifname(const char *); +int get_ifname(char *, const char *); int matches(const char *arg, const char *pattern); int inet_addr_match(const inet_prefix *a, const inet_prefix *b, int bits); diff --git a/ip/ip6tunnel.c b/ip/ip6tunnel.c index c12d700e..bc44bef7 100644 --- a/ip/ip6tunnel.c +++ b/ip/ip6tunnel.c @@ -273,7 +273,8 @@ static int parse_args(int argc, char **argv, int cmd, struct ip6_tnl_parm2 *p) usage(); if (p->name[0]) duparg2("name", *argv); - strncpy(p->name, *argv, IFNAMSIZ - 1); + if (get_ifname(p->name, *argv)) + invarg("\"name\" not a valid ifname", *argv); if (cmd == SIOCCHGTUNNEL && count == 0) { struct ip6_tnl_parm2 old_p = {}; diff --git a/ip/ipl2tp.c b/ip/ipl2tp.c index 88664c90..1e37b175 100644 --- a/ip/ipl2tp.c +++ b/ip/ipl2tp.c @@ -182,7 +182,7 @@ static int create_session(struct l2tp_parm *p) if (p->peer_cookie_len) addattr_l(&req.n, 1024, L2TP_ATTR_PEER_COOKIE, p->peer_cookie, p->peer_cookie_len); - if (p->ifname && p->ifname[0]) + if (p->ifname) addattrstrz(&req.n, 1024, L2TP_ATTR_IFNAME, p->ifname); if (rtnl_talk(&genl_rth, &req.n, NULL, 0) < 0) @@ -545,6 +545,8 @@ static int parse_args(int argc, char **argv, int cmd, struct l2tp_parm *p) } } 
else if (strcmp(*argv, "name") == 0) { NEXT_ARG(); + if (check_ifname(*argv)) + invarg("\"name\" not a valid ifname", *argv); p->ifname = *argv; } else if (strcmp(*argv, "remote") == 0) { NEXT_ARG(); diff --git a/ip/iplink.c b/ip/iplink.c index ff5b56c0..6a96ea9f 100644 --- a/ip/iplink.c +++ b/ip/iplink.c @@ -573,6 +573,8 @@ int iplink_parse(int argc, char **argv, struct iplink_req *req, req->i.ifi_flags &= ~IFF_UP; } else if (strcmp(*argv, "name") == 0) { NEXT_ARG(); + if (check_ifname(*argv)) + invarg("\"name\" not a valid ifname", *argv); *name = *argv; } else if (strcmp(*argv, "index") == 0) { NEXT_ARG(); @@ -848,6 +850,8 @@ int iplink_parse(int argc, char **argv, struct iplink_req *req, NEXT_ARG(); if (*dev) duparg2("dev", *argv); + if (check_ifname(*argv)) + invarg("\"dev\" not a valid ifname", *argv); *dev = *argv; dev_index = ll_name_to_index(*dev); } @@ -870,7 +874,6 @@ int iplink_parse(int argc, char **argv, struct iplink_req *req, static int iplink_modify(int cmd, unsigned int flags, int argc, char **argv) { - int len; char *dev = NULL; char *name = NULL; char *link = NULL; @@ -960,13 +963,8 @@ static int iplink_modify(int cmd, unsigned int flags, int argc, char **argv) } if (name) { - len = strlen(name) + 1; - if (len == 1) - invarg("\"\" is not a valid device identifier\n", - "name"); - if (len > IFNAMSIZ) - invarg("\"name\" too long\n", name); - addattr_l(&req.n, sizeof(req), IFLA_IFNAME, name, len); + addattr_l(&req.n, sizeof(req), + IFLA_IFNAME, name, strlen(name) + 1); } if (type) { @@ -1016,7 +1014,6 @@ static int iplink_modify(int cmd, unsigned int flags, int argc, char **argv) int iplink_get(unsigned int flags, char *name, __u32 filt_mask) { - int len; struct iplink_req req = { .n.nlmsg_len = NLMSG_LENGTH(sizeof(struct ifinfomsg)), .n.nlmsg_flags = NLM_F_REQUEST | flags, @@ -1029,13 +1026,8 @@ int iplink_get(unsigned int flags, char *name, __u32 filt_mask) } answer; if (name) { - len = strlen(name) + 1; - if (len == 1) - invarg("\"\" is not a 
valid device identifier\n", - "name"); - if (len > IFNAMSIZ) - invarg("\"name\" too long\n", name); - addattr_l(&req.n, sizeof(req), IFLA_IFNAME, name, len); + addattr_l(&req.n, sizeof(req), + IFLA_IFNAME, name, strlen(name) + 1); } addattr32(&req.n, sizeof(req), IFLA_EXT_MASK, filt_mask); @@ -1265,6 +1257,8 @@ static int do_set(int argc, char **argv) flags &= ~IFF_UP; } else if (strcmp(*argv, "name") == 0) { NEXT_ARG(); + if (check_ifname(*argv)) + invarg("\"name\" not a valid ifname", *argv); newname = *argv; } else if (matches(*argv, "address") == 0) { NEXT_ARG(); @@ -1355,6 +1349,8 @@ static int do_set(int argc, char **argv) if (dev) duparg2("dev", *argv); + if (check_ifname(*argv)) + invarg("\"dev\" not a valid ifname", *argv); dev = *argv; } argc--; argv++; @@ -1383,9 +1379,6 @@ static int do_set(int argc, char **argv) } if (newname && strcmp(dev, newname)) { - if (strlen(newname) == 0) - invarg("\"\" is not a valid device identifier\n", - "name"); if (do_changename(dev, newname) < 0) return -1; dev = newname; diff --git a/ip/ipmaddr.c b/ip/ipmaddr.c index 85a69e77..5683f6fa 100644 --- a/ip/ipmaddr.c +++ b/ip/ipmaddr.c @@ -284,7 +284,8 @@ static int multiaddr_modify(int cmd, int argc, char **argv) NEXT_ARG(); if (ifr.ifr_name[0]) duparg("dev", *argv); - strncpy(ifr.ifr_name, *argv, IFNAMSIZ); + if (get_ifname(ifr.ifr_name, *argv)) + invarg("\"dev\" not a valid ifname", *argv); } else { if (matches(*argv, "address") == 0) { NEXT_ARG(); diff --git a/ip/iprule.c b/ip/iprule.c index 8313138d..36c57fa7 100644 --- a/ip/iprule.c +++ b/ip/iprule.c @@ -472,11 +472,13 @@ static int iprule_list_flush_or_save(int argc, char **argv, int action) } else if (strcmp(*argv, "dev") == 0 || strcmp(*argv, "iif") == 0) { NEXT_ARG(); - strncpy(filter.iif, *argv, IFNAMSIZ); + if (get_ifname(filter.iif, *argv)) + invarg("\"iif\"/\"dev\" not a valid ifname", *argv); filter.iifmask = 1; } else if (strcmp(*argv, "oif") == 0) { NEXT_ARG(); - strncpy(filter.oif, *argv, IFNAMSIZ); + if 
(get_ifname(filter.oif, *argv)) + invarg("\"oif\" not a valid ifname", *argv); filter.oifmask = 1; } else if (strcmp(*argv, "l3mdev") == 0) { filter.l3mdev = 1; @@ -695,10 +697,14 @@ static int iprule_modify(int cmd, int argc, char **argv) } else if (strcmp(*argv, "dev") == 0 || strcmp(*argv, "iif") == 0) { NEXT_ARG(); + if (check_ifname(*argv)) + invarg("\"iif\"/\"dev\" not a valid ifname", *argv); addattr_l(&req.n, sizeof(req), FRA_IFNAME, *argv, strlen(*argv)+1); } else if (strcmp(*argv, "oif") == 0) { NEXT_ARG(); + if (check_ifname(*argv)) + invarg("\"oif\" not a valid ifname", *argv); addattr_l(&req.n, sizeof(req), FRA_OIFNAME, *argv, strlen(*argv)+1); } else if (strcmp(*argv, "l3mdev") == 0) { diff --git a/ip/iptunnel.c b/ip/iptunnel.c index 0acfd079..208a1f06 100644 --- a/ip/iptunnel.c +++ b/ip/iptunnel.c @@ -178,7 +178,8 @@ static int parse_args(int argc, char **argv, int cmd, struct ip_tunnel_parm *p) if (p->name[0]) duparg2("name", *argv); - strncpy(p->name, *argv, IFNAMSIZ - 1); + if (get_ifname(p->name, *argv)) + invarg("\"name\" not a valid ifname", *argv); if (cmd == SIOCCHGTUNNEL && count == 0) { struct ip_tunnel_parm old_p = {}; @@ -487,6 +488,8 @@ static int do_prl(int argc, char **argv) count++; } else if (strcmp(*argv, "dev") == 0) { NEXT_ARG(); + if (check_ifname(*argv)) + invarg("\"dev\" not a valid ifname", *argv); medium = *argv; } else { fprintf(stderr, @@ -534,6 +537,8 @@ static int do_6rd(int argc, char **argv) cmd = SIOCDEL6RD; } else if (strcmp(*argv, "dev") == 0) { NEXT_ARG(); + if (check_ifname(*argv)) + invarg("\"dev\" not a valid ifname", *argv); medium = *argv; } else { fprintf(stderr, diff --git a/ip/iptuntap.c b/ip/iptuntap.c index 451f7f0e..b46e452f 100644 --- a/ip/iptuntap.c +++ b/ip/iptuntap.c @@ -176,7 +176,8 @@ static int parse_args(int argc, char **argv, ifr->ifr_flags |= IFF_MULTI_QUEUE; } else if (matches(*argv, "dev") == 0) { NEXT_ARG(); - strncpy(ifr->ifr_name, *argv, IFNAMSIZ-1); + if (get_ifname(ifr->ifr_name, *argv)) 
+ invarg("\"dev\" not a valid ifname", *argv); } else { if (matches(*argv, "name") == 0) { NEXT_ARG(); @@ -184,7 +185,8 @@ static int parse_args(int argc, char **argv, usage(); if (ifr->ifr_name[0]) duparg2("name", *argv); - strncpy(ifr->ifr_name, *argv, IFNAMSIZ); + if (get_ifname(ifr->ifr_name, *argv)) + invarg("\"name\" not a valid ifname", *argv); } count++; argc--; argv++; diff --git a/lib/utils.c b/lib/utils.c index bbd3cbc4..0cf99619 100644 --- a/lib/utils.c +++ b/lib/utils.c @@ -20,6 +20,7 @@ #include <sys/socket.h> #include <netinet/in.h> #include <string.h> +#include <ctype.h> #include <netdb.h> #include <arpa/inet.h> #include <asm/types.h> @@ -699,6 +700,34 @@ void duparg2(const char *key, const char *arg) exit(-1); } +int check_ifname(const char *name) +{ + /* These checks mimic kernel checks in dev_valid_name */ + if (*name == '\0') + return -1; + if (strlen(name) >= IFNAMSIZ) + return -1; + + while (*name) { + if (*name == '/' || isspace(*name)) + return -1; + ++name; + } + return 0; +} + +/* buf is assumed to be IFNAMSIZ */ +int get_ifname(char *buf, const char *name) +{ + int ret; + + ret = check_ifname(name); + if (ret == 0) + strncpy(buf, name, IFNAMSIZ); + + return ret; +} + int matches(const char *cmd, const char *pattern) { int len = strlen(cmd); diff --git a/misc/arpd.c b/misc/arpd.c index bfab4454..c2666f76 100644 --- a/misc/arpd.c +++ b/misc/arpd.c @@ -664,7 +664,8 @@ int main(int argc, char **argv) struct ifreq ifr = {}; for (i = 0; i < ifnum; i++) { - strncpy(ifr.ifr_name, ifnames[i], IFNAMSIZ); + if (get_ifname(ifr.ifr_name, ifnames[i])) + invarg("not a valid ifname", ifnames[i]); if (ioctl(udp_sock, SIOCGIFINDEX, &ifr)) { perror("ioctl(SIOCGIFINDEX)"); exit(-1); diff --git a/tc/f_flower.c b/tc/f_flower.c index 99e62a38..b1802107 100644 --- a/tc/f_flower.c +++ b/tc/f_flower.c @@ -630,6 +630,8 @@ static int flower_parse_opt(struct filter_util *qu, char *handle, flags |= TCA_CLS_FLAGS_SKIP_SW; } else if (matches(*argv, "indev") == 0) { 
NEXT_ARG(); + if (check_ifname(*argv)) + invarg("\"indev\" not a valid ifname", *argv); addattrstrz(n, MAX_MSG, TCA_FLOWER_INDEV, *argv); } else if (matches(*argv, "vlan_id") == 0) { __u16 vid; From b0af8fc1aaedb9998748f72453152a941256dd78 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger <stephen@networkplumber.org> Date: Wed, 11 Oct 2017 10:35:00 -0700 Subject: [PATCH 16/28] tipc: don't need custom CFLAGS Since libmnl CFLAGS are now handled by config.mk Signed-off-by: Stephen Hemminger <stephen@networkplumber.org> --- tipc/Makefile | 2 -- 1 file changed, 2 deletions(-) diff --git a/tipc/Makefile b/tipc/Makefile index 2212beb0..d3c957e2 100644 --- a/tipc/Makefile +++ b/tipc/Makefile @@ -10,8 +10,6 @@ TIPCOBJ=bearer.o \ peer.o tipc.o TARGETS=tipc -CFLAGS += $(shell $(PKG_CONFIG) libmnl --cflags) -LDLIBS += $(shell $(PKG_CONFIG) libmnl --libs) endif From 596b1c94aa38e21b7a8c8562e8b61ccb744255d2 Mon Sep 17 00:00:00 2001 From: Lorenzo Colitti <lorenzo@google.com> Date: Tue, 3 Oct 2017 02:03:37 +0900 Subject: [PATCH 17/28] iproute: build more easily on Android iproute2 contains a bunch of kernel headers, including uapi ones. Android's libc uses uapi headers almost directly, and uses a script to fix kernel types that don't match what userspace expects. For example: https://issuetracker.google.com/36987220 reports that our struct ip_mreq_source contains "__be32 imr_multiaddr" rather than "struct in_addr imr_multiaddr". The script addresses this by replacing the uapi struct definition with a #include <bits/ip_mreq.h> which contains the traditional userspace definition. Unfortunately, when we compile iproute2, this definition conflicts with the one in iproute2's linux/in.h. Historically we've just solved this problem by running "git rm" on all the iproute2 include/linux headers that break Android's libc. However, deleting the files in this way makes it harder to keep up with upstream, because every upstream change to an include file causes a merge conflict with the delete. 
This patch fixes the problem by moving the iproute2 linux headers from include/linux to include/uapi/linux. Tested: compiles on ubuntu trusty (glibc) Signed-off-by: Elliott Hughes <enh@google.com> Signed-off-by: Lorenzo Colitti <lorenzo@google.com> --- Makefile | 2 +- include/{ => uapi}/linux/atm.h | 0 include/{ => uapi}/linux/atmapi.h | 0 include/{ => uapi}/linux/atmarp.h | 0 include/{ => uapi}/linux/atmdev.h | 0 include/{ => uapi}/linux/atmioc.h | 0 include/{ => uapi}/linux/atmsap.h | 0 include/{ => uapi}/linux/bpf.h | 0 include/{ => uapi}/linux/bpf_common.h | 0 include/{ => uapi}/linux/can.h | 0 include/{ => uapi}/linux/can/netlink.h | 0 include/{ => uapi}/linux/can/vxcan.h | 0 include/{ => uapi}/linux/devlink.h | 0 include/{ => uapi}/linux/elf-em.h | 0 include/{ => uapi}/linux/fib_rules.h | 0 include/{ => uapi}/linux/filter.h | 0 include/{ => uapi}/linux/fou.h | 0 include/{ => uapi}/linux/gen_stats.h | 0 include/{ => uapi}/linux/genetlink.h | 0 include/{ => uapi}/linux/hdlc/ioctl.h | 0 include/{ => uapi}/linux/icmpv6.h | 0 include/{ => uapi}/linux/if.h | 0 include/{ => uapi}/linux/if_addr.h | 0 include/{ => uapi}/linux/if_addrlabel.h | 0 include/{ => uapi}/linux/if_alg.h | 0 include/{ => uapi}/linux/if_arp.h | 0 include/{ => uapi}/linux/if_bonding.h | 0 include/{ => uapi}/linux/if_bridge.h | 0 include/{ => uapi}/linux/if_ether.h | 0 include/{ => uapi}/linux/if_link.h | 0 include/{ => uapi}/linux/if_macsec.h | 0 include/{ => uapi}/linux/if_packet.h | 0 include/{ => uapi}/linux/if_tun.h | 0 include/{ => uapi}/linux/if_tunnel.h | 0 include/{ => uapi}/linux/if_vlan.h | 0 include/{ => uapi}/linux/ife.h | 0 include/{ => uapi}/linux/ila.h | 0 include/{ => uapi}/linux/in.h | 0 include/{ => uapi}/linux/in6.h | 0 include/{ => uapi}/linux/in_route.h | 0 include/{ => uapi}/linux/inet_diag.h | 0 include/{ => uapi}/linux/ip.h | 0 include/{ => uapi}/linux/ip6_tunnel.h | 0 include/{ => uapi}/linux/ipsec.h | 0 include/{ => uapi}/linux/kernel.h | 0 include/{ => 
uapi}/linux/l2tp.h | 0 include/{ => uapi}/linux/libc-compat.h | 0 include/{ => uapi}/linux/limits.h | 0 include/{ => uapi}/linux/lwtunnel.h | 0 include/{ => uapi}/linux/magic.h | 0 include/{ => uapi}/linux/mpls.h | 0 include/{ => uapi}/linux/mpls_iptunnel.h | 0 include/{ => uapi}/linux/neighbour.h | 0 include/{ => uapi}/linux/net_namespace.h | 0 include/{ => uapi}/linux/netconf.h | 0 include/{ => uapi}/linux/netdevice.h | 0 include/{ => uapi}/linux/netfilter.h | 0 include/{ => uapi}/linux/netfilter/ipset/ip_set.h | 0 include/{ => uapi}/linux/netfilter/x_tables.h | 0 include/{ => uapi}/linux/netfilter/xt_set.h | 0 include/{ => uapi}/linux/netfilter/xt_tcpudp.h | 0 include/{ => uapi}/linux/netfilter_ipv4.h | 0 include/{ => uapi}/linux/netfilter_ipv4/ip_tables.h | 0 include/{ => uapi}/linux/netfilter_ipv6.h | 0 include/{ => uapi}/linux/netfilter_ipv6/ip6_tables.h | 0 include/{ => uapi}/linux/netlink.h | 0 include/{ => uapi}/linux/netlink_diag.h | 0 include/{ => uapi}/linux/packet_diag.h | 0 include/{ => uapi}/linux/param.h | 0 include/{ => uapi}/linux/pfkeyv2.h | 0 include/{ => uapi}/linux/pkt_cls.h | 0 include/{ => uapi}/linux/pkt_sched.h | 0 include/{ => uapi}/linux/posix_types.h | 0 include/{ => uapi}/linux/rtnetlink.h | 0 include/{ => uapi}/linux/sctp.h | 0 include/{ => uapi}/linux/seg6.h | 0 include/{ => uapi}/linux/seg6_genl.h | 0 include/{ => uapi}/linux/seg6_hmac.h | 0 include/{ => uapi}/linux/seg6_iptunnel.h | 0 include/{ => uapi}/linux/seg6_local.h | 0 include/{ => uapi}/linux/sock_diag.h | 0 include/{ => uapi}/linux/socket.h | 0 include/{ => uapi}/linux/sockios.h | 0 include/{ => uapi}/linux/stddef.h | 0 include/{ => uapi}/linux/sysinfo.h | 0 include/{ => uapi}/linux/tc_act/tc_bpf.h | 0 include/{ => uapi}/linux/tc_act/tc_connmark.h | 0 include/{ => uapi}/linux/tc_act/tc_csum.h | 0 include/{ => uapi}/linux/tc_act/tc_defact.h | 0 include/{ => uapi}/linux/tc_act/tc_gact.h | 0 include/{ => uapi}/linux/tc_act/tc_ife.h | 0 include/{ => uapi}/linux/tc_act/tc_ipt.h 
| 0 include/{ => uapi}/linux/tc_act/tc_mirred.h | 0 include/{ => uapi}/linux/tc_act/tc_nat.h | 0 include/{ => uapi}/linux/tc_act/tc_pedit.h | 0 include/{ => uapi}/linux/tc_act/tc_sample.h | 0 include/{ => uapi}/linux/tc_act/tc_skbedit.h | 0 include/{ => uapi}/linux/tc_act/tc_skbmod.h | 0 include/{ => uapi}/linux/tc_act/tc_tunnel_key.h | 0 include/{ => uapi}/linux/tc_act/tc_vlan.h | 0 include/{ => uapi}/linux/tc_ematch/tc_em_cmp.h | 0 include/{ => uapi}/linux/tc_ematch/tc_em_meta.h | 0 include/{ => uapi}/linux/tc_ematch/tc_em_nbyte.h | 0 include/{ => uapi}/linux/tcp.h | 0 include/{ => uapi}/linux/tcp_metrics.h | 0 include/{ => uapi}/linux/tipc.h | 0 include/{ => uapi}/linux/tipc_netlink.h | 0 include/{ => uapi}/linux/types.h | 0 include/{ => uapi}/linux/unix_diag.h | 0 include/{ => uapi}/linux/veth.h | 0 include/{ => uapi}/linux/xfrm.h | 0 111 files changed, 1 insertion(+), 1 deletion(-) rename include/{ => uapi}/linux/atm.h (100%) rename include/{ => uapi}/linux/atmapi.h (100%) rename include/{ => uapi}/linux/atmarp.h (100%) rename include/{ => uapi}/linux/atmdev.h (100%) rename include/{ => uapi}/linux/atmioc.h (100%) rename include/{ => uapi}/linux/atmsap.h (100%) rename include/{ => uapi}/linux/bpf.h (100%) rename include/{ => uapi}/linux/bpf_common.h (100%) rename include/{ => uapi}/linux/can.h (100%) rename include/{ => uapi}/linux/can/netlink.h (100%) rename include/{ => uapi}/linux/can/vxcan.h (100%) rename include/{ => uapi}/linux/devlink.h (100%) rename include/{ => uapi}/linux/elf-em.h (100%) rename include/{ => uapi}/linux/fib_rules.h (100%) rename include/{ => uapi}/linux/filter.h (100%) rename include/{ => uapi}/linux/fou.h (100%) rename include/{ => uapi}/linux/gen_stats.h (100%) rename include/{ => uapi}/linux/genetlink.h (100%) rename include/{ => uapi}/linux/hdlc/ioctl.h (100%) rename include/{ => uapi}/linux/icmpv6.h (100%) rename include/{ => uapi}/linux/if.h (100%) rename include/{ => uapi}/linux/if_addr.h (100%) rename include/{ => 
uapi}/linux/if_addrlabel.h (100%) rename include/{ => uapi}/linux/if_alg.h (100%) rename include/{ => uapi}/linux/if_arp.h (100%) rename include/{ => uapi}/linux/if_bonding.h (100%) rename include/{ => uapi}/linux/if_bridge.h (100%) rename include/{ => uapi}/linux/if_ether.h (100%) rename include/{ => uapi}/linux/if_link.h (100%) rename include/{ => uapi}/linux/if_macsec.h (100%) rename include/{ => uapi}/linux/if_packet.h (100%) rename include/{ => uapi}/linux/if_tun.h (100%) rename include/{ => uapi}/linux/if_tunnel.h (100%) rename include/{ => uapi}/linux/if_vlan.h (100%) rename include/{ => uapi}/linux/ife.h (100%) rename include/{ => uapi}/linux/ila.h (100%) rename include/{ => uapi}/linux/in.h (100%) rename include/{ => uapi}/linux/in6.h (100%) rename include/{ => uapi}/linux/in_route.h (100%) rename include/{ => uapi}/linux/inet_diag.h (100%) rename include/{ => uapi}/linux/ip.h (100%) rename include/{ => uapi}/linux/ip6_tunnel.h (100%) rename include/{ => uapi}/linux/ipsec.h (100%) rename include/{ => uapi}/linux/kernel.h (100%) rename include/{ => uapi}/linux/l2tp.h (100%) rename include/{ => uapi}/linux/libc-compat.h (100%) rename include/{ => uapi}/linux/limits.h (100%) rename include/{ => uapi}/linux/lwtunnel.h (100%) rename include/{ => uapi}/linux/magic.h (100%) rename include/{ => uapi}/linux/mpls.h (100%) rename include/{ => uapi}/linux/mpls_iptunnel.h (100%) rename include/{ => uapi}/linux/neighbour.h (100%) rename include/{ => uapi}/linux/net_namespace.h (100%) rename include/{ => uapi}/linux/netconf.h (100%) rename include/{ => uapi}/linux/netdevice.h (100%) rename include/{ => uapi}/linux/netfilter.h (100%) rename include/{ => uapi}/linux/netfilter/ipset/ip_set.h (100%) rename include/{ => uapi}/linux/netfilter/x_tables.h (100%) rename include/{ => uapi}/linux/netfilter/xt_set.h (100%) rename include/{ => uapi}/linux/netfilter/xt_tcpudp.h (100%) rename include/{ => uapi}/linux/netfilter_ipv4.h (100%) rename include/{ => 
uapi}/linux/netfilter_ipv4/ip_tables.h (100%) rename include/{ => uapi}/linux/netfilter_ipv6.h (100%) rename include/{ => uapi}/linux/netfilter_ipv6/ip6_tables.h (100%) rename include/{ => uapi}/linux/netlink.h (100%) rename include/{ => uapi}/linux/netlink_diag.h (100%) rename include/{ => uapi}/linux/packet_diag.h (100%) rename include/{ => uapi}/linux/param.h (100%) rename include/{ => uapi}/linux/pfkeyv2.h (100%) rename include/{ => uapi}/linux/pkt_cls.h (100%) rename include/{ => uapi}/linux/pkt_sched.h (100%) rename include/{ => uapi}/linux/posix_types.h (100%) rename include/{ => uapi}/linux/rtnetlink.h (100%) rename include/{ => uapi}/linux/sctp.h (100%) rename include/{ => uapi}/linux/seg6.h (100%) rename include/{ => uapi}/linux/seg6_genl.h (100%) rename include/{ => uapi}/linux/seg6_hmac.h (100%) rename include/{ => uapi}/linux/seg6_iptunnel.h (100%) rename include/{ => uapi}/linux/seg6_local.h (100%) rename include/{ => uapi}/linux/sock_diag.h (100%) rename include/{ => uapi}/linux/socket.h (100%) rename include/{ => uapi}/linux/sockios.h (100%) rename include/{ => uapi}/linux/stddef.h (100%) rename include/{ => uapi}/linux/sysinfo.h (100%) rename include/{ => uapi}/linux/tc_act/tc_bpf.h (100%) rename include/{ => uapi}/linux/tc_act/tc_connmark.h (100%) rename include/{ => uapi}/linux/tc_act/tc_csum.h (100%) rename include/{ => uapi}/linux/tc_act/tc_defact.h (100%) rename include/{ => uapi}/linux/tc_act/tc_gact.h (100%) rename include/{ => uapi}/linux/tc_act/tc_ife.h (100%) rename include/{ => uapi}/linux/tc_act/tc_ipt.h (100%) rename include/{ => uapi}/linux/tc_act/tc_mirred.h (100%) rename include/{ => uapi}/linux/tc_act/tc_nat.h (100%) rename include/{ => uapi}/linux/tc_act/tc_pedit.h (100%) rename include/{ => uapi}/linux/tc_act/tc_sample.h (100%) rename include/{ => uapi}/linux/tc_act/tc_skbedit.h (100%) rename include/{ => uapi}/linux/tc_act/tc_skbmod.h (100%) rename include/{ => uapi}/linux/tc_act/tc_tunnel_key.h (100%) rename include/{ => 
uapi}/linux/tc_act/tc_vlan.h (100%) rename include/{ => uapi}/linux/tc_ematch/tc_em_cmp.h (100%) rename include/{ => uapi}/linux/tc_ematch/tc_em_meta.h (100%) rename include/{ => uapi}/linux/tc_ematch/tc_em_nbyte.h (100%) rename include/{ => uapi}/linux/tcp.h (100%) rename include/{ => uapi}/linux/tcp_metrics.h (100%) rename include/{ => uapi}/linux/tipc.h (100%) rename include/{ => uapi}/linux/tipc_netlink.h (100%) rename include/{ => uapi}/linux/types.h (100%) rename include/{ => uapi}/linux/unix_diag.h (100%) rename include/{ => uapi}/linux/veth.h (100%) rename include/{ => uapi}/linux/xfrm.h (100%) diff --git a/Makefile b/Makefile index 75c0e570..6ad96104 100644 --- a/Makefile +++ b/Makefile @@ -46,7 +46,7 @@ CCOPTS = -O2 WFLAGS := -Wall -Wstrict-prototypes -Wmissing-prototypes WFLAGS += -Wmissing-declarations -Wold-style-definition -Wformat=2 -CFLAGS := $(WFLAGS) $(CCOPTS) -I../include $(DEFINES) $(CFLAGS) +CFLAGS := $(WFLAGS) $(CCOPTS) -I../include -I../include/uapi $(DEFINES) $(CFLAGS) YACCFLAGS = -d -t -v SUBDIRS=lib ip tc bridge misc netem genl tipc devlink rdma man diff --git a/include/linux/atm.h b/include/uapi/linux/atm.h similarity index 100% rename from include/linux/atm.h rename to include/uapi/linux/atm.h diff --git a/include/linux/atmapi.h b/include/uapi/linux/atmapi.h similarity index 100% rename from include/linux/atmapi.h rename to include/uapi/linux/atmapi.h diff --git a/include/linux/atmarp.h b/include/uapi/linux/atmarp.h similarity index 100% rename from include/linux/atmarp.h rename to include/uapi/linux/atmarp.h diff --git a/include/linux/atmdev.h b/include/uapi/linux/atmdev.h similarity index 100% rename from include/linux/atmdev.h rename to include/uapi/linux/atmdev.h diff --git a/include/linux/atmioc.h b/include/uapi/linux/atmioc.h similarity index 100% rename from include/linux/atmioc.h rename to include/uapi/linux/atmioc.h diff --git a/include/linux/atmsap.h b/include/uapi/linux/atmsap.h similarity index 100% rename from 
include/linux/atmsap.h rename to include/uapi/linux/atmsap.h diff --git a/include/linux/bpf.h b/include/uapi/linux/bpf.h similarity index 100% rename from include/linux/bpf.h rename to include/uapi/linux/bpf.h diff --git a/include/linux/bpf_common.h b/include/uapi/linux/bpf_common.h similarity index 100% rename from include/linux/bpf_common.h rename to include/uapi/linux/bpf_common.h diff --git a/include/linux/can.h b/include/uapi/linux/can.h similarity index 100% rename from include/linux/can.h rename to include/uapi/linux/can.h diff --git a/include/linux/can/netlink.h b/include/uapi/linux/can/netlink.h similarity index 100% rename from include/linux/can/netlink.h rename to include/uapi/linux/can/netlink.h diff --git a/include/linux/can/vxcan.h b/include/uapi/linux/can/vxcan.h similarity index 100% rename from include/linux/can/vxcan.h rename to include/uapi/linux/can/vxcan.h diff --git a/include/linux/devlink.h b/include/uapi/linux/devlink.h similarity index 100% rename from include/linux/devlink.h rename to include/uapi/linux/devlink.h diff --git a/include/linux/elf-em.h b/include/uapi/linux/elf-em.h similarity index 100% rename from include/linux/elf-em.h rename to include/uapi/linux/elf-em.h diff --git a/include/linux/fib_rules.h b/include/uapi/linux/fib_rules.h similarity index 100% rename from include/linux/fib_rules.h rename to include/uapi/linux/fib_rules.h diff --git a/include/linux/filter.h b/include/uapi/linux/filter.h similarity index 100% rename from include/linux/filter.h rename to include/uapi/linux/filter.h diff --git a/include/linux/fou.h b/include/uapi/linux/fou.h similarity index 100% rename from include/linux/fou.h rename to include/uapi/linux/fou.h diff --git a/include/linux/gen_stats.h b/include/uapi/linux/gen_stats.h similarity index 100% rename from include/linux/gen_stats.h rename to include/uapi/linux/gen_stats.h diff --git a/include/linux/genetlink.h b/include/uapi/linux/genetlink.h similarity index 100% rename from 
include/linux/genetlink.h rename to include/uapi/linux/genetlink.h diff --git a/include/linux/hdlc/ioctl.h b/include/uapi/linux/hdlc/ioctl.h similarity index 100% rename from include/linux/hdlc/ioctl.h rename to include/uapi/linux/hdlc/ioctl.h diff --git a/include/linux/icmpv6.h b/include/uapi/linux/icmpv6.h similarity index 100% rename from include/linux/icmpv6.h rename to include/uapi/linux/icmpv6.h diff --git a/include/linux/if.h b/include/uapi/linux/if.h similarity index 100% rename from include/linux/if.h rename to include/uapi/linux/if.h diff --git a/include/linux/if_addr.h b/include/uapi/linux/if_addr.h similarity index 100% rename from include/linux/if_addr.h rename to include/uapi/linux/if_addr.h diff --git a/include/linux/if_addrlabel.h b/include/uapi/linux/if_addrlabel.h similarity index 100% rename from include/linux/if_addrlabel.h rename to include/uapi/linux/if_addrlabel.h diff --git a/include/linux/if_alg.h b/include/uapi/linux/if_alg.h similarity index 100% rename from include/linux/if_alg.h rename to include/uapi/linux/if_alg.h diff --git a/include/linux/if_arp.h b/include/uapi/linux/if_arp.h similarity index 100% rename from include/linux/if_arp.h rename to include/uapi/linux/if_arp.h diff --git a/include/linux/if_bonding.h b/include/uapi/linux/if_bonding.h similarity index 100% rename from include/linux/if_bonding.h rename to include/uapi/linux/if_bonding.h diff --git a/include/linux/if_bridge.h b/include/uapi/linux/if_bridge.h similarity index 100% rename from include/linux/if_bridge.h rename to include/uapi/linux/if_bridge.h diff --git a/include/linux/if_ether.h b/include/uapi/linux/if_ether.h similarity index 100% rename from include/linux/if_ether.h rename to include/uapi/linux/if_ether.h diff --git a/include/linux/if_link.h b/include/uapi/linux/if_link.h similarity index 100% rename from include/linux/if_link.h rename to include/uapi/linux/if_link.h diff --git a/include/linux/if_macsec.h b/include/uapi/linux/if_macsec.h similarity index 100% 
rename from include/linux/if_macsec.h rename to include/uapi/linux/if_macsec.h diff --git a/include/linux/if_packet.h b/include/uapi/linux/if_packet.h similarity index 100% rename from include/linux/if_packet.h rename to include/uapi/linux/if_packet.h diff --git a/include/linux/if_tun.h b/include/uapi/linux/if_tun.h similarity index 100% rename from include/linux/if_tun.h rename to include/uapi/linux/if_tun.h diff --git a/include/linux/if_tunnel.h b/include/uapi/linux/if_tunnel.h similarity index 100% rename from include/linux/if_tunnel.h rename to include/uapi/linux/if_tunnel.h diff --git a/include/linux/if_vlan.h b/include/uapi/linux/if_vlan.h similarity index 100% rename from include/linux/if_vlan.h rename to include/uapi/linux/if_vlan.h diff --git a/include/linux/ife.h b/include/uapi/linux/ife.h similarity index 100% rename from include/linux/ife.h rename to include/uapi/linux/ife.h diff --git a/include/linux/ila.h b/include/uapi/linux/ila.h similarity index 100% rename from include/linux/ila.h rename to include/uapi/linux/ila.h diff --git a/include/linux/in.h b/include/uapi/linux/in.h similarity index 100% rename from include/linux/in.h rename to include/uapi/linux/in.h diff --git a/include/linux/in6.h b/include/uapi/linux/in6.h similarity index 100% rename from include/linux/in6.h rename to include/uapi/linux/in6.h diff --git a/include/linux/in_route.h b/include/uapi/linux/in_route.h similarity index 100% rename from include/linux/in_route.h rename to include/uapi/linux/in_route.h diff --git a/include/linux/inet_diag.h b/include/uapi/linux/inet_diag.h similarity index 100% rename from include/linux/inet_diag.h rename to include/uapi/linux/inet_diag.h diff --git a/include/linux/ip.h b/include/uapi/linux/ip.h similarity index 100% rename from include/linux/ip.h rename to include/uapi/linux/ip.h diff --git a/include/linux/ip6_tunnel.h b/include/uapi/linux/ip6_tunnel.h similarity index 100% rename from include/linux/ip6_tunnel.h rename to 
include/uapi/linux/ip6_tunnel.h diff --git a/include/linux/ipsec.h b/include/uapi/linux/ipsec.h similarity index 100% rename from include/linux/ipsec.h rename to include/uapi/linux/ipsec.h diff --git a/include/linux/kernel.h b/include/uapi/linux/kernel.h similarity index 100% rename from include/linux/kernel.h rename to include/uapi/linux/kernel.h diff --git a/include/linux/l2tp.h b/include/uapi/linux/l2tp.h similarity index 100% rename from include/linux/l2tp.h rename to include/uapi/linux/l2tp.h diff --git a/include/linux/libc-compat.h b/include/uapi/linux/libc-compat.h similarity index 100% rename from include/linux/libc-compat.h rename to include/uapi/linux/libc-compat.h diff --git a/include/linux/limits.h b/include/uapi/linux/limits.h similarity index 100% rename from include/linux/limits.h rename to include/uapi/linux/limits.h diff --git a/include/linux/lwtunnel.h b/include/uapi/linux/lwtunnel.h similarity index 100% rename from include/linux/lwtunnel.h rename to include/uapi/linux/lwtunnel.h diff --git a/include/linux/magic.h b/include/uapi/linux/magic.h similarity index 100% rename from include/linux/magic.h rename to include/uapi/linux/magic.h diff --git a/include/linux/mpls.h b/include/uapi/linux/mpls.h similarity index 100% rename from include/linux/mpls.h rename to include/uapi/linux/mpls.h diff --git a/include/linux/mpls_iptunnel.h b/include/uapi/linux/mpls_iptunnel.h similarity index 100% rename from include/linux/mpls_iptunnel.h rename to include/uapi/linux/mpls_iptunnel.h diff --git a/include/linux/neighbour.h b/include/uapi/linux/neighbour.h similarity index 100% rename from include/linux/neighbour.h rename to include/uapi/linux/neighbour.h diff --git a/include/linux/net_namespace.h b/include/uapi/linux/net_namespace.h similarity index 100% rename from include/linux/net_namespace.h rename to include/uapi/linux/net_namespace.h diff --git a/include/linux/netconf.h b/include/uapi/linux/netconf.h similarity index 100% rename from 
include/linux/netconf.h rename to include/uapi/linux/netconf.h diff --git a/include/linux/netdevice.h b/include/uapi/linux/netdevice.h similarity index 100% rename from include/linux/netdevice.h rename to include/uapi/linux/netdevice.h diff --git a/include/linux/netfilter.h b/include/uapi/linux/netfilter.h similarity index 100% rename from include/linux/netfilter.h rename to include/uapi/linux/netfilter.h diff --git a/include/linux/netfilter/ipset/ip_set.h b/include/uapi/linux/netfilter/ipset/ip_set.h similarity index 100% rename from include/linux/netfilter/ipset/ip_set.h rename to include/uapi/linux/netfilter/ipset/ip_set.h diff --git a/include/linux/netfilter/x_tables.h b/include/uapi/linux/netfilter/x_tables.h similarity index 100% rename from include/linux/netfilter/x_tables.h rename to include/uapi/linux/netfilter/x_tables.h diff --git a/include/linux/netfilter/xt_set.h b/include/uapi/linux/netfilter/xt_set.h similarity index 100% rename from include/linux/netfilter/xt_set.h rename to include/uapi/linux/netfilter/xt_set.h diff --git a/include/linux/netfilter/xt_tcpudp.h b/include/uapi/linux/netfilter/xt_tcpudp.h similarity index 100% rename from include/linux/netfilter/xt_tcpudp.h rename to include/uapi/linux/netfilter/xt_tcpudp.h diff --git a/include/linux/netfilter_ipv4.h b/include/uapi/linux/netfilter_ipv4.h similarity index 100% rename from include/linux/netfilter_ipv4.h rename to include/uapi/linux/netfilter_ipv4.h diff --git a/include/linux/netfilter_ipv4/ip_tables.h b/include/uapi/linux/netfilter_ipv4/ip_tables.h similarity index 100% rename from include/linux/netfilter_ipv4/ip_tables.h rename to include/uapi/linux/netfilter_ipv4/ip_tables.h diff --git a/include/linux/netfilter_ipv6.h b/include/uapi/linux/netfilter_ipv6.h similarity index 100% rename from include/linux/netfilter_ipv6.h rename to include/uapi/linux/netfilter_ipv6.h diff --git a/include/linux/netfilter_ipv6/ip6_tables.h b/include/uapi/linux/netfilter_ipv6/ip6_tables.h similarity index 
100% rename from include/linux/netfilter_ipv6/ip6_tables.h rename to include/uapi/linux/netfilter_ipv6/ip6_tables.h diff --git a/include/linux/netlink.h b/include/uapi/linux/netlink.h similarity index 100% rename from include/linux/netlink.h rename to include/uapi/linux/netlink.h diff --git a/include/linux/netlink_diag.h b/include/uapi/linux/netlink_diag.h similarity index 100% rename from include/linux/netlink_diag.h rename to include/uapi/linux/netlink_diag.h diff --git a/include/linux/packet_diag.h b/include/uapi/linux/packet_diag.h similarity index 100% rename from include/linux/packet_diag.h rename to include/uapi/linux/packet_diag.h diff --git a/include/linux/param.h b/include/uapi/linux/param.h similarity index 100% rename from include/linux/param.h rename to include/uapi/linux/param.h diff --git a/include/linux/pfkeyv2.h b/include/uapi/linux/pfkeyv2.h similarity index 100% rename from include/linux/pfkeyv2.h rename to include/uapi/linux/pfkeyv2.h diff --git a/include/linux/pkt_cls.h b/include/uapi/linux/pkt_cls.h similarity index 100% rename from include/linux/pkt_cls.h rename to include/uapi/linux/pkt_cls.h diff --git a/include/linux/pkt_sched.h b/include/uapi/linux/pkt_sched.h similarity index 100% rename from include/linux/pkt_sched.h rename to include/uapi/linux/pkt_sched.h diff --git a/include/linux/posix_types.h b/include/uapi/linux/posix_types.h similarity index 100% rename from include/linux/posix_types.h rename to include/uapi/linux/posix_types.h diff --git a/include/linux/rtnetlink.h b/include/uapi/linux/rtnetlink.h similarity index 100% rename from include/linux/rtnetlink.h rename to include/uapi/linux/rtnetlink.h diff --git a/include/linux/sctp.h b/include/uapi/linux/sctp.h similarity index 100% rename from include/linux/sctp.h rename to include/uapi/linux/sctp.h diff --git a/include/linux/seg6.h b/include/uapi/linux/seg6.h similarity index 100% rename from include/linux/seg6.h rename to include/uapi/linux/seg6.h diff --git 
a/include/linux/seg6_genl.h b/include/uapi/linux/seg6_genl.h similarity index 100% rename from include/linux/seg6_genl.h rename to include/uapi/linux/seg6_genl.h diff --git a/include/linux/seg6_hmac.h b/include/uapi/linux/seg6_hmac.h similarity index 100% rename from include/linux/seg6_hmac.h rename to include/uapi/linux/seg6_hmac.h diff --git a/include/linux/seg6_iptunnel.h b/include/uapi/linux/seg6_iptunnel.h similarity index 100% rename from include/linux/seg6_iptunnel.h rename to include/uapi/linux/seg6_iptunnel.h diff --git a/include/linux/seg6_local.h b/include/uapi/linux/seg6_local.h similarity index 100% rename from include/linux/seg6_local.h rename to include/uapi/linux/seg6_local.h diff --git a/include/linux/sock_diag.h b/include/uapi/linux/sock_diag.h similarity index 100% rename from include/linux/sock_diag.h rename to include/uapi/linux/sock_diag.h diff --git a/include/linux/socket.h b/include/uapi/linux/socket.h similarity index 100% rename from include/linux/socket.h rename to include/uapi/linux/socket.h diff --git a/include/linux/sockios.h b/include/uapi/linux/sockios.h similarity index 100% rename from include/linux/sockios.h rename to include/uapi/linux/sockios.h diff --git a/include/linux/stddef.h b/include/uapi/linux/stddef.h similarity index 100% rename from include/linux/stddef.h rename to include/uapi/linux/stddef.h diff --git a/include/linux/sysinfo.h b/include/uapi/linux/sysinfo.h similarity index 100% rename from include/linux/sysinfo.h rename to include/uapi/linux/sysinfo.h diff --git a/include/linux/tc_act/tc_bpf.h b/include/uapi/linux/tc_act/tc_bpf.h similarity index 100% rename from include/linux/tc_act/tc_bpf.h rename to include/uapi/linux/tc_act/tc_bpf.h diff --git a/include/linux/tc_act/tc_connmark.h b/include/uapi/linux/tc_act/tc_connmark.h similarity index 100% rename from include/linux/tc_act/tc_connmark.h rename to include/uapi/linux/tc_act/tc_connmark.h diff --git a/include/linux/tc_act/tc_csum.h 
b/include/uapi/linux/tc_act/tc_csum.h similarity index 100% rename from include/linux/tc_act/tc_csum.h rename to include/uapi/linux/tc_act/tc_csum.h diff --git a/include/linux/tc_act/tc_defact.h b/include/uapi/linux/tc_act/tc_defact.h similarity index 100% rename from include/linux/tc_act/tc_defact.h rename to include/uapi/linux/tc_act/tc_defact.h diff --git a/include/linux/tc_act/tc_gact.h b/include/uapi/linux/tc_act/tc_gact.h similarity index 100% rename from include/linux/tc_act/tc_gact.h rename to include/uapi/linux/tc_act/tc_gact.h diff --git a/include/linux/tc_act/tc_ife.h b/include/uapi/linux/tc_act/tc_ife.h similarity index 100% rename from include/linux/tc_act/tc_ife.h rename to include/uapi/linux/tc_act/tc_ife.h diff --git a/include/linux/tc_act/tc_ipt.h b/include/uapi/linux/tc_act/tc_ipt.h similarity index 100% rename from include/linux/tc_act/tc_ipt.h rename to include/uapi/linux/tc_act/tc_ipt.h diff --git a/include/linux/tc_act/tc_mirred.h b/include/uapi/linux/tc_act/tc_mirred.h similarity index 100% rename from include/linux/tc_act/tc_mirred.h rename to include/uapi/linux/tc_act/tc_mirred.h diff --git a/include/linux/tc_act/tc_nat.h b/include/uapi/linux/tc_act/tc_nat.h similarity index 100% rename from include/linux/tc_act/tc_nat.h rename to include/uapi/linux/tc_act/tc_nat.h diff --git a/include/linux/tc_act/tc_pedit.h b/include/uapi/linux/tc_act/tc_pedit.h similarity index 100% rename from include/linux/tc_act/tc_pedit.h rename to include/uapi/linux/tc_act/tc_pedit.h diff --git a/include/linux/tc_act/tc_sample.h b/include/uapi/linux/tc_act/tc_sample.h similarity index 100% rename from include/linux/tc_act/tc_sample.h rename to include/uapi/linux/tc_act/tc_sample.h diff --git a/include/linux/tc_act/tc_skbedit.h b/include/uapi/linux/tc_act/tc_skbedit.h similarity index 100% rename from include/linux/tc_act/tc_skbedit.h rename to include/uapi/linux/tc_act/tc_skbedit.h diff --git a/include/linux/tc_act/tc_skbmod.h b/include/uapi/linux/tc_act/tc_skbmod.h 
similarity index 100% rename from include/linux/tc_act/tc_skbmod.h rename to include/uapi/linux/tc_act/tc_skbmod.h diff --git a/include/linux/tc_act/tc_tunnel_key.h b/include/uapi/linux/tc_act/tc_tunnel_key.h similarity index 100% rename from include/linux/tc_act/tc_tunnel_key.h rename to include/uapi/linux/tc_act/tc_tunnel_key.h diff --git a/include/linux/tc_act/tc_vlan.h b/include/uapi/linux/tc_act/tc_vlan.h similarity index 100% rename from include/linux/tc_act/tc_vlan.h rename to include/uapi/linux/tc_act/tc_vlan.h diff --git a/include/linux/tc_ematch/tc_em_cmp.h b/include/uapi/linux/tc_ematch/tc_em_cmp.h similarity index 100% rename from include/linux/tc_ematch/tc_em_cmp.h rename to include/uapi/linux/tc_ematch/tc_em_cmp.h diff --git a/include/linux/tc_ematch/tc_em_meta.h b/include/uapi/linux/tc_ematch/tc_em_meta.h similarity index 100% rename from include/linux/tc_ematch/tc_em_meta.h rename to include/uapi/linux/tc_ematch/tc_em_meta.h diff --git a/include/linux/tc_ematch/tc_em_nbyte.h b/include/uapi/linux/tc_ematch/tc_em_nbyte.h similarity index 100% rename from include/linux/tc_ematch/tc_em_nbyte.h rename to include/uapi/linux/tc_ematch/tc_em_nbyte.h diff --git a/include/linux/tcp.h b/include/uapi/linux/tcp.h similarity index 100% rename from include/linux/tcp.h rename to include/uapi/linux/tcp.h diff --git a/include/linux/tcp_metrics.h b/include/uapi/linux/tcp_metrics.h similarity index 100% rename from include/linux/tcp_metrics.h rename to include/uapi/linux/tcp_metrics.h diff --git a/include/linux/tipc.h b/include/uapi/linux/tipc.h similarity index 100% rename from include/linux/tipc.h rename to include/uapi/linux/tipc.h diff --git a/include/linux/tipc_netlink.h b/include/uapi/linux/tipc_netlink.h similarity index 100% rename from include/linux/tipc_netlink.h rename to include/uapi/linux/tipc_netlink.h diff --git a/include/linux/types.h b/include/uapi/linux/types.h similarity index 100% rename from include/linux/types.h rename to 
include/uapi/linux/types.h diff --git a/include/linux/unix_diag.h b/include/uapi/linux/unix_diag.h similarity index 100% rename from include/linux/unix_diag.h rename to include/uapi/linux/unix_diag.h diff --git a/include/linux/veth.h b/include/uapi/linux/veth.h similarity index 100% rename from include/linux/veth.h rename to include/uapi/linux/veth.h diff --git a/include/linux/xfrm.h b/include/uapi/linux/xfrm.h similarity index 100% rename from include/linux/xfrm.h rename to include/uapi/linux/xfrm.h From 237a52731b98d89723cc38a4570efbaece9108de Mon Sep 17 00:00:00 2001 From: Stephen Hemminger <stephen@networkplumber.org> Date: Wed, 11 Oct 2017 10:47:28 -0700 Subject: [PATCH 18/28] rdma: move headers to uapi And update with version from upstream. Signed-off-by: Stephen Hemminger <stephen@networkplumber.org> --- include/{ => uapi}/rdma/rdma_netlink.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) rename include/{ => uapi}/rdma/rdma_netlink.h (98%) diff --git a/include/rdma/rdma_netlink.h b/include/uapi/rdma/rdma_netlink.h similarity index 98% rename from include/rdma/rdma_netlink.h rename to include/uapi/rdma/rdma_netlink.h index 861440a8..13875a3f 100644 --- a/include/rdma/rdma_netlink.h +++ b/include/uapi/rdma/rdma_netlink.h @@ -1,5 +1,5 @@ -#ifndef _UAPI_RDMA_NETLINK_H -#define _UAPI_RDMA_NETLINK_H +#ifndef _RDMA_NETLINK_H +#define _RDMA_NETLINK_H #include <linux/types.h> @@ -304,4 +304,4 @@ enum rdma_nldev_attr { RDMA_NLDEV_ATTR_MAX }; -#endif /* _UAPI_RDMA_NETLINK_H */ +#endif /* _RDMA_NETLINK_H */ From ecd44e680576c793f2a943d00301215f09882c59 Mon Sep 17 00:00:00 2001 From: Petr Vorel <petr.vorel@gmail.com> Date: Sun, 8 Oct 2017 16:39:16 +0200 Subject: [PATCH 19/28] tests: Remove bashisms (s/source/.) 
Signed-off-by: Petr Vorel <petr.vorel@gmail.com> --- testsuite/tests/ip/link/new_link.t | 2 +- testsuite/tests/ip/link/show_dev_wo_vf_rate.t | 2 +- testsuite/tests/ip/netns/set_nsid.t | 2 +- testsuite/tests/ip/netns/set_nsid_batch.t | 2 +- testsuite/tests/ip/route/add_default_route.t | 4 ++-- testsuite/tests/ip/tunnel/add_tunnel.t | 2 +- testsuite/tests/tc/cls-testbed.t | 2 +- testsuite/tests/tc/dsmark.t | 2 +- testsuite/tests/tc/pedit.t | 2 +- 9 files changed, 10 insertions(+), 10 deletions(-) diff --git a/testsuite/tests/ip/link/new_link.t b/testsuite/tests/ip/link/new_link.t index 699adbcd..c17650a2 100755 --- a/testsuite/tests/ip/link/new_link.t +++ b/testsuite/tests/ip/link/new_link.t @@ -1,6 +1,6 @@ #!/bin/sh -source lib/generic.sh +. lib/generic.sh ts_log "[Testing add/del virtual links]" diff --git a/testsuite/tests/ip/link/show_dev_wo_vf_rate.t b/testsuite/tests/ip/link/show_dev_wo_vf_rate.t index a600ba65..5b3c004e 100755 --- a/testsuite/tests/ip/link/show_dev_wo_vf_rate.t +++ b/testsuite/tests/ip/link/show_dev_wo_vf_rate.t @@ -1,6 +1,6 @@ #!/bin/sh -source lib/generic.sh +. lib/generic.sh NL_FILE="tests/ip/link/dev_wo_vf_rate.nl" ts_ip "$0" "Show VF devices w/o VF rate info" -d monitor file $NL_FILE diff --git a/testsuite/tests/ip/netns/set_nsid.t b/testsuite/tests/ip/netns/set_nsid.t index 606d45ab..8f8c7792 100755 --- a/testsuite/tests/ip/netns/set_nsid.t +++ b/testsuite/tests/ip/netns/set_nsid.t @@ -1,6 +1,6 @@ #!/bin/sh -source lib/generic.sh +. lib/generic.sh ts_log "[Testing netns nsid]" diff --git a/testsuite/tests/ip/netns/set_nsid_batch.t b/testsuite/tests/ip/netns/set_nsid_batch.t index abb3f1bb..196fd4b3 100755 --- a/testsuite/tests/ip/netns/set_nsid_batch.t +++ b/testsuite/tests/ip/netns/set_nsid_batch.t @@ -1,6 +1,6 @@ #!/bin/sh -source lib/generic.sh +. 
lib/generic.sh ts_log "[Testing netns nsid in batch mode]" diff --git a/testsuite/tests/ip/route/add_default_route.t b/testsuite/tests/ip/route/add_default_route.t index e5ea6473..0b566f1f 100755 --- a/testsuite/tests/ip/route/add_default_route.t +++ b/testsuite/tests/ip/route/add_default_route.t @@ -1,6 +1,6 @@ -#!/bin/sh +#!/bin/bash -source lib/generic.sh +. lib/generic.sh ts_log "[Testing add default route]" diff --git a/testsuite/tests/ip/tunnel/add_tunnel.t b/testsuite/tests/ip/tunnel/add_tunnel.t index 18f6e370..3f5a9d3c 100755 --- a/testsuite/tests/ip/tunnel/add_tunnel.t +++ b/testsuite/tests/ip/tunnel/add_tunnel.t @@ -1,6 +1,6 @@ #!/bin/sh -source lib/generic.sh +. lib/generic.sh TUNNEL_NAME="tunnel_test_ip" diff --git a/testsuite/tests/tc/cls-testbed.t b/testsuite/tests/tc/cls-testbed.t index 2afc26fc..d5c21e5c 100755 --- a/testsuite/tests/tc/cls-testbed.t +++ b/testsuite/tests/tc/cls-testbed.t @@ -1,7 +1,7 @@ #!/bin/bash # vim: ft=sh -source lib/generic.sh +. lib/generic.sh QDISCS="cbq htb dsmark" diff --git a/testsuite/tests/tc/dsmark.t b/testsuite/tests/tc/dsmark.t index 6934165e..177585e6 100755 --- a/testsuite/tests/tc/dsmark.t +++ b/testsuite/tests/tc/dsmark.t @@ -1,7 +1,7 @@ #!/bin/bash # vim: ft=sh -source lib/generic.sh +. lib/generic.sh ts_qdisc_available "dsmark" if [ $? -eq 0 ]; then diff --git a/testsuite/tests/tc/pedit.t b/testsuite/tests/tc/pedit.t index e9b6c333..8d531a05 100755 --- a/testsuite/tests/tc/pedit.t +++ b/testsuite/tests/tc/pedit.t @@ -1,6 +1,6 @@ #!/bin/sh -source lib/generic.sh +. lib/generic.sh DEV="$(rand_dev)" ts_ip "$0" "Add $DEV dummy interface" link add dev $DEV type dummy From 4f6b73380d2c84b98b7beffe6ca73f635ab72a06 Mon Sep 17 00:00:00 2001 From: Baruch Siach <baruch@tkos.co.il> Date: Mon, 9 Oct 2017 08:49:44 +0300 Subject: [PATCH 20/28] lib: fix multiple strlcpy definition Some C libraries, like uClibc and musl, provide BSD compatible strlcpy(). 
Add check_strlcpy() to configure, and avoid defining strlcpy and strlcat when the C library provides them. This fixes the following static link error with uClibc-ng: .../sysroot/usr/lib/libc.a(strlcpy.os): In function `strlcpy': strlcpy.c:(.text+0x0): multiple definition of `strlcpy' ../lib/libutil.a(utils.o):utils.c:(.text+0x1ddc): first defined here collect2: error: ld returned 1 exit status Acked-by: Phil Sutter <phil@nwl.cc> Signed-off-by: Baruch Siach <baruch@tkos.co.il> --- configure | 24 ++++++++++++++++++++++++ lib/utils.c | 2 ++ 2 files changed, 26 insertions(+) diff --git a/configure b/configure index 7be8fb11..f0668ab3 100755 --- a/configure +++ b/configure @@ -326,6 +326,27 @@ EOF rm -f $TMPDIR/dbtest.c $TMPDIR/dbtest } +check_strlcpy() +{ + cat >$TMPDIR/strtest.c <<EOF +#include <string.h> +int main(int argc, char **argv) { + char dst[10]; + strlcpy(dst, "test", sizeof(dst)); + return 0; +} +EOF + $CC -I$INCLUDE -o $TMPDIR/strtest $TMPDIR/strtest.c >/dev/null 2>&1 + if [ $? 
-eq 0 ] + then + echo "no" + else + echo 'CFLAGS += -DNEED_STRLCPY' >>$CONFIG + echo "yes" + fi + rm -f $TMPDIR/strtest.c $TMPDIR/strtest +} + quiet_config() { cat <<EOF @@ -397,6 +418,9 @@ check_mnl echo -n "Berkeley DB: " check_berkeley_db +echo -n "need for strlcpy: " +check_strlcpy + echo echo -n "docs:" check_docs diff --git a/lib/utils.c b/lib/utils.c index 0cf99619..632fd0dd 100644 --- a/lib/utils.c +++ b/lib/utils.c @@ -1260,6 +1260,7 @@ int get_real_family(int rtm_type, int rtm_family) return rtm_family; } +#ifdef NEED_STRLCPY size_t strlcpy(char *dst, const char *src, size_t size) { size_t srclen = strlen(src); @@ -1282,3 +1283,4 @@ size_t strlcat(char *dst, const char *src, size_t size) return dlen + strlcpy(dst + dlen, src, size - dlen); } +#endif From 7c72df5a95174c21bdc4ca09cd15a88c0c10e568 Mon Sep 17 00:00:00 2001 From: Ivan Delalande <colona@arista.com> Date: Fri, 6 Oct 2017 16:48:19 -0700 Subject: [PATCH 21/28] utils: add print_escape_buf to format and print arbitrary bytes Keep it as simple as possible for now: just escape anything that is not isprint-able, is among the "escape" parameter or '\' as an octal escape sequence. This should be pretty easy to extend if any other user needs something more complex in the future. 
Signed-off-by: Ivan Delalande <colona@arista.com> --- include/utils.h | 2 ++ lib/utils.c | 15 +++++++++++++++ 2 files changed, 17 insertions(+) diff --git a/include/utils.h b/include/utils.h index 76addb32..3d91c50d 100644 --- a/include/utils.h +++ b/include/utils.h @@ -195,6 +195,8 @@ static inline void __jiffies_to_tv(struct timeval *tv, unsigned long jiffies) tv->tv_usec = tvusec - 1000000 * tv->tv_sec; } +void print_escape_buf(const __u8 *buf, size_t len, const char *escape); + int print_timestamp(FILE *fp); void print_nlmsg_timestamp(FILE *fp, const struct nlmsghdr *n); diff --git a/lib/utils.c b/lib/utils.c index 632fd0dd..ac155bf5 100644 --- a/lib/utils.c +++ b/lib/utils.c @@ -31,6 +31,7 @@ #include <time.h> #include <sys/time.h> #include <errno.h> +#include <ctype.h> #include "rt_names.h" #include "utils.h" @@ -1047,6 +1048,20 @@ int addr64_n2a(__u64 addr, char *buff, size_t len) return written; } +/* Print buffer and escape bytes that are !isprint or among 'escape' */ +void print_escape_buf(const __u8 *buf, size_t len, const char *escape) +{ + size_t i; + + for (i = 0; i < len; ++i) { + if (isprint(buf[i]) && buf[i] != '\\' && + !strchr(escape, buf[i])) + printf("%c", buf[i]); + else + printf("\\%03o", buf[i]); + } +} + int print_timestamp(FILE *fp) { struct timeval tv; From da9cc6ab90186e5c1b6dd1d194c18c967e1a7869 Mon Sep 17 00:00:00 2001 From: Ivan Delalande <colona@arista.com> Date: Fri, 6 Oct 2017 16:48:20 -0700 Subject: [PATCH 22/28] ss: print MD5 signature keys configured on TCP sockets These keys are reported by kernel 4.14 and later under the INET_DIAG_MD5SIG attribute, when INET_DIAG_INFO is requested (ss -i) and we have CAP_NET_ADMIN. 
The additional output looks like: md5keys:fe80::/64=signing_key,10.1.2.0/24=foobar,::1/128=Test Signed-off-by: Ivan Delalande <colona@arista.com> --- misc/ss.c | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/misc/ss.c b/misc/ss.c index dd8dfaa4..09bff8a7 100644 --- a/misc/ss.c +++ b/misc/ss.c @@ -2153,6 +2153,16 @@ static void print_skmeminfo(struct rtattr *tb[], int attrtype) printf(")"); } +static void print_md5sig(struct tcp_diag_md5sig *sig) +{ + printf("%s/%d=", + format_host(sig->tcpm_family, + sig->tcpm_family == AF_INET6 ? 16 : 4, + &sig->tcpm_addr), + sig->tcpm_prefixlen); + print_escape_buf(sig->tcpm_key, sig->tcpm_keylen, " ,"); +} + #define TCPI_HAS_OPT(info, opt) !!(info->tcpi_options & (opt)) static void tcp_show_info(const struct nlmsghdr *nlh, struct inet_diag_msg *r, @@ -2289,6 +2299,17 @@ static void tcp_show_info(const struct nlmsghdr *nlh, struct inet_diag_msg *r, free(s.dctcp); free(s.bbr_info); } + if (tb[INET_DIAG_MD5SIG]) { + struct tcp_diag_md5sig *sig = RTA_DATA(tb[INET_DIAG_MD5SIG]); + int len = RTA_PAYLOAD(tb[INET_DIAG_MD5SIG]); + + printf(" md5keys:"); + print_md5sig(sig++); + for (len -= sizeof(*sig); len > 0; len -= sizeof(*sig)) { + printf(","); + print_md5sig(sig++); + } + } } static const char *format_host_sa(struct sockaddr_storage *sa) From 268a9eee985f3e1d3ef1499b5d91de81f0d1931b Mon Sep 17 00:00:00 2001 From: Stephen Hemminger <stephen@networkplumber.org> Date: Wed, 11 Oct 2017 18:08:15 -0700 Subject: [PATCH 23/28] netem: fix code indentation Signed-off-by: Stephen Hemminger <stephen@networkplumber.org> --- tc/q_netem.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/tc/q_netem.c b/tc/q_netem.c index 5a9e7474..cdaddce9 100644 --- a/tc/q_netem.c +++ b/tc/q_netem.c @@ -231,7 +231,7 @@ static int netem_parse_opt(struct qdisc_util *qu, int argc, char **argv, if (!strcmp(*argv, "random")) { NEXT_ARG(); -random_loss_model: + random_loss_model: if (get_percent(&opt.loss, *argv)) 
{ explain1("loss percent"); return -1; @@ -338,7 +338,7 @@ random_loss_model: return -1; } } else if (matches(*argv, "ecn") == 0) { - present[TCA_NETEM_ECN] = 1; + present[TCA_NETEM_ECN] = 1; } else if (matches(*argv, "reorder") == 0) { NEXT_ARG(); present[TCA_NETEM_REORDER] = 1; @@ -469,7 +469,7 @@ random_loss_model: if (present[TCA_NETEM_CORR] && addattr_l(n, 1024, TCA_NETEM_CORR, &cor, sizeof(cor)) < 0) - return -1; + return -1; if (present[TCA_NETEM_REORDER] && addattr_l(n, 1024, TCA_NETEM_REORDER, &reorder, sizeof(reorder)) < 0) @@ -478,7 +478,7 @@ random_loss_model: if (present[TCA_NETEM_ECN] && addattr_l(n, 1024, TCA_NETEM_ECN, &present[TCA_NETEM_ECN], sizeof(present[TCA_NETEM_ECN])) < 0) - return -1; + return -1; if (present[TCA_NETEM_CORRUPT] && addattr_l(n, 1024, TCA_NETEM_CORRUPT, &corrupt, sizeof(corrupt)) < 0) @@ -491,11 +491,11 @@ random_loss_model: if (loss_type == NETEM_LOSS_GI) { if (addattr_l(n, 1024, NETEM_LOSS_GI, &gimodel, sizeof(gimodel)) < 0) - return -1; + return -1; } else if (loss_type == NETEM_LOSS_GE) { if (addattr_l(n, 1024, NETEM_LOSS_GE, &gemodel, sizeof(gemodel)) < 0) - return -1; + return -1; } else { fprintf(stderr, "loss in the weeds!\n"); return -1; From f1241a7e3b800d8325eb710744289b10f92ef12d Mon Sep 17 00:00:00 2001 From: Petr Vorel <petr.vorel@gmail.com> Date: Sun, 15 Oct 2017 11:59:45 +0200 Subject: [PATCH 24/28] tests: Revert back /bin/sh in shebang This was added by mistake in commit ecd44e68 ("tests: Remove bashisms (s/source/.)") Signed-off-by: Petr Vorel <petr.vorel@gmail.com> --- testsuite/tests/ip/route/add_default_route.t | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/testsuite/tests/ip/route/add_default_route.t b/testsuite/tests/ip/route/add_default_route.t index 0b566f1f..569ba1f8 100755 --- a/testsuite/tests/ip/route/add_default_route.t +++ b/testsuite/tests/ip/route/add_default_route.t @@ -1,4 +1,4 @@ -#!/bin/bash +#!/bin/sh . 
lib/generic.sh From e6849a5722dc248a1fe0519c97094000ded3849c Mon Sep 17 00:00:00 2001 From: Petr Vorel <petr.vorel@gmail.com> Date: Fri, 13 Oct 2017 15:57:16 +0200 Subject: [PATCH 25/28] color: Fix ip segfault when using --color switch Commit d0e72011 ("ip: ipaddress.c: add support for json output") introduced passing -1 as enum color_attr. This is not only wrong as no color_attr has value -1, but also causes another segfault in color_fprintf() on this setup as there is no item with index -1 in array of enum attr_colors[]. Using COLOR_CLEAR is a valid option. Reproduce with: $ COLORFGBG='0;15' ip -c a NOTE: COLORFGBG is an environment variable used for defining whether the user has a light or dark background. COLORFGBG="0;15" is used to ask for color set suitable for light background, COLORFGBG="15;0" is used to ask for color set suitable for dark background. Signed-off-by: Petr Vorel <petr.vorel@gmail.com> --- include/json_print.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/json_print.h b/include/json_print.h index b6ce1f9f..596af35a 100644 --- a/include/json_print.h +++ b/include/json_print.h @@ -53,7 +53,7 @@ void close_json_array(enum output_type type, const char *delim); const char *fmt, \ type value) \ { \ - print_color_##type_name(t, -1, key, fmt, value); \ + print_color_##type_name(t, COLOR_CLEAR, key, fmt, value); \ } _PRINT_FUNC(int, int); _PRINT_FUNC(bool, bool); From 24b058a2a4f5248becc3c148637a3644d11a65a9 Mon Sep 17 00:00:00 2001 From: Petr Vorel <petr.vorel@gmail.com> Date: Fri, 13 Oct 2017 15:57:17 +0200 Subject: [PATCH 26/28] color: Fix another ip segfault when using --color switch Commit 959f1428 ("color: add new COLOR_NONE and disable_color function") introduced the color enum COLOR_NONE, which is not only a duplicate of COLOR_CLEAR, but also caused a segfault when running ip with the --color switch, as 'attr + 8' in color_fprintf() accesses an array item out of bounds. Thus removing it and restoring "magic" offset + 7.
Reproduce with: $ ip -c a Signed-off-by: Petr Vorel <petr.vorel@gmail.com> --- include/color.h | 1 - lib/color.c | 4 ++-- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/include/color.h b/include/color.h index 1cd6f7d2..c183ef79 100644 --- a/include/color.h +++ b/include/color.h @@ -2,7 +2,6 @@ #define __COLOR_H__ 1 enum color_attr { - COLOR_NONE, COLOR_IFNAME, COLOR_MAC, COLOR_INET, diff --git a/lib/color.c b/lib/color.c index 79d5e289..05afcb21 100644 --- a/lib/color.c +++ b/lib/color.c @@ -104,13 +104,13 @@ int color_fprintf(FILE *fp, enum color_attr attr, const char *fmt, ...) va_start(args, fmt); - if (!color_is_enabled || attr == COLOR_NONE) { + if (!color_is_enabled || attr == COLOR_CLEAR) { ret = vfprintf(fp, fmt, args); goto end; } ret += fprintf(fp, "%s", - color_codes[attr_colors[is_dark_bg ? attr + 8 : attr]]); + color_codes[attr_colors[is_dark_bg ? attr + 7 : attr]]); ret += vfprintf(fp, fmt, args); ret += fprintf(fp, "%s", color_codes[C_CLEAR]); From 99b89c518e929e32d9b1e9e5623550018f22b552 Mon Sep 17 00:00:00 2001 From: Petr Vorel <petr.vorel@gmail.com> Date: Fri, 13 Oct 2017 15:57:18 +0200 Subject: [PATCH 27/28] color: Cleanup code to remove "magic" offset + 7 Signed-off-by: Petr Vorel <petr.vorel@gmail.com> --- lib/color.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/lib/color.c b/lib/color.c index 05afcb21..497f5e1b 100644 --- a/lib/color.c +++ b/lib/color.c @@ -45,8 +45,8 @@ static const char * const color_codes[] = { NULL, }; -static enum color attr_colors[] = { - /* light background */ +/* light background */ +static enum color attr_colors_light[] = { C_CYAN, C_YELLOW, C_MAGENTA, @@ -54,8 +54,10 @@ static enum color attr_colors[] = { C_GREEN, C_RED, C_CLEAR, +}; - /* dark background */ +/* dark background */ +static enum color attr_colors_dark[] = { C_BOLD_CYAN, C_BOLD_YELLOW, C_BOLD_MAGENTA, @@ -109,8 +111,9 @@ int color_fprintf(FILE *fp, enum color_attr attr, const char *fmt, ...) 
goto end; } - ret += fprintf(fp, "%s", - color_codes[attr_colors[is_dark_bg ? attr + 7 : attr]]); + ret += fprintf(fp, "%s", color_codes[is_dark_bg ? + attr_colors_dark[attr] : attr_colors_light[attr]]); + ret += vfprintf(fp, fmt, args); ret += fprintf(fp, "%s", color_codes[C_CLEAR]); From 4b73d52f8a81919f511cd47d39251f74f6a37c7d Mon Sep 17 00:00:00 2001 From: Petr Vorel <petr.vorel@gmail.com> Date: Fri, 13 Oct 2017 15:57:19 +0200 Subject: [PATCH 28/28] color: Rename enum COLOR_NONE is more descriptive than COLOR_CLEAR. Signed-off-by: Petr Vorel <petr.vorel@gmail.com> --- include/color.h | 2 +- include/json_print.h | 2 +- lib/color.c | 6 +++--- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/include/color.h b/include/color.h index c183ef79..7fd685d0 100644 --- a/include/color.h +++ b/include/color.h @@ -8,7 +8,7 @@ enum color_attr { COLOR_INET6, COLOR_OPERSTATE_UP, COLOR_OPERSTATE_DOWN, - COLOR_CLEAR + COLOR_NONE }; void enable_color(void); diff --git a/include/json_print.h b/include/json_print.h index 596af35a..dc4d2bb3 100644 --- a/include/json_print.h +++ b/include/json_print.h @@ -53,7 +53,7 @@ void close_json_array(enum output_type type, const char *delim); const char *fmt, \ type value) \ { \ - print_color_##type_name(t, COLOR_CLEAR, key, fmt, value); \ + print_color_##type_name(t, COLOR_NONE, key, fmt, value); \ } _PRINT_FUNC(int, int); _PRINT_FUNC(bool, bool); diff --git a/lib/color.c b/lib/color.c index 497f5e1b..8d049a01 100644 --- a/lib/color.c +++ b/lib/color.c @@ -106,7 +106,7 @@ int color_fprintf(FILE *fp, enum color_attr attr, const char *fmt, ...) 
va_start(args, fmt); - if (!color_is_enabled || attr == COLOR_CLEAR) { + if (!color_is_enabled || attr == COLOR_NONE) { ret = vfprintf(fp, fmt, args); goto end; } @@ -130,7 +130,7 @@ enum color_attr ifa_family_color(__u8 ifa_family) case AF_INET6: return COLOR_INET6; default: - return COLOR_CLEAR; + return COLOR_NONE; } } @@ -142,6 +142,6 @@ enum color_attr oper_state_color(__u8 state) case IF_OPER_DOWN: return COLOR_OPERSTATE_DOWN; default: - return COLOR_CLEAR; + return COLOR_NONE; } }