tc: flower: Fix buffer overflow on large labels

The print buffer is 64 bytes, but printing a label and its mask in hex can take 66 bytes (32 hex digits, '/', 32 hex digits, and the string terminator), so the buffer overflows when the string terminator ('\0') is written. Fix that by increasing the print buffer size. Example of an overflowing ct_label: ct_label 11111111111111111111111111111111/11111111111111111111111111111111 Fixes: 2fffb1c030 ("tc: flower: Add matching on conntrack info") Signed-off-by: Paul Blakey <paulb@nvidia.com> Signed-off-by: Stephen Hemminger <stephen@networkplumber.org>
uapi: update to if_ether.h
2021-12-06 13:44:50 -08:00 · 2021-12-03 12:20:02 -08:00 · 2021-11-18 15:01:48 -08:00 · 2021-11-18 15:01:04 -08:00 · 2021-11-18 10:00:19 -08:00 · 2021-11-18 09:56:57 -08:00
678 changed files with 79121 additions and 29680 deletions
--- a/.clang-format
+++ b/.clang-format
@ -0,0 +1,130 @@
+# SPDX-License-Identifier: GPL-2.0
+#
+# clang-format configuration file. Intended for clang-format >= 4.
+#
+# For more information, see:
+#
+#   Documentation/process/clang-format.rst
+#   https://clang.llvm.org/docs/ClangFormat.html
+#   https://clang.llvm.org/docs/ClangFormatStyleOptions.html
+#
+---
+AccessModifierOffset: -4
+AlignAfterOpenBracket: Align
+AlignConsecutiveAssignments: false
+AlignConsecutiveDeclarations: false
+#AlignEscapedNewlines: Left # Unknown to clang-format-4.0
+AlignOperands: true
+AlignTrailingComments: false
+AllowAllParametersOfDeclarationOnNextLine: false
+AllowShortBlocksOnASingleLine: false
+AllowShortCaseLabelsOnASingleLine: false
+AllowShortFunctionsOnASingleLine: None
+AllowShortIfStatementsOnASingleLine: false
+AllowShortLoopsOnASingleLine: false
+AlwaysBreakAfterDefinitionReturnType: None
+AlwaysBreakAfterReturnType: None
+AlwaysBreakBeforeMultilineStrings: false
+AlwaysBreakTemplateDeclarations: false
+BinPackArguments: true
+BinPackParameters: true
+BraceWrapping:
+  AfterClass: false
+  AfterControlStatement: false
+  AfterEnum: false
+  AfterFunction: true
+  AfterNamespace: true
+  AfterObjCDeclaration: false
+  AfterStruct: false
+  AfterUnion: false
+  #AfterExternBlock: false # Unknown to clang-format-5.0
+  BeforeCatch: false
+  BeforeElse: false
+  IndentBraces: false
+  #SplitEmptyFunction: true # Unknown to clang-format-4.0
+  #SplitEmptyRecord: true # Unknown to clang-format-4.0
+  #SplitEmptyNamespace: true # Unknown to clang-format-4.0
+BreakBeforeBinaryOperators: None
+BreakBeforeBraces: Custom
+#BreakBeforeInheritanceComma: false # Unknown to clang-format-4.0
+BreakBeforeTernaryOperators: false
+BreakConstructorInitializersBeforeComma: false
+#BreakConstructorInitializers: BeforeComma # Unknown to clang-format-4.0
+BreakAfterJavaFieldAnnotations: false
+BreakStringLiterals: false
+ColumnLimit: 80
+CommentPragmas: '^ IWYU pragma:'
+#CompactNamespaces: false # Unknown to clang-format-4.0
+ConstructorInitializerAllOnOneLineOrOnePerLine: false
+ConstructorInitializerIndentWidth: 8
+ContinuationIndentWidth: 8
+Cpp11BracedListStyle: false
+DerivePointerAlignment: false
+DisableFormat: false
+ExperimentalAutoDetectBinPacking: false
+#FixNamespaceComments: false # Unknown to clang-format-4.0
+
+# Taken from:
+#   git grep -h '^#define [^[:space:]]*for_each[^[:space:]]*(' include/ \
+#   | sed "s,^#define \([^[:space:]]*for_each[^[:space:]]*\)(.*$,  - '\1'," \
+#   | sort | uniq
+ForEachMacros:
+  - 'list_for_each_entry'
+  - 'list_for_each_entry_safe'
+  - 'mnl_attr_for_each_nested'
+  - 'hlist_for_each'
+  - 'hlist_for_each_safe'
+  - 'hlist_for_each_entry'
+
+#IncludeBlocks: Preserve # Unknown to clang-format-5.0
+IncludeCategories:
+  - Regex: '.*'
+    Priority: 1
+IncludeIsMainRegex: '(Test)?$'
+IndentCaseLabels: false
+#IndentPPDirectives: None # Unknown to clang-format-5.0
+IndentWidth: 8
+IndentWrappedFunctionNames: false
+JavaScriptQuotes: Leave
+JavaScriptWrapImports: true
+KeepEmptyLinesAtTheStartOfBlocks: false
+MacroBlockBegin: ''
+MacroBlockEnd: ''
+MaxEmptyLinesToKeep: 1
+NamespaceIndentation: Inner
+#ObjCBinPackProtocolList: Auto # Unknown to clang-format-5.0
+ObjCBlockIndentWidth: 8
+ObjCSpaceAfterProperty: true
+ObjCSpaceBeforeProtocolList: true
+
+# Taken from git's rules
+#PenaltyBreakAssignment: 10 # Unknown to clang-format-4.0
+PenaltyBreakBeforeFirstCallParameter: 30
+PenaltyBreakComment: 10
+PenaltyBreakFirstLessLess: 0
+PenaltyBreakString: 10
+PenaltyExcessCharacter: 100
+PenaltyReturnTypeOnItsOwnLine: 60
+
+PointerAlignment: Right
+ReflowComments: false
+SortIncludes: false
+#SortUsingDeclarations: false # Unknown to clang-format-4.0
+SpaceAfterCStyleCast: false
+SpaceAfterTemplateKeyword: true
+SpaceBeforeAssignmentOperators: true
+#SpaceBeforeCtorInitializerColon: true # Unknown to clang-format-5.0
+#SpaceBeforeInheritanceColon: true # Unknown to clang-format-5.0
+SpaceBeforeParens: ControlStatements
+#SpaceBeforeRangeBasedForLoopColon: true # Unknown to clang-format-5.0
+SpaceInEmptyParentheses: false
+SpacesBeforeTrailingComments: 1
+SpacesInAngles: false
+SpacesInContainerLiterals: false
+SpacesInCStyleCastParentheses: false
+SpacesInParentheses: false
+SpacesInSquareBrackets: false
+Standard: Cpp03
+TabWidth: 8
+UseTab: Always
+...
--- a/.gitignore
+++ b/.gitignore
@ -1,6 +1,7 @@
+# locally generated
+Config
 static-syms.h
 config.*
-Config
 *.o
 *.a
 *.so
@ -38,13 +39,3 @@ testsuite/results
 testsuite/iproute2/iproute2-this
 testsuite/tools/generate_nlmsg
 testsuite/tests/ip/link/dev_wo_vf_rate.nl
-
-# doc files generated at runtime
-doc/*.aux
-doc/*.log
-doc/*.toc
-doc/*.ps
-doc/*.dvi
-doc/*.html
-doc/*.pdf
-doc/*.out
--- a/.mailmap
+++ b/.mailmap
@ -0,0 +1,22 @@
+#
+# This list is used by git-shortlog to fix a few botched name translations
+# in the git archive, either because the author's full name was messed up
+# and/or not always written the same way, making contributions from the
+# same person appearing not to be so or badly displayed.
+#
+# Format
+#  Full name <goodaddress> <badaddress> 
+Steve Wise <larrystevenwise@gmail.com> <swise@opengridcomputing.com>
+Steve Wise <larrystevenwise@gmail.com> <swise@chelsio.com>
+
+Stephen Hemminger <stephen@networkplumber.org> <sthemmin@microsoft.com>
+Stephen Hemminger <stephen@networkplumber.org> <shemming@brocade.com>
+Stephen Hemminger <stephen@networkplumber.org> <stephen.hemminger@vyatta.com>
+Stephen Hemminger <stephen@networkplumber.org> <shemminger@vyatta.com>
+Stephen Hemminger <stephen@networkplumber.org> <shemminger>
+Stephen Hemminger <stephen@networkplumber.org> <shemminger@linux-foundation.org>
+Stephen Hemminger <stephen@networkplumber.org> <shemminger@osdl.org>
+Stephen Hemminger <stephen@networkplumber.org> <osdl.org!shemminger>
+Stephen Hemminger <stephen@networkplumber.org> <osdl.net!shemminger>
+
+David Ahern <dsahern@gmail.com> <dsa@cumulusnetworks.com>
--- a/Makefile
+++ b/Makefile
@ -1,13 +1,24 @@
+# SPDX-License-Identifier: GPL-2.0
 # Top level Makefile for iproute2

+-include config.mk
+
+ifeq ("$(origin V)", "command line")
+VERBOSE = $(V)
+endif
+ifndef VERBOSE
+VERBOSE = 0
+endif
+
 ifeq ($(VERBOSE),0)
 MAKEFLAGS += --no-print-directory
 endif

 PREFIX?=/usr
-LIBDIR?=$(PREFIX)/lib
 SBINDIR?=/sbin
 CONFDIR?=/etc/iproute2
+NETNS_RUN_DIR?=/var/run/netns
+NETNS_ETC_DIR?=/etc/netns
 DATADIR?=$(PREFIX)/share
 HDRDIR?=$(PREFIX)/include/iproute2
 DOCDIR?=$(DATADIR)/doc/iproute2
@ -26,74 +37,101 @@ ifneq ($(SHARED_LIBS),y)
 DEFINES+= -DNO_SHARED_LIBS
 endif

-DEFINES+=-DCONFDIR=\"$(CONFDIR)\"
+DEFINES+=-DCONFDIR=\"$(CONFDIR)\" \
+         -DNETNS_RUN_DIR=\"$(NETNS_RUN_DIR)\" \
+         -DNETNS_ETC_DIR=\"$(NETNS_ETC_DIR)\"

-#options for decnet
-ADDLIB+=dnet_ntop.o dnet_pton.o
+#options for AX.25
+ADDLIB+=ax25_ntop.o

-#options for ipx
-ADDLIB+=ipx_ntop.o ipx_pton.o
+#options for ROSE
+ADDLIB+=rose_ntop.o

 #options for mpls
 ADDLIB+=mpls_ntop.o mpls_pton.o

+#options for NETROM
+ADDLIB+=netrom_ntop.o
+
 CC := gcc
 HOSTCC ?= $(CC)
 DEFINES += -D_GNU_SOURCE
 # Turn on transparent support for LFS
 DEFINES += -D_FILE_OFFSET_BITS=64 -D_LARGEFILE_SOURCE -D_LARGEFILE64_SOURCE
-CCOPTS = -O2
+CCOPTS = -O2 -pipe
 WFLAGS := -Wall -Wstrict-prototypes  -Wmissing-prototypes
 WFLAGS += -Wmissing-declarations -Wold-style-definition -Wformat=2

-CFLAGS := $(WFLAGS) $(CCOPTS) -I../include $(DEFINES) $(CFLAGS)
+CFLAGS := $(WFLAGS) $(CCOPTS) -I../include -I../include/uapi $(DEFINES) $(CFLAGS)
 YACCFLAGS = -d -t -v

-SUBDIRS=lib ip tc bridge misc netem genl tipc devlink rdma man
+SUBDIRS=lib ip tc bridge misc netem genl tipc devlink rdma dcb man vdpa

-LIBNETLINK=../lib/libnetlink.a ../lib/libutil.a
+LIBNETLINK=../lib/libutil.a ../lib/libnetlink.a
 LDLIBS += $(LIBNETLINK)

-all: Config
+all: config.mk
 	@set -e; \
 	for i in $(SUBDIRS); \
-	do echo; echo $$i; $(MAKE) $(MFLAGS) -C $$i; done
+	do echo; echo $$i; $(MAKE) -C $$i; done

-Config:
-	sh configure $(KERNEL_INCLUDE)
+.PHONY: clean clobber distclean check cscope version
+
+help:
+	@echo "Make Targets:"
+	@echo " all                 - build binaries"
+	@echo " clean               - remove products of build"
+	@echo " distclean           - remove configuration and build"
+	@echo " install             - install binaries on local machine"
+	@echo " check               - run tests"
+	@echo " cscope              - build cscope database"
+	@echo " version             - update version"
+	@echo ""
+	@echo "Make Arguments:"
+	@echo " V=[0|1]             - set build verbosity level"
+
+config.mk:
+	@if [ ! -f config.mk -o configure -nt config.mk ]; then \
+		sh configure $(KERNEL_INCLUDE); \
+	fi

 install: all
 	install -m 0755 -d $(DESTDIR)$(SBINDIR)
 	install -m 0755 -d $(DESTDIR)$(CONFDIR)
 	install -m 0755 -d $(DESTDIR)$(ARPDDIR)
 	install -m 0755 -d $(DESTDIR)$(HDRDIR)
-	install -m 0755 -d $(DESTDIR)$(DOCDIR)/examples
-	install -m 0755 -d $(DESTDIR)$(DOCDIR)/examples/diffserv
-	install -m 0644 README.iproute2+tc $(shell find examples -maxdepth 1 -type f) \
-		$(DESTDIR)$(DOCDIR)/examples
-	install -m 0644 $(shell find examples/diffserv -maxdepth 1 -type f) \
-		$(DESTDIR)$(DOCDIR)/examples/diffserv
-	@for i in $(SUBDIRS) doc; do $(MAKE) -C $$i install; done
+	@for i in $(SUBDIRS);  do $(MAKE) -C $$i install; done
 	install -m 0644 $(shell find etc/iproute2 -maxdepth 1 -type f) $(DESTDIR)$(CONFDIR)
 	install -m 0755 -d $(DESTDIR)$(BASH_COMPDIR)
 	install -m 0644 bash-completion/tc $(DESTDIR)$(BASH_COMPDIR)
+	install -m 0644 bash-completion/devlink $(DESTDIR)$(BASH_COMPDIR)
 	install -m 0644 include/bpf_elf.h $(DESTDIR)$(HDRDIR)

-snapshot:
-	echo "static const char SNAPSHOT[] = \""`date +%y%m%d`"\";" \
-		> include/SNAPSHOT.h
+version:
+	echo "static const char version[] = \""`git describe --tags --long`"\";" \
+		> include/version.h

 clean:
-	@for i in $(SUBDIRS) doc; \
-	do $(MAKE) $(MFLAGS) -C $$i clean; done
+	@for i in $(SUBDIRS) testsuite; \
+	do $(MAKE) -C $$i clean; done

 clobber:
-	touch Config
-	$(MAKE) $(MFLAGS) clean
-	rm -f Config cscope.*
+	touch config.mk
+	$(MAKE) clean
+	rm -f config.mk cscope.*

 distclean: clobber

+check: all
+	$(MAKE) -C testsuite
+	$(MAKE) -C testsuite alltests
+	@if command -v man >/dev/null 2>&1; then \
+		echo "Checking manpages for syntax errors..."; \
+		$(MAKE) -C man check; \
+	else \
+		echo "man not installed, skipping checks for syntax errors."; \
+	fi
+
 cscope:
 	cscope -b -q -R -Iinclude -sip -slib -smisc -snetem -stc

--- a/README
+++ b/README
@ -1,40 +1,39 @@
 This is a set of utilities for Linux networking.

 Information:
-    http://www.linuxfoundation.org/collaborate/workgroups/networking/iproute2
+    https://wiki.linuxfoundation.org/networking/iproute2

 Download:
    http://www.kernel.org/pub/linux/utils/net/iproute2/

-Repository:
-    git://git.kernel.org/pub/scm/linux/kernel/git/shemminger/iproute2.git
+Stable version repository:
+    git://git.kernel.org/pub/scm/network/iproute2/iproute2.git
+
+Development repository:
+    git://git.kernel.org/pub/scm/network/iproute2/iproute2-next.git

 How to compile this.
 --------------------
 1. libdbm

-arpd needs to have the db4 development libraries. For Debian
-users this is the package with a name like libdb4.x-dev.
+arpd needs to have the berkeleydb development libraries. For Debian
+users this is the package with a name like libdbX.X-dev.
 DBM_INCLUDE points to the directory with db_185.h which
 is the include file used by arpd to get to the old format Berkeley
 database routines.  Often this is in the db-devel package.

 2. make

-The makefile will automatically build a Config file which
-contains whether or not ATM is available, etc.
+The makefile will automatically build a config.mk file which
+contains definitions of libraries that may or may not be available
+on the system such as: ATM, ELF, MNL, and SELINUX.

-3. To make documentation, cd to doc/ directory , then
-   look at start of Makefile and set correct values for
-   PAGESIZE=a4		, ie: a4 , letter ...	(string)
-   PAGESPERPAGE=2	, ie: 1 , 2 ...		(numeric)
-   and make there. It assumes, that latex, dvips and psnup
-   are in your path.
+3. include/uapi

-4. This package includes matching sanitized kernel headers because
-   the build environment may not have up to date versions. See Makefile
-   if you have special requirements and need to point at different
-   kernel include files.
+This package includes matching sanitized kernel headers because
+the build environment may not have up to date versions. See Makefile
+if you have special requirements and need to point at different
+kernel include files.

 Stephen Hemminger
 stephen@networkplumber.org
--- a/README.decnet
+++ b/README.decnet
@ -1,33 +0,0 @@
-
-Here are a few quick points about DECnet support...
-
- o iproute2 is the tool of choice for configuring the DECnet support for
-   Linux. For many features, it is the only tool which can be used to
-   configure them.
-
- o No name resolution is available as yet, all addresses must be
-   entered numerically.
-
- o Remember to set the hardware address of the interface using: 
-
-   ip link set ethX address xx:xx:xx:xx:xx:xx
-      (where xx:xx:xx:xx:xx:xx is the MAC address for your DECnet node
-       address)
-
-   if your Ethernet card won't listen to more than one unicast
-   mac address at once. If the Linux DECnet stack doesn't talk to
-   any other DECnet nodes, then check this with tcpdump and if its
-   a problem, change the mac address (but do this _before_ starting
-   any other network protocol on the interface)
-
- o Whilst you can use ip addr add to add more than one DECnet address to an
-   interface, don't expect addresses which are not the same as the
-   kernels node address to work properly with 2.4 kernels. This should
-   be fine with 2.6 kernels as the routing code has been extensively
-   modified and improved.
-
- o The DECnet support is currently self contained. It does not depend on
-   the libdnet library.
-
-Steve Whitehouse <steve@chygwyn.com>
-
--- a/README.devel
+++ b/README.devel
@ -4,12 +4,15 @@ development. Most new features require a kernel and a utility component.
 Please submit both to the Linux networking mailing list
   <netdev@vger.kernel.org>

-The current source is in the git repository:
-    git://git.kernel.org/pub/scm/linux/kernel/git/shemminger/iproute2.git
+The current source for the stable version is in the git repository:
+    git://git.kernel.org/pub/scm/network/iproute2/iproute2.git

-The master branch contains the source corresponding to the current
-code in the mainline Linux kernel (ie follows Linus). The net-next
-branch is a temporary branch that tracks the code intended for the
-next release; it corresponds with networking development branch in
-the kernel.
+The development git repository is available at the following address:
+    git://git.kernel.org/pub/scm/network/iproute2/iproute2-next.git

+The stable repository contains the source corresponding to the
+current code in the Linux networking tree (net), which in turn is
+aligned on the mainline Linux kernel (ie follows Linus).
+The iproute2-next repository tracks the code intended for the next
+release; it corresponds with networking development tree (net-next)
+in the kernel.
--- a/README.distribution
+++ b/README.distribution
@ -1,95 +0,0 @@
-I. About the distribution tables
-
-The table used for "synthesizing" the distribution is essentially a scaled,
-translated, inverse to the cumulative distribution function.
-
-Here's how to think about it: Let F() be the cumulative distribution
-function for a probability distribution X.  We'll assume we've scaled
-things so that X has mean 0 and standard deviation 1, though that's not
-so important here.  Then:
-
-	F(x) = P(X <= x) = \int_{-inf}^x f
-
-where f is the probability density function.
-
-F is monotonically increasing, so has an inverse function G, with range
-0 to 1.  Here, G(t) = the x such that P(X <= x) = t.  (In general, G may
-have singularities if X has point masses, i.e., points x such that
-P(X = x) > 0.)
-
-Now we create a tabular representation of G as follows:  Choose some table
-size N, and for the ith entry, put in G(i/N).  Let's call this table T.
-
-The claim now is, I can create a (discrete) random variable Y whose
-distribution has the same approximate "shape" as X, simply by letting
-Y = T(U), where U is a discrete uniform random variable with range 1 to N.
-To see this, it's enough to show that Y's cumulative distribution function,
-(let's call it H), is a discrete approximation to F.  But
-
-	H(x) = P(Y <= x)
-	     = (# of entries in T <= x) / N   -- as Y chosen uniformly from T
-	     = i/N, where i is the largest integer such that G(i/N) <= x
-	     = i/N, where i is the largest integer such that i/N <= F(x)
-	     		-- since G and F are inverse functions (and F is
-	     		   increasing)
-	     = floor(N*F(x))/N
-
-as desired.
-
-II. How to create distribution tables (in theory)
-
-How can we create this table in practice? In some cases, F may have a
-simple expression which allows evaluating its inverse directly.  The
-Pareto distribution is one example of this.  In other cases, and
-especially for matching an experimentally observed distribution, it's
-easiest simply to create a table for F and "invert" it.  Here, we give
-a concrete example, namely how the new "experimental" distribution was
-created.
-
-1. Collect enough data points to characterize the distribution.  Here, I
-collected 25,000 "ping" roundtrip times to a "distant" point (time.nist.gov).
-That's far more data than is really necessary, but it was fairly painless to
-collect it, so...
-
-2. Normalize the data so that it has mean 0 and standard deviation 1.
-
-3. Determine the cumulative distribution.  The code I wrote creates a table
-covering the range -10 to +10, with granularity .00005.  Obviously, this
-is absurdly over-precise, but since it's a one-time only computation, I
-figured it hardly mattered.
-
-4. Invert the table: for each table entry F(x) = y, make the y*TABLESIZE
-(here, 4096) entry be x*TABLEFACTOR (here, 8192).  This creates a table
-for the ("normalized") inverse of size TABLESIZE, covering its domain 0
-to 1 with granularity 1/TABLESIZE.  Note that even with the granularity
-used in creating the table for F, it's possible not all the entries in
-the table for G will be filled in.  So, make a pass through the
-inverse's table, filling in any missing entries by linear interpolation.
-
-III. How to create distribution tables (in practice)
-
-If you want to do all this yourself, I've provided several tools to help:
-
-1. maketable does the steps 2-4 above, and then generates the appropriate
-header file.  So if you have your own time distribution, you can generate
-the header simply by:
-
-	maketable < time.values > header.h
-
-2. As explained in the other README file, the somewhat sleazy way I have
-of generating correlated values needs correction.  You can generate your
-own correction tables by compiling makesigtable and makemutable with
-your header file.  Check the Makefile to see how this is done.
-
-3. Warning: maketable, makesigtable and especially makemutable do
-enormous amounts of floating point arithmetic.  Don't try running
-these on an old 486.  (NIST Net itself will run fine on such a
-system, since in operation, it just needs to do a few simple integral
-calculations.  But getting there takes some work.)
-
-4. The tables produced are all normalized for mean 0 and standard
-deviation 1.  How do you know what values to use for real?  Here, I've
-provided a simple "stats" utility.  Give it a series of floating point
-values, and it will return their mean (mu), standard deviation (sigma),
-and correlation coefficient (rho).  You can then plug these values
-directly into NIST Net.
--- a/README.iproute2+tc
+++ b/README.iproute2+tc
@ -1,123 +0,0 @@
-iproute2+tc*
-
-It's the first release of Linux traffic control engine.
-
-
-NOTES.
-* csz scheduler is inoperational at the moment, and probably
-  never will be repaired but replaced with h-pfq scheduler.
-* To use "fw" classifier you will need ipfwchains patch.
-* No manual available. Ask me, if you have problems (only try to guess
-  answer yourself at first 8)).
-
-
-Micro-manual how to start it the first time
-------------------------------------------
-
-A. Attach CBQ to eth1:
-
-tc qdisc add dev eth1 root handle 1: cbq bandwidth 10Mbit allot 1514 cell 8 \
-avpkt 1000 mpu 64
-
-B. Add root class:
-
-tc class add dev eth1 parent 1:0 classid 1:1 cbq bandwidth 10Mbit rate 10Mbit \
-allot 1514 cell 8 weight 1Mbit prio 8 maxburst 20 avpkt 1000
-
-C. Add default interactive class:
-
-tc class add dev eth1 parent 1:1 classid 1:2 cbq bandwidth 10Mbit rate 1Mbit \
-allot 1514 cell 8 weight 100Kbit prio 3 maxburst 20 avpkt 1000 split 1:0 \
-defmap c0
-
-D. Add default class:
-
-tc class add dev eth1 parent 1:1 classid 1:3 cbq bandwidth 10Mbit rate 8Mbit \
-allot 1514 cell 8 weight 800Kbit prio 7 maxburst 20 avpkt 1000 split 1:0 \
-defmap 3f
-
-etc. etc. etc. Well, it is enough to start 8) The rest can be guessed 8)
-Look also at more elaborated example, ready to start rsvpd,
-in rsvp/cbqinit.eth1.
-
-
-Terminology and advices about setting CBQ parameters may be found in Sally Floyd
-papers. 
-
-
-Pairs X:Y are class handles, X:0 are qdisc handles.
-weight should be proportional to rate for leaf classes
-(I choosed it ten times less, but it is not necessary)
-
-defmap is bitmap of logical priorities served by this class.
-
-E. Another qdiscs are simpler. F.e. let's join TBF on class 1:2
-
-tc qdisc add dev eth1 parent 1:2 tbf rate 64Kbit buffer 5Kb/8 limit 10Kb
-
-F. Look at all that we created:
-
-tc qdisc ls dev eth1
-tc class ls dev eth1
-
-G. Install "route" classifier on root of cbq and map destination from realm
-1 to class 1:2
-
-tc filter add dev eth1 parent 1:0 protocol ip prio 100 route to 1 classid 1:2
-
-H. Assign routes to 10.11.12.0/24 to realm 1
-
-ip route add 10.11.12.0/24 dev eth1 via whatever realm 1
-
-etc. The same thing can be made with rules.
-I still did not test ipchains, but they should work too.
-
-
-Setup and code example of BPF classifier and action can be found under
-examples/bpf/, which should explain everything for getting started.
-
-
-Setup of rsvp and u32 classifiers is more hairy.
-If you read RSVP specs, you will understand how rsvp classifier
-works easily. What's about u32... That's example:
-
-
-#! /bin/sh
-
-TC=/home/root/tc
-
-# Setup classifier root on eth1 root (it is cbq)
-$TC filter add dev eth1 parent 1:0 prio 5 protocol ip u32
-
-# Create hash table of 256 slots with ID 1:
-$TC filter add dev eth1 parent 1:0 prio 5 handle 1: u32 divisor 256
-
-# Add to 6th slot of hash table rule to select tcp/telnet to 193.233.7.75
-# direct it to class 1:4 and prescribe to fall to best effort,
-# if traffic violate TBF (32kbit,5K)
-$TC filter add dev eth1 parent 1:0 prio 5 u32 ht 1:6: \
-	match ip dst 193.233.7.75 \
-	match tcp dst 0x17 0xffff \
-	flowid 1:4 \
-	police rate 32kbit buffer 5kb/8 mpu 64 mtu 1514 index 1
-
-# Add to 1th slot of hash table rule to select icmp to 193.233.7.75
-# direct it to class 1:4 and prescribe to fall to best effort,
-# if traffic violate TBF (10kbit,5K)
-$TC filter add dev eth1 parent 1:0 prio 5 u32 ht 1:: \
-	sample ip protocol 1 0xff \
-	match ip dst 193.233.7.75 \
-	flowid 1:4 \
-	police rate 10kbit buffer 5kb/8 mpu 64 mtu 1514 index 2
-
-# Lookup hash table, if it is not fragmented frame
-# Use protocol as hash key
-$TC filter add dev eth1 parent 1:0 prio 5 handle ::1 u32 ht 800:: \
-	match ip nofrag \
-	offset mask 0x0F00 shift 6 \
-	hashkey mask 0x00ff0000 at 8 \
-	link 1:
-
-
-Alexey Kuznetsov
-kuznet@ms2.inr.ac.ru
--- a/README.lnstat
+++ b/README.lnstat
@ -1,81 +0,0 @@
-lnstat - linux networking statistics
-(C) 2004 Harald Welte <laforge@gnumonks.org
-======================================================================
-
-This tool is a generalized and more feature-complete replacement for the old
-'rtstat' program.
-
-In addition to routing cache statistics, it supports any kind of statistics
-the linux kernel exports via a file in /proc/net/stat.  In a stock 2.6.9
-kernel, this is 
-	per-protocol neighbour cache statistics 
-		(ipv4, ipv6, atm, decnet)
-	routing cache statistics
-		(ipv4)
-	connection tracking statistics
-		(ipv4)
-
-Please note that lnstat will adopt to any additional statistics that might be
-added to the kernel at some later point
-
-I personally always like examples more than any reference documentation, so I
-list the following examples.  If somebody wants to do a manpage, feel free
-to send me a patch :)
-
-EXAMPLES:
-
-In order to get a list of supported statistics files, you can run
-
-	lnstat -d
-
-It will display something like
- 
-/proc/net/stat/arp_cache:
-         1: entries
-         2: allocs
-         3: destroys
-[...]
-/proc/net/stat/rt_cache:
-         1: entries
-         2: in_hit
-         3: in_slow_tot
-
-You can now select the files/keys you are interested by something like
-
-	lnstat -k arp_cache:entries,rt_cache:in_hit,arp_cache:destroys
-
-arp_cach|rt_cache|arp_cach|
- entries|  in_hit|destroys|
-       6|       6|       0|
-       6|       0|       0|
-       6|       2|       0|
-
-
-You can specify the interval (e.g. 10 seconds) by:
-	
-	lnstat -i 10
-
-You can specify to only use one particular statistics file:
-
-	lnstat -f ip_conntrack
-
-You can specify individual field widths 
-
-	lnstat -k arp_cache:entries,rt_cache:entries -w 20,8
-
-You can specify not to print a header at all
-	
-	lnstat -s 0
-
-You can specify to print a header only at start of the program
-
-	lnstat -s 1
-
-You can specify to print a header at start and every 20 lines:
-
-	lnstat -s 20
-
-You can specify the number of samples you want to take (e.g. 5):
-	
-	lnstat -c 5
-
--- a/bash-completion/devlink
+++ b/bash-completion/devlink
--- a/bash-completion/tc
+++ b/bash-completion/tc
@ -3,8 +3,8 @@
 # Copyright 2016 Quentin Monnet <quentin.monnet@6wind.com>

 QDISC_KIND=' choke codel bfifo pfifo pfifo_head_drop fq fq_codel gred hhf \
-            mqprio multiq netem pfifo_fast pie red rr sfb sfq tbf atm cbq drr \
-            dsmark hfsc htb prio qfq '
+            mqprio multiq netem pfifo_fast pie fq_pie red rr sfb sfq tbf atm \
+            cbq drr dsmark hfsc htb prio qfq '
 FILTER_KIND=' basic bpf cgroup flow flower fw route rsvp tcindex u32 matchall '
 ACTION_KIND=' gact mirred bpf sample '

@ -302,7 +302,7 @@ _tc_qdisc_options()
            ;;
        gred)
            _tc_once_attr 'setup vqs default grio vq prio limit min max avpkt \
-                burst probability bandwidth'
+                burst probability bandwidth ecn harddrop'
            return 0
            ;;
        hhf)
@ -323,6 +323,15 @@ _tc_qdisc_options()
            _tc_once_attr 'limit target tupdate alpha beta'
            _tc_one_of_list 'bytemode nobytemode'
            _tc_one_of_list 'ecn noecn'
+            _tc_one_of_list 'dq_rate_estimator no_dq_rate_estimator'
+            return 0
+            ;;
+        fq_pie)
+            _tc_once_attr 'limit flows target tupdate \
+                alpha beta quantum memory_limit ecn_prob'
+            _tc_one_of_list 'ecn noecn'
+            _tc_one_of_list 'bytemode nobytemode'
+            _tc_one_of_list 'dq_rate_estimator no_dq_rate_estimator'
            return 0
            ;;
        red)
@ -450,7 +459,7 @@ _tc_filter_options()
            return 0
            ;;
        matchall)
-            _tc_once_attr 'action skip_sw skip_hw'
+            _tc_once_attr 'action classid skip_sw skip_hw'
            return 0
            ;;
        flower)
--- a/bridge/Makefile
+++ b/bridge/Makefile
@ -1,15 +1,7 @@
+# SPDX-License-Identifier: GPL-2.0
 BROBJ = bridge.o fdb.o monitor.o link.o mdb.o vlan.o

-include ../Config
-
-ifeq ($(IP_CONFIG_SETNS),y)
-	CFLAGS += -DHAVE_SETNS
-endif
-
-ifeq ($(HAVE_MNL),y)
-	CFLAGS += -DHAVE_LIBMNL $(shell $(PKG_CONFIG) libmnl --cflags)
-	LDLIBS += $(shell $(PKG_CONFIG) libmnl --libs)
-endif
+include ../config.mk

 all: bridge

--- a/bridge/br_common.h
+++ b/bridge/br_common.h
@ -1,27 +1,31 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
 #define MDB_RTA(r) \
 		((struct rtattr *)(((char *)(r)) + RTA_ALIGN(sizeof(struct br_mdb_entry))))

 #define MDB_RTR_RTA(r) \
 		((struct rtattr *)(((char *)(r)) + RTA_ALIGN(sizeof(__u32))))

-extern int print_linkinfo(const struct sockaddr_nl *who,
-			  struct nlmsghdr *n,
-			  void *arg);
-extern int print_fdb(const struct sockaddr_nl *who,
-		     struct nlmsghdr *n, void *arg);
-extern int print_mdb(const struct sockaddr_nl *who,
-		     struct nlmsghdr *n, void *arg);
+void print_vlan_info(struct rtattr *tb, int ifindex);
+int print_linkinfo(struct nlmsghdr *n, void *arg);
+int print_mdb_mon(struct nlmsghdr *n, void *arg);
+int print_fdb(struct nlmsghdr *n, void *arg);
+void print_stp_state(__u8 state);
+int parse_stp_state(const char *arg);
+int print_vlan_rtm(struct nlmsghdr *n, void *arg, bool monitor,
+		   bool global_only);
+void br_print_router_port_stats(struct rtattr *pattr);

-extern int do_fdb(int argc, char **argv);
-extern int do_mdb(int argc, char **argv);
-extern int do_monitor(int argc, char **argv);
-extern int do_vlan(int argc, char **argv);
-extern int do_link(int argc, char **argv);
+int do_fdb(int argc, char **argv);
+int do_mdb(int argc, char **argv);
+int do_monitor(int argc, char **argv);
+int do_vlan(int argc, char **argv);
+int do_link(int argc, char **argv);

 extern int preferred_family;
 extern int show_stats;
 extern int show_details;
 extern int timestamp;
 extern int compress_vlans;
-extern int json_output;
+extern int json;
 extern struct rtnl_handle rth;
--- a/bridge/bridge.c
+++ b/bridge/bridge.c
@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
 * Get/set/delete bridge with netlink
 *
@ -11,23 +12,23 @@
 #include <string.h>
 #include <errno.h>

-#include "SNAPSHOT.h"
+#include "version.h"
 #include "utils.h"
 #include "br_common.h"
 #include "namespace.h"
+#include "color.h"

 struct rtnl_handle rth = { .fd = -1 };
 int preferred_family = AF_UNSPEC;
-int resolve_hosts;
 int oneline;
 int show_stats;
 int show_details;
+static int color;
 int compress_vlans;
-int json_output;
+int json;
 int timestamp;
-char *batch_file;
+static const char *batch_file;
 int force;
-const char *_SL_;

 static void usage(void) __attribute__((noreturn));

@ -36,10 +37,10 @@ static void usage(void)
 	fprintf(stderr,
 "Usage: bridge [ OPTIONS ] OBJECT { COMMAND | help }\n"
 "       bridge [ -force ] -batch filename\n"
-"where	OBJECT := { link | fdb | mdb | vlan | monitor }\n"
-"	OPTIONS := { -V[ersion] | -s[tatistics] | -d[etails] |\n"
-"		     -o[neline] | -t[imestamp] | -n[etns] name |\n"
-"		     -c[ompressvlans] -j{son} }\n");
+"where  OBJECT := { link | fdb | mdb | vlan | monitor }\n"
+"       OPTIONS := { -V[ersion] | -s[tatistics] | -d[etails] |\n"
+"                    -o[neline] | -t[imestamp] | -n[etns] name |\n"
+"                    -c[ompressvlans] -color -p[retty] -j[son] }\n");
 	exit(-1);
 }

@ -76,45 +77,23 @@ static int do_cmd(const char *argv0, int argc, char **argv)
 	return -1;
 }

+static int br_batch_cmd(int argc, char *argv[], void *data)
+{
+	return do_cmd(argv[0], argc, argv);
+}
+
 static int batch(const char *name)
 {
-	char *line = NULL;
-	size_t len = 0;
-	int ret = EXIT_SUCCESS;
-
-	if (name && strcmp(name, "-") != 0) {
-		if (freopen(name, "r", stdin) == NULL) {
-			fprintf(stderr,
-				"Cannot open file \"%s\" for reading: %s\n",
-				name, strerror(errno));
-			return EXIT_FAILURE;
-		}
-	}
+	int ret;

 	if (rtnl_open(&rth, 0) < 0) {
 		fprintf(stderr, "Cannot open rtnetlink\n");
 		return EXIT_FAILURE;
 	}

-	cmdlineno = 0;
-	while (getcmdline(&line, &len, stdin) != -1) {
-		char *largv[100];
-		int largc;
+	rtnl_set_strict_dump(&rth);

-		largc = makeargs(line, largv, 100);
-		if (largc == 0)
-			continue;       /* blank line */
-
-		if (do_cmd(largv[0], largc, largv)) {
-			fprintf(stderr, "Command failed %s:%d\n",
-				name, cmdlineno);
-			ret = EXIT_FAILURE;
-			if (!force)
-				break;
-		}
-	}
-	if (line)
-		free(line);
+	ret = do_batch(name, force, br_batch_cmd, NULL);

 	rtnl_close(&rth);
 	return ret;
@ -138,7 +117,7 @@ main(int argc, char **argv)
 		if (matches(opt, "-help") == 0) {
 			usage();
 		} else if (matches(opt, "-Version") == 0) {
-			printf("bridge utility, 0.0\n");
+			printf("bridge utility, %s\n", version);
 			exit(0);
 		} else if (matches(opt, "-stats") == 0 ||
 			   matches(opt, "-statistics") == 0) {
@ -170,12 +149,15 @@ main(int argc, char **argv)
 			NEXT_ARG();
 			if (netns_switch(argv[1]))
 				exit(-1);
+		} else if (matches_color(opt, &color)) {
 		} else if (matches(opt, "-compressvlans") == 0) {
 			++compress_vlans;
 		} else if (matches(opt, "-force") == 0) {
 			++force;
 		} else if (matches(opt, "-json") == 0) {
-			++json_output;
+			++json;
+		} else if (matches(opt, "-pretty") == 0) {
+			++pretty;
 		} else if (matches(opt, "-batch") == 0) {
 			argc--;
 			argv++;
@ -193,12 +175,16 @@ main(int argc, char **argv)

 	_SL_ = oneline ? "\\" : "\n";

+	check_enable_color(color, json);
+
 	if (batch_file)
 		return batch(batch_file);

 	if (rtnl_open(&rth, 0) < 0)
 		exit(1);

+	rtnl_set_strict_dump(&rth);
+
 	if (argc > 1)
 		return do_cmd(argv[1], argc-1, argv+1);

--- a/bridge/fdb.c
+++ b/bridge/fdb.c
@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
 * Get/set/delete fdb table with netlink
 *
@ -21,25 +22,29 @@
 #include <linux/neighbour.h>
 #include <string.h>
 #include <limits.h>
-#include <json_writer.h>
 #include <stdbool.h>

+#include "json_print.h"
 #include "libnetlink.h"
 #include "br_common.h"
 #include "rt_names.h"
 #include "utils.h"

-static unsigned int filter_index, filter_vlan, filter_state;
-
-json_writer_t *jw_global;
+static unsigned int filter_index, filter_dynamic, filter_master,
+	filter_state, filter_vlan;

 static void usage(void)
 {
-	fprintf(stderr, "Usage: bridge fdb { add | append | del | replace } ADDR dev DEV\n"
-			"              [ self ] [ master ] [ use ] [ router ]\n"
-			"              [ local | static | dynamic ] [ dst IPADDR ] [ vlan VID ]\n"
-			"              [ port PORT] [ vni VNI ] [ via DEV ]\n");
-	fprintf(stderr, "       bridge fdb [ show [ br BRDEV ] [ brport DEV ] [ vlan VID ] [ state STATE ] ]\n");
+	fprintf(stderr,
+		"Usage: bridge fdb { add | append | del | replace } ADDR dev DEV\n"
+		"              [ self ] [ master ] [ use ] [ router ] [ extern_learn ]\n"
+		"              [ sticky ] [ local | static | dynamic ] [ vlan VID ]\n"
+		"              { [ dst IPADDR ] [ port PORT] [ vni VNI ] | [ nhid NHID ] }\n"
+		"	       [ via DEV ] [ src_vni VNI ]\n"
+		"       bridge fdb [ show [ br BRDEV ] [ brport DEV ] [ vlan VID ]\n"
+		"              [ state STATE ] [ dynamic ] ]\n"
+		"       bridge fdb get [ to ] LLADDR [ br BRDEV ] { brport | dev } DEV\n"
+		"              [ vlan VID ] [ vni VNI ] [ self ] [ master ] [ dynamic ]\n");
 	exit(-1);
 }

@ -59,7 +64,10 @@ static const char *state_n2a(unsigned int s)
 	if (s & NUD_REACHABLE)
 		return "";

-	sprintf(buf, "state=%#x", s);
+	if (is_json_context())
+		sprintf(buf, "%#x", s);
+	else
+		sprintf(buf, "state=%#x", s);
 	return buf;
 }

@ -81,24 +89,58 @@ static int state_a2n(unsigned int *s, const char *arg)
 	return 0;
 }

-static void start_json_fdb_flags_array(bool *fdb_flags)
+static void fdb_print_flags(FILE *fp, unsigned int flags)
 {
-	if (*fdb_flags)
-		return;
-	jsonw_name(jw_global, "flags");
-	jsonw_start_array(jw_global);
-	*fdb_flags = true;
+	open_json_array(PRINT_JSON,
+			is_json_context() ?  "flags" : "");
+
+	if (flags & NTF_SELF)
+		print_string(PRINT_ANY, NULL, "%s ", "self");
+
+	if (flags & NTF_ROUTER)
+		print_string(PRINT_ANY, NULL, "%s ", "router");
+
+	if (flags & NTF_EXT_LEARNED)
+		print_string(PRINT_ANY, NULL, "%s ", "extern_learn");
+
+	if (flags & NTF_OFFLOADED)
+		print_string(PRINT_ANY, NULL, "%s ", "offload");
+
+	if (flags & NTF_MASTER)
+		print_string(PRINT_ANY, NULL, "%s ", "master");
+
+	if (flags & NTF_STICKY)
+		print_string(PRINT_ANY, NULL, "%s ", "sticky");
+
+	close_json_array(PRINT_JSON, NULL);
 }

-int print_fdb(const struct sockaddr_nl *who, struct nlmsghdr *n, void *arg)
+static void fdb_print_stats(FILE *fp, const struct nda_cacheinfo *ci)
+{
+	static int hz;
+
+	if (!hz)
+		hz = get_user_hz();
+
+	if (is_json_context()) {
+		print_uint(PRINT_JSON, "used", NULL,
+				 ci->ndm_used / hz);
+		print_uint(PRINT_JSON, "updated", NULL,
+				ci->ndm_updated / hz);
+	} else {
+		fprintf(fp, "used %d/%d ", ci->ndm_used / hz,
+					ci->ndm_updated / hz);
+
+	}
+}
+
+int print_fdb(struct nlmsghdr *n, void *arg)
 {
 	FILE *fp = arg;
 	struct ndmsg *r = NLMSG_DATA(n);
 	int len = n->nlmsg_len;
 	struct rtattr *tb[NDA_MAX+1];
 	__u16 vid = 0;
-	bool fdb_flags = false;
-	const char *state_s;

 	if (n->nlmsg_type != RTM_NEWNEIGH && n->nlmsg_type != RTM_DELNEIGH) {
 		fprintf(stderr, "Not RTM_NEWNEIGH: %08x %08x %08x\n",
@ -130,181 +172,143 @@ int print_fdb(const struct sockaddr_nl *who, struct nlmsghdr *n, void *arg)
 	if (filter_vlan && filter_vlan != vid)
 		return 0;

-	if (jw_global) {
-		jsonw_pretty(jw_global, 1);
-		jsonw_start_object(jw_global);
-	}
+	if (filter_dynamic && (r->ndm_state & NUD_PERMANENT))
+		return 0;

-	if (n->nlmsg_type == RTM_DELNEIGH) {
-		if (jw_global)
-			jsonw_string_field(jw_global, "opCode", "deleted");
-		else
-			fprintf(fp, "Deleted ");
-	}
+	open_json_object(NULL);
+	if (n->nlmsg_type == RTM_DELNEIGH)
+		print_bool(PRINT_ANY, "deleted", "Deleted ", true);

 	if (tb[NDA_LLADDR]) {
+		const char *lladdr;
 		SPRINT_BUF(b1);
-		ll_addr_n2a(RTA_DATA(tb[NDA_LLADDR]),
-			    RTA_PAYLOAD(tb[NDA_LLADDR]),
-			    ll_index_to_type(r->ndm_ifindex),
-			    b1, sizeof(b1));
-		if (jw_global)
-			jsonw_string_field(jw_global, "mac", b1);
-		else
-			fprintf(fp, "%s ", b1);
+
+		lladdr = ll_addr_n2a(RTA_DATA(tb[NDA_LLADDR]),
+				     RTA_PAYLOAD(tb[NDA_LLADDR]),
+				     ll_index_to_type(r->ndm_ifindex),
+				     b1, sizeof(b1));
+
+		print_color_string(PRINT_ANY, COLOR_MAC,
+				   "mac", "%s ", lladdr);
 	}

 	if (!filter_index && r->ndm_ifindex) {
-		if (jw_global)
-			jsonw_string_field(jw_global, "dev",
-					   ll_index_to_name(r->ndm_ifindex));
-		else
-			fprintf(fp, "dev %s ",
-				ll_index_to_name(r->ndm_ifindex));
+		print_string(PRINT_FP, NULL, "dev ", NULL);
+
+		print_color_string(PRINT_ANY, COLOR_IFNAME,
+				   "ifname", "%s ",
+				   ll_index_to_name(r->ndm_ifindex));
 	}

 	if (tb[NDA_DST]) {
 		int family = AF_INET;
-		const char *abuf_s;
+		const char *dst;

 		if (RTA_PAYLOAD(tb[NDA_DST]) == sizeof(struct in6_addr))
 			family = AF_INET6;

-		abuf_s = format_host(family,
-				     RTA_PAYLOAD(tb[NDA_DST]),
-				     RTA_DATA(tb[NDA_DST]));
-		if (jw_global)
-			jsonw_string_field(jw_global, "dst", abuf_s);
-		else
-			fprintf(fp, "dst %s ", abuf_s);
+		dst = format_host(family,
+				  RTA_PAYLOAD(tb[NDA_DST]),
+				  RTA_DATA(tb[NDA_DST]));
+
+		print_string(PRINT_FP, NULL, "dst ", NULL);
+
+		print_color_string(PRINT_ANY,
+				   ifa_family_color(family),
+				   "dst", "%s ", dst);
 	}

-	if (vid) {
-		if (jw_global)
-			jsonw_uint_field(jw_global, "vlan", vid);
-		else
-			fprintf(fp, "vlan %hu ", vid);
-	}
+	if (vid)
+		print_uint(PRINT_ANY,
+				 "vlan", "vlan %hu ", vid);

-	if (tb[NDA_PORT]) {
-		if (jw_global)
-			jsonw_uint_field(jw_global, "port",
-					 rta_getattr_be16(tb[NDA_PORT]));
-		else
-			fprintf(fp, "port %d ",
-				rta_getattr_be16(tb[NDA_PORT]));
-	}
+	if (tb[NDA_PORT])
+		print_uint(PRINT_ANY,
+				 "port", "port %u ",
+				 rta_getattr_be16(tb[NDA_PORT]));

-	if (tb[NDA_VNI]) {
-		if (jw_global)
-			jsonw_uint_field(jw_global, "vni",
-					 rta_getattr_u32(tb[NDA_VNI]));
-		else
-			fprintf(fp, "vni %d ",
-				rta_getattr_u32(tb[NDA_VNI]));
-	}
+	if (tb[NDA_VNI])
+		print_uint(PRINT_ANY,
+				 "vni", "vni %u ",
+				 rta_getattr_u32(tb[NDA_VNI]));
+
+	if (tb[NDA_SRC_VNI])
+		print_uint(PRINT_ANY,
+				 "src_vni", "src_vni %u ",
+				rta_getattr_u32(tb[NDA_SRC_VNI]));

 	if (tb[NDA_IFINDEX]) {
 		unsigned int ifindex = rta_getattr_u32(tb[NDA_IFINDEX]);

-		if (ifindex) {
-			char ifname[IF_NAMESIZE];
-
-			if (!tb[NDA_LINK_NETNSID] &&
-			    if_indextoname(ifindex, ifname)) {
-				if (jw_global)
-					jsonw_string_field(jw_global, "viaIf",
-							   ifname);
-				else
-					fprintf(fp, "via %s ", ifname);
-			} else {
-				if (jw_global)
-					jsonw_uint_field(jw_global, "viaIfIndex",
-							 ifindex);
-				else
-					fprintf(fp, "via ifindex %u ", ifindex);
-			}
-		}
-	}
-
-	if (tb[NDA_LINK_NETNSID]) {
-		if (jw_global)
-			jsonw_uint_field(jw_global, "linkNetNsId",
-					 rta_getattr_u32(tb[NDA_LINK_NETNSID]));
+		if (tb[NDA_LINK_NETNSID])
+			print_uint(PRINT_ANY,
+					 "viaIfIndex", "via ifindex %u ",
+					 ifindex);
 		else
-			fprintf(fp, "link-netnsid %d ",
-				rta_getattr_u32(tb[NDA_LINK_NETNSID]));
+			print_string(PRINT_ANY,
+					   "viaIf", "via %s ",
+					   ll_index_to_name(ifindex));
 	}

-	if (show_stats && tb[NDA_CACHEINFO]) {
-		struct nda_cacheinfo *ci = RTA_DATA(tb[NDA_CACHEINFO]);
-		int hz = get_user_hz();
+	if (tb[NDA_NH_ID])
+		print_uint(PRINT_ANY, "nhid", "nhid %u ",
+			   rta_getattr_u32(tb[NDA_NH_ID]));

-		if (jw_global) {
-			jsonw_uint_field(jw_global, "used",
-				ci->ndm_used/hz);
-			jsonw_uint_field(jw_global, "updated",
-				ci->ndm_updated/hz);
-		} else {
-			fprintf(fp, "used %d/%d ", ci->ndm_used/hz,
-					ci->ndm_updated/hz);
-		}
+	if (tb[NDA_LINK_NETNSID])
+		print_uint(PRINT_ANY,
+				 "linkNetNsId", "link-netnsid %d ",
+				 rta_getattr_u32(tb[NDA_LINK_NETNSID]));
+
+	if (show_stats && tb[NDA_CACHEINFO])
+		fdb_print_stats(fp, RTA_DATA(tb[NDA_CACHEINFO]));
+
+	fdb_print_flags(fp, r->ndm_flags);
+
+
+	if (tb[NDA_MASTER])
+		print_string(PRINT_ANY, "master", "master %s ",
+			     ll_index_to_name(rta_getattr_u32(tb[NDA_MASTER])));
+
+	print_string(PRINT_ANY, "state", "%s\n",
+			   state_n2a(r->ndm_state));
+	close_json_object();
+	fflush(fp);
+	return 0;
+}
+
+static int fdb_linkdump_filter(struct nlmsghdr *nlh, int reqlen)
+{
+	int err;
+
+	if (filter_index) {
+		struct ifinfomsg *ifm = NLMSG_DATA(nlh);
+
+		ifm->ifi_index = filter_index;
 	}

-	if (jw_global) {
-		if (r->ndm_flags & NTF_SELF) {
-			start_json_fdb_flags_array(&fdb_flags);
-			jsonw_string(jw_global, "self");
-		}
-		if (r->ndm_flags & NTF_ROUTER) {
-			start_json_fdb_flags_array(&fdb_flags);
-			jsonw_string(jw_global, "router");
-		}
-		if (r->ndm_flags & NTF_EXT_LEARNED) {
-			start_json_fdb_flags_array(&fdb_flags);
-			jsonw_string(jw_global, "extern_learn");
-		}
-		if (r->ndm_flags & NTF_OFFLOADED) {
-			start_json_fdb_flags_array(&fdb_flags);
-			jsonw_string(jw_global, "offload");
-		}
-		if (r->ndm_flags & NTF_MASTER)
-			jsonw_string(jw_global, "master");
-		if (fdb_flags)
-			jsonw_end_array(jw_global);
-
-		if (tb[NDA_MASTER])
-			jsonw_string_field(jw_global,
-					   "master",
-					   ll_index_to_name(rta_getattr_u32(tb[NDA_MASTER])));
-
-	} else {
-		if (r->ndm_flags & NTF_SELF)
-			fprintf(fp, "self ");
-		if (r->ndm_flags & NTF_ROUTER)
-			fprintf(fp, "router ");
-		if (r->ndm_flags & NTF_EXT_LEARNED)
-			fprintf(fp, "extern_learn ");
-		if (r->ndm_flags & NTF_OFFLOADED)
-			fprintf(fp, "offload ");
-		if (tb[NDA_MASTER]) {
-			fprintf(fp, "master %s ",
-				ll_index_to_name(rta_getattr_u32(tb[NDA_MASTER])));
-		} else if (r->ndm_flags & NTF_MASTER) {
-			fprintf(fp, "master ");
-		}
+	if (filter_master) {
+		err = addattr32(nlh, reqlen, IFLA_MASTER, filter_master);
+		if (err)
+			return err;
 	}

-	state_s = state_n2a(r->ndm_state);
-	if (jw_global) {
-		if (state_s[0])
-			jsonw_string_field(jw_global, "state", state_s);
+	return 0;
+}

-		jsonw_end_object(jw_global);
-	} else {
-		fprintf(fp, "%s\n", state_s);
+static int fdb_dump_filter(struct nlmsghdr *nlh, int reqlen)
+{
+	int err;

-		fflush(fp);
+	if (filter_index) {
+		struct ndmsg *ndm = NLMSG_DATA(nlh);
+
+		ndm->ndm_ifindex = filter_index;
+	}
+
+	if (filter_master) {
+		err = addattr32(nlh, reqlen, NDA_MASTER, filter_master);
+		if (err)
+			return err;
 	}

 	return 0;
@ -312,18 +316,9 @@ int print_fdb(const struct sockaddr_nl *who, struct nlmsghdr *n, void *arg)

 static int fdb_show(int argc, char **argv)
 {
-	struct {
-		struct nlmsghdr	n;
-		struct ifinfomsg	ifm;
-		char			buf[256];
-	} req = {
-		.n.nlmsg_len = NLMSG_LENGTH(sizeof(struct ifinfomsg)),
-		.ifm.ifi_family = PF_BRIDGE,
-	};
-
 	char *filter_dev = NULL;
 	char *br = NULL;
-	int msg_size = sizeof(struct ifinfomsg);
+	int rc;

 	while (argc > 0) {
 		if ((strcmp(*argv, "brport") == 0) || strcmp(*argv, "dev") == 0) {
@ -344,6 +339,8 @@ static int fdb_show(int argc, char **argv)
 			if (state_a2n(&state, *argv))
 				invarg("invalid state", *argv);
 			filter_state |= state;
+		} else if (strcmp(*argv, "dynamic") == 0) {
+			filter_dynamic = 1;
 		} else {
 			if (matches(*argv, "help") == 0)
 				usage();
@ -358,42 +355,32 @@ static int fdb_show(int argc, char **argv)
 			fprintf(stderr, "Cannot find bridge device \"%s\"\n", br);
 			return -1;
 		}
-		addattr32(&req.n, sizeof(req), IFLA_MASTER, br_ifindex);
-		msg_size += RTA_LENGTH(4);
+		filter_master = br_ifindex;
 	}

 	/*we'll keep around filter_dev for older kernels */
 	if (filter_dev) {
-		filter_index = if_nametoindex(filter_dev);
-		if (filter_index == 0) {
-			fprintf(stderr, "Cannot find device \"%s\"\n",
-				filter_dev);
-			return -1;
-		}
-		req.ifm.ifi_index = filter_index;
+		filter_index = ll_name_to_index(filter_dev);
+		if (!filter_index)
+			return nodev(filter_dev);
 	}

-	if (rtnl_dump_request(&rth, RTM_GETNEIGH, &req.ifm, msg_size) < 0) {
+	if (rth.flags & RTNL_HANDLE_F_STRICT_CHK)
+		rc = rtnl_neighdump_req(&rth, PF_BRIDGE, fdb_dump_filter);
+	else
+		rc = rtnl_fdb_linkdump_req_filter_fn(&rth, fdb_linkdump_filter);
+	if (rc < 0) {
 		perror("Cannot send dump request");
 		exit(1);
 	}

-	if (json_output) {
-		jw_global = jsonw_new(stdout);
-		if (!jw_global) {
-			fprintf(stderr, "Error allocation json object\n");
-			exit(1);
-		}
-		jsonw_start_array(jw_global);
-	}
+	new_json_obj(json);
 	if (rtnl_dump_filter(&rth, print_fdb, stdout) < 0) {
 		fprintf(stderr, "Dump terminated\n");
 		exit(1);
 	}
-	if (jw_global) {
-		jsonw_end_array(jw_global);
-		jsonw_destroy(&jw_global);
-	}
+	delete_json_obj();
+	fflush(stdout);

 	return 0;
 }
@ -418,9 +405,11 @@ static int fdb_modify(int cmd, int flags, int argc, char **argv)
 	inet_prefix dst;
 	unsigned long port = 0;
 	unsigned long vni = ~0;
+	unsigned long src_vni = ~0;
 	unsigned int via = 0;
 	char *endptr;
 	short vid = -1;
+	__u32 nhid = 0;

 	while (argc > 0) {
 		if (strcmp(*argv, "dev") == 0) {
@ -432,6 +421,10 @@ static int fdb_modify(int cmd, int flags, int argc, char **argv)
 				duparg2("dst", *argv);
 			get_addr(&dst, *argv, preferred_family);
 			dst_ok = 1;
+		} else if (strcmp(*argv, "nhid") == 0) {
+			NEXT_ARG();
+			if (get_u32(&nhid, *argv, 0))
+				invarg("\"id\" value is invalid\n", *argv);
 		} else if (strcmp(*argv, "port") == 0) {

 			NEXT_ARG();
@ -451,11 +444,17 @@ static int fdb_modify(int cmd, int flags, int argc, char **argv)
 			if ((endptr && *endptr) ||
 			    (vni >> 24) || vni == ULONG_MAX)
 				invarg("invalid VNI\n", *argv);
+		} else if (strcmp(*argv, "src_vni") == 0) {
+			NEXT_ARG();
+			src_vni = strtoul(*argv, &endptr, 0);
+			if ((endptr && *endptr) ||
+			    (src_vni >> 24) || src_vni == ULONG_MAX)
+				invarg("invalid src VNI\n", *argv);
 		} else if (strcmp(*argv, "via") == 0) {
 			NEXT_ARG();
-			via = if_nametoindex(*argv);
-			if (via == 0)
-				invarg("invalid device\n", *argv);
+			via = ll_name_to_index(*argv);
+			if (!via)
+				exit(nodev(*argv));
 		} else if (strcmp(*argv, "self") == 0) {
 			req.ndm.ndm_flags |= NTF_SELF;
 		} else if (matches(*argv, "master") == 0) {
@ -478,10 +477,14 @@ static int fdb_modify(int cmd, int flags, int argc, char **argv)
 			vid = atoi(*argv);
 		} else if (matches(*argv, "use") == 0) {
 			req.ndm.ndm_flags |= NTF_USE;
+		} else if (matches(*argv, "extern_learn") == 0) {
+			req.ndm.ndm_flags |= NTF_EXT_LEARNED;
+		} else if (matches(*argv, "sticky") == 0) {
+			req.ndm.ndm_flags |= NTF_STICKY;
 		} else {
-			if (strcmp(*argv, "to") == 0) {
+			if (strcmp(*argv, "to") == 0)
 				NEXT_ARG();
-			}
+
 			if (matches(*argv, "help") == 0)
 				usage();
 			if (addr)
@ -496,6 +499,11 @@ static int fdb_modify(int cmd, int flags, int argc, char **argv)
 		return -1;
 	}

+	if (nhid && (dst_ok || port || vni != ~0)) {
+		fprintf(stderr, "dst, port, vni are mutually exclusive with nhid\n");
+		return -1;
+	}
+
 	/* Assume self */
 	if (!(req.ndm.ndm_flags&(NTF_SELF|NTF_MASTER)))
 		req.ndm.ndm_flags |= NTF_SELF;
@ -517,6 +525,8 @@ static int fdb_modify(int cmd, int flags, int argc, char **argv)

 	if (vid >= 0)
 		addattr16(&req.n, sizeof(req), NDA_VLAN, vid);
+	if (nhid > 0)
+		addattr32(&req.n, sizeof(req), NDA_NH_ID, nhid);

 	if (port) {
 		unsigned short dport;
@ -526,17 +536,132 @@ static int fdb_modify(int cmd, int flags, int argc, char **argv)
 	}
 	if (vni != ~0)
 		addattr32(&req.n, sizeof(req), NDA_VNI, vni);
+	if (src_vni != ~0)
+		addattr32(&req.n, sizeof(req), NDA_SRC_VNI, src_vni);
 	if (via)
 		addattr32(&req.n, sizeof(req), NDA_IFINDEX, via);

 	req.ndm.ndm_ifindex = ll_name_to_index(d);
-	if (req.ndm.ndm_ifindex == 0) {
-		fprintf(stderr, "Cannot find device \"%s\"\n", d);
+	if (!req.ndm.ndm_ifindex)
+		return nodev(d);
+
+	if (rtnl_talk(&rth, &req.n, NULL) < 0)
+		return -1;
+
+	return 0;
+}
+
+static int fdb_get(int argc, char **argv)
+{
+	struct {
+		struct nlmsghdr	n;
+		struct ndmsg		ndm;
+		char			buf[1024];
+	} req = {
+		.n.nlmsg_len = NLMSG_LENGTH(sizeof(struct ndmsg)),
+		.n.nlmsg_flags = NLM_F_REQUEST,
+		.n.nlmsg_type = RTM_GETNEIGH,
+		.ndm.ndm_family = AF_BRIDGE,
+	};
+	char  *d = NULL, *br = NULL;
+	struct nlmsghdr *answer;
+	unsigned long vni = ~0;
+	char abuf[ETH_ALEN];
+	int br_ifindex = 0;
+	char *addr = NULL;
+	short vlan = -1;
+	char *endptr;
+
+	while (argc > 0) {
+		if ((strcmp(*argv, "brport") == 0) || strcmp(*argv, "dev") == 0) {
+			NEXT_ARG();
+			d = *argv;
+		} else if (strcmp(*argv, "br") == 0) {
+			NEXT_ARG();
+			br = *argv;
+		} else if (strcmp(*argv, "dev") == 0) {
+			NEXT_ARG();
+			d = *argv;
+		} else if (strcmp(*argv, "vni") == 0) {
+			NEXT_ARG();
+			vni = strtoul(*argv, &endptr, 0);
+			if ((endptr && *endptr) ||
+			    (vni >> 24) || vni == ULONG_MAX)
+				invarg("invalid VNI\n", *argv);
+		} else if (strcmp(*argv, "self") == 0) {
+			req.ndm.ndm_flags |= NTF_SELF;
+		} else if (matches(*argv, "master") == 0) {
+			req.ndm.ndm_flags |= NTF_MASTER;
+		} else if (matches(*argv, "vlan") == 0) {
+			if (vlan >= 0)
+				duparg2("vlan", *argv);
+			NEXT_ARG();
+			vlan = atoi(*argv);
+		} else if (matches(*argv, "dynamic") == 0) {
+			filter_dynamic = 1;
+		} else {
+			if (strcmp(*argv, "to") == 0)
+				NEXT_ARG();
+
+			if (matches(*argv, "help") == 0)
+				usage();
+			if (addr)
+				duparg2("to", *argv);
+			addr = *argv;
+		}
+		argc--; argv++;
+	}
+
+	if ((d == NULL && br == NULL) || addr == NULL) {
+		fprintf(stderr, "Device or master and address are required arguments.\n");
 		return -1;
 	}

-	if (rtnl_talk(&rth, &req.n, NULL, 0) < 0)
+	if (sscanf(addr, "%hhx:%hhx:%hhx:%hhx:%hhx:%hhx",
+		   abuf, abuf+1, abuf+2,
+		   abuf+3, abuf+4, abuf+5) != 6) {
+		fprintf(stderr, "Invalid mac address %s\n", addr);
 		return -1;
+	}
+
+	addattr_l(&req.n, sizeof(req), NDA_LLADDR, abuf, ETH_ALEN);
+
+	if (vlan >= 0)
+		addattr16(&req.n, sizeof(req), NDA_VLAN, vlan);
+
+	if (vni != ~0)
+		addattr32(&req.n, sizeof(req), NDA_VNI, vni);
+
+	if (d) {
+		req.ndm.ndm_ifindex = ll_name_to_index(d);
+		if (!req.ndm.ndm_ifindex) {
+			fprintf(stderr, "Cannot find device \"%s\"\n", d);
+			return -1;
+		}
+	}
+
+	if (br) {
+		br_ifindex = ll_name_to_index(br);
+		if (!br_ifindex) {
+			fprintf(stderr, "Cannot find bridge device \"%s\"\n", br);
+			return -1;
+		}
+		addattr32(&req.n, sizeof(req), NDA_MASTER, br_ifindex);
+	}
+
+	if (rtnl_talk(&rth, &req.n, &answer) < 0)
+		return -2;
+
+	/*
+	 * Initialize a json_writer and open an array object
+	 * if -json was specified.
+	 */
+	new_json_obj(json);
+	if (print_fdb(answer, stdout) < 0) {
+		fprintf(stderr, "An error :-)\n");
+		return -1;
+	}
+	delete_json_obj();

 	return 0;
 }
@ -554,6 +679,8 @@ int do_fdb(int argc, char **argv)
 			return fdb_modify(RTM_NEWNEIGH, NLM_F_CREATE|NLM_F_REPLACE, argc-1, argv+1);
 		if (matches(*argv, "delete") == 0)
 			return fdb_modify(RTM_DELNEIGH, 0, argc-1, argv+1);
+		if (matches(*argv, "get") == 0)
+			return fdb_get(argc-1, argv+1);
 		if (matches(*argv, "show") == 0 ||
 		    matches(*argv, "lst") == 0 ||
 		    matches(*argv, "list") == 0)
--- a/bridge/link.c
+++ b/bridge/link.c
@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */

 #include <stdio.h>
 #include <stdlib.h>
@ -11,13 +12,14 @@
 #include <string.h>
 #include <stdbool.h>

+#include "json_print.h"
 #include "libnetlink.h"
 #include "utils.h"
 #include "br_common.h"

 static unsigned int filter_index;

-static const char *port_states[] = {
+static const char *stp_states[] = {
 	[BR_STATE_DISABLED] = "disabled",
 	[BR_STATE_LISTENING] = "listening",
 	[BR_STATE_LEARNING] = "learning",
@ -25,17 +27,21 @@ static const char *port_states[] = {
 	[BR_STATE_BLOCKING] = "blocking",
 };

-extern char *if_indextoname(unsigned int __ifindex, char *__ifname);
+static const char *hw_mode[] = {
+	"VEB", "VEPA"
+};

-static void print_link_flags(FILE *fp, unsigned int flags)
+static void print_link_flags(FILE *fp, unsigned int flags, unsigned int mdown)
 {
-	fprintf(fp, "<");
+	open_json_array(PRINT_ANY, is_json_context() ? "flags" : "<");
 	if (flags & IFF_UP && !(flags & IFF_RUNNING))
-		fprintf(fp, "NO-CARRIER%s", flags ? "," : "");
+		print_string(PRINT_ANY, NULL,
+			     flags ? "%s," : "%s", "NO-CARRIER");
 	flags &= ~IFF_RUNNING;
-#define _PF(f) if (flags&IFF_##f) { \
-		  flags &= ~IFF_##f ; \
-		  fprintf(fp, #f "%s", flags ? "," : ""); }
+
+#define _PF(f) if (flags&IFF_##f) {					\
+		flags &= ~IFF_##f ;					\
+		print_string(PRINT_ANY, NULL, flags ? "%s," : "%s", #f); }
 	_PF(LOOPBACK);
 	_PF(BROADCAST);
 	_PF(POINTOPOINT);
@ -56,54 +62,152 @@ static void print_link_flags(FILE *fp, unsigned int flags)
 	_PF(ECHO);
 #undef _PF
 	if (flags)
-		fprintf(fp, "%x", flags);
-	fprintf(fp, "> ");
+		print_hex(PRINT_ANY, NULL, "%x", flags);
+	if (mdown)
+		print_string(PRINT_ANY, NULL, ",%s", "M-DOWN");
+	close_json_array(PRINT_ANY, "> ");
 }

-static const char *oper_states[] = {
-	"UNKNOWN", "NOTPRESENT", "DOWN", "LOWERLAYERDOWN",
-	"TESTING", "DORMANT",	 "UP"
-};
-
-static const char *hw_mode[] = {"VEB", "VEPA"};
-
-static void print_operstate(FILE *f, __u8 state)
-{
-	if (state >= ARRAY_SIZE(oper_states))
-		fprintf(f, "state %#x ", state);
-	else
-		fprintf(f, "state %s ", oper_states[state]);
-}
-
-static void print_portstate(FILE *f, __u8 state)
+void print_stp_state(__u8 state)
 {
 	if (state <= BR_STATE_BLOCKING)
-		fprintf(f, "state %s ", port_states[state]);
+		print_string(PRINT_ANY, "state",
+			     "state %s ", stp_states[state]);
 	else
-		fprintf(f, "state (%d) ", state);
+		print_uint(PRINT_ANY, "state",
+			     "state (%d) ", state);
 }

-static void print_onoff(FILE *f, char *flag, __u8 val)
+int parse_stp_state(const char *arg)
 {
-	fprintf(f, "%s %s ", flag, val ? "on" : "off");
+	size_t nstates = ARRAY_SIZE(stp_states);
+	int state;
+
+	for (state = 0; state < nstates; state++)
+		if (strcmp(stp_states[state], arg) == 0)
+			break;
+
+	if (state == nstates)
+		state = -1;
+
+	return state;
 }

-static void print_hwmode(FILE *f, __u16 mode)
+static void print_hwmode(__u16 mode)
 {
 	if (mode >= ARRAY_SIZE(hw_mode))
-		fprintf(f, "hwmode %#hx ", mode);
+		print_0xhex(PRINT_ANY, "hwmode",
+			    "hwmode %#llx ", mode);
 	else
-		fprintf(f, "hwmode %s ", hw_mode[mode]);
+		print_string(PRINT_ANY, "hwmode",
+			     "hwmode %s ", hw_mode[mode]);
 }

-int print_linkinfo(const struct sockaddr_nl *who,
-		   struct nlmsghdr *n, void *arg)
+static void print_protinfo(FILE *fp, struct rtattr *attr)
+{
+	if (attr->rta_type & NLA_F_NESTED) {
+		struct rtattr *prtb[IFLA_BRPORT_MAX + 1];
+
+		parse_rtattr_nested(prtb, IFLA_BRPORT_MAX, attr);
+
+		if (prtb[IFLA_BRPORT_STATE])
+			print_stp_state(rta_getattr_u8(prtb[IFLA_BRPORT_STATE]));
+
+		if (prtb[IFLA_BRPORT_PRIORITY])
+			print_uint(PRINT_ANY, "priority",
+				   "priority %u ",
+				   rta_getattr_u16(prtb[IFLA_BRPORT_PRIORITY]));
+
+		if (prtb[IFLA_BRPORT_COST])
+			print_uint(PRINT_ANY, "cost",
+				   "cost %u ",
+				   rta_getattr_u32(prtb[IFLA_BRPORT_COST]));
+
+		if (!show_details)
+			return;
+
+		if (!is_json_context())
+			fprintf(fp, "%s    ", _SL_);
+
+		if (prtb[IFLA_BRPORT_MODE])
+			print_on_off(PRINT_ANY, "hairpin", "hairpin %s ",
+				     rta_getattr_u8(prtb[IFLA_BRPORT_MODE]));
+		if (prtb[IFLA_BRPORT_GUARD])
+			print_on_off(PRINT_ANY, "guard", "guard %s ",
+				     rta_getattr_u8(prtb[IFLA_BRPORT_GUARD]));
+		if (prtb[IFLA_BRPORT_PROTECT])
+			print_on_off(PRINT_ANY, "root_block", "root_block %s ",
+				     rta_getattr_u8(prtb[IFLA_BRPORT_PROTECT]));
+		if (prtb[IFLA_BRPORT_FAST_LEAVE])
+			print_on_off(PRINT_ANY, "fastleave", "fastleave %s ",
+				     rta_getattr_u8(prtb[IFLA_BRPORT_FAST_LEAVE]));
+		if (prtb[IFLA_BRPORT_LEARNING])
+			print_on_off(PRINT_ANY, "learning", "learning %s ",
+				     rta_getattr_u8(prtb[IFLA_BRPORT_LEARNING]));
+		if (prtb[IFLA_BRPORT_LEARNING_SYNC])
+			print_on_off(PRINT_ANY, "learning_sync", "learning_sync %s ",
+				     rta_getattr_u8(prtb[IFLA_BRPORT_LEARNING_SYNC]));
+		if (prtb[IFLA_BRPORT_UNICAST_FLOOD])
+			print_on_off(PRINT_ANY, "flood", "flood %s ",
+				     rta_getattr_u8(prtb[IFLA_BRPORT_UNICAST_FLOOD]));
+		if (prtb[IFLA_BRPORT_MCAST_FLOOD])
+			print_on_off(PRINT_ANY, "mcast_flood", "mcast_flood %s ",
+				     rta_getattr_u8(prtb[IFLA_BRPORT_MCAST_FLOOD]));
+		if (prtb[IFLA_BRPORT_MCAST_TO_UCAST])
+			print_on_off(PRINT_ANY, "mcast_to_unicast", "mcast_to_unicast %s ",
+				     rta_getattr_u8(prtb[IFLA_BRPORT_MCAST_TO_UCAST]));
+		if (prtb[IFLA_BRPORT_NEIGH_SUPPRESS])
+			print_on_off(PRINT_ANY, "neigh_suppress", "neigh_suppress %s ",
+				     rta_getattr_u8(prtb[IFLA_BRPORT_NEIGH_SUPPRESS]));
+		if (prtb[IFLA_BRPORT_VLAN_TUNNEL])
+			print_on_off(PRINT_ANY, "vlan_tunnel", "vlan_tunnel %s ",
+				     rta_getattr_u8(prtb[IFLA_BRPORT_VLAN_TUNNEL]));
+
+		if (prtb[IFLA_BRPORT_BACKUP_PORT]) {
+			int ifidx;
+
+			ifidx = rta_getattr_u32(prtb[IFLA_BRPORT_BACKUP_PORT]);
+			print_string(PRINT_ANY,
+				     "backup_port", "backup_port %s ",
+				     ll_index_to_name(ifidx));
+		}
+
+		if (prtb[IFLA_BRPORT_ISOLATED])
+			print_on_off(PRINT_ANY, "isolated", "isolated %s ",
+				     rta_getattr_u8(prtb[IFLA_BRPORT_ISOLATED]));
+	} else
+		print_stp_state(rta_getattr_u8(attr));
+}
+
+
+/*
+ * This is reported by HW devices that have some bridging
+ * capabilities.
+ */
+static void print_af_spec(struct rtattr *attr, int ifindex)
+{
+	struct rtattr *aftb[IFLA_BRIDGE_MAX+1];
+
+	parse_rtattr_nested(aftb, IFLA_BRIDGE_MAX, attr);
+
+	if (aftb[IFLA_BRIDGE_MODE])
+		print_hwmode(rta_getattr_u16(aftb[IFLA_BRIDGE_MODE]));
+
+	if (!show_details)
+		return;
+
+	if (aftb[IFLA_BRIDGE_VLAN_INFO])
+		print_vlan_info(aftb[IFLA_BRIDGE_VLAN_INFO], ifindex);
+}
+
+int print_linkinfo(struct nlmsghdr *n, void *arg)
 {
 	FILE *fp = arg;
-	int len = n->nlmsg_len;
 	struct ifinfomsg *ifi = NLMSG_DATA(n);
 	struct rtattr *tb[IFLA_MAX+1];
-	char b1[IFNAMSIZ];
+	unsigned int m_flag = 0;
+	int len = n->nlmsg_len;
+	const char *name;

 	len -= NLMSG_LENGTH(sizeof(*ifi));
 	if (len < 0) {
@ -119,140 +223,65 @@ int print_linkinfo(const struct sockaddr_nl *who,

 	parse_rtattr_flags(tb, IFLA_MAX, IFLA_RTA(ifi), len, NLA_F_NESTED);

-	if (tb[IFLA_IFNAME] == NULL) {
-		fprintf(stderr, "BUG: nil ifname\n");
+	name = get_ifname_rta(ifi->ifi_index, tb[IFLA_IFNAME]);
+	if (!name)
 		return -1;
-	}

+	open_json_object(NULL);
 	if (n->nlmsg_type == RTM_DELLINK)
-		fprintf(fp, "Deleted ");
+		print_bool(PRINT_ANY, "deleted", "Deleted ", true);

-	fprintf(fp, "%d: %s ", ifi->ifi_index,
-		tb[IFLA_IFNAME] ? rta_getattr_str(tb[IFLA_IFNAME]) : "<nil>");
-
-	if (tb[IFLA_OPERSTATE])
-		print_operstate(fp, rta_getattr_u8(tb[IFLA_OPERSTATE]));
-
-	if (tb[IFLA_LINK]) {
-		SPRINT_BUF(b1);
-		int iflink = rta_getattr_u32(tb[IFLA_LINK]);
-
-		if (iflink == 0)
-			fprintf(fp, "@NONE: ");
-		else
-			fprintf(fp, "@%s: ",
-				if_indextoname(iflink, b1));
-	} else
-		fprintf(fp, ": ");
-
-	print_link_flags(fp, ifi->ifi_flags);
+	print_int(PRINT_ANY, "ifindex", "%d: ", ifi->ifi_index);
+	m_flag = print_name_and_link("%s: ", name, tb);
+	print_link_flags(fp, ifi->ifi_flags, m_flag);

 	if (tb[IFLA_MTU])
-		fprintf(fp, "mtu %u ", rta_getattr_u32(tb[IFLA_MTU]));
+		print_int(PRINT_ANY,
+			  "mtu", "mtu %u ",
+			  rta_getattr_u32(tb[IFLA_MTU]));

-	if (tb[IFLA_MASTER])
-		fprintf(fp, "master %s ",
-			if_indextoname(rta_getattr_u32(tb[IFLA_MASTER]), b1));
+	if (tb[IFLA_MASTER]) {
+		int master = rta_getattr_u32(tb[IFLA_MASTER]);

-	if (tb[IFLA_PROTINFO]) {
-		if (tb[IFLA_PROTINFO]->rta_type & NLA_F_NESTED) {
-			struct rtattr *prtb[IFLA_BRPORT_MAX+1];
-
-			parse_rtattr_nested(prtb, IFLA_BRPORT_MAX,
-					    tb[IFLA_PROTINFO]);
-
-			if (prtb[IFLA_BRPORT_STATE])
-				print_portstate(fp,
-						rta_getattr_u8(prtb[IFLA_BRPORT_STATE]));
-			if (prtb[IFLA_BRPORT_PRIORITY])
-				fprintf(fp, "priority %hu ",
-					rta_getattr_u16(prtb[IFLA_BRPORT_PRIORITY]));
-			if (prtb[IFLA_BRPORT_COST])
-				fprintf(fp, "cost %u ",
-					rta_getattr_u32(prtb[IFLA_BRPORT_COST]));
-
-			if (show_details) {
-				fprintf(fp, "%s    ", _SL_);
-
-				if (prtb[IFLA_BRPORT_MODE])
-					print_onoff(fp, "hairpin",
-						    rta_getattr_u8(prtb[IFLA_BRPORT_MODE]));
-				if (prtb[IFLA_BRPORT_GUARD])
-					print_onoff(fp, "guard",
-						    rta_getattr_u8(prtb[IFLA_BRPORT_GUARD]));
-				if (prtb[IFLA_BRPORT_PROTECT])
-					print_onoff(fp, "root_block",
-						    rta_getattr_u8(prtb[IFLA_BRPORT_PROTECT]));
-				if (prtb[IFLA_BRPORT_FAST_LEAVE])
-					print_onoff(fp, "fastleave",
-						    rta_getattr_u8(prtb[IFLA_BRPORT_FAST_LEAVE]));
-				if (prtb[IFLA_BRPORT_LEARNING])
-					print_onoff(fp, "learning",
-						    rta_getattr_u8(prtb[IFLA_BRPORT_LEARNING]));
-				if (prtb[IFLA_BRPORT_LEARNING_SYNC])
-					print_onoff(fp, "learning_sync",
-						    rta_getattr_u8(prtb[IFLA_BRPORT_LEARNING_SYNC]));
-				if (prtb[IFLA_BRPORT_UNICAST_FLOOD])
-					print_onoff(fp, "flood",
-						    rta_getattr_u8(prtb[IFLA_BRPORT_UNICAST_FLOOD]));
-				if (prtb[IFLA_BRPORT_MCAST_FLOOD])
-					print_onoff(fp, "mcast_flood",
-						    rta_getattr_u8(prtb[IFLA_BRPORT_MCAST_FLOOD]));
-			}
-		} else
-			print_portstate(fp, rta_getattr_u8(tb[IFLA_PROTINFO]));
+		print_string(PRINT_ANY, "master", "master %s ",
+			     ll_index_to_name(master));
 	}

-	if (tb[IFLA_AF_SPEC]) {
-		/* This is reported by HW devices that have some bridging
-		 * capabilities.
-		 */
-		struct rtattr *aftb[IFLA_BRIDGE_MAX+1];
+	if (tb[IFLA_PROTINFO])
+		print_protinfo(fp, tb[IFLA_PROTINFO]);

-		parse_rtattr_nested(aftb, IFLA_BRIDGE_MAX, tb[IFLA_AF_SPEC]);
+	if (tb[IFLA_AF_SPEC])
+		print_af_spec(tb[IFLA_AF_SPEC], ifi->ifi_index);

-		if (aftb[IFLA_BRIDGE_MODE])
-			print_hwmode(fp, rta_getattr_u16(aftb[IFLA_BRIDGE_MODE]));
-	}
-
-	fprintf(fp, "\n");
+	print_string(PRINT_FP, NULL, "%s", "\n");
+	close_json_object();
 	fflush(fp);
 	return 0;
 }

 static void usage(void)
 {
-	fprintf(stderr, "Usage: bridge link set dev DEV [ cost COST ] [ priority PRIO ] [ state STATE ]\n");
-	fprintf(stderr, "                               [ guard {on | off} ]\n");
-	fprintf(stderr, "                               [ hairpin {on | off} ]\n");
-	fprintf(stderr, "                               [ fastleave {on | off} ]\n");
-	fprintf(stderr,	"                               [ root_block {on | off} ]\n");
-	fprintf(stderr,	"                               [ learning {on | off} ]\n");
-	fprintf(stderr,	"                               [ learning_sync {on | off} ]\n");
-	fprintf(stderr,	"                               [ flood {on | off} ]\n");
-	fprintf(stderr,	"                               [ mcast_flood {on | off} ]\n");
-	fprintf(stderr, "                               [ hwmode {vepa | veb} ]\n");
-	fprintf(stderr, "                               [ self ] [ master ]\n");
-	fprintf(stderr, "       bridge link show [dev DEV]\n");
+	fprintf(stderr,
+		"Usage: bridge link set dev DEV [ cost COST ] [ priority PRIO ] [ state STATE ]\n"
+		"                               [ guard {on | off} ]\n"
+		"                               [ hairpin {on | off} ]\n"
+		"                               [ fastleave {on | off} ]\n"
+		"                               [ root_block {on | off} ]\n"
+		"                               [ learning {on | off} ]\n"
+		"                               [ learning_sync {on | off} ]\n"
+		"                               [ flood {on | off} ]\n"
+		"                               [ mcast_flood {on | off} ]\n"
+		"                               [ mcast_to_unicast {on | off} ]\n"
+		"                               [ neigh_suppress {on | off} ]\n"
+		"                               [ vlan_tunnel {on | off} ]\n"
+		"                               [ isolated {on | off} ]\n"
+		"                               [ hwmode {vepa | veb} ]\n"
+		"                               [ backup_port DEVICE ] [ nobackup_port ]\n"
+		"                               [ self ] [ master ]\n"
+		"       bridge link show [dev DEV]\n");
 	exit(-1);
 }

-static bool on_off(char *arg, __s8 *attr, char *val)
-{
-	if (strcmp(val, "on") == 0)
-		*attr = 1;
-	else if (strcmp(val, "off") == 0)
-		*attr = 0;
-	else {
-		fprintf(stderr,
-			"Error: argument of \"%s\" must be \"on\" or \"off\"\n",
-			arg);
-		return false;
-	}
-
-	return true;
-}
-
 static int brlink_modify(int argc, char **argv)
 {
 	struct {
@ -266,10 +295,15 @@ static int brlink_modify(int argc, char **argv)
 		.ifm.ifi_family = PF_BRIDGE,
 	};
 	char *d = NULL;
+	int backup_port_idx = -1;
+	__s8 neigh_suppress = -1;
 	__s8 learning = -1;
 	__s8 learning_sync = -1;
 	__s8 flood = -1;
+	__s8 vlan_tunnel = -1;
 	__s8 mcast_flood = -1;
+	__s8 mcast_to_unicast = -1;
+	__s8 isolated = -1;
 	__s8 hairpin = -1;
 	__s8 bpdu_guard = -1;
 	__s8 fast_leave = -1;
@ -280,6 +314,7 @@ static int brlink_modify(int argc, char **argv)
 	__s16 mode = -1;
 	__u16 flags = 0;
 	struct rtattr *nest;
+	int ret;

 	while (argc > 0) {
 		if (strcmp(*argv, "dev") == 0) {
@ -287,36 +322,49 @@ static int brlink_modify(int argc, char **argv)
 			d = *argv;
 		} else if (strcmp(*argv, "guard") == 0) {
 			NEXT_ARG();
-			if (!on_off("guard", &bpdu_guard, *argv))
-				return -1;
+			bpdu_guard = parse_on_off("guard", *argv, &ret);
+			if (ret)
+				return ret;
 		} else if (strcmp(*argv, "hairpin") == 0) {
 			NEXT_ARG();
-			if (!on_off("hairping", &hairpin, *argv))
-				return -1;
+			hairpin = parse_on_off("hairpin", *argv, &ret);
+			if (ret)
+				return ret;
 		} else if (strcmp(*argv, "fastleave") == 0) {
 			NEXT_ARG();
-			if (!on_off("fastleave", &fast_leave, *argv))
-				return -1;
+			fast_leave = parse_on_off("fastleave", *argv, &ret);
+			if (ret)
+				return ret;
 		} else if (strcmp(*argv, "root_block") == 0) {
 			NEXT_ARG();
-			if (!on_off("root_block", &root_block, *argv))
-				return -1;
+			root_block = parse_on_off("root_block", *argv, &ret);
+			if (ret)
+				return ret;
 		} else if (strcmp(*argv, "learning") == 0) {
 			NEXT_ARG();
-			if (!on_off("learning", &learning, *argv))
-				return -1;
+			learning = parse_on_off("learning", *argv, &ret);
+			if (ret)
+				return ret;
 		} else if (strcmp(*argv, "learning_sync") == 0) {
 			NEXT_ARG();
-			if (!on_off("learning_sync", &learning_sync, *argv))
-				return -1;
+			learning_sync = parse_on_off("learning_sync", *argv, &ret);
+			if (ret)
+				return ret;
 		} else if (strcmp(*argv, "flood") == 0) {
 			NEXT_ARG();
-			if (!on_off("flood", &flood, *argv))
-				return -1;
+			flood = parse_on_off("flood", *argv, &ret);
+			if (ret)
+				return ret;
 		} else if (strcmp(*argv, "mcast_flood") == 0) {
 			NEXT_ARG();
-			if (!on_off("mcast_flood", &mcast_flood, *argv))
-				return -1;
+			mcast_flood = parse_on_off("mcast_flood", *argv, &ret);
+			if (ret)
+				return ret;
+		} else if (strcmp(*argv, "mcast_to_unicast") == 0) {
+			NEXT_ARG();
+			mcast_to_unicast = parse_on_off("mcast_to_unicast", *argv, &ret);
+			if (ret)
+				return ret;
 		} else if (strcmp(*argv, "cost") == 0) {
 			NEXT_ARG();
 			cost = atoi(*argv);
@ -326,14 +374,11 @@ static int brlink_modify(int argc, char **argv)
 		} else if (strcmp(*argv, "state") == 0) {
 			NEXT_ARG();
 			char *endptr;
-			size_t nstates = ARRAY_SIZE(port_states);

 			state = strtol(*argv, &endptr, 10);
 			if (!(**argv != '\0' && *endptr == '\0')) {
-				for (state = 0; state < nstates; state++)
-					if (strcmp(port_states[state], *argv) == 0)
-						break;
-				if (state == nstates) {
+				state = parse_stp_state(*argv);
+				if (state == -1) {
 					fprintf(stderr,
 						"Error: invalid STP port state\n");
 					return -1;
@ -355,6 +400,31 @@ static int brlink_modify(int argc, char **argv)
 			flags |= BRIDGE_FLAGS_SELF;
 		} else if (strcmp(*argv, "master") == 0) {
 			flags |= BRIDGE_FLAGS_MASTER;
+		} else if (strcmp(*argv, "neigh_suppress") == 0) {
+			NEXT_ARG();
+			neigh_suppress = parse_on_off("neigh_suppress", *argv, &ret);
+			if (ret)
+				return ret;
+		} else if (strcmp(*argv, "vlan_tunnel") == 0) {
+			NEXT_ARG();
+			vlan_tunnel = parse_on_off("vlan_tunnel", *argv, &ret);
+			if (ret)
+				return ret;
+		} else if (strcmp(*argv, "isolated") == 0) {
+			NEXT_ARG();
+			isolated = parse_on_off("isolated", *argv, &ret);
+			if (ret)
+				return ret;
+		} else if (strcmp(*argv, "backup_port") == 0) {
+			NEXT_ARG();
+			backup_port_idx = ll_name_to_index(*argv);
+			if (!backup_port_idx) {
+				fprintf(stderr, "Error: device %s does not exist\n",
+					*argv);
+				return -1;
+			}
+		} else if (strcmp(*argv, "nobackup_port") == 0) {
+			backup_port_idx = 0;
 		} else {
 			usage();
 		}
@ -392,6 +462,9 @@ static int brlink_modify(int argc, char **argv)
 	if (mcast_flood >= 0)
 		addattr8(&req.n, sizeof(req), IFLA_BRPORT_MCAST_FLOOD,
 			 mcast_flood);
+	if (mcast_to_unicast >= 0)
+		addattr8(&req.n, sizeof(req), IFLA_BRPORT_MCAST_TO_UCAST,
+			 mcast_to_unicast);
 	if (learning >= 0)
 		addattr8(&req.n, sizeof(req), IFLA_BRPORT_LEARNING, learning);
 	if (learning_sync >= 0)
@ -407,6 +480,19 @@ static int brlink_modify(int argc, char **argv)
 	if (state >= 0)
 		addattr8(&req.n, sizeof(req), IFLA_BRPORT_STATE, state);

+	if (neigh_suppress != -1)
+		addattr8(&req.n, sizeof(req), IFLA_BRPORT_NEIGH_SUPPRESS,
+			 neigh_suppress);
+	if (vlan_tunnel != -1)
+		addattr8(&req.n, sizeof(req), IFLA_BRPORT_VLAN_TUNNEL,
+			 vlan_tunnel);
+	if (isolated != -1)
+		addattr8(&req.n, sizeof(req), IFLA_BRPORT_ISOLATED, isolated);
+
+	if (backup_port_idx != -1)
+		addattr32(&req.n, sizeof(req), IFLA_BRPORT_BACKUP_PORT,
+			  backup_port_idx);
+
 	addattr_nest_end(&req.n, nest);

 	/* IFLA_AF_SPEC nested attribute. Contains IFLA_BRIDGE_FLAGS that
@ -426,7 +512,7 @@ static int brlink_modify(int argc, char **argv)
 		addattr_nest_end(&req.n, nest);
 	}

-	if (rtnl_talk(&rth, &req.n, NULL, 0) < 0)
+	if (rtnl_talk(&rth, &req.n, NULL) < 0)
 		return -1;

 	return 0;
@ -447,22 +533,34 @@ static int brlink_show(int argc, char **argv)
 	}

 	if (filter_dev) {
-		if ((filter_index = ll_name_to_index(filter_dev)) == 0) {
-			fprintf(stderr, "Cannot find device \"%s\"\n",
-				filter_dev);
-			return -1;
+		filter_index = ll_name_to_index(filter_dev);
+		if (!filter_index)
+			return nodev(filter_dev);
+	}
+
+	if (show_details) {
+		if (rtnl_linkdump_req_filter(&rth, PF_BRIDGE,
+					     (compress_vlans ?
+					      RTEXT_FILTER_BRVLAN_COMPRESSED :
+					      RTEXT_FILTER_BRVLAN)) < 0) {
+			perror("Cannon send dump request");
+			exit(1);
+		}
+	} else {
+		if (rtnl_linkdump_req(&rth, PF_BRIDGE) < 0) {
+			perror("Cannon send dump request");
+			exit(1);
 		}
 	}

-	if (rtnl_wilddump_request(&rth, PF_BRIDGE, RTM_GETLINK) < 0) {
-		perror("Cannon send dump request");
-		exit(1);
-	}
-
+	new_json_obj(json);
 	if (rtnl_dump_filter(&rth, print_linkinfo, stdout) < 0) {
 		fprintf(stderr, "Dump terminated\n");
 		exit(1);
 	}
+
+	delete_json_obj();
+	fflush(stdout);
 	return 0;
 }

--- a/bridge/mdb.c
+++ b/bridge/mdb.c
@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
 * Get mdb table with netlink
 */
@ -15,9 +16,10 @@
 #include <arpa/inet.h>

 #include "libnetlink.h"
+#include "utils.h"
 #include "br_common.h"
 #include "rt_names.h"
-#include "utils.h"
+#include "json_print.h"

 #ifndef MDBA_RTA
 #define MDBA_RTA(r) \
@ -28,8 +30,9 @@ static unsigned int filter_index, filter_vlan;

 static void usage(void)
 {
-	fprintf(stderr, "Usage: bridge mdb { add | del } dev DEV port PORT grp GROUP [permanent | temp] [vid VID]\n");
-	fprintf(stderr, "       bridge mdb {show} [ dev DEV ] [ vid VID ]\n");
+	fprintf(stderr,
+		"Usage: bridge mdb { add | del } dev DEV port PORT grp GROUP [src SOURCE] [permanent | temp] [vid VID]\n"
+		"       bridge mdb {show} [ dev DEV ] [ vid VID ]\n");
 	exit(-1);
 }

@ -38,81 +41,213 @@ static bool is_temp_mcast_rtr(__u8 type)
 	return type == MDB_RTR_TYPE_TEMP_QUERY || type == MDB_RTR_TYPE_TEMP;
 }

-static void __print_router_port_stats(FILE *f, struct rtattr *pattr)
+/*
+ * Render a timer value as "<seconds>.<hundredths>".
+ *
+ * @ticks is converted to a struct timeval by __jiffies_to_tv(). When @align
+ * is non-zero the seconds field is right-aligned to a minimum width of four
+ * characters; otherwise no padding is applied.
+ *
+ * The result lives in a static buffer: it is overwritten by the next call
+ * and must be consumed (or copied) before calling this function again.
+ */
+static const char *format_timer(__u32 ticks, int align)
+{
+	static char tbuf[32];
+	unsigned long secs, centisecs;
+	struct timeval tv;
+
+	__jiffies_to_tv(&tv, ticks);
+	secs = (unsigned long)tv.tv_sec;
+	centisecs = (unsigned long)tv.tv_usec / 10000;
+
+	if (!align)
+		snprintf(tbuf, sizeof(tbuf), "%lu.%.2lu", secs, centisecs);
+	else
+		snprintf(tbuf, sizeof(tbuf), "%4lu.%.2lu", secs, centisecs);
+
+	return tbuf;
+}
+
+/*
+ * Print the optional per-port attributes of a multicast router port.
+ *
+ * The nested attributes are parsed from the area MDB_RTR_RTA() yields for
+ * @pattr's payload, with the length reduced by one aligned uint32_t
+ * (presumably the port ifindex stored at the start of the payload).
+ * If present, the timer is printed under the "timer" key using the aligned
+ * format_timer() form, and the router type is printed under the "type" key
+ * as "temp" or "permanent".
+ */
+void br_print_router_port_stats(struct rtattr *pattr)
 {
 	struct rtattr *tb[MDBA_ROUTER_PATTR_MAX + 1];
-	struct timeval tv;
-	__u8 type;

 	parse_rtattr(tb, MDBA_ROUTER_PATTR_MAX, MDB_RTR_RTA(RTA_DATA(pattr)),
 		     RTA_PAYLOAD(pattr) - RTA_ALIGN(sizeof(uint32_t)));
+
 	if (tb[MDBA_ROUTER_PATTR_TIMER]) {
-		__jiffies_to_tv(&tv,
-				rta_getattr_u32(tb[MDBA_ROUTER_PATTR_TIMER]));
-		fprintf(f, " %4i.%.2i",
-			(int)tv.tv_sec, (int)tv.tv_usec/10000);
+		__u32 timer = rta_getattr_u32(tb[MDBA_ROUTER_PATTR_TIMER]);
+
+		print_string(PRINT_ANY, "timer", " %s",
+			     format_timer(timer, 1));
 	}
+
 	if (tb[MDBA_ROUTER_PATTR_TYPE]) {
-		type = rta_getattr_u8(tb[MDBA_ROUTER_PATTR_TYPE]);
-		fprintf(f, " %s",
-			is_temp_mcast_rtr(type) ? "temp" : "permanent");
+		__u8 type = rta_getattr_u8(tb[MDBA_ROUTER_PATTR_TYPE]);
+
+		print_string(PRINT_ANY, "type", " %s",
+			     is_temp_mcast_rtr(type) ? "temp" : "permanent");
 	}
 }

-static void br_print_router_ports(FILE *f, struct rtattr *attr, __u32 brifidx)
+/*
+ * Print every router port nested inside @attr for the bridge @brifname.
+ *
+ * JSON context: an array named after the bridge is opened and each port
+ * becomes an object with a "port" key (plus stats when show_stats is set).
+ * Text context with show_stats: one "router ports on ..." line per port,
+ * followed by its stats.
+ * Text context without show_stats: a single "router ports on <bridge>: "
+ * header followed by the space-separated port names.
+ */
+static void br_print_router_ports(FILE *f, struct rtattr *attr,
+				  const char *brifname)
 {
-	uint32_t *port_ifindex;
+	int rem = RTA_PAYLOAD(attr);
 	struct rtattr *i;
-	int rem;

-	if (!show_stats)
-		fprintf(f, "router ports on %s: ", ll_index_to_name(brifidx));
+	if (is_json_context())
+		open_json_array(PRINT_JSON, brifname);
+	else if (!show_stats)
+		fprintf(f, "router ports on %s: ", brifname);

-	rem = RTA_PAYLOAD(attr);
 	for (i = RTA_DATA(attr); RTA_OK(i, rem); i = RTA_NEXT(i, rem)) {
-		port_ifindex = RTA_DATA(i);
-		if (show_stats) {
+		uint32_t *port_ifindex = RTA_DATA(i);
+		const char *port_ifname = ll_index_to_name(*port_ifindex);
+
+		if (is_json_context()) {
+			open_json_object(NULL);
+			print_string(PRINT_JSON, "port", NULL, port_ifname);
+
+			if (show_stats)
+				br_print_router_port_stats(i);
+			close_json_object();
+		} else if (show_stats) {
 			fprintf(f, "router ports on %s: %s",
-				ll_index_to_name(brifidx),
-				ll_index_to_name(*port_ifindex));
-			__print_router_port_stats(f, i);
+				brifname, port_ifname);
+
+			br_print_router_port_stats(i);
 			fprintf(f, "\n");
 		} else {
-			fprintf(f, "%s ", ll_index_to_name(*port_ifindex));
+			fprintf(f, "%s ", port_ifname);
 		}
 	}
+
 	if (!show_stats)
-		fprintf(f, "\n");
+		print_nl();
+
+	/* Paired with the open_json_array() above, which is JSON-only too. */
+	close_json_array(PRINT_JSON, NULL);
 }

-static void print_mdb_entry(FILE *f, int ifindex, struct br_mdb_entry *e,
+/*
+ * Print one element of an MDB entry's source list as "<address>/<timer>".
+ *
+ * @src_attr: nested attribute holding MDBA_MDB_SRCATTR_* sub-attributes.
+ * @af:       address family handed to inet_ntop() for the source address.
+ * @sep:      separator emitted before the address via PRINT_FP
+ *            (presumably plain-text output only - confirm in json_print).
+ *
+ * The element is skipped silently when the address or timer attribute is
+ * missing, or when inet_ntop() cannot convert the address.
+ */
+static void print_src_entry(struct rtattr *src_attr, int af, const char *sep)
+{
+	struct rtattr *stb[MDBA_MDB_SRCATTR_MAX + 1];
+	SPRINT_BUF(abuf);
+	const char *addr;
+	__u32 timer_val;
+
+	parse_rtattr_nested(stb, MDBA_MDB_SRCATTR_MAX, src_attr);
+	if (!stb[MDBA_MDB_SRCATTR_ADDRESS] || !stb[MDBA_MDB_SRCATTR_TIMER])
+		return;
+
+	addr = inet_ntop(af, RTA_DATA(stb[MDBA_MDB_SRCATTR_ADDRESS]), abuf,
+			 sizeof(abuf));
+	if (!addr)
+		return;
+	timer_val = rta_getattr_u32(stb[MDBA_MDB_SRCATTR_TIMER]);
+
+	/* All bail-outs happen above, so the object is always closed. */
+	open_json_object(NULL);
+	print_string(PRINT_FP, NULL, "%s", sep);
+	print_color_string(PRINT_ANY, ifa_family_color(af),
+			   "address", "%s", addr);
+	print_string(PRINT_ANY, "timer", "/%s", format_timer(timer_val, 0));
+	close_json_object();
+}
+
+/*
+ * Print a single MDB entry (group membership of one bridge port).
+ *
+ * @f:       unused by the json_print based implementation.
+ * @ifindex: index of the bridge device the entry belongs to.
+ * @e:       the kernel's br_mdb_entry for this membership.
+ * @n:       netlink message the entry came from (unused here).
+ * @tb:      optional MDBA_MDB_EATTR_* attributes of the entry, may be NULL.
+ *
+ * Entries whose vid differs from a non-zero filter_vlan are skipped. The
+ * group is treated as a MAC address when e->addr.proto is zero, as IPv4
+ * for ETH_P_IP and as IPv6 otherwise.
+ */
+static void print_mdb_entry(FILE *f, int ifindex, const struct br_mdb_entry *e,
 			    struct nlmsghdr *n, struct rtattr **tb)
 {
+	const void *grp, *src;
+	const char *addr;
 	SPRINT_BUF(abuf);
-	const void *src;
+	const char *dev;
 	int af;

 	if (filter_vlan && e->vid != filter_vlan)
 		return;
-	af = e->addr.proto == htons(ETH_P_IP) ? AF_INET : AF_INET6;
-	src = af == AF_INET ? (const void *)&e->addr.u.ip4 :
-			      (const void *)&e->addr.u.ip6;
-	if (n->nlmsg_type == RTM_DELMDB)
-		fprintf(f, "Deleted ");
-	fprintf(f, "dev %s port %s grp %s %s %s", ll_index_to_name(ifindex),
-		ll_index_to_name(e->ifindex),
-		inet_ntop(af, src, abuf, sizeof(abuf)),
-		(e->state & MDB_PERMANENT) ? "permanent" : "temp",
-		(e->flags & MDB_FLAGS_OFFLOAD) ? "offload" : "");
-	if (e->vid)
-		fprintf(f, " vid %hu", e->vid);
-	if (show_stats && tb && tb[MDBA_MDB_EATTR_TIMER]) {
-		struct timeval tv;

-		__jiffies_to_tv(&tv, rta_getattr_u32(tb[MDBA_MDB_EATTR_TIMER]));
-		fprintf(f, "%4i.%.2i", (int)tv.tv_sec, (int)tv.tv_usec/10000);
+	if (!e->addr.proto) {
+		af = AF_PACKET;
+		grp = &e->addr.u.mac_addr;
+	} else if (e->addr.proto == htons(ETH_P_IP)) {
+		af = AF_INET;
+		grp = &e->addr.u.ip4;
+	} else {
+		af = AF_INET6;
+		grp = &e->addr.u.ip6;
 	}
-	fprintf(f, "\n");
+	dev = ll_index_to_name(ifindex);
+
+	/*
+	 * Convert the group address before any output is produced so that a
+	 * failed conversion cannot leave a JSON object open or a text line
+	 * half written.
+	 * The ETH_ALEN argument is ignored for all cases but AF_PACKET.
+	 */
+	addr = rt_addr_n2a_r(af, ETH_ALEN, grp, abuf, sizeof(abuf));
+	if (!addr)
+		return;
+
+	open_json_object(NULL);
+
+	print_int(PRINT_JSON, "index", NULL, ifindex);
+	print_color_string(PRINT_ANY, COLOR_IFNAME, "dev", "dev %s", dev);
+	print_string(PRINT_ANY, "port", " port %s",
+		     ll_index_to_name(e->ifindex));
+
+	print_color_string(PRINT_ANY, ifa_family_color(af),
+			    "grp", " grp %s", addr);
+
+	if (tb && tb[MDBA_MDB_EATTR_SOURCE]) {
+		const char *src_str;
+
+		src = (const void *)RTA_DATA(tb[MDBA_MDB_EATTR_SOURCE]);
+		/*
+		 * abuf can be reused: the group string has been printed.
+		 * inet_ntop() returns NULL on failure (e.g. for AF_PACKET),
+		 * and NULL must never reach a "%s" conversion.
+		 */
+		src_str = inet_ntop(af, src, abuf, sizeof(abuf));
+		if (src_str)
+			print_color_string(PRINT_ANY, ifa_family_color(af),
+					   "src", " src %s", src_str);
+	}
+	print_string(PRINT_ANY, "state", " %s",
+			   (e->state & MDB_PERMANENT) ? "permanent" : "temp");
+	if (show_details && tb) {
+		if (tb[MDBA_MDB_EATTR_GROUP_MODE]) {
+			__u8 mode = rta_getattr_u8(tb[MDBA_MDB_EATTR_GROUP_MODE]);
+
+			print_string(PRINT_ANY, "filter_mode", " filter_mode %s",
+				     mode == MCAST_INCLUDE ? "include" :
+							     "exclude");
+		}
+		if (tb[MDBA_MDB_EATTR_SRC_LIST]) {
+			struct rtattr *i, *attr = tb[MDBA_MDB_EATTR_SRC_LIST];
+			const char *sep = " ";
+			int rem;
+
+			open_json_array(PRINT_ANY, is_json_context() ?
+								"source_list" :
+								" source_list");
+			rem = RTA_PAYLOAD(attr);
+			for (i = RTA_DATA(attr); RTA_OK(i, rem);
+			     i = RTA_NEXT(i, rem)) {
+				print_src_entry(i, af, sep);
+				sep = ",";
+			}
+			close_json_array(PRINT_JSON, NULL);
+		}
+		if (tb[MDBA_MDB_EATTR_RTPROT]) {
+			__u8 rtprot = rta_getattr_u8(tb[MDBA_MDB_EATTR_RTPROT]);
+			SPRINT_BUF(rtb);
+
+			print_string(PRINT_ANY, "protocol", " proto %s ",
+				     rtnl_rtprot_n2a(rtprot, rtb, sizeof(rtb)));
+		}
+	}
+
+	open_json_array(PRINT_JSON, "flags");
+	if (e->flags & MDB_FLAGS_OFFLOAD)
+		print_string(PRINT_ANY, NULL, " %s", "offload");
+	if (e->flags & MDB_FLAGS_FAST_LEAVE)
+		print_string(PRINT_ANY, NULL, " %s", "fast_leave");
+	if (e->flags & MDB_FLAGS_STAR_EXCL)
+		print_string(PRINT_ANY, NULL, " %s", "added_by_star_ex");
+	if (e->flags & MDB_FLAGS_BLOCKED)
+		print_string(PRINT_ANY, NULL, " %s", "blocked");
+	close_json_array(PRINT_JSON, NULL);
+
+	if (e->vid)
+		print_uint(PRINT_ANY, "vid", " vid %u", e->vid);
+
+	if (show_stats && tb && tb[MDBA_MDB_EATTR_TIMER]) {
+		__u32 timer = rta_getattr_u32(tb[MDBA_MDB_EATTR_TIMER]);
+
+		print_string(PRINT_ANY, "timer", " %s",
+			     format_timer(timer, 1));
+	}
+
+	print_nl();
+	close_json_object();
 }

 static void br_print_mdb_entry(FILE *f, int ifindex, struct rtattr *attr,
@ -126,21 +261,61 @@ static void br_print_mdb_entry(FILE *f, int ifindex, struct rtattr *attr,
 	rem = RTA_PAYLOAD(attr);
 	for (i = RTA_DATA(attr); RTA_OK(i, rem); i = RTA_NEXT(i, rem)) {
 		e = RTA_DATA(i);
-		parse_rtattr(etb, MDBA_MDB_EATTR_MAX, MDB_RTA(RTA_DATA(i)),
-			     RTA_PAYLOAD(i) - RTA_ALIGN(sizeof(*e)));
+		parse_rtattr_flags(etb, MDBA_MDB_EATTR_MAX, MDB_RTA(RTA_DATA(i)),
+				   RTA_PAYLOAD(i) - RTA_ALIGN(sizeof(*e)),
+				   NLA_F_NESTED);
 		print_mdb_entry(f, ifindex, e, n, etb);
 	}
 }

-int print_mdb(const struct sockaddr_nl *who, struct nlmsghdr *n, void *arg)
+/*
+ * Walk the attributes nested directly inside @mdb and hand each one to
+ * br_print_mdb_entry() together with the bridge @ifindex and the
+ * originating netlink message @n.
+ */
+static void print_mdb_entries(FILE *fp, struct nlmsghdr *n,
+			      int ifindex,  struct rtattr *mdb)
+{
+	int rem = RTA_PAYLOAD(mdb);
+	struct rtattr *i;
+
+	for (i = RTA_DATA(mdb); RTA_OK(i, rem); i = RTA_NEXT(i, rem))
+		br_print_mdb_entry(fp, ifindex, i, n);
+}
+
+/*
+ * Print the router-port information carried in @router for the bridge
+ * identified by @ifindex.
+ *
+ * For RTM_GETMDB messages the full port list is printed, but only when
+ * show_details is set. For any other message type only the first nested
+ * attribute is read and reported as a single router port.
+ */
+static void print_router_entries(FILE *fp, struct nlmsghdr *n,
+				 int ifindex, struct rtattr *router)
+{
+	const char *brifname = ll_index_to_name(ifindex);
+
+	if (n->nlmsg_type == RTM_GETMDB) {
+		if (show_details)
+			br_print_router_ports(fp, router, brifname);
+	} else {
+		/*
+		 * NOTE(review): the first nested attribute is dereferenced
+		 * without an RTA_OK() check - presumably the kernel always
+		 * supplies one port in notifications; confirm.
+		 */
+		struct rtattr *i = RTA_DATA(router);
+		uint32_t *port_ifindex = RTA_DATA(i);
+		const char *port_name = ll_index_to_name(*port_ifindex);
+
+		if (is_json_context()) {
+			open_json_array(PRINT_JSON, brifname);
+			open_json_object(NULL);
+
+			print_string(PRINT_JSON, "port", NULL,
+				     port_name);
+			close_json_object();
+			close_json_array(PRINT_JSON, NULL);
+		} else {
+			fprintf(fp, "router port dev %s master %s\n",
+				port_name, brifname);
+		}
+	}
+}
+
+static int __parse_mdb_nlmsg(struct nlmsghdr *n, struct rtattr **tb)
 {
-	FILE *fp = arg;
 	struct br_port_msg *r = NLMSG_DATA(n);
 	int len = n->nlmsg_len;
-	struct rtattr *tb[MDBA_MAX+1], *i;

-	if (n->nlmsg_type != RTM_GETMDB && n->nlmsg_type != RTM_NEWMDB && n->nlmsg_type != RTM_DELMDB) {
-		fprintf(stderr, "Not RTM_GETMDB, RTM_NEWMDB or RTM_DELMDB: %08x %08x %08x\n",
+	if (n->nlmsg_type != RTM_GETMDB &&
+	    n->nlmsg_type != RTM_NEWMDB &&
+	    n->nlmsg_type != RTM_DELMDB) {
+		fprintf(stderr,
+			"Not RTM_GETMDB, RTM_NEWMDB or RTM_DELMDB: %08x %08x %08x\n",
 			n->nlmsg_len, n->nlmsg_type, n->nlmsg_flags);

 		return 0;
@ -157,32 +332,62 @@ int print_mdb(const struct sockaddr_nl *who, struct nlmsghdr *n, void *arg)

 	parse_rtattr(tb, MDBA_MAX, MDBA_RTA(r), n->nlmsg_len - NLMSG_LENGTH(sizeof(*r)));

-	if (tb[MDBA_MDB]) {
-		int rem = RTA_PAYLOAD(tb[MDBA_MDB]);
+	return 1;
+}

-		for (i = RTA_DATA(tb[MDBA_MDB]); RTA_OK(i, rem); i = RTA_NEXT(i, rem))
-			br_print_mdb_entry(fp, r->ifindex, i, n);
-	}
+/*
+ * Dump callback that prints only the MDBA_MDB part of a message.
+ *
+ * @arg is the output FILE *. Any __parse_mdb_nlmsg() result other than 1
+ * is passed straight back to the caller; otherwise 0 is returned after
+ * the entries, if any, have been printed.
+ */
+static int print_mdbs(struct nlmsghdr *n, void *arg)
+{
+	struct br_port_msg *r = NLMSG_DATA(n);
+	struct rtattr *tb[MDBA_MAX+1];
+	FILE *fp = arg;
+	int ret;

-	if (tb[MDBA_ROUTER]) {
-		if (n->nlmsg_type == RTM_GETMDB) {
-			if (show_details)
-				br_print_router_ports(fp, tb[MDBA_ROUTER],
-						      r->ifindex);
-		} else {
-			uint32_t *port_ifindex;
+	ret = __parse_mdb_nlmsg(n, tb);
+	if (ret != 1)
+		return ret;

-			i = RTA_DATA(tb[MDBA_ROUTER]);
-			port_ifindex = RTA_DATA(i);
-			if (n->nlmsg_type == RTM_DELMDB)
-				fprintf(fp, "Deleted ");
-			fprintf(fp, "router port dev %s master %s\n",
-				ll_index_to_name(*port_ifindex),
-				ll_index_to_name(r->ifindex));
-		}
-	}
+	if (tb[MDBA_MDB])
+		print_mdb_entries(fp, n, r->ifindex, tb[MDBA_MDB]);

-	fflush(fp);
+	return 0;
+}
+
+/*
+ * Dump callback that prints only the MDBA_ROUTER part of a message.
+ *
+ * @arg is the output FILE *. Any __parse_mdb_nlmsg() result other than 1
+ * is passed straight back to the caller; otherwise 0 is returned after
+ * the router entries, if any, have been printed.
+ */
+static int print_rtrs(struct nlmsghdr *n, void *arg)
+{
+	struct br_port_msg *r = NLMSG_DATA(n);
+	struct rtattr *tb[MDBA_MAX+1];
+	FILE *fp = arg;
+	int ret;
+
+	ret = __parse_mdb_nlmsg(n, tb);
+	if (ret != 1)
+		return ret;
+
+	if (tb[MDBA_ROUTER])
+		print_router_entries(fp, n, r->ifindex, tb[MDBA_ROUTER]);
+
+	return 0;
+}
+
+/*
+ * Print one MDB message: both its MDBA_MDB entries and its MDBA_ROUTER
+ * information, whichever are present.
+ *
+ * @arg is the output FILE *. RTM_DELMDB messages are prefixed with
+ * "Deleted " (key "deleted"). Any __parse_mdb_nlmsg() result other than 1
+ * is returned unchanged; otherwise the function returns 0.
+ */
+int print_mdb_mon(struct nlmsghdr *n, void *arg)
+{
+	struct br_port_msg *r = NLMSG_DATA(n);
+	struct rtattr *tb[MDBA_MAX+1];
+	FILE *fp = arg;
+	int ret;
+
+	ret = __parse_mdb_nlmsg(n, tb);
+	if (ret != 1)
+		return ret;
+
+	if (n->nlmsg_type == RTM_DELMDB)
+		print_bool(PRINT_ANY, "deleted", "Deleted ", true);
+
+	if (tb[MDBA_MDB])
+		print_mdb_entries(fp, n, r->ifindex, tb[MDBA_MDB]);
+
+	if (tb[MDBA_ROUTER])
+		print_router_entries(fp, n, r->ifindex, tb[MDBA_ROUTER]);

 	return 0;
 }
@ -207,27 +412,66 @@ static int mdb_show(int argc, char **argv)
 	}

 	if (filter_dev) {
-		filter_index = if_nametoindex(filter_dev);
-		if (filter_index == 0) {
-			fprintf(stderr, "Cannot find device \"%s\"\n",
-				filter_dev);
-			return -1;
-		}
+		filter_index = ll_name_to_index(filter_dev);
+		if (!filter_index)
+			return nodev(filter_dev);
 	}

-	if (rtnl_wilddump_request(&rth, PF_BRIDGE, RTM_GETMDB) < 0) {
+	new_json_obj(json);
+	open_json_object(NULL);
+
+	/* get mdb entries */
+	if (rtnl_mdbdump_req(&rth, PF_BRIDGE) < 0) {
 		perror("Cannot send dump request");
 		return -1;
 	}

-	if (rtnl_dump_filter(&rth, print_mdb, stdout) < 0) {
+	open_json_array(PRINT_JSON, "mdb");
+	if (rtnl_dump_filter(&rth, print_mdbs, stdout) < 0) {
 		fprintf(stderr, "Dump terminated\n");
 		return -1;
 	}
+	close_json_array(PRINT_JSON, NULL);
+
+	/* get router ports */
+	if (rtnl_mdbdump_req(&rth, PF_BRIDGE) < 0) {
+		perror("Cannot send dump request");
+		return -1;
+	}
+
+	open_json_object("router");
+	if (rtnl_dump_filter(&rth, print_rtrs, stdout) < 0) {
+		fprintf(stderr, "Dump terminated\n");
+		return -1;
+	}
+	close_json_object();
+
+	close_json_object();
+	delete_json_obj();
+	fflush(stdout);

 	return 0;
 }

+/*
+ * Parse the textual group address @grp into @e->addr.
+ *
+ * The string is tried as an IPv4 address, then as an IPv6 address and
+ * finally as a link-layer address of exactly ETH_ALEN bytes. On success
+ * e->addr.proto is set to ETH_P_IP, ETH_P_IPV6 (both in network byte
+ * order) or 0 respectively.
+ *
+ * Returns 0 on success, -1 when @grp matches none of the formats.
+ */
+static int mdb_parse_grp(const char *grp, struct br_mdb_entry *e)
+{
+	/* inet_pton() returns 1 only on success; 0 and -1 are both failures. */
+	if (inet_pton(AF_INET, grp, &e->addr.u.ip4) == 1) {
+		e->addr.proto = htons(ETH_P_IP);
+		return 0;
+	}
+	if (inet_pton(AF_INET6, grp, &e->addr.u.ip6) == 1) {
+		e->addr.proto = htons(ETH_P_IPV6);
+		return 0;
+	}
+	if (ll_addr_a2n((char *)e->addr.u.mac_addr, sizeof(e->addr.u.mac_addr),
+			grp) == ETH_ALEN) {
+		e->addr.proto = 0;
+		return 0;
+	}
+
+	return -1;
+}
+
 static int mdb_modify(int cmd, int flags, int argc, char **argv)
 {
 	struct {
@ -240,8 +484,8 @@ static int mdb_modify(int cmd, int flags, int argc, char **argv)
 		.n.nlmsg_type = cmd,
 		.bpm.family = PF_BRIDGE,
 	};
+	char *d = NULL, *p = NULL, *grp = NULL, *src = NULL;
 	struct br_mdb_entry entry = {};
-	char *d = NULL, *p = NULL, *grp = NULL;
 	short vid = 0;

 	while (argc > 0) {
@ -262,6 +506,9 @@ static int mdb_modify(int cmd, int flags, int argc, char **argv)
 		} else if (strcmp(*argv, "vid") == 0) {
 			NEXT_ARG();
 			vid = atoi(*argv);
+		} else if (strcmp(*argv, "src") == 0) {
+			NEXT_ARG();
+			src = *argv;
 		} else {
 			if (matches(*argv, "help") == 0)
 				usage();
@ -275,30 +522,40 @@ static int mdb_modify(int cmd, int flags, int argc, char **argv)
 	}

 	req.bpm.ifindex = ll_name_to_index(d);
-	if (req.bpm.ifindex == 0) {
-		fprintf(stderr, "Cannot find device \"%s\"\n", d);
-		return -1;
-	}
+	if (!req.bpm.ifindex)
+		return nodev(d);

 	entry.ifindex = ll_name_to_index(p);
-	if (entry.ifindex == 0) {
-		fprintf(stderr, "Cannot find device \"%s\"\n", p);
+	if (!entry.ifindex)
+		return nodev(p);
+
+	if (mdb_parse_grp(grp, &entry)) {
+		fprintf(stderr, "Invalid address \"%s\"\n", grp);
 		return -1;
 	}

-	if (!inet_pton(AF_INET, grp, &entry.addr.u.ip4)) {
-		if (!inet_pton(AF_INET6, grp, &entry.addr.u.ip6)) {
-			fprintf(stderr, "Invalid address \"%s\"\n", grp);
-			return -1;
-		} else
-			entry.addr.proto = htons(ETH_P_IPV6);
-	} else
-		entry.addr.proto = htons(ETH_P_IP);
-
 	entry.vid = vid;
 	addattr_l(&req.n, sizeof(req), MDBA_SET_ENTRY, &entry, sizeof(entry));
+	if (src) {
+		struct rtattr *nest = addattr_nest(&req.n, sizeof(req),
+						   MDBA_SET_ENTRY_ATTRS);
+		struct in6_addr src_ip6;
+		__be32 src_ip4;

-	if (rtnl_talk(&rth, &req.n, NULL, 0) < 0)
+		nest->rta_type |= NLA_F_NESTED;
+		if (!inet_pton(AF_INET, src, &src_ip4)) {
+			if (!inet_pton(AF_INET6, src, &src_ip6)) {
+				fprintf(stderr, "Invalid source address \"%s\"\n", src);
+				return -1;
+			}
+			addattr_l(&req.n, sizeof(req), MDBE_ATTR_SOURCE, &src_ip6, sizeof(src_ip6));
+		} else {
+			addattr32(&req.n, sizeof(req), MDBE_ATTR_SOURCE, src_ip4);
+		}
+		addattr_nest_end(&req.n, nest);
+	}
+
+	if (rtnl_talk(&rth, &req.n, NULL) < 0)
 		return -1;

 	return 0;
--- a/bridge/monitor.c
+++ b/bridge/monitor.c
@ -27,16 +27,15 @@


 static void usage(void) __attribute__((noreturn));
-int prefix_banner;
+static int prefix_banner;

 static void usage(void)
 {
-	fprintf(stderr, "Usage: bridge monitor [file | link | fdb | mdb | all]\n");
+	fprintf(stderr, "Usage: bridge monitor [file | link | fdb | mdb | vlan | all]\n");
 	exit(-1);
 }

-static int accept_msg(const struct sockaddr_nl *who,
-		      struct rtnl_ctrl_data *ctrl,
+static int accept_msg(struct rtnl_ctrl_data *ctrl,
 		      struct nlmsghdr *n, void *arg)
 {
 	FILE *fp = arg;
@ -50,24 +49,30 @@ static int accept_msg(const struct sockaddr_nl *who,
 		if (prefix_banner)
 			fprintf(fp, "[LINK]");

-		return print_linkinfo(who, n, arg);
+		return print_linkinfo(n, arg);

 	case RTM_NEWNEIGH:
 	case RTM_DELNEIGH:
 		if (prefix_banner)
 			fprintf(fp, "[NEIGH]");
-		return print_fdb(who, n, arg);
+		return print_fdb(n, arg);

 	case RTM_NEWMDB:
 	case RTM_DELMDB:
 		if (prefix_banner)
 			fprintf(fp, "[MDB]");
-		return print_mdb(who, n, arg);
+		return print_mdb_mon(n, arg);

 	case NLMSG_TSTAMP:
 		print_nlmsg_timestamp(fp, n);
 		return 0;

+	case RTM_NEWVLAN:
+	case RTM_DELVLAN:
+		if (prefix_banner)
+			fprintf(fp, "[VLAN]");
+		return print_vlan_rtm(n, arg, true, false);
+
 	default:
 		return 0;
 	}
@ -80,6 +85,7 @@ int do_monitor(int argc, char **argv)
 	int llink = 0;
 	int lneigh = 0;
 	int lmdb = 0;
+	int lvlan = 0;

 	rtnl_close(&rth);

@ -96,8 +102,12 @@ int do_monitor(int argc, char **argv)
 		} else if (matches(*argv, "mdb") == 0) {
 			lmdb = 1;
 			groups = 0;
+		} else if (matches(*argv, "vlan") == 0) {
+			lvlan = 1;
+			groups = 0;
 		} else if (strcmp(*argv, "all") == 0) {
 			groups = ~RTMGRP_TC;
+			lvlan = 1;
 			prefix_banner = 1;
 		} else if (matches(*argv, "help") == 0) {
 			usage();
@ -135,6 +145,12 @@ int do_monitor(int argc, char **argv)

 	if (rtnl_open(&rth, groups) < 0)
 		exit(1);
+
+	if (lvlan && rtnl_add_nl_group(&rth, RTNLGRP_BRVLAN) < 0) {
+		fprintf(stderr, "Failed to add bridge vlan group to list\n");
+		exit(1);
+	}
+
 	ll_init_map(&rth);

 	if (rtnl_listen(&rth, accept_msg, stdout) < 0)
--- a/bridge/vlan.c
+++ b/bridge/vlan.c
--- a/431
+++ b/431
@ -1,38 +1,28 @@
-#! /bin/bash
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
 # This is not an autoconf generated configure
-#
-INCLUDE=${1:-"$PWD/include"}
+
+INCLUDE="$PWD/include"
+PREFIX="/usr"
+LIBDIR="\${prefix}/lib"
+
+# Output file which is input to Makefile
+CONFIG=config.mk

 # Make a temp directory in build tree.
 TMPDIR=$(mktemp -d config.XXXXXX)
 trap 'status=$?; rm -rf $TMPDIR; exit $status' EXIT HUP INT QUIT TERM

-check_prog()
-{
-    echo -n "$2"
-    command -v $1 >/dev/null 2>&1 && (echo "$3:=y" >> Config; echo "yes") || (echo "no"; return 1)
-}
-
-check_docs()
-{
-    if check_prog latex " latex: " HAVE_LATEX; then
-        check_prog pdflatex " pdflatex: " HAVE_PDFLATEX || echo " WARNING: no PDF docs can be built from LaTeX files"
-        check_prog sgml2latex " sgml2latex: " HAVE_SGML2LATEX || echo " WARNING: no LaTeX files can be build from SGML files"
-    else
-        echo " WARNING: no docs can be built from LaTeX files"
-    fi
-
-    check_prog sgml2html " sgml2html: " HAVE_SGML2HTML || echo " WARNING: no HTML docs can be built from SGML"
-}
-
 check_toolchain()
 {
    : ${PKG_CONFIG:=pkg-config}
    : ${AR=ar}
    : ${CC=gcc}
-    echo "PKG_CONFIG:=${PKG_CONFIG}" >>Config
-    echo "AR:=${AR}" >>Config
-    echo "CC:=${CC}" >>Config
+    : ${YACC=bison}
+    echo "PKG_CONFIG:=${PKG_CONFIG}" >>$CONFIG
+    echo "AR:=${AR}" >>$CONFIG
+    echo "CC:=${CC}" >>$CONFIG
+    echo "YACC:=${YACC}" >>$CONFIG
 }

 check_atm()
@ -46,10 +36,8 @@ int main(int argc, char **argv) {
 }
 EOF

-    $CC -I$INCLUDE -o $TMPDIR/atmtest $TMPDIR/atmtest.c -latm >/dev/null 2>&1
-    if [ $? -eq 0 ]
-    then
-	echo "TC_CONFIG_ATM:=y" >>Config
+    if $CC -I$INCLUDE -o $TMPDIR/atmtest $TMPDIR/atmtest.c -latm >/dev/null 2>&1; then
+	echo "TC_CONFIG_ATM:=y" >>$CONFIG
 	echo yes
    else
 	echo no
@ -59,9 +47,8 @@ EOF

 check_xtables()
 {
-	if ! ${PKG_CONFIG} xtables --exists
-	then
-		echo "TC_CONFIG_NO_XT:=y" >>Config
+	if ! ${PKG_CONFIG} xtables --exists; then
+		echo "TC_CONFIG_NO_XT:=y" >>$CONFIG
 	fi
 }

@ -88,9 +75,8 @@ int main(int argc, char **argv)
 EOF

    if $CC -I$INCLUDE $IPTC -o $TMPDIR/ipttest $TMPDIR/ipttest.c $IPTL \
-	$(${PKG_CONFIG} xtables --cflags --libs) -ldl >/dev/null 2>&1
-    then
-	echo "TC_CONFIG_XT:=y" >>Config
+	$(${PKG_CONFIG} xtables --cflags --libs) -ldl >/dev/null 2>&1; then
+	echo "TC_CONFIG_XT:=y" >>$CONFIG
 	echo "using xtables"
    fi
    rm -f $TMPDIR/ipttest.c $TMPDIR/ipttest
@ -98,13 +84,10 @@ EOF

 check_xt_old()
 {
-    # bail if previous XT checks has already succeded.
-    if grep -q TC_CONFIG_XT Config
-    then
-	return
-    fi
+    # bail if previous XT checks has already succeeded.
+    grep -q TC_CONFIG_XT $CONFIG && return

-    #check if we dont need our internal header ..
+    #check if we don't need our internal header ..
    cat >$TMPDIR/ipttest.c <<EOF
 #include <xtables.h>
 char *lib_dir;
@ -126,10 +109,8 @@ int main(int argc, char **argv) {

 EOF

-    $CC -I$INCLUDE $IPTC -o $TMPDIR/ipttest $TMPDIR/ipttest.c $IPTL -ldl >/dev/null 2>&1
-    if [ $? -eq 0 ]
-    then
-	echo "TC_CONFIG_XT_OLD:=y" >>Config
+    if $CC -I$INCLUDE $IPTC -o $TMPDIR/ipttest $TMPDIR/ipttest.c $IPTL -ldl >/dev/null 2>&1; then
+	echo "TC_CONFIG_XT_OLD:=y" >>$CONFIG
 	echo "using old xtables (no need for xt-internal.h)"
    fi
    rm -f $TMPDIR/ipttest.c $TMPDIR/ipttest
@ -137,11 +118,8 @@ EOF

 check_xt_old_internal_h()
 {
-    # bail if previous XT checks has already succeded.
-    if grep -q TC_CONFIG_XT Config
-    then
-	return
-    fi
+    # bail if previous XT checks has already succeeded.
+    grep -q TC_CONFIG_XT $CONFIG && return

    #check if we need our own internal.h
    cat >$TMPDIR/ipttest.c <<EOF
@ -165,20 +143,25 @@ int main(int argc, char **argv) {
 }

 EOF
-	$CC -I$INCLUDE $IPTC -o $TMPDIR/ipttest $TMPDIR/ipttest.c $IPTL -ldl >/dev/null 2>&1
-
-	if [ $? -eq 0 ]
-	then
+	if $CC -I$INCLUDE $IPTC -o $TMPDIR/ipttest $TMPDIR/ipttest.c $IPTL -ldl >/dev/null 2>&1; then
 	    echo "using old xtables with xt-internal.h"
-	    echo "TC_CONFIG_XT_OLD_H:=y" >>Config
+	    echo "TC_CONFIG_XT_OLD_H:=y" >>$CONFIG
 	fi
 	rm -f $TMPDIR/ipttest.c $TMPDIR/ipttest
 }

+# Replace the literal "${prefix}" placeholder in LIBDIR with PREFIX, report
+# the resulting directory and record it as LIBDIR in the generated $CONFIG.
+check_lib_dir()
+{
+	# Quote every expansion so that paths containing whitespace or glob
+	# characters are passed through unchanged.
+	LIBDIR=$(printf '%s\n' "$LIBDIR" | sed "s|\${prefix}|$PREFIX|")
+
+	# printf rather than "echo -n": the -n option is not portable under
+	# a plain /bin/sh.
+	printf 'lib directory: %s\n' "$LIBDIR"
+	echo "LIBDIR:=$LIBDIR" >> "$CONFIG"
+}
+
 check_ipt()
 {
-	if ! grep TC_CONFIG_XT Config > /dev/null
-	then
+	if ! grep TC_CONFIG_XT $CONFIG > /dev/null; then
 		echo "using iptables"
 	fi
 }
@ -188,16 +171,16 @@ check_ipt_lib_dir()
 	IPT_LIB_DIR=$(${PKG_CONFIG} --variable=xtlibdir xtables)
 	if [ -n "$IPT_LIB_DIR" ]; then
 		echo $IPT_LIB_DIR
-		echo "IPT_LIB_DIR:=$IPT_LIB_DIR" >> Config
+		echo "IPT_LIB_DIR:=$IPT_LIB_DIR" >> $CONFIG
 		return
 	fi

-	for dir in /lib /usr/lib /usr/local/lib
-	do
-		for file in $dir/{xtables,iptables}/lib*t_*so ; do
+	for dir in /lib /usr/lib /usr/local/lib; do
+		for file in "xtables" "iptables"; do
+			file="$dir/$file/lib*t_*so"
 			if [ -f $file ]; then
 				echo ${file%/*}
-				echo "IPT_LIB_DIR:=${file%/*}" >> Config
+				echo "IPT_LIB_DIR:=${file%/*}" >> $CONFIG
 				return
 			fi
 		done
@ -215,17 +198,41 @@ int main(int argc, char **argv)
 	return 0;
 }
 EOF
-    $CC -I$INCLUDE -o $TMPDIR/setnstest $TMPDIR/setnstest.c >/dev/null 2>&1
-    if [ $? -eq 0 ]
-    then
-	echo "IP_CONFIG_SETNS:=y" >>Config
+    if $CC -I$INCLUDE -o $TMPDIR/setnstest $TMPDIR/setnstest.c >/dev/null 2>&1; then
+	echo "IP_CONFIG_SETNS:=y" >>$CONFIG
 	echo "yes"
+	echo "CFLAGS += -DHAVE_SETNS" >>$CONFIG
    else
 	echo "no"
    fi
    rm -f $TMPDIR/setnstest.c $TMPDIR/setnstest
 }

+# Probe whether a program calling name_to_handle_at() compiles and links
+# with $CC. On success print "yes" and append -DHAVE_HANDLE_AT to CFLAGS in
+# $CONFIG; otherwise print "no". The probe is only built, never executed,
+# so its uninitialized arguments are harmless. Temporary files are removed
+# in either case.
+check_name_to_handle_at()
+{
+    cat >$TMPDIR/name_to_handle_at_test.c <<EOF
+#define _GNU_SOURCE
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+int main(int argc, char **argv)
+{
+	struct file_handle *fhp;
+	int mount_id, flags, dirfd;
+	char *pathname;
+	name_to_handle_at(dirfd, pathname, fhp, &mount_id, flags);
+	return 0;
+}
+EOF
+    if $CC -I$INCLUDE -o $TMPDIR/name_to_handle_at_test $TMPDIR/name_to_handle_at_test.c >/dev/null 2>&1; then
+        echo "yes"
+        echo "CFLAGS += -DHAVE_HANDLE_AT" >>$CONFIG
+    else
+        echo "no"
+    fi
+    rm -f $TMPDIR/name_to_handle_at_test.c $TMPDIR/name_to_handle_at_test
+}
+
 check_ipset()
 {
    cat >$TMPDIR/ipsettest.c <<EOF
@ -237,7 +244,7 @@ typedef unsigned short ip_set_id_t;
 #include <linux/netfilter/xt_set.h>

 struct xt_set_info info;
-#if IPSET_PROTOCOL == 6
+#if IPSET_PROTOCOL == 6 || IPSET_PROTOCOL == 7
 int main(void)
 {
 	return IPSET_MAXNAMELEN;
@ -247,9 +254,8 @@ int main(void)
 #endif
 EOF

-    if $CC -I$INCLUDE -o $TMPDIR/ipsettest $TMPDIR/ipsettest.c >/dev/null 2>&1
-    then
-	echo "TC_CONFIG_IPSET:=y" >>Config
+    if $CC -I$INCLUDE -o $TMPDIR/ipsettest $TMPDIR/ipsettest.c >/dev/null 2>&1; then
+	echo "TC_CONFIG_IPSET:=y" >>$CONFIG
 	echo "yes"
    else
 	echo "no"
@ -259,34 +265,131 @@ EOF

 check_elf()
 {
-    cat >$TMPDIR/elftest.c <<EOF
-#include <libelf.h>
-#include <gelf.h>
-int main(void)
-{
-	Elf_Scn *scn __attribute__((__unused__));
-	GElf_Shdr shdr  __attribute__((__unused__));;
-	return elf_version(EV_CURRENT);
-}
-EOF
-
-    if $CC -I$INCLUDE -o $TMPDIR/elftest $TMPDIR/elftest.c -lelf >/dev/null 2>&1
-    then
-	echo "HAVE_ELF:=y" >>Config
+    if ${PKG_CONFIG} libelf --exists; then
+	echo "HAVE_ELF:=y" >>$CONFIG
 	echo "yes"
+
+	echo 'CFLAGS += -DHAVE_ELF' `${PKG_CONFIG} libelf --cflags` >> $CONFIG
+	echo 'LDLIBS += ' `${PKG_CONFIG} libelf --libs` >>$CONFIG
    else
 	echo "no"
    fi
-    rm -f $TMPDIR/elftest.c $TMPDIR/elftest
+}
+
+# Return 0 when a program using bpf_program__set_autoload(),
+# bpf_map__ifindex(), bpf_map__set_pin_path() and bpf_object__open_file()
+# builds with $LIBBPF_CFLAGS and $LIBBPF_LDLIBS; otherwise return the
+# compiler's exit status. Prints nothing.
+have_libbpf_basic()
+{
+    cat >$TMPDIR/libbpf_test.c <<EOF
+#include <bpf/libbpf.h>
+int main(int argc, char **argv) {
+    bpf_program__set_autoload(NULL, false);
+    bpf_map__ifindex(NULL);
+    bpf_map__set_pin_path(NULL, NULL);
+    bpf_object__open_file(NULL, NULL);
+    return 0;
+}
+EOF
+
+    $CC -o $TMPDIR/libbpf_test $TMPDIR/libbpf_test.c $LIBBPF_CFLAGS $LIBBPF_LDLIBS >/dev/null 2>&1
+    # Save the compiler status now; the rm below would overwrite $?.
+    local ret=$?
+
+    rm -f $TMPDIR/libbpf_test.c $TMPDIR/libbpf_test
+    return $ret
+}
+
+have_libbpf_sec_name()
+{
+    cat >$TMPDIR/libbpf_sec_test.c <<EOF
+#include <bpf/libbpf.h>
+int main(int argc, char **argv) {
+    void *ptr;
+    bpf_program__section_name(NULL);
+    return 0;
+}
+EOF
+
+    $CC -o $TMPDIR/libbpf_sec_test $TMPDIR/libbpf_sec_test.c $LIBBPF_CFLAGS $LIBBPF_LDLIBS >/dev/null 2>&1
+    local ret=$?
+
+    rm -f $TMPDIR/libbpf_sec_test.c $TMPDIR/libbpf_sec_test
+    return $ret
+}
+
+# Abort configure with exit status 1 when LIBBPF_FORCE=on; do nothing
+# otherwise. check_libbpf calls this on the paths where no usable libbpf
+# was found.
+check_force_libbpf_on()
+{
+    # if LIBBPF_FORCE=on is set but there is no libbpf support, just exit the
+    # config process to make sure we don't build without libbpf.
+    if [ "$LIBBPF_FORCE" = on ]; then
+        echo "	LIBBPF_FORCE=on set, but couldn't find a usable libbpf"
+        exit 1
+    fi
+}
+
+check_libbpf()
+{
+    # if set LIBBPF_FORCE=off, disable libbpf entirely
+    if [ "$LIBBPF_FORCE" = off ]; then
+        echo "no"
+        return
+    fi
+
+    if ! ${PKG_CONFIG} libbpf --exists && [ -z "$LIBBPF_DIR" ] ; then
+        echo "no"
+        check_force_libbpf_on
+        return
+    fi
+
+    if [ $(uname -m) = x86_64 ]; then
+        local LIBBPF_LIBDIR="${LIBBPF_DIR}/usr/lib64"
+    else
+        local LIBBPF_LIBDIR="${LIBBPF_DIR}/usr/lib"
+    fi
+
+    if [ -n "$LIBBPF_DIR" ]; then
+        LIBBPF_CFLAGS="-I${LIBBPF_DIR}/usr/include"
+        LIBBPF_LDLIBS="${LIBBPF_LIBDIR}/libbpf.a -lz -lelf"
+        LIBBPF_VERSION=$(PKG_CONFIG_LIBDIR=${LIBBPF_LIBDIR}/pkgconfig ${PKG_CONFIG} libbpf --modversion)
+    else
+        LIBBPF_CFLAGS=$(${PKG_CONFIG} libbpf --cflags)
+        LIBBPF_LDLIBS=$(${PKG_CONFIG} libbpf --libs)
+        LIBBPF_VERSION=$(${PKG_CONFIG} libbpf --modversion)
+    fi
+
+    if ! have_libbpf_basic; then
+        echo "no"
+        echo "	libbpf version $LIBBPF_VERSION is too low, please update it to at least 0.1.0"
+        check_force_libbpf_on
+        return
+    else
+        echo "HAVE_LIBBPF:=y" >> $CONFIG
+        echo 'CFLAGS += -DHAVE_LIBBPF ' $LIBBPF_CFLAGS >> $CONFIG
+        echo "CFLAGS += -DLIBBPF_VERSION=\\\"$LIBBPF_VERSION\\\"" >> $CONFIG
+        echo 'LDLIBS += ' $LIBBPF_LDLIBS >> $CONFIG
+
+        if [ -z "$LIBBPF_DIR" ]; then
+            echo "CFLAGS += -DLIBBPF_DYNAMIC" >> $CONFIG
+        fi
+    fi
+
+    # bpf_program__title() is deprecated since libbpf 0.2.0, use
+    # bpf_program__section_name() instead if we support
+    if have_libbpf_sec_name; then
+        echo "HAVE_LIBBPF_SECTION_NAME:=y" >> $CONFIG
+        echo 'CFLAGS += -DHAVE_LIBBPF_SECTION_NAME ' >> $CONFIG
+    fi
+
+    echo "yes"
+    echo "	libbpf version $LIBBPF_VERSION"
 }

 check_selinux()
 # SELinux is a compile time option in the ss utility
 {
-	if ${PKG_CONFIG} libselinux --exists
-	then
-		echo "HAVE_SELINUX:=y" >>Config
+	if ${PKG_CONFIG} libselinux --exists; then
+		echo "HAVE_SELINUX:=y" >>$CONFIG
 		echo "yes"
+
+		echo 'LDLIBS +=' `${PKG_CONFIG} --libs libselinux` >>$CONFIG
+		echo 'CFLAGS += -DHAVE_SELINUX' `${PKG_CONFIG} --cflags libselinux` >>$CONFIG
 	else
 		echo "no"
 	fi
@ -294,10 +397,12 @@ check_selinux()

 check_mnl()
 {
-	if ${PKG_CONFIG} libmnl --exists
-	then
-		echo "HAVE_MNL:=y" >>Config
+	if ${PKG_CONFIG} libmnl --exists; then
+		echo "HAVE_MNL:=y" >>$CONFIG
 		echo "yes"
+
+		echo 'CFLAGS += -DHAVE_LIBMNL' `${PKG_CONFIG} libmnl --cflags` >>$CONFIG
+		echo 'LDLIBS +=' `${PKG_CONFIG} libmnl --libs` >> $CONFIG
 	else
 		echo "no"
 	fi
@ -314,10 +419,8 @@ int main(int argc, char **argv) {
 	return 0;
 }
 EOF
-    $CC -I$INCLUDE -o $TMPDIR/dbtest $TMPDIR/dbtest.c -ldb >/dev/null 2>&1
-    if [ $? -eq 0 ]
-    then
-	echo "HAVE_BERKELEY_DB:=y" >>Config
+    if $CC -I$INCLUDE -o $TMPDIR/dbtest $TMPDIR/dbtest.c -ldb >/dev/null 2>&1; then
+	echo "HAVE_BERKELEY_DB:=y" >>$CONFIG
 	echo "yes"
    else
 	echo "no"
@ -325,6 +428,44 @@ EOF
    rm -f $TMPDIR/dbtest.c $TMPDIR/dbtest
 }

+# Answer the question "do we need our own strlcpy?".
+# Prints "no" when a program calling strlcpy() builds as is, or when libbsd
+# is available through pkg-config (its flags are then appended to $CONFIG
+# together with -DHAVE_LIBBSD). Prints "yes" and appends -DNEED_STRLCPY
+# otherwise.
+check_strlcpy()
+{
+    cat >$TMPDIR/strtest.c <<EOF
+#include <string.h>
+int main(int argc, char **argv) {
+	char dst[10];
+	strlcpy(dst, "test", sizeof(dst));
+	return 0;
+}
+EOF
+    if $CC -I$INCLUDE -o $TMPDIR/strtest $TMPDIR/strtest.c >/dev/null 2>&1; then
+	echo "no"
+    else
+	if ${PKG_CONFIG} libbsd --exists; then
+		echo 'CFLAGS += -DHAVE_LIBBSD' `${PKG_CONFIG} libbsd --cflags` >>$CONFIG
+		echo 'LDLIBS +=' `${PKG_CONFIG} libbsd --libs` >> $CONFIG
+		echo "no"
+	else
+		echo 'CFLAGS += -DNEED_STRLCPY' >>$CONFIG
+		echo "yes"
+	fi
+    fi
+    rm -f $TMPDIR/strtest.c $TMPDIR/strtest
+}
+
+# Detect libcap through pkg-config. Prints "yes" and appends HAVE_CAP plus
+# the libcap compile/link flags to $CONFIG when found, prints "no" otherwise.
+check_cap()
+{
+	if ! ${PKG_CONFIG} libcap --exists; then
+		echo "no"
+		return
+	fi
+
+	echo "HAVE_CAP:=y" >>$CONFIG
+	echo "yes"
+
+	echo 'CFLAGS += -DHAVE_LIBCAP' $(${PKG_CONFIG} libcap --cflags) >>$CONFIG
+	echo 'LDLIBS +=' $(${PKG_CONFIG} libcap --libs) >> $CONFIG
+}
+
 quiet_config()
 {
 	cat <<EOF
@ -351,8 +492,78 @@ endif
 EOF
 }

-echo "# Generated config based on" $INCLUDE >Config
-quiet_config >> Config
+# Print the command line help and terminate the script.
+# $1: exit status to terminate with.
+usage()
+{
+	cat <<EOF
+Usage: $0 [OPTIONS]
+	--include_dir <dir>		Path to iproute2 include dir
+	--libdir <dir>			Path to iproute2 lib dir
+	--libbpf_dir <dir>		Path to libbpf DESTDIR
+	--libbpf_force <on|off>		Enable/disable libbpf by force. Available options:
+					  on: require link against libbpf, quit config if no libbpf support
+					  off: disable libbpf probing
+	--prefix <dir>			Path prefix of the lib files to install
+	-h | --help			Show this usage info
+EOF
+	exit $1
+}
+
+# Compat with the old INCLUDE path setting method: a single argument that
+# does not start with '-' is taken as the include directory.
+if [ $# -eq 1 ] && [ "$(echo $1 | cut -c 1)" != '-' ]; then
+	INCLUDE="$1"
+else
+	# Each option is accepted both as "--opt value" and as "--opt=value".
+	while [ "$#" -gt 0 ]; do
+		case "$1" in
+			--include_dir)
+				shift
+				INCLUDE="$1" ;;
+			--include_dir=*)
+				INCLUDE="${1#*=}" ;;
+			--libdir)
+				shift
+				LIBDIR="$1" ;;
+			--libdir=*)
+				LIBDIR="${1#*=}" ;;
+			--libbpf_dir)
+				shift
+				LIBBPF_DIR="$1" ;;
+			--libbpf_dir=*)
+				LIBBPF_DIR="${1#*=}" ;;
+			--libbpf_force)
+				shift
+				LIBBPF_FORCE="$1" ;;
+			--libbpf_force=*)
+				LIBBPF_FORCE="${1#*=}" ;;
+			--prefix)
+				shift
+				PREFIX="$1" ;;
+			--prefix=*)
+				PREFIX="${1#*=}" ;;
+			-h | --help)
+				usage 0 ;;
+			# Any other long option is silently ignored.
+			--*)
+				;;
+			*)
+				usage 1 ;;
+		esac
+		# Guarded: an option given without its value has already
+		# consumed the last argument.
+		[ "$#" -gt 0 ] && shift
+	done
+fi
+
+# The include directory must exist.
+[ -d "$INCLUDE" ] || usage 1
+# ${VAR-unused} tells "not given" apart from "given but empty": a given
+# LIBBPF_DIR must be a directory, a given LIBBPF_FORCE must be on or off.
+if [ "${LIBBPF_DIR-unused}" != "unused" ]; then
+	[ -d "$LIBBPF_DIR" ] || usage 1
+fi
+if [ "${LIBBPF_FORCE-unused}" != "unused" ]; then
+	if [ "$LIBBPF_FORCE" != 'on' ] && [ "$LIBBPF_FORCE" != 'off' ]; then
+		usage 1
+	fi
+fi
+# PREFIX and LIBDIR are mandatory.
+[ -z "$PREFIX" ] && usage 1
+[ -z "$LIBDIR" ] && usage 1
+
+# Start a fresh $CONFIG; everything after this point appends to it.
+echo "# Generated config based on" $INCLUDE >$CONFIG
+quiet_config >> $CONFIG

 check_toolchain

@ -362,8 +573,7 @@ echo -n " ATM	"
 check_atm

 check_xtables
-if ! grep -q TC_CONFIG_NO_XT Config
-then
+if ! grep -q TC_CONFIG_NO_XT $CONFIG; then
 	echo -n " IPT	"
 	check_xt
 	check_xt_old
@ -375,8 +585,8 @@ then
 fi

 echo
-if ! grep -q TC_CONFIG_NO_XT Config
-then
+check_lib_dir
+if ! grep -q TC_CONFIG_NO_XT $CONFIG; then
 	echo -n "iptables modules directory: "
 	check_ipt_lib_dir
 fi
@ -384,9 +594,15 @@ fi
 echo -n "libc has setns: "
 check_setns

+echo -n "libc has name_to_handle_at: "
+check_name_to_handle_at
+
 echo -n "SELinux support: "
 check_selinux

+echo -n "libbpf support: "
+check_libbpf
+
 echo -n "ELF support: "
 check_elf

@ -396,11 +612,12 @@ check_mnl
 echo -n "Berkeley DB: "
 check_berkeley_db

-echo
-echo -n "docs:"
-check_docs
-echo
+echo -n "need for strlcpy: "
+check_strlcpy

-echo >> Config
-echo "%.o: %.c" >> Config
-echo '	$(QUIET_CC)$(CC) $(CFLAGS) $(EXTRA_CFLAGS) -c -o $@ $<' >> Config
+echo -n "libcap support: "
+check_cap
+
+echo >> $CONFIG
+echo "%.o: %.c" >> $CONFIG
+echo '	$(QUIET_CC)$(CC) $(CFLAGS) $(EXTRA_CFLAGS) $(CPPFLAGS) -c -o $@ $<' >> $CONFIG
--- a/dcb/.gitignore
+++ b/dcb/.gitignore
@ -0,0 +1 @@
+dcb
--- a/dcb/Makefile
+++ b/dcb/Makefile
@ -0,0 +1,31 @@
+# SPDX-License-Identifier: GPL-2.0
+# Build rules for the dcb utility.
+include ../config.mk
+
+TARGETS :=
+
+# dcb is only built when configure recorded HAVE_MNL:=y; otherwise TARGETS
+# stays empty and "all"/"install" have nothing to do.
+ifeq ($(HAVE_MNL),y)
+
+DCBOBJ = dcb.o \
+         dcb_app.o \
+         dcb_buffer.o \
+         dcb_dcbx.o \
+         dcb_ets.o \
+         dcb_maxrate.o \
+         dcb_pfc.o
+TARGETS += dcb
+LDLIBS += -lm
+
+endif
+
+all: $(TARGETS) $(LIBS)
+
+dcb: $(DCBOBJ) $(LIBNETLINK)
+	$(QUIET_LINK)$(CC) $^ $(LDFLAGS) $(LDLIBS) -o $@
+
+# Installs every built target into $(DESTDIR)$(SBINDIR) with mode 0755.
+install: all
+	for i in $(TARGETS); \
+	do install -m 0755 $$i $(DESTDIR)$(SBINDIR); \
+	done
+
+clean:
+	rm -f $(DCBOBJ) $(TARGETS)
--- a/dcb/dcb.c
+++ b/dcb/dcb.c
@ -0,0 +1,611 @@
+// SPDX-License-Identifier: GPL-2.0+
+
+#include <inttypes.h>
+#include <stdio.h>
+#include <linux/dcbnl.h>
+#include <libmnl/libmnl.h>
+#include <getopt.h>
+
+#include "dcb.h"
+#include "mnl_utils.h"
+#include "namespace.h"
+#include "utils.h"
+#include "version.h"
+
+/* Acquire the runtime resources of DCB: the netlink message buffer and the
+ * NETLINK_ROUTE socket, then start the JSON/plain output object.
+ *
+ * Returns 0 on success. On failure reports the cause with perror(), releases
+ * whatever was already acquired and returns -1.
+ */
+static int dcb_init(struct dcb *dcb)
+{
+	dcb->buf = malloc(MNL_SOCKET_BUFFER_SIZE);
+	if (!dcb->buf) {
+		perror("Netlink buffer allocation");
+		return -1;
+	}
+
+	dcb->nl = mnlu_socket_open(NETLINK_ROUTE);
+	if (!dcb->nl) {
+		perror("Open netlink socket");
+		/* The buffer is the only thing held so far. */
+		free(dcb->buf);
+		return -1;
+	}
+
+	new_json_obj_plain(dcb->json_output);
+	return 0;
+}
+
+/* Undo dcb_init(): finish the output object, close the netlink socket and
+ * free the message buffer. Does not free DCB itself.
+ */
+static void dcb_fini(struct dcb *dcb)
+{
+	delete_json_obj_plain();
+	mnl_socket_close(dcb->nl);
+	free(dcb->buf);
+}
+
+/* Allocate a zero-filled struct dcb. Returns NULL when out of memory; the
+ * result is released with dcb_free().
+ */
+static struct dcb *dcb_alloc(void)
+{
+	/* calloc() already yields NULL on failure, so pass its result on. */
+	return calloc(1, sizeof(struct dcb));
+}
+
+/* Release a struct dcb obtained from dcb_alloc(). Only the structure itself
+ * is freed; buf and nl are handled by dcb_fini().
+ */
+static void dcb_free(struct dcb *dcb)
+{
+	free(dcb);
+}
+
+/* State shared between __dcb_get_attribute() and its parse callbacks. */
+struct dcb_get_attribute {
+	struct dcb *dcb;	/* handle the request was issued on */
+	int attr;		/* attribute type being looked for */
+	void *payload;		/* payload of the matching attribute, NULL until found */
+	__u16 payload_len;	/* length of that payload */
+};
+
+/* Attribute callback: when ATTR has the type requested in DATA (a struct
+ * dcb_get_attribute), record its payload and length there and stop parsing.
+ * Any other attribute is skipped.
+ */
+static int dcb_get_attribute_attr_ieee_cb(const struct nlattr *attr, void *data)
+{
+	struct dcb_get_attribute *ga = data;
+
+	if (mnl_attr_get_type(attr) == ga->attr) {
+		ga->payload = mnl_attr_get_payload(attr);
+		ga->payload_len = mnl_attr_get_payload_len(attr);
+		return MNL_CB_STOP;
+	}
+
+	return MNL_CB_OK;
+}
+
+/* Top-level attribute callback: descend into the DCB_ATTR_IEEE nest and
+ * search it with dcb_get_attribute_attr_ieee_cb(); skip everything else.
+ */
+static int dcb_get_attribute_attr_cb(const struct nlattr *attr, void *data)
+{
+	if (mnl_attr_get_type(attr) == DCB_ATTR_IEEE)
+		return mnl_attr_parse_nested(attr, dcb_get_attribute_attr_ieee_cb, data);
+
+	return MNL_CB_OK;
+}
+
+/* Message callback for IEEE-wrapped attributes: parse the attributes that
+ * follow the struct dcbmsg header with dcb_get_attribute_attr_cb().
+ */
+static int dcb_get_attribute_cb(const struct nlmsghdr *nlh, void *data)
+{
+	return mnl_attr_parse(nlh, sizeof(struct dcbmsg), dcb_get_attribute_attr_cb, data);
+}
+
+/* Message callback for attributes that sit directly after the struct dcbmsg
+ * header rather than inside a DCB_ATTR_IEEE nest.
+ */
+static int dcb_get_attribute_bare_cb(const struct nlmsghdr *nlh, void *data)
+{
+	/* Bare attributes (e.g. DCB_ATTR_DCBX) are not wrapped inside an IEEE
+	 * container, so this does not have to go through unpacking in
+	 * dcb_get_attribute_attr_cb().
+	 */
+	return mnl_attr_parse(nlh, sizeof(struct dcbmsg),
+			      dcb_get_attribute_attr_ieee_cb, data);
+}
+
+/* Context for parsing the reply to a set request. */
+struct dcb_set_attribute_response {
+	int response_attr;	/* attribute type that carries the status byte */
+};
+
+/* Attribute callback for the reply to a set request. DATA is a struct
+ * dcb_set_attribute_response naming the attribute that holds the status.
+ *
+ * Attributes of other types are skipped. The status attribute must be exactly
+ * one byte: zero stops parsing successfully, a non-zero value is reported via
+ * strerror() and turned into MNL_CB_ERROR.
+ */
+static int dcb_set_attribute_attr_cb(const struct nlattr *attr, void *data)
+{
+	struct dcb_set_attribute_response *resp = data;
+	uint16_t payload_len;
+	uint8_t status;
+
+	if (mnl_attr_get_type(attr) != resp->response_attr)
+		return MNL_CB_OK;
+
+	payload_len = mnl_attr_get_payload_len(attr);
+	if (payload_len != 1) {
+		fprintf(stderr, "Response attribute expected to have size 1, not %d\n", payload_len);
+		return MNL_CB_ERROR;
+	}
+
+	status = mnl_attr_get_u8(attr);
+	if (status != 0) {
+		fprintf(stderr, "Error when attempting to set attribute: %s\n",
+			strerror(status));
+		return MNL_CB_ERROR;
+	}
+
+	return MNL_CB_STOP;
+}
+
+/* Message callback for set replies: parse the attributes that follow the
+ * struct dcbmsg header with dcb_set_attribute_attr_cb().
+ */
+static int dcb_set_attribute_cb(const struct nlmsghdr *nlh, void *data)
+{
+	return mnl_attr_parse(nlh, sizeof(struct dcbmsg), dcb_set_attribute_attr_cb, data);
+}
+
+/* Send the request NLH on the dcb socket and process the reply into dcb->buf,
+ * calling CB with DATA for the received messages.
+ *
+ * Returns -1 after perror() when sending fails, otherwise the result of
+ * mnlu_socket_recv_run().
+ */
+static int dcb_talk(struct dcb *dcb, struct nlmsghdr *nlh, mnl_cb_t cb, void *data)
+{
+	if (mnl_socket_sendto(dcb->nl, nlh, nlh->nlmsg_len) < 0) {
+		perror("mnl_socket_sendto");
+		return -1;
+	}
+
+	return mnlu_socket_recv_run(dcb->nl, nlh->nlmsg_seq, dcb->buf,
+				    MNL_SOCKET_BUFFER_SIZE, cb, data);
+}
+
+/* Start a request in dcb->buf: a netlink header of type NLMSG_TYPE with
+ * NLM_F_REQUEST, a struct dcbmsg carrying DCB_CMD, and a DCB_ATTR_IFNAME
+ * attribute naming DEV. Returns the header so callers can append more
+ * attributes.
+ */
+static struct nlmsghdr *dcb_prepare(struct dcb *dcb, const char *dev,
+				    uint32_t nlmsg_type, uint8_t dcb_cmd)
+{
+	struct dcbmsg dcbm = {
+		.cmd = dcb_cmd,
+	};
+	struct nlmsghdr *nlh;
+
+	nlh = mnlu_msg_prepare(dcb->buf, nlmsg_type, NLM_F_REQUEST, &dcbm, sizeof(dcbm));
+	mnl_attr_put_strz(nlh, DCB_ATTR_IFNAME, dev);
+	return nlh;
+}
+
+/* Issue an RTM_GETDCB request with DCB command COMMAND for DEV and locate
+ * attribute ATTR in the reply using GET_ATTRIBUTE_CB.
+ *
+ * On success returns 0 and stores the attribute payload pointer and length
+ * through PAYLOAD_P and PAYLOAD_LEN_P. The pointer is whatever
+ * mnl_attr_get_payload() returned while parsing the reply, so it is not a
+ * private copy.
+ * Returns the dcb_talk() result when the exchange fails, or -ENOENT when the
+ * reply did not contain ATTR.
+ */
+static int __dcb_get_attribute(struct dcb *dcb, int command,
+			       const char *dev, int attr,
+			       void **payload_p, __u16 *payload_len_p,
+			       int (*get_attribute_cb)(const struct nlmsghdr *nlh,
+						       void *data))
+{
+	struct dcb_get_attribute ga = {
+		.dcb = dcb,
+		.attr = attr,
+		.payload = NULL,
+	};
+	struct nlmsghdr *nlh;
+	int ret;
+
+	nlh = dcb_prepare(dcb, dev, RTM_GETDCB, command);
+
+	ret = dcb_talk(dcb, nlh, get_attribute_cb, &ga);
+	if (ret) {
+		perror("Attribute read");
+		return ret;
+	}
+	if (ga.payload == NULL) {
+		/* No call failed here, so errno carries nothing relevant:
+		 * print the plain message instead of using perror().
+		 */
+		fprintf(stderr, "Attribute not found\n");
+		return -ENOENT;
+	}
+
+	*payload_p = ga.payload;
+	*payload_len_p = ga.payload_len;
+	return 0;
+}
+
+/* Fetch attribute ATTR of DEV with DCB_CMD_IEEE_GET, looking inside the
+ * DCB_ATTR_IEEE nest. The payload is returned by reference and may have any
+ * length; see __dcb_get_attribute() for the return values.
+ */
+int dcb_get_attribute_va(struct dcb *dcb, const char *dev, int attr,
+			 void **payload_p, __u16 *payload_len_p)
+{
+	return __dcb_get_attribute(dcb, DCB_CMD_IEEE_GET, dev, attr,
+				   payload_p, payload_len_p,
+				   dcb_get_attribute_cb);
+}
+
+/* Fetch attribute ATTR of DEV using DCB command CMD, where the attribute is
+ * expected directly in the message rather than inside DCB_ATTR_IEEE. The
+ * payload is returned by reference.
+ */
+int dcb_get_attribute_bare(struct dcb *dcb, int cmd, const char *dev, int attr,
+			   void **payload_p, __u16 *payload_len_p)
+{
+	return __dcb_get_attribute(dcb, cmd, dev, attr,
+				   payload_p, payload_len_p,
+				   dcb_get_attribute_bare_cb);
+}
+
+/* Fetch IEEE attribute ATTR of DEV and copy its payload into DATA.
+ *
+ * The payload must be exactly DATA_LEN bytes long; otherwise nothing is
+ * copied and -EINVAL is returned. Errors from dcb_get_attribute_va() are
+ * passed through. Returns 0 on success.
+ */
+int dcb_get_attribute(struct dcb *dcb, const char *dev, int attr, void *data, size_t data_len)
+{
+	__u16 payload_len;
+	void *payload;
+	int ret;
+
+	ret = dcb_get_attribute_va(dcb, dev, attr, &payload, &payload_len);
+	if (ret)
+		return ret;
+
+	if (payload_len != data_len) {
+		/* data_len is a size_t, hence %zu rather than %zd. */
+		fprintf(stderr, "Wrong len %d, expected %zu\n", payload_len, data_len);
+		return -EINVAL;
+	}
+
+	memcpy(data, payload, data_len);
+	return 0;
+}
+
+/* Issue an RTM_SETDCB request with DCB command COMMAND for DEV.
+ *
+ * CB is called with DATA to append the attributes to the request; a non-zero
+ * result from it aborts before anything is sent. The reply is then checked
+ * for a status byte in attribute RESPONSE_ATTR. Returns 0 on success, or the
+ * failing step's result.
+ */
+static int __dcb_set_attribute(struct dcb *dcb, int command, const char *dev,
+			       int (*cb)(struct dcb *, struct nlmsghdr *, void *),
+			       void *data, int response_attr)
+{
+	struct dcb_set_attribute_response resp = {
+		.response_attr = response_attr,
+	};
+	struct nlmsghdr *nlh = dcb_prepare(dcb, dev, RTM_SETDCB, command);
+	int ret;
+
+	ret = cb(dcb, nlh, data);
+	if (ret != 0)
+		return ret;
+
+	ret = dcb_talk(dcb, nlh, dcb_set_attribute_cb, &resp);
+	if (ret != 0)
+		perror("Attribute write");
+
+	return ret;
+}
+
+/* Wraps a caller-supplied attribute writer so that it can be run inside a
+ * DCB_ATTR_IEEE nest by dcb_set_attribute_ieee_cb().
+ */
+struct dcb_set_attribute_ieee_cb {
+	int (*cb)(struct dcb *dcb, struct nlmsghdr *nlh, void *data);	/* wrapped writer */
+	void *data;	/* argument handed to cb */
+};
+
+/* Open a DCB_ATTR_IEEE nest in NLH, let the wrapped callback from DATA (a
+ * struct dcb_set_attribute_ieee_cb) add its attributes, then close the nest.
+ * If the wrapped callback fails, its result is returned and the nest is left
+ * unterminated.
+ */
+static int dcb_set_attribute_ieee_cb(struct dcb *dcb, struct nlmsghdr *nlh, void *data)
+{
+	struct dcb_set_attribute_ieee_cb *ieee_data = data;
+	struct nlattr *nest;
+	int ret;
+
+	nest = mnl_attr_nest_start(nlh, DCB_ATTR_IEEE);
+	ret = ieee_data->cb(dcb, nlh, ieee_data->data);
+	if (ret)
+		return ret;
+	mnl_attr_nest_end(nlh, nest);
+
+	return 0;
+}
+
+/* Send a set request with DCB command COMMAND for DEV whose attributes are
+ * produced by CB(dcb, nlh, DATA) inside a DCB_ATTR_IEEE nest. The status is
+ * read from the DCB_ATTR_IEEE attribute of the reply.
+ */
+int dcb_set_attribute_va(struct dcb *dcb, int command, const char *dev,
+			 int (*cb)(struct dcb *dcb, struct nlmsghdr *nlh, void *data),
+			 void *data)
+{
+	struct dcb_set_attribute_ieee_cb ieee_data = {
+		.cb = cb,
+		.data = data,
+	};
+
+	return __dcb_set_attribute(dcb, command, dev,
+				   &dcb_set_attribute_ieee_cb, &ieee_data,
+				   DCB_ATTR_IEEE);
+}
+
+/* A single attribute to append to a set request; see dcb_set_attribute_put(). */
+struct dcb_set_attribute {
+	int attr;		/* attribute type */
+	const void *data;	/* payload bytes */
+	size_t data_len;	/* payload length in bytes */
+};
+
+/* Attribute writer: append the attribute described by DATA (a struct
+ * dcb_set_attribute) to NLH. Always returns 0.
+ */
+static int dcb_set_attribute_put(struct dcb *dcb, struct nlmsghdr *nlh, void *data)
+{
+	struct dcb_set_attribute *dsa = data;
+
+	mnl_attr_put(nlh, dsa->attr, dsa->data_len, dsa->data);
+	return 0;
+}
+
+/* Set IEEE attribute ATTR of DEV to the DATA_LEN bytes at DATA, using
+ * DCB_CMD_IEEE_SET. Returns 0 on success.
+ */
+int dcb_set_attribute(struct dcb *dcb, const char *dev, int attr, const void *data, size_t data_len)
+{
+	struct dcb_set_attribute dsa = {
+		.attr = attr,
+		.data = data,
+		.data_len = data_len,
+	};
+
+	return dcb_set_attribute_va(dcb, DCB_CMD_IEEE_SET, dev,
+				    &dcb_set_attribute_put, &dsa);
+}
+
+/* Set attribute ATTR of DEV with DCB command COMMAND, placing the attribute
+ * directly in the message (no DCB_ATTR_IEEE nest). The status byte is read
+ * from attribute RESPONSE_ATTR of the reply. Returns 0 on success.
+ */
+int dcb_set_attribute_bare(struct dcb *dcb, int command, const char *dev,
+			   int attr, const void *data, size_t data_len,
+			   int response_attr)
+{
+	struct dcb_set_attribute dsa = {
+		.attr = attr,
+		.data = data,
+		.data_len = data_len,
+	};
+
+	return __dcb_set_attribute(dcb, command, dev,
+				   &dcb_set_attribute_put, &dsa, response_attr);
+}
+
+/* Print SIZE elements of ARRAY as "index:value " pairs. */
+void dcb_print_array_u8(const __u8 *array, size_t size)
+{
+	SPRINT_BUF(b);
+	size_t i;
+
+	for (i = 0; i < size; i++) {
+		/* Build the per-element format: the index is baked in (%zu,
+		 * as i is a size_t), the doubled %% leaves a conversion for
+		 * the value.
+		 */
+		snprintf(b, sizeof(b), "%zu:%%d ", i);
+		print_uint(PRINT_ANY, NULL, b, array[i]);
+	}
+}
+
+/* Print SIZE 64-bit elements of ARRAY as "index:value " pairs. */
+void dcb_print_array_u64(const __u64 *array, size_t size)
+{
+	SPRINT_BUF(b);
+	size_t i;
+
+	for (i = 0; i < size; i++) {
+		/* Index is baked into the format (%zu, as i is a size_t); the
+		 * doubled %% leaves the PRIu64 conversion for the value.
+		 */
+		snprintf(b, sizeof(b), "%zu:%%" PRIu64 " ", i);
+		print_u64(PRINT_ANY, NULL, b, array[i]);
+	}
+}
+
+/* Print SIZE elements of ARRAY as "index:state " pairs, where the state text
+ * is produced by print_on_off().
+ */
+void dcb_print_array_on_off(const __u8 *array, size_t size)
+{
+	SPRINT_BUF(b);
+	size_t i;
+
+	for (i = 0; i < size; i++) {
+		/* Index is baked into the format (%zu, as i is a size_t); the
+		 * doubled %% leaves a %s for the state.
+		 */
+		snprintf(b, sizeof(b), "%zu:%%s ", i);
+		print_on_off(PRINT_ANY, NULL, b, array[i]);
+	}
+}
+
+/* Print ARRAY_SIZE elements of ARRAY as "index:keyword " pairs.
+ *
+ * Each element is used as an index into KW (KW_SIZE entries). Elements that
+ * are out of range or map to a NULL entry are printed as "???".
+ */
+void dcb_print_array_kw(const __u8 *array, size_t array_size,
+			const char *const kw[], size_t kw_size)
+{
+	SPRINT_BUF(b);
+	size_t i;
+
+	for (i = 0; i < array_size; i++) {
+		__u8 emt = array[i];
+		const char *name = "???";
+
+		if (emt < kw_size && kw[emt])
+			name = kw[emt];
+
+		/* Index is baked into the format (%zu, as i is a size_t). */
+		snprintf(b, sizeof(b), "%zu:%%s ", i);
+		print_string(PRINT_ANY, NULL, b, name);
+	}
+}
+
+/* Print ARRAY under a name: JSON_NAME opens/closes a JSON array in JSON
+ * mode, FP_NAME followed by a space is printed in plain-text mode. The
+ * elements themselves are emitted by PRINT_ARRAY(array, size).
+ */
+void dcb_print_named_array(const char *json_name, const char *fp_name,
+			   const __u8 *array, size_t size,
+			   void (*print_array)(const __u8 *, size_t))
+{
+	open_json_array(PRINT_JSON, json_name);
+	print_string(PRINT_FP, NULL, "%s ", fp_name);
+	print_array(array, size);
+	close_json_array(PRINT_JSON, json_name);
+}
+
+/* Validate one KEY:VALUE mapping and store it through SET_ARRAY.
+ *
+ * KEY must be within 0..MAX_KEY, or (__u32) -1 meaning "all keys". VALUE must
+ * be within 0..MAX_VALUE. WHAT_KEY and WHAT_VALUE name the two sides in
+ * diagnostics. For an "all" key SET_ARRAY is called once per key in
+ * 0..MAX_KEY, otherwise exactly once.
+ *
+ * Returns 0 on success, -EINVAL (after a message on stderr) when either side
+ * is out of range.
+ */
+int dcb_parse_mapping(const char *what_key, __u32 key, __u32 max_key,
+		      const char *what_value, __u64 value, __u64 max_value,
+		      void (*set_array)(__u32 index, __u64 value, void *data),
+		      void *set_array_data)
+{
+	bool is_all = key == (__u32) -1;
+
+	if (!is_all && key > max_key) {
+		/* max_key is unsigned, so %u. */
+		fprintf(stderr, "In %s:%s mapping, %s is expected to be 0..%u\n",
+			what_key, what_value, what_key, max_key);
+		return -EINVAL;
+	}
+
+	if (value > max_value) {
+		/* __u64 is not unsigned long long on every ABI; cast so the
+		 * argument always matches %llu.
+		 */
+		fprintf(stderr, "In %s:%s mapping, %s is expected to be 0..%llu\n",
+			what_key, what_value, what_value,
+			(unsigned long long) max_value);
+		return -EINVAL;
+	}
+
+	if (is_all) {
+		for (key = 0; key <= max_key; key++)
+			set_array(key, value, set_array_data);
+	} else {
+		set_array(key, value, set_array_data);
+	}
+
+	return 0;
+}
+
+/* dcb_parse_mapping() setter: store VALUE at index KEY of the __u8 array at
+ * DATA. The store narrows VALUE to 8 bits.
+ */
+void dcb_set_u8(__u32 key, __u64 value, void *data)
+{
+	((__u8 *) data)[key] = value;
+}
+
+/* dcb_parse_mapping() setter: store VALUE at index KEY of the __u32 array at
+ * DATA. The store narrows VALUE to 32 bits.
+ */
+void dcb_set_u32(__u32 key, __u64 value, void *data)
+{
+	((__u32 *) data)[key] = value;
+}
+
+/* dcb_parse_mapping() setter: store VALUE at index KEY of the __u64 array at
+ * DATA.
+ */
+void dcb_set_u64(__u32 key, __u64 value, void *data)
+{
+	((__u64 *) data)[key] = value;
+}
+
+/* Parse the leading "dev DEV" of a sub-command and continue with AND_THEN.
+ *
+ * With no arguments or "help", HELP() is called and 0 returned. After a valid
+ * "dev DEV", AND_THEN(dcb, DEV, argc, argv) is called with the remaining
+ * arguments and its result returned. Anything else prints a diagnostic and
+ * the help text and returns -EINVAL.
+ */
+int dcb_cmd_parse_dev(struct dcb *dcb, int argc, char **argv,
+		      int (*and_then)(struct dcb *dcb, const char *dev,
+				      int argc, char **argv),
+		      void (*help)(void))
+{
+	const char *dev;
+
+	if (!argc || matches(*argv, "help") == 0) {
+		help();
+		return 0;
+	} else if (matches(*argv, "dev") == 0) {
+		NEXT_ARG();
+		dev = *argv;
+		if (check_ifname(dev)) {
+			invarg("not a valid ifname", *argv);
+			return -EINVAL;
+		}
+		NEXT_ARG_FWD();
+		return and_then(dcb, dev, argc, argv);
+	} else {
+		/* Terminate the line so the help text that follows does not
+		 * run on from the diagnostic.
+		 */
+		fprintf(stderr, "Expected `dev DEV', not `%s'\n", *argv);
+		help();
+		return -EINVAL;
+	}
+}
+
+/* Print the top-level usage text to stderr.
+ *
+ * NOTE(review): the text advertises "-v | --verbose", but main() has no
+ * "verbose" long option and no 'v' case, so -v ends up in the "Unknown
+ * option" branch -- confirm which side is intended.
+ */
+static void dcb_help(void)
+{
+	fprintf(stderr,
+		"Usage: dcb [ OPTIONS ] OBJECT { COMMAND | help }\n"
+		"       dcb [ -f | --force ] { -b | --batch } filename [ -n | --netns ] netnsname\n"
+		"where  OBJECT := { app | buffer | dcbx | ets | maxrate | pfc }\n"
+		"       OPTIONS := [ -V | --Version | -i | --iec | -j | --json\n"
+		"                  | -N | --Numeric | -p | --pretty\n"
+		"                  | -s | --statistics | -v | --verbose]\n");
+}
+
+/* Dispatch one command line to the handler of the object named by argv[0].
+ *
+ * No arguments or "help" prints the usage and returns 0. Object names are
+ * compared with matches() in the table order below; the handler receives the
+ * arguments following the object name. An unknown object yields -ENOENT.
+ */
+static int dcb_cmd(struct dcb *dcb, int argc, char **argv)
+{
+	static const struct {
+		const char *name;
+		int (*handler)(struct dcb *dcb, int argc, char **argv);
+	} objects[] = {
+		{ "app",	dcb_cmd_app },
+		{ "buffer",	dcb_cmd_buffer },
+		{ "dcbx",	dcb_cmd_dcbx },
+		{ "ets",	dcb_cmd_ets },
+		{ "maxrate",	dcb_cmd_maxrate },
+		{ "pfc",	dcb_cmd_pfc },
+	};
+	size_t i;
+
+	if (!argc || matches(*argv, "help") == 0) {
+		dcb_help();
+		return 0;
+	}
+
+	for (i = 0; i < sizeof(objects) / sizeof(objects[0]); i++) {
+		if (matches(*argv, objects[i].name) == 0)
+			return objects[i].handler(dcb, argc - 1, argv + 1);
+	}
+
+	fprintf(stderr, "Object \"%s\" is unknown\n", *argv);
+	return -ENOENT;
+}
+
+/* do_batch() callback: run one batch line through dcb_cmd(). DATA is the
+ * struct dcb passed to do_batch() by dcb_batch().
+ */
+static int dcb_batch_cmd(int argc, char *argv[], void *data)
+{
+	struct dcb *dcb = data;
+
+	return dcb_cmd(dcb, argc, argv);
+}
+
+/* Execute the commands of batch file NAME via do_batch(), handing it FORCE
+ * unchanged. Returns the do_batch() result.
+ */
+static int dcb_batch(struct dcb *dcb, const char *name, bool force)
+{
+	return do_batch(name, force, dcb_batch_cmd, dcb);
+}
+
+/* Entry point of the dcb utility: parse the global options, set up the
+ * netlink state, then run either the batch file or the single command given
+ * on the command line. Returns EXIT_SUCCESS or EXIT_FAILURE.
+ */
+int main(int argc, char **argv)
+{
+	static const struct option long_options[] = {
+		{ "Version",		no_argument,		NULL, 'V' },
+		{ "force",		no_argument,		NULL, 'f' },
+		{ "batch",		required_argument,	NULL, 'b' },
+		{ "iec",		no_argument,		NULL, 'i' },
+		{ "json",		no_argument,		NULL, 'j' },
+		{ "Numeric",		no_argument,		NULL, 'N' },
+		{ "pretty",		no_argument,		NULL, 'p' },
+		{ "statistics",		no_argument,		NULL, 's' },
+		{ "netns",		required_argument,	NULL, 'n' },
+		{ "help",		no_argument,		NULL, 'h' },
+		{ NULL, 0, NULL, 0 }
+	};
+	const char *batch_file = NULL;
+	bool force = false;
+	struct dcb *dcb;
+	int opt;
+	int err;
+	int ret;
+
+	dcb = dcb_alloc();
+	if (!dcb) {
+		fprintf(stderr, "Failed to allocate memory for dcb\n");
+		return EXIT_FAILURE;
+	}
+
+	/* NOTE(review): 'v' is accepted by the option string but has no case
+	 * below, so -v falls into the default "Unknown option" branch.
+	 */
+	while ((opt = getopt_long(argc, argv, "b:fhijn:psvNV",
+				  long_options, NULL)) >= 0) {
+
+		switch (opt) {
+		case 'V':
+			printf("dcb utility, iproute2-%s\n", version);
+			ret = EXIT_SUCCESS;
+			goto dcb_free;
+		case 'f':
+			force = true;
+			break;
+		case 'b':
+			batch_file = optarg;
+			break;
+		case 'j':
+			dcb->json_output = true;
+			break;
+		case 'N':
+			dcb->numeric = true;
+			break;
+		case 'p':
+			pretty = true;
+			break;
+		case 's':
+			dcb->stats = true;
+			break;
+		case 'n':
+			if (netns_switch(optarg)) {
+				ret = EXIT_FAILURE;
+				goto dcb_free;
+			}
+			break;
+		case 'i':
+			dcb->use_iec = true;
+			break;
+		case 'h':
+			dcb_help();
+			ret = EXIT_SUCCESS;
+			goto dcb_free;
+		default:
+			fprintf(stderr, "Unknown option.\n");
+			dcb_help();
+			ret = EXIT_FAILURE;
+			goto dcb_free;
+		}
+	}
+
+	/* Skip past the options; what remains is OBJECT COMMAND ... */
+	argc -= optind;
+	argv += optind;
+
+	/* Until dcb_init() succeeds only the structure itself needs freeing,
+	 * hence the dcb_free label rather than dcb_fini.
+	 */
+	err = dcb_init(dcb);
+	if (err) {
+		ret = EXIT_FAILURE;
+		goto dcb_free;
+	}
+
+	if (batch_file)
+		err = dcb_batch(dcb, batch_file, force);
+	else
+		err = dcb_cmd(dcb, argc, argv);
+
+	if (err) {
+		ret = EXIT_FAILURE;
+		goto dcb_fini;
+	}
+
+	ret = EXIT_SUCCESS;
+
+dcb_fini:
+	dcb_fini(dcb);
+dcb_free:
+	dcb_free(dcb);
+
+	return ret;
+}
--- a/dcb/dcb.h
+++ b/dcb/dcb.h
@ -0,0 +1,81 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __DCB_H__
+#define __DCB_H__ 1
+
+#include <libmnl/libmnl.h>
+#include <stdbool.h>
+#include <stddef.h>
+
+/* dcb.c */
+
+/* Per-invocation state of the dcb tool. */
+struct dcb {
+	char *buf;		/* netlink message buffer, MNL_SOCKET_BUFFER_SIZE bytes */
+	struct mnl_socket *nl;	/* NETLINK_ROUTE socket */
+	bool json_output;	/* -j / --json */
+	bool stats;		/* -s / --statistics */
+	bool use_iec;		/* -i / --iec */
+	bool numeric;		/* -N / --Numeric */
+};
+
+int dcb_parse_mapping(const char *what_key, __u32 key, __u32 max_key,
+		      const char *what_value, __u64 value, __u64 max_value,
+		      void (*set_array)(__u32 index, __u64 value, void *data),
+		      void *set_array_data);
+int dcb_cmd_parse_dev(struct dcb *dcb, int argc, char **argv,
+		      int (*and_then)(struct dcb *dcb, const char *dev,
+				      int argc, char **argv),
+		      void (*help)(void));
+
+void dcb_set_u8(__u32 key, __u64 value, void *data);
+void dcb_set_u32(__u32 key, __u64 value, void *data);
+void dcb_set_u64(__u32 key, __u64 value, void *data);
+
+int dcb_get_attribute(struct dcb *dcb, const char *dev, int attr,
+		      void *data, size_t data_len);
+int dcb_set_attribute(struct dcb *dcb, const char *dev, int attr,
+		      const void *data, size_t data_len);
+int dcb_get_attribute_va(struct dcb *dcb, const char *dev, int attr,
+			 void **payload_p, __u16 *payload_len_p);
+int dcb_set_attribute_va(struct dcb *dcb, int command, const char *dev,
+			 int (*cb)(struct dcb *dcb, struct nlmsghdr *nlh, void *data),
+			 void *data);
+int dcb_get_attribute_bare(struct dcb *dcb, int cmd, const char *dev, int attr,
+			   void **payload_p, __u16 *payload_len_p);
+int dcb_set_attribute_bare(struct dcb *dcb, int command, const char *dev,
+			   int attr, const void *data, size_t data_len,
+			   int response_attr);
+
+void dcb_print_named_array(const char *json_name, const char *fp_name,
+			   const __u8 *array, size_t size,
+			   void (*print_array)(const __u8 *, size_t));
+void dcb_print_array_u8(const __u8 *array, size_t size);
+void dcb_print_array_u64(const __u64 *array, size_t size);
+void dcb_print_array_on_off(const __u8 *array, size_t size);
+void dcb_print_array_kw(const __u8 *array, size_t array_size,
+			const char *const kw[], size_t kw_size);
+
+/* dcb_app.c */
+
+int dcb_cmd_app(struct dcb *dcb, int argc, char **argv);
+
+/* dcb_buffer.c */
+
+int dcb_cmd_buffer(struct dcb *dcb, int argc, char **argv);
+
+/* dcb_dcbx.c */
+
+int dcb_cmd_dcbx(struct dcb *dcb, int argc, char **argv);
+
+/* dcb_ets.c */
+
+int dcb_cmd_ets(struct dcb *dcb, int argc, char **argv);
+
+/* dcb_maxrate.c */
+
+int dcb_cmd_maxrate(struct dcb *dcb, int argc, char **argv);
+
+/* dcb_pfc.c */
+
+int dcb_cmd_pfc(struct dcb *dcb, int argc, char **argv);
+
+#endif /* __DCB_H__ */
--- a/dcb/dcb_app.c
+++ b/dcb/dcb_app.c
@ -0,0 +1,795 @@
+// SPDX-License-Identifier: GPL-2.0+
+
+#include <errno.h>
+#include <inttypes.h>
+#include <stdio.h>
+#include <libmnl/libmnl.h>
+#include <linux/dcbnl.h>
+
+#include "dcb.h"
+#include "utils.h"
+#include "rt_names.h"
+
+/* Print the usage of "dcb app add|del|replace" to stderr. */
+static void dcb_app_help_add(void)
+{
+	/* The dscp-prio key is written as DSCP so that it matches the
+	 * "DSCP := ..." line of the legend below.
+	 */
+	fprintf(stderr,
+		"Usage: dcb app { add | del | replace } dev STRING\n"
+		"           [ default-prio PRIO ]\n"
+		"           [ ethtype-prio ET:PRIO ]\n"
+		"           [ stream-port-prio PORT:PRIO ]\n"
+		"           [ dgram-port-prio PORT:PRIO ]\n"
+		"           [ port-prio PORT:PRIO ]\n"
+		"           [ dscp-prio DSCP:PRIO ]\n"
+		"\n"
+		" where PRIO := { 0 .. 7 }\n"
+		"       ET := { 0x600 .. 0xffff }\n"
+		"       PORT := { 1 .. 65535 }\n"
+		"       DSCP := { 0 .. 63 }\n"
+		"\n"
+	);
+}
+
+/* Print the usage of "dcb app show|flush" to stderr. */
+static void dcb_app_help_show_flush(void)
+{
+	fprintf(stderr,
+		"Usage: dcb app { show | flush } dev STRING\n"
+		"           [ default-prio ]\n"
+		"           [ ethtype-prio ]\n"
+		"           [ stream-port-prio ]\n"
+		"           [ dgram-port-prio ]\n"
+		"           [ port-prio ]\n"
+		"           [ dscp-prio ]\n"
+		"\n"
+	);
+}
+
+/* Print the complete "dcb app" help to stderr: the help line followed by the
+ * show/flush usage and the add/del/replace usage.
+ */
+static void dcb_app_help(void)
+{
+	fprintf(stderr,
+		"Usage: dcb app help\n"
+		"\n"
+	);
+	dcb_app_help_show_flush();
+	dcb_app_help_add();
+}
+
+/* Growable array of APP entries. */
+struct dcb_app_table {
+	struct dcb_app *apps;	/* heap array, grown with realloc() by dcb_app_table_push() */
+	size_t n_apps;		/* number of valid entries in apps */
+};
+
+/* Free the entry array of TAB. The table structure itself is not freed and
+ * its fields are left untouched.
+ */
+static void dcb_app_table_fini(struct dcb_app_table *tab)
+{
+	free(tab->apps);
+}
+
+/* Append a copy of *APP to TAB, growing the array by one entry.
+ *
+ * Returns 0 on success. When the reallocation fails, reports it with
+ * perror(), leaves TAB unchanged and returns -ENOMEM.
+ */
+static int dcb_app_table_push(struct dcb_app_table *tab, struct dcb_app *app)
+{
+	size_t new_count = tab->n_apps + 1;
+	struct dcb_app *grown;
+
+	grown = realloc(tab->apps, new_count * sizeof(*grown));
+	if (!grown) {
+		perror("Cannot allocate APP table");
+		return -ENOMEM;
+	}
+
+	grown[tab->n_apps] = *app;
+	tab->apps = grown;
+	tab->n_apps = new_count;
+	return 0;
+}
+
+/* Drop from A every entry that also occurs in B with the same selector,
+ * protocol and priority. The surviving entries are compacted to the front of
+ * A in their original order and A's count is updated; the array is not
+ * shrunk.
+ */
+static void dcb_app_table_remove_existing(struct dcb_app_table *a,
+					  const struct dcb_app_table *b)
+{
+	size_t kept = 0;
+	size_t ia;
+
+	for (ia = 0; ia < a->n_apps; ia++) {
+		const struct dcb_app *cur = &a->apps[ia];
+		size_t ib;
+
+		for (ib = 0; ib < b->n_apps; ib++) {
+			const struct dcb_app *other = &b->apps[ib];
+
+			if (cur->selector == other->selector &&
+			    cur->protocol == other->protocol &&
+			    cur->priority == other->priority)
+				break;
+		}
+
+		/* The scan reached the end of B: no exact match, keep it. */
+		if (ib == b->n_apps)
+			a->apps[kept++] = *cur;
+	}
+
+	a->n_apps = kept;
+}
+
+/* Reduce A to the entries that B supersedes: those whose selector/protocol
+ * pair appears in B, but for which B holds no entry with the same priority.
+ * The result is compacted to the front of A in original order.
+ */
+static void dcb_app_table_remove_replaced(struct dcb_app_table *a,
+					  const struct dcb_app_table *b)
+{
+	size_t ia, ja;
+	size_t ib;
+
+	for (ia = 0, ja = 0; ia < a->n_apps; ia++) {
+		struct dcb_app *aa = &a->apps[ia];
+		bool present = false;	/* B mentions this selector/protocol */
+		bool found = false;	/* B has the identical entry */
+
+		for (ib = 0; ib < b->n_apps; ib++) {
+			const struct dcb_app *ab = &b->apps[ib];
+
+			if (aa->selector == ab->selector &&
+			    aa->protocol == ab->protocol)
+				present = true;
+			else
+				continue;
+
+			if (aa->priority == ab->priority) {
+				found = true;
+				break;
+			}
+		}
+
+		/* Entries that remain in A will be removed, so keep in the
+		 * table only APP entries whose sel/pid is mentioned in B,
+		 * but that do not have the full sel/pid/prio match.
+		 */
+		if (present && !found)
+			a->apps[ja++] = *aa;
+	}
+
+	a->n_apps = ja;
+}
+
+/* Append every entry of B to A, in order. Stops at the first failed append
+ * and returns its error; entries appended before that stay in A. Returns 0
+ * when all entries were copied.
+ */
+static int dcb_app_table_copy(struct dcb_app_table *a,
+			      const struct dcb_app_table *b)
+{
+	size_t idx = 0;
+
+	while (idx < b->n_apps) {
+		int err = dcb_app_table_push(a, &b->apps[idx]);
+
+		if (err != 0)
+			return err;
+		idx++;
+	}
+
+	return 0;
+}
+
+/* Order two APP entries by protocol, then by priority. The selector is not
+ * part of the ordering. Returns a negative, zero or positive value.
+ */
+static int dcb_app_cmp(const struct dcb_app *a, const struct dcb_app *b)
+{
+	if (a->protocol != b->protocol)
+		return a->protocol < b->protocol ? -1 : 1;
+
+	return a->priority - b->priority;
+}
+
+/* qsort() adapter around dcb_app_cmp(); A and B point at struct dcb_app. */
+static int dcb_app_cmp_cb(const void *a, const void *b)
+{
+	return dcb_app_cmp(a, b);
+}
+
+/* Sort the entries of TAB in place using the dcb_app_cmp() ordering. */
+static void dcb_app_table_sort(struct dcb_app_table *tab)
+{
+	qsort(tab->apps, tab->n_apps, sizeof(*tab->apps), dcb_app_cmp_cb);
+}
+
+/* Context handed to dcb_app_parse_mapping_cb() while parsing KEY:PRIO pairs. */
+struct dcb_app_parse_mapping {
+	__u8 selector;			/* selector put into every created entry */
+	struct dcb_app_table *tab;	/* table the entries are appended to */
+	int err;			/* first append error; once set, later pairs are ignored */
+};
+
+/* dcb_parse_mapping() setter: append an APP entry built from KEY (protocol),
+ * VALUE (priority) and the selector in DATA (a struct dcb_app_parse_mapping)
+ * to the table. The callback cannot return a status, so a failed append is
+ * remembered in the context and every later call becomes a no-op.
+ */
+static void dcb_app_parse_mapping_cb(__u32 key, __u64 value, void *data)
+{
+	struct dcb_app_parse_mapping *pm = data;
+
+	if (pm->err == 0) {
+		struct dcb_app app = {
+			.selector = pm->selector,
+			.priority = value,
+			.protocol = key,
+		};
+
+		pm->err = dcb_app_table_push(pm->tab, &app);
+	}
+}
+
+/* Handle one ETHTYPE:PRIO pair. KEY is the already parsed EtherType, VALUE
+ * the priority text, DATA the struct dcb_app_parse_mapping context.
+ *
+ * Keys below 0x600 are rejected, as is a priority that get_u8() cannot
+ * parse; range checking and storing are left to dcb_parse_mapping().
+ * Returns 0 on success or -EINVAL.
+ */
+static int dcb_app_parse_mapping_ethtype_prio(__u32 key, char *value, void *data)
+{
+	__u8 prio;
+
+	if (key < 0x600) {
+		fprintf(stderr, "Protocol IDs < 0x600 are reserved for EtherType\n");
+		return -EINVAL;
+	}
+
+	if (get_u8(&prio, value, 0))
+		return -EINVAL;
+
+	return dcb_parse_mapping("ETHTYPE", key, 0xffff,
+				 "PRIO", prio, IEEE_8021QAZ_MAX_TCS - 1,
+				 dcb_app_parse_mapping_cb, data);
+}
+
+/* Parse the key of a DSCP:PRIO pair from ARG into *KEY.
+ *
+ * ARG is first tried with parse_mapping_num_all(). Failing that, it is
+ * resolved with rtnl_dsfield_a2n() as a dsfield value, which must not use the
+ * two low bits and is shifted right by two to obtain the DSCP.
+ * Returns 0 on success, -1 on failure.
+ */
+static int dcb_app_parse_dscp(__u32 *key, const char *arg)
+{
+	if (parse_mapping_num_all(key, arg) == 0)
+		return 0;
+
+	if (rtnl_dsfield_a2n(key, arg) != 0)
+		return -1;
+
+	if (*key & 0x03) {
+		fprintf(stderr, "The values `%s' uses non-DSCP bits.\n", arg);
+		return -1;
+	}
+
+	/* Unshift the value to convert it from dsfield to DSCP. */
+	*key >>= 2;
+	return 0;
+}
+
+/* Handle one DSCP:PRIO mapping: the value must parse as a u8; the key is
+ * bounded by 63 and the priority by IEEE_8021QAZ_MAX_TCS - 1 in the
+ * dcb_parse_mapping() call.
+ */
+static int dcb_app_parse_mapping_dscp_prio(__u32 key, char *value, void *data)
+{
+	__u8 prio;
+	int bad_value = get_u8(&prio, value, 0);
+
+	if (bad_value)
+		return -EINVAL;
+
+	return dcb_parse_mapping("DSCP", key, 63,
+				 "PRIO", prio, IEEE_8021QAZ_MAX_TCS - 1,
+				 dcb_app_parse_mapping_cb, data);
+}
+
+/* Handle one PORT:PRIO mapping. Port 0 is rejected and the value must
+ * parse as a u8; the rest is left to dcb_parse_mapping().
+ */
+static int dcb_app_parse_mapping_port_prio(__u32 key, char *value, void *data)
+{
+	__u8 prio;
+
+	if (key != 0) {
+		if (get_u8(&prio, value, 0) != 0)
+			return -EINVAL;
+
+		return dcb_parse_mapping("PORT", key, 0xffff,
+					 "PRIO", prio, IEEE_8021QAZ_MAX_TCS - 1,
+					 dcb_app_parse_mapping_cb, data);
+	}
+
+	fprintf(stderr, "Port ID of 0 is invalid\n");
+	return -EINVAL;
+}
+
+/* Consume command-line words as default priorities. Each word that
+ * parses as a u8 becomes an ETHERTYPE-selector entry with protocol 0 and
+ * is pushed to tab.
+ *
+ * On return *argcp / *argvp describe the first word that was not
+ * consumed. Returns 0 when all words were consumed, 1 when a word did
+ * not parse as a u8, or the non-zero result of dcb_app_table_push().
+ */
+static int dcb_app_parse_default_prio(int *argcp, char ***argvp, struct dcb_app_table *tab)
+{
+	char **argv = *argvp;
+	int argc = *argcp;
+	int ret = 0;
+
+	for (; argc > 0; argc--, argv++) {
+		struct dcb_app app = {
+			.selector = IEEE_8021QAZ_APP_SEL_ETHERTYPE,
+			.protocol = 0,
+		};
+		__u8 prio;
+
+		if (get_u8(&prio, *argv, 0)) {
+			ret = 1;
+			break;
+		}
+		app.priority = prio;
+
+		ret = dcb_app_table_push(tab, &app);
+		if (ret != 0)
+			break;
+	}
+
+	*argcp = argc;
+	*argvp = argv;
+	return ret;
+}
+
+/* True for ETHERTYPE-selector entries with a non-zero protocol. */
+static bool dcb_app_is_ethtype(const struct dcb_app *app)
+{
+	if (app->selector != IEEE_8021QAZ_APP_SEL_ETHERTYPE)
+		return false;
+
+	return app->protocol != 0;
+}
+
+/* True for ETHERTYPE-selector entries whose protocol is 0. */
+static bool dcb_app_is_default(const struct dcb_app *app)
+{
+	if (app->selector != IEEE_8021QAZ_APP_SEL_ETHERTYPE)
+		return false;
+
+	return app->protocol == 0;
+}
+
+/* True for entries that use the DSCP selector. */
+static bool dcb_app_is_dscp(const struct dcb_app *app)
+{
+	const __u8 sel = app->selector;
+
+	return sel == IEEE_8021QAZ_APP_SEL_DSCP;
+}
+
+/* True for entries that use the STREAM selector. */
+static bool dcb_app_is_stream_port(const struct dcb_app *app)
+{
+	const __u8 sel = app->selector;
+
+	return sel == IEEE_8021QAZ_APP_SEL_STREAM;
+}
+
+/* True for entries that use the DGRAM selector. */
+static bool dcb_app_is_dgram_port(const struct dcb_app *app)
+{
+	const __u8 sel = app->selector;
+
+	return sel == IEEE_8021QAZ_APP_SEL_DGRAM;
+}
+
+/* True for entries that use the ANY selector. */
+static bool dcb_app_is_port(const struct dcb_app *app)
+{
+	const __u8 sel = app->selector;
+
+	return sel == IEEE_8021QAZ_APP_SEL_ANY;
+}
+
+/* Print the key of a mapping as "<decimal>:". */
+static int dcb_app_print_key_dec(__u16 protocol)
+{
+	const __u16 key = protocol;
+
+	return print_uint(PRINT_ANY, NULL, "%d:", key);
+}
+
+/* Print the key of a mapping as "<hex>:". */
+static int dcb_app_print_key_hex(__u16 protocol)
+{
+	const __u16 key = protocol;
+
+	return print_uint(PRINT_ANY, NULL, "%x:", key);
+}
+
+/* Print the key of a DSCP mapping. Outside of JSON context, a name found
+ * by rtnl_dsfield_get_name() for the value shifted left by two is
+ * printed as "<name>:"; otherwise the number is printed as "<decimal>:".
+ */
+static int dcb_app_print_key_dscp(__u16 protocol)
+{
+	const char *name = rtnl_dsfield_get_name(protocol << 2);
+
+	if (is_json_context() || name == NULL)
+		return print_uint(PRINT_ANY, NULL, "%d:", protocol);
+
+	return print_string(PRINT_FP, NULL, "%s:", name);
+}
+
+/* Print the entries of tab accepted by filter as one named list.
+ *
+ * The JSON array json_name and the plain-text heading fp_name are only
+ * emitted once a first matching entry is found, so nothing at all is
+ * printed when no entry matches. Each entry is printed as its key, via
+ * print_key, followed by its priority.
+ */
+static void dcb_app_print_filtered(const struct dcb_app_table *tab,
+				   bool (*filter)(const struct dcb_app *),
+				   int (*print_key)(__u16 protocol),
+				   const char *json_name,
+				   const char *fp_name)
+{
+	bool opened = false;
+	size_t idx;
+
+	for (idx = 0; idx < tab->n_apps; idx++) {
+		const struct dcb_app *entry = &tab->apps[idx];
+
+		if (!filter(entry))
+			continue;
+
+		if (!opened) {
+			open_json_array(PRINT_JSON, json_name);
+			print_string(PRINT_FP, NULL, "%s ", fp_name);
+			opened = true;
+		}
+
+		open_json_array(PRINT_JSON, NULL);
+		print_key(entry->protocol);
+		print_uint(PRINT_ANY, NULL, "%d ", entry->priority);
+		close_json_array(PRINT_JSON, NULL);
+	}
+
+	if (opened) {
+		close_json_array(PRINT_JSON, json_name);
+		print_nl();
+	}
+}
+
+/* Print the entries accepted by dcb_app_is_ethtype(), keys in hex. */
+static void dcb_app_print_ethtype_prio(const struct dcb_app_table *tab)
+{
+	bool (*filter)(const struct dcb_app *) = dcb_app_is_ethtype;
+
+	dcb_app_print_filtered(tab, filter, dcb_app_print_key_hex,
+			       "ethtype_prio", "ethtype-prio");
+}
+
+/* Print the entries accepted by dcb_app_is_dscp(). Keys are printed as
+ * decimal numbers when dcb->numeric is set, otherwise through
+ * dcb_app_print_key_dscp().
+ */
+static void dcb_app_print_dscp_prio(const struct dcb *dcb,
+				    const struct dcb_app_table *tab)
+{
+	int (*print_key)(__u16 protocol) = dcb_app_print_key_dscp;
+
+	if (dcb->numeric)
+		print_key = dcb_app_print_key_dec;
+
+	dcb_app_print_filtered(tab, dcb_app_is_dscp, print_key,
+			       "dscp_prio", "dscp-prio");
+}
+
+/* Print the entries accepted by dcb_app_is_stream_port(), keys in decimal. */
+static void dcb_app_print_stream_port_prio(const struct dcb_app_table *tab)
+{
+	bool (*filter)(const struct dcb_app *) = dcb_app_is_stream_port;
+
+	dcb_app_print_filtered(tab, filter, dcb_app_print_key_dec,
+			       "stream_port_prio", "stream-port-prio");
+}
+
+/* Print the entries accepted by dcb_app_is_dgram_port(), keys in decimal. */
+static void dcb_app_print_dgram_port_prio(const struct dcb_app_table *tab)
+{
+	bool (*filter)(const struct dcb_app *) = dcb_app_is_dgram_port;
+
+	dcb_app_print_filtered(tab, filter, dcb_app_print_key_dec,
+			       "dgram_port_prio", "dgram-port-prio");
+}
+
+/* Print the entries accepted by dcb_app_is_port(), keys in decimal. */
+static void dcb_app_print_port_prio(const struct dcb_app_table *tab)
+{
+	bool (*filter)(const struct dcb_app *) = dcb_app_is_port;
+
+	dcb_app_print_filtered(tab, filter, dcb_app_print_key_dec,
+			       "port_prio", "port-prio");
+}
+
+/* Print the priorities of all entries accepted by dcb_app_is_default().
+ * The "default_prio" JSON array and the "default-prio " heading are only
+ * emitted when at least one such entry exists.
+ */
+static void dcb_app_print_default_prio(const struct dcb_app_table *tab)
+{
+	bool opened = false;
+	size_t idx;
+
+	for (idx = 0; idx < tab->n_apps; idx++) {
+		const struct dcb_app *entry = &tab->apps[idx];
+
+		if (!dcb_app_is_default(entry))
+			continue;
+
+		if (!opened) {
+			open_json_array(PRINT_JSON, "default_prio");
+			print_string(PRINT_FP, NULL, "default-prio ", NULL);
+			opened = true;
+		}
+		print_uint(PRINT_ANY, NULL, "%d ", entry->priority);
+	}
+
+	if (opened) {
+		close_json_array(PRINT_JSON, "default_prio");
+		print_nl();
+	}
+}
+
+/* Print the whole APP table, one section per kind of entry. The call
+ * order below is the order of the sections in the output; sections
+ * without any entry print nothing.
+ */
+static void dcb_app_print(const struct dcb *dcb, const struct dcb_app_table *tab)
+{
+	dcb_app_print_ethtype_prio(tab);
+	dcb_app_print_default_prio(tab);
+	dcb_app_print_dscp_prio(dcb, tab);
+	dcb_app_print_stream_port_prio(tab);
+	dcb_app_print_dgram_port_prio(tab);
+	dcb_app_print_port_prio(tab);
+}
+
+/* Attribute callback for the contents of DCB_ATTR_IEEE_APP_TABLE.
+ *
+ * Attributes of another type, or with a payload shorter than struct
+ * dcb_app, are reported on stderr and skipped (MNL_CB_OK). A valid
+ * payload is pushed to the table passed in data.
+ *
+ * Returns MNL_CB_ERROR only when dcb_app_table_push() fails.
+ */
+static int dcb_app_get_table_attr_cb(const struct nlattr *attr, void *data)
+{
+	struct dcb_app_table *tab = data;
+	struct dcb_app *app;
+	int ret;
+
+	if (mnl_attr_get_type(attr) != DCB_ATTR_IEEE_APP) {
+		fprintf(stderr, "Unknown attribute in DCB_ATTR_IEEE_APP_TABLE: %d\n",
+			mnl_attr_get_type(attr));
+		return MNL_CB_OK;
+	}
+	if (mnl_attr_get_payload_len(attr) < sizeof(struct dcb_app)) {
+		/* sizeof yields a size_t, which is printed with %zu. */
+		fprintf(stderr, "DCB_ATTR_IEEE_APP payload expected to have size %zu, not %d\n",
+			sizeof(struct dcb_app), mnl_attr_get_payload_len(attr));
+		return MNL_CB_OK;
+	}
+
+	app = mnl_attr_get_payload(attr);
+	ret = dcb_app_table_push(tab, app);
+	if (ret != 0)
+		return MNL_CB_ERROR;
+
+	return MNL_CB_OK;
+}
+
+/* Fetch DCB_ATTR_IEEE_APP_TABLE for dev and push its entries to tab.
+ *
+ * Returns the error of dcb_get_attribute_va() if the request fails,
+ * -EINVAL if parsing the payload does not end with MNL_CB_OK, and 0
+ * otherwise. tab may already hold some entries when -EINVAL is returned.
+ */
+static int dcb_app_get(struct dcb *dcb, const char *dev, struct dcb_app_table *tab)
+{
+	uint16_t len;
+	void *data;
+	int err;
+
+	err = dcb_get_attribute_va(dcb, dev, DCB_ATTR_IEEE_APP_TABLE, &data, &len);
+	if (err != 0)
+		return err;
+
+	if (mnl_attr_parse_payload(data, len, dcb_app_get_table_attr_cb, tab) != MNL_CB_OK)
+		return -EINVAL;
+
+	return 0;
+}
+
+/* Context handed to dcb_app_add_del_cb() through its data pointer. */
+struct dcb_app_add_del {
+	/* Table whose entries are put into the message. */
+	const struct dcb_app_table *tab;
+	/* Optional predicate; NULL selects every entry of the table. */
+	bool (*filter)(const struct dcb_app *app);
+};
+
+/* Message-building callback: add a DCB_ATTR_IEEE_APP_TABLE nest to nlh
+ * that holds one DCB_ATTR_IEEE_APP attribute per selected table entry.
+ * An entry is selected when the context has no filter or the filter
+ * accepts it. Always returns 0.
+ */
+static int dcb_app_add_del_cb(struct dcb *dcb, struct nlmsghdr *nlh, void *data)
+{
+	const struct dcb_app_add_del *ctx = data;
+	const struct dcb_app_table *tab = ctx->tab;
+	struct nlattr *nest;
+	size_t idx;
+
+	nest = mnl_attr_nest_start(nlh, DCB_ATTR_IEEE_APP_TABLE);
+
+	for (idx = 0; idx < tab->n_apps; idx++) {
+		const struct dcb_app *app = &tab->apps[idx];
+
+		if (ctx->filter != NULL && !ctx->filter(app))
+			continue;
+
+		mnl_attr_put(nlh, DCB_ATTR_IEEE_APP, sizeof(*app), app);
+	}
+
+	mnl_attr_nest_end(nlh, nest);
+	return 0;
+}
+
+/* Send the entries of tab that pass filter (all of them if filter is
+ * NULL) to dev with the given DCB command. An empty table sends nothing
+ * and returns 0; otherwise the result of dcb_set_attribute_va() is
+ * returned.
+ */
+static int dcb_app_add_del(struct dcb *dcb, const char *dev, int command,
+			   const struct dcb_app_table *tab,
+			   bool (*filter)(const struct dcb_app *))
+{
+	struct dcb_app_add_del ctx;
+
+	if (tab->n_apps == 0)
+		return 0;
+
+	ctx.tab = tab;
+	ctx.filter = filter;
+	return dcb_set_attribute_va(dcb, command, dev, dcb_app_add_del_cb, &ctx);
+}
+
+/* Parse the arguments shared by the add, del and replace commands into
+ * tab.
+ *
+ * Each iteration expects a selector keyword followed by its mappings
+ * (or, for default-prio, by plain priorities). The parsers are handed
+ * &argc / &argv and so consume the words themselves.
+ *
+ * Returns 0 on success and also after printing help; -EINVAL for an
+ * unknown keyword; the non-zero parser result for a bad mapping; or the
+ * error recorded in pm.err by the mapping callback. tab may hold some
+ * entries when an error is returned, so the caller still has to release
+ * it.
+ */
+static int dcb_cmd_app_parse_add_del(struct dcb *dcb, const char *dev,
+				     int argc, char **argv, struct dcb_app_table *tab)
+{
+	struct dcb_app_parse_mapping pm = {
+		.tab = tab,
+	};
+	int ret;
+
+	if (!argc) {
+		dcb_app_help_add();
+		return 0;
+	}
+
+	do {
+		/* NOTE(review): NEXT_ARG() is defined outside this chunk;
+		 * presumably it steps to the word after the keyword -- confirm.
+		 */
+		if (matches(*argv, "help") == 0) {
+			dcb_app_help_add();
+			return 0;
+		} else if (matches(*argv, "ethtype-prio") == 0) {
+			NEXT_ARG();
+			pm.selector = IEEE_8021QAZ_APP_SEL_ETHERTYPE;
+			ret = parse_mapping(&argc, &argv, false,
+					    &dcb_app_parse_mapping_ethtype_prio,
+					    &pm);
+		} else if (matches(*argv, "default-prio") == 0) {
+			NEXT_ARG();
+			ret = dcb_app_parse_default_prio(&argc, &argv, pm.tab);
+			if (ret != 0) {
+				fprintf(stderr, "Invalid default priority %s\n", *argv);
+				return ret;
+			}
+		} else if (matches(*argv, "dscp-prio") == 0) {
+			NEXT_ARG();
+			pm.selector = IEEE_8021QAZ_APP_SEL_DSCP;
+			/* DSCP keys need dcb_app_parse_dscp() as the key parser. */
+			ret = parse_mapping_gen(&argc, &argv,
+						&dcb_app_parse_dscp,
+						&dcb_app_parse_mapping_dscp_prio,
+						&pm);
+		} else if (matches(*argv, "stream-port-prio") == 0) {
+			NEXT_ARG();
+			pm.selector = IEEE_8021QAZ_APP_SEL_STREAM;
+			ret = parse_mapping(&argc, &argv, false,
+					    &dcb_app_parse_mapping_port_prio,
+					    &pm);
+		} else if (matches(*argv, "dgram-port-prio") == 0) {
+			NEXT_ARG();
+			pm.selector = IEEE_8021QAZ_APP_SEL_DGRAM;
+			ret = parse_mapping(&argc, &argv, false,
+					    &dcb_app_parse_mapping_port_prio,
+					    &pm);
+		} else if (matches(*argv, "port-prio") == 0) {
+			NEXT_ARG();
+			pm.selector = IEEE_8021QAZ_APP_SEL_ANY;
+			ret = parse_mapping(&argc, &argv, false,
+					    &dcb_app_parse_mapping_port_prio,
+					    &pm);
+		} else {
+			fprintf(stderr, "What is \"%s\"?\n", *argv);
+			dcb_app_help_add();
+			return -EINVAL;
+		}
+
+		if (ret != 0) {
+			fprintf(stderr, "Invalid mapping %s\n", *argv);
+			return ret;
+		}
+		/* The mapping callback returns void, so a failed table push
+		 * is only visible through pm.err.
+		 */
+		if (pm.err)
+			return pm.err;
+	} while (argc > 0);
+
+	return 0;
+}
+
+/* Handle "dcb app add": parse the mappings into a temporary table and
+ * send them with DCB_CMD_IEEE_SET.
+ *
+ * Returns 0 on success, otherwise the parser's or the request's error.
+ */
+static int dcb_cmd_app_add(struct dcb *dcb, const char *dev, int argc, char **argv)
+{
+	struct dcb_app_table tab = {};
+	int ret;
+
+	ret = dcb_cmd_app_parse_add_del(dcb, dev, argc, argv, &tab);
+	if (ret != 0)
+		goto out;
+
+	ret = dcb_app_add_del(dcb, dev, DCB_CMD_IEEE_SET, &tab, NULL);
+
+out:
+	/* The parser may have filled part of the table before failing, so
+	 * release it on every path.
+	 */
+	dcb_app_table_fini(&tab);
+	return ret;
+}
+
+/* Handle "dcb app del": parse the mappings into a temporary table and
+ * send them with DCB_CMD_IEEE_DEL.
+ *
+ * Returns 0 on success, otherwise the parser's or the request's error.
+ */
+static int dcb_cmd_app_del(struct dcb *dcb, const char *dev, int argc, char **argv)
+{
+	struct dcb_app_table tab = {};
+	int ret;
+
+	ret = dcb_cmd_app_parse_add_del(dcb, dev, argc, argv, &tab);
+	if (ret != 0)
+		goto out;
+
+	ret = dcb_app_add_del(dcb, dev, DCB_CMD_IEEE_DEL, &tab, NULL);
+
+out:
+	/* The parser may have filled part of the table before failing, so
+	 * release it on every path.
+	 */
+	dcb_app_table_fini(&tab);
+	return ret;
+}
+
+/* Handle "dcb app show": fetch the APP table of dev, sort it and print
+ * either everything (no further arguments) or only the sections named
+ * on the command line, in the order given.
+ *
+ * Returns 0 on success (including "help"), the error of dcb_app_get(),
+ * or -EINVAL for an unknown keyword.
+ */
+static int dcb_cmd_app_show(struct dcb *dcb, const char *dev, int argc, char **argv)
+{
+	struct dcb_app_table tab = {};
+	int ret;
+
+	ret = dcb_app_get(dcb, dev, &tab);
+	if (ret != 0) {
+		/* Entries may have been pushed before the failure. */
+		dcb_app_table_fini(&tab);
+		return ret;
+	}
+
+	dcb_app_table_sort(&tab);
+
+	open_json_object(NULL);
+
+	if (!argc) {
+		dcb_app_print(dcb, &tab);
+		goto out;
+	}
+
+	do {
+		if (matches(*argv, "help") == 0) {
+			dcb_app_help_show_flush();
+			goto out;
+		} else if (matches(*argv, "ethtype-prio") == 0) {
+			dcb_app_print_ethtype_prio(&tab);
+		} else if (matches(*argv, "dscp-prio") == 0) {
+			dcb_app_print_dscp_prio(dcb, &tab);
+		} else if (matches(*argv, "stream-port-prio") == 0) {
+			dcb_app_print_stream_port_prio(&tab);
+		} else if (matches(*argv, "dgram-port-prio") == 0) {
+			dcb_app_print_dgram_port_prio(&tab);
+		} else if (matches(*argv, "port-prio") == 0) {
+			dcb_app_print_port_prio(&tab);
+		} else if (matches(*argv, "default-prio") == 0) {
+			/* Tested last so that abbreviations which selected one
+			 * of the keywords above keep selecting it.
+			 */
+			dcb_app_print_default_prio(&tab);
+		} else {
+			fprintf(stderr, "What is \"%s\"?\n", *argv);
+			dcb_app_help_show_flush();
+			ret = -EINVAL;
+			goto out;
+		}
+
+		NEXT_ARG_FWD();
+	} while (argc > 0);
+
+out:
+	close_json_object();
+	dcb_app_table_fini(&tab);
+	return ret;
+}
+
+/* Handle "dcb app flush": fetch the APP table of dev and delete either
+ * all of its entries (no further arguments) or, per keyword on the
+ * command line, the entries of that kind.
+ *
+ * Returns 0 on success (including "help"), the error of dcb_app_get()
+ * or of a delete request, or -EINVAL for an unknown keyword.
+ */
+static int dcb_cmd_app_flush(struct dcb *dcb, const char *dev, int argc, char **argv)
+{
+	struct dcb_app_table tab = {};
+	int ret;
+
+	ret = dcb_app_get(dcb, dev, &tab);
+	if (ret != 0)
+		goto out;
+
+	if (!argc) {
+		ret = dcb_app_add_del(dcb, dev, DCB_CMD_IEEE_DEL, &tab, NULL);
+		goto out;
+	}
+
+	do {
+		bool (*filter)(const struct dcb_app *);
+
+		if (matches(*argv, "help") == 0) {
+			dcb_app_help_show_flush();
+			goto out;
+		} else if (matches(*argv, "ethtype-prio") == 0) {
+			filter = &dcb_app_is_ethtype;
+		} else if (matches(*argv, "default-prio") == 0) {
+			filter = &dcb_app_is_default;
+		} else if (matches(*argv, "dscp-prio") == 0) {
+			filter = &dcb_app_is_dscp;
+		} else if (matches(*argv, "stream-port-prio") == 0) {
+			/* The port selectors are tested last so that
+			 * abbreviations which selected one of the keywords
+			 * above keep selecting it.
+			 */
+			filter = &dcb_app_is_stream_port;
+		} else if (matches(*argv, "dgram-port-prio") == 0) {
+			filter = &dcb_app_is_dgram_port;
+		} else if (matches(*argv, "port-prio") == 0) {
+			filter = &dcb_app_is_port;
+		} else {
+			fprintf(stderr, "What is \"%s\"?\n", *argv);
+			dcb_app_help_show_flush();
+			ret = -EINVAL;
+			goto out;
+		}
+
+		ret = dcb_app_add_del(dcb, dev, DCB_CMD_IEEE_DEL, &tab, filter);
+		if (ret != 0)
+			goto out;
+
+		NEXT_ARG_FWD();
+	} while (argc > 0);
+
+out:
+	dcb_app_table_fini(&tab);
+	return ret;
+}
+
+/* Handle "dcb app replace": add the requested entries that are not
+ * installed yet, then delete the installed entries that they replace.
+ *
+ * Three tables are used: orig holds what is installed on dev, tab what
+ * the command line asks for, and new is a copy of tab reduced by
+ * dcb_app_table_remove_existing().
+ *
+ * Returns 0 on success, otherwise the error of the step that failed.
+ */
+static int dcb_cmd_app_replace(struct dcb *dcb, const char *dev, int argc, char **argv)
+{
+	struct dcb_app_table orig = {};
+	struct dcb_app_table tab = {};
+	struct dcb_app_table new = {};
+	int ret;
+
+	ret = dcb_app_get(dcb, dev, &orig);
+	if (ret != 0)
+		goto out;
+
+	ret = dcb_cmd_app_parse_add_del(dcb, dev, argc, argv, &tab);
+	if (ret != 0)
+		goto out;
+
+	/* Attempts to add an existing entry would be rejected, so drop
+	 * these entries from tab.
+	 */
+	ret = dcb_app_table_copy(&new, &tab);
+	if (ret != 0)
+		goto out;
+	dcb_app_table_remove_existing(&new, &orig);
+
+	ret = dcb_app_add_del(dcb, dev, DCB_CMD_IEEE_SET, &new, NULL);
+	if (ret != 0) {
+		fprintf(stderr, "Could not add new APP entries\n");
+		goto out;
+	}
+
+	/* Remove the obsolete entries. */
+	dcb_app_table_remove_replaced(&orig, &tab);
+	ret = dcb_app_add_del(dcb, dev, DCB_CMD_IEEE_DEL, &orig, NULL);
+	if (ret != 0) {
+		fprintf(stderr, "Could not remove replaced APP entries\n");
+		goto out;
+	}
+
+out:
+	dcb_app_table_fini(&new);
+	dcb_app_table_fini(&tab);
+	dcb_app_table_fini(&orig);
+	/* Hand the failure to the caller instead of always claiming success. */
+	return ret;
+}
+
+/* Entry point of "dcb app": dispatch on the sub-command word. Every
+ * sub-command except help goes through dcb_cmd_parse_dev() with the
+ * matching handler and help function.
+ *
+ * Returns 0 after printing help, -EINVAL for an unknown sub-command, and
+ * otherwise the result of dcb_cmd_parse_dev().
+ */
+int dcb_cmd_app(struct dcb *dcb, int argc, char **argv)
+{
+	if (!argc || matches(*argv, "help") == 0) {
+		dcb_app_help();
+		return 0;
+	}
+
+	if (matches(*argv, "show") == 0) {
+		NEXT_ARG_FWD();
+		return dcb_cmd_parse_dev(dcb, argc, argv,
+					 dcb_cmd_app_show, dcb_app_help_show_flush);
+	}
+
+	if (matches(*argv, "flush") == 0) {
+		NEXT_ARG_FWD();
+		return dcb_cmd_parse_dev(dcb, argc, argv,
+					 dcb_cmd_app_flush, dcb_app_help_show_flush);
+	}
+
+	if (matches(*argv, "add") == 0) {
+		NEXT_ARG_FWD();
+		return dcb_cmd_parse_dev(dcb, argc, argv,
+					 dcb_cmd_app_add, dcb_app_help_add);
+	}
+
+	if (matches(*argv, "del") == 0) {
+		NEXT_ARG_FWD();
+		return dcb_cmd_parse_dev(dcb, argc, argv,
+					 dcb_cmd_app_del, dcb_app_help_add);
+	}
+
+	if (matches(*argv, "replace") == 0) {
+		NEXT_ARG_FWD();
+		return dcb_cmd_parse_dev(dcb, argc, argv,
+					 dcb_cmd_app_replace, dcb_app_help_add);
+	}
+
+	fprintf(stderr, "What is \"%s\"?\n", *argv);
+	dcb_app_help();
+	return -EINVAL;
+}
--- a/dcb/dcb_buffer.c
+++ b/dcb/dcb_buffer.c
@ -0,0 +1,235 @@
+// SPDX-License-Identifier: GPL-2.0+
+
+#include <errno.h>
+#include <inttypes.h>
+#include <stdio.h>
+#include <linux/dcbnl.h>
+
+#include "dcb.h"
+#include "utils.h"
+
+/* Write the usage text of "dcb buffer set" to stderr. */
+static void dcb_buffer_help_set(void)
+{
+	static const char usage[] =
+		"Usage: dcb buffer set dev STRING\n"
+		"           [ prio-buffer PRIO-MAP ]\n"
+		"           [ buffer-size SIZE-MAP ]\n"
+		"\n"
+		" where PRIO-MAP := [ PRIO-MAP ] PRIO-MAPPING\n"
+		"       PRIO-MAPPING := { all | PRIO }:BUFFER\n"
+		"       SIZE-MAP := [ SIZE-MAP ] SIZE-MAPPING\n"
+		"       SIZE-MAPPING := { all | BUFFER }:INTEGER\n"
+		"       PRIO := { 0 .. 7 }\n"
+		"       BUFFER := { 0 .. 7 }\n"
+		"\n";
+
+	fputs(usage, stderr);
+}
+
+/* Write the usage text of "dcb buffer show" to stderr. */
+static void dcb_buffer_help_show(void)
+{
+	static const char usage[] =
+		"Usage: dcb buffer show dev STRING\n"
+		"           [ prio-buffer ] [ buffer-size ] [ total-size ]\n"
+		"\n";
+
+	fputs(usage, stderr);
+}
+
+/* Write the complete "dcb buffer" usage to stderr: the help line,
+ * followed by the show and the set usage.
+ */
+static void dcb_buffer_help(void)
+{
+	fputs("Usage: dcb buffer help\n"
+	      "\n", stderr);
+
+	dcb_buffer_help_show();
+	dcb_buffer_help_set();
+}
+
+/* Handle one PRIO:BUFFER mapping. The value must parse as a u8; the
+ * bounds are given to dcb_parse_mapping(), which stores through
+ * dcb_set_u8 into the prio2buffer array of the dcbnl_buffer in data.
+ */
+static int dcb_buffer_parse_mapping_prio_buffer(__u32 key, char *value, void *data)
+{
+	struct dcbnl_buffer *target = data;
+	__u8 buf_idx;
+
+	if (get_u8(&buf_idx, value, 0) != 0)
+		return -EINVAL;
+
+	return dcb_parse_mapping("PRIO", key, IEEE_8021Q_MAX_PRIORITIES - 1,
+				 "BUFFER", buf_idx, DCBX_MAX_BUFFERS - 1,
+				 dcb_set_u8, target->prio2buffer);
+}
+
+/* Handle one BUFFER:SIZE mapping. The value is parsed with get_size();
+ * the result is stored through dcb_set_u32 into the buffer_size array of
+ * the dcbnl_buffer in data.
+ */
+static int dcb_buffer_parse_mapping_buffer_size(__u32 key, char *value, void *data)
+{
+	struct dcbnl_buffer *target = data;
+	unsigned int bytes;
+
+	if (get_size(&bytes, value) != 0) {
+		fprintf(stderr, "%d:%s: Illegal value for buffer size\n", key, value);
+		return -EINVAL;
+	}
+
+	return dcb_parse_mapping("BUFFER", key, DCBX_MAX_BUFFERS - 1,
+				 "INTEGER", bytes, -1,
+				 dcb_set_u32, target->buffer_size);
+}
+
+/* Print the total_size field as "total-size" (JSON key "total_size"). */
+static void dcb_buffer_print_total_size(const struct dcbnl_buffer *buffer)
+{
+	const __u32 total = buffer->total_size;
+
+	print_size(PRINT_ANY, "total_size", "total-size %s ", total);
+}
+
+/* Print the prio2buffer array as "prio-buffer" (JSON key "prio_buffer"). */
+static void dcb_buffer_print_prio_buffer(const struct dcbnl_buffer *buffer)
+{
+	const size_t count = ARRAY_SIZE(buffer->prio2buffer);
+
+	dcb_print_named_array("prio_buffer", "prio-buffer",
+			      buffer->prio2buffer, count,
+			      dcb_print_array_u8);
+}
+
+/* Print the buffer_size array as "buffer-size" (JSON array
+ * "buffer_size"), one "<index>:<size>" item per buffer.
+ */
+static void dcb_buffer_print_buffer_size(const struct dcbnl_buffer *buffer)
+{
+	size_t size = ARRAY_SIZE(buffer->buffer_size);
+	SPRINT_BUF(b);
+	size_t i;
+
+	open_json_array(PRINT_JSON, "buffer_size");
+	print_string(PRINT_FP, NULL, "buffer-size ", NULL);
+
+	for (i = 0; i < size; i++) {
+		/* Build the per-item format: the index is fixed here, the
+		 * escaped %%s is left for print_size() to fill in. The index
+		 * is a size_t, hence %zu.
+		 */
+		snprintf(b, sizeof(b), "%zu:%%s ", i);
+		print_size(PRINT_ANY, NULL, b, buffer->buffer_size[i]);
+	}
+
+	close_json_array(PRINT_JSON, "buffer_size");
+}
+
+/* Print all buffer attributes: prio-buffer, buffer-size and total-size,
+ * in this order, each followed by print_nl().
+ */
+static void dcb_buffer_print(const struct dcbnl_buffer *buffer)
+{
+	dcb_buffer_print_prio_buffer(buffer);
+	print_nl();
+
+	dcb_buffer_print_buffer_size(buffer);
+	print_nl();
+
+	dcb_buffer_print_total_size(buffer);
+	print_nl();
+}
+
+/* Read DCB_ATTR_DCB_BUFFER of dev into *buffer; returns the result of
+ * dcb_get_attribute().
+ */
+static int dcb_buffer_get(struct dcb *dcb, const char *dev, struct dcbnl_buffer *buffer)
+{
+	const size_t len = sizeof(*buffer);
+
+	return dcb_get_attribute(dcb, dev, DCB_ATTR_DCB_BUFFER, buffer, len);
+}
+
+/* Write *buffer to dev as DCB_ATTR_DCB_BUFFER; returns the result of
+ * dcb_set_attribute().
+ */
+static int dcb_buffer_set(struct dcb *dcb, const char *dev, const struct dcbnl_buffer *buffer)
+{
+	const size_t len = sizeof(*buffer);
+
+	return dcb_set_attribute(dcb, dev, DCB_ATTR_DCB_BUFFER, buffer, len);
+}
+
+/* Handle "dcb buffer set": read the current buffer configuration of
+ * dev, apply the prio-buffer / buffer-size mappings from the command
+ * line to it, and write the result back.
+ *
+ * Returns 0 after printing help, -EINVAL for an unknown keyword, the
+ * non-zero parser result for a bad mapping, and otherwise the result of
+ * dcb_buffer_get() / dcb_buffer_set(). Nothing is written to the device
+ * unless all arguments parsed.
+ */
+static int dcb_cmd_buffer_set(struct dcb *dcb, const char *dev, int argc, char **argv)
+{
+	struct dcbnl_buffer buffer;
+	int ret;
+
+	if (!argc) {
+		dcb_buffer_help_set();
+		return 0;
+	}
+
+	/* Start from the current settings so that unmentioned entries keep
+	 * their values.
+	 */
+	ret = dcb_buffer_get(dcb, dev, &buffer);
+	if (ret)
+		return ret;
+
+	do {
+		if (matches(*argv, "help") == 0) {
+			dcb_buffer_help_set();
+			return 0;
+		} else if (matches(*argv, "prio-buffer") == 0) {
+			NEXT_ARG();
+			ret = parse_mapping(&argc, &argv, true,
+					    &dcb_buffer_parse_mapping_prio_buffer, &buffer);
+			if (ret) {
+				fprintf(stderr, "Invalid priority mapping %s\n", *argv);
+				return ret;
+			}
+			/* parse_mapping() was handed &argc / &argv, so skip the
+			 * NEXT_ARG_FWD() at the end of the loop body.
+			 */
+			continue;
+		} else if (matches(*argv, "buffer-size") == 0) {
+			NEXT_ARG();
+			ret = parse_mapping(&argc, &argv, true,
+					    &dcb_buffer_parse_mapping_buffer_size, &buffer);
+			if (ret) {
+				fprintf(stderr, "Invalid buffer size mapping %s\n", *argv);
+				return ret;
+			}
+			continue;
+		} else {
+			fprintf(stderr, "What is \"%s\"?\n", *argv);
+			dcb_buffer_help_set();
+			return -EINVAL;
+		}
+
+		NEXT_ARG_FWD();
+	} while (argc > 0);
+
+	return dcb_buffer_set(dcb, dev, &buffer);
+}
+
+/* Handle "dcb buffer show": read the buffer configuration of dev and
+ * print either all of it (no further arguments) or only the attributes
+ * named on the command line, in the order given.
+ *
+ * Returns 0 on success (including "help"), the error of
+ * dcb_buffer_get(), or -EINVAL for an unknown keyword.
+ */
+static int dcb_cmd_buffer_show(struct dcb *dcb, const char *dev, int argc, char **argv)
+{
+	struct dcbnl_buffer buffer;
+	int ret;
+
+	ret = dcb_buffer_get(dcb, dev, &buffer);
+	if (ret)
+		return ret;
+
+	open_json_object(NULL);
+
+	if (!argc) {
+		dcb_buffer_print(&buffer);
+		goto out;
+	}
+
+	do {
+		if (matches(*argv, "help") == 0) {
+			dcb_buffer_help_show();
+			goto out;
+		} else if (matches(*argv, "prio-buffer") == 0) {
+			dcb_buffer_print_prio_buffer(&buffer);
+			print_nl();
+		} else if (matches(*argv, "buffer-size") == 0) {
+			dcb_buffer_print_buffer_size(&buffer);
+			print_nl();
+		} else if (matches(*argv, "total-size") == 0) {
+			dcb_buffer_print_total_size(&buffer);
+			print_nl();
+		} else {
+			fprintf(stderr, "What is \"%s\"?\n", *argv);
+			dcb_buffer_help_show();
+			ret = -EINVAL;
+			goto out;
+		}
+
+		NEXT_ARG_FWD();
+	} while (argc > 0);
+
+out:
+	/* Every path that opened the JSON object has to close it again. */
+	close_json_object();
+	return ret;
+}
+
+/* Entry point of "dcb buffer": dispatch on the sub-command word.
+ *
+ * Returns 0 after printing help, -EINVAL for an unknown sub-command, and
+ * otherwise the result of dcb_cmd_parse_dev().
+ */
+int dcb_cmd_buffer(struct dcb *dcb, int argc, char **argv)
+{
+	if (!argc || matches(*argv, "help") == 0) {
+		dcb_buffer_help();
+		return 0;
+	}
+
+	if (matches(*argv, "show") == 0) {
+		NEXT_ARG_FWD();
+		return dcb_cmd_parse_dev(dcb, argc, argv,
+					 dcb_cmd_buffer_show, dcb_buffer_help_show);
+	}
+
+	if (matches(*argv, "set") == 0) {
+		NEXT_ARG_FWD();
+		return dcb_cmd_parse_dev(dcb, argc, argv,
+					 dcb_cmd_buffer_set, dcb_buffer_help_set);
+	}
+
+	fprintf(stderr, "What is \"%s\"?\n", *argv);
+	dcb_buffer_help();
+	return -EINVAL;
+}
--- a/dcb/dcb_dcbx.c
+++ b/dcb/dcb_dcbx.c
@ -0,0 +1,192 @@
+// SPDX-License-Identifier: GPL-2.0+
+
+#include <errno.h>
+#include <inttypes.h>
+#include <stdio.h>
+#include <linux/dcbnl.h>
+
+#include "dcb.h"
+#include "utils.h"
+
+/* Write the usage text of "dcb dcbx set" to stderr. */
+static void dcb_dcbx_help_set(void)
+{
+	static const char usage[] =
+		"Usage: dcb dcbx set dev STRING\n"
+		"           [ host | lld-managed ]\n"
+		"           [ cee | ieee ] [ static ]\n"
+		"\n";
+
+	fputs(usage, stderr);
+}
+
+/* Write the usage text of "dcb dcbx show" to stderr. */
+static void dcb_dcbx_help_show(void)
+{
+	static const char usage[] =
+		"Usage: dcb dcbx show dev STRING\n"
+		"\n";
+
+	fputs(usage, stderr);
+}
+
+/* Write the complete "dcb dcbx" usage to stderr: the help line,
+ * followed by the show and the set usage.
+ */
+static void dcb_dcbx_help(void)
+{
+	fputs("Usage: dcb dcbx help\n"
+	      "\n", stderr);
+
+	dcb_dcbx_help_show();
+	dcb_dcbx_help_set();
+}
+
+/* Description of one DCBX flag: its bit value and its names. */
+struct dcb_dcbx_flag {
+	/* Bit value of the flag within the DCBX byte. */
+	__u8 value;
+	/* Name used on the command line and in plain-text output. */
+	const char *key_fp;
+	/* Name used as JSON key; when NULL, key_fp is used instead. */
+	const char *key_json;
+};
+
+/* The known DCBX flags. Only lld-managed has a separate JSON name; the
+ * other entries leave key_json NULL.
+ */
+static struct dcb_dcbx_flag dcb_dcbx_flags[] = {
+	{ .value = DCB_CAP_DCBX_HOST, .key_fp = "host" },
+	{
+		.value = DCB_CAP_DCBX_LLD_MANAGED,
+		.key_fp = "lld-managed",
+		.key_json = "lld_managed",
+	},
+	{ .value = DCB_CAP_DCBX_VER_CEE, .key_fp = "cee" },
+	{ .value = DCB_CAP_DCBX_VER_IEEE, .key_fp = "ieee" },
+	{ .value = DCB_CAP_DCBX_STATIC, .key_fp = "static" },
+};
+
+/* Print the flags set in dcbx, lowest bit first. A bit found in
+ * dcb_dcbx_flags is printed as a true JSON boolean and as its plain-text
+ * name; any other bit is reported on stderr. Ends with print_nl().
+ */
+static void dcb_dcbx_print(__u8 dcbx)
+{
+	for (;;) {
+		const struct dcb_dcbx_flag *match = NULL;
+		int bit = ffs(dcbx);
+		int i;
+
+		if (bit == 0)
+			break;
+
+		/* ffs() counts bits from 1. */
+		bit--;
+
+		for (i = 0; i < ARRAY_SIZE(dcb_dcbx_flags); i++) {
+			if (dcb_dcbx_flags[i].value == 1 << bit) {
+				match = &dcb_dcbx_flags[i];
+				break;
+			}
+		}
+
+		if (match != NULL) {
+			print_bool(PRINT_JSON, match->key_json ?: match->key_fp,
+				   NULL, true);
+			print_string(PRINT_FP, NULL, "%s ", match->key_fp);
+		} else {
+			fprintf(stderr, "Unknown DCBX bit %#x.\n", 1 << bit);
+		}
+
+		/* Clear the handled bit so that ffs() finds the next one. */
+		dcbx &= ~(1 << bit);
+	}
+
+	print_nl();
+}
+
+/* Read the DCBX byte of dev into *dcbx.
+ *
+ * Returns the error of dcb_get_attribute_bare() if the request fails,
+ * -EINVAL if the payload is not exactly one byte long, and 0 otherwise.
+ */
+static int dcb_dcbx_get(struct dcb *dcb, const char *dev, __u8 *dcbx)
+{
+	__u16 len;
+	void *data;
+	int err;
+
+	err = dcb_get_attribute_bare(dcb, DCB_CMD_IEEE_GET, dev, DCB_ATTR_DCBX,
+				     &data, &len);
+	if (err != 0)
+		return err;
+
+	if (len == 1) {
+		*dcbx = *(__u8 *) data;
+		return 0;
+	}
+
+	fprintf(stderr, "DCB_ATTR_DCBX payload has size %d, expected 1.\n",
+		len);
+	return -EINVAL;
+}
+
+/* Send the one-byte dcbx value to dev with DCB_CMD_SDCBX; returns the
+ * result of dcb_set_attribute_bare().
+ */
+static int dcb_dcbx_set(struct dcb *dcb, const char *dev, __u8 dcbx)
+{
+	return dcb_set_attribute_bare(dcb, DCB_CMD_SDCBX, dev, DCB_ATTR_DCBX,
+				      &dcbx, sizeof(dcbx), DCB_ATTR_DCBX);
+}
+
+/* Handle "dcb dcbx set": OR together the values of all flags named on
+ * the command line and send the result to dev. The value starts at 0,
+ * so flags that are not named end up cleared.
+ *
+ * Returns 0 after printing help, -EINVAL for an unknown word, and
+ * otherwise the result of dcb_dcbx_set().
+ */
+static int dcb_cmd_dcbx_set(struct dcb *dcb, const char *dev, int argc, char **argv)
+{
+	__u8 dcbx = 0;
+	__u8 i;
+
+	if (!argc) {
+		dcb_dcbx_help_set();
+		return 0;
+	}
+
+	do {
+		if (matches(*argv, "help") == 0) {
+			dcb_dcbx_help_set();
+			return 0;
+		}
+
+		for (i = 0; i < ARRAY_SIZE(dcb_dcbx_flags); i++) {
+			struct dcb_dcbx_flag *flag = &dcb_dcbx_flags[i];
+
+			if (matches(*argv, flag->key_fp) == 0) {
+				dcbx |= flag->value;
+				NEXT_ARG_FWD();
+				/* Leave the lookup and carry on with the
+				 * next word.
+				 */
+				goto next;
+			}
+		}
+
+		/* Only reached when no flag name matched the word. */
+		fprintf(stderr, "What is \"%s\"?\n", *argv);
+		dcb_dcbx_help_set();
+		return -EINVAL;
+
+next:
+		;
+	} while (argc > 0);
+
+	return dcb_dcbx_set(dcb, dev, dcbx);
+}
+
+/* Handle "dcb dcbx show": read the DCBX byte of dev and print its
+ * flags. The only word accepted after the device is "help".
+ *
+ * Returns the error of dcb_dcbx_get(), 0 after printing help or the
+ * flags, and -EINVAL for any other word.
+ */
+static int dcb_cmd_dcbx_show(struct dcb *dcb, const char *dev, int argc, char **argv)
+{
+	__u8 dcbx;
+	int ret;
+
+	ret = dcb_dcbx_get(dcb, dev, &dcbx);
+	if (ret != 0)
+		return ret;
+
+	/* Any remaining word ends the command, one way or the other. */
+	if (argc > 0) {
+		if (matches(*argv, "help") == 0) {
+			dcb_dcbx_help_show();
+			return 0;
+		}
+
+		fprintf(stderr, "What is \"%s\"?\n", *argv);
+		dcb_dcbx_help_show();
+		return -EINVAL;
+	}
+
+	open_json_object(NULL);
+	dcb_dcbx_print(dcbx);
+	close_json_object();
+	return 0;
+}
+
+/* Entry point of "dcb dcbx": dispatch on the sub-command word.
+ *
+ * Returns 0 after printing help, -EINVAL for an unknown sub-command, and
+ * otherwise the result of dcb_cmd_parse_dev().
+ */
+int dcb_cmd_dcbx(struct dcb *dcb, int argc, char **argv)
+{
+	if (!argc || matches(*argv, "help") == 0) {
+		dcb_dcbx_help();
+		return 0;
+	}
+
+	if (matches(*argv, "show") == 0) {
+		NEXT_ARG_FWD();
+		return dcb_cmd_parse_dev(dcb, argc, argv,
+					 dcb_cmd_dcbx_show, dcb_dcbx_help_show);
+	}
+
+	if (matches(*argv, "set") == 0) {
+		NEXT_ARG_FWD();
+		return dcb_cmd_parse_dev(dcb, argc, argv,
+					 dcb_cmd_dcbx_set, dcb_dcbx_help_set);
+	}
+
+	fprintf(stderr, "What is \"%s\"?\n", *argv);
+	dcb_dcbx_help();
+	return -EINVAL;
+}
--- a/dcb/dcb_ets.c
+++ b/dcb/dcb_ets.c
@ -0,0 +1,435 @@
+// SPDX-License-Identifier: GPL-2.0+
+
+#include <errno.h>
+#include <stdio.h>
+#include <linux/dcbnl.h>
+
+#include "dcb.h"
+#include "utils.h"
+
+/* Write the usage text of "dcb ets set" to stderr. */
+static void dcb_ets_help_set(void)
+{
+	static const char usage[] =
+		"Usage: dcb ets set dev STRING\n"
+		"           [ willing { on | off } ]\n"
+		"           [ { tc-tsa | reco-tc-tsa } TSA-MAP ]\n"
+		"           [ { pg-bw | tc-bw | reco-tc-bw } BW-MAP ]\n"
+		"           [ { prio-tc | reco-prio-tc } PRIO-MAP ]\n"
+		"\n"
+		" where TSA-MAP := [ TSA-MAP ] TSA-MAPPING\n"
+		"       TSA-MAPPING := { all | TC }:{ strict | cbs | ets | vendor }\n"
+		"       BW-MAP := [ BW-MAP ] BW-MAPPING\n"
+		"       BW-MAPPING := { all | TC }:INTEGER\n"
+		"       PRIO-MAP := [ PRIO-MAP ] PRIO-MAPPING\n"
+		"       PRIO-MAPPING := { all | PRIO }:TC\n"
+		"       TC := { 0 .. 7 }\n"
+		"       PRIO := { 0 .. 7 }\n"
+		"\n";
+
+	fputs(usage, stderr);
+}
+
+/* Write the usage text of "dcb ets show" to stderr. */
+static void dcb_ets_help_show(void)
+{
+	static const char usage[] =
+		"Usage: dcb ets show dev STRING\n"
+		"           [ willing ] [ ets-cap ] [ cbs ] [ tc-tsa ]\n"
+		"           [ reco-tc-tsa ] [ pg-bw ] [ tc-bw ] [ reco-tc-bw ]\n"
+		"           [ prio-tc ] [ reco-prio-tc ]\n"
+		"\n";
+
+	fputs(usage, stderr);
+}
+
+/* Write the complete "dcb ets" usage to stderr: the help line, followed
+ * by the show and the set usage.
+ */
+static void dcb_ets_help(void)
+{
+	fputs("Usage: dcb ets help\n"
+	      "\n", stderr);
+
+	dcb_ets_help_show();
+	dcb_ets_help_set();
+}
+
+/* Keyword for each IEEE_8021QAZ_TSA_* value, indexed by that value. The
+ * table is used both to parse TSA keywords and to print TSA arrays.
+ */
+static const char *const tsa_names[] = {
+	[IEEE_8021QAZ_TSA_STRICT] = "strict",
+	[IEEE_8021QAZ_TSA_CB_SHAPER] = "cbs",
+	[IEEE_8021QAZ_TSA_ETS] = "ets",
+	[IEEE_8021QAZ_TSA_VENDOR] = "vendor",
+};
+
+/* Handle one TC:TSA mapping. The value is looked up in tsa_names with
+ * parse_one_of(); the result is stored through dcb_set_u8 into the array
+ * passed in data.
+ *
+ * Returns the error reported by parse_one_of(), otherwise the result of
+ * dcb_parse_mapping().
+ */
+static int dcb_ets_parse_mapping_tc_tsa(__u32 key, char *value, void *data)
+{
+	int err;
+	__u8 tsa = parse_one_of("TSA", value, tsa_names, ARRAY_SIZE(tsa_names), &err);
+
+	if (err != 0)
+		return err;
+
+	return dcb_parse_mapping("TC", key, IEEE_8021QAZ_MAX_TCS - 1,
+				 "TSA", tsa, -1U,
+				 dcb_set_u8, data);
+}
+
+/* Handle one TC:BW mapping. The value must parse as a u8 and is bounded
+ * by 100 in the dcb_parse_mapping() call; it is stored through
+ * dcb_set_u8 into the array passed in data.
+ */
+static int dcb_ets_parse_mapping_tc_bw(__u32 key, char *value, void *data)
+{
+	__u8 percent;
+
+	if (get_u8(&percent, value, 0) != 0)
+		return -EINVAL;
+
+	return dcb_parse_mapping("TC", key, IEEE_8021QAZ_MAX_TCS - 1,
+				 "BW", percent, 100,
+				 dcb_set_u8, data);
+}
+
+/* Handle one PRIO:TC mapping. The value must parse as a u8; key and
+ * value are both bounded by IEEE_8021QAZ_MAX_TCS - 1, and the value is
+ * stored through dcb_set_u8 into the array passed in data.
+ */
+static int dcb_ets_parse_mapping_prio_tc(unsigned int key, char *value, void *data)
+{
+	__u8 traffic_class;
+
+	if (get_u8(&traffic_class, value, 0) != 0)
+		return -EINVAL;
+
+	return dcb_parse_mapping("PRIO", key, IEEE_8021QAZ_MAX_TCS - 1,
+				 "TC", traffic_class, IEEE_8021QAZ_MAX_TCS - 1,
+				 dcb_set_u8, data);
+}
+
+/* Print an array of TSA values through dcb_print_array_kw(), with
+ * tsa_names as the keyword table.
+ */
+static void dcb_print_array_tsa(const __u8 *array, size_t size)
+{
+	const size_t n_names = ARRAY_SIZE(tsa_names);
+
+	dcb_print_array_kw(array, size, tsa_names, n_names);
+}
+
+/* Print the willing field as an on/off value. */
+static void dcb_ets_print_willing(const struct ieee_ets *ets)
+{
+	const __u8 willing = ets->willing;
+
+	print_on_off(PRINT_ANY, "willing", "willing %s ", willing);
+}
+
+/* Print the ets_cap field as a number. */
+static void dcb_ets_print_ets_cap(const struct ieee_ets *ets)
+{
+	const __u8 ets_cap = ets->ets_cap;
+
+	print_uint(PRINT_ANY, "ets_cap", "ets-cap %d ", ets_cap);
+}
+
+/* Print the cbs field as an on/off value. */
+static void dcb_ets_print_cbs(const struct ieee_ets *ets)
+{
+	const __u8 cbs = ets->cbs;
+
+	print_on_off(PRINT_ANY, "cbs", "cbs %s ", cbs);
+}
+
+/* Print the tc_tx_bw array as "tc-bw" (JSON key "tc_bw"). */
+static void dcb_ets_print_tc_bw(const struct ieee_ets *ets)
+{
+	const size_t count = ARRAY_SIZE(ets->tc_tx_bw);
+
+	dcb_print_named_array("tc_bw", "tc-bw",
+			      ets->tc_tx_bw, count,
+			      dcb_print_array_u8);
+}
+
+/* Print the tc_rx_bw array as "pg-bw" (JSON key "pg_bw"). */
+static void dcb_ets_print_pg_bw(const struct ieee_ets *ets)
+{
+	const size_t count = ARRAY_SIZE(ets->tc_rx_bw);
+
+	dcb_print_named_array("pg_bw", "pg-bw",
+			      ets->tc_rx_bw, count,
+			      dcb_print_array_u8);
+}
+
+/* Print the tc_tsa array as "tc-tsa" (JSON key "tc_tsa"), using TSA
+ * keywords.
+ */
+static void dcb_ets_print_tc_tsa(const struct ieee_ets *ets)
+{
+	const size_t count = ARRAY_SIZE(ets->tc_tsa);
+
+	dcb_print_named_array("tc_tsa", "tc-tsa",
+			      ets->tc_tsa, count,
+			      dcb_print_array_tsa);
+}
+
+/* Print the prio_tc array as "prio-tc" (JSON key "prio_tc"). */
+static void dcb_ets_print_prio_tc(const struct ieee_ets *ets)
+{
+	const size_t count = ARRAY_SIZE(ets->prio_tc);
+
+	dcb_print_named_array("prio_tc", "prio-tc",
+			      ets->prio_tc, count,
+			      dcb_print_array_u8);
+}
+
+/* Print the tc_reco_bw array as "reco-tc-bw" (JSON key "reco_tc_bw"). */
+static void dcb_ets_print_reco_tc_bw(const struct ieee_ets *ets)
+{
+	const size_t count = ARRAY_SIZE(ets->tc_reco_bw);
+
+	dcb_print_named_array("reco_tc_bw", "reco-tc-bw",
+			      ets->tc_reco_bw, count,
+			      dcb_print_array_u8);
+}
+
+/* Print the tc_reco_tsa array as "reco-tc-tsa" (JSON key
+ * "reco_tc_tsa"), using TSA keywords.
+ */
+static void dcb_ets_print_reco_tc_tsa(const struct ieee_ets *ets)
+{
+	const size_t count = ARRAY_SIZE(ets->tc_reco_tsa);
+
+	dcb_print_named_array("reco_tc_tsa", "reco-tc-tsa",
+			      ets->tc_reco_tsa, count,
+			      dcb_print_array_tsa);
+}
+
+/* Print the reco_prio_tc array as "reco-prio-tc" (JSON key
+ * "reco_prio_tc").
+ */
+static void dcb_ets_print_reco_prio_tc(const struct ieee_ets *ets)
+{
+	const size_t count = ARRAY_SIZE(ets->reco_prio_tc);
+
+	dcb_print_named_array("reco_prio_tc", "reco-prio-tc",
+			      ets->reco_prio_tc, count,
+			      dcb_print_array_u8);
+}
+
+/* Print all ETS attributes. The three scalar fields share the first
+ * line; every array that follows is printed on a line of its own, in
+ * the order of the calls below.
+ */
+static void dcb_ets_print(const struct ieee_ets *ets)
+{
+	dcb_ets_print_willing(ets);
+	dcb_ets_print_ets_cap(ets);
+	dcb_ets_print_cbs(ets);
+	print_nl();
+
+	dcb_ets_print_tc_bw(ets);
+	print_nl();
+
+	dcb_ets_print_pg_bw(ets);
+	print_nl();
+
+	dcb_ets_print_tc_tsa(ets);
+	print_nl();
+
+	dcb_ets_print_prio_tc(ets);
+	print_nl();
+
+	dcb_ets_print_reco_tc_bw(ets);
+	print_nl();
+
+	dcb_ets_print_reco_tc_tsa(ets);
+	print_nl();
+
+	dcb_ets_print_reco_prio_tc(ets);
+	print_nl();
+}
+
+/* Read DCB_ATTR_IEEE_ETS of dev into *ets; returns the result of
+ * dcb_get_attribute().
+ */
+static int dcb_ets_get(struct dcb *dcb, const char *dev, struct ieee_ets *ets)
+{
+	const size_t len = sizeof(*ets);
+
+	return dcb_get_attribute(dcb, dev, DCB_ATTR_IEEE_ETS, ets, len);
+}
+
+/* Validate a per-TC bandwidth table against its TSA table.
+ *
+ * Every entry must be at most 100. The sum must be 100, or 0 when no TC
+ * uses the ETS algorithm. what names the table in error messages.
+ *
+ * Returns 0 if the table is acceptable, -EINVAL (after a message on
+ * stderr) otherwise.
+ */
+static int dcb_ets_validate_bw(const __u8 bw[], const __u8 tsa[], const char *what)
+{
+	unsigned int total = 0;
+	bool has_ets = false;
+	unsigned int tc;
+
+	for (tc = 0; tc < IEEE_8021QAZ_MAX_TCS && !has_ets; tc++)
+		has_ets = tsa[tc] == IEEE_8021QAZ_TSA_ETS;
+
+	/* TC bandwidth is only intended for ETS, but 802.1Q-2018 only requires
+	 * that the sum be 100, and individual entries 0..100. It explicitly
+	 * notes that non-ETS TCs can have non-0 TC bandwidth during
+	 * reconfiguration.
+	 */
+	for (tc = 0; tc < IEEE_8021QAZ_MAX_TCS; tc++) {
+		if (bw[tc] > 100) {
+			fprintf(stderr, "%d%% for TC %d of %s is not a valid bandwidth percentage, expected 0..100%%\n",
+				bw[tc], tc, what);
+			return -EINVAL;
+		}
+		total += bw[tc];
+	}
+
+	/* A sum of 100 is what 802.1Q-2018 requires. That does not make
+	 * sense for all-strict configurations, though: there, either BW has
+	 * not been reconfigured for the all-strict allocation yet and still
+	 * sums to 100, or it has been, and then 0 is accepted as well.
+	 */
+	if (total == 100 || (!has_ets && total == 0))
+		return 0;
+
+	fprintf(stderr, "Bandwidth percentages in %s sum to %d%%, expected %d%%\n",
+		what, total, has_ets ? 100 : 0);
+	return -EINVAL;
+}
+
+/* Validate the tc-bw and reco-tc-bw tables of *ets and, if both pass,
+ * write *ets to dev as DCB_ATTR_IEEE_ETS.
+ *
+ * Returns -EINVAL if a table is rejected, otherwise the result of
+ * dcb_set_attribute().
+ */
+static int dcb_ets_set(struct dcb *dcb, const char *dev, const struct ieee_ets *ets)
+{
+	/* Do not validate pg-bw, which is not standard and has unclear
+	 * meaning.
+	 */
+	if (dcb_ets_validate_bw(ets->tc_tx_bw, ets->tc_tsa, "tc-bw") != 0)
+		return -EINVAL;
+
+	if (dcb_ets_validate_bw(ets->tc_reco_bw, ets->tc_reco_tsa, "reco-tc-bw") != 0)
+		return -EINVAL;
+
+	return dcb_set_attribute(dcb, dev, DCB_ATTR_IEEE_ETS, ets, sizeof(*ets));
+}
+
+/* Handle "dcb ets set": read the current ETS configuration of dev,
+ * apply the settings from the command line to it, and write the result
+ * back through dcb_ets_set().
+ *
+ * Returns 0 after printing help (also when no argument was given, like
+ * the other "set" handlers in this tool), -EINVAL for an unknown
+ * keyword, the non-zero parser result for a bad value, and otherwise
+ * the result of dcb_ets_get() / dcb_ets_set(). Nothing is written to
+ * the device unless all arguments parsed.
+ */
+static int dcb_cmd_ets_set(struct dcb *dcb, const char *dev, int argc, char **argv)
+{
+	struct ieee_ets ets;
+	int ret;
+
+	if (!argc) {
+		dcb_ets_help_set();
+		return 0;
+	}
+
+	/* Start from the current settings so that unmentioned fields keep
+	 * their values.
+	 */
+	ret = dcb_ets_get(dcb, dev, &ets);
+	if (ret)
+		return ret;
+
+	do {
+		if (matches(*argv, "help") == 0) {
+			dcb_ets_help_set();
+			return 0;
+		} else if (matches(*argv, "willing") == 0) {
+			NEXT_ARG();
+			ets.willing = parse_on_off("willing", *argv, &ret);
+			if (ret)
+				return ret;
+		} else if (matches(*argv, "tc-tsa") == 0) {
+			NEXT_ARG();
+			ret = parse_mapping(&argc, &argv, true, &dcb_ets_parse_mapping_tc_tsa,
+					    ets.tc_tsa);
+			if (ret) {
+				fprintf(stderr, "Invalid tc-tsa mapping %s\n", *argv);
+				return ret;
+			}
+			/* parse_mapping() was handed &argc / &argv, so skip the
+			 * NEXT_ARG_FWD() at the end of the loop body.
+			 */
+			continue;
+		} else if (matches(*argv, "reco-tc-tsa") == 0) {
+			NEXT_ARG();
+			ret = parse_mapping(&argc, &argv, true, &dcb_ets_parse_mapping_tc_tsa,
+					    ets.tc_reco_tsa);
+			if (ret) {
+				fprintf(stderr, "Invalid reco-tc-tsa mapping %s\n", *argv);
+				return ret;
+			}
+			continue;
+		} else if (matches(*argv, "tc-bw") == 0) {
+			NEXT_ARG();
+			ret = parse_mapping(&argc, &argv, true, &dcb_ets_parse_mapping_tc_bw,
+					    ets.tc_tx_bw);
+			if (ret) {
+				fprintf(stderr, "Invalid tc-bw mapping %s\n", *argv);
+				return ret;
+			}
+			continue;
+		} else if (matches(*argv, "pg-bw") == 0) {
+			NEXT_ARG();
+			ret = parse_mapping(&argc, &argv, true, &dcb_ets_parse_mapping_tc_bw,
+					    ets.tc_rx_bw);
+			if (ret) {
+				fprintf(stderr, "Invalid pg-bw mapping %s\n", *argv);
+				return ret;
+			}
+			continue;
+		} else if (matches(*argv, "reco-tc-bw") == 0) {
+			NEXT_ARG();
+			ret = parse_mapping(&argc, &argv, true, &dcb_ets_parse_mapping_tc_bw,
+					    ets.tc_reco_bw);
+			if (ret) {
+				fprintf(stderr, "Invalid reco-tc-bw mapping %s\n", *argv);
+				return ret;
+			}
+			continue;
+		} else if (matches(*argv, "prio-tc") == 0) {
+			NEXT_ARG();
+			ret = parse_mapping(&argc, &argv, true, &dcb_ets_parse_mapping_prio_tc,
+					    ets.prio_tc);
+			if (ret) {
+				fprintf(stderr, "Invalid prio-tc mapping %s\n", *argv);
+				return ret;
+			}
+			continue;
+		} else if (matches(*argv, "reco-prio-tc") == 0) {
+			NEXT_ARG();
+			ret = parse_mapping(&argc, &argv, true, &dcb_ets_parse_mapping_prio_tc,
+					    ets.reco_prio_tc);
+			if (ret) {
+				fprintf(stderr, "Invalid reco-prio-tc mapping %s\n", *argv);
+				return ret;
+			}
+			continue;
+		} else {
+			fprintf(stderr, "What is \"%s\"?\n", *argv);
+			dcb_ets_help_set();
+			return -EINVAL;
+		}
+
+		NEXT_ARG_FWD();
+	} while (argc > 0);
+
+	return dcb_ets_set(dcb, dev, &ets);
+}
+
+/* Handle "dcb ets show dev DEV [ FIELD... ]".
+ *
+ * Fetches the ETS configuration with dcb_ets_get() and prints it inside a
+ * JSON object: everything via dcb_ets_print() when no field is named,
+ * otherwise one line per named field, in command-line order.
+ *
+ * Returns 0 on success or after "help", the dcb_ets_get() status if the
+ * query fails, and -EINVAL for an unrecognised keyword.
+ */
+static int dcb_cmd_ets_show(struct dcb *dcb, const char *dev, int argc, char **argv)
+{
+	struct ieee_ets ets;
+	int err;
+
+	err = dcb_ets_get(dcb, dev, &ets);
+	if (err)
+		return err;
+
+	open_json_object(NULL);
+
+	if (argc == 0) {
+		dcb_ets_print(&ets);
+		close_json_object();
+		return 0;
+	}
+
+	do {
+		if (!matches(*argv, "help")) {
+			dcb_ets_help_show();
+			return 0;
+		}
+
+		if (!matches(*argv, "willing")) {
+			dcb_ets_print_willing(&ets);
+		} else if (!matches(*argv, "ets-cap")) {
+			dcb_ets_print_ets_cap(&ets);
+		} else if (!matches(*argv, "cbs")) {
+			dcb_ets_print_cbs(&ets);
+		} else if (!matches(*argv, "tc-tsa")) {
+			dcb_ets_print_tc_tsa(&ets);
+		} else if (!matches(*argv, "reco-tc-tsa")) {
+			dcb_ets_print_reco_tc_tsa(&ets);
+		} else if (!matches(*argv, "tc-bw")) {
+			dcb_ets_print_tc_bw(&ets);
+		} else if (!matches(*argv, "pg-bw")) {
+			dcb_ets_print_pg_bw(&ets);
+		} else if (!matches(*argv, "reco-tc-bw")) {
+			dcb_ets_print_reco_tc_bw(&ets);
+		} else if (!matches(*argv, "prio-tc")) {
+			dcb_ets_print_prio_tc(&ets);
+		} else if (!matches(*argv, "reco-prio-tc")) {
+			dcb_ets_print_reco_prio_tc(&ets);
+		} else {
+			fprintf(stderr, "What is \"%s\"?\n", *argv);
+			dcb_ets_help_show();
+			return -EINVAL;
+		}
+
+		/* Every field that was printed is followed by a line break. */
+		print_nl();
+		NEXT_ARG_FWD();
+	} while (argc > 0);
+
+	close_json_object();
+	return 0;
+}
+
+/* Entry point of the "dcb ets" object: dispatch to the show or set handler.
+ *
+ * With no arguments or "help" the usage is printed and 0 returned. "show"
+ * and "set" consume the keyword and hand the rest to dcb_cmd_parse_dev()
+ * together with the matching handler and help callback. Anything else is
+ * reported on stderr and yields -EINVAL.
+ */
+int dcb_cmd_ets(struct dcb *dcb, int argc, char **argv)
+{
+	if (argc == 0 || !matches(*argv, "help")) {
+		dcb_ets_help();
+		return 0;
+	}
+
+	if (!matches(*argv, "show")) {
+		NEXT_ARG_FWD();
+		return dcb_cmd_parse_dev(dcb, argc, argv, dcb_cmd_ets_show, dcb_ets_help_show);
+	}
+
+	if (!matches(*argv, "set")) {
+		NEXT_ARG_FWD();
+		return dcb_cmd_parse_dev(dcb, argc, argv, dcb_cmd_ets_set, dcb_ets_help_set);
+	}
+
+	fprintf(stderr, "What is \"%s\"?\n", *argv);
+	dcb_ets_help();
+	return -EINVAL;
+}
--- a/dcb/dcb_maxrate.c
+++ b/dcb/dcb_maxrate.c
@ -0,0 +1,182 @@
+// SPDX-License-Identifier: GPL-2.0+
+
+#include <errno.h>
+#include <inttypes.h>
+#include <stdio.h>
+#include <linux/dcbnl.h>
+
+#include "dcb.h"
+#include "utils.h"
+
+/* Print the usage text of "dcb maxrate set" to stderr. */
+static void dcb_maxrate_help_set(void)
+{
+	static const char usage[] =
+		"Usage: dcb maxrate set dev STRING\n"
+		"           [ tc-maxrate RATE-MAP ]\n"
+		"\n"
+		" where RATE-MAP := [ RATE-MAP ] RATE-MAPPING\n"
+		"       RATE-MAPPING := { all | TC }:RATE\n"
+		"       TC := { 0 .. 7 }\n"
+		"\n";
+
+	fputs(usage, stderr);
+}
+
+/* Print the usage text of "dcb maxrate show" to stderr. */
+static void dcb_maxrate_help_show(void)
+{
+	static const char usage[] =
+		"Usage: dcb [ -i ] maxrate show dev STRING\n"
+		"           [ tc-maxrate ]\n"
+		"\n";
+
+	fputs(usage, stderr);
+}
+
+/* Print the complete "dcb maxrate" usage to stderr: the help line itself,
+ * followed by the show and set usage texts.
+ */
+static void dcb_maxrate_help(void)
+{
+	fputs("Usage: dcb maxrate help\n\n", stderr);
+
+	dcb_maxrate_help_show();
+	dcb_maxrate_help_set();
+}
+
+/* parse_mapping() callback for one "TC:RATE" item of tc-maxrate.
+ *
+ * @key:   traffic class index, accepted up to IEEE_8021QAZ_MAX_TCS - 1
+ * @value: rate string, converted with get_rate64()
+ * @data:  opaque destination forwarded to dcb_parse_mapping(), which
+ *         stores the rate through dcb_set_u64
+ *
+ * Returns -EINVAL if the rate does not parse, otherwise the result of
+ * dcb_parse_mapping().
+ */
+static int dcb_maxrate_parse_mapping_tc_maxrate(__u32 key, char *value, void *data)
+{
+	__u64 parsed_rate;
+
+	if (get_rate64(&parsed_rate, value) != 0)
+		return -EINVAL;
+
+	return dcb_parse_mapping("TC", key, IEEE_8021QAZ_MAX_TCS - 1,
+				 "RATE", parsed_rate, -1,
+				 dcb_set_u64, data);
+}
+
+/* Print the maximum rate of every traffic class.
+ *
+ * The items are wrapped in the "tc_maxrate" array (PRINT_JSON) and preceded
+ * by the "tc-maxrate " heading (PRINT_FP). Each rate is emitted through
+ * print_rate() with an "<index>:%s " format; dcb->use_iec is forwarded to
+ * print_rate() unchanged.
+ */
+static void dcb_maxrate_print_tc_maxrate(struct dcb *dcb, const struct ieee_maxrate *maxrate)
+{
+	size_t size = ARRAY_SIZE(maxrate->tc_maxrate);
+	SPRINT_BUF(b);
+	size_t i;
+
+	open_json_array(PRINT_JSON, "tc_maxrate");
+	print_string(PRINT_FP, NULL, "tc-maxrate ", NULL);
+
+	for (i = 0; i < size; i++) {
+		/* Build the format string for this item: the index is expanded
+		 * here, while the escaped %%s survives as %s for print_rate().
+		 * i is a size_t, so the matching conversion is %zu.
+		 */
+		snprintf(b, sizeof(b), "%zu:%%s ", i);
+		print_rate(dcb->use_iec, PRINT_ANY, NULL, b, maxrate->tc_maxrate[i]);
+	}
+
+	close_json_array(PRINT_JSON, "tc_maxrate");
+}
+
+/* Print everything "dcb maxrate show" reports by default: the tc-maxrate
+ * line followed by a line break.
+ */
+static void dcb_maxrate_print(struct dcb *dcb, const struct ieee_maxrate *maxrate)
+{
+	dcb_maxrate_print_tc_maxrate(dcb, maxrate);
+	print_nl();
+}
+
+/* Query DCB_ATTR_IEEE_MAXRATE of @dev into *@maxrate.
+ *
+ * Returns whatever dcb_get_attribute() returns.
+ */
+static int dcb_maxrate_get(struct dcb *dcb, const char *dev, struct ieee_maxrate *maxrate)
+{
+	size_t payload_len = sizeof(*maxrate);
+
+	return dcb_get_attribute(dcb, dev, DCB_ATTR_IEEE_MAXRATE, maxrate, payload_len);
+}
+
+/* Send *@maxrate to @dev as DCB_ATTR_IEEE_MAXRATE.
+ *
+ * Returns whatever dcb_set_attribute() returns.
+ */
+static int dcb_maxrate_set(struct dcb *dcb, const char *dev, const struct ieee_maxrate *maxrate)
+{
+	size_t payload_len = sizeof(*maxrate);
+
+	return dcb_set_attribute(dcb, dev, DCB_ATTR_IEEE_MAXRATE, maxrate, payload_len);
+}
+
+/* Handle "dcb maxrate set dev DEV ...".
+ *
+ * Reads the current maximum rates with dcb_maxrate_get(), applies the
+ * tc-maxrate mappings given on the command line and writes the result back
+ * with dcb_maxrate_set().
+ *
+ * Returns 0 when only help was printed, -EINVAL for an unknown keyword,
+ * and otherwise the status of the get, parse or set step.
+ */
+static int dcb_cmd_maxrate_set(struct dcb *dcb, const char *dev, int argc, char **argv)
+{
+	struct ieee_maxrate maxrate;
+	int err;
+
+	if (argc == 0) {
+		dcb_maxrate_help_set();
+		return 0;
+	}
+
+	err = dcb_maxrate_get(dcb, dev, &maxrate);
+	if (err)
+		return err;
+
+	do {
+		if (!matches(*argv, "help")) {
+			dcb_maxrate_help_set();
+			return 0;
+		}
+
+		if (matches(*argv, "tc-maxrate") != 0) {
+			fprintf(stderr, "What is \"%s\"?\n", *argv);
+			dcb_maxrate_help_set();
+			return -EINVAL;
+		}
+
+		/* tc-maxrate is the only settable keyword. parse_mapping() is
+		 * handed argc/argv by reference, so no further advancing is
+		 * done here before the loop condition is re-checked.
+		 */
+		NEXT_ARG();
+		err = parse_mapping(&argc, &argv, true,
+				    &dcb_maxrate_parse_mapping_tc_maxrate, &maxrate);
+		if (err) {
+			fprintf(stderr, "Invalid mapping %s\n", *argv);
+			return err;
+		}
+	} while (argc > 0);
+
+	return dcb_maxrate_set(dcb, dev, &maxrate);
+}
+
+/* Handle "dcb maxrate show dev DEV [ tc-maxrate ]".
+ *
+ * Fetches the maximum rates with dcb_maxrate_get() and prints them inside
+ * a JSON object: via dcb_maxrate_print() when no field is named, otherwise
+ * once per "tc-maxrate" keyword on the command line.
+ *
+ * Returns 0 on success or after "help", the dcb_maxrate_get() status if
+ * the query fails, and -EINVAL for an unrecognised keyword.
+ */
+static int dcb_cmd_maxrate_show(struct dcb *dcb, const char *dev, int argc, char **argv)
+{
+	struct ieee_maxrate maxrate;
+	int err;
+
+	err = dcb_maxrate_get(dcb, dev, &maxrate);
+	if (err)
+		return err;
+
+	open_json_object(NULL);
+
+	if (argc == 0) {
+		dcb_maxrate_print(dcb, &maxrate);
+		close_json_object();
+		return 0;
+	}
+
+	do {
+		if (!matches(*argv, "help")) {
+			dcb_maxrate_help_show();
+			return 0;
+		}
+
+		if (matches(*argv, "tc-maxrate") != 0) {
+			fprintf(stderr, "What is \"%s\"?\n", *argv);
+			dcb_maxrate_help_show();
+			return -EINVAL;
+		}
+
+		dcb_maxrate_print_tc_maxrate(dcb, &maxrate);
+		print_nl();
+
+		NEXT_ARG_FWD();
+	} while (argc > 0);
+
+	close_json_object();
+	return 0;
+}
+
+/* Entry point of the "dcb maxrate" object: dispatch to show or set.
+ *
+ * With no arguments or "help" the usage is printed and 0 returned. "show"
+ * and "set" consume the keyword and hand the rest to dcb_cmd_parse_dev()
+ * together with the matching handler and help callback. Anything else is
+ * reported on stderr and yields -EINVAL.
+ */
+int dcb_cmd_maxrate(struct dcb *dcb, int argc, char **argv)
+{
+	if (argc == 0 || !matches(*argv, "help")) {
+		dcb_maxrate_help();
+		return 0;
+	}
+
+	if (!matches(*argv, "show")) {
+		NEXT_ARG_FWD();
+		return dcb_cmd_parse_dev(dcb, argc, argv,
+					 dcb_cmd_maxrate_show, dcb_maxrate_help_show);
+	}
+
+	if (!matches(*argv, "set")) {
+		NEXT_ARG_FWD();
+		return dcb_cmd_parse_dev(dcb, argc, argv,
+					 dcb_cmd_maxrate_set, dcb_maxrate_help_set);
+	}
+
+	fprintf(stderr, "What is \"%s\"?\n", *argv);
+	dcb_maxrate_help();
+	return -EINVAL;
+}
--- a/dcb/dcb_pfc.c
+++ b/dcb/dcb_pfc.c
@ -0,0 +1,286 @@
+// SPDX-License-Identifier: GPL-2.0+
+
+#include <errno.h>
+#include <stdio.h>
+#include <linux/dcbnl.h>
+
+#include "dcb.h"
+#include "utils.h"
+
+/* Print the usage text of "dcb pfc set" to stderr. */
+static void dcb_pfc_help_set(void)
+{
+	static const char usage[] =
+		"Usage: dcb pfc set dev STRING\n"
+		"           [ prio-pfc PFC-MAP ]\n"
+		"           [ macsec-bypass { on | off } ]\n"
+		"           [ delay INTEGER ]\n"
+		"\n"
+		" where PFC-MAP := [ PFC-MAP ] PFC-MAPPING\n"
+		"       PFC-MAPPING := { all | TC }:PFC\n"
+		"       TC := { 0 .. 7 }\n"
+		"       PFC := { on | off }\n"
+		"\n";
+
+	fputs(usage, stderr);
+}
+
+/* Print the usage text of "dcb pfc show" to stderr. */
+static void dcb_pfc_help_show(void)
+{
+	static const char usage[] =
+		"Usage: dcb [ -s ] pfc show dev STRING\n"
+		"           [ pfc-cap ] [ prio-pfc ] [ macsec-bypass ]\n"
+		"           [ delay ] [ requests ] [ indications ]\n"
+		"\n";
+
+	fputs(usage, stderr);
+}
+
+/* Print the complete "dcb pfc" usage to stderr: the help line itself,
+ * followed by the show and set usage texts.
+ */
+static void dcb_pfc_help(void)
+{
+	fputs("Usage: dcb pfc help\n\n", stderr);
+
+	dcb_pfc_help_show();
+	dcb_pfc_help_set();
+}
+
+/* Expand the @pfc_en bitmask into @array: element N becomes 1 if bit N of
+ * @pfc_en is set and 0 otherwise, for N below IEEE_8021QAZ_MAX_TCS.
+ */
+static void dcb_pfc_to_array(__u8 array[IEEE_8021QAZ_MAX_TCS], __u8 pfc_en)
+{
+	int bit;
+
+	for (bit = 0; bit < IEEE_8021QAZ_MAX_TCS; bit++)
+		array[bit] = (pfc_en >> bit) & 1;
+}
+
+/* Collapse @array back into a bitmask: bit N of *@pfc_en_p is set exactly
+ * when element N of @array is non-zero, for N below IEEE_8021QAZ_MAX_TCS.
+ * *@pfc_en_p is written once, after the whole mask has been assembled.
+ */
+static void dcb_pfc_from_array(__u8 array[IEEE_8021QAZ_MAX_TCS], __u8 *pfc_en_p)
+{
+	__u8 mask = 0;
+	int bit;
+
+	for (bit = 0; bit < IEEE_8021QAZ_MAX_TCS; bit++)
+		mask |= array[bit] ? 1 << bit : 0;
+
+	*pfc_en_p = mask;
+}
+
+/* parse_mapping() callback for one "PRIO:{on|off}" item of prio-pfc.
+ *
+ * @key:   priority index, accepted up to IEEE_8021QAZ_MAX_TCS - 1
+ * @value: "on"/"off" string, converted with parse_on_off()
+ * @data:  the struct ieee_pfc whose pfc_en bitmask is updated
+ *
+ * The bitmask is unpacked into a temporary array so that dcb_parse_mapping()
+ * can store into it through dcb_set_u8, and is packed again only if the
+ * mapping was accepted. Returns 0 on success, otherwise the status of
+ * parse_on_off() or dcb_parse_mapping().
+ */
+static int dcb_pfc_parse_mapping_prio_pfc(__u32 key, char *value, void *data)
+{
+	__u8 per_prio[IEEE_8021QAZ_MAX_TCS];
+	struct ieee_pfc *pfc = data;
+	bool on;
+	int err;
+
+	on = parse_on_off("PFC", value, &err);
+	if (err)
+		return err;
+
+	dcb_pfc_to_array(per_prio, pfc->pfc_en);
+
+	err = dcb_parse_mapping("PRIO", key, IEEE_8021QAZ_MAX_TCS - 1,
+				"PFC", on, -1,
+				dcb_set_u8, per_prio);
+	if (err)
+		return err;
+
+	dcb_pfc_from_array(per_prio, &pfc->pfc_en);
+	return 0;
+}
+
+/* Print pfc->pfc_cap under the "pfc_cap" key / "pfc-cap" label.
+ *
+ * The value goes through print_uint(), so the format uses the unsigned
+ * conversion %u.
+ */
+static void dcb_pfc_print_pfc_cap(const struct ieee_pfc *pfc)
+{
+	print_uint(PRINT_ANY, "pfc_cap", "pfc-cap %u ", pfc->pfc_cap);
+}
+
+/* Print pfc->mbc as an on/off value under the "macsec_bypass" key /
+ * "macsec-bypass" label.
+ */
+static void dcb_pfc_print_macsec_bypass(const struct ieee_pfc *pfc)
+{
+	print_on_off(PRINT_ANY, "macsec_bypass", "macsec-bypass %s ", pfc->mbc);
+}
+
+/* Print pfc->delay under the "delay" key / label.
+ *
+ * The value goes through print_uint(), so the format uses the unsigned
+ * conversion %u.
+ */
+static void dcb_pfc_print_delay(const struct ieee_pfc *pfc)
+{
+	print_uint(PRINT_ANY, "delay", "delay %u ", pfc->delay);
+}
+
+/* Print the PFC enable bitmask as a named array of on/off values.
+ *
+ * pfc->pfc_en is expanded into one element per bit and handed to
+ * dcb_print_named_array() as "prio_pfc" / "prio-pfc" with
+ * dcb_print_array_on_off as the element printer.
+ */
+static void dcb_pfc_print_prio_pfc(const struct ieee_pfc *pfc)
+{
+	__u8 per_prio[IEEE_8021QAZ_MAX_TCS];
+	size_t count = ARRAY_SIZE(per_prio);
+
+	dcb_pfc_to_array(per_prio, pfc->pfc_en);
+	dcb_print_named_array("prio_pfc", "prio-pfc",
+			      per_prio, count, &dcb_print_array_on_off);
+}
+
+/* Print the pfc->requests counters: wrapped in the "requests" array
+ * (PRINT_JSON) and preceded by the "requests " heading (PRINT_FP).
+ */
+static void dcb_pfc_print_requests(const struct ieee_pfc *pfc)
+{
+	size_t count = ARRAY_SIZE(pfc->requests);
+
+	open_json_array(PRINT_JSON, "requests");
+	print_string(PRINT_FP, NULL, "requests ", NULL);
+
+	dcb_print_array_u64(pfc->requests, count);
+
+	close_json_array(PRINT_JSON, "requests");
+}
+
+/* Print the pfc->indications counters: wrapped in the "indications" array
+ * (PRINT_JSON) and preceded by the "indications " heading (PRINT_FP).
+ */
+static void dcb_pfc_print_indications(const struct ieee_pfc *pfc)
+{
+	size_t count = ARRAY_SIZE(pfc->indications);
+
+	open_json_array(PRINT_JSON, "indications");
+	print_string(PRINT_FP, NULL, "indications ", NULL);
+
+	dcb_print_array_u64(pfc->indications, count);
+
+	close_json_array(PRINT_JSON, "indications");
+}
+
+/* Print everything "dcb pfc show" reports by default.
+ *
+ * First line: pfc-cap, macsec-bypass and delay. Second line: prio-pfc.
+ * The requests and indications counters follow on lines of their own only
+ * when dcb->stats is set.
+ */
+static void dcb_pfc_print(const struct dcb *dcb, const struct ieee_pfc *pfc)
+{
+	dcb_pfc_print_pfc_cap(pfc);
+	dcb_pfc_print_macsec_bypass(pfc);
+	dcb_pfc_print_delay(pfc);
+	print_nl();
+
+	dcb_pfc_print_prio_pfc(pfc);
+	print_nl();
+
+	if (!dcb->stats)
+		return;
+
+	dcb_pfc_print_requests(pfc);
+	print_nl();
+
+	dcb_pfc_print_indications(pfc);
+	print_nl();
+}
+
+/* Query DCB_ATTR_IEEE_PFC of @dev into *@pfc.
+ *
+ * Returns whatever dcb_get_attribute() returns.
+ */
+static int dcb_pfc_get(struct dcb *dcb, const char *dev, struct ieee_pfc *pfc)
+{
+	size_t payload_len = sizeof(*pfc);
+
+	return dcb_get_attribute(dcb, dev, DCB_ATTR_IEEE_PFC, pfc, payload_len);
+}
+
+/* Send *@pfc to @dev as DCB_ATTR_IEEE_PFC.
+ *
+ * Returns whatever dcb_set_attribute() returns.
+ */
+static int dcb_pfc_set(struct dcb *dcb, const char *dev, const struct ieee_pfc *pfc)
+{
+	size_t payload_len = sizeof(*pfc);
+
+	return dcb_set_attribute(dcb, dev, DCB_ATTR_IEEE_PFC, pfc, payload_len);
+}
+
+/* Handle "dcb pfc set dev DEV ...".
+ *
+ * Reads the current PFC configuration with dcb_pfc_get(), applies the
+ * prio-pfc, macsec-bypass and delay settings given on the command line and
+ * writes the result back with dcb_pfc_set().
+ *
+ * Returns 0 when only help was printed, -EINVAL for an unknown keyword or
+ * an invalid delay, and otherwise the status of the get, parse or set step.
+ */
+static int dcb_cmd_pfc_set(struct dcb *dcb, const char *dev, int argc, char **argv)
+{
+	struct ieee_pfc pfc;
+	int err;
+
+	if (argc == 0) {
+		dcb_pfc_help_set();
+		return 0;
+	}
+
+	err = dcb_pfc_get(dcb, dev, &pfc);
+	if (err)
+		return err;
+
+	do {
+		if (!matches(*argv, "help")) {
+			dcb_pfc_help_set();
+			return 0;
+		}
+
+		if (!matches(*argv, "prio-pfc")) {
+			NEXT_ARG();
+			err = parse_mapping(&argc, &argv, true,
+					    &dcb_pfc_parse_mapping_prio_pfc, &pfc);
+			if (err) {
+				fprintf(stderr, "Invalid pfc mapping %s\n", *argv);
+				return err;
+			}
+			/* parse_mapping() is handed argc/argv by reference, so
+			 * the NEXT_ARG_FWD() below is skipped for this keyword.
+			 */
+			continue;
+		}
+
+		if (!matches(*argv, "macsec-bypass")) {
+			NEXT_ARG();
+			pfc.mbc = parse_on_off("macsec-bypass", *argv, &err);
+			if (err)
+				return err;
+		} else if (!matches(*argv, "delay")) {
+			NEXT_ARG();
+			/* Size notations are intentionally not accepted for
+			 * delay: it is given in "bit times", not bits, so
+			 * they do not apply. It would also be confusing that
+			 * 10Kbit means 1280 rather than 10240.
+			 */
+			if (get_u16(&pfc.delay, *argv, 0)) {
+				fprintf(stderr, "Invalid delay `%s', expected an integer 0..65535\n",
+					*argv);
+				return -EINVAL;
+			}
+		} else {
+			fprintf(stderr, "What is \"%s\"?\n", *argv);
+			dcb_pfc_help_set();
+			return -EINVAL;
+		}
+
+		NEXT_ARG_FWD();
+	} while (argc > 0);
+
+	return dcb_pfc_set(dcb, dev, &pfc);
+}
+
+/* Handle "dcb pfc show dev DEV [ FIELD... ]".
+ *
+ * Fetches the PFC configuration with dcb_pfc_get() and prints it inside a
+ * JSON object: everything via dcb_pfc_print() when no field is named,
+ * otherwise one line per named field, in command-line order.
+ *
+ * Returns 0 on success or after "help", the dcb_pfc_get() status if the
+ * query fails, and -EINVAL for an unrecognised keyword.
+ */
+static int dcb_cmd_pfc_show(struct dcb *dcb, const char *dev, int argc, char **argv)
+{
+	/* Field keywords, tried in this order for every argument. */
+	static const struct {
+		const char *keyword;
+		void (*print)(const struct ieee_pfc *pfc);
+	} fields[] = {
+		{ "prio-pfc", dcb_pfc_print_prio_pfc },
+		{ "pfc-cap", dcb_pfc_print_pfc_cap },
+		{ "macsec-bypass", dcb_pfc_print_macsec_bypass },
+		{ "delay", dcb_pfc_print_delay },
+		{ "requests", dcb_pfc_print_requests },
+		{ "indications", dcb_pfc_print_indications },
+	};
+	struct ieee_pfc pfc;
+	size_t i;
+	int err;
+
+	err = dcb_pfc_get(dcb, dev, &pfc);
+	if (err)
+		return err;
+
+	open_json_object(NULL);
+
+	if (argc == 0) {
+		dcb_pfc_print(dcb, &pfc);
+		goto out;
+	}
+
+	do {
+		if (!matches(*argv, "help")) {
+			dcb_pfc_help_show();
+			return 0;
+		}
+
+		for (i = 0; i < ARRAY_SIZE(fields); i++) {
+			if (!matches(*argv, fields[i].keyword))
+				break;
+		}
+
+		if (i == ARRAY_SIZE(fields)) {
+			fprintf(stderr, "What is \"%s\"?\n", *argv);
+			dcb_pfc_help_show();
+			return -EINVAL;
+		}
+
+		fields[i].print(&pfc);
+		print_nl();
+
+		NEXT_ARG_FWD();
+	} while (argc > 0);
+
+out:
+	close_json_object();
+	return 0;
+}
+
+/* Entry point of the "dcb pfc" object: dispatch to show or set.
+ *
+ * With no arguments or "help" the usage is printed and 0 returned. "show"
+ * and "set" consume the keyword and hand the rest to dcb_cmd_parse_dev()
+ * together with the matching handler and help callback. Anything else is
+ * reported on stderr and yields -EINVAL.
+ */
+int dcb_cmd_pfc(struct dcb *dcb, int argc, char **argv)
+{
+	if (argc == 0 || !matches(*argv, "help")) {
+		dcb_pfc_help();
+		return 0;
+	}
+
+	if (!matches(*argv, "show")) {
+		NEXT_ARG_FWD();
+		return dcb_cmd_parse_dev(dcb, argc, argv,
+					 dcb_cmd_pfc_show, dcb_pfc_help_show);
+	}
+
+	if (!matches(*argv, "set")) {
+		NEXT_ARG_FWD();
+		return dcb_cmd_parse_dev(dcb, argc, argv,
+					 dcb_cmd_pfc_set, dcb_pfc_help_set);
+	}
+
+	fprintf(stderr, "What is \"%s\"?\n", *argv);
+	dcb_pfc_help();
+	return -EINVAL;
+}
--- a/devlink/Makefile
+++ b/devlink/Makefile
@ -1,21 +1,25 @@
-include ../Config
+# SPDX-License-Identifier: GPL-2.0
+include ../config.mk
+
+TARGETS :=
+
 ifeq ($(HAVE_MNL),y)

 DEVLINKOBJ = devlink.o mnlg.o
-TARGETS=devlink
-
-CFLAGS += $(shell $(PKG_CONFIG) libmnl --cflags)
-LDLIBS += $(shell $(PKG_CONFIG) libmnl --libs)
+TARGETS += devlink
+LDLIBS += -lm

 endif

 all: $(TARGETS) $(LIBS)

-devlink: $(DEVLINKOBJ)
+devlink: $(DEVLINKOBJ) $(LIBNETLINK)
 	$(QUIET_LINK)$(CC) $^ $(LDFLAGS) $(LDLIBS) -o $@

 install: all
-	install -m 0755 $(TARGETS) $(DESTDIR)$(SBINDIR)
+	for i in $(TARGETS); \
+	do install -m 0755 $$i $(DESTDIR)$(SBINDIR); \
+	done

 clean:
 	rm -f $(DEVLINKOBJ) $(TARGETS)
--- a/devlink/devlink.c
+++ b/devlink/devlink.c
--- a/devlink/mnlg.c
+++ b/devlink/mnlg.c
@ -14,10 +14,12 @@
 #include <string.h>
 #include <errno.h>
 #include <unistd.h>
-#include <time.h>
 #include <libmnl/libmnl.h>
 #include <linux/genetlink.h>

+#include "libnetlink.h"
+#include "mnl_utils.h"
+#include "utils.h"
 #include "mnlg.h"

 struct mnlg_socket {
@ -26,56 +28,13 @@ struct mnlg_socket {
 	uint32_t id;
 	uint8_t version;
 	unsigned int seq;
-	unsigned int portid;
 };

-static struct nlmsghdr *__mnlg_msg_prepare(struct mnlg_socket *nlg, uint8_t cmd,
-					   uint16_t flags, uint32_t id,
-					   uint8_t version)
-{
-	struct nlmsghdr *nlh;
-	struct genlmsghdr *genl;
-
-	nlh = mnl_nlmsg_put_header(nlg->buf);
-	nlh->nlmsg_type	= id;
-	nlh->nlmsg_flags = flags;
-	nlg->seq = time(NULL);
-	nlh->nlmsg_seq = nlg->seq;
-
-	genl = mnl_nlmsg_put_extra_header(nlh, sizeof(struct genlmsghdr));
-	genl->cmd = cmd;
-	genl->version = version;
-
-	return nlh;
-}
-
-struct nlmsghdr *mnlg_msg_prepare(struct mnlg_socket *nlg, uint8_t cmd,
-				  uint16_t flags)
-{
-	return __mnlg_msg_prepare(nlg, cmd, flags, nlg->id, nlg->version);
-}
-
-int mnlg_socket_send(struct mnlg_socket *nlg, const struct nlmsghdr *nlh)
+int mnlg_socket_send(struct mnlu_gen_socket *nlg, const struct nlmsghdr *nlh)
 {
 	return mnl_socket_sendto(nlg->nl, nlh, nlh->nlmsg_len);
 }

-int mnlg_socket_recv_run(struct mnlg_socket *nlg, mnl_cb_t data_cb, void *data)
-{
-	int err;
-
-	do {
-		err = mnl_socket_recvfrom(nlg->nl, nlg->buf,
-					  MNL_SOCKET_BUFFER_SIZE);
-		if (err <= 0)
-			break;
-		err = mnl_cb_run(nlg->buf, err, nlg->seq, nlg->portid,
-				 data_cb, data);
-	} while (err > 0);
-
-	return err;
-}
-
 struct group_info {
 	bool found;
 	uint32_t id;
@ -155,15 +114,17 @@ static int get_group_id_cb(const struct nlmsghdr *nlh, void *data)
 	return MNL_CB_OK;
 }

-int mnlg_socket_group_add(struct mnlg_socket *nlg, const char *group_name)
+int mnlg_socket_group_add(struct mnlu_gen_socket *nlg, const char *group_name)
 {
 	struct nlmsghdr *nlh;
 	struct group_info group_info;
 	int err;

-	nlh = __mnlg_msg_prepare(nlg, CTRL_CMD_GETFAMILY,
-				 NLM_F_REQUEST | NLM_F_ACK, GENL_ID_CTRL, 1);
-	mnl_attr_put_u32(nlh, CTRL_ATTR_FAMILY_ID, nlg->id);
+	nlh = _mnlu_gen_socket_cmd_prepare(nlg, CTRL_CMD_GETFAMILY,
+					   NLM_F_REQUEST | NLM_F_ACK,
+					   GENL_ID_CTRL, 1);
+
+	mnl_attr_put_u16(nlh, CTRL_ATTR_FAMILY_ID, nlg->family);

 	err = mnlg_socket_send(nlg, nlh);
 	if (err < 0)
@ -171,7 +132,7 @@ int mnlg_socket_group_add(struct mnlg_socket *nlg, const char *group_name)

 	group_info.found = false;
 	group_info.name = group_name;
-	err = mnlg_socket_recv_run(nlg, get_group_id_cb, &group_info);
+	err = mnlu_gen_socket_recv_run(nlg, get_group_id_cb, &group_info);
 	if (err < 0)
 		return err;

@ -188,87 +149,7 @@ int mnlg_socket_group_add(struct mnlg_socket *nlg, const char *group_name)
 	return 0;
 }

-static int get_family_id_attr_cb(const struct nlattr *attr, void *data)
+int mnlg_socket_get_fd(struct mnlu_gen_socket *nlg)
 {
-	const struct nlattr **tb = data;
-	int type = mnl_attr_get_type(attr);
-
-	if (mnl_attr_type_valid(attr, CTRL_ATTR_MAX) < 0)
-		return MNL_CB_ERROR;
-
-	if (type == CTRL_ATTR_FAMILY_ID &&
-	    mnl_attr_validate(attr, MNL_TYPE_U16) < 0)
-		return MNL_CB_ERROR;
-	tb[type] = attr;
-	return MNL_CB_OK;
-}
-
-static int get_family_id_cb(const struct nlmsghdr *nlh, void *data)
-{
-	uint32_t *p_id = data;
-	struct nlattr *tb[CTRL_ATTR_MAX + 1] = {};
-	struct genlmsghdr *genl = mnl_nlmsg_get_payload(nlh);
-
-	mnl_attr_parse(nlh, sizeof(*genl), get_family_id_attr_cb, tb);
-	if (!tb[CTRL_ATTR_FAMILY_ID])
-		return MNL_CB_ERROR;
-	*p_id = mnl_attr_get_u16(tb[CTRL_ATTR_FAMILY_ID]);
-	return MNL_CB_OK;
-}
-
-struct mnlg_socket *mnlg_socket_open(const char *family_name, uint8_t version)
-{
-	struct mnlg_socket *nlg;
-	struct nlmsghdr *nlh;
-	int err;
-
-	nlg = malloc(sizeof(*nlg));
-	if (!nlg)
-		return NULL;
-
-	nlg->buf = malloc(MNL_SOCKET_BUFFER_SIZE);
-	if (!nlg->buf)
-		goto err_buf_alloc;
-
-	nlg->nl = mnl_socket_open(NETLINK_GENERIC);
-	if (!nlg->nl)
-		goto err_mnl_socket_open;
-
-	err = mnl_socket_bind(nlg->nl, 0, MNL_SOCKET_AUTOPID);
-	if (err < 0)
-		goto err_mnl_socket_bind;
-
-	nlg->portid = mnl_socket_get_portid(nlg->nl);
-
-	nlh = __mnlg_msg_prepare(nlg, CTRL_CMD_GETFAMILY,
-				 NLM_F_REQUEST | NLM_F_ACK, GENL_ID_CTRL, 1);
-	mnl_attr_put_strz(nlh, CTRL_ATTR_FAMILY_NAME, family_name);
-
-	err = mnlg_socket_send(nlg, nlh);
-	if (err < 0)
-		goto err_mnlg_socket_send;
-
-	err = mnlg_socket_recv_run(nlg, get_family_id_cb, &nlg->id);
-	if (err < 0)
-		goto err_mnlg_socket_recv_run;
-
-	nlg->version = version;
-	return nlg;
-
-err_mnlg_socket_recv_run:
-err_mnlg_socket_send:
-err_mnl_socket_bind:
-	mnl_socket_close(nlg->nl);
-err_mnl_socket_open:
-	free(nlg->buf);
-err_buf_alloc:
-	free(nlg);
-	return NULL;
-}
-
-void mnlg_socket_close(struct mnlg_socket *nlg)
-{
-	mnl_socket_close(nlg->nl);
-	free(nlg->buf);
-	free(nlg);
+	return mnl_socket_get_fd(nlg->nl);
 }
--- a/devlink/mnlg.h
+++ b/devlink/mnlg.h
@ -14,14 +14,10 @@

 #include <libmnl/libmnl.h>

-struct mnlg_socket;
+struct mnlu_gen_socket;

-struct nlmsghdr *mnlg_msg_prepare(struct mnlg_socket *nlg, uint8_t cmd,
-				  uint16_t flags);
-int mnlg_socket_send(struct mnlg_socket *nlg, const struct nlmsghdr *nlh);
-int mnlg_socket_recv_run(struct mnlg_socket *nlg, mnl_cb_t data_cb, void *data);
-int mnlg_socket_group_add(struct mnlg_socket *nlg, const char *group_name);
-struct mnlg_socket *mnlg_socket_open(const char *family_name, uint8_t version);
-void mnlg_socket_close(struct mnlg_socket *nlg);
+int mnlg_socket_send(struct mnlu_gen_socket *nlg, const struct nlmsghdr *nlh);
+int mnlg_socket_group_add(struct mnlu_gen_socket *nlg, const char *group_name);
+int mnlg_socket_get_fd(struct mnlu_gen_socket *nlg);

 #endif /* _MNLG_H_ */
--- a/doc/Makefile
+++ b/doc/Makefile
@ -1,73 +0,0 @@
-PSFILES=ip-cref.ps ip-tunnels.ps api-ip6-flowlabels.ps ss.ps nstat.ps arpd.ps rtstat.ps tc-filters.ps
-# tc-cref.ps
-# api-rtnl.tex api-pmtudisc.tex api-news.tex
-# iki-netdev.ps iki-neighdst.ps
-
-
-LATEX=latex
-DVIPS=dvips
-SGML2DVI=sgml2latex
-SGML2HTML=sgml2html -s 0
-LPR=lpr -Zsduplex
-SHELL=bash
-PAGESIZE=a4
-PAGESPERPAGE=2
-
-HTMLFILES=$(subst .sgml,.html,$(shell echo *.sgml))
-DVIFILES=$(subst .ps,.dvi,$(PSFILES))
-PDFFILES=$(subst .ps,.pdf,$(PSFILES))
-
-
-all: pstwocol
-
-pstwocol: $(PSFILES)
-
-html: $(HTMLFILES)
-
-dvi: $(DVIFILES)
-
-pdf: $(PDFFILES)
-
-print: $(PSFILES)
-	$(LPR) $(PSFILES)
-
-%.tex: %.sgml
-	$(SGML2DVI) --output=tex $<
-
-%.dvi: %.sgml
-	$(SGML2DVI) --output=dvi $<
-
-%.dvi: %.tex
-	@set -e; pass=2; echo "Running LaTeX $<"; \
-	while [ `$(LATEX) $< </dev/null 2>&1 | \
-		 grep -c '^\(LaTeX Warning: Label(s) may\|No file \|! Emergency stop\)'` -ge 1 ]; do \
-		if [ $$pass -gt 3 ]; then \
-			echo "Seems, something is wrong. Try by hands." ; exit 1 ; \
-		fi; \
-		echo "Re-running LaTeX $<, $${pass}d pass"; pass=$$[$$pass + 1]; \
-	done
-
-%.pdf: %.tex
-	@set -e; pass=2; echo "Running pdfLaTeX $<"; \
-	while [ `pdflatex $< </dev/null 2>&1 | \
-		 grep -c '^\(LaTeX Warning: Label(s) may\|No file \|! Emergency stop\)'` -ge 1 ]; do \
-		if [ $$pass -gt 3 ]; then \
-			echo "Seems, something is wrong. Try by hands." ; exit 1 ; \
-		fi; \
-		echo "Re-running pdfLaTeX $<, $${pass}d pass"; pass=$$[$$pass + 1]; \
-	done
-#%.pdf: %.ps
-#	ps2pdf $<
-
-%.ps: %.dvi
-	$(DVIPS) $< -o $@
-
-%.html: %.sgml
-	$(SGML2HTML) $<
-
-install:
-	install -m 0644 $(shell echo *.tex) $(DESTDIR)$(DOCDIR)
-	install -m 0644 $(shell echo *.sgml) $(DESTDIR)$(DOCDIR)
-
-clean:
-	rm -f *.aux *.log *.toc $(PSFILES) $(DVIFILES) *.html *.pdf
--- a/doc/Plan
+++ b/doc/Plan
@ -1,16 +0,0 @@
-Partially finished work.
-
-1.  User Reference manuals.
-1.1 IP Command reference (ip-cref.tex, published)
-1.2 TC Command reference (tc-cref.tex)
-1.3 IP tunnels (ip-tunnels.tex, published)
-
-2.  Linux-2.2 Networking API
-2.1 RTNETLINK (api-rtnl.tex)
-2.2 Path MTU Discovery (api-pmtudisc.tex)
-2.3 IPv6 Flow Labels (api-ip6-flowlabels.tex, published)
-2.4 Miscellaneous extensions (api-misc.tex)
-
-3.  Linux-2.2 Networking Intra-Kernel Interfaces
-3.1 NetDev --- Networking Devices and netdev... (iki-netdev.tex)
-3.2 Neighbour cache and destination cache. (iki-neighdst.tex)
--- a/doc/SNAPSHOT.tex
+++ b/doc/SNAPSHOT.tex
@ -1 +0,0 @@
-\def\Draft{020116}
--- a/doc/actions/actions-general
+++ b/doc/actions/actions-general
@ -6,8 +6,8 @@ What is it?
 -----------

 An extension to the filtering/classification architecture of Linux Traffic
-Control. 
-Up to 2.6.8 the only action that could be "attached" to a filter was policing. 
+Control.
+Up to 2.6.8 the only action that could be "attached" to a filter was policing.
 i.e you could say something like:

 -----
@ -17,11 +17,11 @@ tc filter add dev lo parent ffff: protocol ip prio 10 u32 match ip src \

 which implies "if a packet is seen on the ingress of the lo device with
 a source IP address of 127.0.0.1/32 we give it a classification id  of 1:1 and
-we execute a policing action which rate limits its bandwidth utilization 
+we execute a policing action which rate limits its bandwidth utilization
 to 1.5Mbps".

 The new extensions allow for more than just policing actions to be added.
-They are also fully backward compatible. If you have a kernel that doesnt
+They are also fully backward compatible. If you have a kernel that doesn't
 understand them, then the effect is null i.e if you have a newer tc
 but older kernel, the actions are not installed. Likewise if you
 have a newer kernel but older tc, obviously the tc will use current
@ -29,9 +29,9 @@ syntax which will work fine. Of course to get the required effect you need
 both newer tc and kernel. If you are reading this you have the
 right tc ;->

-A side effect is that we can now get stateless firewalling to work with tc. 
+A side effect is that we can now get stateless firewalling to work with tc.
 Essentially this is now an alternative to iptables.
-I wont go into details of my dislike for iptables at times, but 
+I won't go into details of my dislike for iptables at times, but
 scalability is one of the main issues; however, if you need stateful
 classification - use netfilter (for now).

@ -61,7 +61,7 @@ tc filter add dev lo parent 1:0 protocol ip prio 10 u32 \
 match ip src 127.0.0.1/32 flowid 1:1 \
 action police mtu 4000 rate 1500kbit burst 90k

-" generic Actions" (gact) at the moment are: 
+" generic Actions" (gact) at the moment are:
 { drop, pass, reclassify, continue}
 (If you have others, no listed here give me a reason and we will add them)
 +drop says to drop the packet
@ -77,7 +77,7 @@ iptable target. I have only tested with mangler targets up to now.
 In terms of hooks:
 *ingress is mapped to pre-routing hook
 *egress is mapped to post-routing hook
-I dont see much value in the other hooks, if you see it and email me good
+I don't see much value in the other hooks, if you see it and email me good
 reasons, the addition is trivial.

 Example syntax for iptables targets usage becomes:
@ -93,43 +93,43 @@ decimal 12, then use flowid 1:c.

 3) A feature i call pipe
 The motivation is derived from Unix pipe mechanism but applied to packets.
-Essentially take a matching packet and pass it through 
+Essentially take a matching packet and pass it through
 action1 | action2 | action3 etc.
 You could do something similar to this with the tc policer and the "continue"
-operator but this rather restricts it to just the policer and requires 
-multiple rules (and lookups, hence quiet inefficient); 
+operator but this rather restricts it to just the policer and requires
+multiple rules (and lookups, hence quite inefficient);

-as an example -- and please note that this is just an example _not_ The 
+as an example -- and please note that this is just an example _not_ The
 Word Youve Been Waiting For (yes i have had problems giving examples
 which ended becoming dogma in documents and people modifying them a little
-to look clever); 
+to look clever);

-i selected the metering rates to be small so that i can show better how 
+i selected the metering rates to be small so that i can show better how
 things work.
- 
-The script below does the following: 
- an incoming packet from 10.0.0.21 is first given a firewall mark of 1. 

- It is then metered to make sure it does not exceed its allocated rate of 
-1Kbps. If it doesnt exceed rate, this is where we terminate action execution.
+The script below does the following:
+- an incoming packet from 10.0.0.21 is first given a firewall mark of 1.

- If it does exceed its rate, its "color" changes to a mark of 2 and it is 
+- It is then metered to make sure it does not exceed its allocated rate of
+1Kbps. If it doesn't exceed rate, this is where we terminate action execution.
+
+- If it does exceed its rate, its "color" changes to a mark of 2 and it is
 then passed through a second meter.

-The second meter is shared across all flows on that device [i am suprised 
-that this seems to be not a well know feature of the policer; Bert was telling 
+-The second meter is shared across all flows on that device [i am surprised
+that this seems to be not a well known feature of the policer; Bert was telling
 me that someone was writing a qdisc just to do sharing across multiple devices;
 it must be the summer heat again; weve had someone doing that every year around
-summer  -- the key to sharing is to use a operator "index" in your policer 
-rules (example "index 20"). All your rules have to use the same index to 
+summer  -- the key to sharing is to use an operator "index" in your policer
+rules (example "index 20"). All your rules have to use the same index to
 share.]
- 
+
 -If the second meter is exceeded the color of the flow changes further to 3.

 -We then pass the packet to another meter which is shared across all devices
 in the system. If this meter is exceeded we drop the packet.

-Note the mark can be used further up the system to do things like policy 
+Note the mark can be used further up the system to do things like policy
 or more interesting things on the egress.

 ------------------ cut here -------------------------------
@ -145,7 +145,7 @@ u32 match ip src 10.0.0.21/32 flowid 1:15 \
 action ipt -j mark --set-mark 1 index 2 \
 #
 # then pass it through a policer which allows 1kbps; if the flow
-# doesnt exceed that rate, this is where we stop, if it exceeds we
+# doesn't exceed that rate, this is where we stop, if it exceeds we
 # pipe the packet to the next action
 action police rate 1kbit burst 9k pipe \
 #
@ -161,31 +161,31 @@ action ipt -j mark --set-mark 3 \
 # and then attempt to borrow from a meter used by all devices in the
 # system. Should this be exceeded, drop the packet on the floor.
 action police index 20 mtu 5000 rate 1kbit burst 90k drop
--------------------------------- 
+---------------------------------

-Now lets see the actions installed with 
+Now lets see the actions installed with
 "tc filter show parent ffff: dev eth0"

 -------- output -----------
 jroot# tc filter show parent ffff: dev eth0
-filter protocol ip pref 1 u32 
-filter protocol ip pref 1 u32 fh 800: ht divisor 1 
-filter protocol ip pref 1 u32 fh 800::800 order 2048 key ht 800 bkt 0 flowid 1:15 
+filter protocol ip pref 1 u32
+filter protocol ip pref 1 u32 fh 800: ht divisor 1
+filter protocol ip pref 1 u32 fh 800::800 order 2048 key ht 800 bkt 0 flowid 1:15

-   action order 1: tablename: mangle  hook: NF_IP_PRE_ROUTING 
+   action order 1: tablename: mangle  hook: NF_IP_PRE_ROUTING
        target MARK set 0x1  index 2

-   action order 2: police 1 action pipe rate 1Kbit burst 9Kb mtu 2Kb 
+   action order 2: police 1 action pipe rate 1Kbit burst 9Kb mtu 2Kb

-   action order 3: tablename: mangle  hook: NF_IP_PRE_ROUTING 
+   action order 3: tablename: mangle  hook: NF_IP_PRE_ROUTING
        target MARK set 0x2  index 1

-   action order 4: police 30 action pipe rate 1Kbit burst 10Kb mtu 5000b 
+   action order 4: police 30 action pipe rate 1Kbit burst 10Kb mtu 5000b

-   action order 5: tablename: mangle  hook: NF_IP_PRE_ROUTING 
+   action order 5: tablename: mangle  hook: NF_IP_PRE_ROUTING
        target MARK set 0x3  index 3

-   action order 6: police 20 action drop rate 1Kbit burst 90Kb mtu 5000b 
+   action order 6: police 20 action drop rate 1Kbit burst 90Kb mtu 5000b

  match 0a000015/ffffffff at 12
 -------------------------------
@ -209,31 +209,31 @@ Now lets take a look at the stats with "tc -s filter show parent ffff: dev eth0"

 --------------
 jroot# tc -s filter show parent ffff: dev eth0
-filter protocol ip pref 1 u32 
-filter protocol ip pref 1 u32 fh 800: ht divisor 1 
+filter protocol ip pref 1 u32
+filter protocol ip pref 1 u32 fh 800: ht divisor 1
 filter protocol ip pref 1 u32 fh 800::800 order 2048 key ht 800 bkt 0 flowid 1:1
-5 
+5

-   action order 1: tablename: mangle  hook: NF_IP_PRE_ROUTING 
+   action order 1: tablename: mangle  hook: NF_IP_PRE_ROUTING
        target MARK set 0x1  index 2
-         Sent 188832 bytes 2248 pkts (dropped 0, overlimits 0) 
+         Sent 188832 bytes 2248 pkts (dropped 0, overlimits 0)

-   action order 2: police 1 action pipe rate 1Kbit burst 9Kb mtu 2Kb 
-         Sent 188832 bytes 2248 pkts (dropped 0, overlimits 2122) 
+   action order 2: police 1 action pipe rate 1Kbit burst 9Kb mtu 2Kb
+         Sent 188832 bytes 2248 pkts (dropped 0, overlimits 2122)

-   action order 3: tablename: mangle  hook: NF_IP_PRE_ROUTING 
+   action order 3: tablename: mangle  hook: NF_IP_PRE_ROUTING
        target MARK set 0x2  index 1
-         Sent 178248 bytes 2122 pkts (dropped 0, overlimits 0) 
+         Sent 178248 bytes 2122 pkts (dropped 0, overlimits 0)

-   action order 4: police 30 action pipe rate 1Kbit burst 10Kb mtu 5000b 
-         Sent 178248 bytes 2122 pkts (dropped 0, overlimits 1945) 
+   action order 4: police 30 action pipe rate 1Kbit burst 10Kb mtu 5000b
+         Sent 178248 bytes 2122 pkts (dropped 0, overlimits 1945)

-   action order 5: tablename: mangle  hook: NF_IP_PRE_ROUTING 
+   action order 5: tablename: mangle  hook: NF_IP_PRE_ROUTING
        target MARK set 0x3  index 3
-         Sent 163380 bytes 1945 pkts (dropped 0, overlimits 0) 
+         Sent 163380 bytes 1945 pkts (dropped 0, overlimits 0)

-   action order 6: police 20 action drop rate 1Kbit burst 90Kb mtu 5000b 
-         Sent 163380 bytes 1945 pkts (dropped 0, overlimits 437) 
+   action order 6: police 20 action drop rate 1Kbit burst 90Kb mtu 5000b
+         Sent 163380 bytes 1945 pkts (dropped 0, overlimits 437)

  match 0a000015/ffffffff at 12
 -------------------------------
@ -241,7 +241,7 @@ filter protocol ip pref 1 u32 fh 800::800 order 2048 key ht 800 bkt 0 flowid 1:1
 Neat, eh?


-Wanna write an action module?
+Want to write an action module?
 ------------------------------
 It's easy. Either look at the code or send me email. I will document at
 some point; will also accept documentation.
@ -254,4 +254,3 @@ At the moment the focus has been on getting the architecture in place.
 Expect new things in the spurious time i have to work on this
 (particularly around end of year when i have typically get time off
 from work).
-
--- a/doc/actions/gact-usage
+++ b/doc/actions/gact-usage
@ -1,16 +1,16 @@

 gact <ACTION> [RAND] [INDEX]

-Where: 
-	ACTION := reclassify | drop | continue | pass | ok 
+Where:
+	ACTION := reclassify | drop | continue | pass | ok
 	RAND := random <RANDTYPE> <ACTION> <VAL>
 	RANDTYPE := netrand | determ
        VAL : = value not exceeding 10000
        INDEX := index value used
-      
+
 ACTION semantics
 - pass and ok are equivalent to accept
- continue allows to restart classification lookup
+- continue allows one to restart classification lookup
 - drop drops packets
 - reclassify implies continue classification where we left off

@ -42,14 +42,14 @@ filter u32 fh 800::800 order 2048 key ht 800 bkt 0 flowid 1:16  (rule hit 32 suc
         random type none pass val 0
         index 1 ref 1 bind 1 installed 59 sec used 35 sec
         Sent 1680 bytes 20 pkts (dropped 20, overlimits 0 )
- 
+
 ----

 # example 2
 #allow 1 out 10 randomly using the netrand generator
 tc filter add dev eth0 parent ffff: protocol ip prio 6 u32 match ip src \
 10.0.0.9/32 flowid 1:16 action drop random netrand ok 10
- 
+
 ping -c 20 10.0.0.9

 ----
@ -59,14 +59,14 @@ filter protocol ip pref 6 u32 filter protocol ip pref 6 u32 fh 800: ht divisor 1
         random type netrand pass val 10
         index 5 ref 1 bind 1 installed 49 sec used 25 sec
         Sent 1680 bytes 20 pkts (dropped 16, overlimits 0 )
-                                                                                
+
 --------
 #alternative: deterministically accept every second packet
 tc filter add dev eth0 parent ffff: protocol ip prio 6 u32 match ip src \
 10.0.0.9/32 flowid 1:16 action drop random determ ok 2
-                                                                                
+
 ping -c 20 10.0.0.9
-                                                                                
+
 tc -s filter show parent ffff: dev eth0
 -----
 filter protocol ip pref 6 u32 filter protocol ip pref 6 u32 fh 800: ht divisor 1filter protocol ip pref 6 u32 fh 800::800 order 2048 key ht 800 bkt 0 flowid 1:16  (rule hit 20 success 20)
@ -76,4 +76,3 @@ filter protocol ip pref 6 u32 filter protocol ip pref 6 u32 fh 800: ht divisor 1
         index 4 ref 1 bind 1 installed 118 sec used 82 sec
         Sent 1680 bytes 20 pkts (dropped 10, overlimits 0 )
 -----
-
--- a/doc/actions/ifb-README
+++ b/doc/actions/ifb-README
@ -6,47 +6,47 @@ with a _lot_ less code.
 Known IMQ/IFB USES
 ------------------

-As far as i know the reasons listed below is why people use IMQ. 
+As far as i know the reasons listed below are why people use IMQ.
 It would be nice to know of anything else that i missed.

 1) qdiscs/policies that are per device as opposed to system wide.
 IFB allows for sharing.

 2) Allows for queueing incoming traffic for shaping instead of
-dropping. I am not aware of any study that shows policing is 
+dropping. I am not aware of any study that shows policing is
 worse than shaping in achieving the end goal of rate control.
 I would be interested if anyone is experimenting.

-3) Very interesting use: if you are serving p2p you may wanna give 
-preference to your own localy originated traffic (when responses come back)
+3) Very interesting use: if you are serving p2p you may want to give
+preference to your own locally originated traffic (when responses come back)
 vs someone using your system to do bittorrent. So QoSing based on state
-comes in as the solution. What people did to achive this was stick
+comes in as the solution. What people did to achieve this was stick
 the IMQ somewhere prelocal hook.
 I think this is a pretty neat feature to have in Linux in general.
 (i.e not just for IMQ).
-But i wont go back to putting netfilter hooks in the device to satisfy
-this.  I also dont think its worth it hacking ifb some more to be 
+But i won't go back to putting netfilter hooks in the device to satisfy
+this.  I also don't think it's worth it hacking ifb some more to be
 aware of say L3 info and play ip rule tricks to achieve this.
--> Instead the plan is to have a contrack related action. This action will
-selectively either query/create contrack state on incoming packets. 
-Packets could then be redirected to ifb based on what happens -> eg 
-on incoming packets; if we find they are of known state we could send to 
-a different queue than one which didnt have existing state. This
+--> Instead the plan is to have a conntrack related action. This action will
+selectively either query/create conntrack state on incoming packets.
+Packets could then be redirected to ifb based on what happens -> eg
+on incoming packets; if we find they are of known state we could send to
+a different queue than one which didn't have existing state. This
 all however is dependent on whatever rules the admin enters.

 At the moment this 3rd function does not exist yet. I have decided that
-instead of sitting on the patch for another year, to release it and then 
-if theres pressure i will add this feature.
+instead of sitting on the patch for another year, to release it and then
+if there is pressure i will add this feature.

 An example, to provide functionality that most people use IMQ for below:

 --------
 export TC="/sbin/tc"

-$TC qdisc add dev ifb0 root handle 1: prio 
+$TC qdisc add dev ifb0 root handle 1: prio
 $TC qdisc add dev ifb0 parent 1:1 handle 10: sfq
 $TC qdisc add dev ifb0 parent 1:2 handle 20: tbf rate 20kbit buffer 1600 limit 3000
-$TC qdisc add dev ifb0 parent 1:3 handle 30: sfq                                
+$TC qdisc add dev ifb0 parent 1:3 handle 30: sfq
 $TC filter add dev ifb0 protocol ip pref 1 parent 1: handle 1 fw classid 1:1
 $TC filter add dev ifb0 protocol ip pref 2 parent 1: handle 2 fw classid 1:2

@ -54,7 +54,7 @@ ifconfig ifb0 up

 $TC qdisc add dev eth0 ingress

-# redirect all IP packets arriving in eth0 to ifb0 
+# redirect all IP packets arriving in eth0 to ifb0
 # use mark 1 --> puts them onto class 1:1
 $TC filter add dev eth0 parent ffff: protocol ip prio 10 u32 \
 match u32 0 0 flowid 1:1 \
@ -77,44 +77,44 @@ PING 10.22 (10.0.0.22): 56 data bytes
 --- 10.22 ping statistics ---
 3 packets transmitted, 3 packets received, 0% packet loss
 round-trip min/avg/max = 0.6/1.3/2.8 ms
-[root@jzny action-tests]# 
+[root@jzny action-tests]#
 -----
 Now look at some stats:

 ---
 [root@jmandrake]:~# $TC -s filter show parent ffff: dev eth0
-filter protocol ip pref 10 u32 
-filter protocol ip pref 10 u32 fh 800: ht divisor 1 
-filter protocol ip pref 10 u32 fh 800::800 order 2048 key ht 800 bkt 0 flowid 1:1 
+filter protocol ip pref 10 u32
+filter protocol ip pref 10 u32 fh 800: ht divisor 1
+filter protocol ip pref 10 u32 fh 800::800 order 2048 key ht 800 bkt 0 flowid 1:1
  match 00000000/00000000 at 0
-        action order 1: tablename: mangle  hook: NF_IP_PRE_ROUTING 
-        target MARK set 0x1  
-        index 1 ref 1 bind 1 installed 4195sec  used 27sec 
-         Sent 252 bytes 3 pkts (dropped 0, overlimits 0) 
+        action order 1: tablename: mangle  hook: NF_IP_PRE_ROUTING
+        target MARK set 0x1
+        index 1 ref 1 bind 1 installed 4195sec  used 27sec
+         Sent 252 bytes 3 pkts (dropped 0, overlimits 0)

        action order 2: mirred (Egress Redirect to device ifb0) stolen
        index 1 ref 1 bind 1 installed 165 sec used 27 sec
-         Sent 252 bytes 3 pkts (dropped 0, overlimits 0) 
+         Sent 252 bytes 3 pkts (dropped 0, overlimits 0)

 [root@jmandrake]:~# $TC -s qdisc
-qdisc sfq 30: dev ifb0 limit 128p quantum 1514b 
- Sent 0 bytes 0 pkts (dropped 0, overlimits 0) 
-qdisc tbf 20: dev ifb0 rate 20Kbit burst 1575b lat 2147.5s 
- Sent 210 bytes 3 pkts (dropped 0, overlimits 0) 
-qdisc sfq 10: dev ifb0 limit 128p quantum 1514b 
- Sent 294 bytes 3 pkts (dropped 0, overlimits 0) 
+qdisc sfq 30: dev ifb0 limit 128p quantum 1514b
+ Sent 0 bytes 0 pkts (dropped 0, overlimits 0)
+qdisc tbf 20: dev ifb0 rate 20Kbit burst 1575b lat 2147.5s
+ Sent 210 bytes 3 pkts (dropped 0, overlimits 0)
+qdisc sfq 10: dev ifb0 limit 128p quantum 1514b
+ Sent 294 bytes 3 pkts (dropped 0, overlimits 0)
 qdisc prio 1: dev ifb0 bands 3 priomap  1 2 2 2 1 2 0 0 1 1 1 1 1 1 1 1
- Sent 504 bytes 6 pkts (dropped 0, overlimits 0) 
-qdisc ingress ffff: dev eth0 ---------------- 
- Sent 308 bytes 5 pkts (dropped 0, overlimits 0) 
+ Sent 504 bytes 6 pkts (dropped 0, overlimits 0)
+qdisc ingress ffff: dev eth0 ----------------
+ Sent 308 bytes 5 pkts (dropped 0, overlimits 0)

 [root@jmandrake]:~# ifconfig ifb0
-ifb0    Link encap:Ethernet  HWaddr 00:00:00:00:00:00  
+ifb0    Link encap:Ethernet  HWaddr 00:00:00:00:00:00
          inet6 addr: fe80::200:ff:fe00:0/64 Scope:Link
          UP BROADCAST RUNNING NOARP  MTU:1500  Metric:1
          RX packets:6 errors:0 dropped:3 overruns:0 frame:0
          TX packets:3 errors:0 dropped:0 overruns:0 carrier:0
-          collisions:0 txqueuelen:32 
+          collisions:0 txqueuelen:32
          RX bytes:504 (504.0 b)  TX bytes:252 (252.0 b)
 -----

--- a/doc/actions/mirred-usage
+++ b/doc/actions/mirred-usage
@ -7,10 +7,10 @@ flow to be mirrored. High end switches typically can select based
 on more than just a port (eg a 5 tuple classifier). They may also be
 capable of redirecting.

-Usage: 
+Usage:

-mirred <DIRECTION> <ACTION> [index INDEX] <dev DEVICENAME> 
-where: 
+mirred <DIRECTION> <ACTION> [index INDEX] <dev DEVICENAME>
+where:
 DIRECTION := <ingress | egress>
 ACTION := <mirror | redirect>
 INDEX is the specific policy instance id
@ -18,7 +18,7 @@ DEVICENAME is the devicename

 Direction:
 - Ingress is not supported at the moment. It will be in the
-future as well as mirror/redirecting to a socket. 
+future as well as mirror/redirecting to a socket.

 Action:
 - Mirror takes a copy of the packet and sends it to specified
@ -26,17 +26,17 @@ dev ("port" in ethernet switch/bridging terminology)
 - redirect
 steals the packet and redirects to specified destination dev.

-What NOT to do if you dont want your machine to crash:
+What NOT to do if you don't want your machine to crash:
 ------------------------------------------------------

-Do not create loops! 
+Do not create loops!
 Loops are not hard to create in the egress qdiscs.

-Here are simple rules to follow if you dont want to get
+Here are simple rules to follow if you don't want to get
 hurt:
 A) Do not have the same packet go to same netdevice twice
 in a single graph of policies. Your machine will just hang!
-This is design intent _not a bug_ to teach you some lessons. 
+This is design intent _not a bug_ to teach you some lessons.

 In the future if there are easy ways to do this in the kernel
 without affecting other packets not interested in this feature
@ -51,7 +51,7 @@ B) Do not redirect from one IFB device to another.
 Remember that IFB is a very specialized case of packet redirecting
 device. Instead of redirecting it puts packets at the exact spot
 on the stack it found them from.
-Redirecting from ifbX->ifbY will actually not crash your machine but your 
+Redirecting from ifbX->ifbY will actually not crash your machine but your
 packets will all be dropped (this is much simpler to detect
 and resolve and is only affecting users of ifb as opposed to the
 whole stack).
@ -64,7 +64,7 @@ Some examples:

 1) Mirror all packets arriving on eth0 to be sent out on eth1.
 You may have a sniffer or some accounting box hooked up on eth1.
- 
+
 ---
 tc qdisc add dev eth0 ingress
 tc filter add dev eth0 parent ffff: protocol ip prio 10 u32 \
@ -100,7 +100,7 @@ stack (i.e ping would work).
 3) Even more funky example:

 #
-#allow 1 out 10 packets on ingress of lo to randomly make it to the 
+#allow 1 out 10 packets on ingress of lo to randomly make it to the
 # host A (Randomness uses the netrand generator)
 #
 ---
@ -111,9 +111,9 @@ action mirred egress mirror dev eth0
 ---

 4)
-# for packets from 10.0.0.9 going out on eth0 (could be local 
-# IP or something # we are forwarding) - 
-# if exceeding a 100Kbps rate, then redirect to eth1 
+# for packets from 10.0.0.9 going out on eth0 (could be local
+# IP or something # we are forwarding) -
+# if exceeding a 100Kbps rate, then redirect to eth1
 #

 ---
@ -129,7 +129,7 @@ so you could tcpdump them (dummy by defaults drops all packets it sees).
 This is a very useful debug feature.

 Let's say you are policing packets from alias 192.168.200.200/32
-you dont want those to exceed 100kbps going out.
+you don't want those to exceed 100kbps going out.

 ---
 tc qdisc add dev eth0 handle 1:0 root prio
@ -158,7 +158,7 @@ Essentially a good debugging/logging interface (sort of like
 BSDs specialized log device does without needing one).

 If you replace mirror with redirect, those packets will be
-blackholed and will never make it out. 
+blackholed and will never make it out.

 cheers,
 jamal
--- a/doc/api-ip6-flowlabels.tex
+++ b/doc/api-ip6-flowlabels.tex
@ -1,429 +0,0 @@
-\documentstyle[12pt,twoside]{article}
-\def\TITLE{IPv6 Flow Labels}
-\input preamble
-\begin{center}
-\Large\bf IPv6 Flow Labels in Linux-2.2.
-\end{center}
-
-
-\begin{center}
-{ \large Alexey~N.~Kuznetsov } \\
-\em Institute for Nuclear Research, Moscow \\
-\verb|kuznet@ms2.inr.ac.ru| \\
-\rm April 11, 1999
-\end{center}
-
-\vspace{5mm}
-
-\tableofcontents
-
-\section{Introduction.}
-
-Every IPv6 packet carries 28 bits of flow information. RFC2460 splits
-these bits to two fields: 8 bits of traffic class (or DS field, if you
-prefer this term) and 20 bits of flow label. Currently there exist
-no well-defined API to manage IPv6 flow information. In this document
-I describe an attempt to design the API for Linux-2.2 IPv6 stack.
-
-\vskip 1mm
-
-The API must solve the following tasks:
-
-\begin{enumerate}
-
-\item To allow user to set traffic class bits.
-
-\item To allow user to read traffic class bits of received packets.
-This feature is not so useful as the first one, however it will be
-necessary f.e.\ to implement ECN [RFC2481] for datagram oriented services
-or to implement receiver side of SRP or another end-to-end protocol
-using traffic class bits.
-
-\item To assign flow labels to packets sent by user.
-
-\item To get flow labels of received packets. I do not know
-any applications of this feature, but it is possible that receiver will
-want to use flow labels to distinguish sub-flows.
-
-\item To allocate flow labels in the way, compliant to RFC2460. Namely:
-
-\begin{itemize}
-\item
-Flow labels must be uniformly distributed (pseudo-)random numbers,
-so that any subset of 20 bits can be used as hash key.
-
-\item
-Flows with coinciding source address and flow label must have identical
-destination address and not-fragmentable extensions headers (i.e.\ 
-hop by hop options and all the headers up to and including routing header,
-if it is present.)
-
-\begin{NB}
-There is a hole in specs: some hop-by-hop options can be
-defined only on per-packet base (f.e.\  jumbo payload option).
-Essentially, it means that such options cannot present in packets
-with flow labels.
-\end{NB}
-\begin{NB}
-NB notes here and below reflect only my personal opinion,
-they should be read with smile or should not be read at all :-).
-\end{NB}
-
-
-\item
-Flow labels have finite lifetime and source is not allowed to reuse
-flow label for another flow within the maximal lifetime has expired,
-so that intermediate nodes will be able to invalidate flow state before
-the label is taken over by another flow.
-Flow state, including lifetime, is propagated along datagram path
-by some application specific methods
-(f.e.\ in RSVP PATH messages or in some hop-by-hop option).
-
-
-\end{itemize}
-
-\end{enumerate}
-
-\section{Sending/receiving flow information.}
-
-\paragraph{Discussion.}
-\addcontentsline{toc}{subsection}{Discussion}
-It was proposed (Where? I do not remember any explicit statement)
-to solve the first four tasks using
-\verb|sin6_flowinfo| field added to \verb|struct| \verb|sockaddr_in6|
-(see RFC2553).
-
-\begin{NB}
-	This method is difficult to consider as reasonable, because it
-	puts additional overhead to all the services, despite of only
-	very small subset of them (none, to be more exact) really use it.
-	It contradicts both to IETF spirit and the letter. Before RFC2553
-	one justification existed, IPv6 address alignment left 4 byte
-	hole in \verb|sockaddr_in6| in any case. Now it has no justification.
-\end{NB}
-
-We have two problems with this method. The first one is common for all OSes:
-if \verb|recvmsg()| initializes \verb|sin6_flowinfo| to flow info
-of received packet, we loose one very important property of BSD socket API,
-namely, we are not allowed to use received address for reply directly
-and have to mangle it, even if we are not interested in flowinfo subtleties.
-
-\begin{NB}
-	RFC2553 adds new requirement: to clear \verb|sin6_flowinfo|.
-	Certainly, it is not solution but rather attempt to force applications
-	to make unnecessary work. Well, as usually, one mistake in design
-	is followed by attempts	to patch the hole and more mistakes...
-\end{NB}
-
-Another problem is Linux specific. Historically Linux IPv6 did not
-initialize \verb|sin6_flowinfo| at all, so that, if kernel does not
-support flow labels, this field is not zero, but a random number.
-Some applications also did not take care about it. 
-
-\begin{NB}
-Following RFC2553 such applications can be considered as broken,
-but I still think that they are right: clearing all the address
-before filling known fields is robust but stupid solution.
-Useless wasting CPU cycles and
-memory bandwidth is not a good idea. Such patches are acceptable
-as temporary hacks, but not as standard of the future.
-\end{NB}
-
-
-\paragraph{Implementation.}
-\addcontentsline{toc}{subsection}{Implementation}
-By default Linux IPv6 does not read \verb|sin6_flowinfo| field
-assuming that common applications are not obliged to initialize it
-and are permitted to consider it as pure alignment padding.
-In order to tell kernel that application
-is aware of this field, it is necessary to set socket option
-\verb|IPV6_FLOWINFO_SEND|.
-
-\begin{verbatim}
-  int on = 1;
-  setsockopt(sock, SOL_IPV6, IPV6_FLOWINFO_SEND,
-             (void*)&on, sizeof(on));
-\end{verbatim}
-
-Linux kernel never fills \verb|sin6_flowinfo| field, when passing
-message to user space, though the kernels which support flow labels
-initialize it to zero. If user wants to get received flowinfo, he
-will set option \verb|IPV6_FLOWINFO| and after this he will receive
-flowinfo as ancillary data object of type \verb|IPV6_FLOWINFO|
-(cf.\ RFC2292).
-
-\begin{verbatim}
-  int on = 1;
-  setsockopt(sock, SOL_IPV6, IPV6_FLOWINFO, (void*)&on, sizeof(on));
-\end{verbatim}
-
-Flowinfo received and latched by a connected TCP socket also may be fetched
-with \verb|getsockopt()| \verb|IPV6_PKTOPTIONS| together with
-another optional information.
-
-Besides that, in the spirit of RFC2292 the option \verb|IPV6_FLOWINFO|
-may be used as alternative way to send flowinfo with \verb|sendmsg()| or
-to latch it with \verb|IPV6_PKTOPTIONS|.
-
-\paragraph{Note about IPv6 options and destination address.}
-\addcontentsline{toc}{subsection}{IPv6 options and destination address}
-If \verb|sin6_flowinfo| does contain not zero flow label,
-destination address in \verb|sin6_addr| and non-fragmentable
-extension headers are ignored. Instead, kernel uses the values
-cached at flow setup (see below). However, for connected sockets
-kernel prefers the values set at connection time.
-
-\paragraph{Example.}
-\addcontentsline{toc}{subsection}{Example}
-After setting socket option \verb|IPV6_FLOWINFO|
-flowlabel and DS field are received as ancillary data object
-of type \verb|IPV6_FLOWINFO| and level \verb|SOL_IPV6|.
-In the cases when it is convenient to use \verb|recvfrom(2)|,
-it is possible to replace library variant with your own one,
-sort of:
-
-\begin{verbatim}
-#include <sys/socket.h>
-#include <netinet/in6.h>
-
-size_t recvfrom(int fd, char *buf, size_t len, int flags,
-                struct sockaddr *addr, int *addrlen)
-{
-  size_t cc;
-  char cbuf[128];
-  struct cmsghdr *c;
-  struct iovec iov = { buf, len };
-  struct msghdr msg = { addr, *addrlen,
-                        &iov,  1,
-                        cbuf, sizeof(cbuf),
-                        0 };
-
-  cc = recvmsg(fd, &msg, flags);
-  if (cc < 0)
-    return cc;
-  ((struct sockaddr_in6*)addr)->sin6_flowinfo = 0;
-  *addrlen = msg.msg_namelen;
-  for (c=CMSG_FIRSTHDR(&msg); c; c = CMSG_NEXTHDR(&msg, c)) {
-    if (c->cmsg_level != SOL_IPV6 ||
-      c->cmsg_type != IPV6_FLOWINFO)
-        continue;
-    ((struct sockaddr_in6*)addr)->sin6_flowinfo = *(__u32*)CMSG_DATA(c);
-  }
-  return cc;
-}
-\end{verbatim}
-
-
-
-\section{Flow label management.}
-
-\paragraph{Discussion.}
-\addcontentsline{toc}{subsection}{Discussion}
-Requirements of RFC2460 are pretty tough. Particularly, lifetimes
-longer than boot time require to store allocated labels at stable
-storage, so that the full implementation necessarily includes user space flow
-label manager. There are at least three different approaches:
-
-\begin{enumerate}
-\item {\bf ``Cooperative''. } We could leave flow label allocation wholly
-to user space. When user needs label he requests manager directly. The approach
-is valid, but as any ``cooperative'' approach it suffers of security problems.
-
-\begin{NB}
-One idea is to disallow not privileged user to allocate flow
-labels, but instead to pass the socket to manager via \verb|SCM_RIGHTS|
-control message, so that it will allocate label and assign it to socket
-itself. Hmm... the idea is interesting.
-\end{NB}
-
-\item {\bf ``Indirect''.} Kernel redirects requests to user level daemon
-and does not install label until the daemon acknowledged the request.
-The approach is the most promising, it is especially pleasant to recognize
-parallel with IPsec API [RFC2367,Craig]. Actually, it may share API with
-IPsec.
-
-\item {\bf ``Stupid''.} To allocate labels in kernel space. It is the simplest
-method, but it suffers of two serious flaws: the first,
-we cannot lease labels with lifetimes longer than boot time, the second, 
-it is sensitive to DoS attacks. Kernel have to remember all the obsolete
-labels until their expiration and malicious user may fastly eat all the
-flow label space.
-
-\end{enumerate}
-
-Certainly, I choose the most ``stupid'' method. It is the cheapest one
-for implementor (i.e.\ me), and taking into account that flow labels
-still have no serious applications it is not useful to work on more
-advanced API, especially, taking into account that eventually we
-will get it for no fee together with IPsec.
-
-
-\paragraph{Implementation.}
-\addcontentsline{toc}{subsection}{Implementation}
-Socket option \verb|IPV6_FLOWLABEL_MGR| allows to
-request flow label manager to allocate new flow label, to reuse
-already allocated one or to delete old flow label.
-Its argument is \verb|struct| \verb|in6_flowlabel_req|:
-
-\begin{verbatim}
-struct in6_flowlabel_req
-{
-        struct in6_addr flr_dst;
-        __u32           flr_label;
-        __u8            flr_action;
-        __u8            flr_share;
-        __u16           flr_flags;
-        __u16           flr_expires;
-        __u16           flr_linger;
-        __u32         __flr_reserved;
-        /* Options in format of IPV6_PKTOPTIONS */
-};
-\end{verbatim}
-
-\begin{itemize}
-
-\item \verb|dst| is IPv6 destination address associated with the label.
-
-\item \verb|label| is flow label value in network byte order. If it is zero,
-kernel will allocate new pseudo-random number. Otherwise, kernel will try
-to lease flow label ordered by user. In this case, it is user task to provide
-necessary flow label randomness.
-
-\item \verb|action| is requested operation. Currently, only three operations
-are defined:
-
-\begin{verbatim}
-#define IPV6_FL_A_GET   0   /* Get flow label */
-#define IPV6_FL_A_PUT   1   /* Release flow label */
-#define IPV6_FL_A_RENEW 2   /* Update expire time */
-\end{verbatim}
-
-\item \verb|flags| are optional modifiers. Currently
-only \verb|IPV6_FL_A_GET| has modifiers:
-
-\begin{verbatim}
-#define IPV6_FL_F_CREATE 1   /* Allowed to create new label */
-#define IPV6_FL_F_EXCL   2   /* Do not create new label */
-\end{verbatim}
-
-
-\item \verb|share| defines who is allowed to reuse the same flow label.
-
-\begin{verbatim}
-#define IPV6_FL_S_NONE    0   /* Not defined */
-#define IPV6_FL_S_EXCL    1   /* Label is private */
-#define IPV6_FL_S_PROCESS 2   /* May be reused by this process */
-#define IPV6_FL_S_USER    3   /* May be reused by this user */
-#define IPV6_FL_S_ANY     255 /* Anyone may reuse it */
-\end{verbatim}
-
-\item \verb|linger| is time in seconds. After the last user releases flow
-label, it will not be reused with different destination and options at least
-during this time. If \verb|share| is not \verb|IPV6_FL_S_EXCL| the label
-still can be shared by another sockets. Current implementation does not allow
-unprivileged user to set linger longer than 60 sec.
-
-\item \verb|expires| is time in seconds. Flow label will be kept at least
-for this time, but it will not be destroyed before user released it explicitly
-or closed all the sockets using it. Current implementation does not allow
-unprivileged user to set timeout longer than 60 sec. Proviledged applications
-MAY set longer lifetimes, but in this case they MUST save allocated
-labels at stable storage and restore them back after reboot before the first
-application allocates new flow.
-
-\end{itemize}
-
-This structure is followed by optional extension headers associated
-with this flow label in format of \verb|IPV6_PKTOPTIONS|. Only
-\verb|IPV6_HOPOPTS|, \verb|IPV6_RTHDR| and, if \verb|IPV6_RTHDR| presents,
-\verb|IPV6_DSTOPTS| are allowed.
-
-\paragraph{Example.}
-\addcontentsline{toc}{subsection}{Example}
- The function \verb|get_flow_label| allocates
-private flow label.
-
-\begin{verbatim}
-int get_flow_label(int fd, struct sockaddr_in6 *dst, __u32 fl)
-{
-        int on = 1;
-        struct in6_flowlabel_req freq;
-
-        memset(&freq, 0, sizeof(freq));
-        freq.flr_label = htonl(fl);
-        freq.flr_action = IPV6_FL_A_GET;
-        freq.flr_flags = IPV6_FL_F_CREATE | IPV6_FL_F_EXCL;
-        freq.flr_share = IPV6_FL_S_EXCL;
-        memcpy(&freq.flr_dst, &dst->sin6_addr, 16);
-        if (setsockopt(fd, SOL_IPV6, IPV6_FLOWLABEL_MGR,
-                       &freq, sizeof(freq)) == -1) {
-                perror ("can't lease flowlabel");
-                return -1;
-        }
-        dst->sin6_flowinfo |= freq.flr_label;
-
-        if (setsockopt(fd, SOL_IPV6, IPV6_FLOWINFO_SEND,
-                       &on, sizeof(on)) == -1) {
-                perror ("can't send flowinfo");
-
-                freq.flr_action = IPV6_FL_A_PUT;
-                setsockopt(fd, SOL_IPV6, IPV6_FLOWLABEL_MGR,
-                           &freq, sizeof(freq));
-                return -1;
-        }
-        return 0;
-}
-\end{verbatim}
-
-A bit more complicated example using routing header can be found
-in \verb|ping6| utility (\verb|iputils| package). Linux rsvpd backend
-contains an example of using operation \verb|IPV6_FL_A_RENEW|.
-
-\paragraph{Listing flow labels.} 
-\addcontentsline{toc}{subsection}{Listing flow labels}
-List of currently allocated
-flow labels may be read from \verb|/proc/net/ip6_flowlabel|.
-
-\begin{verbatim}
-Label S Owner Users Linger Expires Dst                              Opt
-A1BE5 1 0     0     6      3       3ffe2400000000010a0020fffe71fb30 0
-\end{verbatim}
-
-\begin{itemize}
-\item \verb|Label| is hexadecimal flow label value.
-\item \verb|S| is sharing style.
-\item \verb|Owner| is ID of creator, it is zero, pid or uid, depending on
-		sharing style.
-\item \verb|Users| is number of applications using the label now.
-\item \verb|Linger| is \verb|linger| of this label in seconds.
-\item \verb|Expires| is time until expiration of the label in seconds. It may
-	be negative, if the label is in use.
-\item \verb|Dst| is IPv6 destination address.
-\item \verb|Opt| is length of options, associated with the label. Option
-	data are not accessible.
-\end{itemize}
-
-
-\paragraph{Flow labels and RSVP.} 
-\addcontentsline{toc}{subsection}{Flow labels and RSVP}
-RSVP daemon supports IPv6 flow labels
-without any modifications to standard ISI RAPI. Sender must allocate
-flow label, fill corresponding sender template and submit it to local rsvp
-daemon. rsvpd will check the label and start to announce it in PATH
-messages. Rsvpd on sender node will renew the flow label, so that it will not
-be reused before path state expires and all the intermediate
-routers and receiver purge flow state.
-
-\verb|rtap| utility is modified to parse flow labels. F.e.\ if user allocated
-flow label \verb|0xA1234|, he may write:
-
-\begin{verbatim}
-RTAP> sender 3ffe:2400::1/FL0xA1234 <Tspec>
-\end{verbatim}
-
-Receiver makes reservation with command:
-\begin{verbatim}
-RTAP> reserve ff 3ffe:2400::1/FL0xA1234 <Flowspec>
-\end{verbatim}
-
-\end{document}
--- a/doc/arpd.sgml
+++ b/doc/arpd.sgml
@ -1,130 +0,0 @@
-<!doctype linuxdoc system>
-
-<article>
-
-<title>ARPD Daemon
-<author>Alexey Kuznetsov, <tt/kuznet@ms2.inr.ac.ru/
-<date>some_negative_number, 20 Sep 2001
-<abstract>
-<tt/arpd/ is daemon collecting gratuitous ARP information, saving
-it on local disk and feeding it to kernel on demand to avoid
-redundant broadcasting due to limited size of kernel ARP cache. 
-</abstract>
-
-
-<p><bf/Description/
-
-<p>The format of the command is:
-
-<tscreen><verb>
-       arpd OPTIONS [ INTERFACE [ INTERFACE ... ] ]
-</verb></tscreen>
-
-<p> <tt/OPTIONS/ are:
-
-<itemize>
-
-<item><tt/-l/ - dump <tt/arpd/ database to stdout and exit. Output consists
-of three columns: interface index, IP address and MAC address.
-Negative entries for dead hosts are also shown, in this case MAC address
-is replaced by word <tt/FAILED/ followed by colon and time when the fact
-that host is dead was proven the last time.
-
-<item><tt/-f FILE/  - read and load <tt/arpd/ database from <tt/FILE/
-in text format similar dumped by option <tt/-l/. Exit after load,
-probably listing resulting database, if option <tt/-l/ is also given.
-If <tt/FILE/ is <tt/-/, <tt/stdin/ is read to get ARP table.
- 
-<item><tt/-b DATABASE/  - location of database file. Default location is
-<tt>/var/lib/arpd/arpd.db</tt>.
-
-<item><tt/-a NUMBER/ - <tt/arpd/ not only passively listens ARP on wire, but
-also sends broadcast queries itself. <tt/NUMBER/ is number of such queries
-to make before destination is considered as dead. When <tt/arpd/ is started
-as kernel helper (i.e. with <tt/app_solicit/ enabled in <tt/sysctl/
-or even with option <tt/-k/) without this option and still did not learn enough
-information, you can observe 1 second gaps in service. Not fatal, but
-not good.
-
-<item><tt/-k/ - suppress sending broadcast queries by kernel. It makes
-sense together with option <tt/-a/.
-
-<item><tt/-n TIME/ - timeout of negative cache. When resolution fails <tt/arpd/
-suppresses further attempts to resolve for this period. It makes sense
-only together with option <tt/-k/. This timeout should not be too much
-longer than boot time of a typical host not supporting gratuitous ARP.
-Default value is 60 seconds.
-
-<item><tt/-R RATE/ - maximal steady rate of broadcasts sent by <tt/arpd/
-in packets per second. Default value is 1.
-
-<item><tt/-B NUMBER/ - number of broadcasts sent by <tt/arpd/ back to back.
-Default value is 3. Together with option <tt/-R/ this option allows
-to police broadcasting not to exceed <tt/B+R*T/ over any interval
-of time <tt/T/.
-
-</itemize>
-
-<p><tt/INTERFACE/ is name of networking interface to watch.
-If no interfaces given, <tt/arpd/ monitors all the interfaces.
-In this case <tt/arpd/ does not adjust <tt/sysctl/ parameters,
-it is supposed user does this himself after <tt/arpd/ is started.
-
-
-<p> Signals
-
-<p> <tt/arpd/ exits gracefully syncing database and restoring adjusted
-<tt/sysctl/ parameters, when receives <tt/SIGINT/ or <tt/SIGTERM/.
-<tt/SIGHUP/ syncs database to disk. <tt/SIGUSR1/ sends some statistics
-to <tt/syslog/. Effect of other signals is undefined; they may corrupt
-database and leave <tt/sysctl/ parameters in an unpredictable state.
-
-<p> Note
-
-<p> In order for <tt/arpd/ to be able to serve as ARP resolver, kernel must be
-compiled with the option <tt/CONFIG_ARPD/ and, in the case when interface list
-is not given on command line, variable <tt/app_solicit/
-on interfaces of interest should be set in <tt>/proc/sys/net/ipv4/neigh/*</tt>.
-If this is not made <tt/arpd/ still collects gratuitous ARP information
-in its database.
-
-<p> Examples
-
-<enum>
-<item> Start <tt/arpd/ to collect gratuitous ARP, but not messing
-with kernel functionality:
-
-<tscreen><verb>
-   arpd -b /var/tmp/arpd.db
-</verb></tscreen>
-
-<item> Look at result after some time:
-
-<tscreen><verb>
-   killall arpd
-   arpd -l -b /var/tmp/arpd.db
-</verb></tscreen>
-
-<item> To enable kernel helper, leaving leading role to kernel:
-
-<tscreen><verb>
-   arpd -b /var/tmp/arpd.db -a 1 eth0 eth1
-</verb></tscreen>
-
-<item> Completely replace kernel resolution on interfaces <tt/eth0/
-and <tt/eth1/. In this case kernel still does unicast probing to
-validate entries, but all the broadcast activity is suppressed
-and made under authority of <tt/arpd/: 
-
-<tscreen><verb>
-   arpd -b /var/tmp/arpd.db -a 3 -k eth0 eth1
-</verb></tscreen>
-
-This is mode which <tt/arpd/ is supposed to work normally.
-It is not the default, just to prevent accidental enabling of a too aggressive
-mode.
-
-</enum>
-
-</article>
-
--- a/doc/do-psnup
+++ b/doc/do-psnup
@ -1,16 +0,0 @@
-#! /bin/bash
-# $1 = Temporary file . "string"
-# $2 = File to process . "string"
-# $3 = Page size . ie: a4 , letter ... "string"
-# $4 = Number of pages to fit on a single sheet . "numeric"
-
-if type psnup >&/dev/null; then
-	echo "psnup -$4 -p$3 $1 $2"
-	psnup -$4 -p$3 $1 $2
-elif type psmulti >&/dev/null; then
-	echo "psmulti $1 > $2"
-	psmulti $1 > $2
-else
-	echo "cp $1 $2"
-	cp $1 $2
-fi
--- a/doc/ip-cref.tex
+++ b/doc/ip-cref.tex
--- a/doc/ip-tunnels.tex
+++ b/doc/ip-tunnels.tex
@ -1,469 +0,0 @@
-\documentstyle[12pt,twoside]{article}
-\def\TITLE{Tunnels over IP}
-\input preamble
-\begin{center}
-\Large\bf Tunnels over IP in Linux-2.2
-\end{center}
-
-
-\begin{center}
-{ \large Alexey~N.~Kuznetsov } \\
-\em Institute for Nuclear Research, Moscow \\
-\verb|kuznet@ms2.inr.ac.ru| \\
-\rm March 17, 1999
-\end{center}
-
-\vspace{5mm}
-
-\tableofcontents
-
-
-\section{Instead of introduction: micro-FAQ.}
-
-\begin{itemize}
-
-\item
-Q: In linux-2.0.36 I used:
-\begin{verbatim} 
-    ifconfig tunl1 10.0.0.1 pointopoint 193.233.7.65
-\end{verbatim} 
-to create tunnel. It does not work in 2.2.0!
-
-A: You are right, it does not work. The command written above is split to two commands.
-\begin{verbatim}
-    ip tunnel add MY-TUNNEL mode ipip remote 193.233.7.65
-\end{verbatim} 
-will create tunnel device with name \verb|MY-TUNNEL|. Now you may configure
-it with:
-\begin{verbatim} 
-    ifconfig MY-TUNNEL 10.0.0.1
-\end{verbatim} 
-Certainly, if you prefer name \verb|tunl1| to \verb|MY-TUNNEL|,
-you still may use it.
-
-\item
-Q: In linux-2.0.36 I used:
-\begin{verbatim} 
-    ifconfig tunl0 10.0.0.1
-    route add -net 10.0.0.0 gw 193.233.7.65 dev tunl0
-\end{verbatim} 
-to tunnel net 10.0.0.0 via router 193.233.7.65. It does not
-work in 2.2.0! Moreover, \verb|route| prints a funny error sort of
-``network unreachable'' and after this I found a strange direct route
-to 10.0.0.0 via \verb|tunl0| in routing table.
-
-A: Yes, in 2.2 the rule that {\em normal} gateway must reside on directly
-connected network has not any exceptions. You may tell kernel, that
-this particular route is {\em abnormal}:
-\begin{verbatim} 
-  ifconfig tunl0 10.0.0.1 netmask 255.255.255.255
-  ip route add 10.0.0.0/8 via 193.233.7.65 dev tunl0 onlink
-\end{verbatim}
-Note keyword \verb|onlink|, it is the magic key that orders kernel
-not to check for consistency of gateway address.
-Probably, after this explanation you have already guessed another method
-to cheat kernel:
-\begin{verbatim} 
-  ifconfig tunl0 10.0.0.1 netmask 255.255.255.255
-  route add -host 193.233.7.65 dev tunl0
-  route add -net 10.0.0.0 netmask 255.0.0.0 gw 193.233.7.65
-  route del -host 193.233.7.65 dev tunl0
-\end{verbatim}
-Well, if you like such tricks, nobody may prohibit you to use them.
-Only do not forget
-that between \verb|route add| and \verb|route del| host 193.233.7.65 is
-unreachable.
-
-\item
-Q: In 2.0.36 I used to load \verb|tunnel| device module and \verb|ipip| module.
-I cannot find any \verb|tunnel| in 2.2!
-
-A: Linux-2.2 has single module \verb|ipip| for both directions of tunneling
-and for all IPIP tunnel devices.
-
-\item
-Q: \verb|traceroute| does not work over tunnel! Well, stop... It works,
-     only skips some number of hops.
-
-A: Yes. By default tunnel driver copies \verb|ttl| value from
-inner packet to outer one. It means that path traversed by tunneled
-packets to another endpoint is not hidden. If you dislike this, or if you
-are going to use some routing protocol expecting that packets
-with ttl 1 will reach peering host (f.e.\ RIP, OSPF or EBGP)
-and you are not afraid of
-tunnel loops, you may append option \verb|ttl 64|, when creating tunnel
-with \verb|ip tunnel add|.
-
-\item
-Q: ... Well, list of things, which 2.0 was able to do finishes.
-
-\end{itemize}
-
-\paragraph{Summary of differences between 2.2 and 2.0.}
-
-\begin{itemize}
-
-\item {\bf In 2.0} you could compile tunnel device into kernel
-	and got set of 4 devices \verb|tunl0| ... \verb|tunl3| or,
-	alternatively, compile it as module and load new module
-	for each new tunnel. Also, module \verb|ipip| was necessary
-	to receive tunneled packets.
-
-      {\bf 2.2} has {\em one\/} module \verb|ipip|. Loading it you get base
-	tunnel device \verb|tunl0| and another tunnels may be created with command
-	\verb|ip tunnel add|. These new devices may have arbitrary names.
-
-
-\item {\bf In 2.0} you set remote tunnel endpoint address with
-	the command \verb|ifconfig| ... \verb|pointopoint A|.
-
-	{\bf In 2.2} this command has the same semantics on all
-	the interfaces, namely it sets not tunnel endpoint,
-	but address of peering host, which is directly reachable
-	via this tunnel,
-	rather than via Internet. Actual tunnel endpoint address \verb|A|
-	should be set with \verb|ip tunnel add ... remote A|.
-
-\item {\bf In 2.0} you create tunnel routes with the command:
-\begin{verbatim}
-    route add -net 10.0.0.0 gw A dev tunl0
-\end{verbatim}
-
-	{\bf 2.2} interprets this command equally for all device
-	kinds and gateway is required to be directly reachable via this tunnel,
-	rather than via Internet. You still may use \verb|ip route add ... onlink|
-	to override this behaviour.
-
-\end{itemize}
-
-
-\section{Tunnel setup: basics}
-
-Standard Linux-2.2 kernel supports three flavors of tunnels,
-listed in the following table:
-\vspace{2mm}
-
-\begin{tabular}{lll}
-\vrule depth 0.8ex width 0pt\relax
-Mode & Description  & Base device \\
-ipip & IP over IP & tunl0 \\
-sit & IPv6 over IP & sit0 \\
-gre & ANY over GRE over IP & gre0
-\end{tabular}
-
-\vspace{2mm}
-
-\noindent All the kinds of tunnels are created with one command:
-\begin{verbatim}
-  ip tunnel add <NAME> mode <MODE> [ local <S> ] [ remote <D> ]
-\end{verbatim}
-
-This command creates new tunnel device with name \verb|<NAME>|.
-The \verb|<NAME>| is an arbitrary string. Particularly,
-it may be even \verb|eth0|. The rest of parameters set
-different tunnel characteristics.
-
-\begin{itemize}
-
-\item
-\verb|mode <MODE>| sets tunnel mode. Three modes are available now
-	\verb|ipip|, \verb|sit| and \verb|gre|.
-
-\item
-\verb|remote <D>| sets remote endpoint of the tunnel to IP
-	address \verb|<D>|.
-\item
-\verb|local <S>| sets fixed local address for tunneled
-	packets. It must be an address on another interface of this host.
-
-\end{itemize}
-
-\let\thefootnote\oldthefootnote
-
-Both \verb|remote| and \verb|local| may be omitted. In this case we
-say that they are zero or wildcard. Two tunnels of one mode cannot
-have the same \verb|remote| and \verb|local|. Particularly it means
-that base device or fallback tunnel cannot be replicated.\footnote{
-This restriction is relaxed for keyed GRE tunnels.}
-
-Tunnels are divided to two classes: {\bf pointopoint} tunnels, which
-have some not wildcard \verb|remote| address and deliver all the packets
-to this destination, and {\bf NBMA} (i.e. Non-Broadcast Multi-Access) tunnels,
-which have no \verb|remote|. Particularly, base devices (f.e.\ \verb|tunl0|)
-are NBMA, because they have neither \verb|remote| nor
-\verb|local| addresses.
-
-
-After tunnel device is created you should configure it as you did
-it with another devices. Certainly, the configuration of tunnels has
-some features related to the fact that they work over existing Internet
-routing infrastructure and simultaneously create new virtual links,
-which changes this infrastructure. The danger that not enough careful
-tunnel setup will result in formation of tunnel loops,
-collapse of routing or flooding network with exponentially
-growing number of tunneled fragments is very real.
-
-
-Protocol setup on pointopoint tunnels does not differ from configuration
-of another devices. You should set a protocol address with \verb|ifconfig|
-and add routes with \verb|route| utility.
-
-NBMA tunnels are different. To route something via NBMA tunnel
-you have to explain to driver, where it should deliver packets to.
-The only way to make it is to create special routes with gateway
-address pointing to desired endpoint. F.e.\ 
-\begin{verbatim}
-    ip route add 10.0.0.0/24 via <A> dev tunl0 onlink
-\end{verbatim}
-It is important to use option \verb|onlink|, otherwise
-kernel will refuse request to create route via gateway not directly
-reachable over device \verb|tunl0|. With IPv6 the situation is much simpler:
-when you start device \verb|sit0|, it automatically configures itself
-with all IPv4 addresses mapped to IPv6 space, so that all IPv4
-Internet is {\em really reachable} via \verb|sit0|! Excellent, the command
-\begin{verbatim}
-    ip route add 3FFE::/16 via ::193.233.7.65 dev sit0
-\end{verbatim}
-will route \verb|3FFE::/16| via \verb|sit0|, sending all the packets
-destined to this prefix to 193.233.7.65.
-
-\section{Tunnel setup: options}
-
-Command \verb|ip tunnel add| has several additional options.
-\begin{itemize}
-
-\item \verb|ttl N| --- set fixed TTL \verb|N| on tunneled packets.
-	\verb|N| is number in the range 1--255. 0 is special value,
-	meaning that packets inherit TTL value. 
-		Default value is: \verb|inherit|.
-
-\item \verb|tos T| --- set fixed tos \verb|T| on tunneled packets.
-		Default value is: \verb|inherit|.
-
-\item \verb|dev DEV| --- bind tunnel to device \verb|DEV|, so that
-	tunneled packets will be routed only via this device and will
-	not be able to escape to another device, when route to endpoint changes.
-
-\item \verb|nopmtudisc| --- disable Path MTU Discovery on this tunnel.
-	It is enabled by default. Note that fixed ttl is incompatible
-	with this option: tunnels with fixed ttl always make pmtu discovery.
-
-\end{itemize}
-
-\verb|ipip| and \verb|sit| tunnels have no more options. \verb|gre|
-tunnels are more complicated:
-
-\begin{itemize}
-
-\item \verb|key K| --- use keyed GRE with key \verb|K|. \verb|K| is
-	either number or IP address-like dotted quad.
-
-\item \verb|csum| --- checksum tunneled packets.
-
-\item \verb|seq| --- serialize packets.
-\begin{NB}
-	I think this option does not
-	work. At least, I did not test it, did not debug it and
-	even do not understand,	how it is supposed to work and for what
-	purpose Cisco planned to use it.
-\end{NB}
-
-\end{itemize}
-
-
-Actually, these GRE options can be set separately for input and
-output directions by prefixing corresponding keywords with letter
-\verb|i| or \verb|o|. F.e.\ \verb|icsum| orders to accept only
-packets with correct checksum and \verb|ocsum| means, that
-our host will calculate and send checksum.
-
-Command \verb|ip tunnel add| is not the only operation,
-which can be made with tunnels. Certainly, you may get short help page
-with:
-\begin{verbatim}
-    ip tunnel help
-\end{verbatim}
-
-Besides that, you may view list of installed tunnels with the help of command:
-\begin{verbatim}
-    ip tunnel ls
-\end{verbatim}
-Also you may look at statistics:
-\begin{verbatim}
-    ip -s tunnel ls Cisco
-\end{verbatim}
-where \verb|Cisco| is name of tunnel device. Command
-\begin{verbatim}
-    ip tunnel del Cisco
-\end{verbatim}
-destroys tunnel \verb|Cisco|. And, finally,
-\begin{verbatim}
-    ip tunnel change Cisco mode sit local ME remote HE ttl 32
-\end{verbatim}
-changes its parameters.
-
-\section{Differences 2.2 and 2.0 tunnels revisited.}
-
-Now we can discuss more subtle differences between tunneling in 2.0
-and 2.2.
-
-\begin{itemize}
-
-\item In 2.0 all tunneled packets were received promiscuously
-as soon as you loaded module \verb|ipip|. 2.2 tries to select the best
-tunnel device and packet looks as received on this. F.e.\ if host
-received \verb|ipip| packet from host \verb|D| destined to our
-local address \verb|S|, kernel searches for matching tunnels
-in order:
-
-\begin{tabular}{ll}
-1 & \verb|remote| is \verb|D| and \verb|local| is \verb|S| \\
-2 & \verb|remote| is \verb|D| and \verb|local| is wildcard \\
-3 & \verb|remote| is wildcard and \verb|local| is \verb|S| \\
-4 & \verb|tunl0|
-\end{tabular}
-
-If tunnel exists, but it is not in \verb|UP| state, the tunnel is ignored.
-Note, that if \verb|tunl0| is \verb|UP| it receives all the IPIP packets,
-not acknowledged by more specific tunnels.
-Be careful, it means that without carefully installed firewall rules
-anyone on the Internet may inject to your network any packets with
-source addresses indistinguishable from local ones. It is not so bad idea
-to design tunnels in the way enforcing maximal route symmetry
-and to enable reversed path filter (\verb|rp_filter| sysctl option) on
-tunnel devices.
-
-\item In 2.2 you can monitor and debug tunnels with \verb|tcpdump|.
-F.e.\ \verb|tcpdump| \verb|-i Cisco| \verb|-nvv| will dump packets,
-which kernel output, via tunnel \verb|Cisco| and the packets received on it
-from kernel viewpoint.
-
-\end{itemize}
-
-
-\section{Linux and Cisco IOS tunnels.}
-
-Among another tunnels Cisco IOS supports IPIP and GRE.
-Essentially, Cisco setup is subset of options, available for Linux.
-Let us consider the simplest example:
-
-\begin{verbatim}
-interface Tunnel0
- tunnel mode gre ip
- tunnel source 10.10.14.1
- tunnel destination 10.10.13.2
-\end{verbatim}
-
-
-This command set translates to:
-
-\begin{verbatim}
-    ip tunnel add Tunnel0 \
-        mode gre \
-        local 10.10.14.1 \
-        remote 10.10.13.2
-\end{verbatim}
-
-Any questions? No questions.
-
-\section{Interaction IPIP tunnels and DVMRP.}
-
-DVMRP exploits IPIP tunnels to route multicasts via Internet.
-\verb|mrouted| creates
-IPIP tunnels listed in its configuration file automatically.
-From kernel and user viewpoints there are no differences between
-tunnels, created in this way, and tunnels created by \verb|ip tunnel|.
-I.e.\ if \verb|mrouted| created some tunnel, it may be used to
-route unicast packets, provided appropriate routes are added.
-And vice versa, if administrator has already created a tunnel,
-it will be reused by \verb|mrouted|, if it requests DVMRP
-tunnel with the same local and remote addresses.
-
-Do not wonder, if your manually configured tunnel is
-destroyed, when mrouted exits.
-
-
-\section{Broadcast GRE ``tunnels''.}
-
-It is possible to set \verb|remote| for GRE tunnel to a multicast
-address. Such tunnel becomes {\bf broadcast} tunnel (though word
-tunnel is not quite appropriate in this case, it is rather virtual network).
-\begin{verbatim}
-  ip tunnel add Universe local 193.233.7.65 \
-                         remote 224.66.66.66 ttl 16
-  ip addr add 10.0.0.1/16 dev Universe
-  ip link set Universe up
-\end{verbatim}
-This tunnel is true broadcast network and broadcast packets are
-sent to multicast group 224.66.66.66. By default such tunnel starts
-to resolve both IP and IPv6 addresses via ARP/NDISC, so that
-if multicast routing is supported in surrounding network, all GRE nodes
-will find one another automatically and will form virtual Ethernet-like
-broadcast network. If multicast routing does not work, it is unpleasant
-but not fatal flaw. The tunnel becomes NBMA rather than broadcast network.
-You may disable dynamic ARPing by:
-\begin{verbatim}
-  echo 0 > /proc/sys/net/ipv4/neigh/Universe/mcast_solicit
-\end{verbatim}
-and to add required information to ARP tables manually:
-\begin{verbatim}
-  ip neigh add 10.0.0.2 lladdr 128.6.190.2 dev Universe nud permanent
-\end{verbatim}
-In this case packets sent to 10.0.0.2 will be encapsulated in GRE
-and sent to 128.6.190.2. It is possible to facilitate address resolution
-using methods typical for another NBMA networks f.e.\ to start user
-level \verb|arpd| daemon, which will maintain database of hosts attached
-to GRE virtual network or to ask a dedicated ARP or NHRP server
-for information.
-
-
-Actually, such setup is the most natural for tunneling,
-it is really flexible, scalable and easily manageable, so that
-it is strongly recommended to be used with GRE tunnels instead of ugly
-hack with NBMA mode and \verb|onlink| modifier. Unfortunately,
-by historical reasons broadcast mode is not supported by IPIP tunnels,
-but this probably will change in future.
-
-
-
-\section{Traffic control issues.}
-
-Tunnels are devices, hence all the power of Linux traffic control
-applies to them. The simplest (and the most useful in practice)
-example is limiting tunnel bandwidth. The following command:
-\begin{verbatim}
-    tc qdisc add dev tunl0 root tbf \
-        rate 128Kbit burst 4K limit 10K
-\end{verbatim}
-will limit tunneled traffic to 128Kbit with maximal burst size of 4K
-and queuing not more than 10K.
-
-However, you should remember, that tunnels are {\em virtual} devices
-implemented in software and true queue management is impossible for them
-just because they have no queues. Instead, it is better to create classes
-on real physical interfaces and to map tunneled packets to them.
-In general case of dynamic routing you should create such classes
-on all outgoing interfaces, or, alternatively,
-to use option \verb|dev DEV| to bind tunnel to a fixed physical device.
-In the last case packets will be routed only via specified device
-and you need to setup corresponding classes only on it.
-Though you have to pay for this convenience,
-if routing will change, your tunnel will fail.
-
-Suppose that CBQ class \verb|1:ABC| has been created on device \verb|eth0| 
-specially for tunnel \verb|Cisco| with endpoints \verb|S| and \verb|D|.
-Now you can select IPIP packets with addresses \verb|S| and \verb|D|
-with some classifier and map them to class \verb|1:ABC|. F.e.\ 
-it is easy to make with \verb|rsvp| classifier:
-\begin{verbatim}
-    tc filter add dev eth0 pref 100 proto ip rsvp \
-        session D ipproto ipip filter S \
-        classid 1:ABC
-\end{verbatim}
-
-If you want to make more detailed classification of sub-flows
-transmitted via tunnel, you can build CBQ subtree,
-rooted at \verb|1:ABC| and attach to subroot set of rules parsing
-IPIP packets more deeply.
-
-\end{document}
--- a/doc/nstat.sgml
+++ b/doc/nstat.sgml
@ -1,110 +0,0 @@
-<!doctype linuxdoc system>
-
-<article>
-
-<title>NSTAT, IFSTAT and RTACCT Utilities
-<author>Alexey Kuznetsov, <tt/kuznet@ms2.inr.ac.ru/
-<date>some_negative_number, 20 Sep 2001
-<abstract>
-<tt/nstat/, <tt/ifstat/ and <tt/rtacct/ are simple tools helping
-to monitor kernel snmp counters and network interface statistics.
-</abstract>
-
-<p> These utilities are very similar, so that I describe
-them simultaneously, using name <tt/Xstat/ in the places which apply
-to all of them.
-
-<p>The format of the command is:
-
-<tscreen><verb>
-       Xstat [ OPTIONS ] [ PATTERN [ PATTERN ... ] ]
-</verb></tscreen>
-
-<p>
-<tt/PATTERN/ is shell style pattern, selecting identifier
-of SNMP variables or interfaces to show. Variable is displayed
-if one of patterns matches its name. If no patterns are given,
-<tt/Xstat/ assumes that user wants to see all the variables.  
-
-<p> <tt/OPTIONS/ is list of single letter options, using common unix
-conventions.
-
-<itemize>
-<item><tt/-h/  - show help page
-<item><tt/-?/  - the same, of course
-<item><tt/-v/, <tt/-V/  - print version of <tt/Xstat/ and exit
-<item><tt/-z/ - dump zero counters too. By default they are not shown.
-<item><tt/-a/ - dump absolute values of counters. By default <tt/Xstat/
-                calculates increments since the previous use.
-<item><tt/-s/ - do not update history, so that the next time you will
-                see counters including values accumulated to the moment
-                of this measurement too.
-<item><tt/-n/ - do not display anything, only update history.
-<item><tt/-r/ - reset history.
-<item><tt/-d INTERVAL/ - <tt/Xstat/ is run in daemon mode collecting
-                statistics. <tt/INTERVAL/ is interval between measurements
-                in seconds.
-<item><tt/-t INTERVAL/ - time interval to average rates. Default value
-                is 60 seconds. 
-<item><tt/-e/ - display extended information about errors (<tt/ifstat/ only).
-</itemize>
-
-<p>
-History is just dump saved in file <tt>/tmp/.Xstat.uUID</tt>
-or in file given by environment variables <tt/NSTAT_HISTORY/,
-<tt/IFSTAT_HISTORY/ and <tt/RTACCT_HISTORY/.
-Each time when you use <tt/Xstat/ values there are updated.
-If you use patterns, only the values which you _really_ see
-are updated. If you want to skip an uninteresting period,
-use option <tt/-n/, or just output to <tt>/dev/null</tt>.
-
-<p>
-<tt/Xstat/ understands when history is invalidated by system reboot
-or source of information switched between different instances
-of daemonic <tt/Xstat/ and kernel SNMP tables and does not
-use invalid history.
-
-<p> Beware, <tt/Xstat/ will not produce sane output,
-when many processes use it simultaneously. If several processes
-under single user need this utility they should use environment
-variables to put their history in safe places
-or to use it with options <tt/-a -s/.
-
-<p>
-Well, that's all. The utility is very simple, but nevertheless
-very handy.
-
-<p> <bf/Output of XSTAT/
-<p> The first line of output is <tt/#/ followed by identifier
-of source of information, it may be word <tt/kernel/, when <tt/Xstat/
-gets information from kernel or some dotted decimal number followed
-by parameters, when it obtains information from running <tt/Xstat/ daemon.
-
-<p>In the case of <tt/nstat/ the rest of output consists of three columns:
-SNMP MIB identifier,
-its value (or increment since previous measurement) and average
-rate of increase of the counter per second. <tt/ifstat/ outputs
-interface name followed by pairs of counter and rate of its change.
-
-<p> <bf/Daemonic Xstat/
-<p> <tt/Xstat/ may be started as daemon by any user. This makes sense
-to avoid wrapped counters and to obtain reasonable long counters
-for large time. Also <tt/Xstat/ daemon calculates average rates.
-For the first goal sampling interval (option <tt/-d/) may be large enough,
-f.e. for gigabit rates byte counters overflow not more frequently than
-each 40 seconds and you may select interval of 20 seconds.
-From the other hand, when <tt/Xstat/ is used for estimating rates
-interval should be less than averaging period (option <tt/-t/), otherwise
-estimation loses in quality.
-
-Client <tt/Xstat/, before trying to get information from the kernel,
-contacts daemon started by this user, then it tries system wide
-daemon, which is supposed to be started by superuser. And only if
-none of them replied it gets information from kernel.
-
-<p> <bf/Environment/
-<p> <tt/NSTAT_HISTORY/ - name of history file for <tt/nstat/.
-<p> <tt/IFSTAT_HISTORY/ - name of history file for <tt/ifstat/.
-<p> <tt/RTACCT_HISTORY/ - name of history file for <tt/rtacct/.
-
-</article>
--- a/doc/preamble.tex
+++ b/doc/preamble.tex
@ -1,26 +0,0 @@
-\textwidth   6.0in
-\textheight  8.5in
-
-\input SNAPSHOT
-
-\pagestyle{myheadings}
-\markboth{\protect\TITLE}{}
-\markright{{\protect\sc iproute2-ss\Draft}}
-
-% To print it in compact form: both sides on one sheet (psnup -2)
-\evensidemargin=\oddsidemargin
-
-\newenvironment{NB}{\bgroup \vskip 1mm\leftskip 1cm \footnotesize \noindent NB.
-}{\par\egroup \vskip 1mm}
-
-\def\threeonly{[2.3.15+ only] }
-
-\begin{document}
-
-\makeatletter
-\renewcommand{\@oddhead}{{\protect\sc iproute2-ss\Draft} \hfill \protect\arabic{page}}
-\makeatother
-\let\oldthefootnote\thefootnote
-\def\thefootnote{}
-\footnotetext{Copyright \copyright~1999 A.N.Kuznetsov}
-
--- a/doc/rtstat.sgml
+++ b/doc/rtstat.sgml
@ -1,52 +0,0 @@
-<!doctype linuxdoc system>
-
-<article>
-
-<title>RTSTAT Utility
-<author>Robert Olsson
-<date>some_negative_number, 20 Dec 2001
-
-<p>
-Here is some code for monitoring the route cache. For systems handling high
-network load, servers, routers, firewalls etc the route cache and its garbage
-collection is crucial. Linux has a solid implementation.
-
-<p>
-The kernel patch (not required since linux-2.4.7) adds statistics counters
-from route cache process into 
-/proc/net/rt_cache_stat. A companion user mode program presents the statistics
-in a vmstat or iostat manner. The ratio between cache hits and misses gives 
-the flow length.
-
-<p>
-Hopefully it can help understanding performance and DoS and other related
-issues.
-
-<p> A URL where newer versions of this utility can be (probably) found
-is ftp://robur.slu.se/pub/Linux/net-development/rt_cache_stat/
-
-
-<p><bf/Description/
-
-<p>The format of the command is:
-
-<tscreen><verb>
-       rtstat [ OPTIONS ]
-</verb></tscreen>
-
-<p> <tt/OPTIONS/ are:
-
-<itemize>
-
-<item><tt/-h/, <tt/-help/ - show help page and version of the utility.
-
-<item><tt/-i INTERVAL/ - interval between snapshots, default value is
-2 seconds.
-
-<item><tt/-s NUMBER/ - whether to print header line. 0 inhibits header line,
-1 prescribes to print it once and 2 (this is default setting) forces header
-line each 20 lines. 
-
-</itemize>
-
-</article>
--- a/doc/ss.sgml
+++ b/doc/ss.sgml
@ -1,525 +0,0 @@
-<!doctype linuxdoc system>
-
-<article>
-
-<title>SS Utility: Quick Intro
-<author>Alexey Kuznetsov, <tt/kuznet@ms2.inr.ac.ru/
-<date>some_negative_number, 20 Sep 2001
-<abstract>
-<tt/ss/ is yet another utility to investigate sockets.
-Functionally it is NOT better than <tt/netstat/ combined
-with some perl/awk scripts and though it is surely faster
-it is not enough to make it much better. :-)
-So, stop reading this now and do not waste your time.
-Well, certainly, it proposes some functionality, which current
-netstat is still not able to do, but surely will soon.
-</abstract>
-
-<sect>Why?
-
-<p> <tt>/proc</tt> interface is inadequate, unfortunately.
-When the number of sockets is large enough, <tt/netstat/ or even
-plain <tt>cat /proc/net/tcp/</tt> cause nothing but pains and curses.
-In linux-2.4 the disease became worse: even if the number
-of sockets is small, reading <tt>/proc/net/tcp/</tt> is slow enough.
-
-This utility presents a new approach, which is supposed to scale
-well. I am not going to describe technical details here and
-will concentrate on description of the command.
-The only important thing to say is that it is not so bad idea
-to load module <tt/tcp_diag/, which can be found in directory
-<tt/Modules/ of <tt/iproute2/. If you do not make this <tt/ss/
-will work, but it falls back to <tt>/proc</tt> and becomes slow
-like <tt/netstat/, well, a bit faster yet (see section "Some numbers"). 
-
-<sect>Old news
-
-<p>
-In the simplest form <tt/ss/ is equivalent to netstat
-with some small deviations.
-
-<itemize>
-<item><tt/ss -t -a/ dumps all TCP sockets
-<item><tt/ss -u -a/ dumps all UDP sockets
-<item><tt/ss -w -a/ dumps all RAW sockets
-<item><tt/ss -x -a/ dumps all UNIX sockets
-</itemize>
-
-<p>
-Option <tt/-o/ shows TCP timers state.
-Option <tt/-e/ shows some extended information.
-Etc. etc. etc. Seems, all the options of netstat related to sockets
-are supported. Though not AX.25 and other bizarres. :-)
-If someone wants, he can make support for decnet and ipx.
-Some rudimentary support for them is already present in iproute2 libutils,
-and I will be glad to see these new members.
-
-<p>
-However, standard functionality is a bit different:
-
-<p>
-The first: without option <tt/-a/ sockets in states
-<tt/TIME-WAIT/ and <tt/SYN-RECV/ are skipped too.
-It is more reasonable default, I think.
-
-<p>
-The second: format of UNIX sockets is different. It coincides
-with tcp/udp. Though standard kernel still does not allow to
-see write/read queues and peer address of connected UNIX sockets,
-the patch doing this exists.
-
-<p>
-The third: default is to dump only TCP sockets, rather than all of the types.
-
-<p>
-The next: by default it does not resolve numeric host addresses (like <tt/ip/)!
-Resolving is enabled with option <tt/-r/. Service names, usually stored
-in local files, are resolved by default. Also, if service database
-does not contain references to a port, <tt/ss/ queries system
-<tt/rpcbind/. RPC services are prefixed with <tt/rpc./
-Resolution of services may be suppressed with option <tt/-n/.
-
-<p>
-It does not accept "long" options (I dislike them, sorry).
-So, address family is given with family identifier following
-option <tt/-f/ to be aligned to iproute2 conventions.
-Mostly, it is to allow option parser to parse
-addresses correctly, but as side effect it really limits dumping
-to sockets supporting only given family. Option <tt/-A/ followed
-by list of socket tables to dump is also supported.
-Logically, id of socket table is different of _address_ family, which is
-another point of incompatibility. So, id is one of
-<tt/all/, <tt/tcp/, <tt/udp/,
-<tt/raw/, <tt/inet/, <tt/unix/, <tt/packet/, <tt/netlink/. See?
-Well, <tt/inet/ is just abbreviation for <tt/tcp|udp|raw/
-and it is not difficult to guess that <tt/packet/ allows
-to look at packet sockets. Actually, there are also some other abbreviations,
-f.e. <tt/unix_dgram/ selects only datagram UNIX sockets.
-
-<p>
-The next: well, I still do not know. :-)
-
-
-
-
-<sect>Time to talk about new functionality.
-
-<p>It is builtin filtering of socket lists. 
-
-<sect1> Filtering by state.
-
-<p>
-<tt/ss/ allows to filter socket states, using keywords
-<tt/state/ and <tt/exclude/, followed by some state
-identifier.
-
-<p>
-State identifier are standard TCP state names (not listed,
-they are useless for you if you already do not know them)
-or abbreviations:
-
-<itemize>
-<item><tt/all/        - for all the states
-<item><tt/bucket/     - for TCP minisockets (<tt/TIME-WAIT|SYN-RECV/)
-<item><tt/big/	      - all except for minisockets
-<item><tt/connected/  - not closed and not listening
-<item><tt/synchronized/ - connected and not <tt/SYN-SENT/
-</itemize>
-
-<p>
-   F.e. to dump all tcp sockets except <tt/SYN-RECV/:
-
-<tscreen><verb>
-   ss exclude SYN-RECV
-</verb></tscreen>
-
-<p>
-   If neither <tt/state/ nor <tt/exclude/ directives
-   are present,
-   state filter defaults to <tt/all/ with option <tt/-a/
-   or to <tt/all/,
-   excluding listening, syn-recv, time-wait and closed sockets.
-
-<sect1> Filtering by addresses and ports.
-
-<p>
-Option list may contain address/port filter.
-It is boolean expression which consists of boolean operation
-<tt/or/, <tt/and/, <tt/not/ and predicates. 
-Actually, all the flavors of names for boolean operations are eaten:
-<tt/&amp/, <tt/&amp&amp/, <tt/|/, <tt/||/, <tt/!/, but do not forget
-about special sense given to these symbols by unix shells and escape
-them correctly, when used from command line.
-
-<p>
-Predicates may be of the following kinds:
-
-<itemize>
-<item>A. Address/port match, where address is checked against mask
-      and port is either wildcard or exact. It is one of:
- 
-<tscreen><verb>
-	dst prefix:port
-	src prefix:port
-	src unix:STRING
-	src link:protocol:ifindex
-	src nl:channel:pid
-</verb></tscreen>
-
-      Both prefix and port may be absent or replaced with <tt/*/,
-      which means wildcard. UNIX socket use more powerful scheme
-      matching to socket names by shell wildcards. Also, prefixes
-      unix: and link: may be omitted, if address family is evident
-      from context (with option <tt/-x/ or with <tt/-f unix/
-      or with <tt/unix/ keyword) 
-
-<p>
-      F.e.
-
-<tscreen><verb>
-	dst 10.0.0.1
-	dst 10.0.0.1:
-	dst 10.0.0.1/32:
-	dst 10.0.0.1:*
-</verb></tscreen>
-   are equivalent and mean socket connected to
-	                 any port on host 10.0.0.1
-
-<tscreen><verb>
-	dst 10.0.0.0/24:22
-</verb></tscreen>
-   sockets connected to port 22 on network
-                          10.0.0.0...255.
-
-<p>
-      Note that port separated of address with colon, which creates
-      troubles with IPv6 addresses. Generally, we interpret the last
-      colon as splitting port. To allow to give IPv6 addresses,
-      trick like used in IPv6 HTTP URLs may be used:
-
-<tscreen><verb>
-      dst [::1]
-</verb></tscreen>
-       are sockets connected to ::1 on any port
-
-<p>
-      Another way is <tt/dst ::1/128/. / helps to understand that
-      colon is part of IPv6 address.
-
-<p>
-      Now we can add another alias for <tt/dst 10.0.0.1/:
-      <tt/dst [10.0.0.1]/. :-)
-
-<p>   Address may be a DNS name. In this case all the addresses are looked
-      up (in all the address families, if it is not limited by option <tt/-f/
-      or special address prefix <tt/inet:/, <tt/inet6/) and resulting
-      expression is <tt/or/ over all of them.  
-
-<item>   B. Port expressions:
-<tscreen><verb>
-      dport &gt= :1024
-      dport != :22
-      sport &lt :32000
-</verb></tscreen>
-      etc.
-
-      All the relations: <tt/&lt/, <tt/&gt/, <tt/=/, <tt/>=/, <tt/=/, <tt/==/,
-      <tt/!=/, <tt/eq/, <tt/ge/, <tt/lt/, <tt/ne/...
-      Use variant which you like more, but not forget to escape special
-      characters when typing them in command line. :-) 
-
-      Note that port number syntactically coincides to the case A!
-      You may even add an IP address, but it will not participate
-      in comparison, except for <tt/==/ and <tt/!=/, which are equivalent
-      to corresponding predicates of type A. F.e.
-<p>
-<tt/dst 10.0.0.1:22/
-    is equivalent to  <tt/dport eq 10.0.0.1:22/
-      and
-      <tt/not dst 10.0.0.1:22/     is equivalent to
- <tt/dport neq 10.0.0.1:22/
-
-<item>C. Keyword <tt/autobound/. It matches to sockets bound automatically
-      on local system.
-
-</itemize>
-
-
-<sect> Examples
-
-<p>
-<itemize>
-<item>1. List all the tcp sockets in state <tt/FIN-WAIT-1/ for our apache
-   to network 193.233.7/24 and look at their timers:
-
-<tscreen><verb>
-   ss -o state fin-wait-1 \( sport = :http or sport = :https \) \
-                          dst 193.233.7/24
-</verb></tscreen>
-
-   Oops, forgot to say that missing logical operation is
-   equivalent to <tt/and/.
-
-<item> 2. Well, now look at the rest...
-
-<tscreen><verb>
-   ss -o excl fin-wait-1
-   ss state fin-wait-1 \( sport neq :http and sport neq :https \) \
-                       or not dst 193.233.7/24
-</verb></tscreen>
-
-   Note that we have to do _two_ calls of ss to do this.
-   State match is always anded to address/port match.
-   The reason for this is purely technical: ss does fast skip of
-   not matching states before parsing addresses and I consider the
-   ability to skip fastly gobs of time-wait and syn-recv sockets
-   as more important than logical generality.
-
-<item> 3. So, let's look at all our sockets using autobound ports:
-
-<tscreen><verb>
-   ss -a -A all autobound
-</verb></tscreen>
-
-
-<item> 4. And eventually find all the local processes connected
-   to local X servers:
-
-<tscreen><verb>
-   ss -xp dst "/tmp/.X11-unix/*"
-</verb></tscreen>
-
-   Pardon, this does not work with current kernel, patching is required.
-   But we still can look at server side:
-   
-<tscreen><verb>
-   ss -x src "/tmp/.X11-unix/*"
-</verb></tscreen>
-
-</itemize>
-
-
-<sect> Returning to ground: real manual  
-
-<p>
-<sect1> Command arguments
-
-<p> General format of arguments to <tt/ss/ is:
-
-<tscreen><verb>
-       ss [ OPTIONS ] [ STATE-FILTER ] [ ADDRESS-FILTER ]
-</verb></tscreen>
-
-<sect2><tt/OPTIONS/
-<p> <tt/OPTIONS/ is list of single letter options, using common unix
-conventions.
-
-<itemize>
-<item><tt/-h/  - show help page
-<item><tt/-?/  - the same, of course
-<item><tt/-v/, <tt/-V/  - print version of <tt/ss/ and exit
-<item><tt/-s/  - print summary statistics. This option does not parse
-socket lists obtaining summary from various sources. It is useful
-when amount of sockets is so huge that parsing <tt>/proc/net/tcp</tt>
-is painful.
-<item><tt/-D FILE/  - do not display anything, just dump raw information
-about TCP sockets to <tt/FILE/ after applying filters. If <tt/FILE/ is <tt/-/
-<tt/stdout/ is used. 
-<item><tt/-F FILE/  - read continuation of filter from <tt/FILE/.
-Each line of <tt/FILE/ is interpreted like single command line option.
-If <tt/FILE/ is <tt/-/ <tt/stdin/ is used. 
-<item><tt/-r/  - try to resolve numeric address/ports
-<item><tt/-n/  - do not try to resolve ports
-<item><tt/-o/  - show some optional information, f.e. TCP timers
-<item><tt/-i/  - show some information specific to TCP (RTO, congestion
-window, slow start threshold etc.)
-<item><tt/-e/  - show even more optional information
-<item><tt/-m/  - show extended information on memory used by the socket.
-It is available only with <tt/tcp_diag/ enabled.
-<item><tt/-p/  - show list of processes owning the socket
-<item><tt/-f FAMILY/ - default address family used for parsing addresses.
-                 Also this option limits listing to sockets supporting
-                 given address family. Currently the following families
-                 are supported: <tt/unix/, <tt/inet/, <tt/inet6/, <tt/link/,
-                 <tt/netlink/.
-<item><tt/-4/ - alias for <tt/-f inet/
-<item><tt/-6/ - alias for <tt/-f inet6/
-<item><tt/-0/ - alias for <tt/-f link/
-<item><tt/-A LIST-OF-TABLES/ - list of socket tables to dump, separated
-                 by commas. The following identifiers are understood:
-                 <tt/all/, <tt/inet/, <tt/tcp/, <tt/udp/, <tt/raw/,
-                 <tt/unix/, <tt/packet/, <tt/netlink/, <tt/unix_dgram/,
-                 <tt/unix_stream/, <tt/packet_raw/, <tt/packet_dgram/.
-<item><tt/-x/ - alias for <tt/-A unix/
-<item><tt/-t/ - alias for <tt/-A tcp/
-<item><tt/-u/ - alias for <tt/-A udp/
-<item><tt/-w/ - alias for <tt/-A raw/
-<item><tt/-a/ - show sockets of all the states. By default sockets
-                in states <tt/LISTEN/, <tt/TIME-WAIT/, <tt/SYN_RECV/
-                and <tt/CLOSE/ are skipped.
-<item><tt/-l/ - show only sockets in state <tt/LISTEN/ 
-</itemize>
-
-<sect2><tt/STATE-FILTER/
-
-<p><tt/STATE-FILTER/ allows to construct arbitrary set of
-states to match. Its syntax is sequence of keywords <tt/state/
-and <tt/exclude/ followed by identifier of state.
-Available identifiers are:
-
-<p>
-<itemize>
-<item> All standard TCP states: <tt/established/, <tt/syn-sent/,
-<tt/syn-recv/, <tt/fin-wait-1/, <tt/fin-wait-2/, <tt/time-wait/,
-<tt/closed/, <tt/close-wait/, <tt/last-ack/, <tt/listen/ and <tt/closing/.
-
-<item><tt/all/ - for all the states 
-<item><tt/connected/ - all the states except for <tt/listen/ and <tt/closed/ 
-<item><tt/synchronized/ - all the <tt/connected/ states except for 
-<tt/syn-sent/
-<item><tt/bucket/ - states, which are maintained as minisockets, i.e.
-<tt/time-wait/ and <tt/syn-recv/.
-<item><tt/big/ - opposite to <tt/bucket/
-</itemize>
-
-<sect2><tt/ADDRESS_FILTER/
-
-<p><tt/ADDRESS_FILTER/ is boolean expression with operations <tt/and/, <tt/or/
-and <tt/not/, which can be abbreviated in C style f.e. as <tt/&amp/,
-<tt/&amp&amp/.
-
-<p>
-Predicates check socket addresses, both local and remote.
-There are the following kinds of predicates:
-
-<itemize>
-<item> <tt/dst ADDRESS_PATTERN/ - matches remote address and port
-<item> <tt/src ADDRESS_PATTERN/ - matches local address and port
-<item> <tt/dport RELOP PORT/    - compares remote port to a number
-<item> <tt/sport RELOP PORT/    - compares local port to a number
-<item> <tt/autobound/           - checks that socket is bound to an ephemeral
-                                  port
-</itemize>
-
-<p><tt/RELOP/ is some of <tt/&lt=/, <tt/&gt=/, <tt/==/ etc.
-To make this more convenient for use in unix shell, alphabetic
-FORTRAN-like notations <tt/le/, <tt/gt/ etc. are accepted as well.
-
-<p>The format and semantics of <tt/ADDRESS_PATTERN/ depends on address
-family.
-
-<itemize>
-<item><tt/inet/ - <tt/ADDRESS_PATTERN/ consists of IP prefix, optionally
-followed by colon and port. If prefix or port part is absent or replaced
-with <tt/*/, this means wildcard match.
-<item><tt/inet6/ - The same as <tt/inet/, only prefix refers to an IPv6
-address. Unlike <tt/inet/ colon becomes ambiguous, so that <tt/ss/ allows
-to use scheme, like used in URLs, where address is surrounded with
-<tt/[/ ... <tt/]/.
-<item><tt/unix/ - <tt/ADDRESS_PATTERN/ is shell-style wildcard.
-<item><tt/packet/ - format looks like <tt/inet/, only interface index
-stays instead of port and link layer protocol id instead of address.
-<item><tt/netlink/ - format looks like <tt/inet/, only socket pid
-stays instead of port and netlink channel instead of address.
-</itemize>
-
-<p><tt/PORT/ is syntactically <tt/ADDRESS_PATTERN/ with wildcard
-address part. Certainly, it is undefined for UNIX sockets. 
-
-<sect1> Environment variables
-
-<p>
-<tt/ss/ allows to change source of information using various
-environment variables:
-
-<p>
-<itemize>
-<item> <tt/PROC_SLABINFO/  to override <tt>/proc/slabinfo</tt>
-<item> <tt/PROC_NET_TCP/  to override <tt>/proc/net/tcp</tt>
-<item> <tt/PROC_NET_UDP/  to override <tt>/proc/net/udp</tt>
-<item> etc.
-</itemize> 
-
-<p>
-Variable <tt/PROC_ROOT/ allows to change root of all the <tt>/proc/</tt>
-hierarchy.
-
-<p>
-Variable <tt/TCPDIAG_FILE/ prescribes to open a file instead of
-requesting kernel to dump information about TCP sockets.
-
-
-<p> This option is used mainly to investigate bug reports,
-when dumps of files usually found in <tt>/proc/</tt> are received
-by e-mail.
-
-<sect1> Output format
-
-<p>Six columns. The first is <tt/Netid/, it denotes socket type and
-transport protocol, when it is ambiguous: <tt/tcp/, <tt/udp/, <tt/raw/,
-<tt/u_str/ is abbreviation for <tt/unix_stream/, <tt/u_dgr/ for UNIX
-datagram sockets, <tt/nl/ for netlink, <tt/p_raw/ and <tt/p_dgr/ for
-raw and datagram packet sockets. This column is optional, it will
-be hidden, if filter selects an unique netid.
-
-<p>
-The second column is <tt/State/. Socket state is displayed here.
-The names are standard TCP names, except for <tt/UNCONN/, which
-cannot happen for TCP, but normal for not connected sockets
-of another types. Again, this column can be hidden.
-
-<p>
-Then two columns (<tt/Recv-Q/ and <tt/Send-Q/) showing amount of data
-queued for receive and transmit.
-
-<p>
-And the last two columns display local address and port of the socket
-and its peer address, if the socket is connected.
-
-<p>
-If options <tt/-o/, <tt/-e/ or <tt/-p/ were given, options are
-displayed not in fixed positions but separated by spaces pairs:
-<tt/option:value/. If value is not a single number, it is presented
-as list of values, enclosed to <tt/(/ ... <tt/)/ and separated with
-commas. F.e.
-
-<tscreen><verb>
-   timer:(keepalive,111min,0)
-</verb></tscreen>
-is typical format for TCP timer (option <tt/-o/).
-
-<tscreen><verb>
-   users:((X,113,3))
-</verb></tscreen>
-is typical for list of users (option <tt/-p/).
-
-
-<sect>Some numbers
-
-<p>
-Well, let us use <tt/pidentd/ and a tool <tt/ibench/ to measure
-its performance. It is 30 requests per second here. Nothing to test,
-it is too slow. OK, let us patch pidentd with patch from directory
-Patches. After this it handles about 4300 requests per second
-and becomes handy tool to pollute socket tables with lots of timewait
-buckets.
-
-<p>
-So, each test starts from pollution tables with 30000 sockets
-and then doing full dump of the table piped to wc and measuring
-timings with time:
-
-<p>Results:
-
-<itemize>
-<item> <tt/netstat -at/ - 15.6 seconds
-<item> <tt/ss -atr/, but without <tt/tcp_diag/     - 5.4 seconds
-<item> <tt/ss -atr/ with <tt/tcp_diag/     - 0.47 seconds
-</itemize>
-
-No comments. Though one comment is necessary, most of time
-without <tt/tcp_diag/ is wasted inside kernel with completely
-blocked networking. More than 10 seconds, yes. <tt/tcp_diag/
-does the same work for 100 milliseconds of system time.
-
-</article>
--- a/doc/tc-filters.tex
+++ b/doc/tc-filters.tex
@ -1,514 +0,0 @@
-\documentclass[12pt,twoside]{article}
-
-\usepackage[hidelinks]{hyperref}	% \url
-\usepackage{booktabs}			% nicer tabulars
-\usepackage{fancyvrb}
-\usepackage{fullpage}
-\usepackage{float}
-
-\newcommand{\iface}{\textit}
-\newcommand{\cmd}{\texttt}
-\newcommand{\man}{\textit}
-\newcommand{\qdisc}{\texttt}
-\newcommand{\filter}{\texttt}
-
-\begin{document}
-\title{QoS in Linux with TC and Filters}
-\author{Phil Sutter (phil@nwl.cc)}
-\date{January 2016}
-\maketitle
-
-Standard practice when transmitting packets over a medium which may block (due
-to congestion, e.g.) is to use a queue which temporarily holds these packets. In
-Linux, this queueing approach is where QoS happens: A Queueing Discipline
-(qdisc) holds multiple packet queues with different priorities for dequeueing to
-the network driver. The classification (i.e. deciding which queue a packet
-should go into) is typically done based on Type Of Service (IPv4) or Traffic
-Class (IPv6) header fields but depending on qdisc implementation, might be
-controlled by the user as well.
-
-Qdiscs come in two flavors, classful or classless. While classless qdiscs are
-not as flexible as classful ones, they also require much less customizing. Often
-it is enough to just attach them to an interface, without exact knowledge of
-what is done internally. Classful qdiscs are the exact opposite: flexible in
-application, they are often not even usable without insightful configuration.
-
-As the name implies, classful qdiscs provide configurable classes to sort
-traffic into. In its basic form, this is not much different than, say, the
-classless \qdisc{pfifo\_fast} which holds three queues and classifies per
-packet upon priority field. Though typically classes go beyond that by
-supporting nesting and additional characteristics like e.g. maximum traffic
-rate or quantum.
-
-When it comes to controlling the classification process, filters come into play.
-They attach to the parent of a set of classes (i.e. either the qdisc itself or
-a parent class) and specify how a packet (or its associated flow) has to look
-like in order to suit a given class. To overcome this simplification, it is
-possible to attach multiple filters to the same parent, which then consults each
-of them in row until the first one accepts the packet.
-
-Before getting into detail about what filters there are and how to use them, a
-simple setup of a qdisc with classes is necessary:
-\begin{figure}[H]
-\begin{Verbatim}
-  .-------------------------------------------------------.
-  |                                                       |
-  |  HTB                                                  |
-  |                                                       |
-  | .----------------------------------------------------.|
-  | |                                                    ||
-  | |  Class 1:1                                         ||
-  | |                                                    ||
-  | | .---------------..---------------..---------------.||
-  | | |               ||               ||               |||
-  | | |  Class 1:10   ||  Class 1:20   ||  Class 1:30   |||
-  | | |               ||               ||               |||
-  | | | .------------.|| .------------.|| .------------.|||
-  | | | |            ||| |            ||| |            ||||
-  | | | |  fq_codel  ||| |  fq_codel  ||| |  fq_codel  ||||
-  | | | |            ||| |            ||| |            ||||
-  | | | '------------'|| '------------'|| '------------'|||
-  | | '---------------''---------------''---------------'||
-  | '----------------------------------------------------'|
-  '-------------------------------------------------------'
-\end{Verbatim}
-\end{figure}
-\noindent
-The following commands establish the basic setup shown:
-\begin{Verbatim}
-(1) # tc qdisc replace dev eth0 root handle 1: htb default 30
-(2) # tc class add dev eth0 parent 1: classid 1:1 htb rate 95mbit
-(3) # alias tclass='tc class add dev eth0 parent 1:1'
-(4) # tclass classid 1:10 htb rate 1mbit ceil 20mbit prio 1
-(4) # tclass classid 1:20 htb rate 90mbit ceil 95mbit prio 2
-(4) # tclass classid 1:30 htb rate 1mbit ceil 95mbit prio 3
-(5) # tc qdisc add dev eth0 parent 1:10 fq_codel
-(5) # tc qdisc add dev eth0 parent 1:20 fq_codel
-(5) # tc qdisc add dev eth0 parent 1:30 fq_codel
-\end{Verbatim}
-A little explanation for the unfamiliar reader:
-\begin{enumerate}
-\item Replace the root qdisc of \iface{eth0} by an instance of \qdisc{HTB}.
-  Specifying the handle is necessary so it can be referenced in consecutive
-  calls to \cmd{tc}. The default class for unclassified traffic is set to
-  30.
-\item Create a single top-level class with handle 1:1 which limits the total
-   bandwidth allowed to 95mbit/s. It is assumed that \iface{eth0} is a 100mbit/s link,
-   staying a little below that helps to keep the main point of enqueueing in
-   the qdisc layer instead of the interface hardware queue or at another
-   bottleneck in the network.
-\item Define an alias for the common part of the remaining three calls in order
-   to improve readability. This means all remaining classes are attached to the
-   common parent class from (2).
-\item Create three child classes for different uses: Class 1:10 has highest
-   priority but is tightly limited in bandwidth - fine for interactive
-   connections.  Class 1:20 has mid priority and high guaranteed bandwidth, for
-   high priority bulk traffic. Finally, there's the default class 1:30 with
-   lowest priority, low guaranteed bandwidth and the ability to use the full
-   link in case it's unused otherwise. This should be fine for uninteresting
-   traffic not explicitly taken care of.
-\item Attach a leaf qdisc to each of the child classes created in (4). Since
-   \qdisc{HTB} by default attaches \qdisc{pfifo} as leaf qdisc, this step is optional. Still,
-   the fairness between different flows provided by the classless \qdisc{fq\_codel} is
-   worth the effort.
-\end{enumerate}
-More information about the qdiscs and fine-tuning parameters can be found in
-\man{tc-htb(8)} and \man{tc-fq\_codel(8)}.
-
-Without any additional setup done, now all traffic leaving \iface{eth0} is shaped to
-95mbit/s and directed through class 1:30. This can be verified by looking at the
-\texttt{Sent} field of the class statistics printed via \cmd{tc -s class show dev eth0}:
-Only the root class 1:1 and its child 1:30 should show any traffic.
-
-
-\section*{Finally time to start filtering!}
-
-Let's begin with a simple one, i.e. reestablishing what \qdisc{pfifo\_fast} did
-automatically based on TOS/Priority field. Linux internally translates the
-header field into the priority field of struct skbuff, which
-\qdisc{pfifo\_fast} uses for
-classification. \man{tc-prio(8)} contains a table listing the priority (and
-ultimately, \qdisc{pfifo\_fast} queue index) each TOS value is being translated into.
-Here is a shorter version:
-\begin{center}
-\begin{tabular}{lll}
-TOS Values & Linux Priority (Number) & Queue Index \\
-\midrule
-0x0  - 0x6  & Best Effort (0)      & 1 \\
-0x8  - 0xe  & Bulk (2)             & 2 \\
-0x10 - 0x16 & Interactive (6)      & 0 \\
-0x18 - 0x1e & Interactive Bulk (4) & 1 \\
-\end{tabular}
-\end{center}
-Using the \filter{basic} filter, it is possible to match packets based on that skbuff
-field, which has the added benefit of being IP version agnostic. Since the
-\qdisc{HTB} setup above defaults to class ID 1:30, the Bulk priority can be
-ignored. The \filter{basic} filter allows to combine matches, therefore we get along
-with only two filters:
-\begin{Verbatim}
-# tc filter add dev eth0 parent 1: basic \
-        match 'meta(priority eq 6)' classid 1:10
-# tc filter add dev eth0 parent 1: basic \
-        match 'meta(priority eq 0)' \
-        or 'meta(priority eq 4)' classid 1:20
-\end{Verbatim}
-A detailed description of the \filter{basic} filter and the ematch syntax it uses can be
-found in \man{tc-basic(8)} and \man{tc-ematch(8)}.
-
-Obviously, this first example cries for optimization. A simple one would be to
-just change the default class from 1:30 to 1:20, so filters are only needed for
-Bulk and Interactive priorities:
-\begin{Verbatim}
-# tc filter add dev eth0 parent 1: basic \
-        match 'meta(priority eq 6)' classid 1:10
-# tc filter add dev eth0 parent 1: basic \
-        match 'meta(priority eq 2)' classid 1:20
-\end{Verbatim}
-Given that class IDs are random, choosing them wisely allows for a direct
-mapping. So first, recreate the qdisc and classes configuration:
-\begin{Verbatim}
-# tc qdisc replace dev eth0 root handle 1: htb default 10
-# tc class add dev eth0 parent 1: classid 1:1 htb rate 95mbit
-# alias tclass='tc class add dev eth0 parent 1:1'
-# tclass classid 1:16 htb rate 1mbit ceil 20mbit prio 1
-# tclass classid 1:10 htb rate 90mbit ceil 95mbit prio 2
-# tclass classid 1:12 htb rate 1mbit ceil 95mbit prio 3
-# tc qdisc add dev eth0 parent 1:16 fq_codel
-# tc qdisc add dev eth0 parent 1:10 fq_codel
-# tc qdisc add dev eth0 parent 1:12 fq_codel
-\end{Verbatim}
-This is basically identical to above, but with changed leaf class IDs and the
-second priority class being the default. Using the \filter{flow} filter with its \texttt{map}
-functionality, a single filter command is enough:
-\begin{Verbatim}
-# tc filter add dev eth0 parent 1: handle 0x1337 flow \
-        map key priority baseclass 1:10
-\end{Verbatim}
-The \filter{flow} filter now uses the priority value to construct a destination class ID
-by adding it to the value of \texttt{baseclass}. While this works for priority values of
-0, 2 and 6, it will result in non-existent class ID 1:14 for Interactive Bulk
-traffic. In that case, the \qdisc{HTB} default applies so that traffic goes into class
-ID 1:10 just as intended. Please note that specifying a handle is a mandatory
-requirement by the \filter{flow} filter, although I didn't see where one would use that
-later. For more information about \filter{flow}, see \man{tc-flow(8)}.
-
-While \filter{flow} and \filter{basic} filters are relatively easy to apply and understand, they
-are as well quite limited to their intended purpose. A more flexible option is
-the \filter{u32} filter, which allows to match on arbitrary parts of the packet data -
-yet only on that, not any meta data associated to it by the kernel (with the
-exception of firewall mark value). So in order to continue this little
-exercise with \filter{u32}, we have to base classification directly upon the actual TOS
-value. An intuitive attempt might look like this:
-\begin{Verbatim}
-# alias tcfilter='tc filter add dev eth0 parent 1:'
-# tcfilter u32 match ip dsfield 0x10 0x1e classid 1:16
-# tcfilter u32 match ip dsfield 0x12 0x1e classid 1:16
-# tcfilter u32 match ip dsfield 0x14 0x1e classid 1:16
-# tcfilter u32 match ip dsfield 0x16 0x1e classid 1:16
-# tcfilter u32 match ip dsfield 0x8 0x1e classid 1:12
-# tcfilter u32 match ip dsfield 0xa 0x1e classid 1:12
-# tcfilter u32 match ip dsfield 0xc 0x1e classid 1:12
-# tcfilter u32 match ip dsfield 0xe 0x1e classid 1:12
-\end{Verbatim}
-The obvious drawback here is the amount of filters needed. And without the
-default class, eight more filters would be necessary. This also has performance
-implications: A packet with TOS value 0xe will be checked eight times in total
-in order to determine its destination class. While there's not much to be done
-about the number of filters, at least the performance problem can be eliminated
-by using \filter{u32}'s hash table support:
-\begin{Verbatim}
-# tc filter add dev eth0 parent 1: prio 99 handle 1: u32 divisor 16
-\end{Verbatim}
-This creates a hash table with 16 buckets. The table size is arbitrary, but not
-random: Since the first bit of the TOS field is not interesting, it can be
-ignored and therefore the range of values to consider is just [0;15], i.e. a
-number of 16 different values. The next step is to populate the hash table:
-\begin{Verbatim}
-# alias tcfilter='tc filter add dev eth0 parent 1: prio 99'
-# tcfilter u32 match u8 0 0 ht 1:0: classid 1:16
-# tcfilter u32 match u8 0 0 ht 1:1: classid 1:16
-# tcfilter u32 match u8 0 0 ht 1:2: classid 1:16
-# tcfilter u32 match u8 0 0 ht 1:3: classid 1:16
-# tcfilter u32 match u8 0 0 ht 1:4: classid 1:12
-# tcfilter u32 match u8 0 0 ht 1:5: classid 1:12
-# tcfilter u32 match u8 0 0 ht 1:6: classid 1:12
-# tcfilter u32 match u8 0 0 ht 1:7: classid 1:12
-# tcfilter u32 match u8 0 0 ht 1:8: classid 1:16
-# tcfilter u32 match u8 0 0 ht 1:9: classid 1:16
-# tcfilter u32 match u8 0 0 ht 1:a: classid 1:16
-# tcfilter u32 match u8 0 0 ht 1:b: classid 1:16
-# tcfilter u32 match u8 0 0 ht 1:c: classid 1:10
-# tcfilter u32 match u8 0 0 ht 1:d: classid 1:10
-# tcfilter u32 match u8 0 0 ht 1:e: classid 1:10
-# tcfilter u32 match u8 0 0 ht 1:f: classid 1:10
-\end{Verbatim}
-The parameter \texttt{ht} denotes the hash table and bucket the filter should be added
-to. Since the first TOS bit is ignored, its value has to be divided by two in
-order to get to the bucket it maps to. E.g. a TOS value of 0x10 will therefore
-map to bucket 0x8.  For the sake of completeness, all possible values are mapped
-and therefore a configurable default class is not required. Note that the match
-expression used here is not functionally necessary, but syntactically mandatory. Therefore anything that
-matches any packet will suffice. Finally, a filter which links to the defined
-hash table is needed:
-\begin{Verbatim}
-# tc filter add dev eth0 parent 1: prio 1 protocol ip u32 \
-        link 1: hashkey mask 0x001e0000 match u8 0 0
-\end{Verbatim}
-Here again, the actual match statement is not necessary, but syntactically
-required. All the magic lies within the \texttt{hashkey} parameter, which defines which
-part of the packet should be used directly as hash key. Here's a drawing of the
-first four bytes of the IPv4 header, with the area selected by \texttt{hashkey mask}
-highlighted:
-\begin{figure}[H]
-\begin{Verbatim}
- 0                1                2                3
- .-----------------------------------------------------------------.
- |        |       | ########  |    |                               |
- | Version|  IHL  | #DSCP###  | ECN|  Total Length                 |
- |        |       | ########  |    |                               |
- '-----------------------------------------------------------------'
-\end{Verbatim}
-\end{figure}
-\noindent
-This may look confusing at first, but keep in mind that bit- as well as
-byte-ordering here is LSB while the mask value is written in MSB we humans use.
-Therefore reading the mask is done like so, starting from left:
-\begin{enumerate}
-\item Skip the first byte (which contains Version and IHL fields).
-\item Skip the lowest bit of the second byte (0x1e is even).
-\item Mark the four following bits (0x1e is 11110 in binary).
-\item Skip the remaining three bits of the second byte as well as the remaining two
-   bytes.
-\end{enumerate}
-Before doing the lookup, the kernel right-shifts the masked value by the amount
-of zero-bits in \texttt{mask}, which implicitly also does the division by two which the
-hash table depends on. With this setup, every packet has to pass exactly two
-filters to be classified. Note that this filter is limited to IPv4 packets: Due
-to the related Traffic Class field being at a different offset in the packet, it
-would not work for IPv6. To use the same setup for IPv6 as well, a second
-entry-level filter is necessary:
-\begin{Verbatim}
-# tc filter add dev eth0 parent 1: prio 2 protocol ipv6 u32 \
-        link 1: hashkey mask 0x01e00000 match u8 0 0
-\end{Verbatim}
-For illustration purposes, here again is a drawing of the first four bytes of
-the IPv6 header, again with masked area highlighted:
-\begin{figure}[H]
-\begin{Verbatim}
- 0                1                2                3
- .-----------------------------------------------------------------.
- |        | ########      |                                        |
- | Version| #Traffic Class|   Flow Label                           |
- |        | ########      |                                        |
- '-----------------------------------------------------------------'
-\end{Verbatim}
-\end{figure}
-\noindent
-Reading the mask value is analogous to IPv4 with the added complexity that
-Traffic Class spans over two bytes. Yet, for comparison there's a simple trick:
-IPv6 has the interesting field shifted by four bits to the left, and the new
-mask's value is shifted by the same amount. For further information about
-\filter{u32} and what can be done with it, consult its man page
-\man{tc-u32(8)}.
-
-Of course, the kernel provides many more filters than just \filter{basic},
-\filter{flow} and \filter{u32} which have been presented above. As of now, the
-remaining ones are:
-\begin{description}
-\item[bpf]
-        Filtering using Berkeley Packet Filter programs. The program's return
-        code determines the packet's destination class ID.
-
-\item[cgroup]
-        Filter packets based on control groups. This is only useful for packets
-        originating from the local host, as control groups only exist in that
-        scope.
-
-\item[flower]
-        An extended variant of the flow filter.
-
-\item[fw]
-        Matches on firewall mark values previously assigned to the packet by
-        netfilter (or a filter action, see below for details). This allows to
-        export the classification algorithm into netfilter, which is very
-        convenient if appropriate rules exist on the same system in there
-        already.
-
-\item[route]
-        Filter packets based on matching routing table entry. Basically
-        equivalent to the \texttt{fw} filter above, to make use of an already existing
-        extensive routing table setup.
-
-\item[rsvp, rsvp6]
-        Implementation of the Resource Reservation Protocol in Linux, to react
-        upon requests sent by an RSVP daemon.
-
-\item[tcindex]
-        Match packets based on tcindex value, which is usually set by the dsmark
-        qdisc. This is part of an approach to support Differentiated Services in
-        Linux, which is another topic on its own.
-\end{description}
-
-
-\section*{Filter Actions}
-
-The tc filter framework provides the infrastructure to another extensible set of
-tools as well, namely tc actions. As the name suggests, they allow to do things
-with packets (or associated data). (The list of) Actions are part of a given
-filter. If it matches, each action it contains is executed in order before
-returning the classification result. Since the action has direct access to the
-latter, it is in theory possible for an action to react upon or even change the
-filtering result - as long as the packet matched, of course. Yet none of the
-currently in-tree actions make use of this.
-
-The Generic Actions framework originally evolved out of the filters' ability to
-police traffic to a given maximum bandwidth. One common use case for that is to
-limit ingress traffic, dropping packets which exceed the threshold. A classic
-setup example is like so:
-\begin{Verbatim}
-# tc qdisc add dev eth0 handle ffff: ingress
-# tc filter add dev eth0 parent ffff: u32 \
-        match u32 0 0 \
-        police rate 1mbit burst 100k
-\end{Verbatim}
-The ingress qdisc is not a real one, but merely a point of reference for filters
-to attach to which should get applied to incoming traffic. The \filter{u32} filter added
-above matches on any packet and therefore limits the total incoming bandwidth to
-1mbit/s, allowing bursts of up to 100kbytes. Using the new syntax, the filter
-command changes slightly:
-\begin{Verbatim}
-# tc filter add dev eth0 parent ffff: u32 \
-        match u32 0 0 \
-        action police rate 1mbit burst 100k
-\end{Verbatim}
-The important detail is that this syntax allows to define multiple actions.
-E.g. for testing purposes, it is possible to redirect exceeding traffic to the
-loopback interface instead of dropping it:
-\begin{Verbatim}
-# tc filter add dev eth0 parent ffff: u32 \
-        match u32 0 0 \
-        action police rate 1mbit burst 100k conform-exceed pipe \
-        action mirred egress redirect dev lo
-\end{Verbatim}
-The added parameter \texttt{conform-exceed pipe} tells the police action to allow for
-further actions to handle the exceeding packet.
-
-Apart from \texttt{police} and \texttt{mirred} actions, there are a few more. Here's a full
-list of the currently implemented ones:
-\begin{description}
-\item[bpf]
-        Apply a Berkeley Packet Filter program to the packet.
-
-\item[connmark]
-        Set the packet's firewall mark to that of its connection. This works by
-        searching the conntrack table for a matching entry. If found, the mark
-        is restored.
-
-\item[csum]
-        Trigger recalculation of packet checksums. The supported protocols are:
-        IPv4, ICMP, IGMP, TCP, UDP and UDPLite.
-
-\item[ipt]
-        Pass the packet to an iptables target. This allows to use iptables
-        extensions directly instead of having to go the extra mile via setting
-        an arbitrary firewall mark and matching on that from within netfilter.
-
-\item[mirred]
-        Mirror or redirect packets. This is often combined with the ifb pseudo
-        device to share a common QoS setup between multiple interfaces or even
-        ingress traffic.
-
-\item[nat]
-        Perform stateless Network Address Translation. This is certainly not
-        complete and therefore inferior to NAT using iptables: Although the
-        kernel module decides between TCP, UDP and ICMP traffic, it does not
-        handle typical problematic protocols such as active FTP or SIP.
-
-\item[pedit]
-        Generic packet editing. This allows to alter arbitrary bytes of the
-        packet, either by specifying an offset into the packet or by naming a
-        packet header and field name to change. Currently, the latter is
-        implemented only for IPv4.
-
-\item[police]
-        Apply a bandwidth rate limiting policy. Packets exceeding it are dropped
-        by default, but may optionally be handled differently.
-
-\item[simple]
-        This is rather an example than real action. All it does is print a
-        user-defined string together with a packet counter. Useful maybe for
-        debugging when filter statistics are not available or too complicated.
-
-\item[skbedit]
-        Edit associated packet data, supports changing queue mapping, priority
-        field and firewall mark value.
-
-\item[vlan]
-        Add/remove a VLAN header to/from the packet. This might serve as
-        alternative to using 802.1Q pseudo-interfaces in combination with
-        routing rules when e.g. packets for a given destination need to be
-        encapsulated.
-\end{description}
-
-
-\section*{Intermediate Functional Block}
-
-The Intermediate Functional Block (\texttt{ifb}) pseudo network interface acts as a QoS
-concentrator for multiple different sources of traffic. Packets from or to other
-interfaces have to be redirected to it using the \texttt{mirred} action in order to be
-handled, regularly routed traffic will be dropped. This way, a single stack of
-qdiscs, classes and filters can be shared between multiple interfaces.
-
-Here's a simple example to feed incoming traffic from multiple interfaces
-through a Stochastic Fairness Queue (\qdisc{sfq}):
-\begin{Verbatim}
-(1) # modprobe ifb
-(2) # ip link set ifb0 up
-(3) # tc qdisc add dev ifb0 root sfq
-\end{Verbatim}
-The first step is to load the \texttt{ifb} kernel module (1). By default, this will
-create two ifb devices: \iface{ifb0} and \iface{ifb1}. After setting
-\iface{ifb0} up in (2), the root
-qdisc is replaced by \qdisc{sfq} in (3). Finally, one can start redirecting ingress
-traffic to \iface{ifb0}, e.g. from \iface{eth0}:
-\begin{Verbatim}
-# tc qdisc add dev eth0 handle ffff: ingress
-# tc filter add dev eth0 parent ffff: u32 \
-        match u32 0 0 \
-        action mirred egress redirect dev ifb0
-\end{Verbatim}
-The same can be done for other interfaces, just replacing \iface{eth0} in the two
-commands above. One thing to keep in mind here is the asymmetrical routing this
-creates within the host doing the QoS: Incoming packets enter the system via
-\iface{ifb0}, while corresponding replies leave directly via \iface{eth0}. This can be observed
-using \cmd{tcpdump} on \iface{ifb0}, which shows the input part of the traffic only. What's
-more confusing is that \cmd{tcpdump} on \iface{eth0} shows both incoming and outgoing traffic,
-but the redirection is still effective - a simple proof is setting
-\iface{ifb0} down,
-which will interrupt the communication. Obviously \cmd{tcpdump} catches the packets to
-dump before they enter the ingress qdisc, which is why it sees them while the
-kernel itself doesn't.
-
-
-\section*{Conclusion}
-
-Once the steep learning curve has been mastered, the conglomerate of (classful)
-qdiscs, filters and actions provides a highly sophisticated and flexible
-infrastructure to perform QoS, which plays nicely along with routing and
-firewalling setups.
-
-
-\section*{Further Reading}
-
-A good starting point for novice users and experienced ones diving into unknown
-areas is the extensive HOWTO at \url{http://lartc.org}. The iproute2 package ships
-some examples (usually in /usr/share/doc/, depending on distribution) as well as
-man pages for \cmd{tc} in general, qdiscs and filters. The latter have been added
-just recently though, so if your distribution does not ship iproute2 version
-4.3.0 yet, these are not in there. Apart from that, the internet is a spring of
-HOWTOs and scripts people wrote - though these should be taken with a grain of
-salt: The complexity of the matter often leads to copying others' solutions
-without much validation, which allows for less optimal or even obsolete
-implementations to survive much longer than desired.
-
-\end{document}
--- a/etc/iproute2/ematch_map
+++ b/etc/iproute2/ematch_map
@ -5,3 +5,4 @@
 4	meta
 7	canid
 8	ipset
+9	ipt
--- a/etc/iproute2/rt_protos
+++ b/etc/iproute2/rt_protos
@ -14,18 +14,12 @@
 13	dnrouted
 14	xorp
 15	ntk
-16      dhcp
+16	dhcp
+18	keepalived
 42	babel
-
-#
-#	Used by me for gated
-#
-254	gated/aggr
-253	gated/bgp
-252	gated/ospf
-251	gated/ospfase
-250	gated/rip
-249	gated/static
-248	gated/conn
-247	gated/inet
-246	gated/default
+99	openr
+186	bgp
+187	isis
+188	ospf
+189	rip
+192	eigrp
--- a/examples/README.cbq
+++ b/examples/README.cbq
@ -1,122 +0,0 @@
-# CHANGES
-# -------
-# v0.3a2- fixed bug in "if" operator. Thanks kad@dgtu.donetsk.ua.
-# v0.3a-  added TIME parameter. Example:
-#         TIME=00:00-19:00;64Kbit/6Kbit
-#         So, between 00:00 and 19:00 RATE will be 64Kbit.
-#         Just start "cbq.init timecheck" periodically from cron (every 10
-#         minutes for example).
-#         !!! Anyway you MUST start "cbq.init start" for CBQ initialize.
-# v0.2 -  Some cosmetique changes. Now it more compatible with
-#         old bash version. Thanks to Stanislav V. Voronyi
-#         <stas@cnti.uanet.kharkov.ua>.
-# v0.1 -  First public release
-# 
-# README
-# ------
-# 
-# First of all - this is just a SIMPLE EXAMPLE of CBQ power.
-# Don't ask me "why" and "how" :)
-# 
-# This is an example of using CBQ (Class Based Queueing) and policy-based
-# filter for building smart ethernet shapers. All CBQ parameters are
-# correct only for ETHERNET (eth0,1,2..) linux interfaces. It works for
-# ARCNET too (just set bandwidth parameter to 2Mbit). It was tested
-# on 2.1.125-2.1.129 linux kernels (KSI linux, Nostromo version) and 
-# ip-route utility by A.Kuznetsov (iproute2-ss981101 version). 
-# You can download ip-route from ftp://ftp.inr.ac.ru/ip-routing or
-# get iproute2*.rpm (compiled with glibc) from ftp.ksi-linux.com.
-# 
-# 
-# HOW IT WORKS
-# 
-# Each shaper must be described by config file in $CBQ_PATH
-# (/etc/sysconfig/cbq/) directory - one config file for each CBQ shaper.
-# 
-# Some words about config file name:
-# Each shaper has its personal ID - two byte HEX number. Really ID is 
-# CBQ class.
-# So, filename looks like:
-# 
-# cbq-1280.My_first_shaper
-# ^^^ ^^^  ^^^^^^^^^^^^^
-#  |  |            |______ Shaper name - any word
-#  |  |___________________ ID (0000-FFFF), let ID looks like shaper's rate
-#  |______________________ Filename must begin from "cbq-" 
-# 
-# 
-# Config file describes shaper parameters and source[destination] 
-# address[port].
-# For example let's prepare /etc/sysconfig/cbq/cbq-1280.My_first_shaper:
-# 
-# ----------8<---------------------
-# DEVICE=eth0,10Mbit,1Mbit
-# RATE=128Kbit
-# WEIGHT=10Kbit
-# PRIO=5
-# RULE=192.168.1.0/24
-# ----------8<---------------------
-# 
-# This is minimal configuration, where:
-# DEVICE:  eth0   - device where we do control our traffic
-#          10Mbit - REAL ethernet card bandwidth
-#          1Mbit  - "weight" of :1 class (parent for all shapers for eth0),
-#                   as a rule of thumb weight=bandwidth/10.
-#          100Mbit adapter's example: DEVICE=eth0,100Mbit,10Mbit
-#          *** If you want to build more than one shaper per device it's
-#              enough to describe bandwidth and weight once  - cbq.init
-#              is smart :) You can put only 'DEVICE=eth0' into cbq-* 
-#              config file for eth0.
-# 
-# RATE:    Shaper's speed - Kbit,Mbit or bps (bytes per second)
-# 
-# WEIGHT:  "weight" of shaper (CBQ class). Like for DEVICE - approx. RATE/10
-# 
-# PRIO:    shaper's priority from 1 to 8 where 1 is the highest one.
-#          I do always use "5" for all my shapers.
-# 
-# RULE:    [source addr][:source port],[dest addr][:dest port]
-#          Some examples:
-# RULE=10.1.1.0/24:80         - all traffic for network 10.1.1.0 to port 80
-#                               will be shaped.
-# RULE=10.2.2.5               - shaper works only for IP address 10.2.2.5   
-# RULE=:25,10.2.2.128/25:5000 - all traffic from any address and port 25 to
-#                               address 10.2.2.128 - 10.2.2.255 and port 5000
-#                               will be shaped.
-# RULE=10.5.5.5:80,           - shaper active only for traffic from port 80 of
-#                               address 10.5.5.5
-# Multiple RULE fields per one config file are allowed. For example:
-# RULE=10.1.1.2:80
-# RULE=10.1.1.2:25
-# RULE=10.1.1.2:110
-# 
-# *** ATTENTION!!!
-# All shapers do work only for outgoing traffic!
-# So, if you want to build bidirectional shaper you must set it up for
-# both ethernet card. For example let's build shaper for our linux box like:
-# 
-#                     ---------             192.168.1.1
-# BACKBONE -----eth0-|  linux  |-eth1------*[our client]
-#                     ---------
-# 
-# Let all traffic from backbone to client will be shaped at 28Kbit and
-# traffic from client to backbone - at 128Kbit. We need two config files:
-# 
-# ---8<-----/etc/sysconfig/cbq/cbq-28.client-out----
-# DEVICE=eth1,10Mbit,1Mbit
-# RATE=28Kbit
-# WEIGHT=2Kbit
-# PRIO=5
-# RULE=192.168.1.1
-# ---8<---------------------------------------------
-# 
-# ---8<-----/etc/sysconfig/cbq/cbq-128.client-in----
-# DEVICE=eth0,10Mbit,1Mbit
-# RATE=128Kbit
-# WEIGHT=10Kbit
-# PRIO=5
-# RULE=192.168.1.1,
-# ---8<---------------------------------------------
-#                 ^pay attention to "," - this is source address!
-# 
-# Enjoy.
--- a/examples/SYN-DoS.rate.limit
+++ b/examples/SYN-DoS.rate.limit
@ -1,49 +0,0 @@
-#! /bin/sh -x
-#
-# sample script on using the ingress capabilities
-# this script shows how one can rate limit incoming SYNs
-# Useful for TCP-SYN attack protection. You can use
-# IPchains to have more powerful additions to the SYN (eg 
-# in addition the subnet)
-#
-#path to various utilities;
-#change to reflect yours.
-#
-IPROUTE=/root/DS-6-beta/iproute2-990530-dsing
-TC=$IPROUTE/tc/tc
-IP=$IPROUTE/ip/ip
-IPCHAINS=/root/DS-6-beta/ipchains-1.3.9/ipchains
-INDEV=eth2
-#
-# tag all incoming SYN packets through $INDEV as mark value 1
-############################################################ 
-$IPCHAINS -A input -i $INDEV -y -m 1
-############################################################ 
-#
-# install the ingress qdisc on the ingress interface
-############################################################ 
-$TC qdisc add dev $INDEV handle ffff: ingress
-############################################################ 
-
-#
-# 
-# SYN packets are 40 bytes (320 bits) so three SYNs equals
-# 960 bits (approximately 1kbit); so we rate limit below
-# the incoming SYNs to 3/sec (not very useful really; but
-# serves to show the point) - JHS
-############################################################ 
-$TC filter add dev $INDEV parent ffff: protocol ip prio 50 handle 1 fw \
-police rate 1kbit burst 40 mtu 9k drop flowid :1
-############################################################ 
-
-
-#
-echo "---- qdisc parameters Ingress  ----------"
-$TC qdisc ls dev $INDEV
-echo "---- Class parameters Ingress  ----------"
-$TC class ls dev $INDEV
-echo "---- filter parameters Ingress ----------"
-$TC filter ls dev $INDEV parent ffff:
-
-#deleting the ingress qdisc
-#$TC qdisc del $INDEV ingress
--- a/examples/bpf/README
+++ b/examples/bpf/README
@ -1,8 +1,18 @@
 eBPF toy code examples (running in kernel) to familiarize yourself
 with syntax and features:

- - bpf_shared.c		-> Ingress/egress map sharing example
- - bpf_tailcall.c	-> Using tail call chains
- - bpf_cyclic.c		-> Simple cycle as tail calls
+- BTF defined map examples
 - bpf_graft.c		-> Demo on altering runtime behaviour
- - bpf_map_in_map.c     -> Using map in map example
+ - bpf_shared.c 	-> Ingress/egress map sharing example
+ - bpf_map_in_map.c	-> Using map in map example
+
+- legacy struct bpf_elf_map defined map examples
+ - legacy/bpf_shared.c		-> Ingress/egress map sharing example
+ - legacy/bpf_tailcall.c	-> Using tail call chains
+ - legacy/bpf_cyclic.c		-> Simple cycle as tail calls
+ - legacy/bpf_graft.c		-> Demo on altering runtime behaviour
+ - legacy/bpf_map_in_map.c	-> Using map in map example
+
+Note: Users should use the new BTF way to define maps; the examples in the
+legacy folder, which use struct bpf_elf_map defined maps, are not
+recommended.
--- a/examples/bpf/bpf_graft.c
+++ b/examples/bpf/bpf_graft.c
@ -33,13 +33,13 @@
 *   [...]
 */

-struct bpf_elf_map __section_maps jmp_tc = {
-	.type		= BPF_MAP_TYPE_PROG_ARRAY,
-	.size_key	= sizeof(uint32_t),
-	.size_value	= sizeof(uint32_t),
-	.pinning	= PIN_GLOBAL_NS,
-	.max_elem	= 1,
-};
+struct {
+	__uint(type, BPF_MAP_TYPE_PROG_ARRAY);
+	__uint(key_size, sizeof(uint32_t));
+	__uint(value_size, sizeof(uint32_t));
+	__uint(max_entries, 1);
+	__uint(pinning, LIBBPF_PIN_BY_NAME);
+} jmp_tc __section(".maps");

 __section("aaa")
 int cls_aaa(struct __sk_buff *skb)
--- a/examples/bpf/bpf_map_in_map.c
+++ b/examples/bpf/bpf_map_in_map.c
@ -1,24 +1,23 @@
 #include "../../include/bpf_api.h"

-#define MAP_INNER_ID	42
+struct inner_map {
+	__uint(type, BPF_MAP_TYPE_ARRAY);
+	__uint(key_size, sizeof(uint32_t));
+	__uint(value_size, sizeof(uint32_t));
+	__uint(max_entries, 1);
+} map_inner __section(".maps");

-struct bpf_elf_map __section_maps map_inner = {
-	.type		= BPF_MAP_TYPE_ARRAY,
-	.size_key	= sizeof(uint32_t),
-	.size_value	= sizeof(uint32_t),
-	.id		= MAP_INNER_ID,
-	.inner_idx	= 0,
-	.pinning	= PIN_GLOBAL_NS,
-	.max_elem	= 1,
-};
-
-struct bpf_elf_map __section_maps map_outer = {
-	.type		= BPF_MAP_TYPE_ARRAY_OF_MAPS,
-	.size_key	= sizeof(uint32_t),
-	.size_value	= sizeof(uint32_t),
-	.inner_id	= MAP_INNER_ID,
-	.pinning	= PIN_GLOBAL_NS,
-	.max_elem	= 1,
+struct {
+	__uint(type, BPF_MAP_TYPE_ARRAY_OF_MAPS);
+	__uint(key_size, sizeof(uint32_t));
+	__uint(value_size, sizeof(uint32_t));
+	__uint(max_entries, 1);
+	__uint(pinning, LIBBPF_PIN_BY_NAME);
+	__array(values, struct inner_map);
+} map_outer __section(".maps") = {
+	.values = {
+		[0] = &map_inner,
+	},
 };

 __section("egress")
--- a/examples/bpf/bpf_shared.c
+++ b/examples/bpf/bpf_shared.c
@ -18,13 +18,13 @@
 * instance is being created.
 */

-struct bpf_elf_map __section_maps map_sh = {
-	.type		= BPF_MAP_TYPE_ARRAY,
-	.size_key	= sizeof(uint32_t),
-	.size_value	= sizeof(uint32_t),
-	.pinning	= PIN_OBJECT_NS, /* or PIN_GLOBAL_NS, or PIN_NONE */
-	.max_elem	= 1,
-};
+struct {
+	__uint(type, BPF_MAP_TYPE_ARRAY);
+	__uint(key_size, sizeof(uint32_t));
+	__uint(value_size, sizeof(uint32_t));
+	__uint(max_entries, 1);
+	__uint(pinning, LIBBPF_PIN_BY_NAME);	/* or LIBBPF_PIN_NONE */
+} map_sh __section(".maps");

 __section("egress")
 int emain(struct __sk_buff *skb)
--- a/examples/bpf/legacy/bpf_cyclic.c
+++ b/examples/bpf/legacy/bpf_cyclic.c
@ -1,4 +1,4 @@
-#include "../../include/bpf_api.h"
+#include "../../../include/bpf_api.h"

 /* Cyclic dependency example to test the kernel's runtime upper
 * bound on loops. Also demonstrates on how to use direct-actions,
--- a/examples/bpf/legacy/bpf_graft.c
+++ b/examples/bpf/legacy/bpf_graft.c
@ -0,0 +1,66 @@
+#include "../../../include/bpf_api.h"
+
+/* This example demonstrates how classifier run-time behaviour
+ * can be altered with tail calls. We start out with an empty
+ * jmp_tc array, then add section aaa to the array slot 0, and
+ * later on atomically replace it with section bbb. Note that
+ * as shown in other examples, the tc loader can prepopulate
+ * tail called sections, here we start out with an empty one
+ * on purpose to show it can also be done this way.
+ *
+ * tc filter add dev foo parent ffff: bpf obj graft.o
+ * tc exec bpf dbg
+ *   [...]
+ *   Socket Thread-20229 [001] ..s. 138993.003923: : fallthrough
+ *   <idle>-0            [001] ..s. 138993.202265: : fallthrough
+ *   Socket Thread-20229 [001] ..s. 138994.004149: : fallthrough
+ *   [...]
+ *
+ * tc exec bpf graft m:globals/jmp_tc key 0 obj graft.o sec aaa
+ * tc exec bpf dbg
+ *   [...]
+ *   Socket Thread-19818 [002] ..s. 139012.053587: : aaa
+ *   <idle>-0            [002] ..s. 139012.172359: : aaa
+ *   Socket Thread-19818 [001] ..s. 139012.173556: : aaa
+ *   [...]
+ *
+ * tc exec bpf graft m:globals/jmp_tc key 0 obj graft.o sec bbb
+ * tc exec bpf dbg
+ *   [...]
+ *   Socket Thread-19818 [002] ..s. 139022.102967: : bbb
+ *   <idle>-0            [002] ..s. 139022.155640: : bbb
+ *   Socket Thread-19818 [001] ..s. 139022.156730: : bbb
+ *   [...]
+ */
+
+struct bpf_elf_map __section_maps jmp_tc = {
+	.type		= BPF_MAP_TYPE_PROG_ARRAY,
+	.size_key	= sizeof(uint32_t),
+	.size_value	= sizeof(uint32_t),
+	.pinning	= PIN_GLOBAL_NS,
+	.max_elem	= 1,
+};
+
+__section("aaa")
+int cls_aaa(struct __sk_buff *skb)
+{
+	printt("aaa\n");
+	return TC_H_MAKE(1, 42);
+}
+
+__section("bbb")
+int cls_bbb(struct __sk_buff *skb)
+{
+	printt("bbb\n");
+	return TC_H_MAKE(1, 43);
+}
+
+__section_cls_entry
+int cls_entry(struct __sk_buff *skb)
+{
+	tail_call(skb, &jmp_tc, 0);
+	printt("fallthrough\n");
+	return BPF_H_DEFAULT;
+}
+
+BPF_LICENSE("GPL");
--- a/examples/bpf/legacy/bpf_map_in_map.c
+++ b/examples/bpf/legacy/bpf_map_in_map.c
@ -0,0 +1,56 @@
+#include "../../../include/bpf_api.h"
+
+#define MAP_INNER_ID	42
+
+struct bpf_elf_map __section_maps map_inner = {
+	.type		= BPF_MAP_TYPE_ARRAY,
+	.size_key	= sizeof(uint32_t),
+	.size_value	= sizeof(uint32_t),
+	.id		= MAP_INNER_ID,
+	.inner_idx	= 0,
+	.pinning	= PIN_GLOBAL_NS,
+	.max_elem	= 1,
+};
+
+struct bpf_elf_map __section_maps map_outer = {
+	.type		= BPF_MAP_TYPE_ARRAY_OF_MAPS,
+	.size_key	= sizeof(uint32_t),
+	.size_value	= sizeof(uint32_t),
+	.inner_id	= MAP_INNER_ID,
+	.pinning	= PIN_GLOBAL_NS,
+	.max_elem	= 1,
+};
+
+__section("egress")
+int emain(struct __sk_buff *skb)
+{
+	struct bpf_elf_map *map_inner;
+	int key = 0, *val;
+
+	map_inner = map_lookup_elem(&map_outer, &key);
+	if (map_inner) {
+		val = map_lookup_elem(map_inner, &key);
+		if (val)
+			lock_xadd(val, 1);
+	}
+
+	return BPF_H_DEFAULT;
+}
+
+__section("ingress")
+int imain(struct __sk_buff *skb)
+{
+	struct bpf_elf_map *map_inner;
+	int key = 0, *val;
+
+	map_inner = map_lookup_elem(&map_outer, &key);
+	if (map_inner) {
+		val = map_lookup_elem(map_inner, &key);
+		if (val)
+			printt("map val: %d\n", *val);
+	}
+
+	return BPF_H_DEFAULT;
+}
+
+BPF_LICENSE("GPL");
--- a/examples/bpf/legacy/bpf_shared.c
+++ b/examples/bpf/legacy/bpf_shared.c
@ -0,0 +1,53 @@
+#include "../../../include/bpf_api.h"
+
+/* Minimal, stand-alone toy map pinning example:
+ *
+ * clang -target bpf -O2 [...] -o bpf_shared.o -c bpf_shared.c
+ * tc filter add dev foo parent 1: bpf obj bpf_shared.o sec egress
+ * tc filter add dev foo parent ffff: bpf obj bpf_shared.o sec ingress
+ *
+ * Both classifier will share the very same map instance in this example,
+ * so map content can be accessed from ingress *and* egress side!
+ *
+ * This example has a pinning of PIN_OBJECT_NS, so it's private and
+ * thus shared among various program sections within the object.
+ *
+ * A setting of PIN_GLOBAL_NS would place it into a global namespace,
+ * so that it can be shared among different object files. A setting
+ * of PIN_NONE (= 0) means no sharing, so each tc invocation a new map
+ * instance is being created.
+ */
+
+struct bpf_elf_map __section_maps map_sh = {
+	.type		= BPF_MAP_TYPE_ARRAY,
+	.size_key	= sizeof(uint32_t),
+	.size_value	= sizeof(uint32_t),
+	.pinning	= PIN_OBJECT_NS, /* or PIN_GLOBAL_NS, or PIN_NONE */
+	.max_elem	= 1,
+};
+
+__section("egress")
+int emain(struct __sk_buff *skb)
+{
+	int key = 0, *val;
+
+	val = map_lookup_elem(&map_sh, &key);
+	if (val)
+		lock_xadd(val, 1);
+
+	return BPF_H_DEFAULT;
+}
+
+__section("ingress")
+int imain(struct __sk_buff *skb)
+{
+	int key = 0, *val;
+
+	val = map_lookup_elem(&map_sh, &key);
+	if (val)
+		printt("map val: %d\n", *val);
+
+	return BPF_H_DEFAULT;
+}
+
+BPF_LICENSE("GPL");
--- a/examples/bpf/legacy/bpf_tailcall.c
+++ b/examples/bpf/legacy/bpf_tailcall.c
@ -1,4 +1,5 @@
-#include "../../include/bpf_api.h"
+/* SPDX-License-Identifier: GPL-2.0 */
+#include "../../../include/bpf_api.h"

 #define ENTRY_INIT	3
 #define ENTRY_0		0
--- a/examples/cbq.init-v0.7.3
+++ b/examples/cbq.init-v0.7.3
@ -1,983 +0,0 @@
-#!/bin/bash
-#
-#    cbq.init v0.7.3
-#    Copyright (C) 1999  Pavel Golubev <pg@ksi-linux.com>
-#    Copyright (C) 2001-2004  Lubomir Bulej <pallas@kadan.cz>
-#
-#    chkconfig:   2345 11 89
-#    description: sets up CBQ-based traffic control
-#
-#    This program is free software; you can redistribute it and/or modify
-#    it under the terms of the GNU General Public License as published by
-#    the Free Software Foundation; either version 2 of the License, or
-#    (at your option) any later version.
-#
-#    This program is distributed in the hope that it will be useful,
-#    but WITHOUT ANY WARRANTY; without even the implied warranty of
-#    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-#    GNU General Public License for more details.
-#
-#    You should have received a copy of the GNU General Public License
-#    along with this program; if not, see <http://www.gnu.org/licenses/>.
-#
-#    To get the latest version, check on Freshmeat for actual location:
-#
-#		http://freshmeat.net/projects/cbq.init
-#
-#
-# VERSION HISTORY
-# ---------------
-# v0.7.3- Deepak Singhal <singhal at users.sourceforge.net>
-#	  - fix timecheck to not ignore regular TIME rules after
-#	    encountering a TIME rule that spans over midnight
-#	- Nathan Shafer <nicodemus at users.sourceforge.net>
-#	  - allow symlinks to class files
-#	- Seth J. Blank <antifreeze at users.sourceforge.net>
-#	  - replace hardcoded ip/tc location with variables
-#	- Mark Davis <mark.davis at gmx.de>
-#	  - allow setting of PRIO_{MARK,RULE,REALM} in class file
-#	- Fernando Sanch <toptnc at users.sourceforge.net>
-#	  - allow underscores in interface names
-# v0.7.2- Paulo Sedrez
-#	  - fix time2abs to allow hours with leading zero in TIME rules
-#	- Svetlin Simeonov <zvero at yahoo.com>
-#	  - fix cbq_device_list to allow VLAN interfaces
-#	- Mark Davis <mark.davis at gmx.de>
-#	  - ignore *~ backup files when looking for classes
-#	- Mike Boyer <boyer at administrative.com>
-#	  - fix to allow arguments to be passed to "restart" command
-# v0.7.1- Lubomir Bulej <pallas at kadan.cz>
-#	  - default value for PERTURB
-#	  - fixed small bug in RULE parser to correctly parse rules with
-#	    identical source and destination fields
-#	  - faster initial scanning of DEVICE fields
-# v0.7	- Lubomir Bulej <pallas at kadan.cz>
-#	  - lots of various cleanups and reorganizations; the parsing is now
-#	    some 40% faster, but the class ID must be in range 0x0002-0xffff
-#	    (again). Because of the number of internal changes and the above
-#	    class ID restriction, I bumped the version to 0.7 to indicate
-#	    something might have got broken :)
-#	  - changed PRIO_{U32,FW,ROUTE} to PRIO_{RULE,MARK,REALM}
-#	    for consistency with filter keywords
-#	  - exposed "compile" command
-#	- Catalin Petrescu <taz at dntis.ro>
-#	  - support for port masks in RULE (u32) filter
-#	- Jordan Vrtanoski <obeliks at mt.net.mk>
-#	  - support for week days in TIME rules
-# v0.6.4- Lubomir Bulej <pallas at kadan.cz>
-#	  - added PRIO_* variables to allow easy control of filter priorities
-#	  - added caching to speed up CBQ start, the cache is invalidated
-#	    whenever any of the configuration files changes
-#	  - updated the readme section + some cosmetic fixes
-# v0.6.3- Lubomir Bulej <pallas at kadan.cz>
-#	  - removed setup of (unnecessary) class 1:1 - all classes
-#	    now use qdisc's default class 1:0 as their parent
-#	  - minor fix in the timecheck branch - classes
-#	    without leaf qdisc were not updated
-#	  - minor fix to avoid timecheck failure when run
-#	    at time with minutes equal to 08 or 09
-#	  - respect CBQ_PATH setting in environment
-#	  - made PRIO=5 default, rendering it optional in configs
-#	  - added support for route filter, see notes about REALM keyword
-#	  - added support for fw filter, see notes about MARK keyword
-#	  - added filter display to "list" and "stats" commands
-#	  - readme section update + various cosmetic fixes
-# v0.6.2- Catalin Petrescu <taz at dntis.ro>
-#	  - added tunnels interface handling
-# v0.6.1- Pavel Golubev <pg at ksi-linux.com>
-#	  - added sch_prio module loading
-#	    (thanks johan at iglo.virtual.or.id for reminding)
-#	  - resolved errors resulting from stricter syntax checking in bash2
-#	- Lubomir Bulej <pallas at kadan.cz>
-#	  - various cosmetic fixes
-# v0.6	- Lubomir Bulej <pallas at kadan.cz>
-#	  - attempt to limit number of spawned processes by utilizing
-#	    more of sed power (use sed instead of grep+cut)
-#	  - simplified TIME parser, using bash builtins
-#	  - added initial support for SFQ as leaf qdisc
-#	  - reworked the documentation part a little
-#	  - incorporated pending patches and ideas submitted by
-#	    following people for versions 0.3 into version 0.6
-#	- Miguel Freitas <miguel at cetuc.puc-rio.br>
-#	  - in case of overlapping TIME parameters, the last match is taken
-#	- Juanjo Ciarlante <jjo at mendoza.gov.ar>
-#	  - chkconfig tags, list + stats startup parameters
-#	  - optional tc & ip command logging (into /var/run/cbq-*)
-#	- Rafal Maszkowski <rzm at icm.edu.pl>
-#	  - PEAK parameter for setting TBF's burst peak rate
-#	  - fix for many config files (use find instead of ls)
-# v0.5.1- Lubomir Bulej <pallas at kadan.cz>
-#	  - fixed little but serious bug in RULE parser
-# v0.5	- Lubomir Bulej <pallas at kadan.cz>
-#	  - added options PARENT, LEAF, ISOLATED and BOUNDED. This allows
-#	    (with some attention to config file ordering) for creating
-#	    hierarchical structures of shapers with classes able (or unable)
-#	    to borrow bandwidth from their parents.
-#	  - class ID check allows hexadecimal numbers
-#	  - rewritten & simplified RULE parser
-#	  - cosmetic changes to improve readability
-#	  - reorganization to avoid duplicate code (timecheck etc.)
-#	  - timecheck doesn't check classes without TIME fields anymore
-# v0.4  - Lubomir Bulej <pallas at kadan.cz>
-#	  - small bugfix in RULE parsing code
-#	  - simplified configuration parsing code
-#	  - several small cosmetic changes
-#	  - TIME parameter can be now specified more than once allowing you to
-#	    differentiate RATE throughout the whole day. Time overlapping is
-#	    not checked, first match is taken. Midnight wrap (eg. 20:00-6:00)
-#	    is allowed and taken care of.
-# v0.3a4- fixed small bug in IF operator. Thanks to
-#	  Rafal Maszkowski <rzm at icm.edu.pl>
-# v0.3a3- fixed grep bug when using more than 10 eth devices. Thanks to David
-#	  Trcka <trcka at poda.cz>.
-# v0.3a2- fixed bug in "if" operator. Thanks kad at dgtu.donetsk.ua.
-# v0.3a - added TIME parameter. Example: TIME=00:00-19:00;64Kbit/6Kbit
-#	  So, between 00:00 and 19:00 the RATE will be 64Kbit.
-#	  Just start "cbq.init timecheck" periodically from cron
-#	  (every 10 minutes for example). DON'T FORGET though, to run
-#	  "cbq.init start" for CBQ to initialize.
-# v0.2  - Some cosmetic changes. Now it is more compatible with old bash
-#	  version. Thanks to Stanislav V. Voronyi <stas at cnti.uanet.kharkov.ua>.
-# v0.1  - First public release
-#
-#
-# README
-# ------
-#
-# First of all - this is just a SIMPLE EXAMPLE of CBQ power.
-# Don't ask me "why" and "how" :)
-#
-# This script is meant to simplify setup and management of relatively simple
-# CBQ-based traffic control on Linux. Access to advanced networking features
-# of Linux kernel is provided by "ip" and "tc" utilities from A. Kuznetsov's
-# iproute2 package, available at ftp://ftp.inr.ac.ru/ip-routing. Because the
-# utilities serve primarily to translate user wishes to RTNETLINK commands,
-# their interface is rather spartan, intolerant and requires quite a lot of
-# typing. And typing is what this script attempts to reduce :)
-#
-# The advanced networking stuff in Linux is pretty flexible and this script
-# aims to bring some of its features to the not-so-hard-core Linux users. Of
-# course, there is a tradeoff between simplicity and flexibility and you may
-# realize that the flexibility suffered too much for your needs -- time to
-# face "ip" and "tc" interface.
-#
-# To speed up the "start" command, simple caching was introduced in version
-# 0.6.4. The caching works so that the sequence of "tc" commands for given
-# configuration is stored in a file (/var/cache/cbq.init by default) which
-# is used next time the "start" command is run to avoid repeated parsing of
-# configuration files. This cache is invalidated whenever any of the CBQ
-# configuration files changes. If you want to run "cbq.init start" without
-# caching, run it as "cbq.init start nocache". If you want to force cache
-# invalidation, run it as "cbq.init start invalidate". Caching is disabled
-# if you have logging enabled (ie. CBQ_DEBUG is not empty).
-#
-# If you only want cqb.init to translate your configuration to "tc" commands,
-# use "compile" command which will output "tc" commands required to build
-# your configuration. Bear in mind that "compile" does not check if the "tc"
-# commands were successful - this is done (in certain places) only when the
-# "start nocache" command is used, which is also useful when creating the
-# configuration to check whether it is completely valid.
-#
-# All CBQ parameters are valid for Ethernet interfaces only, The script was
-# tested on various Linux kernel versions from series 2.1 to 2.4 and several
-# distributions with KSI Linux (Nostromo version) as the premier one.
-#
-#
-# HOW DOES IT WORK?
-# -----------------
-#
-# Every traffic class must be described by a file in the $CBQ_PATH directory
-# (/etc/sysconfig/cbq by default) - one file per class.
-#
-# The config file names must obey mandatory format: cbq-<clsid>.<name> where
-# <clsid> is two-byte hexadecimal number in range <0002-FFFF> (which in fact
-# is a CBQ class ID) and <name> is the name of the class -- anything to help
-# you distinguish the configuration files. For small amount of classes it is
-# often possible (and convenient) to let <clsid> resemble bandwidth of the
-# class.
-#
-# Example of valid config name:
-#	cbq-1280.My_first_shaper
-#
-#
-# The configuration file may contain the following parameters:
-#
-### Device parameters
-#
-# DEVICE=<ifname>,<bandwidth>[,<weight>]	mandatory
-# DEVICE=eth0,10Mbit,1Mbit
-#
-#	<ifname> is the name of the interface you want to control
-#		traffic on, e.g. eth0
-#	<bandwidth> is the physical bandwidth of the device, e.g. for
-#		ethernet 10Mbit or 100Mbit, for arcnet 2Mbit
-#	<weight> is tuning parameter that should be proportional to
-#		<bandwidth>. As a rule of thumb: <weight> = <bandwidth> / 10
-#
-# When you have more classes on one interface, it is enough to specify
-# <bandwidth> [and <weight>] only once, therefore in other files you only
-# need to set DEVICE=<ifname>.
-#
-### Class parameters
-#
-# RATE=<speed>					mandatory
-# RATE=5Mbit
-#
-#	Bandwidth allocated to the class. Traffic going through the class is
-#	shaped to conform to specified rate. You can use Kbit, Mbit or bps,
-#	Kbps and Mbps as suffices. If you don't specify any unit, bits/sec
-#	are used. Also note that "bps" means "bytes per second", not bits.
-#
-# WEIGHT=<speed> 				mandatory
-# WEIGHT=500Kbit
-#
-#	Tuning parameter that should be proportional to RATE. As a rule
-#	of thumb, use WEIGHT ~= RATE / 10.
-#
-# PRIO=<1-8>					optional, default 5
-# PRIO=5
-#
-#	Priority of class traffic. The higher the number, the lesser
-#	the priority. Priority of 5 is just fine.
-#
-# PARENT=<clsid>				optional, default not set
-# PARENT=1280
-#
-#	Specifies ID of the parent class to which you want this class be
-#	attached. You might want to use LEAF=none for the parent class as
-#	mentioned below. By using this parameter and carefully ordering the
-#	configuration files, it is possible to create simple hierarchical
-#	structures of CBQ classes. The ordering is important so that parent
-#	classes are constructed prior to their children.
-#
-# LEAF=none|tbf|sfq				optional, default "tbf"
-#
-#	Tells the script to attach specified leaf queueing discipline to CBQ
-#	class. By default, TBF is used. Note that attaching TBF to CBQ class
-#	shapes the traffic to conform to TBF parameters and prevents the class
-#	from borrowing bandwidth from its parent even if you have BOUNDED set
-#	to "no". To allow the class to borrow bandwith (provided it is not
-#	bounded), you must set LEAF to "none" or "sfq".
-#
-#	If you want to ensure (approximately) fair sharing of bandwidth among
-#	several hosts in the same class, you might want to specify LEAF=sfq to
-#	attach SFQ as leaf queueing discipline to that class.
-#
-# BOUNDED=yes|no				optional, default "yes"
-#
-#	If set to "yes", the class is not allowed to borrow bandwidth from
-#	its parent class in overlimit situation. If set to "no", the class
-#	will be allowed to borrow bandwidth from its parent.
-#
-# Note:	Don't forget to set LEAF to "none" or "sfq", otherwise the class will
-#	have TBF attached to itself and will not be able to borrow unused
-#	bandwith from its parent.
-#
-# ISOLATED=yes|no				optional, default "no"
-#
-#	If set to "yes", the class will not lend unused bandwidth to
-#	its children.
-#
-### TBF qdisc parameters
-#
-# BUFFER=<bytes>[/<bytes>]			optional, default "10Kb/8"
-#
-#	This parameter controls the depth of the token bucket. In other
-#	words it represents the maximal burst size the class can send.
-#	The optional part of parameter is used to determine the length
-#	of intervals in packet sizes, for which the transmission times
-#	are kept.
-#
-# LIMIT=<bytes>					optional, default "15Kb"
-#
-#	This parameter determines the maximal length of backlog. If
-#	the queue contains more data than specified by LIMIT, the
-#	newly arriving packets are dropped. The length of backlog
-#	determines queue latency in case of congestion.
-#
-# PEAK=<speed>					optional, default not set
-#
-#	Maximal peak rate for short-term burst traffic. This allows you
-#	to control the absolute peak rate the class can send at, because
-#	single TBF that allows 256Kbit/s would of course allow rate of
-#	512Kbit for half a second or 1Mbit for a quarter of second.
-#
-# MTU=<bytes>  					optional, default "1500"
-#
-#	Maximum number of bytes that can be sent at once over the
-#	physical medium. This parameter is required when you specify
-#	PEAK parameter. It defaults to MTU of ethernet - for other
-#	media types you might want to change it.
-#
-# Note: Setting TBF as leaf qdisc will effectively prevent the class from
-#	borrowing bandwidth from the ancestor class, because even if the
-#	class allows more traffic to pass through, it is then shaped to
-#	conform to TBF.
-#
-### SFQ qdisc parameters
-#
-# The SFQ queueing discipline is a cheap way for sharing class bandwidth
-# among several hosts. As it is stochastic, the fairness is approximate but
-# it will do the job in most cases. If you want real fairness, you should
-# probably use WRR (weighted round robin) or WFQ queueing disciplines. Note
-# that SFQ does not do any traffic shaping - the shaping is done by the CBQ
-# class the SFQ is attached to.
-#
-# QUANTUM=<bytes>				optional, default not set
-#
-#	This parameter should not be set lower than link MTU, for ethernet
-#	it is 1500b, or (with MAC header) 1514b which is the value used
-#	in Alexey Kuznetsov's examples.
-#
-# PERTURB=<seconds>				optional, default "10"
-#
-#	Period of hash function perturbation. If unset, hash reconfiguration
-#	will never take place which is what you probably don't want. The
-#	default value of 10 seconds is probably a good one.
-#
-### Filter parameters
-#
-# RULE=[[saddr[/prefix]][:port[/mask]],][daddr[/prefix]][:port[/mask]]
-#
-#	These parameters make up "u32" filter rules that select traffic for
-#	each of the classes. You can use multiple RULE fields per config.
-#
-#	The optional port mask should only be used by advanced users who
-#	understand how the u32 filter works.
-#
-# Some examples:
-#
-#	RULE=10.1.1.0/24:80
-#		selects traffic going to port 80 in network 10.1.1.0
-#
-#	RULE=10.2.2.5
-#		selects traffic going to any port on single host 10.2.2.5
-#
-#	RULE=10.2.2.5:20/0xfffe
-#		selects traffic going to ports 20 and 21 on host 10.2.2.5
-#
-#	RULE=:25,10.2.2.128/26:5000
-#		selects traffic going from anywhere on port 50 to
-#		port 5000 in network 10.2.2.128
-#
-#	RULE=10.5.5.5:80,
-#		selects traffic going from port 80 of single host 10.5.5.5
-#
-#
-#
-# REALM=[srealm,][drealm]
-#
-#	These parameters make up "route" filter rules that classify traffic
-#	according to packet source/destination realms. For information about
-#	realms, see Alexey Kuznetsov's IP Command Reference. This script
-#	does not define any realms, it justs builds "tc filter" commands
-#	for you if you need to classify traffic this way.
-#
-#	Realm is either a decimal number or a string referencing entry in
-#	/etc/iproute2/rt_realms (usually).
-#
-# Some examples:
-#
-#	REALM=russia,internet
-#		selects traffic going from realm "russia" to realm "internet"
-#
-#	REALM=freenet,
-#		selects traffic going from realm "freenet"
-#
-#	REALM=10
-#		selects traffic going to realm 10
-#
-#
-#
-# MARK=<mark>
-#
-#	These parameters make up "fw" filter rules that select traffic for
-#	each of the classes accoring to firewall "mark". Mark is a decimal
-#	number packets are tagged with if firewall rules say so. You can
-#	use multiple MARK fields per config.
-#
-#
-# Note: Rules for different filter types can be combined. Attention must be
-#	paid to the priority of filter rules, which can be set below using
-#	PRIO_{RULE,MARK,REALM} variables.
-#
-### Time ranging parameters
-#
-# TIME=[<dow>,<dow>, ...,<dow>/]<from>-<till>;<rate>/<weight>[/<peak>]
-# TIME=0,1,2,5/18:00-06:00;256Kbit/25Kbit
-# TIME=60123/18:00-06:00;256Kbit/25Kbit
-# TIME=18:00-06:00;256Kbit/25Kbit
-#
-#	This parameter allows you to differentiate the class bandwidth
-#	throughout the day. You can specify multiple TIME parameters, if
-#	the times overlap, last match is taken. The fields <rate>, <weight>
-#	and <peak> correspond to parameters RATE, WEIGHT and PEAK (which
-#	is optional and applies to TBF leaf qdisc only).
-#
-#	You can also specify days of week when the TIME rule applies. <dow>
-#	is numeric, 0 corresponds to sunday, 1 corresponds to monday, etc.
-#
-###
-#
-# Sample configuration file: cbq-1280.My_first_shaper
-#
-# --------------------------------------------------------------------------
-# DEVICE=eth0,10Mbit,1Mbit
-# RATE=128Kbit
-# WEIGHT=10Kbit
-# PRIO=5
-# RULE=192.128.1.0/24
-# --------------------------------------------------------------------------
-#
-# The configuration says that we will control traffic on 10Mbit ethernet
-# device eth0 and the traffic going to network 192.168.1.0 will be
-# processed with priority 5 and shaped to rate of 128Kbit.
-#
-# Note that you can control outgoing traffic only. If you want to control
-# traffic in both directions, you must set up CBQ for both interfaces.
-#
-# Consider the following example:
-#
-#                    +---------+      192.168.1.1
-# BACKBONE -----eth0-|  linux  |-eth1------*-[client]
-#                    +---------+
-#
-# Imagine you want to shape traffic from backbone to the client to 28Kbit
-# and traffic in the opposite direction to 128Kbit. You need to setup CBQ
-# on both eth0 and eth1 interfaces, thus you need two config files:
-#
-# cbq-028.backbone-client
-# --------------------------------------------------------------------------
-# DEVICE=eth1,10Mbit,1Mbit
-# RATE=28Kbit
-# WEIGHT=2Kbit
-# PRIO=5
-# RULE=192.168.1.1
-# --------------------------------------------------------------------------
-#
-# cbq-128.client-backbone
-# --------------------------------------------------------------------------
-# DEVICE=eth0,10Mbit,1Mbit
-# RATE=128Kbit
-# WEIGHT=10Kbit
-# PRIO=5
-# RULE=192.168.1.1,
-# --------------------------------------------------------------------------
-#
-# Pay attention to comma "," in the RULE field - it denotes source address!
-#
-# Enjoy.
-#
-#############################################################################
-
-export LC_ALL=C
-
-### Command locations
-TC=/sbin/tc
-IP=/sbin/ip
-MP=/sbin/modprobe
-
-### Default filter priorities (must be different)
-PRIO_RULE_DEFAULT=${PRIO_RULE:-100}
-PRIO_MARK_DEFAULT=${PRIO_MARK:-200}
-PRIO_REALM_DEFAULT=${PRIO_REALM:-300}
-
-### Default CBQ_PATH & CBQ_CACHE settings
-CBQ_PATH=${CBQ_PATH:-/etc/sysconfig/cbq}
-CBQ_CACHE=${CBQ_CACHE:-/var/cache/cbq.init}
-
-### Uncomment to enable logfile for debugging
-#CBQ_DEBUG="/var/run/cbq-$1"
-
-### Modules to probe for. Uncomment the last CBQ_PROBE
-### line if you have QoS support compiled into kernel
-CBQ_PROBE="sch_cbq sch_tbf sch_sfq sch_prio"
-CBQ_PROBE="$CBQ_PROBE cls_fw cls_u32 cls_route"
-#CBQ_PROBE=""
-
-### Keywords required for qdisc & class configuration
-CBQ_WORDS="DEVICE|RATE|WEIGHT|PRIO|PARENT|LEAF|BOUNDED|ISOLATED"
-CBQ_WORDS="$CBQ_WORDS|PRIO_MARK|PRIO_RULE|PRIO_REALM|BUFFER"
-CBQ_WORDS="$CBQ_WORDS|LIMIT|PEAK|MTU|QUANTUM|PERTURB"
-
-### Source AVPKT if it exists
-[ -r /etc/sysconfig/cbq/avpkt ] && . /etc/sysconfig/cbq/avpkt
-AVPKT=${AVPKT:-3000}
-
-
-#############################################################################
-############################# SUPPORT FUNCTIONS #############################
-#############################################################################
-
-### Get list of network devices
-cbq_device_list () {
-	ip link show| sed -n "/^[0-9]/ \
-		{ s/^[0-9]\+: \([a-z0-9._]\+\)[:@].*/\1/; p; }"
-} # cbq_device_list
-
-
-### Remove root class from device $1
-cbq_device_off () {
-	tc qdisc del dev $1 root 2> /dev/null
-} # cbq_device_off
-
-
-### Remove CBQ from all devices
-cbq_off () {
-	for dev in `cbq_device_list`; do
-		cbq_device_off $dev
-	done
-} # cbq_off
-
-
-### Prefixed message
-cbq_message () {
-	echo -e "**CBQ: $*"
-} # cbq_message
-
-### Failure message
-cbq_failure () {
-	cbq_message "$@"
-	exit 1
-} # cbq_failure
-
-### Failure w/ cbq-off
-cbq_fail_off () {
-	cbq_message "$@"
-	cbq_off
-	exit 1
-} # cbq_fail_off
-
-
-### Convert time to absolute value
-cbq_time2abs () {
-	local min=${1##*:}; min=${min##0}
-	local hrs=${1%%:*}; hrs=${hrs##0}
-	echo $[hrs*60 + min]
-} # cbq_time2abs
-
-
-### Display CBQ setup
-cbq_show () {
-	for dev in `cbq_device_list`; do
-		[ "`tc qdisc show dev $dev| wc -l`" -eq 0 ] && continue
-		echo -e "### $dev: queueing disciplines\n"
-		tc $1 qdisc show dev $dev; echo
-
-		[ "`tc class show dev $dev| wc -l`" -eq 0 ] && continue
-		echo -e "### $dev: traffic classes\n"
-		tc $1 class show dev $dev; echo
-
-		[ "`tc filter show dev $dev| wc -l`" -eq 0 ] && continue
-		echo -e "### $dev: filtering rules\n"
-		tc $1 filter show dev $dev; echo
-	done
-} # cbq_show
-
-
-### Check configuration and load DEVICES, DEVFIELDS and CLASSLIST from $1
-cbq_init () {
-	### Get a list of configured classes
-	CLASSLIST=`find $1 -maxdepth 1 \( -type f -or -type l \) -name 'cbq-*' \
-		-not -name '*~' -printf "%f\n"| sort`
-	[ -z "$CLASSLIST" ] &&
-		cbq_failure "no configuration files found in $1!"
-
-	### Gather all DEVICE fields from $1/cbq-*
-	DEVFIELDS=`find $1 -maxdepth 1 \( -type f -or -type l \) -name 'cbq-*' \
-		  -not -name '*~' -print0 | xargs -0 sed -n 's/#.*//; \
-		  s/[[:space:]]//g; /^DEVICE=[^,]*,[^,]*\(,[^,]*\)\?/ \
-		  { s/.*=//; p; }'| sort -u`
-	[ -z "$DEVFIELDS" ] &&
-		cbq_failure "no DEVICE field found in $1/cbq-*!"
-
-	### Check for different DEVICE fields for the same device
-	DEVICES=`echo "$DEVFIELDS"| sed 's/,.*//'| sort -u`
-	[ "`echo "$DEVICES"| wc -l`" -ne "`echo "$DEVFIELDS"| wc -l`" ] &&
-		cbq_failure "different DEVICE fields for single device!\n$DEVFIELDS"
-} # cbq_init
-
-
-### Load class configuration from $1/$2
-cbq_load_class () {
-	CLASS=`echo $2| sed 's/^cbq-0*//; s/^\([0-9a-fA-F]\+\).*/\1/'`
-	CFILE=`sed -n 's/#.*//; s/[[:space:]]//g; /^[[:alnum:]_]\+=[[:alnum:].,:;/*@-_]\+$/ p' $1/$2`
-
-	### Check class number
-	IDVAL=`/usr/bin/printf "%d" 0x$CLASS 2> /dev/null`
-	[ $? -ne 0 -o $IDVAL -lt 2 -o $IDVAL -gt 65535 ] &&
-		cbq_fail_off "class ID of $2 must be in range <0002-FFFF>!"
-
-	### Set defaults & load class
-	RATE=""; WEIGHT=""; PARENT=""; PRIO=5
-	LEAF=tbf; BOUNDED=yes; ISOLATED=no
-	BUFFER=10Kb/8; LIMIT=15Kb; MTU=1500
-	PEAK=""; PERTURB=10; QUANTUM=""
-
-	PRIO_RULE=$PRIO_RULE_DEFAULT
-	PRIO_MARK=$PRIO_MARK_DEFAULT
-	PRIO_REALM=$PRIO_REALM_DEFAULT
-
-	eval "`echo "$CFILE"| grep -E "^($CBQ_WORDS)="`"
-
-	### Require RATE/WEIGHT
-	[ -z "$RATE" -o -z "$WEIGHT" ] &&
-		cbq_fail_off "missing RATE or WEIGHT in $2!"
-
-	### Class device
-	DEVICE=${DEVICE%%,*}
-	[ -z "$DEVICE" ] && cbq_fail_off "missing DEVICE field in $2!"
-
-	BANDWIDTH=`echo "$DEVFIELDS"| sed -n "/^$DEVICE,/ \
-		  { s/[^,]*,\([^,]*\).*/\1/; p; q; }"`
-
-	### Convert to "tc" options
-	PEAK=${PEAK:+peakrate $PEAK}
-	PERTURB=${PERTURB:+perturb $PERTURB}
-	QUANTUM=${QUANTUM:+quantum $QUANTUM}
-
-	[ "$BOUNDED" = "no" ] && BOUNDED="" || BOUNDED="bounded"
-	[ "$ISOLATED" = "yes" ] && ISOLATED="isolated" || ISOLATED=""
-} # cbq_load_class
-
-
-#############################################################################
-#################################### INIT ###################################
-#############################################################################
-
-### Check for presence of ip-route2 in usual place
-[ -x $TC -a -x $IP ] ||
-	cbq_failure "ip-route2 utilities not installed or executable!"
-
-
-### ip/tc wrappers
-if [ "$1" = "compile" ]; then
-	### no module probing
-	CBQ_PROBE=""
-
-	ip () {
-		$IP "$@"
-	} # ip
-
-	### echo-only version of "tc" command
-	tc () {
-		echo "$TC $*"
-	} # tc
-
-elif [ -n "$CBQ_DEBUG" ]; then
-	echo -e "# `date`" > $CBQ_DEBUG
-
-	### Logging version of "ip" command
-	ip () {
-		echo -e "\n# ip $*" >> $CBQ_DEBUG
-		$IP "$@" 2>&1 | tee -a $CBQ_DEBUG
-	} # ip
-
-	### Logging version of "tc" command
-	tc () {
-		echo -e "\n# tc $*" >> $CBQ_DEBUG
-		$TC "$@" 2>&1 | tee -a $CBQ_DEBUG
-	} # tc
-else
-	### Default wrappers
-	
-	ip () {
-		$IP "$@"
-	} # ip
-	
-	tc () {
-		$TC "$@"
-	} # tc
-fi # ip/tc wrappers
-
-
-case "$1" in
-
-#############################################################################
-############################### START/COMPILE ###############################
-#############################################################################
-
-start|compile)
-
-### Probe QoS modules (start only)
-for module in $CBQ_PROBE; do
-	$MP $module || cbq_failure "failed to load module $module"
-done
-
-### If we are in compile/nocache/logging mode, don't bother with cache
-if [ "$1" != "compile" -a "$2" != "nocache" -a -z "$CBQ_DEBUG" ]; then
-	VALID=1
-
-	### validate the cache
-	[ "$2" = "invalidate" -o ! -f $CBQ_CACHE ] && VALID=0
-	if [ $VALID -eq 1 ]; then
-		[ "`find $CBQ_PATH -maxdepth 1 -newer $CBQ_CACHE| \
-		  wc -l`" -gt 0 ] && VALID=0
-	fi
-
-	### compile the config if the cache is invalid
-	if [ $VALID -ne 1 ]; then
-		$0 compile > $CBQ_CACHE ||
-			cbq_fail_off "failed to compile CBQ configuration!"
-	fi
-
-	### run the cached commands
-	exec /bin/sh $CBQ_CACHE 2> /dev/null
-fi
-
-### Load DEVICES, DEVFIELDS and CLASSLIST
-cbq_init $CBQ_PATH
-
-
-### Setup root qdisc on all configured devices
-for dev in $DEVICES; do
-	### Retrieve device bandwidth and, optionally, weight
-	DEVTEMP=`echo "$DEVFIELDS"| sed -n "/^$dev,/ { s/$dev,//; p; q; }"`
-	DEVBWDT=${DEVTEMP%%,*};	DEVWGHT=${DEVTEMP##*,}
-	[ "$DEVBWDT" = "$DEVWGHT" ] && DEVWGHT=""
-
-	### Device bandwidth is required
-	if [ -z "$DEVBWDT" ]; then
-		cbq_message "could not determine bandwidth for device $dev!"
-		cbq_failure "please set up the DEVICE fields properly!"
-	fi
-
-	### Check if the device is there
-	ip link show $dev &> /dev/null ||
-		cbq_fail_off "device $dev not found!"
-
-	### Remove old root qdisc from device
-	cbq_device_off $dev
-
-
-	### Setup root qdisc + class for device
-	tc qdisc add dev $dev root handle 1 cbq \
-	bandwidth $DEVBWDT avpkt $AVPKT cell 8
-
-	### Set weight of the root class if set
-	[ -n "$DEVWGHT" ] &&
-		tc class change dev $dev root cbq weight $DEVWGHT allot 1514
-
-	[ "$1" = "compile" ] && echo
-done # dev
-
-
-### Setup traffic classes
-for classfile in $CLASSLIST; do
-	cbq_load_class $CBQ_PATH $classfile
-
-	### Create the class
-	tc class add dev $DEVICE parent 1:$PARENT classid 1:$CLASS cbq \
-	bandwidth $BANDWIDTH rate $RATE weight $WEIGHT prio $PRIO \
-	allot 1514 cell 8 maxburst 20 avpkt $AVPKT $BOUNDED $ISOLATED ||
-		cbq_fail_off "failed to add class $CLASS with parent $PARENT on $DEVICE!"
-
-	### Create leaf qdisc if set
-	if [ "$LEAF" = "tbf" ]; then
-		tc qdisc add dev $DEVICE parent 1:$CLASS handle $CLASS tbf \
-		rate $RATE buffer $BUFFER limit $LIMIT mtu $MTU $PEAK
-	elif [ "$LEAF" = "sfq" ]; then
-		tc qdisc add dev $DEVICE parent 1:$CLASS handle $CLASS sfq \
-		$PERTURB $QUANTUM
-	fi
-
-
-	### Create fw filter for MARK fields
-	for mark in `echo "$CFILE"| sed -n '/^MARK/ { s/.*=//; p; }'`; do
-		### Attach fw filter to root class
-		tc filter add dev $DEVICE parent 1:0 protocol ip \
-		prio $PRIO_MARK handle $mark fw classid 1:$CLASS
-	done ### mark
-
-	### Create route filter for REALM fields
-	for realm in `echo "$CFILE"| sed -n '/^REALM/ { s/.*=//; p; }'`; do
-		### Split realm into source & destination realms
-		SREALM=${realm%%,*}; DREALM=${realm##*,}
-		[ "$SREALM" = "$DREALM" ] && SREALM=""
-
-		### Convert asterisks to empty strings
-		SREALM=${SREALM#\*}; DREALM=${DREALM#\*}
-
-		### Attach route filter to the root class
-		tc filter add dev $DEVICE parent 1:0 protocol ip \
-		prio $PRIO_REALM route ${SREALM:+from $SREALM} \
-		${DREALM:+to $DREALM} classid 1:$CLASS
-	done ### realm
-
-	### Create u32 filter for RULE fields
-	for rule in `echo "$CFILE"| sed -n '/^RULE/ { s/.*=//; p; }'`; do
-		### Split rule into source & destination
-		SRC=${rule%%,*}; DST=${rule##*,}
-		[ "$SRC" = "$rule" ] && SRC=""
-
-
-		### Split destination into address, port & mask fields
-		DADDR=${DST%%:*}; DTEMP=${DST##*:}
-		[ "$DADDR" = "$DST" ] && DTEMP=""
-
-		DPORT=${DTEMP%%/*}; DMASK=${DTEMP##*/}
-		[ "$DPORT" = "$DTEMP" ] && DMASK="0xffff"
-
-
-		### Split up source (if specified)
-		SADDR=""; SPORT=""
-		if [ -n "$SRC" ]; then
-			SADDR=${SRC%%:*}; STEMP=${SRC##*:}
-			[ "$SADDR" = "$SRC" ] && STEMP=""
-
-			SPORT=${STEMP%%/*}; SMASK=${STEMP##*/}
-			[ "$SPORT" = "$STEMP" ] && SMASK="0xffff"
-		fi
-
-
-		### Convert asterisks to empty strings
-		SADDR=${SADDR#\*}; DADDR=${DADDR#\*}
-
-		### Compose u32 filter rules
-		u32_s="${SPORT:+match ip sport $SPORT $SMASK}"
-		u32_s="${SADDR:+match ip src $SADDR} $u32_s"
-		u32_d="${DPORT:+match ip dport $DPORT $DMASK}"
-		u32_d="${DADDR:+match ip dst $DADDR} $u32_d"
-
-		### Uncomment the following if you want to see parsed rules
-		#echo "$rule: $u32_s $u32_d"
-
-		### Attach u32 filter to the appropriate class
-		tc filter add dev $DEVICE parent 1:0 protocol ip \
-		prio $PRIO_RULE u32 $u32_s $u32_d classid 1:$CLASS
-	done ### rule
-
-	[ "$1" = "compile" ] && echo
-done ### classfile
-;;
-
-
-#############################################################################
-################################# TIME CHECK ################################
-#############################################################################
-
-timecheck)
-
-### Get time + weekday
-TIME_TMP=`date +%w/%k:%M`
-TIME_DOW=${TIME_TMP%%/*}
-TIME_NOW=${TIME_TMP##*/}
-
-### Load DEVICES, DEVFIELDS and CLASSLIST
-cbq_init $CBQ_PATH
-
-### Run through all classes
-for classfile in $CLASSLIST; do
-	### Gather all TIME rules from class config
-	TIMESET=`sed -n 's/#.*//; s/[[:space:]]//g; /^TIME/ { s/.*=//; p; }' \
-		$CBQ_PATH/$classfile`
-	[ -z "$TIMESET" ] && continue
-
-	MATCH=0; CHANGE=0
-	for timerule in $TIMESET; do
-		TIME_ABS=`cbq_time2abs $TIME_NOW`
-		
-		### Split TIME rule to pieces
-		TIMESPEC=${timerule%%;*}; PARAMS=${timerule##*;}
-		WEEKDAYS=${TIMESPEC%%/*}; INTERVAL=${TIMESPEC##*/}
-		BEG_TIME=${INTERVAL%%-*}; END_TIME=${INTERVAL##*-}
-
-		### Check the day-of-week (if present)
-		[ "$WEEKDAYS" != "$INTERVAL" -a \
-		  -n "${WEEKDAYS##*$TIME_DOW*}" ] && continue
-
-		### Compute interval boundaries
-		BEG_ABS=`cbq_time2abs $BEG_TIME`
-		END_ABS=`cbq_time2abs $END_TIME`
-
-		### Midnight wrap fixup
-		if [ $BEG_ABS -gt $END_ABS ]; then
-			[ $TIME_ABS -le $END_ABS ] &&
-				TIME_ABS=$[TIME_ABS + 24*60]
-
-			END_ABS=$[END_ABS + 24*60]
-		fi
-
-		### If the time matches, remember params and set MATCH flag
-		if [ $TIME_ABS -ge $BEG_ABS -a $TIME_ABS -lt $END_ABS ]; then
-			TMP_RATE=${PARAMS%%/*}; PARAMS=${PARAMS#*/}
-			TMP_WGHT=${PARAMS%%/*}; TMP_PEAK=${PARAMS##*/}
-
-			[ "$TMP_PEAK" = "$TMP_WGHT" ] && TMP_PEAK=""
-			TMP_PEAK=${TMP_PEAK:+peakrate $TMP_PEAK}
-
-			MATCH=1
-		fi
-	done ### timerule
-
-
-	cbq_load_class $CBQ_PATH $classfile
-
-	### Get current RATE of CBQ class
-	RATE_NOW=`tc class show dev $DEVICE| sed -n \
-		 "/cbq 1:$CLASS / { s/.*rate //; s/ .*//; p; q; }"`
-	[ -z "$RATE_NOW" ] && continue
-
-	### Time interval matched
-	if [ $MATCH -ne 0 ]; then
-
-		### Check if there is any change in class RATE
-		if [ "$RATE_NOW" != "$TMP_RATE" ]; then
-			NEW_RATE="$TMP_RATE"
-			NEW_WGHT="$TMP_WGHT"
-			NEW_PEAK="$TMP_PEAK"
-			CHANGE=1
-		fi
-
-	### Match not found, reset to default RATE if necessary
-	elif [ "$RATE_NOW" != "$RATE" ]; then
-		NEW_WGHT="$WEIGHT"
-		NEW_RATE="$RATE"
-		NEW_PEAK="$PEAK"
-		CHANGE=1
-	fi
-
-	### If there are no changes, go for next class
-	[ $CHANGE -eq 0 ] && continue
-
-	### Replace CBQ class
-	tc class replace dev $DEVICE classid 1:$CLASS cbq \
-	bandwidth $BANDWIDTH rate $NEW_RATE weight $NEW_WGHT prio $PRIO \
-	allot 1514 cell 8 maxburst 20 avpkt $AVPKT $BOUNDED $ISOLATED
-
-	### Replace leaf qdisc (if any)
-	if [ "$LEAF" = "tbf" ]; then
-		tc qdisc replace dev $DEVICE handle $CLASS tbf \
-		rate $NEW_RATE buffer $BUFFER limit $LIMIT mtu $MTU $NEW_PEAK
-	fi
-
-	cbq_message "$TIME_NOW: class $CLASS on $DEVICE changed rate ($RATE_NOW -> $NEW_RATE)"
-done ### class file
-;;
-
-
-#############################################################################
-################################## THE REST #################################
-#############################################################################
-
-stop)
-	cbq_off
-	;;
-
-list)
-	cbq_show
-	;;
-
-stats)
-	cbq_show -s
-	;;
-
-restart)
-	shift
-	$0 stop
-	$0 start "$@"
-	;;
-
-*)
-	echo "Usage: `basename $0` {start|compile|stop|restart|timecheck|list|stats}"
-esac
--- a/examples/cbqinit.eth1
+++ b/examples/cbqinit.eth1
@ -1,76 +0,0 @@
-#! /bin/sh
-
-TC=/home/root/tc
-IP=/home/root/ip
-DEVICE=eth1
-BANDWIDTH="bandwidth 10Mbit"
-
-# Attach CBQ on $DEVICE. It will have handle 1:.
-#   $BANDWIDTH is real $DEVICE bandwidth (10Mbit).
-#   avpkt is average packet size.
-#   mpu is minimal packet size.
-
-$TC qdisc add dev $DEVICE  root  handle 1:  cbq \
-$BANDWIDTH avpkt 1000 mpu 64
-
-# Create root class with classid 1:1. This step is not necessary.
-#   bandwidth is the same as on CBQ itself.
-#   rate == all the bandwidth
-#   allot is MTU + MAC header
-#   maxburst measures the allowed class burstiness (please read the S.Floyd and VJ papers)
-#   est 1sec 8sec means, that kernel will evaluate average rate
-#                 on this class with period 1sec and time constant 8sec.
-#                 This rate is viewed with "tc -s class ls dev $DEVICE"
-
-$TC class add dev $DEVICE parent 1:0 classid :1 est 1sec 8sec cbq \
-$BANDWIDTH rate 10Mbit allot 1514 maxburst 50 avpkt 1000
-
-# Bulk.
-#    New parameters are: 
-#    weight, which is set to be proportional to
-#            "rate". It is not necessary, weight=1 will work as well.
-#    defmap and split say that best-effort traffic not classified
-#            by other means will fall into this class.
-
-$TC class add dev $DEVICE parent 1:1 classid :2 est 1sec 8sec cbq \
-$BANDWIDTH rate 4Mbit allot 1514 weight 500Kbit \
-prio 6 maxburst 50 avpkt 1000 split 1:0 defmap ff3d
-
-# OPTIONAL.
-# Attach "sfq" qdisc to this class, quantum is MTU, perturb
-# gives period of hash function perturbation in seconds.
-#
-$TC qdisc add dev $DEVICE parent 1:2 sfq quantum 1514b perturb 15
-
-# Interactive-burst class
-
-$TC class add dev $DEVICE parent 1:1 classid :3 est 2sec 16sec cbq \
-$BANDWIDTH rate 1Mbit allot 1514 weight 100Kbit \
-prio 2 maxburst 100 avpkt 1000 split 1:0 defmap c0
-
-$TC qdisc add dev $DEVICE parent 1:3 sfq quantum 1514b perturb 15
-
-# Background.
-
-$TC class add dev $DEVICE parent 1:1 classid :4 est 1sec 8sec cbq \
-  $BANDWIDTH rate 100Kbit allot 1514 weight 10Mbit \
-  prio 7 maxburst 10 avpkt 1000 split 1:0 defmap 2
-
-$TC qdisc add dev $DEVICE parent 1:4 sfq quantum 1514b perturb 15
-
-# Realtime class for RSVP
-
-$TC class add dev $DEVICE parent 1:1 classid 1:7FFE cbq \
-rate 5Mbit $BANDWIDTH allot 1514b avpkt 1000 \
-maxburst 20
-
-# Reclassified realtime traffic
-#
-# New element: split is not 1:0, but 1:7FFE. It means,
-#     that only real-time packets, which violated policing filters
-#     or exceeded reshaping buffers will fall to it.
-
-$TC class add dev $DEVICE parent 1:7FFE classid 1:7FFF  est 4sec 32sec cbq \
-rate 1Mbit $BANDWIDTH allot 1514b avpkt 1000 weight 10Kbit \
-prio 6 maxburst 10 split 1:7FFE defmap ffff
-
--- a/examples/dhcp-client-script
+++ b/examples/dhcp-client-script
@ -1,446 +0,0 @@
-#!/bin/bash
-#
-# dhclient-script for Linux.
-#
-#		This program is free software; you can redistribute it and/or
-#		modify it under the terms of the GNU General Public License
-#		as published by the Free Software Foundation; either version
-#		2 of the License, or (at your option) any later version.
-#
-# Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
-#
-# Probably, I did not understand, what this funny feature as "alias"
-# means exactly. For now I suppose, that it is a static address, which
-# we should install and preserve.
-#
-
-exec >> /var/log/DHS.log 2>&1
-
-echo dhc-script $* reason=$reason
-set | grep "^\(old_\|new_\|check_\)"
-
-LOG () {
-    echo LOG $* ;
-}
-
-# convert 8bit mask to length
-# arg: $1 = mask
-#
-Mask8ToLen() {
-	local l=0;
-
-	while [ $l -le 7 ]; do
-		if [ $[ ( 1 << $l ) + $1 ] -eq 256 ]; then
-			return	$[ 8 - $l ]
-		fi
-		l=$[ $l + 1 ]
-	done
-	return 0;
-}
-
-# Convert a dotted-quad IPv4 netmask to a prefix length.
-# arg: $1 = dotquad mask
-# returns: the prefix length as the exit status, or 255 when the mask
-#          matches none of the patterns below.
-#
-MaskToLen() {
- local prefix=0
- local octet=$1
-
- # Each pattern fixes the number of fully-set leading octets and leaves
- # the single remaining octet in $octet.
- case $1 in
- 0.0.0.0)
-	return 0
-	;;
- 255.*.0.0)
-	prefix=8
-	octet=${octet#255.}
-	octet=${octet%.0.0}
-	;;
- 255.255.*.0)
-	prefix=16
-	octet=${octet#255.255.}
-	octet=${octet%.0}
-	;;
- 255.255.255.*)
-	prefix=24
-	octet=${octet#255.255.255.}
-	;;
- *)
-	return 255
-	;;
- esac
-
- # Add the bits contributed by the remaining octet.
- Mask8ToLen $octet
- return $(( $? + prefix ))
-}
-
-# calculate ABC "natural" mask
-# arg: $1 = dotquad address
-#
-ABCMask () {
- local class;
-
- class=${1%%.*}
-
- if [ "$1" = "255.255.255.255" ]; then
-    echo $1
- elif [ "$1" = "0.0.0.0" ]; then
-    echo $1
- elif [ $class -ge 224 ]; then
-    echo 240.0.0.0
- elif [ $class -ge 192 ]; then
-    echo 255.255.255.0
- elif [ $class -ge 128 ]; then
-    echo 255.255.0.0
- else
-    echo 255.0.0.0
- fi
-}
-
-# calculate ABC "natural" mask length
-# arg: $1 = dotquad address
-#
-ABCMaskLen () {
- local class;
-
- class=${1%%.*}
-
- if [ "$1" = "255.255.255.255" ]; then
-    return 32
- elif [ "$1" = "0.0.0.0" ]; then
-    return 0
- elif [ $class -ge 224 ]; then
-    return 4;
- elif [ $class -ge 192 ]; then
-    return 24;
- elif [ $class -ge 128 ]; then
-    return 16;
- else
-    return 8;
- fi
-}
-
-# Delete IP address
-# args: $1 = interface
-#       $2 = address
-#       $3 = mask
-#       $4 = broadcast
-#       $5 = label
-#
-DelINETAddr () {
-  local masklen=32
-  local addrid=$1
-
-  LOG DelINETAddr $*
-
-  if [ "$5" ]; then
-    addrid=$addrid:$5
-  fi
-  LOG ifconfig $addrid down
-  ifconfig $addrid down
-}
-
-# Add IP address
-# args: $1 = interface
-#       $2 = address
-#       $3 = mask
-#       $4 = broadcast
-#       $5 = label
-#
-AddINETAddr () {
-  local mask_arg
-  local brd_arg
-  local addrid=$1
-
-  LOG AddINETAddr $*
-
-  if [ "$5" ]; then
-    addrid=$addrid:$5
-  fi
-  if [ "$3" ]; then
-    mask_arg="netmask $3"
-  fi
-  if [ "$4" ]; then
-    brd_arg="broadcast $4"
-  fi
-
-  LOG ifconfig $addrid $2 $mask_arg $brd_arg up
-  ifconfig $addrid $2 $mask_arg $brd_arg up
-}
-
-# Add default routes
-# args: $1 = routers list
-#
-AddDefaultRoutes() {
-    local router
-
-    if [ "$1" ]; then
-      LOG AddDefaultRoutes $*
-      for router in $1; do
-        LOG route add default gw $router
-        route add default gw $router
-      done ;
-    fi
-}
-
-# Delete default routes
-# args: $1 = routers list
-#
-DelDefaultRoutes() {
-    local router
-
-    if [ "$1" ]; then
-      LOG DelDefaultRoutes $*
-
-      for router in $1; do
-        LOG route del default gw $router
-        route del default gw $router
-      done
-    fi
-}
-
-# ping a host
-# args: $1 = dotquad address of the host
-#
-PingNode() {
-    LOG PingNode $*
-    if ping -q -c 1 -w 2 $1 ; then
-	return 0;
-    fi
-    return 1;
-}
-
-# Check (and add route, if alive) default routers
-# args: $1 = routers list
-# returns: 0 if at least one router is alive.
-#
-CheckRouterList() {
-    local router
-    local succeed=1
-
-    LOG CheckRouterList $*
-
-    for router in $1; do
-      if PingNode $router ; then
-	succeed=0
-        route add default gw $router
-      fi
-    done
-    return $succeed
-}
-
-# Delete/create static routes.
-# args: $1 = operation (del/add)
-#       $2 = routes list in format "dst1 nexthop1 dst2 ..."
-#
-# Each destination gets its classful netmask from ABCMask. A trailing
-# destination without a nexthop is ignored.
-#
-# BEWARE: this feature of DHCP is obsolete, because does not
-#         support subnetting.
-#
-X-StaticRouteList() {
-    local op=$1
-    local lst="$2"
-
-    LOG X-StaticRouteList $*
-
-    if [ "$lst" ]; then
-      # "--" stops option parsing, so a list whose first word begins with
-      # "-" is still loaded into the positional parameters.
-      set -- $lst
-      while [ $# -gt 1 ]; do
-	route $op -net $1 netmask $(ABCMask "$1") gw $2
-	shift; shift
-      done
-    fi
-}
-
-# Create static routes.
-# arg: $1 = routes list in format "dst1 nexthop1 dst2 ..."
-#
-AddStaticRouteList() {
-    LOG AddStaticRouteList $*
-    X-StaticRouteList add "$1"
-}
-
-# Delete static routes.
-# arg: $1 = routes list in format "dst1 nexthop1 dst2 ..."
-#
-DelStaticRouteList() {
-    LOG DelStaticRouteList $*
-    X-StaticRouteList del "$1"
-}
-
-# Broadcast unsolicited ARP to update neighbours' caches.
-# args: $1 = interface
-#       $2 = address
-#
-# Does nothing when /sbin/arping is not present. Both arping runs are
-# started in the background; the second one is delayed by two seconds.
-#
-UnsolicitedARP() {
-    local arping=/sbin/arping
-
-    [ -f $arping ] || return 0
-
-    $arping -A -c 1 -I "$1" "$2" &
-    (sleep 2 ; $arping -U -c 1 -I "$1" "$2" ) &
-}
-
-# Duplicate address detection.
-# args: $1 = interface
-#       $2 = test address
-# returns: 0, if DAD succeeded.
-DAD() {
-  if [ -f /sbin/arping ]; then
-	/sbin/arping -c 2 -w 3 -D -I "$1" "$2"
-	return $?
-  fi
-  return 0
-}
-
-
-# Setup resolver.
-# args: NO
-#       domain and nameserver list are passed in global variables.
-#
-# NOTE: we try to be careful and not to break user supplied resolv.conf.
-#       The script mangles it, only if it has dhcp magic signature.
-#
-UpdateDNS() {
-    local nameserver
-    local idstring="#### Generated by DHCPCD"
-
-    LOG UpdateDNS $*
-
-    if [ "$new_domain_name" = "" -a "$new_domain_name_servers" = "" ]; then
-	return 0;
-    fi
-
-    echo $idstring > /etc/resolv.conf.dhcp
-    if [ "$new_domain_name" ]; then
-	echo search $new_domain_name >> /etc/resolv.conf.dhcp
-    fi
-    echo options ndots:1 >> /etc/resolv.conf.dhcp
-
-    if [ "$new_domain_name_servers" ]; then
-	for nameserver in $new_domain_name_servers; do
-	    echo nameserver $nameserver >> /etc/resolv.conf.dhcp
-	done
-    else
-	echo nameserver 127.0.0.1 >> /etc/resolv.conf.dhcp
-    fi
-
-    if [ -f /etc/resolv.conf ]; then
-	if [ "`head -1 /etc/resolv.conf`" != "$idstring" ]; then
-	    return 0
-	fi
-	if [ "$old_domain_name" = "$new_domain_name" -a
-	     "$new_domain_name_servers" = "$old_domain_name_servers" ]; then
-	     return 0
-	fi
-    fi
-    mv /etc/resolv.conf.dhcp /etc/resolv.conf
-}
-
-case $reason in
-NBI)
-  exit 1
-  ;;
-
-MEDIUM)
-  exit 0
-  ;;
-
-PREINIT)
-  ifconfig $interface:dhcp down
-  ifconfig $interface:dhcp1 down
-  if [ -d /proc/sys/net/ipv4/conf/$interface ]; then
-    ifconfig $interface:dhcp 10.10.10.10 netmask 255.255.255.255
-    ifconfig $interface:dhcp down
-    if [ -d /proc/sys/net/ipv4/conf/$interface ]; then
-	LOG The interface $interface already configured.
-    fi
-  fi
-  ifconfig $interface:dhcp up
-  exit 0
-  ;;
-
-ARPSEND)
-  exit 0
-  ;;
-
-ARPCHECK)
-  if DAD "$interface" "$check_ip_address" ; then
-    exit 0
-  fi
-  exit 1
-  ;;
-
-BOUND|RENEW|REBIND|REBOOT)
-  if [ "$old_ip_address" -a "$alias_ip_address" -a \
-	"$alias_ip_address" != "$old_ip_address" ]; then
-    DelINETAddr "$interface" "$alias_ip_address" "$alias_subnet_mask" "$alias_broadcast_address" dhcp1
-  fi
-  if [ "$old_ip_address" -a "$old_ip_address" != "$new_ip_address" ]; then
-    DelINETAddr "$interface" "$old_ip_address" "$old_subnet_mask" "$old_broadcast_address" dhcp
-    DelDefaultRoutes "$old_routers"
-    DelStaticRouteList "$old_static_routes"
-  fi
-  if [ "$old_ip_address" = "" -o "$old_ip_address" != "$new_ip_address" -o \
-       "$reason" = "BOUND" -o "$reason" = "REBOOT" ]; then
-    AddINETAddr "$interface" "$new_ip_address" "$new_subnet_mask" "$new_broadcast_address" dhcp
-    AddStaticRouteList "$new_static_routes"
-    AddDefaultRoutes "$new_routers"
-    UnsolicitedARP "$interface" "$new_ip_address"
-  fi
-  if [ "$new_ip_address" != "$alias_ip_address" -a "$alias_ip_address" ]; then
-    AddINETAddr "$interface" "$alias_ip_address" "$alias_subnet_mask" "$alias_broadcast_address" dhcp1
-  fi
-  UpdateDNS
-  exit 0
-  ;;
-
-EXPIRE|FAIL)
-  if [ "$alias_ip_address" ]; then
-    DelINETAddr "$interface" "$alias_ip_address" "$alias_subnet_mask" "$alias_broadcast_address" dhcp1
-  fi
-  if [ "$old_ip_address" ]; then
-    DelINETAddr "$interface" "$old_ip_address" "$old_subnet_mask" "$old_broadcast_address" dhcp
-    DelDefaultRoutes "$old_routers"
-    DelStaticRouteList "$old_static_routes"
-  fi
-  if [ "$alias_ip_address" ]; then
-    AddINETAddr "$interface" "$alias_ip_address" "$alias_subnet_mask" "$alias_broadcast_address" dhcp1
-  fi
-  exit 0
-  ;;
-
-TIMEOUT)
-  if [ "$alias_ip_address" ]; then
-    DelINETAddr "$interface" "$alias_ip_address" "$alias_subnet_mask" "$alias_broadcast_address" dhcp1
-  fi
-# Seems, <null address> means, that no more old leases found.
-# Or does it mean bug in dhcpcd? 8) Fail for now.
-  if [ "$new_ip_address" = "<null address>" ]; then
-    if [ "$old_ip_address" ]; then
-	DelINETAddr "$interface" "$old_ip_address" "$old_subnet_mask" "$old_broadcast_address" dhcp
-    fi
-    if [ "$alias_ip_address" ]; then
-        AddINETAddr "$interface" "$alias_ip_address" "$alias_subnet_mask" "$alias_broadcast_address" dhcp1
-    fi
-    exit 1
-  fi
-  if DAD "$interface" "$new_ip_address" ; then
-    AddINETAddr "$interface" "$new_ip_address" "$new_subnet_mask" "$new_broadcast_address" dhcp
-    UnsolicitedARP "$interface" "$new_ip_address"
-    if [ "$alias_ip_address" -a "$alias_ip_address" != "$new_ip_address" ]; then
-      AddINETAddr "$interface" "$alias_ip_address" "$alias_subnet_mask" "$alias_broadcast_address" dhcp1
-      UnsolicitedARP "$interface" "$alias_ip_address"
-    fi
-    if CheckRouterList "$new_routers" ; then
-	AddStaticRouteList "$new_static_routes"
-	UpdateDNS
-	exit 0
-    fi
-  fi
-  DelINETAddr "$interface" "$new_ip_address" "$new_subnet_mask" "$new_broadcast_address" dhcp
-  DelDefaultRoutes "$old_routers"
-  DelStaticRouteList "$old_static_routes"
-  if [ "$alias_ip_address" ]; then
-    AddINETAddr "$interface" "$alias_ip_address" "$alias_subnet_mask" "$alias_broadcast_address" dhcp1
-  fi
-  exit 1
-  ;;
-esac
-
-exit 0
--- a/examples/diffserv/Edge1
+++ b/examples/diffserv/Edge1
@ -1,68 +0,0 @@
-#! /bin/sh -x
-#
-# sample script on using the ingress capabilities
-# This script just tags on the ingress interface using ipchains
-# the result is used for fast classification and re-marking
-# on the egress interface
-#
-#path to various utilities;
-#change to reflect yours.
-#
-IPROUTE=/root/DS-6-beta/iproute2-990530-dsing
-TC=$IPROUTE/tc/tc
-IP=$IPROUTE/ip/ip
-IPCHAINS=/root/DS-6-beta/ipchains-1.3.9/ipchains
-INDEV=eth2
-EGDEV="dev eth1"
-#
-# tag all incoming packets from host 10.2.0.24 to value 1
-# tag all incoming packets from host 10.2.0.3 to value 2
-# tag the rest of incoming packets from subnet 10.2.0.0/24 to value 3
-#These values are used in the egress
-#
-############################################################ 
-$IPCHAINS -A input -s 10.2.0.4/24 -m 3
-$IPCHAINS -A input -i $INDEV -s 10.2.0.24 -m 1
-$IPCHAINS -A input -i $INDEV -s 10.2.0.3 -m 2
-
-######################## Egress side ########################
-
-
-# attach a dsmarker
-#
-$TC qdisc add $EGDEV handle 1:0 root dsmark indices 64 set_tc_index
-#
-# values of the DSCP to change depending on the class
-#
-#becomes EF
-$TC class change $EGDEV classid 1:1 dsmark mask 0x3 \
-       value 0xb8
-#becomes AF11
-$TC class change $EGDEV classid 1:2 dsmark mask 0x3 \
-       value 0x28
-#becomes AF21
-$TC class change $EGDEV classid 1:3 dsmark mask 0x3 \
-       value 0x48
-#
-#
-# The class mapping
-#
-$TC filter add $EGDEV parent 1:0 protocol ip prio 4 handle 1 fw classid 1:1
-$TC filter add $EGDEV parent 1:0 protocol ip prio 4 handle 2 fw classid 1:2
-$TC filter add $EGDEV parent 1:0 protocol ip prio 4 handle 3 fw classid 1:3
-#
-
-#
-echo "---- qdisc parameters Ingress  ----------"
-$TC qdisc ls dev $INDEV
-echo "---- Class parameters Ingress  ----------"
-$TC class ls dev $INDEV
-echo "---- filter parameters Ingress ----------"
-$TC filter ls dev $INDEV parent 1:0
-
-echo "---- qdisc parameters Egress  ----------"
-$TC qdisc ls $EGDEV
-echo "---- Class parameters Egress  ----------"
-$TC class ls $EGDEV
-echo "---- filter parameters Egress ----------"
-$TC filter ls $EGDEV parent 1:0
--- a/examples/diffserv/Edge2
+++ b/examples/diffserv/Edge2
@ -1,87 +0,0 @@
-#! /bin/sh -x
-#
-# sample script on using the ingress capabilities
-# This script tags the fwmark on the ingress interface using IPchains
-# the result is used first for policing on the Ingress interface then
-# for fast classification and re-marking
-# on the egress interface
-#
-#path to various utilities;
-#change to reflect yours.
-#
-IPROUTE=/root/DS-6-beta/iproute2-990530-dsing
-TC=$IPROUTE/tc/tc
-IP=$IPROUTE/ip/ip
-IPCHAINS=/root/DS-6-beta/ipchains-1.3.9/ipchains
-INDEV=eth2
-EGDEV="dev eth1"
-#
-# tag all incoming packets from host 10.2.0.24 to value 1
-# tag all incoming packets from host 10.2.0.3 to value 2
-# tag the rest of incoming packets from subnet 10.2.0.0/24 to value 3
-#These values are used in the egress
-############################################################ 
-$IPCHAINS -A input -s 10.2.0.0/24 -m 3
-$IPCHAINS -A input -i $INDEV -s 10.2.0.24 -m 1
-$IPCHAINS -A input -i $INDEV -s 10.2.0.3 -m 2
-############################################################ 
-#
-# install the ingress qdisc on the ingress interface
-############################################################ 
-$TC qdisc add dev $INDEV handle ffff: ingress
-############################################################ 
-
-#
-# attach a fw classifier to the ingress which polices anything marked
-# by ipchains to tag value 3 (The rest of the subnet packets -- not
-# tag 1 or 2) to not go beyond 1.5Mbps
-# Allow bursts of at least 60 packets (assuming a maximum packet
-# size of 1.5 KB) in the long run and up to about 6 packets in the
-# short run
-
-############################################################ 
-$TC filter add dev $INDEV parent ffff: protocol ip prio 50 handle 3 fw \
-police rate 1500kbit burst 90k mtu 9k drop flowid :1
-############################################################ 
-
-######################## Egress side ########################
-
-
-# attach a dsmarker
-#
-$TC qdisc add $EGDEV handle 1:0 root dsmark indices 64
-#
-# values of the DSCP to change depending on the class
-#
-$TC class change $EGDEV classid 1:1 dsmark mask 0x3 \
-       value 0xb8
-$TC class change $EGDEV classid 1:2 dsmark mask 0x3 \
-       value 0x28
-$TC class change $EGDEV classid 1:3 dsmark mask 0x3 \
-       value 0x48
-#
-#
-# The class mapping
-#
-$TC filter add $EGDEV parent 1:0 protocol ip prio 4 handle 1 fw classid 1:1
-$TC filter add $EGDEV parent 1:0 protocol ip prio 4 handle 2 fw classid 1:2
-$TC filter add $EGDEV parent 1:0 protocol ip prio 4 handle 3 fw classid 1:3
-#
-
-#
-echo "---- qdisc parameters Ingress  ----------"
-$TC qdisc ls dev $INDEV
-echo "---- Class parameters Ingress  ----------"
-$TC class ls dev $INDEV
-echo "---- filter parameters Ingress ----------"
-$TC filter ls dev $INDEV parent ffff:
-
-echo "---- qdisc parameters Egress  ----------"
-$TC qdisc ls $EGDEV
-echo "---- Class parameters Egress  ----------"
-$TC class ls $EGDEV
-echo "---- filter parameters Egress ----------"
-$TC filter ls $EGDEV parent 1:0
-#
-#deleting the ingress qdisc
-#$TC qdisc del $DEV ingress
--- a/examples/diffserv/Edge31-ca-u32
+++ b/examples/diffserv/Edge31-ca-u32
@ -1,170 +0,0 @@
-#! /bin/sh -x
-#
-# sample script on using the ingress capabilities using u32 classifier
-# This script tags tcindex based on metering on the ingress 
-# interface the result is used for fast classification and re-marking
-# on the egress interface
-# This is an example of a color aware mode marker with PIR configured
-# based on draft-wahjak-mcm-00.txt (section 3.1)
-#
-# The colors are defined using the Diffserv Fields
-#path to various utilities;
-#change to reflect yours.
-#
-IPROUTE=/usr/src/iproute2-current
-TC=$IPROUTE/tc/tc
-IP=$IPROUTE/ip/ip
-INDEV=eth0
-EGDEV="dev eth1"
-CIR1=1500kbit
-CIR2=1000kbit
-
-#The CBS is about 60 MTU sized packets
-CBS1=90k
-CBS2=90k
-
-############################################################ 
-#
-# install the ingress qdisc on the ingress interface
-$TC qdisc add dev $INDEV handle ffff: ingress
-############################################################ 
-#
-# Create u32 filters 
-$TC filter add dev $INDEV parent ffff: protocol ip prio 4 handle 1: u32 \
-divisor 1
-############################################################ 
-
-# The meters: Note that we have shared meters in this case as identified
-# by the index parameter
-meter1=" police index 1 rate $CIR1 burst $CBS1 "
-meter2=" police index 2 rate $CIR2 burst $CBS1 "
-meter3=" police index 3 rate $CIR2 burst $CBS2 "
-meter4=" police index 4 rate $CIR1 burst $CBS2 "
-meter5=" police index 5 rate $CIR1 burst $CBS2 "
-
-# All packets are marked with a tcindex value which is used on the egress
-# tcindex 1 maps to AF41, 2->AF42, 3->AF43, 4->BE
-
-# *********************** AF41 *************************** 
-#AF41 (DSCP 0x22) is passed on with a tcindex value 1
-#if it doesnt exceed its CIR/CBS 
-#policer 1  is used.
-#
-$TC filter add dev $INDEV parent ffff: protocol ip prio 4 u32 \
-match ip tos 0x88 0xfc \
-$meter1 \
-continue flowid :1
-#
-# if it exceeds the above but not the extra rate/burst below, it gets a 
-# tcindex value  of 2
-# policer 2 is used
-#
-$TC filter add dev $INDEV parent ffff: protocol ip prio 5 u32 \
-match ip tos 0x88 0xfc \
-$meter2 \
-continue flowid :2
-#
-# if it exceeds the above but not the rule below, it gets a tcindex value
-# of 3 (policer 3)
-#
-$TC filter add dev $INDEV parent ffff: protocol ip prio 6 u32 \
-match ip tos 0x88 0xfc \
-$meter3 \
-drop flowid :3
-#
-
-# *********************** AF42 *************************** 
-#AF42 (DSCP 0x24) is passed on with a tcindex value 2
-#if it doesnt exceed its CIR/CBS 
-#policer 2 is used. Note that this is shared with the AF41
-#
-#
-$TC filter add dev $INDEV parent ffff: protocol ip prio 5 u32 \
-match ip tos 0x90 0xfc \
-$meter2 \
-continue flowid :2
-#
-# if it exceeds the above but not the rule below, it gets a tcindex value
-# of 3 (policer 3)
-#
-$TC filter add dev $INDEV parent ffff: protocol ip prio 6 u32 \
-match ip tos 0x90 0xfc \
-$meter3 \
-drop flowid :3
-#
-# *********************** AF43 *************************** 
-#
-#AF43 (DSCP 0x26) is passed on with a tcindex value 3
-#if it doesnt exceed its CIR/CBS
-#policer 3 is used. Note that this is shared with the AF41 and AF42
-#
-$TC filter add dev $INDEV parent ffff: protocol ip prio 6 u32 \
-match ip tos 0x98 0xfc \
-$meter3 \
-drop flowid :3
-#
-# *********************** BE *************************** 
-#
-# Anything else (not from the AF4*) gets discarded if it 
-# exceeds 1Mbps and by default goes to BE if it doesnt
-# Note that the BE class is also used by the AF4* in the worst
-# case
-#
-$TC filter add dev $INDEV parent ffff: protocol ip prio 7 u32 \
-match ip src 0/0\
-$meter4 \
-drop flowid :4
-
-######################## Egress side ########################
-
-# attach a dsmarker
-#
-$TC qdisc add $EGDEV handle 1:0 root dsmark indices 64
-#
-# values of the DSCP to change depending on the class
-#note that the ECN bits are masked out
-#
-#AF41 (0x88 is 0x22 shifted to the left by two bits)
-#
-$TC class change $EGDEV classid 1:1 dsmark mask 0x3 \
-       value 0x88
-#AF42
-$TC class change $EGDEV classid 1:2 dsmark mask 0x3 \
-       value 0x90
-#AF43
-$TC class change $EGDEV classid 1:3 dsmark mask 0x3 \
-       value 0x98
-#BE (class 1:4, the class that tcindex handle 4 is mapped to; writing
-#    it to 1:3 would overwrite the AF43 value set just above)
-$TC class change $EGDEV classid 1:4 dsmark mask 0x3 \
-       value 0x0
-#
-#
-# The class mapping
-#
-$TC filter add $EGDEV parent 1:0 protocol ip prio 1 \
-          handle 1 tcindex classid 1:1
-$TC filter add $EGDEV parent 1:0 protocol ip prio 1 \
-          handle 2 tcindex  classid 1:2
-$TC filter add $EGDEV parent 1:0 protocol ip prio 1 \
-          handle 3 tcindex  classid 1:3
-$TC filter add $EGDEV parent 1:0 protocol ip prio 1 \
-          handle 4 tcindex  classid 1:4
-#
-
-#
-echo "---- qdisc parameters Ingress  ----------"
-$TC qdisc ls dev $INDEV
-echo "---- Class parameters Ingress  ----------"
-$TC class ls dev $INDEV
-echo "---- filter parameters Ingress ----------"
-$TC filter ls dev $INDEV parent ffff:
-
-echo "---- qdisc parameters Egress  ----------"
-$TC qdisc ls $EGDEV
-echo "---- Class parameters Egress  ----------"
-$TC class ls $EGDEV
-echo "---- filter parameters Egress ----------"
-$TC filter ls $EGDEV parent 1:0
-#
-#deleting the ingress qdisc
-#$TC qdisc del $INDEV ingress
--- a/examples/diffserv/Edge31-cb-chains
+++ b/examples/diffserv/Edge31-cb-chains
@ -1,132 +0,0 @@
-#! /bin/sh -x
-#
-# sample script on using the ingress capabilities
-# This script fwmark tags(IPchains) based on metering on the ingress 
-# interface the result is used for fast classification and re-marking
-# on the egress interface
-# This is an example of a color blind mode marker with no PIR configured
-# based on draft-wahjak-mcm-00.txt (section 3.1)
-#
-#path to various utilities;
-#change to reflect yours.
-#
-IPROUTE=/root/DS-6-beta/iproute2-990530-dsing
-TC=$IPROUTE/tc/tc
-IP=$IPROUTE/ip/ip
-IPCHAINS=/root/DS-6-beta/ipchains-1.3.9/ipchains
-INDEV=eth2
-EGDEV="dev eth1"
-CIR1=1500kbit
-CIR2=1000kbit
-
-#The CBS is about 60 MTU sized packets
-CBS1=90k
-CBS2=90k
-
-meter1="police rate $CIR1 burst $CBS1 "
-meter2="police rate $CIR1 burst $CBS2 "
-meter3="police rate $CIR2 burst $CBS1 "
-meter4="police rate $CIR2 burst $CBS2 "
-meter5="police rate $CIR2 burst $CBS2 "
-#
-# tag the rest of incoming packets from subnet 10.2.0.0/24 to fw value 1
-# tag all incoming packets from any other subnet to fw tag 2
-############################################################ 
-$IPCHAINS -A input -i $INDEV -s 0/0 -m 2
-$IPCHAINS -A input -i $INDEV -s 10.2.0.0/24 -m 1
-#
-############################################################ 
-# install the ingress qdisc on the ingress interface
-$TC qdisc add dev $INDEV handle ffff: ingress
-#
-############################################################ 
-
-# All packets are marked with a tcindex value which is used on the egress
-# tcindex 1 maps to AF41, 2->AF42, 3->AF43, 4->BE
-#
-############################################################ 
-# 
-# anything with fw tag of 1 is passed on with a tcindex value 1
-#if it doesnt exceed its allocated rate (CIR/CBS)
-# 
-$TC filter add dev $INDEV parent ffff: protocol ip prio 4 handle 1 fw \
-$meter1 \
-continue flowid 4:1
-#
-# if it exceeds the above but not the extra rate/burst below, it gets a 
-#tcindex value  of 2
-#
-$TC filter add dev $INDEV parent ffff: protocol ip prio 5 handle 1 fw \
-$meter2 \
-continue flowid 4:2
-#
-# if it exceeds the above but not the rule below, it gets a tcindex value
-# of 3
-#
-$TC filter add dev $INDEV parent ffff: protocol ip prio 6 handle 1 fw \
-$meter3 \
-drop flowid 4:3
-#
-# Anything else (not from the subnet 10.2.0.0/24) gets discarded if it 
-# exceeds 1Mbps and by default goes to BE if it doesnt
-#
-$TC filter add dev $INDEV parent ffff: protocol ip prio 6 handle 2 fw \
-$meter5 \
-drop flowid 4:4
-
-
-######################## Egress side ########################
-
-
-# attach a dsmarker
-#
-$TC qdisc add $EGDEV handle 1:0 root dsmark indices 64
-#
-# values of the DSCP to change depending on the class
-#note that the ECN bits are masked out
-#
-#AF41 (0x88 is 0x22 shifted to the left by two bits)
-#
-$TC class change $EGDEV classid 1:1 dsmark mask 0x3 \
-       value 0x88
-#AF42
-$TC class change $EGDEV classid 1:2 dsmark mask 0x3 \
-       value 0x90
-#AF43
-$TC class change $EGDEV classid 1:3 dsmark mask 0x3 \
-       value 0x98
-#BE
-$TC class change $EGDEV classid 1:4 dsmark mask 0x3 \
-       value 0x0
-#
-#
-# The class mapping (using tcindex; could easily have
-# replaced it with the fw classifier instead)
-#
-$TC filter add $EGDEV parent 1:0 protocol ip prio 1 \
-          handle 1 tcindex classid 1:1
-$TC filter add $EGDEV parent 1:0 protocol ip prio 1 \
-          handle 2 tcindex  classid 1:2
-$TC filter add $EGDEV parent 1:0 protocol ip prio 1 \
-          handle 3 tcindex  classid 1:3
-$TC filter add $EGDEV parent 1:0 protocol ip prio 1 \
-          handle 4 tcindex  classid 1:4
-#
-
-#
-echo "---- qdisc parameters Ingress  ----------"
-$TC qdisc ls dev $INDEV
-echo "---- Class parameters Ingress  ----------"
-$TC class ls dev $INDEV
-echo "---- filter parameters Ingress ----------"
-$TC filter ls dev $INDEV parent ffff:
-
-echo "---- qdisc parameters Egress  ----------"
-$TC qdisc ls $EGDEV
-echo "---- Class parameters Egress  ----------"
-$TC class ls $EGDEV
-echo "---- filter parameters Egress ----------"
-$TC filter ls $EGDEV parent 1:0
-#
-#deleting the ingress qdisc
-#$TC qdisc del $INDEV ingress
--- a/examples/diffserv/Edge32-ca-u32
+++ b/examples/diffserv/Edge32-ca-u32
@ -1,198 +0,0 @@
-#! /bin/sh -x
-#
-# sample script on using the ingress capabilities using u32 classifier
-# This script tags tcindex based on metering on the ingress 
-# interface the result is used for fast classification and re-marking
-# on the egress interface
-# This is an example of a color aware mode marker with PIR configured
-# based on draft-wahjak-mcm-00.txt (section 3.2)
-#
-# The colors are defined using the Diffserv Fields
-#path to various utilities;
-#change to reflect yours.
-#
-IPROUTE=/root/DS-6-beta/iproute2-990530-dsing
-TC=$IPROUTE/tc/tc
-IP=$IPROUTE/ip/ip
-IPCHAINS=/root/DS-6-beta/ipchains-1.3.9/ipchains
-INDEV=eth2
-EGDEV="dev eth1"
-CIR1=1000kbit
-CIR2=500kbit
-# the PIR is what is in excess of the CIR
-PIR1=1000kbit
-PIR2=500kbit
-
-#The CBS is about 60 MTU sized packets
-CBS1=90k
-CBS2=90k
-#the EBS is about 20 max sized packets
-EBS1=30k
-EBS2=30k
-
-# The meters: Note that we have shared meters in this case as identified
-# by the index parameter
-meter1=" police index 1 rate $CIR1 burst $CBS1 "
-meter1a=" police index 2 rate $PIR1 burst $EBS1 "
-meter2=" police index 3 rate $CIR2 burst $CBS1 "
-meter2a=" police index 4 rate $PIR2 burst $EBS1 "
-meter3=" police index 5 rate $CIR2 burst $CBS2 "
-meter3a=" police index 6 rate $PIR2 burst $EBS2 "
-meter4=" police index 7 rate $CIR1 burst $CBS2 "
-
-############################################################ 
-#
-# install the ingress qdisc on the ingress interface
-$TC qdisc add dev $INDEV handle ffff: ingress
-############################################################ 
-#
-# All packets are marked with a tcindex value which is used on the egress
-# tcindex 1 maps to AF41, 2->AF42, 3->AF43, 4->BE
-#
-# *********************** AF41 *************************** 
-#AF41 (DSCP 0x22) is passed on with a tcindex value 1
-#if it doesnt exceed its CIR/CBS + PIR/EBS
-#policer 1  is used.
-#
-$TC filter add dev $INDEV parent ffff: protocol ip prio 1 u32 \
-match ip tos 0x88 0xfc \
-$meter1 \
-continue flowid :1
-$TC filter add dev $INDEV parent ffff: protocol ip prio 2 u32 \
-match ip tos 0x88 0xfc \
-$meter1a \
-continue flowid :1
-#
-# if it exceeds the above but not the extra rate/burst below, it gets a 
-# tcindex value  of 2
-# policer 2 is used
-#
-$TC filter add dev $INDEV parent ffff: protocol ip prio 3 u32 \
-match ip tos 0x88 0xfc \
-$meter2 \
-continue flowid :2
-$TC filter add dev $INDEV parent ffff: protocol ip prio 4 u32 \
-match ip tos 0x88 0xfc \
-$meter2a \
-continue flowid :2
-#
-# if it exceeds the above but not the rule below, it gets a tcindex value
-# of 3 (policer 3)
-#
-$TC filter add dev $INDEV parent ffff: protocol ip prio 5 u32 \
-match ip tos 0x88 0xfc \
-$meter3 \
-continue flowid :3
-$TC filter add dev $INDEV parent ffff: protocol ip prio 6 u32 \
-match ip tos 0x88 0xfc \
-$meter3a \
-drop flowid :3
-#
-# *********************** AF42 *************************** 
-#AF42 (DSCP 0x24) is passed on with a tcindex value 2
-#if it doesnt exceed its CIR/CBS + PIR/EBS
-#policer 2 is used. Note that this is shared with the AF41
-#
-#
-$TC filter add dev $INDEV parent ffff: protocol ip prio 8 u32 \
-match ip tos 0x90 0xfc \
-$meter2 \
-continue flowid :2
-$TC filter add dev $INDEV parent ffff: protocol ip prio 9 u32 \
-match ip tos 0x90 0xfc \
-$meter2a \
-continue flowid :2
-#
-# if it exceeds the above but not the rule below, it gets a tcindex value
-# of 3 (policer 3)
-#
-$TC filter add dev $INDEV parent ffff: protocol ip prio 10 u32 \
-match ip tos 0x90 0xfc \
-$meter3 \
-continue flowid :3
-$TC filter add dev $INDEV parent ffff: protocol ip prio 11 u32 \
-match ip tos 0x90 0xfc \
-$meter3a \
-drop flowid :3
-
-#
-# *********************** AF43 *************************** 
-#
-#AF43 (DSCP 0x26) is passed on with a tcindex value 3
-#if it doesnt exceed its CIR/CBS + PIR/EBS
-#policer 3 is used. Note that this is shared with the AF41 and AF42
-#
-$TC filter add dev $INDEV parent ffff: protocol ip prio 13 u32 \
-match ip tos 0x98 0xfc \
-$meter3 \
-continue flowid :3
-$TC filter add dev $INDEV parent ffff: protocol ip prio 14 u32 \
-match ip tos 0x98 0xfc \
-$meter3a \
-drop flowid :3
-#
-## *********************** BE *************************** 
-##
-## Anything else (not from the AF4*) gets discarded if it 
-## exceeds 1Mbps and by default goes to BE if it doesnt
-## Note that the BE class is also used by the AF4* in the worst
-## case
-##
-$TC filter add dev $INDEV parent ffff: protocol ip prio 16 u32 \
-match ip src 0/0\
-$meter4 \
-drop flowid :4
-
-######################## Egress side ########################
-
-# attach a dsmarker
-#
-$TC qdisc add $EGDEV handle 1:0 root dsmark indices 64
-#
-# values of the DSCP to change depending on the class
-#note that the ECN bits are masked out
-#
-#AF41 (0x88 is 0x22 shifted to the left by two bits)
-#
-$TC class change $EGDEV classid 1:1 dsmark mask 0x3 \
-       value 0x88
-#AF42
-$TC class change $EGDEV classid 1:2 dsmark mask 0x3 \
-       value 0x90
-#AF43
-$TC class change $EGDEV classid 1:3 dsmark mask 0x3 \
-       value 0x98
-#BE (class 1:4, the class that tcindex handle 4 is mapped to; writing
-#    it to 1:3 would overwrite the AF43 value set just above)
-$TC class change $EGDEV classid 1:4 dsmark mask 0x3 \
-       value 0x0
-#
-#
-# The class mapping
-#
-$TC filter add $EGDEV parent 1:0 protocol ip prio 1 \
-          handle 1 tcindex classid 1:1
-$TC filter add $EGDEV parent 1:0 protocol ip prio 1 \
-          handle 2 tcindex  classid 1:2
-$TC filter add $EGDEV parent 1:0 protocol ip prio 1 \
-          handle 3 tcindex  classid 1:3
-$TC filter add $EGDEV parent 1:0 protocol ip prio 1 \
-          handle 4 tcindex  classid 1:4
-#
-
-#
-echo "---- qdisc parameters Ingress  ----------"
-$TC qdisc ls dev $INDEV
-echo "---- Class parameters Ingress  ----------"
-$TC class ls dev $INDEV
-echo "---- filter parameters Ingress ----------"
-$TC filter ls dev $INDEV parent ffff:
-
-echo "---- qdisc parameters Egress  ----------"
-$TC qdisc ls $EGDEV
-echo "---- Class parameters Egress  ----------"
-$TC class ls $EGDEV
-echo "---- filter parameters Egress ----------"
-$TC filter ls $EGDEV parent 1:0
-#
-#deleting the ingress qdisc
-#$TC qdisc del $INDEV ingress
--- a/examples/diffserv/Edge32-cb-chains
+++ b/examples/diffserv/Edge32-cb-chains
@ -1,144 +0,0 @@
-#! /bin/sh -x
-#
-# sample script on using the ingress capabilities
-# This script fwmark tags(IPchains) based on metering on the ingress 
-# interface the result is used for fast classification and re-marking
-# on the egress interface
-# This is an example of a color blind mode marker with no PIR configured
-# based on draft-wahjak-mcm-00.txt (section 3.1)
-#
-#path to various utilities;
-#change to reflect yours.
-#
-IPROUTE=/root/DS-6-beta/iproute2-990530-dsing
-TC=$IPROUTE/tc/tc
-IP=$IPROUTE/ip/ip
-IPCHAINS=/root/DS-6-beta/ipchains-1.3.9/ipchains
-INDEV=eth2
-EGDEV="dev eth1"
-CIR1=1500kbit
-CIR2=500kbit
-
-#The CBS is about 60 MTU sized packets
-CBS1=90k
-CBS2=90k
-
-meter1="police rate $CIR1 burst $CBS1 "
-meter1a="police rate $CIR2 burst $CBS1 "
-meter2="police rate $CIR1 burst $CBS2 "
-meter2a="police rate $CIR2 burst $CBS2 "
-meter3="police rate $CIR2 burst $CBS1 "
-meter3a="police rate $CIR2 burst $CBS1 "
-meter4="police rate $CIR2 burst $CBS2 "
-meter5="police rate $CIR1 burst $CBS2 "
-#
-# tag the rest of incoming packets from subnet 10.2.0.0/24 to fw value 1
-# tag all incoming packets from any other subnet to fw tag 2
-############################################################ 
-$IPCHAINS -A input -i $INDEV -s 0/0 -m 2
-$IPCHAINS -A input -i $INDEV -s 10.2.0.0/24 -m 1
-#
-############################################################ 
-# install the ingress qdisc on the ingress interface
-$TC qdisc add dev $INDEV handle ffff: ingress
-#
-############################################################ 
-
-# All packets are marked with a tcindex value which is used on the egress
-# tcindex 1 maps to AF41, 2->AF42, 3->AF43, 4->BE
-#
-############################################################ 
-# 
-# anything with fw tag of 1 is passed on with a tcindex value 1
-#if it doesnt exceed its allocated rate (CIR/CBS)
-# 
-$TC filter add dev $INDEV parent ffff: protocol ip prio 1 handle 1 fw \
-$meter1 \
-continue flowid 4:1
-$TC filter add dev $INDEV parent ffff: protocol ip prio 2 handle 1 fw \
-$meter1a \
-continue flowid 4:1
-#
-# if it exceeds the above but not the extra rate/burst below, it gets a 
-#tcindex value  of 2
-#
-$TC filter add dev $INDEV parent ffff: protocol ip prio 3 handle 1 fw \
-$meter2 \
-continue flowid 4:2
-$TC filter add dev $INDEV parent ffff: protocol ip prio 4 handle 1 fw \
-$meter2a \
-continue flowid 4:2
-#
-# if it exceeds the above but not the rule below, it gets a tcindex value
-# of 3
-#
-$TC filter add dev $INDEV parent ffff: protocol ip prio 5 handle 1 fw \
-$meter3 \
-continue flowid 4:3
-$TC filter add dev $INDEV parent ffff: protocol ip prio 6 handle 1 fw \
-$meter3a \
-drop flowid 4:3
-#
-# Anything else (not from the subnet 10.2.0.24/24) gets discarded if it 
-# exceeds 1Mbps and by default goes to BE if it doesnt
-#
-$TC filter add dev $INDEV parent ffff: protocol ip prio 7 handle 2 fw \
-$meter5 \
-drop flowid 4:4
-
-
-######################## Egress side ########################
-
-
-# attach a dsmarker
-#
-$TC qdisc add $EGDEV handle 1:0 root dsmark indices 64
-#
-# values of the DSCP to change depending on the class
-#note that the ECN bits are masked out
-#
-#AF41 (0x88 is 0x22 shifted to the right by two bits)
-#
-$TC class change $EGDEV classid 1:1 dsmark mask 0x3 \
-       value 0x88
-#AF42
-$TC class change $EGDEV classid 1:2 dsmark mask 0x3 \
-       value 0x90
-#AF43
-$TC class change $EGDEV classid 1:3 dsmark mask 0x3 \
-       value 0x98
-#BE
-$TC class change $EGDEV classid 1:4 dsmark mask 0x3 \
-       value 0x0
-#
-#
-# The class mapping (using tcindex; could easily have
-# replaced it with the fw classifier instead)
-#
-$TC filter add $EGDEV parent 1:0 protocol ip prio 1 \
-          handle 1 tcindex classid 1:1
-$TC filter add $EGDEV parent 1:0 protocol ip prio 1 \
-          handle 2 tcindex  classid 1:2
-$TC filter add $EGDEV parent 1:0 protocol ip prio 1 \
-          handle 3 tcindex  classid 1:3
-$TC filter add $EGDEV parent 1:0 protocol ip prio 1 \
-          handle 4 tcindex  classid 1:4
-#
-
-#
-echo "---- qdisc parameters Ingress  ----------"
-$TC qdisc ls dev $INDEV
-echo "---- Class parameters Ingress  ----------"
-$TC class ls dev $INDEV
-echo "---- filter parameters Ingress ----------"
-$TC filter ls dev $INDEV parent ffff:
-
-echo "---- qdisc parameters Egress  ----------"
-$TC qdisc ls $EGDEV
-echo "---- Class parameters Egress  ----------"
-$TC class ls $EGDEV
-echo "---- filter parameters Egress ----------"
-$TC filter ls $EGDEV parent 1:0
-#
-#deleting the ingress qdisc
-#$TC qdisc del $INDEV ingress
--- a/examples/diffserv/Edge32-cb-u32
+++ b/examples/diffserv/Edge32-cb-u32
@ -1,145 +0,0 @@
-#! /bin/sh 
-#
-# sample script on using the ingress capabilities using u32 classifier
-# This script tags tcindex based on metering on the ingress 
-# interface the result is used for fast classification and re-marking
-# on the egress interface
-# This is an example of a color blind mode marker with PIR configured
-# based on draft-wahjak-mcm-00.txt (section 3.2)
-#
-#path to various utilities;
-#change to reflect yours.
-#
-IPROUTE=/root/DS-6-beta/iproute2-990530-dsing
-TC=$IPROUTE/tc/tc
-IP=$IPROUTE/ip/ip
-INDEV=eth2
-EGDEV="dev eth1"
-CIR1=1000kbit
-CIR2=1000kbit
-# The PIR is the excess (in addition to the CIR i.e if always
-# going to the PIR --> average rate is CIR+PIR)
-PIR1=1000kbit
-PIR2=500kbit
-
-#The CBS is about 60 MTU sized packets
-CBS1=90k
-CBS2=90k
-#the EBS is about 10 max sized packets
-EBS1=15k
-EBS2=15k
-# The meters
-meter1=" police rate $CIR1 burst $CBS1 "
-meter1a=" police rate $PIR1 burst $EBS1 "
-meter2=" police rate $CIR2 burst $CBS1 "
-meter2a="police rate $PIR2 burst $CBS1 "
-meter3=" police rate $CIR2 burst $CBS2 "
-meter3a=" police rate $PIR2 burst $EBS2 "
-meter4=" police rate $CIR1 burst $CBS2 "
-meter5=" police rate $CIR1 burst $CBS2 "
-
-
-# install the ingress qdisc on the ingress interface
-############################################################ 
-$TC qdisc add dev $INDEV handle ffff: ingress
-############################################################ 
-#
-############################################################ 
-
-# All packets are marked with a tcindex value which is used on the egress
-# NOTE: tcindex 1 maps to AF41, 2->AF42, 3->AF43, 4->BE
-# 
-#anything from subnet 10.2.0.2/24 is passed on with a tcindex value 1
-#if it doesnt exceed its CIR/CBS + PIR/EBS
-# 
-$TC filter add dev $INDEV parent ffff: protocol ip prio 1 u32 \
-match ip src 10.2.0.0/24 $meter1 \
-continue flowid :1
-$TC filter add dev $INDEV parent ffff: protocol ip prio 2 u32 \
-match ip src 10.2.0.0/24 $meter1a \
-continue flowid :1
-
-#
-# if it exceeds the above but not the extra rate/burst below, it gets a 
-#tcindex value  of 2
-#
-$TC filter add dev $INDEV parent ffff: protocol ip prio 3 u32 \
-match ip src 10.2.0.0/24 $meter2 \
-continue flowid :2
-$TC filter add dev $INDEV parent ffff: protocol ip prio 4 u32 \
-match ip src 10.2.0.0/24 $meter2a \
-continue flowid :2
-#
-# if it exceeds the above but not the rule below, it gets a tcindex value
-# of 3
-#
-$TC filter add dev $INDEV parent ffff: protocol ip prio 5 u32 \
-match ip src 10.2.0.0/24 $meter3 \
-continue flowid :3
-$TC filter add dev $INDEV parent ffff: protocol ip prio 6 u32 \
-match ip src 10.2.0.0/24 $meter3a \
-drop flowid :3
-#
-#
-# Anything else (not from the subnet 10.2.0.24/24) gets discarded if it 
-# exceeds 1Mbps and by default goes to BE if it doesnt
-#
-$TC filter add dev $INDEV parent ffff: protocol ip prio 7 u32 \
-match ip src 0/0 $meter5 \
-drop flowid :4
-
-
-######################## Egress side ########################
-
-
-# attach a dsmarker
-#
-$TC qdisc add $EGDEV handle 1:0 root dsmark indices 64
-#
-# values of the DSCP to change depending on the class
-#note that the ECN bits are masked out
-#
-#AF41 (0x88 is 0x22 shifted to the right by two bits)
-#
-$TC class change $EGDEV classid 1:1 dsmark mask 0x3 \
-       value 0x88
-#AF42
-$TC class change $EGDEV classid 1:2 dsmark mask 0x3 \
-       value 0x90
-#AF43
-$TC class change $EGDEV classid 1:3 dsmark mask 0x3 \
-       value 0x98
-#BE
-$TC class change $EGDEV classid 1:3 dsmark mask 0x3 \
-       value 0x0
-#
-#
-# The class mapping
-#
-$TC filter add $EGDEV parent 1:0 protocol ip prio 1 \
-          handle 1 tcindex classid 1:1
-$TC filter add $EGDEV parent 1:0 protocol ip prio 1 \
-          handle 2 tcindex  classid 1:2
-$TC filter add $EGDEV parent 1:0 protocol ip prio 1 \
-          handle 3 tcindex  classid 1:3
-$TC filter add $EGDEV parent 1:0 protocol ip prio 1 \
-          handle 4 tcindex  classid 1:4
-#
-
-#
-echo "---- qdisc parameters Ingress  ----------"
-$TC qdisc ls dev $INDEV
-echo "---- Class parameters Ingress  ----------"
-$TC class ls dev $INDEV
-echo "---- filter parameters Ingress ----------"
-$TC filter ls dev $INDEV parent ffff:
-
-echo "---- qdisc parameters Egress  ----------"
-$TC qdisc ls $EGDEV
-echo "---- Class parameters Egress  ----------"
-$TC class ls $EGDEV
-echo "---- filter parameters Egress ----------"
-$TC filter ls $EGDEV parent 1:0
-#
-#deleting the ingress qdisc
-#$TC qdisc del $INDEV ingress
--- a/examples/diffserv/README
+++ b/examples/diffserv/README
@ -1,98 +0,0 @@
-
-Note all these are mere examples which can be customized to your needs
-
-AFCBQ
-----
-AF PHB built using CBQ, DSMARK,GRED (default in GRIO mode) ,RED for BE 
-and the tcindex classifier with some algorithmic mapping
-
-EFCBQ
-----
-EF PHB built using CBQ (for rate control and prioritization), 
-DSMARK( to remark DSCPs), tcindex  classifier and  RED for the BE
-traffic.
-
-EFPRIO
------
-EF PHB using the PRIO scheduler, Token Bucket to rate control EF,
-tcindex classifier, DSMARK to remark, and RED for the BE traffic
-
-EDGE scripts
-==============
-
-CB-3(1|2)-(u32/chains)
-======================
-
-
-The major differences are that the classifier is u32 on -u32 extension
-and IPchains on the chains extension. CB stands for color Blind
-and 31 is for the mode where only a CIR and CBS are defined whereas
-32 stands for a mode where a CIR/CBS + PIR/EBS are defined.
-
-Color Blind (CB)
-==========-----=
-We look at one special subnet that we are interested in for simplicty
-reasons to demonstrate the capability. We send the packets from that
-subnet to AF4*, BE or end up dropping depending on the metering results. 
-
-
-The algorithm overview is as follows:
-
-*classify:
-
-**case: subnet X
----------------
-  if !exceed meter1 tag as AF41
-	else
-	    if !exceed meter2  tag as AF42
-	        else
-		  if !exceed meter 3 tag as AF43
-		      else 
-			 drop 
-
-default case: Any other subnet
-------------------------------
-  if !exceed meter 5 tag as AF43
-      else
-	 drop 
-
-
-One Egress side change the DSCPs of the packets to reflect AF4* and BE
-based on the tags from the ingress.
-
-------------------------------------------------------------
-
-Color Aware
-===========
-
-Define some meters with + policing and give them IDs eg
-
-meter1=police index 1 rate $CIR1 burst $CBS1  
-meter2=police index 2 rate $CIR2 burst $CBS2   etc 
-
-General overview:
-classify based on the DSCPs and use the policer ids to decide tagging
-
-
-*classify on ingress:
-
-switch (dscp) {
-    case AF41: /* tos&0xfc == 0x88 */
-	if (!exceed meter1) break;
-    case AF42: /* tos&0xfc == 0x90 */
-	if (!exceed meter2) {
-	    tag as AF42;
-	    break;
-	}
-    case AF43: /* tos&0xfc == 0x98 */
-	if (!exceed meter3) {
-	    tag as AF43;
-	    break;
-	} else
-	  drop;
-    default:
-	if (!exceed meter4) tag as BE;
-	else drop;
-}
-
-On the Egress side mark the proper AF tags
--- a/examples/diffserv/afcbq
+++ b/examples/diffserv/afcbq
@ -1,105 +0,0 @@
-#!/usr/bin/perl
-#
-#
-# AF using CBQ for a single interface eth0 
-# 4 AF classes using GRED and one BE using RED
-# Things you might want to change:
-#	- the device bandwidth (set at 10Mbits)
-#	- the bandwidth allocated for each AF class and the BE class	
-#	- the drop probability associated with each AF virtual queue
-#
-# AF DSCP values used (based on AF draft 04)
-# -----------------------------------------
-# AF DSCP values
-# AF1 1. 0x0a 2. 0x0c 3. 0x0e
-# AF2 1. 0x12 2. 0x14 3. 0x16
-# AF3 1. 0x1a 2. 0x1c 3. 0x1e
-# AF4 1. 0x22 2. 0x24 3. 0x26
-
-#
-# 
-# A simple DSCP-class relationship formula used to generate
-# values in the for loop of this script; $drop stands for the
-# DP
-#	$dscp = ($class*8+$drop*2)
-#
-#  if you use GRIO buffer sharing, then GRED priority is set as follows:
-#  $gprio=$drop+1; 
-#
-
-$TC = "/usr/src/iproute2-current/tc/tc";
-$DEV = "dev lo";
-$DEV = "dev eth1";
-$DEV = "dev eth0";
-# the BE-class number
-$beclass = "5";  
-
-#GRIO buffer sharing on or off?
-$GRIO = "";
-$GRIO = "grio";
-# The bandwidth of your device
-$linerate="10Mbit";
-# The BE and AF rates
-%rate_table=();
-$berate="1500Kbit";
-$rate_table{"AF1rate"}="1500Kbit";
-$rate_table{"AF2rate"}="1500Kbit";
-$rate_table{"AF3rate"}="1500Kbit";
-$rate_table{"AF4rate"}="1500Kbit";
-#
-#
-#
-print "\n# --- General setup  ---\n";
-print "$TC qdisc add $DEV handle 1:0 root dsmark indices 64 set_tc_index\n";
-print "$TC filter add $DEV parent 1:0 protocol ip prio 1 tcindex mask 0xfc " .
-   "shift 2 pass_on\n";
-   #"shift 2\n";
-print "$TC qdisc add $DEV parent 1:0 handle 2:0 cbq bandwidth $linerate ".
-  "cell 8 avpkt 1000 mpu 64\n";
-print "$TC filter add $DEV parent 2:0 protocol ip prio 1 tcindex ".
-  "mask 0xf0 shift 4 pass_on\n";
-for $class (1..4) {
-    print "\n# --- AF Class $class specific setup---\n";
-    $AFrate=sprintf("AF%drate",$class);
-    print "$TC class add $DEV parent 2:0 classid 2:$class cbq ".
-      "bandwidth $linerate rate $rate_table{$AFrate} avpkt 1000 prio ".
-      (6-$class)." bounded allot 1514 weight 1 maxburst 21\n";
-    print "$TC filter add $DEV parent 2:0 protocol ip prio 1 handle $class ".
-      "tcindex classid 2:$class\n";
-    print "$TC qdisc add $DEV parent 2:$class gred setup DPs 3 default 2 ".
-      "$GRIO\n";
-# 
-# per DP setup
-#
-    for $drop (1..3) {
-    print "\n# --- AF Class $class DP $drop---\n";
-	$dscp = $class*8+$drop*2;
-	$tcindex = sprintf("1%x%x",$class,$drop);
-	print "$TC filter add $DEV parent 1:0 protocol ip prio 1 ".
-	  "handle $dscp tcindex classid 1:$tcindex\n";
-	$prob = $drop*0.02;
-        if ($GRIO) {
-	$gprio = $drop+1;
-	print "$TC qdisc change $DEV parent 2:$class gred limit 60KB min 15KB ".
-	  "max 45KB burst 20 avpkt 1000 bandwidth $linerate DP $drop ".
-	  "probability $prob ".
-          "prio $gprio\n";
-        } else {
-	print "$TC qdisc change $DEV parent 2:$class gred limit 60KB min 15KB ".
-	  "max 45KB burst 20 avpkt 1000 bandwidth $linerate DP $drop ".
-	  "probability $prob \n";
-	}
-    }
-}
-#
-#
-print "\n#------BE Queue setup------\n";
-print "$TC filter add $DEV parent 1:0 protocol ip prio 2 ".
-          "handle 0 tcindex mask 0 classid 1:1\n";
-print "$TC class add $DEV parent 2:0 classid 2:$beclass cbq ".
-      "bandwidth $linerate rate $berate avpkt 1000 prio 6 " .
-      "bounded allot 1514 weight 1 maxburst 21 \n";
-print "$TC filter add $DEV parent 2:0 protocol ip prio 1 handle 0 tcindex ".
-  "classid 2:5\n";
-print "$TC qdisc add $DEV parent 2:5 red limit 60KB min 15KB max 45KB ".
-  "burst 20 avpkt 1000 bandwidth $linerate probability 0.4\n";
--- a/examples/diffserv/ef-prio
+++ b/examples/diffserv/ef-prio
@ -1,25 +0,0 @@
-#!/usr/bin/perl
-$TC = "/root/DS-6-beta/iproute2-990530-dsing/tc/tc";
-$DEV = "dev eth1";
-$efrate="1.5Mbit";
-$MTU="1.5kB";
-print "$TC qdisc add $DEV handle 1:0 root dsmark indices 64 set_tc_index\n";
-print "$TC filter add $DEV parent 1:0 protocol ip prio 1 tcindex ".
-  "mask 0xfc shift 2\n";
-print "$TC qdisc add $DEV parent 1:0 handle 2:0 prio\n";
-#
-# EF class: Maximum about one MTU sized packet allowed on the queue
-#
-print "$TC qdisc add $DEV parent 2:1 tbf rate $efrate burst $MTU limit 1.6kB\n";
-print "$TC filter add $DEV parent 2:0 protocol ip prio 1 ".
-	  "handle 0x2e tcindex classid 2:1 pass_on\n";
-#
-# BE class
-#
-print "#BE class(2:2) \n";
-print "$TC qdisc add $DEV parent 2:2 red limit 60KB ".
-	  "min 15KB max 45KB burst 20 avpkt 1000 bandwidth 10Mbit ".
-	  "probability 0.4\n";
-#
-print "$TC filter add $DEV parent 2:0 protocol ip prio 2 ".
-	  "handle 0 tcindex mask 0 classid 2:2 pass_on\n";
--- a/examples/diffserv/efcbq
+++ b/examples/diffserv/efcbq
@ -1,31 +0,0 @@
-#!/usr/bin/perl
-#
-$TC = "/root/DS-6-beta/iproute2-990530-dsing/tc/tc";
-$DEV = "dev eth1";
-print "$TC qdisc add $DEV handle 1:0 root dsmark indices 64 set_tc_index\n";
-print "$TC filter add $DEV parent 1:0 protocol ip prio 1 tcindex ".
-  "mask 0xfc shift 2\n";
-print "$TC qdisc add $DEV parent 1:0 handle 2:0 cbq bandwidth ".
-	"10Mbit cell 8 avpkt 1000 mpu 64\n";
-#
-# EF class
-#
-print "$TC class add $DEV parent 2:0 classid 2:1 cbq bandwidth ". 
-	"10Mbit rate 1500Kbit avpkt 1000 prio 1 bounded isolated ".
-	"allot 1514 weight 1 maxburst 10 \n";
-# packet fifo for EF?
-print "$TC qdisc add $DEV parent 2:1 pfifo limit 5\n";
-print "$TC filter add $DEV parent 2:0 protocol ip prio 1 ".
-	  "handle 0x2e tcindex classid 2:1 pass_on\n";
-#
-# BE class
-#
-print "#BE class(2:2) \n";
-print "$TC class add $DEV parent 2:0 classid 2:2 cbq bandwidth ". 
-	"10Mbit rate 5Mbit avpkt 1000 prio 7 allot 1514 weight 1 ".
-	"maxburst 21 borrow split 2:0 defmap 0xffff \n";
-print "$TC qdisc add $DEV parent 2:2 red limit 60KB ".
-	  "min 15KB max 45KB burst 20 avpkt 1000 bandwidth 10Mbit ".
-	  "probability 0.4\n";
-print "$TC filter add $DEV parent 2:0 protocol ip prio 2 ".
-	  "handle 0 tcindex mask 0 classid 2:2 pass_on\n";
--- a/examples/diffserv/regression-testing
+++ b/examples/diffserv/regression-testing
@ -1,125 +0,0 @@
-
-These were the tests done to validate the Diffserv scripts.
-This document will be updated continously. If you do more
-thorough validation testing please post the details to the
-diffserv mailing list. 
-Nevertheless, these tests should serve for basic validation.
-
-AFCBQ, EFCBQ, EFPRIO
----------------------
-
-generate all possible DSCPs and observe that they 
-get sent to the proper classes. In the case of AF also
-to the correct Virtual Queues.
-
-Edge1
-----
-generate TOS values 0x0,0x10,0xbb each with IP addresses
-10.2.0.24 (mark 1), 10.2.0.3 (mark2) and 10.2.0.30 (mark 3)
-and observe that they get marked as expected.
-
-Edge2
-----
-
-Repeat the tests in Edge1
-ftp with data direction from 10.2.0.2
-	*observe that the metering/policing works correctly (and the marking
-	as well). In this case the mark used will be 3
-
-Edge31-cb-chains
----------------
-
-ftp with data direction from 10.2.0.2
-
-	*observe that the metering/policing works correctly (and the marking
-	as well). In this case the mark used will be 1. 
-
-	Metering: The data throughput should not exceed 2*CIR1 + 2*CIR2
-	which is roughly: 5mbps
-
-	Marking: the should be a variation of marked packets:
-	AF41(TOS=0x88) AF42(0x90) AF43(0x98) and BE (0x0)
-
-More tests required to see the interaction of several sources (other
-than subnet 10.2.0.0/24).
-
-Edge31-ca-u32
--------------
-
-Generate data using modified tcpblast from 10.2.0.2 (behind eth2) to the 
-discard port of 10.1.0.2 (behind eth1)
-
-1) generate with src tos = 0x88
-	Metering: Allocated throughput should not exceed 2*CIR1 + 2*CIR2
-	approximately 5mbps
-	Marking: Should vary between 0x88,0x90,0x98 and 0x0
-
-2) generate with src tos = 0x90
-	Metering: Allocated throughput should not exceed CIR1 + 2*CIR2
-	approximately 3.5mbps
-	Marking: Should vary between 0x90,0x98 and 0x0
-
-3) generate with src tos = 0x98
-	Metering: Allocated throughput should not exceed CIR1 + CIR2
-	approximately 2.5mbps
-	Marking: Should vary between 0x98 and 0x0
-
-4) generate with src tos any other than the above
-	Metering: Allocated throughput should not exceed CIR1 
-	approximately 1.5mbps
-	Marking: Should be consistent at 0x0
-
-TODO: Testing on how each color shares when all 4 types of packets
-are going through the edge device
-
-Edge32-cb-u32, Edge32-cb-chains
-------------------------------
-
-ftp with data direction from 10.2.0.2
-
-	*observe that the metering/policing works correctly (and the marking
-	as well). 
-
-	Metering: 
-        The data throughput should not exceed 2*CIR1 + 2*CIR2
-	+ 2*PIR2 + PIR1 for u32 which is roughly: 6mbps
-        The data throughput should not exceed 2*CIR1 + 5*CIR2
-	for chains which is roughly: 6mbps
-
-	Marking: the should be a variation of marked packets:
-	AF41(TOS=0x88) AF42(0x90) AF43(0x98) and BE (0x0)
-
-TODO:
-More tests required to see the interaction of several sources (other
-than subnet 10.2.0.0/24).
-More tests needed to capture stats on how many times the CIR was exceeded
-but the data was not remarked etc.
-
-Edge32-ca-u32
--------------
-
-Generate data using modified tcpblast from 10.2.0.2 (behind eth2) to the 
-discard port of 10.1.0.2 (behind eth1)
-
-1) generate with src tos = 0x88
-	Metering: Allocated throughput should not exceed 2*CIR1 + 2*CIR2
-	+PIR1 -- approximately 4mbps
-	Marking: Should vary between 0x88,0x90,0x98 and 0x0
-
-2) generate with src tos = 0x90
-	Metering: Allocated throughput should not exceed CIR1 + 2*CIR2
-	+ 2* PIR2 approximately 3mbps
-	Marking: Should vary between 0x90,0x98 and 0x0
-
-3) generate with src tos = 0x98
-	Metering: Allocated throughput should not exceed PIR1+ CIR1 + CIR2
-	approximately 2.5mbps
-	Marking: Should vary between 0x98 and 0x0
-
-4) generate with src tos any other than the above
-	Metering: Allocated throughput should not exceed CIR1 
-	approximately 1mbps
-	Marking: Should be consistent at 0x0
-
-TODO: Testing on how each color shares when all 4 types of packets
-are going through the edge device
--- a/examples/gaiconf
+++ b/examples/gaiconf
@ -1,134 +0,0 @@
-#!/bin/sh
-
-#
-# Setup address label from /etc/gai.conf
-#
-# Written by YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>, 2010.
-#
-
-IP=ip
-DEFAULT_GAICONF=/etc/gai.conf
-verbose=
-debug=
-
-function run ()
-{
-	if [ x"$verbose" != x"" ]; then
-		echo "$@"
-	fi
-	if [ x"$debug" = x"" ]; then
-		"$@"
-	fi
-}
-
-function do_load_config ()
-{
-	file=$1; shift
-	flush=1
-	cat $file | while read command prefix label; do
-		if [ x"$command" = x"#label" ]; then
-			if [ ${flush} = 1 ]; then
-				run ${IP} -6 addrlabel flush
-				flush=0
-			fi
-			run ${IP} -6 addrlabel add prefix $prefix label $label
-		fi
-	done
-}
-
-function do_list_config ()
-{
-	${IP} -6 addrlabel list | while read p pfx l lbl; do
-		echo label ${pfx} ${lbl}
-	done
-}
-
-function help ()
-{
-	echo "Usage: $0 [-v] {--list | --config [ ${DEFAULT_GAICONF} ] | --default}"
-	exit 1
-}
-
-TEMP=`getopt -o c::dlv -l config::,default,list,verbose -n gaiconf -- "$@"`
-
-if [ $? != 0 ]; then
-	echo "Terminating..." >&2
-	exit 1
-fi
-
-TEMPFILE=`mktemp`
-
-eval set -- "$TEMP"
-
-while true ; do
-	case "$1" in
-		-c|--config)
-			if [ x"$cmd" != x"" ]; then
-				help
-			fi
-			case "$2" in
-			"")	gai_conf="${DEFAULT_GAICONF}"
-				shift 2
-				;;
-			*)	gai_conf="$2"
-				shift 2
-			esac
-			cmd=config
-			;;
-		-d|--default)
-			if [ x"$cmd" != x"" ]; then
-				help
-			fi
-			gai_conf=${TEMPFILE}
-			cmd=config
-			;;
-		-l|--list)
-			if [ x"$cmd" != x"" ]; then
-				help
-			fi
-			cmd=list
-			shift
-			;;
-		-v)
-			verbose=1
-			shift
-			;;
-		--)
-			shift;
-			break
-			;;
-		*)
-			echo "Internal error!" >&2
-			exit 1
-			;;
-	esac
-done
-
-case "$cmd" in
-	config)
-		if [ x"$gai_conf" = x"${TEMPFILE}" ]; then
-			sed -e 's/^[[:space:]]*//' <<END_OF_DEFAULT >${TEMPFILE}
-				label ::1/128       0
-				label ::/0          1
-				label 2002::/16     2
-				label ::/96         3
-				label ::ffff:0:0/96 4
-				label fec0::/10     5
-				label fc00::/7      6
-				label 2001:0::/32   7
-END_OF_DEFAULT
-		fi
-		do_load_config "$gai_conf"
-		;;
-	list)
-		do_list_config
-		;;
-	*)
-		help
-		;;
-esac
-
-rm -f "${TEMPFILE}"
-
-exit 0
-
--- a/genl/Makefile
+++ b/genl/Makefile
@ -1,6 +1,7 @@
+# SPDX-License-Identifier: GPL-2.0
 GENLOBJ=genl.o

-include ../Config
+include ../config.mk
 SHARED_LIBS ?= y

 CFLAGS += -fno-strict-aliasing
@ -17,11 +18,6 @@ LDFLAGS += -Wl,-export-dynamic
 LDLIBS  += -lm -ldl
 endif

-ifeq ($(HAVE_MNL),y)
-	CFLAGS += -DHAVE_LIBMNL $(shell $(PKG_CONFIG) libmnl --cflags)
-	LDLIBS += $(shell $(PKG_CONFIG) libmnl --libs)
-endif
-
 all: genl

 genl: $(GENLOBJ) $(LIBNETLINK) $(LIBUTIL) $(GENLLIB)
--- a/genl/ctrl.c
+++ b/genl/ctrl.c
@ -13,7 +13,6 @@
 #include <stdio.h>
 #include <stdlib.h>
 #include <unistd.h>
-#include <syslog.h>
 #include <fcntl.h>
 #include <sys/socket.h>
 #include <netinet/in.h>
@ -29,84 +28,18 @@
 static int usage(void)
 {
 	fprintf(stderr,"Usage: ctrl <CMD>\n" \
-		       "CMD   := get <PARMS> | list | monitor\n" \
+		       "CMD   := get <PARMS> | list | monitor | policy <PARMS>\n" \
 		       "PARMS := name <name> | id <id>\n" \
 		       "Examples:\n" \
 		       "\tctrl ls\n" \
 		       "\tctrl monitor\n" \
 		       "\tctrl get name foobar\n" \
-		       "\tctrl get id 0xF\n");
+		       "\tctrl get id 0xF\n"
+		       "\tctrl policy name foobar\n"
+		       "\tctrl policy id 0xF\n");
 	return -1;
 }

-int genl_ctrl_resolve_family(const char *family)
-{
-	struct rtnl_handle rth;
-	int ret = 0;
-	struct {
-		struct nlmsghdr         n;
-		struct genlmsghdr	g;
-		char                    buf[4096];
-	} req = {
-		.n.nlmsg_len = NLMSG_LENGTH(GENL_HDRLEN),
-		.n.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK,
-		.n.nlmsg_type = GENL_ID_CTRL,
-		.g.cmd = CTRL_CMD_GETFAMILY,
-	};
-	struct nlmsghdr *nlh = &req.n;
-	struct genlmsghdr *ghdr = &req.g;
-
-	if (rtnl_open_byproto(&rth, 0, NETLINK_GENERIC) < 0) {
-		fprintf(stderr, "Cannot open generic netlink socket\n");
-		exit(1);
-	}
-
-	addattr_l(nlh, 128, CTRL_ATTR_FAMILY_NAME, family, strlen(family) + 1);
-
-	if (rtnl_talk(&rth, nlh, nlh, sizeof(req)) < 0) {
-		fprintf(stderr, "Error talking to the kernel\n");
-		goto errout;
-	}
-
-	{
-		struct rtattr *tb[CTRL_ATTR_MAX + 1];
-		int len = nlh->nlmsg_len;
-		struct rtattr *attrs;
-
-		if (nlh->nlmsg_type !=  GENL_ID_CTRL) {
-			fprintf(stderr, "Not a controller message, nlmsg_len=%d "
-				"nlmsg_type=0x%x\n", nlh->nlmsg_len, nlh->nlmsg_type);
-			goto errout;
-		}
-
-		if (ghdr->cmd != CTRL_CMD_NEWFAMILY) {
-			fprintf(stderr, "Unknown controller command %d\n", ghdr->cmd);
-			goto errout;
-		}
-
-		len -= NLMSG_LENGTH(GENL_HDRLEN);
-
-		if (len < 0) {
-			fprintf(stderr, "wrong controller message len %d\n", len);
-			return -1;
-		}
-
-		attrs = (struct rtattr *) ((char *) ghdr + GENL_HDRLEN);
-		parse_rtattr(tb, CTRL_ATTR_MAX, attrs, len);
-
-		if (tb[CTRL_ATTR_FAMILY_ID] == NULL) {
-			fprintf(stderr, "Missing family id TLV\n");
-			goto errout;
-		}
-
-		ret = rta_getattr_u16(tb[CTRL_ATTR_FAMILY_ID]);
-	}
-
-errout:
-	rtnl_close(&rth);
-	return ret;
-}
-
 static void print_ctrl_cmd_flags(FILE *fp, __u32 fl)
 {
 	fprintf(fp, "\n\t\tCapabilities (0x%x):\n ", fl);
@ -172,8 +105,7 @@ static int print_ctrl_grp(FILE *fp, struct rtattr *arg, __u32 ctrl_ver)
 /*
 * The controller sends one nlmsg per family
 */
-static int print_ctrl(const struct sockaddr_nl *who,
-		      struct rtnl_ctrl_data *ctrl,
+static int print_ctrl(struct rtnl_ctrl_data *ctrl,
 		      struct nlmsghdr *n, void *arg)
 {
 	struct rtattr *tb[CTRL_ATTR_MAX + 1];
@ -193,7 +125,8 @@ static int print_ctrl(const struct sockaddr_nl *who,
 	    ghdr->cmd != CTRL_CMD_DELFAMILY &&
 	    ghdr->cmd != CTRL_CMD_NEWFAMILY &&
 	    ghdr->cmd != CTRL_CMD_NEWMCAST_GRP &&
-	    ghdr->cmd != CTRL_CMD_DELMCAST_GRP) {
+	    ghdr->cmd != CTRL_CMD_DELMCAST_GRP &&
+	    ghdr->cmd != CTRL_CMD_GETPOLICY) {
 		fprintf(stderr, "Unknown controller command %d\n", ghdr->cmd);
 		return 0;
 	}
@ -206,7 +139,7 @@ static int print_ctrl(const struct sockaddr_nl *who,
 	}

 	attrs = (struct rtattr *) ((char *) ghdr + GENL_HDRLEN);
-	parse_rtattr(tb, CTRL_ATTR_MAX, attrs, len);
+	parse_rtattr_flags(tb, CTRL_ATTR_MAX, attrs, len, NLA_F_NESTED);

 	if (tb[CTRL_ATTR_FAMILY_NAME]) {
 		char *name = RTA_DATA(tb[CTRL_ATTR_FAMILY_NAME]);
@ -229,6 +162,36 @@ static int print_ctrl(const struct sockaddr_nl *who,
 		__u32 *ma = RTA_DATA(tb[CTRL_ATTR_MAXATTR]);
 		fprintf(fp, " max attribs: %d ",*ma);
 	}
+	if (tb[CTRL_ATTR_OP_POLICY]) {
+		const struct rtattr *pos;
+
+		rtattr_for_each_nested(pos, tb[CTRL_ATTR_OP_POLICY]) {
+			struct rtattr *ptb[CTRL_ATTR_POLICY_DUMP_MAX + 1];
+			struct rtattr *pattrs = RTA_DATA(pos);
+			int plen = RTA_PAYLOAD(pos);
+
+			parse_rtattr_flags(ptb, CTRL_ATTR_POLICY_DUMP_MAX,
+					   pattrs, plen, NLA_F_NESTED);
+
+			fprintf(fp, " op %d policies:",
+				pos->rta_type & ~NLA_F_NESTED);
+
+			if (ptb[CTRL_ATTR_POLICY_DO]) {
+				__u32 *v = RTA_DATA(ptb[CTRL_ATTR_POLICY_DO]);
+
+				fprintf(fp, " do=%u", *v); /* __u32: unsigned */
+			}
+
+			if (ptb[CTRL_ATTR_POLICY_DUMP]) {
+				__u32 *v = RTA_DATA(ptb[CTRL_ATTR_POLICY_DUMP]);
+
+				fprintf(fp, " dump=%u", *v); /* __u32: unsigned */
+			}
+		}
+	}
+	if (tb[CTRL_ATTR_POLICY])
+		nl_print_policy(tb[CTRL_ATTR_POLICY], fp);
+
 	/* end of family definitions .. */
 	fprintf(fp,"\n");
 	if (tb[CTRL_ATTR_OPS]) {
@ -277,10 +240,9 @@ static int print_ctrl(const struct sockaddr_nl *who,
 	return 0;
 }

-static int print_ctrl2(const struct sockaddr_nl *who,
-		      struct nlmsghdr *n, void *arg)
+static int print_ctrl2(struct nlmsghdr *n, void *arg)
 {
-	return print_ctrl(who, NULL, n, arg);
+	return print_ctrl(NULL, n, arg);
 }

 static int ctrl_list(int cmd, int argc, char **argv)
@ -299,13 +261,16 @@ static int ctrl_list(int cmd, int argc, char **argv)
 		.g.cmd = CTRL_CMD_GETFAMILY,
 	};
 	struct nlmsghdr *nlh = &req.n;
+	struct nlmsghdr *answer = NULL;

 	if (rtnl_open_byproto(&rth, 0, NETLINK_GENERIC) < 0) {
 		fprintf(stderr, "Cannot open generic netlink socket\n");
 		exit(1);
 	}

-	if (cmd == CTRL_CMD_GETFAMILY) {
+	if (cmd == CTRL_CMD_GETFAMILY || cmd == CTRL_CMD_GETPOLICY) {
+		req.g.cmd = cmd;
+
 		if (argc != 2) {
 			fprintf(stderr, "Wrong number of params\n");
 			return -1;
@ -330,20 +295,22 @@ static int ctrl_list(int cmd, int argc, char **argv)
 			fprintf(stderr, "Wrong params\n");
 			goto ctrl_done;
 		}
+	}

-		if (rtnl_talk(&rth, nlh, nlh, sizeof(req)) < 0) {
+	if (cmd == CTRL_CMD_GETFAMILY) {
+		if (rtnl_talk(&rth, nlh, &answer) < 0) {
 			fprintf(stderr, "Error talking to the kernel\n");
 			goto ctrl_done;
 		}

-		if (print_ctrl2(NULL, nlh, (void *) stdout) < 0) {
+		if (print_ctrl2(answer, (void *) stdout) < 0) {
 			fprintf(stderr, "Dump terminated\n");
 			goto ctrl_done;
 		}

 	}

-	if (cmd == CTRL_CMD_UNSPEC) {
+	if (cmd == CTRL_CMD_UNSPEC || cmd == CTRL_CMD_GETPOLICY) {
 		nlh->nlmsg_flags = NLM_F_ROOT|NLM_F_MATCH|NLM_F_REQUEST;
 		nlh->nlmsg_seq = rth.dump = ++rth.seq;

@ -358,6 +325,7 @@ static int ctrl_list(int cmd, int argc, char **argv)

 	ret = 0;
 ctrl_done:
+	free(answer);
 	rtnl_close(&rth);
 	return ret;
 }
@ -393,6 +361,8 @@ static int parse_ctrl(struct genl_util *a, int argc, char **argv)
 	    matches(*argv, "show") == 0 ||
 	    matches(*argv, "lst") == 0)
 		return ctrl_list(CTRL_CMD_UNSPEC, argc-1, argv+1);
+	if (matches(*argv, "policy") == 0)
+		return ctrl_list(CTRL_CMD_GETPOLICY, argc-1, argv+1);
 	if (matches(*argv, "help") == 0)
 		return usage();

--- a/genl/genl.c
+++ b/genl/genl.c
@ -13,7 +13,6 @@
 #include <stdio.h>
 #include <stdlib.h>
 #include <unistd.h>
-#include <syslog.h>
 #include <fcntl.h>
 #include <dlfcn.h>
 #include <sys/socket.h>
@ -23,21 +22,19 @@
 #include <errno.h>
 #include <linux/netlink.h>
 #include <linux/rtnetlink.h> /* until we put our own header */
-#include "SNAPSHOT.h"
+#include "version.h"
 #include "utils.h"
 #include "genl_utils.h"

-int show_stats = 0;
-int show_details = 0;
-int show_raw = 0;
-int resolve_hosts = 0;
+int show_stats;
+int show_details;
+int show_raw;

 static void *BODY;
-static struct genl_util * genl_list;
+static struct genl_util *genl_list;


-static int print_nofopt(const struct sockaddr_nl *who, struct nlmsghdr *n,
-			void *arg)
+static int print_nofopt(struct nlmsghdr *n, void *arg)
 {
 	fprintf((FILE *) arg, "unknown genl type ..\n");
 	return 0;
@ -46,8 +43,9 @@ static int print_nofopt(const struct sockaddr_nl *who, struct nlmsghdr *n,
 static int parse_nofopt(struct genl_util *f, int argc, char **argv)
 {
 	if (argc) {
-		fprintf(stderr, "Unknown genl \"%s\", hence option \"%s\" "
-			"is unparsable\n", f->name, *argv);
+		fprintf(stderr,
+			"Unknown genl \"%s\", hence option \"%s\" is unparsable\n",
+			f->name, *argv);
 		return -1;
 	}

@ -100,9 +98,10 @@ static void usage(void) __attribute__((noreturn));

 static void usage(void)
 {
-	fprintf(stderr, "Usage: genl [ OPTIONS ] OBJECT | help }\n"
-	                "where  OBJECT := { ctrl etc }\n"
-	                "       OPTIONS := { -s[tatistics] | -d[etails] | -r[aw] }\n");
+	fprintf(stderr,	/* synopsis goes to stderr; the exit(-1) below ends the process */
+		"Usage: genl [ OPTIONS ] OBJECT [help]\n"
+		"where  OBJECT := { ctrl etc }\n"
+		"       OPTIONS := { -s[tatistics] | -d[etails] | -r[aw] | -V[ersion] | -h[elp] }\n");
 	exit(-1);
 }

@ -119,24 +118,26 @@ int main(int argc, char **argv)
 		} else if (matches(argv[1], "-raw") == 0) {
 			++show_raw;
 		} else if (matches(argv[1], "-Version") == 0) {
-			printf("genl utility, iproute2-ss%s\n", SNAPSHOT);
+			printf("genl utility, iproute2-%s\n", version);
 			exit(0);
 		} else if (matches(argv[1], "-help") == 0) {
 			usage();
 		} else {
-			fprintf(stderr, "Option \"%s\" is unknown, try "
-				"\"genl -help\".\n", argv[1]);
+			fprintf(stderr,
+				"Option \"%s\" is unknown, try \"genl -help\".\n",
+				argv[1]);
 			exit(-1);
 		}
 		argc--;	argv++;
 	}

 	if (argc > 1) {
+		struct genl_util *a;
 		int ret;
-		struct genl_util *a = NULL;
+
 		a = get_genl_kind(argv[1]);
 		if (!a) {
-			fprintf(stderr,"bad genl %s\n", argv[1]);
+			fprintf(stderr, "bad genl %s\n", argv[1]);
 			exit(-1);
 		}

--- a/genl/genl_utils.h
+++ b/genl/genl_utils.h
@ -1,17 +1,15 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 #ifndef _TC_UTIL_H_
 #define _TC_UTIL_H_ 1

+#include <linux/genetlink.h>
 #include "utils.h"
-#include "linux/genetlink.h"

-struct genl_util
-{
+struct genl_util {
 	struct  genl_util *next;
 	char	name[16];
 	int	(*parse_genlopt)(struct genl_util *fu, int argc, char **argv);
-	int	(*print_genlopt)(const struct sockaddr_nl *who, struct nlmsghdr *n, void *arg);
+	int	(*print_genlopt)(struct nlmsghdr *n, void *arg);
 };

-extern int genl_ctrl_resolve_family(const char *family);
-
 #endif
--- a/genl/static-syms.c
+++ b/genl/static-syms.c
@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
 * This file creates a dummy version of dynamic loading
 * for environments where dynamic linking
--- a/include/SNAPSHOT.h
+++ b/include/SNAPSHOT.h
@ -1 +0,0 @@
-static const char SNAPSHOT[] = "170905";
--- a/include/bpf_api.h
+++ b/include/bpf_api.h
@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 #ifndef __BPF_API__
 #define __BPF_API__

@ -18,6 +19,19 @@

 #include "bpf_elf.h"

+/** libbpf pin type. */
+enum libbpf_pin_type {
+	LIBBPF_PIN_NONE,
+	/* PIN_BY_NAME: pin maps by name (in /sys/fs/bpf by default) */
+	LIBBPF_PIN_BY_NAME,
+};
+
+/** Type helper macros. */
+
+#define __uint(name, val) int (*name)[val]
+#define __type(name, val) typeof(val) *name
+#define __array(name, val) typeof(val) *name[]
+
 /** Misc macros. */

 #ifndef __stringify
--- a/include/bpf_elf.h
+++ b/include/bpf_elf.h
@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 #ifndef __BPF_ELF__
 #define __BPF_ELF__

@ -40,4 +41,13 @@ struct bpf_elf_map {
 	__u32 inner_idx;
 };

+#define BPF_ANNOTATE_KV_PAIR(name, type_key, type_val)		\
+	struct ____btf_map_##name {				\
+		type_key key;					\
+		type_val value;					\
+	};							\
+	struct ____btf_map_##name				\
+	    __attribute__ ((section(".maps." #name), used))	\
+	    ____btf_map_##name = { }
+
 #endif /* __BPF_ELF__ */
--- a/include/bpf_scm.h
+++ b/include/bpf_scm.h
@ -1,8 +1,10 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 #ifndef __BPF_SCM__
 #define __BPF_SCM__

 #include <sys/types.h>
 #include <sys/socket.h>
+#include <sys/un.h>

 #include "utils.h"
 #include "bpf_elf.h"
--- a/include/bpf_util.h
+++ b/include/bpf_util.h
@ -14,6 +14,7 @@
 #define __BPF_UTIL__

 #include <linux/bpf.h>
+#include <linux/btf.h>
 #include <linux/filter.h>
 #include <linux/magic.h>
 #include <linux/elf-em.h>
@ -56,13 +57,29 @@ struct bpf_cfg_ops {
 	void (*ebpf_cb)(void *nl, int fd, const char *annotation);
 };

+enum bpf_mode {
+	CBPF_BYTECODE,
+	CBPF_FILE,
+	EBPF_OBJECT,
+	EBPF_PINNED,
+	BPF_MODE_MAX,
+};
+
 struct bpf_cfg_in {
 	const char *object;
 	const char *section;
 	const char *uds;
+	enum bpf_prog_type type;	/* program type, now carried in the config itself */
+	enum bpf_mode mode;	/* one of the enum bpf_mode values declared above */
+	__u32 ifindex;	/* NOTE(review): presumably the target device for bpf_prog_load_dev() - confirm */
+	bool verbose;
 	int argc;
 	char **argv;
-	struct sock_filter *ops;
+	struct sock_filter opcodes[BPF_MAXINSNS];	/* inline fixed-size array; replaces the old 'ops' pointer */
+	union {	/* both members share storage; NOTE(review): presumably selected by 'mode' - confirm */
+		int n_opcodes;
+		int prog_fd;
+	};
 };

 /* ALU ops on registers, bpf_add|sub|...: dst_reg += src_reg */
@ -244,22 +261,29 @@ struct bpf_cfg_in {
 		.off   = 0,					\
 		.imm   = 0 })

-int bpf_parse_common(enum bpf_prog_type type, struct bpf_cfg_in *cfg,
-		     const struct bpf_cfg_ops *ops, void *nl);
+int bpf_parse_common(struct bpf_cfg_in *cfg, const struct bpf_cfg_ops *ops);
+int bpf_load_common(struct bpf_cfg_in *cfg, const struct bpf_cfg_ops *ops,
+		    void *nl);
+int bpf_parse_and_load_common(struct bpf_cfg_in *cfg,
+			      const struct bpf_cfg_ops *ops, void *nl);

 const char *bpf_prog_to_default_section(enum bpf_prog_type type);

 int bpf_graft_map(const char *map_path, uint32_t *key, int argc, char **argv);
 int bpf_trace_pipe(void);

-void bpf_print_ops(FILE *f, struct rtattr *bpf_ops, __u16 len);
+void bpf_print_ops(struct rtattr *bpf_ops, __u16 len);

-int bpf_prog_load(enum bpf_prog_type type, const struct bpf_insn *insns,
-		  size_t size_insns, const char *license, char *log,
-		  size_t size_log);
+int bpf_prog_load_dev(enum bpf_prog_type type, const struct bpf_insn *insns,
+		      size_t size_insns, const char *license, __u32 ifindex,
+		      char *log, size_t size_log);
+int bpf_program_load(enum bpf_prog_type type, const struct bpf_insn *insns,
+		     size_t size_insns, const char *license, char *log,
+		     size_t size_log);

 int bpf_prog_attach_fd(int prog_fd, int target_fd, enum bpf_attach_type type);
 int bpf_prog_detach_fd(int target_fd, enum bpf_attach_type type);
+int bpf_program_attach(int prog_fd, int target_fd, enum bpf_attach_type type);

 int bpf_dump_prog_info(FILE *f, uint32_t id);

@ -267,6 +291,16 @@ int bpf_dump_prog_info(FILE *f, uint32_t id);
 int bpf_send_map_fds(const char *path, const char *obj);
 int bpf_recv_map_fds(const char *path, int *fds, struct bpf_map_aux *aux,
 		     unsigned int entries);
+#ifdef HAVE_LIBBPF
+int iproute2_bpf_elf_ctx_init(struct bpf_cfg_in *cfg);
+int iproute2_bpf_fetch_ancillary(void);
+int iproute2_get_root_path(char *root_path, size_t len);
+bool iproute2_is_pin_map(const char *libbpf_map_name, char *pathname);
+bool iproute2_is_map_in_map(const char *libbpf_map_name, struct bpf_elf_map *imap,
+			    struct bpf_elf_map *omap, char *omap_name);
+int iproute2_find_map_name_by_id(unsigned int map_id, char *name);
+int iproute2_load_libbpf(struct bpf_cfg_in *cfg);
+#endif /* HAVE_LIBBPF */
 #else
 static inline int bpf_send_map_fds(const char *path, const char *obj)
 {
@ -279,5 +313,15 @@ static inline int bpf_recv_map_fds(const char *path, int *fds,
 {
 	return -1;
 }
+#ifdef HAVE_LIBBPF
+static inline int iproute2_load_libbpf(struct bpf_cfg_in *cfg)
+{
+	fprintf(stderr, "No ELF library support compiled in.\n");
+	return -1;
+}
+#endif /* HAVE_LIBBPF */
 #endif /* HAVE_ELF */
+
+const char *get_libbpf_version(void);
+
 #endif /* __BPF_UTIL__ */
--- a/include/cg_map.h
+++ b/include/cg_map.h
@ -0,0 +1,6 @@
+#ifndef __CG_MAP_H__
+#define __CG_MAP_H__
+
+const char *cg_id_to_path(__u64 id);
+
+#endif /* __CG_MAP_H__ */
--- a/include/color.h
+++ b/include/color.h
@ -1,6 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 #ifndef __COLOR_H__
 #define __COLOR_H__ 1

+#include <stdbool.h>
+
 enum color_attr {
 	COLOR_IFNAME,
 	COLOR_MAC,
@ -8,11 +11,17 @@ enum color_attr {
 	COLOR_INET6,
 	COLOR_OPERSTATE_UP,
 	COLOR_OPERSTATE_DOWN,
-	COLOR_CLEAR
+	COLOR_NONE
 };

-void enable_color(void);
-void set_color_palette(void);
+enum color_opt {
+	COLOR_OPT_NEVER = 0,
+	COLOR_OPT_AUTO = 1,
+	COLOR_OPT_ALWAYS = 2
+};
+
+bool check_enable_color(int color, int json);
+bool matches_color(const char *arg, int *val);
 int color_fprintf(FILE *fp, enum color_attr attr, const char *fmt, ...);
 enum color_attr ifa_family_color(__u8 ifa_family);
 enum color_attr oper_state_color(__u8 state);
--- a/include/dlfcn.h
+++ b/include/dlfcn.h
@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
 * Stub dlfcn implementation for systems that lack shared library support
 * but obviously can still reference compiled-in symbols.
--- a/include/ip6tables.h
+++ b/include/ip6tables.h
@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 #ifndef _IP6TABLES_USER_H
 #define _IP6TABLES_USER_H

--- a/include/iptables.h
+++ b/include/iptables.h
@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 #ifndef _IPTABLES_USER_H
 #define _IPTABLES_USER_H

@ -11,7 +12,7 @@ extern int do_command4(int argc, char *argv[], char **table,
 		      struct xtc_handle **handle, bool restore);
 extern int delete_chain4(const xt_chainlabel chain, int verbose,
 			struct xtc_handle *handle);
-extern int flush_entries4(const xt_chainlabel chain, int verbose, 
+extern int flush_entries4(const xt_chainlabel chain, int verbose,
 			struct xtc_handle *handle);
 extern int for_each_chain4(int (*fn)(const xt_chainlabel, int, struct xtc_handle *),
 		int verbose, int builtinstoo, struct xtc_handle *handle);
--- a/include/iptables/internal.h
+++ b/include/iptables/internal.h
@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 #ifndef IPTABLES_INTERNAL_H
 #define IPTABLES_INTERNAL_H 1

--- a/include/json_print.h
+++ b/include/json_print.h
@ -0,0 +1,107 @@
+/*
+ * json_print.h		"print regular or json output, based on json_writer".
+ *
+ *             This program is free software; you can redistribute it and/or
+ *             modify it under the terms of the GNU General Public License
+ *             as published by the Free Software Foundation; either version
+ *             2 of the License, or (at your option) any later version.
+ *
+ * Authors:    Julien Fortin, <julien@cumulusnetworks.com>
+ */
+
+#ifndef _JSON_PRINT_H_
+#define _JSON_PRINT_H_
+
+#include "json_writer.h"
+#include "color.h"
+
+#define _IS_JSON_CONTEXT(type) (is_json_context() && ((type) & PRINT_JSON || (type) & PRINT_ANY)) /* JSON active and type has PRINT_JSON or PRINT_ANY */
+#define _IS_FP_CONTEXT(type)   (!is_json_context() && ((type) & PRINT_FP || (type) & PRINT_ANY)) /* JSON inactive and type has PRINT_FP or PRINT_ANY */
+
+json_writer_t *get_json_writer(void);
+
+/*
+ * use:
+ *      - PRINT_ANY for context based output
+ *      - PRINT_FP for non json specific output
+ *      - PRINT_JSON for json specific output
+ */
+enum output_type {	/* distinct bits: the _IS_*_CONTEXT() macros test them with '&' */
+	PRINT_FP = 1,	/* non-JSON-specific output */
+	PRINT_JSON = 2,	/* JSON-specific output */
+	PRINT_ANY = 4,	/* context-based: accepted in both JSON and non-JSON contexts */
+};
+
+void new_json_obj(int json);
+void delete_json_obj(void);
+void new_json_obj_plain(int json);
+void delete_json_obj_plain(void);
+
+bool is_json_context(void);
+
+void open_json_object(const char *str);
+void close_json_object(void);
+void open_json_array(enum output_type type, const char *delim);
+void close_json_array(enum output_type type, const char *delim);
+
+void print_nl(void);
+
+#define _PRINT_FUNC(type_name, type)					\
+	int print_color_##type_name(enum output_type t,			\
+				    enum color_attr color,		\
+				    const char *key,			\
+				    const char *fmt,			\
+				    type value);			\
+									\
+	static inline int print_##type_name(enum output_type t,		\
+					    const char *key,		\
+					    const char *fmt,		\
+					    type value)			\
+	{								\
+		return print_color_##type_name(t, COLOR_NONE, key, fmt,	\
+					       value);			\
+	}
+
+/* These functions return 0 if printing to a JSON context, number of
+ * characters printed otherwise (as calculated by printf(3)).
+ */
+_PRINT_FUNC(int, int)
+_PRINT_FUNC(s64, int64_t)
+_PRINT_FUNC(bool, bool)
+_PRINT_FUNC(on_off, bool)
+_PRINT_FUNC(null, const char*)
+_PRINT_FUNC(string, const char*)
+_PRINT_FUNC(uint, unsigned int)
+_PRINT_FUNC(size, __u32)
+_PRINT_FUNC(u64, uint64_t)
+_PRINT_FUNC(hhu, unsigned char)
+_PRINT_FUNC(hu, unsigned short)
+_PRINT_FUNC(hex, unsigned int)
+_PRINT_FUNC(0xhex, unsigned long long)
+_PRINT_FUNC(luint, unsigned long)
+_PRINT_FUNC(lluint, unsigned long long)
+_PRINT_FUNC(float, double)
+_PRINT_FUNC(tv, const struct timeval *)
+#undef _PRINT_FUNC
+
+#define _PRINT_NAME_VALUE_FUNC(type_name, type, format_char)		  \
+	void print_##type_name##_name_value(const char *name, type value) \
+
+_PRINT_NAME_VALUE_FUNC(uint, unsigned int, u);
+_PRINT_NAME_VALUE_FUNC(string, const char*, s);
+#undef _PRINT_NAME_VALUE_FUNC
+
+int print_color_rate(bool use_iec, enum output_type t, enum color_attr color,
+		     const char *key, const char *fmt, unsigned long long rate);
+
+static inline int print_rate(bool use_iec, enum output_type t,
+			     const char *key, const char *fmt,
+			     unsigned long long rate)
+{	/* convenience wrapper around print_color_rate() */
+	return print_color_rate(use_iec, t, COLOR_NONE, key, fmt, rate);	/* all arguments forwarded; color fixed to COLOR_NONE */
+}
+
+/* A backdoor to the size formatter. Please use print_size() instead. */
+char *sprint_size(__u32 sz, char *buf);
+
+#endif /* _JSON_PRINT_H_ */
--- a/include/json_writer.h
+++ b/include/json_writer.h
@ -1,14 +1,10 @@
+/* SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause) */
 /*
 * Simple streaming JSON writer
 *
 * This takes care of the annoying bits of JSON syntax like the commas
 * after elements
 *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- *
 * Authors:	Stephen Hemminger <stephen@networkplumber.org>
 */

@ -33,20 +29,39 @@ void jsonw_pretty(json_writer_t *self, bool on);
 void jsonw_name(json_writer_t *self, const char *name);

 /* Add value  */
+__attribute__((format(printf, 2, 3)))
+void jsonw_printf(json_writer_t *self, const char *fmt, ...);
 void jsonw_string(json_writer_t *self, const char *value);
 void jsonw_bool(json_writer_t *self, bool value);
 void jsonw_float(json_writer_t *self, double number);
-void jsonw_uint(json_writer_t *self, uint64_t number);
-void jsonw_int(json_writer_t *self, int64_t number);
+void jsonw_float_fmt(json_writer_t *self, const char *fmt, double num);
+void jsonw_uint(json_writer_t *self, unsigned int number);
+void jsonw_u64(json_writer_t *self, uint64_t number);
+void jsonw_xint(json_writer_t *self, uint64_t number);
+void jsonw_hhu(json_writer_t *self, unsigned char num);
+void jsonw_hu(json_writer_t *self, unsigned short number);
+void jsonw_int(json_writer_t *self, int number);
+void jsonw_s64(json_writer_t *self, int64_t number);
 void jsonw_null(json_writer_t *self);
+void jsonw_luint(json_writer_t *self, unsigned long num);
+void jsonw_lluint(json_writer_t *self, unsigned long long num);

 /* Useful Combinations of name and value */
 void jsonw_string_field(json_writer_t *self, const char *prop, const char *val);
 void jsonw_bool_field(json_writer_t *self, const char *prop, bool value);
 void jsonw_float_field(json_writer_t *self, const char *prop, double num);
-void jsonw_uint_field(json_writer_t *self, const char *prop, uint64_t num);
-void jsonw_int_field(json_writer_t *self, const char *prop, int64_t num);
+void jsonw_uint_field(json_writer_t *self, const char *prop, unsigned int num);
+void jsonw_u64_field(json_writer_t *self, const char *prop, uint64_t num);
+void jsonw_xint_field(json_writer_t *self, const char *prop, uint64_t num);
+void jsonw_hhu_field(json_writer_t *self, const char *prop, unsigned char num);
+void jsonw_hu_field(json_writer_t *self, const char *prop, unsigned short num);
+void jsonw_int_field(json_writer_t *self, const char *prop, int num);
+void jsonw_s64_field(json_writer_t *self, const char *prop, int64_t num);
 void jsonw_null_field(json_writer_t *self, const char *prop);
+void jsonw_luint_field(json_writer_t *self, const char *prop,
+			unsigned long num);
+void jsonw_lluint_field(json_writer_t *self, const char *prop,
+			unsigned long long num);

 /* Collections */
 void jsonw_start_object(json_writer_t *self);
--- a/include/libgenl.h
+++ b/include/libgenl.h
@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 #ifndef __LIBGENL_H__
 #define __LIBGENL_H__

@ -20,8 +21,9 @@ struct {								\
 	},								\
 }

-extern int genl_resolve_family(struct rtnl_handle *grth, const char *family);
-extern int genl_init_handle(struct rtnl_handle *grth, const char *family,
-			    int *genl_family);
+int genl_add_mcast_grp(struct rtnl_handle *grth, __u16 genl_family, const char *group);
+int genl_resolve_family(struct rtnl_handle *grth, const char *family);
+int genl_init_handle(struct rtnl_handle *grth, const char *family,
+		     int *genl_family);

 #endif /* __LIBGENL_H__ */
--- a/Show More
+++ b/Show More