tc: flower: Fix buffer overflow on large labels

The buffer is 64 bytes, but printing a label in hex can take 66 bytes, so it overflows when the string terminator ('\0') is written. Fix that by increasing the print buffer size. Example of an overflowing ct_label: ct_label 11111111111111111111111111111111/11111111111111111111111111111111 Fixes: 2fffb1c030 ("tc: flower: Add matching on conntrack info") Signed-off-by: Paul Blakey <paulb@nvidia.com> Signed-off-by: Stephen Hemminger <stephen@networkplumber.org>
uapi: update to if_ether.h
2021-12-06 13:44:50 -08:00 · 2021-12-03 12:20:02 -08:00 · 2021-11-18 15:01:48 -08:00 · 2021-11-18 15:01:04 -08:00 · 2021-11-18 10:00:19 -08:00 · 2021-11-18 09:56:57 -08:00
517 changed files with 49763 additions and 11364 deletions
--- a/.clang-format
+++ b/.clang-format
@ -0,0 +1,130 @@
+# SPDX-License-Identifier: GPL-2.0
+#
+# clang-format configuration file. Intended for clang-format >= 4.
+#
+# For more information, see:
+#
+#   Documentation/process/clang-format.rst
+#   https://clang.llvm.org/docs/ClangFormat.html
+#   https://clang.llvm.org/docs/ClangFormatStyleOptions.html
+#
+---
+AccessModifierOffset: -4
+AlignAfterOpenBracket: Align
+AlignConsecutiveAssignments: false
+AlignConsecutiveDeclarations: false
+#AlignEscapedNewlines: Left # Unknown to clang-format-4.0
+AlignOperands: true
+AlignTrailingComments: false
+AllowAllParametersOfDeclarationOnNextLine: false
+AllowShortBlocksOnASingleLine: false
+AllowShortCaseLabelsOnASingleLine: false
+AllowShortFunctionsOnASingleLine: None
+AllowShortIfStatementsOnASingleLine: false
+AllowShortLoopsOnASingleLine: false
+AlwaysBreakAfterDefinitionReturnType: None
+AlwaysBreakAfterReturnType: None
+AlwaysBreakBeforeMultilineStrings: false
+AlwaysBreakTemplateDeclarations: false
+BinPackArguments: true
+BinPackParameters: true
+BraceWrapping:
+  AfterClass: false
+  AfterControlStatement: false
+  AfterEnum: false
+  AfterFunction: true
+  AfterNamespace: true
+  AfterObjCDeclaration: false
+  AfterStruct: false
+  AfterUnion: false
+  #AfterExternBlock: false # Unknown to clang-format-5.0
+  BeforeCatch: false
+  BeforeElse: false
+  IndentBraces: false
+  #SplitEmptyFunction: true # Unknown to clang-format-4.0
+  #SplitEmptyRecord: true # Unknown to clang-format-4.0
+  #SplitEmptyNamespace: true # Unknown to clang-format-4.0
+BreakBeforeBinaryOperators: None
+BreakBeforeBraces: Custom
+#BreakBeforeInheritanceComma: false # Unknown to clang-format-4.0
+BreakBeforeTernaryOperators: false
+BreakConstructorInitializersBeforeComma: false
+#BreakConstructorInitializers: BeforeComma # Unknown to clang-format-4.0
+BreakAfterJavaFieldAnnotations: false
+BreakStringLiterals: false
+ColumnLimit: 80
+CommentPragmas: '^ IWYU pragma:'
+#CompactNamespaces: false # Unknown to clang-format-4.0
+ConstructorInitializerAllOnOneLineOrOnePerLine: false
+ConstructorInitializerIndentWidth: 8
+ContinuationIndentWidth: 8
+Cpp11BracedListStyle: false
+DerivePointerAlignment: false
+DisableFormat: false
+ExperimentalAutoDetectBinPacking: false
+#FixNamespaceComments: false # Unknown to clang-format-4.0
+
+# Taken from:
+#   git grep -h '^#define [^[:space:]]*for_each[^[:space:]]*(' include/ \
+#   | sed "s,^#define \([^[:space:]]*for_each[^[:space:]]*\)(.*$,  - '\1'," \
+#   | sort | uniq
+ForEachMacros:
+  - 'list_for_each_entry'
+  - 'list_for_each_entry_safe'
+  - 'mnl_attr_for_each_nested'
+  - 'hlist_for_each'
+  - 'hlist_for_each_safe'
+  - 'hlist_for_each_entry'
+
+#IncludeBlocks: Preserve # Unknown to clang-format-5.0
+IncludeCategories:
+  - Regex: '.*'
+    Priority: 1
+IncludeIsMainRegex: '(Test)?$'
+IndentCaseLabels: false
+#IndentPPDirectives: None # Unknown to clang-format-5.0
+IndentWidth: 8
+IndentWrappedFunctionNames: false
+JavaScriptQuotes: Leave
+JavaScriptWrapImports: true
+KeepEmptyLinesAtTheStartOfBlocks: false
+MacroBlockBegin: ''
+MacroBlockEnd: ''
+MaxEmptyLinesToKeep: 1
+NamespaceIndentation: Inner
+#ObjCBinPackProtocolList: Auto # Unknown to clang-format-5.0
+ObjCBlockIndentWidth: 8
+ObjCSpaceAfterProperty: true
+ObjCSpaceBeforeProtocolList: true
+
+# Taken from git's rules
+#PenaltyBreakAssignment: 10 # Unknown to clang-format-4.0
+PenaltyBreakBeforeFirstCallParameter: 30
+PenaltyBreakComment: 10
+PenaltyBreakFirstLessLess: 0
+PenaltyBreakString: 10
+PenaltyExcessCharacter: 100
+PenaltyReturnTypeOnItsOwnLine: 60
+
+PointerAlignment: Right
+ReflowComments: false
+SortIncludes: false
+#SortUsingDeclarations: false # Unknown to clang-format-4.0
+SpaceAfterCStyleCast: false
+SpaceAfterTemplateKeyword: true
+SpaceBeforeAssignmentOperators: true
+#SpaceBeforeCtorInitializerColon: true # Unknown to clang-format-5.0
+#SpaceBeforeInheritanceColon: true # Unknown to clang-format-5.0
+SpaceBeforeParens: ControlStatements
+#SpaceBeforeRangeBasedForLoopColon: true # Unknown to clang-format-5.0
+SpaceInEmptyParentheses: false
+SpacesBeforeTrailingComments: 1
+SpacesInAngles: false
+SpacesInContainerLiterals: false
+SpacesInCStyleCastParentheses: false
+SpacesInParentheses: false
+SpacesInSquareBrackets: false
+Standard: Cpp03
+TabWidth: 8
+UseTab: Always
+...
--- a/.mailmap
+++ b/.mailmap
@ -0,0 +1,22 @@
+#
+# This list is used by git-shortlog to fix a few botched name translations
+# in the git archive, either because the author's full name was messed up
+# and/or not always written the same way, making contributions from the
+# same person appearing not to be so or badly displayed.
+#
+# Format
+#  Full name <goodaddress> <badaddress> 
+Steve Wise <larrystevenwise@gmail.com> <swise@opengridcomputing.com>
+Steve Wise <larrystevenwise@gmail.com> <swise@chelsio.com>
+
+Stephen Hemminger <stephen@networkplumber.org> <sthemmin@microsoft.com>
+Stephen Hemminger <stephen@networkplumber.org> <shemming@brocade.com>
+Stephen Hemminger <stephen@networkplumber.org> <stephen.hemminger@vyatta.com>
+Stephen Hemminger <stephen@networkplumber.org> <shemminger@vyatta.com>
+Stephen Hemminger <stephen@networkplumber.org> <shemminger>
+Stephen Hemminger <stephen@networkplumber.org> <shemminger@linux-foundation.org>
+Stephen Hemminger <stephen@networkplumber.org> <shemminger@osdl.org>
+Stephen Hemminger <stephen@networkplumber.org> <osdl.org!shemminger>
+Stephen Hemminger <stephen@networkplumber.org> <osdl.net!shemminger>
+
+David Ahern <dsahern@gmail.com> <dsa@cumulusnetworks.com>
--- a/54
+++ b/54
@ -1,6 +1,8 @@
 # SPDX-License-Identifier: GPL-2.0
 # Top level Makefile for iproute2

+-include config.mk
+
 ifeq ("$(origin V)", "command line")
 VERBOSE = $(V)
 endif
@ -13,7 +15,6 @@ MAKEFLAGS += --no-print-directory
 endif

 PREFIX?=/usr
-LIBDIR?=$(PREFIX)/lib
 SBINDIR?=/sbin
 CONFDIR?=/etc/iproute2
 NETNS_RUN_DIR?=/var/run/netns
@ -40,28 +41,31 @@ DEFINES+=-DCONFDIR=\"$(CONFDIR)\" \
         -DNETNS_RUN_DIR=\"$(NETNS_RUN_DIR)\" \
         -DNETNS_ETC_DIR=\"$(NETNS_ETC_DIR)\"

-#options for decnet
-ADDLIB+=dnet_ntop.o dnet_pton.o
+#options for AX.25
+ADDLIB+=ax25_ntop.o

-#options for ipx
-ADDLIB+=ipx_ntop.o ipx_pton.o
+#options for ROSE
+ADDLIB+=rose_ntop.o

 #options for mpls
 ADDLIB+=mpls_ntop.o mpls_pton.o

+#options for NETROM
+ADDLIB+=netrom_ntop.o
+
 CC := gcc
 HOSTCC ?= $(CC)
 DEFINES += -D_GNU_SOURCE
 # Turn on transparent support for LFS
 DEFINES += -D_FILE_OFFSET_BITS=64 -D_LARGEFILE_SOURCE -D_LARGEFILE64_SOURCE
-CCOPTS = -O2
+CCOPTS = -O2 -pipe
 WFLAGS := -Wall -Wstrict-prototypes  -Wmissing-prototypes
 WFLAGS += -Wmissing-declarations -Wold-style-definition -Wformat=2

 CFLAGS := $(WFLAGS) $(CCOPTS) -I../include -I../include/uapi $(DEFINES) $(CFLAGS)
 YACCFLAGS = -d -t -v

-SUBDIRS=lib ip tc bridge misc netem genl tipc devlink rdma man
+SUBDIRS=lib ip tc bridge misc netem genl tipc devlink rdma dcb man vdpa

 LIBNETLINK=../lib/libutil.a ../lib/libnetlink.a
 LDLIBS += $(LIBNETLINK)
@ -69,7 +73,9 @@ LDLIBS += $(LIBNETLINK)
 all: config.mk
 	@set -e; \
 	for i in $(SUBDIRS); \
-	do echo; echo $$i; $(MAKE) $(MFLAGS) -C $$i; done
+	do echo; echo $$i; $(MAKE) -C $$i; done
+
+.PHONY: clean clobber distclean check cscope version

 help:
 	@echo "Make Targets:"
@ -79,48 +85,52 @@ help:
 	@echo " install             - install binaries on local machine"
 	@echo " check               - run tests"
 	@echo " cscope              - build cscope database"
-	@echo " snapshot            - generate version number header"
+	@echo " version             - update version"
 	@echo ""
 	@echo "Make Arguments:"
 	@echo " V=[0|1]             - set build verbosity level"

 config.mk:
-	sh configure $(KERNEL_INCLUDE)
+	@if [ ! -f config.mk -o configure -nt config.mk ]; then \
+		sh configure $(KERNEL_INCLUDE); \
+	fi

 install: all
 	install -m 0755 -d $(DESTDIR)$(SBINDIR)
 	install -m 0755 -d $(DESTDIR)$(CONFDIR)
 	install -m 0755 -d $(DESTDIR)$(ARPDDIR)
 	install -m 0755 -d $(DESTDIR)$(HDRDIR)
-	install -m 0755 -d $(DESTDIR)$(DOCDIR)/examples
-	install -m 0755 -d $(DESTDIR)$(DOCDIR)/examples/diffserv
-	install -m 0644 README.iproute2+tc $(shell find examples -maxdepth 1 -type f) \
-		$(DESTDIR)$(DOCDIR)/examples
-	install -m 0644 $(shell find examples/diffserv -maxdepth 1 -type f) \
-		$(DESTDIR)$(DOCDIR)/examples/diffserv
 	@for i in $(SUBDIRS);  do $(MAKE) -C $$i install; done
 	install -m 0644 $(shell find etc/iproute2 -maxdepth 1 -type f) $(DESTDIR)$(CONFDIR)
 	install -m 0755 -d $(DESTDIR)$(BASH_COMPDIR)
 	install -m 0644 bash-completion/tc $(DESTDIR)$(BASH_COMPDIR)
+	install -m 0644 bash-completion/devlink $(DESTDIR)$(BASH_COMPDIR)
 	install -m 0644 include/bpf_elf.h $(DESTDIR)$(HDRDIR)

-snapshot:
-	echo "static const char SNAPSHOT[] = \""`date +%y%m%d`"\";" \
-		> include/SNAPSHOT.h
+version:
+	echo "static const char version[] = \""`git describe --tags --long`"\";" \
+		> include/version.h

 clean:
 	@for i in $(SUBDIRS) testsuite; \
-	do $(MAKE) $(MFLAGS) -C $$i clean; done
+	do $(MAKE) -C $$i clean; done

 clobber:
 	touch config.mk
-	$(MAKE) $(MFLAGS) clean
+	$(MAKE) clean
 	rm -f config.mk cscope.*

 distclean: clobber

 check: all
-	cd testsuite && $(MAKE) && $(MAKE) alltests
+	$(MAKE) -C testsuite
+	$(MAKE) -C testsuite alltests
+	@if command -v man >/dev/null 2>&1; then \
+		echo "Checking manpages for syntax errors..."; \
+		$(MAKE) -C man check; \
+	else \
+		echo "man not installed, skipping checks for syntax errors."; \
+	fi

 cscope:
 	cscope -b -q -R -Iinclude -sip -slib -smisc -snetem -stc
--- a/15
+++ b/15
@ -28,17 +28,12 @@ The makefile will automatically build a config.mk file which
 contains definitions of libraries that may or may not be available
 on the system such as: ATM, ELF, MNL, and SELINUX.

-3. To make documentation, cd to doc/ directory , then
-   look at start of Makefile and set correct values for
-   PAGESIZE=a4		, ie: a4 , letter ...	(string)
-   PAGESPERPAGE=2	, ie: 1 , 2 ...		(numeric)
-   and make there. It assumes, that latex, dvips and psnup
-   are in your path.
+3. include/uapi

-4. This package includes matching sanitized kernel headers because
-   the build environment may not have up to date versions. See Makefile
-   if you have special requirements and need to point at different
-   kernel include files.
+This package includes matching sanitized kernel headers because
+the build environment may not have up to date versions. See Makefile
+if you have special requirements and need to point at different
+kernel include files.

 Stephen Hemminger
 stephen@networkplumber.org
--- a/README.decnet
+++ b/README.decnet
@ -1,33 +0,0 @@
-
-Here are a few quick points about DECnet support...
-
- o iproute2 is the tool of choice for configuring the DECnet support for
-   Linux. For many features, it is the only tool which can be used to
-   configure them.
-
- o No name resolution is available as yet, all addresses must be
-   entered numerically.
-
- o Remember to set the hardware address of the interface using: 
-
-   ip link set ethX address xx:xx:xx:xx:xx:xx
-      (where xx:xx:xx:xx:xx:xx is the MAC address for your DECnet node
-       address)
-
-   if your Ethernet card won't listen to more than one unicast
-   mac address at once. If the Linux DECnet stack doesn't talk to
-   any other DECnet nodes, then check this with tcpdump and if its
-   a problem, change the mac address (but do this _before_ starting
-   any other network protocol on the interface)
-
- o Whilst you can use ip addr add to add more than one DECnet address to an
-   interface, don't expect addresses which are not the same as the
-   kernels node address to work properly with 2.4 kernels. This should
-   be fine with 2.6 kernels as the routing code has been extensively
-   modified and improved.
-
- o The DECnet support is currently self contained. It does not depend on
-   the libdnet library.
-
-Steve Whitehouse <steve@chygwyn.com>
-
--- a/README.distribution
+++ b/README.distribution
@ -1,95 +0,0 @@
-I. About the distribution tables
-
-The table used for "synthesizing" the distribution is essentially a scaled,
-translated, inverse to the cumulative distribution function.
-
-Here's how to think about it: Let F() be the cumulative distribution
-function for a probability distribution X.  We'll assume we've scaled
-things so that X has mean 0 and standard deviation 1, though that's not
-so important here.  Then:
-
-	F(x) = P(X <= x) = \int_{-inf}^x f
-
-where f is the probability density function.
-
-F is monotonically increasing, so has an inverse function G, with range
-0 to 1.  Here, G(t) = the x such that P(X <= x) = t.  (In general, G may
-have singularities if X has point masses, i.e., points x such that
-P(X = x) > 0.)
-
-Now we create a tabular representation of G as follows:  Choose some table
-size N, and for the ith entry, put in G(i/N).  Let's call this table T.
-
-The claim now is, I can create a (discrete) random variable Y whose
-distribution has the same approximate "shape" as X, simply by letting
-Y = T(U), where U is a discrete uniform random variable with range 1 to N.
-To see this, it's enough to show that Y's cumulative distribution function,
-(let's call it H), is a discrete approximation to F.  But
-
-	H(x) = P(Y <= x)
-	     = (# of entries in T <= x) / N   -- as Y chosen uniformly from T
-	     = i/N, where i is the largest integer such that G(i/N) <= x
-	     = i/N, where i is the largest integer such that i/N <= F(x)
-	     		-- since G and F are inverse functions (and F is
-	     		   increasing)
-	     = floor(N*F(x))/N
-
-as desired.
-
-II. How to create distribution tables (in theory)
-
-How can we create this table in practice? In some cases, F may have a
-simple expression which allows evaluating its inverse directly.  The
-Pareto distribution is one example of this.  In other cases, and
-especially for matching an experimentally observed distribution, it's
-easiest simply to create a table for F and "invert" it.  Here, we give
-a concrete example, namely how the new "experimental" distribution was
-created.
-
-1. Collect enough data points to characterize the distribution.  Here, I
-collected 25,000 "ping" roundtrip times to a "distant" point (time.nist.gov).
-That's far more data than is really necessary, but it was fairly painless to
-collect it, so...
-
-2. Normalize the data so that it has mean 0 and standard deviation 1.
-
-3. Determine the cumulative distribution.  The code I wrote creates a table
-covering the range -10 to +10, with granularity .00005.  Obviously, this
-is absurdly over-precise, but since it's a one-time only computation, I
-figured it hardly mattered.
-
-4. Invert the table: for each table entry F(x) = y, make the y*TABLESIZE
-(here, 4096) entry be x*TABLEFACTOR (here, 8192).  This creates a table
-for the ("normalized") inverse of size TABLESIZE, covering its domain 0
-to 1 with granularity 1/TABLESIZE.  Note that even with the granularity
-used in creating the table for F, it's possible not all the entries in
-the table for G will be filled in.  So, make a pass through the
-inverse's table, filling in any missing entries by linear interpolation.
-
-III. How to create distribution tables (in practice)
-
-If you want to do all this yourself, I've provided several tools to help:
-
-1. maketable does the steps 2-4 above, and then generates the appropriate
-header file.  So if you have your own time distribution, you can generate
-the header simply by:
-
-	maketable < time.values > header.h
-
-2. As explained in the other README file, the somewhat sleazy way I have
-of generating correlated values needs correction.  You can generate your
-own correction tables by compiling makesigtable and makemutable with
-your header file.  Check the Makefile to see how this is done.
-
-3. Warning: maketable, makesigtable and especially makemutable do
-enormous amounts of floating point arithmetic.  Don't try running
-these on an old 486.  (NIST Net itself will run fine on such a
-system, since in operation, it just needs to do a few simple integral
-calculations.  But getting there takes some work.)
-
-4. The tables produced are all normalized for mean 0 and standard
-deviation 1.  How do you know what values to use for real?  Here, I've
-provided a simple "stats" utility.  Give it a series of floating point
-values, and it will return their mean (mu), standard deviation (sigma),
-and correlation coefficient (rho).  You can then plug these values
-directly into NIST Net.
--- a/README.iproute2+tc
+++ b/README.iproute2+tc
@ -1,123 +0,0 @@
-iproute2+tc*
-
-It's the first release of Linux traffic control engine.
-
-
-NOTES.
-* csz scheduler is inoperational at the moment, and probably
-  never will be repaired but replaced with h-pfq scheduler.
-* To use "fw" classifier you will need ipfwchains patch.
-* No manual available. Ask me, if you have problems (only try to guess
-  answer yourself at first 8)).
-
-
-Micro-manual how to start it the first time
-------------------------------------------
-
-A. Attach CBQ to eth1:
-
-tc qdisc add dev eth1 root handle 1: cbq bandwidth 10Mbit allot 1514 cell 8 \
-avpkt 1000 mpu 64
-
-B. Add root class:
-
-tc class add dev eth1 parent 1:0 classid 1:1 cbq bandwidth 10Mbit rate 10Mbit \
-allot 1514 cell 8 weight 1Mbit prio 8 maxburst 20 avpkt 1000
-
-C. Add default interactive class:
-
-tc class add dev eth1 parent 1:1 classid 1:2 cbq bandwidth 10Mbit rate 1Mbit \
-allot 1514 cell 8 weight 100Kbit prio 3 maxburst 20 avpkt 1000 split 1:0 \
-defmap c0
-
-D. Add default class:
-
-tc class add dev eth1 parent 1:1 classid 1:3 cbq bandwidth 10Mbit rate 8Mbit \
-allot 1514 cell 8 weight 800Kbit prio 7 maxburst 20 avpkt 1000 split 1:0 \
-defmap 3f
-
-etc. etc. etc. Well, it is enough to start 8) The rest can be guessed 8)
-Look also at more elaborated example, ready to start rsvpd,
-in rsvp/cbqinit.eth1.
-
-
-Terminology and advices about setting CBQ parameters may be found in Sally Floyd
-papers. 
-
-
-Pairs X:Y are class handles, X:0 are qdisc handles.
-weight should be proportional to rate for leaf classes
-(I repeated it ten times less, but it is not necessary)
-
-defmap is bitmap of logical priorities served by this class.
-
-E. Another qdiscs are simpler. F.e. let's join TBF on class 1:2
-
-tc qdisc add dev eth1 parent 1:2 tbf rate 64Kbit buffer 5Kb/8 limit 10Kb
-
-F. Look at all that we created:
-
-tc qdisc ls dev eth1
-tc class ls dev eth1
-
-G. Install "route" classifier on root of cbq and map destination from realm
-1 to class 1:2
-
-tc filter add dev eth1 parent 1:0 protocol ip prio 100 route to 1 classid 1:2
-
-H. Assign routes to 10.11.12.0/24 to realm 1
-
-ip route add 10.11.12.0/24 dev eth1 via whatever realm 1
-
-etc. The same thing can be made with rules.
-I still did not test ipchains, but they should work too.
-
-
-Setup and code example of BPF classifier and action can be found under
-examples/bpf/, which should explain everything for getting started.
-
-
-Setup of rsvp and u32 classifiers is more hairy.
-If you read RSVP specs, you will understand how rsvp classifier
-works easily. What's about u32... That's example:
-
-
-#! /bin/sh
-
-TC=/home/root/tc
-
-# Setup classifier root on eth1 root (it is cbq)
-$TC filter add dev eth1 parent 1:0 prio 5 protocol ip u32
-
-# Create hash table of 256 slots with ID 1:
-$TC filter add dev eth1 parent 1:0 prio 5 handle 1: u32 divisor 256
-
-# Add to 6th slot of hash table rule to select tcp/telnet to 193.233.7.75
-# direct it to class 1:4 and prescribe to fall to best effort,
-# if traffic violate TBF (32kbit,5K)
-$TC filter add dev eth1 parent 1:0 prio 5 u32 ht 1:6: \
-	match ip dst 193.233.7.75 \
-	match tcp dst 0x17 0xffff \
-	flowid 1:4 \
-	police rate 32kbit buffer 5kb/8 mpu 64 mtu 1514 index 1
-
-# Add to 1th slot of hash table rule to select icmp to 193.233.7.75
-# direct it to class 1:4 and prescribe to fall to best effort,
-# if traffic violate TBF (10kbit,5K)
-$TC filter add dev eth1 parent 1:0 prio 5 u32 ht 1:: \
-	sample ip protocol 1 0xff \
-	match ip dst 193.233.7.75 \
-	flowid 1:4 \
-	police rate 10kbit buffer 5kb/8 mpu 64 mtu 1514 index 2
-
-# Lookup hash table, if it is not fragmented frame
-# Use protocol as hash key
-$TC filter add dev eth1 parent 1:0 prio 5 handle ::1 u32 ht 800:: \
-	match ip nofrag \
-	offset mask 0x0F00 shift 6 \
-	hashkey mask 0x00ff0000 at 8 \
-	link 1:
-
-
-Alexey Kuznetsov
-kuznet@ms2.inr.ac.ru
--- a/README.lnstat
+++ b/README.lnstat
@ -1,81 +0,0 @@
-lnstat - linux networking statistics
-(C) 2004 Harald Welte <laforge@gnumonks.org
-======================================================================
-
-This tool is a generalized and more feature-complete replacement for the old
-'rtstat' program.
-
-In addition to routing cache statistics, it supports any kind of statistics
-the linux kernel exports via a file in /proc/net/stat.  In a stock 2.6.9
-kernel, this is 
-	per-protocol neighbour cache statistics 
-		(ipv4, ipv6, atm, decnet)
-	routing cache statistics
-		(ipv4)
-	connection tracking statistics
-		(ipv4)
-
-Please note that lnstat will adopt to any additional statistics that might be
-added to the kernel at some later point
-
-I personally always like examples more than any reference documentation, so I
-list the following examples.  If somebody wants to do a manpage, feel free
-to send me a patch :)
-
-EXAMPLES:
-
-In order to get a list of supported statistics files, you can run
-
-	lnstat -d
-
-It will display something like
- 
-/proc/net/stat/arp_cache:
-         1: entries
-         2: allocs
-         3: destroys
-[...]
-/proc/net/stat/rt_cache:
-         1: entries
-         2: in_hit
-         3: in_slow_tot
-
-You can now select the files/keys you are interested by something like
-
-	lnstat -k arp_cache:entries,rt_cache:in_hit,arp_cache:destroys
-
-arp_cach|rt_cache|arp_cach|
- entries|  in_hit|destroys|
-       6|       6|       0|
-       6|       0|       0|
-       6|       2|       0|
-
-
-You can specify the interval (e.g. 10 seconds) by:
-	
-	lnstat -i 10
-
-You can specify to only use one particular statistics file:
-
-	lnstat -f ip_conntrack
-
-You can specify individual field widths 
-
-	lnstat -k arp_cache:entries,rt_cache:entries -w 20,8
-
-You can specify not to print a header at all
-	
-	lnstat -s 0
-
-You can specify to print a header only at start of the program
-
-	lnstat -s 1
-
-You can specify to print a header at start and every 20 lines:
-
-	lnstat -s 20
-
-You can specify the number of samples you want to take (e.g. 5):
-	
-	lnstat -c 5
-
--- a/bash-completion/devlink
+++ b/bash-completion/devlink
--- a/bash-completion/tc
+++ b/bash-completion/tc
@ -3,8 +3,8 @@
 # Copyright 2016 Quentin Monnet <quentin.monnet@6wind.com>

 QDISC_KIND=' choke codel bfifo pfifo pfifo_head_drop fq fq_codel gred hhf \
-            mqprio multiq netem pfifo_fast pie red rr sfb sfq tbf atm cbq drr \
-            dsmark hfsc htb prio qfq '
+            mqprio multiq netem pfifo_fast pie fq_pie red rr sfb sfq tbf atm \
+            cbq drr dsmark hfsc htb prio qfq '
 FILTER_KIND=' basic bpf cgroup flow flower fw route rsvp tcindex u32 matchall '
 ACTION_KIND=' gact mirred bpf sample '

@ -302,7 +302,7 @@ _tc_qdisc_options()
            ;;
        gred)
            _tc_once_attr 'setup vqs default grio vq prio limit min max avpkt \
-                burst probability bandwidth'
+                burst probability bandwidth ecn harddrop'
            return 0
            ;;
        hhf)
@ -323,6 +323,15 @@ _tc_qdisc_options()
            _tc_once_attr 'limit target tupdate alpha beta'
            _tc_one_of_list 'bytemode nobytemode'
            _tc_one_of_list 'ecn noecn'
+            _tc_one_of_list 'dq_rate_estimator no_dq_rate_estimator'
+            return 0
+            ;;
+        fq_pie)
+            _tc_once_attr 'limit flows target tupdate \
+                alpha beta quantum memory_limit ecn_prob'
+            _tc_one_of_list 'ecn noecn'
+            _tc_one_of_list 'bytemode nobytemode'
+            _tc_one_of_list 'dq_rate_estimator no_dq_rate_estimator'
            return 0
            ;;
        red)
--- a/bridge/br_common.h
+++ b/bridge/br_common.h
@ -8,8 +8,13 @@

 void print_vlan_info(struct rtattr *tb, int ifindex);
 int print_linkinfo(struct nlmsghdr *n, void *arg);
+int print_mdb_mon(struct nlmsghdr *n, void *arg);
 int print_fdb(struct nlmsghdr *n, void *arg);
-int print_mdb(struct nlmsghdr *n, void *arg);
+void print_stp_state(__u8 state);
+int parse_stp_state(const char *arg);
+int print_vlan_rtm(struct nlmsghdr *n, void *arg, bool monitor,
+		   bool global_only);
+void br_print_router_port_stats(struct rtattr *pattr);

 int do_fdb(int argc, char **argv);
 int do_mdb(int argc, char **argv);
--- a/bridge/bridge.c
+++ b/bridge/bridge.c
@ -12,7 +12,7 @@
 #include <string.h>
 #include <errno.h>

-#include "SNAPSHOT.h"
+#include "version.h"
 #include "utils.h"
 #include "br_common.h"
 #include "namespace.h"
@ -37,10 +37,10 @@ static void usage(void)
 	fprintf(stderr,
 "Usage: bridge [ OPTIONS ] OBJECT { COMMAND | help }\n"
 "       bridge [ -force ] -batch filename\n"
-"where	OBJECT := { link | fdb | mdb | vlan | monitor }\n"
-"	OPTIONS := { -V[ersion] | -s[tatistics] | -d[etails] |\n"
-"		     -o[neline] | -t[imestamp] | -n[etns] name |\n"
-"		     -c[ompressvlans] -color -p[retty] -j[son] }\n");
+"where  OBJECT := { link | fdb | mdb | vlan | monitor }\n"
+"       OPTIONS := { -V[ersion] | -s[tatistics] | -d[etails] |\n"
+"                    -o[neline] | -t[imestamp] | -n[etns] name |\n"
+"                    -c[ompressvlans] -color -p[retty] -j[son] }\n");
 	exit(-1);
 }

@ -77,45 +77,23 @@ static int do_cmd(const char *argv0, int argc, char **argv)
 	return -1;
 }

+static int br_batch_cmd(int argc, char *argv[], void *data)
+{
+	return do_cmd(argv[0], argc, argv);
+}
+
 static int batch(const char *name)
 {
-	char *line = NULL;
-	size_t len = 0;
-	int ret = EXIT_SUCCESS;
-
-	if (name && strcmp(name, "-") != 0) {
-		if (freopen(name, "r", stdin) == NULL) {
-			fprintf(stderr,
-				"Cannot open file \"%s\" for reading: %s\n",
-				name, strerror(errno));
-			return EXIT_FAILURE;
-		}
-	}
+	int ret;

 	if (rtnl_open(&rth, 0) < 0) {
 		fprintf(stderr, "Cannot open rtnetlink\n");
 		return EXIT_FAILURE;
 	}

-	cmdlineno = 0;
-	while (getcmdline(&line, &len, stdin) != -1) {
-		char *largv[100];
-		int largc;
+	rtnl_set_strict_dump(&rth);

-		largc = makeargs(line, largv, 100);
-		if (largc == 0)
-			continue;       /* blank line */
-
-		if (do_cmd(largv[0], largc, largv)) {
-			fprintf(stderr, "Command failed %s:%d\n",
-				name, cmdlineno);
-			ret = EXIT_FAILURE;
-			if (!force)
-				break;
-		}
-	}
-	if (line)
-		free(line);
+	ret = do_batch(name, force, br_batch_cmd, NULL);

 	rtnl_close(&rth);
 	return ret;
@ -139,7 +117,7 @@ main(int argc, char **argv)
 		if (matches(opt, "-help") == 0) {
 			usage();
 		} else if (matches(opt, "-Version") == 0) {
-			printf("bridge utility, 0.0\n");
+			printf("bridge utility, %s\n", version);
 			exit(0);
 		} else if (matches(opt, "-stats") == 0 ||
 			   matches(opt, "-statistics") == 0) {
@ -171,9 +149,9 @@ main(int argc, char **argv)
 			NEXT_ARG();
 			if (netns_switch(argv[1]))
 				exit(-1);
+		} else if (matches_color(opt, &color)) {
 		} else if (matches(opt, "-compressvlans") == 0) {
 			++compress_vlans;
-		} else if (matches_color(opt, &color)) {
 		} else if (matches(opt, "-force") == 0) {
 			++force;
 		} else if (matches(opt, "-json") == 0) {
@ -205,6 +183,8 @@ main(int argc, char **argv)
 	if (rtnl_open(&rth, 0) < 0)
 		exit(1);

+	rtnl_set_strict_dump(&rth);
+
 	if (argc > 1)
 		return do_cmd(argv[1], argc-1, argv+1);

--- a/bridge/fdb.c
+++ b/bridge/fdb.c
@ -30,16 +30,21 @@
 #include "rt_names.h"
 #include "utils.h"

-static unsigned int filter_index, filter_vlan, filter_state;
+static unsigned int filter_index, filter_dynamic, filter_master,
+	filter_state, filter_vlan;

 static void usage(void)
 {
 	fprintf(stderr,
 		"Usage: bridge fdb { add | append | del | replace } ADDR dev DEV\n"
 		"              [ self ] [ master ] [ use ] [ router ] [ extern_learn ]\n"
-		"              [ sticky ] [ local | static | dynamic ] [ dst IPADDR ]\n"
-		"              [ vlan VID ] [ port PORT] [ vni VNI ] [ via DEV ]\n"
-		"       bridge fdb [ show [ br BRDEV ] [ brport DEV ] [ vlan VID ] [ state STATE ] ]\n");
+		"              [ sticky ] [ local | static | dynamic ] [ vlan VID ]\n"
+		"              { [ dst IPADDR ] [ port PORT] [ vni VNI ] | [ nhid NHID ] }\n"
+		"	       [ via DEV ] [ src_vni VNI ]\n"
+		"       bridge fdb [ show [ br BRDEV ] [ brport DEV ] [ vlan VID ]\n"
+		"              [ state STATE ] [ dynamic ] ]\n"
+		"       bridge fdb get [ to ] LLADDR [ br BRDEV ] { brport | dev } DEV\n"
+		"              [ vlan VID ] [ vni VNI ] [ self ] [ master ] [ dynamic ]\n");
 	exit(-1);
 }

@ -59,7 +64,10 @@ static const char *state_n2a(unsigned int s)
 	if (s & NUD_REACHABLE)
 		return "";

-	sprintf(buf, "state=%#x", s);
+	if (is_json_context())
+		sprintf(buf, "%#x", s);
+	else
+		sprintf(buf, "state=%#x", s);
 	return buf;
 }

@ -164,6 +172,9 @@ int print_fdb(struct nlmsghdr *n, void *arg)
 	if (filter_vlan && filter_vlan != vid)
 		return 0;

+	if (filter_dynamic && (r->ndm_state & NUD_PERMANENT))
+		return 0;
+
 	open_json_object(NULL);
 	if (n->nlmsg_type == RTM_DELNEIGH)
 		print_bool(PRINT_ANY, "deleted", "Deleted ", true);
@ -181,10 +192,13 @@ int print_fdb(struct nlmsghdr *n, void *arg)
 				   "mac", "%s ", lladdr);
 	}

-	if (!filter_index && r->ndm_ifindex)
+	if (!filter_index && r->ndm_ifindex) {
+		print_string(PRINT_FP, NULL, "dev ", NULL);
+
 		print_color_string(PRINT_ANY, COLOR_IFNAME,
-				   "ifname", "dev %s ",
+				   "ifname", "%s ",
 				   ll_index_to_name(r->ndm_ifindex));
+	}

 	if (tb[NDA_DST]) {
 		int family = AF_INET;
@ -197,9 +211,11 @@ int print_fdb(struct nlmsghdr *n, void *arg)
 				  RTA_PAYLOAD(tb[NDA_DST]),
 				  RTA_DATA(tb[NDA_DST]));

+		print_string(PRINT_FP, NULL, "dst ", NULL);
+
 		print_color_string(PRINT_ANY,
 				   ifa_family_color(family),
-				    "dst", "dst %s ", dst);
+				   "dst", "%s ", dst);
 	}

 	if (vid)
@ -234,6 +250,10 @@ int print_fdb(struct nlmsghdr *n, void *arg)
 					   ll_index_to_name(ifindex));
 	}

+	if (tb[NDA_NH_ID])
+		print_uint(PRINT_ANY, "nhid", "nhid %u ",
+			   rta_getattr_u32(tb[NDA_NH_ID]));
+
 	if (tb[NDA_LINK_NETNSID])
 		print_uint(PRINT_ANY,
 				 "linkNetNsId", "link-netnsid %d ",
@ -256,20 +276,49 @@ int print_fdb(struct nlmsghdr *n, void *arg)
 	return 0;
 }

+static int fdb_linkdump_filter(struct nlmsghdr *nlh, int reqlen)
+{
+	int err;
+
+	if (filter_index) {
+		struct ifinfomsg *ifm = NLMSG_DATA(nlh);
+
+		ifm->ifi_index = filter_index;
+	}
+
+	if (filter_master) {
+		err = addattr32(nlh, reqlen, IFLA_MASTER, filter_master);
+		if (err)
+			return err;
+	}
+
+	return 0;
+}
+
+static int fdb_dump_filter(struct nlmsghdr *nlh, int reqlen)
+{
+	int err;
+
+	if (filter_index) {
+		struct ndmsg *ndm = NLMSG_DATA(nlh);
+
+		ndm->ndm_ifindex = filter_index;
+	}
+
+	if (filter_master) {
+		err = addattr32(nlh, reqlen, NDA_MASTER, filter_master);
+		if (err)
+			return err;
+	}
+
+	return 0;
+}
+
 static int fdb_show(int argc, char **argv)
 {
-	struct {
-		struct nlmsghdr	n;
-		struct ifinfomsg	ifm;
-		char			buf[256];
-	} req = {
-		.n.nlmsg_len = NLMSG_LENGTH(sizeof(struct ifinfomsg)),
-		.ifm.ifi_family = PF_BRIDGE,
-	};
-
 	char *filter_dev = NULL;
 	char *br = NULL;
-	int msg_size = sizeof(struct ifinfomsg);
+	int rc;

 	while (argc > 0) {
 		if ((strcmp(*argv, "brport") == 0) || strcmp(*argv, "dev") == 0) {
@ -290,6 +339,8 @@ static int fdb_show(int argc, char **argv)
 			if (state_a2n(&state, *argv))
 				invarg("invalid state", *argv);
 			filter_state |= state;
+		} else if (strcmp(*argv, "dynamic") == 0) {
+			filter_dynamic = 1;
 		} else {
 			if (matches(*argv, "help") == 0)
 				usage();
@ -304,8 +355,7 @@ static int fdb_show(int argc, char **argv)
 			fprintf(stderr, "Cannot find bridge device \"%s\"\n", br);
 			return -1;
 		}
-		addattr32(&req.n, sizeof(req), IFLA_MASTER, br_ifindex);
-		msg_size += RTA_LENGTH(4);
+		filter_master = br_ifindex;
 	}

 	/*we'll keep around filter_dev for older kernels */
@ -313,10 +363,13 @@ static int fdb_show(int argc, char **argv)
 		filter_index = ll_name_to_index(filter_dev);
 		if (!filter_index)
 			return nodev(filter_dev);
-		req.ifm.ifi_index = filter_index;
 	}

-	if (rtnl_dump_request(&rth, RTM_GETNEIGH, &req.ifm, msg_size) < 0) {
+	if (rth.flags & RTNL_HANDLE_F_STRICT_CHK)
+		rc = rtnl_neighdump_req(&rth, PF_BRIDGE, fdb_dump_filter);
+	else
+		rc = rtnl_fdb_linkdump_req_filter_fn(&rth, fdb_linkdump_filter);
+	if (rc < 0) {
 		perror("Cannot send dump request");
 		exit(1);
 	}
@ -352,9 +405,11 @@ static int fdb_modify(int cmd, int flags, int argc, char **argv)
 	inet_prefix dst;
 	unsigned long port = 0;
 	unsigned long vni = ~0;
+	unsigned long src_vni = ~0;
 	unsigned int via = 0;
 	char *endptr;
 	short vid = -1;
+	__u32 nhid = 0;

 	while (argc > 0) {
 		if (strcmp(*argv, "dev") == 0) {
@ -366,6 +421,10 @@ static int fdb_modify(int cmd, int flags, int argc, char **argv)
 				duparg2("dst", *argv);
 			get_addr(&dst, *argv, preferred_family);
 			dst_ok = 1;
+		} else if (strcmp(*argv, "nhid") == 0) {
+			NEXT_ARG();
+			if (get_u32(&nhid, *argv, 0))
+				invarg("\"id\" value is invalid\n", *argv);
 		} else if (strcmp(*argv, "port") == 0) {

 			NEXT_ARG();
@ -385,6 +444,12 @@ static int fdb_modify(int cmd, int flags, int argc, char **argv)
 			if ((endptr && *endptr) ||
 			    (vni >> 24) || vni == ULONG_MAX)
 				invarg("invalid VNI\n", *argv);
+		} else if (strcmp(*argv, "src_vni") == 0) {
+			NEXT_ARG();
+			src_vni = strtoul(*argv, &endptr, 0);
+			if ((endptr && *endptr) ||
+			    (src_vni >> 24) || src_vni == ULONG_MAX)
+				invarg("invalid src VNI\n", *argv);
 		} else if (strcmp(*argv, "via") == 0) {
 			NEXT_ARG();
 			via = ll_name_to_index(*argv);
@ -434,6 +499,11 @@ static int fdb_modify(int cmd, int flags, int argc, char **argv)
 		return -1;
 	}

+	if (nhid && (dst_ok || port || vni != ~0)) {
+		fprintf(stderr, "dst, port, vni are mutually exclusive with nhid\n");
+		return -1;
+	}
+
 	/* Assume self */
 	if (!(req.ndm.ndm_flags&(NTF_SELF|NTF_MASTER)))
 		req.ndm.ndm_flags |= NTF_SELF;
@ -455,6 +525,8 @@ static int fdb_modify(int cmd, int flags, int argc, char **argv)

 	if (vid >= 0)
 		addattr16(&req.n, sizeof(req), NDA_VLAN, vid);
+	if (nhid > 0)
+		addattr32(&req.n, sizeof(req), NDA_NH_ID, nhid);

 	if (port) {
 		unsigned short dport;
@ -464,6 +536,8 @@ static int fdb_modify(int cmd, int flags, int argc, char **argv)
 	}
 	if (vni != ~0)
 		addattr32(&req.n, sizeof(req), NDA_VNI, vni);
+	if (src_vni != ~0)
+		addattr32(&req.n, sizeof(req), NDA_SRC_VNI, src_vni);
 	if (via)
 		addattr32(&req.n, sizeof(req), NDA_IFINDEX, via);

@ -477,6 +551,121 @@ static int fdb_modify(int cmd, int flags, int argc, char **argv)
 	return 0;
 }

+static int fdb_get(int argc, char **argv)
+{
+	struct {
+		struct nlmsghdr	n;
+		struct ndmsg		ndm;
+		char			buf[1024];
+	} req = {
+		.n.nlmsg_len = NLMSG_LENGTH(sizeof(struct ndmsg)),
+		.n.nlmsg_flags = NLM_F_REQUEST,
+		.n.nlmsg_type = RTM_GETNEIGH,
+		.ndm.ndm_family = AF_BRIDGE,
+	};
+	char  *d = NULL, *br = NULL;
+	struct nlmsghdr *answer;
+	unsigned long vni = ~0;
+	char abuf[ETH_ALEN];
+	int br_ifindex = 0;
+	char *addr = NULL;
+	short vlan = -1;
+	char *endptr;
+
+	while (argc > 0) {
+		if ((strcmp(*argv, "brport") == 0) || strcmp(*argv, "dev") == 0) {
+			NEXT_ARG();
+			d = *argv;
+		} else if (strcmp(*argv, "br") == 0) {
+			NEXT_ARG();
+			br = *argv;
+		} else if (strcmp(*argv, "dev") == 0) {
+			NEXT_ARG();
+			d = *argv;
+		} else if (strcmp(*argv, "vni") == 0) {
+			NEXT_ARG();
+			vni = strtoul(*argv, &endptr, 0);
+			if ((endptr && *endptr) ||
+			    (vni >> 24) || vni == ULONG_MAX)
+				invarg("invalid VNI\n", *argv);
+		} else if (strcmp(*argv, "self") == 0) {
+			req.ndm.ndm_flags |= NTF_SELF;
+		} else if (matches(*argv, "master") == 0) {
+			req.ndm.ndm_flags |= NTF_MASTER;
+		} else if (matches(*argv, "vlan") == 0) {
+			if (vlan >= 0)
+				duparg2("vlan", *argv);
+			NEXT_ARG();
+			vlan = atoi(*argv);
+		} else if (matches(*argv, "dynamic") == 0) {
+			filter_dynamic = 1;
+		} else {
+			if (strcmp(*argv, "to") == 0)
+				NEXT_ARG();
+
+			if (matches(*argv, "help") == 0)
+				usage();
+			if (addr)
+				duparg2("to", *argv);
+			addr = *argv;
+		}
+		argc--; argv++;
+	}
+
+	if ((d == NULL && br == NULL) || addr == NULL) {
+		fprintf(stderr, "Device or master and address are required arguments.\n");
+		return -1;
+	}
+
+	if (sscanf(addr, "%hhx:%hhx:%hhx:%hhx:%hhx:%hhx",
+		   abuf, abuf+1, abuf+2,
+		   abuf+3, abuf+4, abuf+5) != 6) {
+		fprintf(stderr, "Invalid mac address %s\n", addr);
+		return -1;
+	}
+
+	addattr_l(&req.n, sizeof(req), NDA_LLADDR, abuf, ETH_ALEN);
+
+	if (vlan >= 0)
+		addattr16(&req.n, sizeof(req), NDA_VLAN, vlan);
+
+	if (vni != ~0)
+		addattr32(&req.n, sizeof(req), NDA_VNI, vni);
+
+	if (d) {
+		req.ndm.ndm_ifindex = ll_name_to_index(d);
+		if (!req.ndm.ndm_ifindex) {
+			fprintf(stderr, "Cannot find device \"%s\"\n", d);
+			return -1;
+		}
+	}
+
+	if (br) {
+		br_ifindex = ll_name_to_index(br);
+		if (!br_ifindex) {
+			fprintf(stderr, "Cannot find bridge device \"%s\"\n", br);
+			return -1;
+		}
+		addattr32(&req.n, sizeof(req), NDA_MASTER, br_ifindex);
+	}
+
+	if (rtnl_talk(&rth, &req.n, &answer) < 0)
+		return -2;
+
+	/*
+	 * Initialize a json_writer and open an array object
+	 * if -json was specified.
+	 */
+	new_json_obj(json);
+	if (print_fdb(answer, stdout) < 0) {
+		fprintf(stderr, "An error :-)\n");
+		return -1;
+	}
+	delete_json_obj();
+
+	return 0;
+}
+
 int do_fdb(int argc, char **argv)
 {
 	ll_init_map(&rth);
@ -490,6 +679,8 @@ int do_fdb(int argc, char **argv)
 			return fdb_modify(RTM_NEWNEIGH, NLM_F_CREATE|NLM_F_REPLACE, argc-1, argv+1);
 		if (matches(*argv, "delete") == 0)
 			return fdb_modify(RTM_DELNEIGH, 0, argc-1, argv+1);
+		if (matches(*argv, "get") == 0)
+			return fdb_get(argc-1, argv+1);
 		if (matches(*argv, "show") == 0 ||
 		    matches(*argv, "lst") == 0 ||
 		    matches(*argv, "list") == 0)
--- a/bridge/link.c
+++ b/bridge/link.c
@ -19,7 +19,7 @@

 static unsigned int filter_index;

-static const char *port_states[] = {
+static const char *stp_states[] = {
 	[BR_STATE_DISABLED] = "disabled",
 	[BR_STATE_LISTENING] = "listening",
 	[BR_STATE_LEARNING] = "learning",
@ -68,22 +68,29 @@ static void print_link_flags(FILE *fp, unsigned int flags, unsigned int mdown)
 	close_json_array(PRINT_ANY, "> ");
 }

-static void print_portstate(__u8 state)
+void print_stp_state(__u8 state)
 {
 	if (state <= BR_STATE_BLOCKING)
 		print_string(PRINT_ANY, "state",
-			     "state %s ", port_states[state]);
+			     "state %s ", stp_states[state]);
 	else
 		print_uint(PRINT_ANY, "state",
 			     "state (%d) ", state);
 }

-static void print_onoff(FILE *fp, const char *flag, __u8 val)
+int parse_stp_state(const char *arg)
 {
-	if (is_json_context())
-		print_bool(PRINT_JSON, flag, NULL, val);
-	else
-		fprintf(fp, "%s %s ", flag, val ? "on" : "off");
+	size_t nstates = ARRAY_SIZE(stp_states);
+	int state;
+
+	for (state = 0; state < nstates; state++)
+		if (strcmp(stp_states[state], arg) == 0)
+			break;
+
+	if (state == nstates)
+		state = -1;
+
+	return state;
 }

 static void print_hwmode(__u16 mode)
@ -104,7 +111,7 @@ static void print_protinfo(FILE *fp, struct rtattr *attr)
 		parse_rtattr_nested(prtb, IFLA_BRPORT_MAX, attr);

 		if (prtb[IFLA_BRPORT_STATE])
-			print_portstate(rta_getattr_u8(prtb[IFLA_BRPORT_STATE]));
+			print_stp_state(rta_getattr_u8(prtb[IFLA_BRPORT_STATE]));

 		if (prtb[IFLA_BRPORT_PRIORITY])
 			print_uint(PRINT_ANY, "priority",
@ -123,35 +130,38 @@ static void print_protinfo(FILE *fp, struct rtattr *attr)
 			fprintf(fp, "%s    ", _SL_);

 		if (prtb[IFLA_BRPORT_MODE])
-			print_onoff(fp, "hairpin",
-				    rta_getattr_u8(prtb[IFLA_BRPORT_MODE]));
+			print_on_off(PRINT_ANY, "hairpin", "hairpin %s ",
+				     rta_getattr_u8(prtb[IFLA_BRPORT_MODE]));
 		if (prtb[IFLA_BRPORT_GUARD])
-			print_onoff(fp, "guard",
-				    rta_getattr_u8(prtb[IFLA_BRPORT_GUARD]));
+			print_on_off(PRINT_ANY, "guard", "guard %s ",
+				     rta_getattr_u8(prtb[IFLA_BRPORT_GUARD]));
 		if (prtb[IFLA_BRPORT_PROTECT])
-			print_onoff(fp, "root_block",
-				    rta_getattr_u8(prtb[IFLA_BRPORT_PROTECT]));
+			print_on_off(PRINT_ANY, "root_block", "root_block %s ",
+				     rta_getattr_u8(prtb[IFLA_BRPORT_PROTECT]));
 		if (prtb[IFLA_BRPORT_FAST_LEAVE])
-			print_onoff(fp, "fastleave",
-				    rta_getattr_u8(prtb[IFLA_BRPORT_FAST_LEAVE]));
+			print_on_off(PRINT_ANY, "fastleave", "fastleave %s ",
+				     rta_getattr_u8(prtb[IFLA_BRPORT_FAST_LEAVE]));
 		if (prtb[IFLA_BRPORT_LEARNING])
-			print_onoff(fp, "learning",
-				    rta_getattr_u8(prtb[IFLA_BRPORT_LEARNING]));
+			print_on_off(PRINT_ANY, "learning", "learning %s ",
+				     rta_getattr_u8(prtb[IFLA_BRPORT_LEARNING]));
 		if (prtb[IFLA_BRPORT_LEARNING_SYNC])
-			print_onoff(fp, "learning_sync",
-				    rta_getattr_u8(prtb[IFLA_BRPORT_LEARNING_SYNC]));
+			print_on_off(PRINT_ANY, "learning_sync", "learning_sync %s ",
+				     rta_getattr_u8(prtb[IFLA_BRPORT_LEARNING_SYNC]));
 		if (prtb[IFLA_BRPORT_UNICAST_FLOOD])
-			print_onoff(fp, "flood",
-				    rta_getattr_u8(prtb[IFLA_BRPORT_UNICAST_FLOOD]));
+			print_on_off(PRINT_ANY, "flood", "flood %s ",
+				     rta_getattr_u8(prtb[IFLA_BRPORT_UNICAST_FLOOD]));
 		if (prtb[IFLA_BRPORT_MCAST_FLOOD])
-			print_onoff(fp, "mcast_flood",
-				    rta_getattr_u8(prtb[IFLA_BRPORT_MCAST_FLOOD]));
+			print_on_off(PRINT_ANY, "mcast_flood", "mcast_flood %s ",
+				     rta_getattr_u8(prtb[IFLA_BRPORT_MCAST_FLOOD]));
+		if (prtb[IFLA_BRPORT_MCAST_TO_UCAST])
+			print_on_off(PRINT_ANY, "mcast_to_unicast", "mcast_to_unicast %s ",
+				     rta_getattr_u8(prtb[IFLA_BRPORT_MCAST_TO_UCAST]));
 		if (prtb[IFLA_BRPORT_NEIGH_SUPPRESS])
-			print_onoff(fp, "neigh_suppress",
-				    rta_getattr_u8(prtb[IFLA_BRPORT_NEIGH_SUPPRESS]));
+			print_on_off(PRINT_ANY, "neigh_suppress", "neigh_suppress %s ",
+				     rta_getattr_u8(prtb[IFLA_BRPORT_NEIGH_SUPPRESS]));
 		if (prtb[IFLA_BRPORT_VLAN_TUNNEL])
-			print_onoff(fp, "vlan_tunnel",
-				    rta_getattr_u8(prtb[IFLA_BRPORT_VLAN_TUNNEL]));
+			print_on_off(PRINT_ANY, "vlan_tunnel", "vlan_tunnel %s ",
+				     rta_getattr_u8(prtb[IFLA_BRPORT_VLAN_TUNNEL]));

 		if (prtb[IFLA_BRPORT_BACKUP_PORT]) {
 			int ifidx;
@ -163,10 +173,10 @@ static void print_protinfo(FILE *fp, struct rtattr *attr)
 		}

 		if (prtb[IFLA_BRPORT_ISOLATED])
-			print_onoff(fp, "isolated",
-				    rta_getattr_u8(prtb[IFLA_BRPORT_ISOLATED]));
+			print_on_off(PRINT_ANY, "isolated", "isolated %s ",
+				     rta_getattr_u8(prtb[IFLA_BRPORT_ISOLATED]));
 	} else
-		print_portstate(rta_getattr_u8(attr));
+		print_stp_state(rta_getattr_u8(attr));
 }


@ -251,41 +261,27 @@ int print_linkinfo(struct nlmsghdr *n, void *arg)

 static void usage(void)
 {
-	fprintf(stderr, "Usage: bridge link set dev DEV [ cost COST ] [ priority PRIO ] [ state STATE ]\n");
-	fprintf(stderr, "                               [ guard {on | off} ]\n");
-	fprintf(stderr, "                               [ hairpin {on | off} ]\n");
-	fprintf(stderr, "                               [ fastleave {on | off} ]\n");
-	fprintf(stderr,	"                               [ root_block {on | off} ]\n");
-	fprintf(stderr,	"                               [ learning {on | off} ]\n");
-	fprintf(stderr,	"                               [ learning_sync {on | off} ]\n");
-	fprintf(stderr,	"                               [ flood {on | off} ]\n");
-	fprintf(stderr,	"                               [ mcast_flood {on | off} ]\n");
-	fprintf(stderr,	"                               [ neigh_suppress {on | off} ]\n");
-	fprintf(stderr,	"                               [ vlan_tunnel {on | off} ]\n");
-	fprintf(stderr,	"                               [ isolated {on | off} ]\n");
-	fprintf(stderr, "                               [ hwmode {vepa | veb} ]\n");
-	fprintf(stderr,	"                               [ backup_port DEVICE ] [ nobackup_port ]\n");
-	fprintf(stderr, "                               [ self ] [ master ]\n");
-	fprintf(stderr, "       bridge link show [dev DEV]\n");
+	fprintf(stderr,
+		"Usage: bridge link set dev DEV [ cost COST ] [ priority PRIO ] [ state STATE ]\n"
+		"                               [ guard {on | off} ]\n"
+		"                               [ hairpin {on | off} ]\n"
+		"                               [ fastleave {on | off} ]\n"
+		"                               [ root_block {on | off} ]\n"
+		"                               [ learning {on | off} ]\n"
+		"                               [ learning_sync {on | off} ]\n"
+		"                               [ flood {on | off} ]\n"
+		"                               [ mcast_flood {on | off} ]\n"
+		"                               [ mcast_to_unicast {on | off} ]\n"
+		"                               [ neigh_suppress {on | off} ]\n"
+		"                               [ vlan_tunnel {on | off} ]\n"
+		"                               [ isolated {on | off} ]\n"
+		"                               [ hwmode {vepa | veb} ]\n"
+		"                               [ backup_port DEVICE ] [ nobackup_port ]\n"
+		"                               [ self ] [ master ]\n"
+		"       bridge link show [dev DEV]\n");
 	exit(-1);
 }

-static bool on_off(char *arg, __s8 *attr, char *val)
-{
-	if (strcmp(val, "on") == 0)
-		*attr = 1;
-	else if (strcmp(val, "off") == 0)
-		*attr = 0;
-	else {
-		fprintf(stderr,
-			"Error: argument of \"%s\" must be \"on\" or \"off\"\n",
-			arg);
-		return false;
-	}
-
-	return true;
-}
-
 static int brlink_modify(int argc, char **argv)
 {
 	struct {
@ -306,6 +302,7 @@ static int brlink_modify(int argc, char **argv)
 	__s8 flood = -1;
 	__s8 vlan_tunnel = -1;
 	__s8 mcast_flood = -1;
+	__s8 mcast_to_unicast = -1;
 	__s8 isolated = -1;
 	__s8 hairpin = -1;
 	__s8 bpdu_guard = -1;
@ -317,6 +314,7 @@ static int brlink_modify(int argc, char **argv)
 	__s16 mode = -1;
 	__u16 flags = 0;
 	struct rtattr *nest;
+	int ret;

 	while (argc > 0) {
 		if (strcmp(*argv, "dev") == 0) {
@ -324,36 +322,49 @@ static int brlink_modify(int argc, char **argv)
 			d = *argv;
 		} else if (strcmp(*argv, "guard") == 0) {
 			NEXT_ARG();
-			if (!on_off("guard", &bpdu_guard, *argv))
-				return -1;
+			bpdu_guard = parse_on_off("guard", *argv, &ret);
+			if (ret)
+				return ret;
 		} else if (strcmp(*argv, "hairpin") == 0) {
 			NEXT_ARG();
-			if (!on_off("hairpin", &hairpin, *argv))
-				return -1;
+			hairpin = parse_on_off("hairpin", *argv, &ret);
+			if (ret)
+				return ret;
 		} else if (strcmp(*argv, "fastleave") == 0) {
 			NEXT_ARG();
-			if (!on_off("fastleave", &fast_leave, *argv))
-				return -1;
+			fast_leave = parse_on_off("fastleave", *argv, &ret);
+			if (ret)
+				return ret;
 		} else if (strcmp(*argv, "root_block") == 0) {
 			NEXT_ARG();
-			if (!on_off("root_block", &root_block, *argv))
-				return -1;
+			root_block = parse_on_off("root_block", *argv, &ret);
+			if (ret)
+				return ret;
 		} else if (strcmp(*argv, "learning") == 0) {
 			NEXT_ARG();
-			if (!on_off("learning", &learning, *argv))
-				return -1;
+			learning = parse_on_off("learning", *argv, &ret);
+			if (ret)
+				return ret;
 		} else if (strcmp(*argv, "learning_sync") == 0) {
 			NEXT_ARG();
-			if (!on_off("learning_sync", &learning_sync, *argv))
-				return -1;
+			learning_sync = parse_on_off("learning_sync", *argv, &ret);
+			if (ret)
+				return ret;
 		} else if (strcmp(*argv, "flood") == 0) {
 			NEXT_ARG();
-			if (!on_off("flood", &flood, *argv))
-				return -1;
+			flood = parse_on_off("flood", *argv, &ret);
+			if (ret)
+				return ret;
 		} else if (strcmp(*argv, "mcast_flood") == 0) {
 			NEXT_ARG();
-			if (!on_off("mcast_flood", &mcast_flood, *argv))
-				return -1;
+			mcast_flood = parse_on_off("mcast_flood", *argv, &ret);
+			if (ret)
+				return ret;
+		} else if (strcmp(*argv, "mcast_to_unicast") == 0) {
+			NEXT_ARG();
+			mcast_to_unicast = parse_on_off("mcast_to_unicast", *argv, &ret);
+			if (ret)
+				return ret;
 		} else if (strcmp(*argv, "cost") == 0) {
 			NEXT_ARG();
 			cost = atoi(*argv);
@ -363,14 +374,11 @@ static int brlink_modify(int argc, char **argv)
 		} else if (strcmp(*argv, "state") == 0) {
 			NEXT_ARG();
 			char *endptr;
-			size_t nstates = ARRAY_SIZE(port_states);

 			state = strtol(*argv, &endptr, 10);
 			if (!(**argv != '\0' && *endptr == '\0')) {
-				for (state = 0; state < nstates; state++)
-					if (strcmp(port_states[state], *argv) == 0)
-						break;
-				if (state == nstates) {
+				state = parse_stp_state(*argv);
+				if (state == -1) {
 					fprintf(stderr,
 						"Error: invalid STP port state\n");
 					return -1;
@ -394,18 +402,19 @@ static int brlink_modify(int argc, char **argv)
 			flags |= BRIDGE_FLAGS_MASTER;
 		} else if (strcmp(*argv, "neigh_suppress") == 0) {
 			NEXT_ARG();
-			if (!on_off("neigh_suppress", &neigh_suppress,
-				    *argv))
-				return -1;
+			neigh_suppress = parse_on_off("neigh_suppress", *argv, &ret);
+			if (ret)
+				return ret;
 		} else if (strcmp(*argv, "vlan_tunnel") == 0) {
 			NEXT_ARG();
-			if (!on_off("vlan_tunnel", &vlan_tunnel,
-				    *argv))
-				return -1;
+			vlan_tunnel = parse_on_off("vlan_tunnel", *argv, &ret);
+			if (ret)
+				return ret;
 		} else if (strcmp(*argv, "isolated") == 0) {
 			NEXT_ARG();
-			if (!on_off("isolated", &isolated, *argv))
-				return -1;
+			isolated = parse_on_off("isolated", *argv, &ret);
+			if (ret)
+				return ret;
 		} else if (strcmp(*argv, "backup_port") == 0) {
 			NEXT_ARG();
 			backup_port_idx = ll_name_to_index(*argv);
@ -453,6 +462,9 @@ static int brlink_modify(int argc, char **argv)
 	if (mcast_flood >= 0)
 		addattr8(&req.n, sizeof(req), IFLA_BRPORT_MCAST_FLOOD,
 			 mcast_flood);
+	if (mcast_to_unicast >= 0)
+		addattr8(&req.n, sizeof(req), IFLA_BRPORT_MCAST_TO_UCAST,
+			 mcast_to_unicast);
 	if (learning >= 0)
 		addattr8(&req.n, sizeof(req), IFLA_BRPORT_LEARNING, learning);
 	if (learning_sync >= 0)
--- a/bridge/mdb.c
+++ b/bridge/mdb.c
@ -16,9 +16,9 @@
 #include <arpa/inet.h>

 #include "libnetlink.h"
+#include "utils.h"
 #include "br_common.h"
 #include "rt_names.h"
-#include "utils.h"
 #include "json_print.h"

 #ifndef MDBA_RTA
@ -30,8 +30,9 @@ static unsigned int filter_index, filter_vlan;

 static void usage(void)
 {
-	fprintf(stderr, "Usage: bridge mdb { add | del } dev DEV port PORT grp GROUP [permanent | temp] [vid VID]\n");
-	fprintf(stderr, "       bridge mdb {show} [ dev DEV ] [ vid VID ]\n");
+	fprintf(stderr,
+		"Usage: bridge mdb { add | del } dev DEV port PORT grp GROUP [src SOURCE] [permanent | temp] [vid VID]\n"
+		"       bridge mdb {show} [ dev DEV ] [ vid VID ]\n");
 	exit(-1);
 }

@ -40,20 +41,25 @@ static bool is_temp_mcast_rtr(__u8 type)
 	return type == MDB_RTR_TYPE_TEMP_QUERY || type == MDB_RTR_TYPE_TEMP;
 }

-static const char *format_timer(__u32 ticks)
+static const char *format_timer(__u32 ticks, int align)
 {
 	struct timeval tv;
 	static char tbuf[32];

 	__jiffies_to_tv(&tv, ticks);
-	snprintf(tbuf, sizeof(tbuf), "%4lu.%.2lu",
-		 (unsigned long)tv.tv_sec,
-		 (unsigned long)tv.tv_usec / 10000);
+	if (align)
+		snprintf(tbuf, sizeof(tbuf), "%4lu.%.2lu",
+			 (unsigned long)tv.tv_sec,
+			 (unsigned long)tv.tv_usec / 10000);
+	else
+		snprintf(tbuf, sizeof(tbuf), "%lu.%.2lu",
+			 (unsigned long)tv.tv_sec,
+			 (unsigned long)tv.tv_usec / 10000);

 	return tbuf;
 }

-static void __print_router_port_stats(FILE *f, struct rtattr *pattr)
+void br_print_router_port_stats(struct rtattr *pattr)
 {
 	struct rtattr *tb[MDBA_ROUTER_PATTR_MAX + 1];

@ -64,7 +70,7 @@ static void __print_router_port_stats(FILE *f, struct rtattr *pattr)
 		__u32 timer = rta_getattr_u32(tb[MDBA_ROUTER_PATTR_TIMER]);

 		print_string(PRINT_ANY, "timer", " %s",
-			     format_timer(timer));
+			     format_timer(timer, 1));
 	}

 	if (tb[MDBA_ROUTER_PATTR_TYPE]) {
@ -95,13 +101,13 @@ static void br_print_router_ports(FILE *f, struct rtattr *attr,
 			print_string(PRINT_JSON, "port", NULL, port_ifname);

 			if (show_stats)
-				__print_router_port_stats(f, i);
+				br_print_router_port_stats(i);
 			close_json_object();
 		} else if (show_stats) {
 			fprintf(f, "router ports on %s: %s",
 				brifname, port_ifname);

-			__print_router_port_stats(f, i);
+			br_print_router_port_stats(i);
 			fprintf(f, "\n");
 		} else {
 			fprintf(f, "%s ", port_ifname);
@ -114,42 +120,120 @@ static void br_print_router_ports(FILE *f, struct rtattr *attr,
 	close_json_array(PRINT_JSON, NULL);
 }

+static void print_src_entry(struct rtattr *src_attr, int af, const char *sep)
+{
+	struct rtattr *stb[MDBA_MDB_SRCATTR_MAX + 1];
+	SPRINT_BUF(abuf);
+	const char *addr;
+	__u32 timer_val;
+
+	parse_rtattr_nested(stb, MDBA_MDB_SRCATTR_MAX, src_attr);
+	if (!stb[MDBA_MDB_SRCATTR_ADDRESS] || !stb[MDBA_MDB_SRCATTR_TIMER])
+		return;
+
+	addr = inet_ntop(af, RTA_DATA(stb[MDBA_MDB_SRCATTR_ADDRESS]), abuf,
+			 sizeof(abuf));
+	if (!addr)
+		return;
+	timer_val = rta_getattr_u32(stb[MDBA_MDB_SRCATTR_TIMER]);
+
+	open_json_object(NULL);
+	print_string(PRINT_FP, NULL, "%s", sep);
+	print_color_string(PRINT_ANY, ifa_family_color(af),
+			   "address", "%s", addr);
+	print_string(PRINT_ANY, "timer", "/%s", format_timer(timer_val, 0));
+	close_json_object();
+}
+
 static void print_mdb_entry(FILE *f, int ifindex, const struct br_mdb_entry *e,
 			    struct nlmsghdr *n, struct rtattr **tb)
 {
+	const void *grp, *src;
+	const char *addr;
 	SPRINT_BUF(abuf);
 	const char *dev;
-	const void *src;
 	int af;

 	if (filter_vlan && e->vid != filter_vlan)
 		return;

-	af = e->addr.proto == htons(ETH_P_IP) ? AF_INET : AF_INET6;
-	src = af == AF_INET ? (const void *)&e->addr.u.ip4 :
-			      (const void *)&e->addr.u.ip6;
+	if (!e->addr.proto) {
+		af = AF_PACKET;
+		grp = &e->addr.u.mac_addr;
+	} else if (e->addr.proto == htons(ETH_P_IP)) {
+		af = AF_INET;
+		grp = &e->addr.u.ip4;
+	} else {
+		af = AF_INET6;
+		grp = &e->addr.u.ip6;
+	}
 	dev = ll_index_to_name(ifindex);

 	open_json_object(NULL);

-	if (n->nlmsg_type == RTM_DELMDB)
-		print_bool(PRINT_ANY, "deleted", "Deleted ", true);
-
-	print_int(PRINT_ANY, "index", "%u: ", ifindex);
-	print_color_string(PRINT_ANY, COLOR_IFNAME, "dev", "%s ", dev);
-	print_string(PRINT_ANY, "port", " %s ",
+	print_int(PRINT_JSON, "index", NULL, ifindex);
+	print_color_string(PRINT_ANY, COLOR_IFNAME, "dev", "dev %s", dev);
+	print_string(PRINT_ANY, "port", " port %s",
 		     ll_index_to_name(e->ifindex));

-	print_color_string(PRINT_ANY, ifa_family_color(af),
-			    "grp", " %s ",
-			    inet_ntop(af, src, abuf, sizeof(abuf)));
+	/* The ETH_ALEN argument is ignored for all cases but AF_PACKET */
+	addr = rt_addr_n2a_r(af, ETH_ALEN, grp, abuf, sizeof(abuf));
+	if (!addr)
+		return;

-	print_string(PRINT_ANY, "state", " %s ",
+	print_color_string(PRINT_ANY, ifa_family_color(af),
+			    "grp", " grp %s", addr);
+
+	if (tb && tb[MDBA_MDB_EATTR_SOURCE]) {
+		src = (const void *)RTA_DATA(tb[MDBA_MDB_EATTR_SOURCE]);
+		print_color_string(PRINT_ANY, ifa_family_color(af),
+				   "src", " src %s",
+				   inet_ntop(af, src, abuf, sizeof(abuf)));
+	}
+	print_string(PRINT_ANY, "state", " %s",
 			   (e->state & MDB_PERMANENT) ? "permanent" : "temp");
+	if (show_details && tb) {
+		if (tb[MDBA_MDB_EATTR_GROUP_MODE]) {
+			__u8 mode = rta_getattr_u8(tb[MDBA_MDB_EATTR_GROUP_MODE]);
+
+			print_string(PRINT_ANY, "filter_mode", " filter_mode %s",
+				     mode == MCAST_INCLUDE ? "include" :
+							     "exclude");
+		}
+		if (tb[MDBA_MDB_EATTR_SRC_LIST]) {
+			struct rtattr *i, *attr = tb[MDBA_MDB_EATTR_SRC_LIST];
+			const char *sep = " ";
+			int rem;
+
+			open_json_array(PRINT_ANY, is_json_context() ?
+								"source_list" :
+								" source_list");
+			rem = RTA_PAYLOAD(attr);
+			for (i = RTA_DATA(attr); RTA_OK(i, rem);
+			     i = RTA_NEXT(i, rem)) {
+				print_src_entry(i, af, sep);
+				sep = ",";
+			}
+			close_json_array(PRINT_JSON, NULL);
+		}
+		if (tb[MDBA_MDB_EATTR_RTPROT]) {
+			__u8 rtprot = rta_getattr_u8(tb[MDBA_MDB_EATTR_RTPROT]);
+			SPRINT_BUF(rtb);
+
+			print_string(PRINT_ANY, "protocol", " proto %s ",
+				     rtnl_rtprot_n2a(rtprot, rtb, sizeof(rtb)));
+		}
+	}

 	open_json_array(PRINT_JSON, "flags");
 	if (e->flags & MDB_FLAGS_OFFLOAD)
-		print_string(PRINT_ANY, NULL, "%s ", "offload");
+		print_string(PRINT_ANY, NULL, " %s", "offload");
+	if (e->flags & MDB_FLAGS_FAST_LEAVE)
+		print_string(PRINT_ANY, NULL, " %s", "fast_leave");
+	if (e->flags & MDB_FLAGS_STAR_EXCL)
+		print_string(PRINT_ANY, NULL, " %s", "added_by_star_ex");
+	if (e->flags & MDB_FLAGS_BLOCKED)
+		print_string(PRINT_ANY, NULL, " %s", "blocked");
 	close_json_array(PRINT_JSON, NULL);

 	if (e->vid)
@ -159,7 +243,7 @@ static void print_mdb_entry(FILE *f, int ifindex, const struct br_mdb_entry *e,
 		__u32 timer = rta_getattr_u32(tb[MDBA_MDB_EATTR_TIMER]);

 		print_string(PRINT_ANY, "timer", " %s",
-			     format_timer(timer));
+			     format_timer(timer, 1));
 	}

 	print_nl();
@ -177,8 +261,9 @@ static void br_print_mdb_entry(FILE *f, int ifindex, struct rtattr *attr,
 	rem = RTA_PAYLOAD(attr);
 	for (i = RTA_DATA(attr); RTA_OK(i, rem); i = RTA_NEXT(i, rem)) {
 		e = RTA_DATA(i);
-		parse_rtattr(etb, MDBA_MDB_EATTR_MAX, MDB_RTA(RTA_DATA(i)),
-			     RTA_PAYLOAD(i) - RTA_ALIGN(sizeof(*e)));
+		parse_rtattr_flags(etb, MDBA_MDB_EATTR_MAX, MDB_RTA(RTA_DATA(i)),
+				   RTA_PAYLOAD(i) - RTA_ALIGN(sizeof(*e)),
+				   NLA_F_NESTED);
 		print_mdb_entry(f, ifindex, e, n, etb);
 	}
 }
@ -189,10 +274,8 @@ static void print_mdb_entries(FILE *fp, struct nlmsghdr *n,
 	int rem = RTA_PAYLOAD(mdb);
 	struct rtattr *i;

-	open_json_array(PRINT_JSON, "mdb");
 	for (i = RTA_DATA(mdb); RTA_OK(i, rem); i = RTA_NEXT(i, rem))
 		br_print_mdb_entry(fp, ifindex, i, n);
-	close_json_array(PRINT_JSON, NULL);
 }

 static void print_router_entries(FILE *fp, struct nlmsghdr *n,
@ -200,7 +283,6 @@ static void print_router_entries(FILE *fp, struct nlmsghdr *n,
 {
 	const char *brifname = ll_index_to_name(ifindex);

-	open_json_array(PRINT_JSON, "router");
 	if (n->nlmsg_type == RTM_GETMDB) {
 		if (show_details)
 			br_print_router_ports(fp, router, brifname);
@ -222,15 +304,12 @@ static void print_router_entries(FILE *fp, struct nlmsghdr *n,
 				port_name, brifname);
 		}
 	}
-	close_json_array(PRINT_JSON, NULL);
 }

-int print_mdb(struct nlmsghdr *n, void *arg)
+static int __parse_mdb_nlmsg(struct nlmsghdr *n, struct rtattr **tb)
 {
-	FILE *fp = arg;
 	struct br_port_msg *r = NLMSG_DATA(n);
 	int len = n->nlmsg_len;
-	struct rtattr *tb[MDBA_MAX+1];

 	if (n->nlmsg_type != RTM_GETMDB &&
 	    n->nlmsg_type != RTM_NEWMDB &&
@ -253,6 +332,54 @@ int print_mdb(struct nlmsghdr *n, void *arg)

 	parse_rtattr(tb, MDBA_MAX, MDBA_RTA(r), n->nlmsg_len - NLMSG_LENGTH(sizeof(*r)));

+	return 1;
+}
+
+static int print_mdbs(struct nlmsghdr *n, void *arg)
+{
+	struct br_port_msg *r = NLMSG_DATA(n);
+	struct rtattr *tb[MDBA_MAX+1];
+	FILE *fp = arg;
+	int ret;
+
+	ret = __parse_mdb_nlmsg(n, tb);
+	if (ret != 1)
+		return ret;
+
+	if (tb[MDBA_MDB])
+		print_mdb_entries(fp, n, r->ifindex, tb[MDBA_MDB]);
+
+	return 0;
+}
+
+static int print_rtrs(struct nlmsghdr *n, void *arg)
+{
+	struct br_port_msg *r = NLMSG_DATA(n);
+	struct rtattr *tb[MDBA_MAX+1];
+	FILE *fp = arg;
+	int ret;
+
+	ret = __parse_mdb_nlmsg(n, tb);
+	if (ret != 1)
+		return ret;
+
+	if (tb[MDBA_ROUTER])
+		print_router_entries(fp, n, r->ifindex, tb[MDBA_ROUTER]);
+
+	return 0;
+}
+
+int print_mdb_mon(struct nlmsghdr *n, void *arg)
+{
+	struct br_port_msg *r = NLMSG_DATA(n);
+	struct rtattr *tb[MDBA_MAX+1];
+	FILE *fp = arg;
+	int ret;
+
+	ret = __parse_mdb_nlmsg(n, tb);
+	if (ret != 1)
+		return ret;
+
 	if (n->nlmsg_type == RTM_DELMDB)
 		print_bool(PRINT_ANY, "deleted", "Deleted ", true);

@ -291,24 +418,60 @@ static int mdb_show(int argc, char **argv)
 	}

 	new_json_obj(json);
+	open_json_object(NULL);

-	/* get mdb entries*/
+	/* get mdb entries */
 	if (rtnl_mdbdump_req(&rth, PF_BRIDGE) < 0) {
 		perror("Cannot send dump request");
 		return -1;
 	}

-	if (rtnl_dump_filter(&rth, print_mdb, stdout) < 0) {
+	open_json_array(PRINT_JSON, "mdb");
+	if (rtnl_dump_filter(&rth, print_mdbs, stdout) < 0) {
 		fprintf(stderr, "Dump terminated\n");
 		return -1;
 	}
+	close_json_array(PRINT_JSON, NULL);

+	/* get router ports */
+	if (rtnl_mdbdump_req(&rth, PF_BRIDGE) < 0) {
+		perror("Cannot send dump request");
+		return -1;
+	}
+
+	open_json_object("router");
+	if (rtnl_dump_filter(&rth, print_rtrs, stdout) < 0) {
+		fprintf(stderr, "Dump terminated\n");
+		return -1;
+	}
+	close_json_object();
+
+	close_json_object();
 	delete_json_obj();
 	fflush(stdout);

 	return 0;
 }

+static int mdb_parse_grp(const char *grp, struct br_mdb_entry *e)
+{
+	if (inet_pton(AF_INET, grp, &e->addr.u.ip4)) {
+		e->addr.proto = htons(ETH_P_IP);
+		return 0;
+	}
+	if (inet_pton(AF_INET6, grp, &e->addr.u.ip6)) {
+		e->addr.proto = htons(ETH_P_IPV6);
+		return 0;
+	}
+	if (ll_addr_a2n((char *)e->addr.u.mac_addr, sizeof(e->addr.u.mac_addr),
+			grp) == ETH_ALEN) {
+		e->addr.proto = 0;
+		return 0;
+	}
+
+	return -1;
+}
+
 static int mdb_modify(int cmd, int flags, int argc, char **argv)
 {
 	struct {
@ -321,8 +484,8 @@ static int mdb_modify(int cmd, int flags, int argc, char **argv)
 		.n.nlmsg_type = cmd,
 		.bpm.family = PF_BRIDGE,
 	};
+	char *d = NULL, *p = NULL, *grp = NULL, *src = NULL;
 	struct br_mdb_entry entry = {};
-	char *d = NULL, *p = NULL, *grp = NULL;
 	short vid = 0;

 	while (argc > 0) {
@ -343,6 +506,9 @@ static int mdb_modify(int cmd, int flags, int argc, char **argv)
 		} else if (strcmp(*argv, "vid") == 0) {
 			NEXT_ARG();
 			vid = atoi(*argv);
+		} else if (strcmp(*argv, "src") == 0) {
+			NEXT_ARG();
+			src = *argv;
 		} else {
 			if (matches(*argv, "help") == 0)
 				usage();
@ -363,17 +529,31 @@ static int mdb_modify(int cmd, int flags, int argc, char **argv)
 	if (!entry.ifindex)
 		return nodev(p);

-	if (!inet_pton(AF_INET, grp, &entry.addr.u.ip4)) {
-		if (!inet_pton(AF_INET6, grp, &entry.addr.u.ip6)) {
-			fprintf(stderr, "Invalid address \"%s\"\n", grp);
-			return -1;
-		} else
-			entry.addr.proto = htons(ETH_P_IPV6);
-	} else
-		entry.addr.proto = htons(ETH_P_IP);
+	if (mdb_parse_grp(grp, &entry)) {
+		fprintf(stderr, "Invalid address \"%s\"\n", grp);
+		return -1;
+	}

 	entry.vid = vid;
 	addattr_l(&req.n, sizeof(req), MDBA_SET_ENTRY, &entry, sizeof(entry));
+	if (src) {
+		struct rtattr *nest = addattr_nest(&req.n, sizeof(req),
+						   MDBA_SET_ENTRY_ATTRS);
+		struct in6_addr src_ip6;
+		__be32 src_ip4;
+
+		nest->rta_type |= NLA_F_NESTED;
+		if (!inet_pton(AF_INET, src, &src_ip4)) {
+			if (!inet_pton(AF_INET6, src, &src_ip6)) {
+				fprintf(stderr, "Invalid source address \"%s\"\n", src);
+				return -1;
+			}
+			addattr_l(&req.n, sizeof(req), MDBE_ATTR_SOURCE, &src_ip6, sizeof(src_ip6));
+		} else {
+			addattr32(&req.n, sizeof(req), MDBE_ATTR_SOURCE, src_ip4);
+		}
+		addattr_nest_end(&req.n, nest);
+	}

 	if (rtnl_talk(&rth, &req.n, NULL) < 0)
 		return -1;
--- a/bridge/monitor.c
+++ b/bridge/monitor.c
@ -31,7 +31,7 @@ static int prefix_banner;

 static void usage(void)
 {
-	fprintf(stderr, "Usage: bridge monitor [file | link | fdb | mdb | all]\n");
+	fprintf(stderr, "Usage: bridge monitor [file | link | fdb | mdb | vlan | all]\n");
 	exit(-1);
 }

@ -61,12 +61,18 @@ static int accept_msg(struct rtnl_ctrl_data *ctrl,
 	case RTM_DELMDB:
 		if (prefix_banner)
 			fprintf(fp, "[MDB]");
-		return print_mdb(n, arg);
+		return print_mdb_mon(n, arg);

 	case NLMSG_TSTAMP:
 		print_nlmsg_timestamp(fp, n);
 		return 0;

+	case RTM_NEWVLAN:
+	case RTM_DELVLAN:
+		if (prefix_banner)
+			fprintf(fp, "[VLAN]");
+		return print_vlan_rtm(n, arg, true, false);
+
 	default:
 		return 0;
 	}
@ -79,6 +85,7 @@ int do_monitor(int argc, char **argv)
 	int llink = 0;
 	int lneigh = 0;
 	int lmdb = 0;
+	int lvlan = 0;

 	rtnl_close(&rth);

@ -95,8 +102,12 @@ int do_monitor(int argc, char **argv)
 		} else if (matches(*argv, "mdb") == 0) {
 			lmdb = 1;
 			groups = 0;
+		} else if (matches(*argv, "vlan") == 0) {
+			lvlan = 1;
+			groups = 0;
 		} else if (strcmp(*argv, "all") == 0) {
 			groups = ~RTMGRP_TC;
+			lvlan = 1;
 			prefix_banner = 1;
 		} else if (matches(*argv, "help") == 0) {
 			usage();
@ -134,6 +145,12 @@ int do_monitor(int argc, char **argv)

 	if (rtnl_open(&rth, groups) < 0)
 		exit(1);
+
+	if (lvlan && rtnl_add_nl_group(&rth, RTNLGRP_BRVLAN) < 0) {
+		fprintf(stderr, "Failed to add bridge vlan group to list\n");
+		exit(1);
+	}
+
 	ll_init_map(&rth);

 	if (rtnl_listen(&rth, accept_msg, stdout) < 0)
--- a/bridge/vlan.c
+++ b/bridge/vlan.c
--- a/228
+++ b/228
@ -1,8 +1,10 @@
 #!/bin/sh
 # SPDX-License-Identifier: GPL-2.0
 # This is not an autoconf generated configure
-#
-INCLUDE=${1:-"$PWD/include"}
+
+INCLUDE="$PWD/include"
+PREFIX="/usr"
+LIBDIR="\${prefix}/lib"

 # Output file which is input to Makefile
 CONFIG=config.mk
@ -16,9 +18,11 @@ check_toolchain()
    : ${PKG_CONFIG:=pkg-config}
    : ${AR=ar}
    : ${CC=gcc}
+    : ${YACC=bison}
    echo "PKG_CONFIG:=${PKG_CONFIG}" >>$CONFIG
    echo "AR:=${AR}" >>$CONFIG
    echo "CC:=${CC}" >>$CONFIG
+    echo "YACC:=${YACC}" >>$CONFIG
 }

 check_atm()
@ -115,7 +119,7 @@ EOF
 check_xt_old_internal_h()
 {
    # bail if previous XT checks has already succeeded.
-    grep -q if grep -q TC_CONFIG_XT $CONFIG && return
+    grep -q TC_CONFIG_XT $CONFIG && return

    #check if we need our own internal.h
    cat >$TMPDIR/ipttest.c <<EOF
@ -146,6 +150,15 @@ EOF
 	rm -f $TMPDIR/ipttest.c $TMPDIR/ipttest
 }

+# Resolve the library install directory and record it in $CONFIG.
+# The first literal "${prefix}" placeholder found in $LIBDIR is replaced
+# with $PREFIX; the result is printed and appended as "LIBDIR:=<dir>".
+check_lib_dir()
+{
+	LIBDIR=$(echo $LIBDIR | sed "s|\${prefix}|$PREFIX|")
+
+	echo -n "lib directory: "
+	echo "$LIBDIR"
+	echo "LIBDIR:=$LIBDIR" >> $CONFIG
+}
+
 check_ipt()
 {
 	if ! grep TC_CONFIG_XT $CONFIG > /dev/null; then
@ -195,6 +208,31 @@ EOF
    rm -f $TMPDIR/setnstest.c $TMPDIR/setnstest
 }

+# Probe for name_to_handle_at(): build a small test program with $CC.
+# On success print "yes" and append -DHAVE_HANDLE_AT to CFLAGS in $CONFIG,
+# otherwise print "no".  The program is only compiled and linked, never
+# run, so its uninitialized locals do not matter.  Temporary files are
+# removed in both cases.
+check_name_to_handle_at()
+{
+    cat >$TMPDIR/name_to_handle_at_test.c <<EOF
+#define _GNU_SOURCE
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+int main(int argc, char **argv)
+{
+	struct file_handle *fhp;
+	int mount_id, flags, dirfd;
+	char *pathname;
+	name_to_handle_at(dirfd, pathname, fhp, &mount_id, flags);
+	return 0;
+}
+EOF
+    if $CC -I$INCLUDE -o $TMPDIR/name_to_handle_at_test $TMPDIR/name_to_handle_at_test.c >/dev/null 2>&1; then
+        echo "yes"
+        echo "CFLAGS += -DHAVE_HANDLE_AT" >>$CONFIG
+    else
+        echo "no"
+    fi
+    rm -f $TMPDIR/name_to_handle_at_test.c $TMPDIR/name_to_handle_at_test
+}
+
 check_ipset()
 {
    cat >$TMPDIR/ipsettest.c <<EOF
@ -206,7 +244,7 @@ typedef unsigned short ip_set_id_t;
 #include <linux/netfilter/xt_set.h>

 struct xt_set_info info;
-#if IPSET_PROTOCOL == 6
+#if IPSET_PROTOCOL == 6 || IPSET_PROTOCOL == 7
 int main(void)
 {
 	return IPSET_MAXNAMELEN;
@ -238,6 +276,111 @@ check_elf()
    fi
 }

+# Return 0 if a test program calling the libbpf functions listed in the
+# heredoc below compiles and links with $LIBBPF_CFLAGS / $LIBBPF_LDLIBS;
+# otherwise return the compiler's exit status.  Compiler output is
+# discarded and the temporary files are always removed.
+have_libbpf_basic()
+{
+    cat >$TMPDIR/libbpf_test.c <<EOF
+#include <bpf/libbpf.h>
+int main(int argc, char **argv) {
+    bpf_program__set_autoload(NULL, false);
+    bpf_map__ifindex(NULL);
+    bpf_map__set_pin_path(NULL, NULL);
+    bpf_object__open_file(NULL, NULL);
+    return 0;
+}
+EOF
+
+    $CC -o $TMPDIR/libbpf_test $TMPDIR/libbpf_test.c $LIBBPF_CFLAGS $LIBBPF_LDLIBS >/dev/null 2>&1
+    # Save the compiler status before rm overwrites $?.
+    local ret=$?
+
+    rm -f $TMPDIR/libbpf_test.c $TMPDIR/libbpf_test
+    return $ret
+}
+
+# Return 0 if a test program calling bpf_program__section_name() compiles
+# and links with $LIBBPF_CFLAGS / $LIBBPF_LDLIBS; otherwise return the
+# compiler's exit status.  Temporary files are always removed.
+have_libbpf_sec_name()
+{
+    cat >$TMPDIR/libbpf_sec_test.c <<EOF
+#include <bpf/libbpf.h>
+int main(int argc, char **argv) {
+    void *ptr;
+    bpf_program__section_name(NULL);
+    return 0;
+}
+EOF
+
+    $CC -o $TMPDIR/libbpf_sec_test $TMPDIR/libbpf_sec_test.c $LIBBPF_CFLAGS $LIBBPF_LDLIBS >/dev/null 2>&1
+    # Save the compiler status before rm overwrites $?.
+    local ret=$?
+
+    rm -f $TMPDIR/libbpf_sec_test.c $TMPDIR/libbpf_sec_test
+    return $ret
+}
+
+# Called on the "no usable libbpf" paths of check_libbpf.
+check_force_libbpf_on()
+{
+    # If LIBBPF_FORCE=on is set but there is no libbpf support, just exit
+    # the config process to make sure we don't build without libbpf.
+    if [ "$LIBBPF_FORCE" = on ]; then
+        echo "	LIBBPF_FORCE=on set, but couldn't find a usable libbpf"
+        exit 1
+    fi
+}
+
+# Detect libbpf and record the result in $CONFIG.
+# Prints "yes" (plus the detected version) or "no".  Honors LIBBPF_FORCE
+# (off: skip probing; on: abort configure when libbpf is unusable) and
+# LIBBPF_DIR (a DESTDIR-style tree holding a static libbpf).
+check_libbpf()
+{
+    # if set LIBBPF_FORCE=off, disable libbpf entirely
+    if [ "$LIBBPF_FORCE" = off ]; then
+        echo "no"
+        return
+    fi
+
+    # Neither pkg-config knows libbpf nor was an explicit directory given.
+    if ! ${PKG_CONFIG} libbpf --exists && [ -z "$LIBBPF_DIR" ] ; then
+        echo "no"
+        check_force_libbpf_on
+        return
+    fi
+
+    # Library sub-directory inside LIBBPF_DIR: lib64 on x86_64, lib otherwise.
+    if [ $(uname -m) = x86_64 ]; then
+        local LIBBPF_LIBDIR="${LIBBPF_DIR}/usr/lib64"
+    else
+        local LIBBPF_LIBDIR="${LIBBPF_DIR}/usr/lib"
+    fi
+
+    if [ -n "$LIBBPF_DIR" ]; then
+        # Explicit tree: link libbpf.a statically together with -lz -lelf.
+        LIBBPF_CFLAGS="-I${LIBBPF_DIR}/usr/include"
+        LIBBPF_LDLIBS="${LIBBPF_LIBDIR}/libbpf.a -lz -lelf"
+        LIBBPF_VERSION=$(PKG_CONFIG_LIBDIR=${LIBBPF_LIBDIR}/pkgconfig ${PKG_CONFIG} libbpf --modversion)
+    else
+        # System libbpf: take all flags from pkg-config.
+        LIBBPF_CFLAGS=$(${PKG_CONFIG} libbpf --cflags)
+        LIBBPF_LDLIBS=$(${PKG_CONFIG} libbpf --libs)
+        LIBBPF_VERSION=$(${PKG_CONFIG} libbpf --modversion)
+    fi
+
+    if ! have_libbpf_basic; then
+        echo "no"
+        echo "	libbpf version $LIBBPF_VERSION is too low, please update it to at least 0.1.0"
+        check_force_libbpf_on
+        return
+    else
+        echo "HAVE_LIBBPF:=y" >> $CONFIG
+        echo 'CFLAGS += -DHAVE_LIBBPF ' $LIBBPF_CFLAGS >> $CONFIG
+        echo "CFLAGS += -DLIBBPF_VERSION=\\\"$LIBBPF_VERSION\\\"" >> $CONFIG
+        echo 'LDLIBS += ' $LIBBPF_LDLIBS >> $CONFIG
+
+        # Without LIBBPF_DIR the pkg-config (system) library is used.
+        if [ -z "$LIBBPF_DIR" ]; then
+            echo "CFLAGS += -DLIBBPF_DYNAMIC" >> $CONFIG
+        fi
+    fi
+
+    # bpf_program__title() is deprecated since libbpf 0.2.0, use
+    # bpf_program__section_name() instead if we support
+    if have_libbpf_sec_name; then
+        echo "HAVE_LIBBPF_SECTION_NAME:=y" >> $CONFIG
+        echo 'CFLAGS += -DHAVE_LIBBPF_SECTION_NAME ' >> $CONFIG
+    fi
+
+    echo "yes"
+    echo "	libbpf version $LIBBPF_VERSION"
+}
+
 check_selinux()
 # SELinux is a compile time option in the ss utility
 {
@ -349,6 +492,76 @@ endif
 EOF
 }

+# Print the option summary to stdout and exit with the status given as
+# the first argument (0 for --help, 1 for usage errors).
+usage()
+{
+	cat <<EOF
+Usage: $0 [OPTIONS]
+	--include_dir <dir>		Path to iproute2 include dir
+	--libdir <dir>			Path to iproute2 lib dir
+	--libbpf_dir <dir>		Path to libbpf DESTDIR
+	--libbpf_force <on|off>		Enable/disable libbpf by force. Available options:
+					  on: require link against libbpf, quit config if no libbpf support
+					  off: disable libbpf probing
+	--prefix <dir>			Path prefix of the lib files to install
+	-h | --help			Show this usage info
+EOF
+	exit $1
+}
+
+# Compat with the old INCLUDE path setting method.
+# A single argument that does not start with '-' is taken as the include
+# directory; anything else is parsed as "--opt value" / "--opt=value".
+if [ $# -eq 1 ] && [ "$(echo $1 | cut -c 1)" != '-' ]; then
+	INCLUDE="$1"
+else
+	while [ "$#" -gt 0 ]; do
+		case "$1" in
+			--include_dir)
+				shift
+				INCLUDE="$1" ;;
+			--include_dir=*)
+				INCLUDE="${1#*=}" ;;
+			--libdir)
+				shift
+				LIBDIR="$1" ;;
+			--libdir=*)
+				LIBDIR="${1#*=}" ;;
+			--libbpf_dir)
+				shift
+				LIBBPF_DIR="$1" ;;
+			--libbpf_dir=*)
+				LIBBPF_DIR="${1#*=}" ;;
+			--libbpf_force)
+				shift
+				LIBBPF_FORCE="$1" ;;
+			--libbpf_force=*)
+				LIBBPF_FORCE="${1#*=}" ;;
+			--prefix)
+				shift
+				PREFIX="$1" ;;
+			--prefix=*)
+				PREFIX="${1#*=}" ;;
+			-h | --help)
+				usage 0 ;;
+			# Any other "--" option is silently ignored.
+			--*)
+				;;
+			*)
+				usage 1 ;;
+		esac
+		# Guarded shift: a trailing "--opt" with no value has already
+		# consumed the last argument above.
+		[ "$#" -gt 0 ] && shift
+	done
+fi
+
+# Validate the collected settings; any failure prints usage and exits 1.
+[ -d "$INCLUDE" ] || usage 1
+# LIBBPF_DIR / LIBBPF_FORCE are only checked when they are set at all
+# (the ${VAR-unused} form distinguishes unset from empty).
+if [ "${LIBBPF_DIR-unused}" != "unused" ]; then
+	[ -d "$LIBBPF_DIR" ] || usage 1
+fi
+if [ "${LIBBPF_FORCE-unused}" != "unused" ]; then
+	if [ "$LIBBPF_FORCE" != 'on' ] && [ "$LIBBPF_FORCE" != 'off' ]; then
+		usage 1
+	fi
+fi
+[ -z "$PREFIX" ] && usage 1
+[ -z "$LIBDIR" ] && usage 1
+
 echo "# Generated config based on" $INCLUDE >$CONFIG
 quiet_config >> $CONFIG

@ -372,6 +585,7 @@ if ! grep -q TC_CONFIG_NO_XT $CONFIG; then
 fi

 echo
+check_lib_dir
 if ! grep -q TC_CONFIG_NO_XT $CONFIG; then
 	echo -n "iptables modules directory: "
 	check_ipt_lib_dir
@ -380,9 +594,15 @@ fi
 echo -n "libc has setns: "
 check_setns

+echo -n "libc has name_to_handle_at: "
+check_name_to_handle_at
+
 echo -n "SELinux support: "
 check_selinux

+echo -n "libbpf support: "
+check_libbpf
+
 echo -n "ELF support: "
 check_elf

--- a/dcb/.gitignore
+++ b/dcb/.gitignore
@ -0,0 +1 @@
+dcb
--- a/dcb/Makefile
+++ b/dcb/Makefile
@ -0,0 +1,31 @@
+# SPDX-License-Identifier: GPL-2.0
+# Build rules for the dcb tool.  Settings such as HAVE_MNL come from the
+# generated ../config.mk.
+include ../config.mk
+
+TARGETS :=
+
+# dcb is only built when HAVE_MNL is set to y; otherwise TARGETS stays
+# empty and "all"/"install" do nothing for this directory.
+ifeq ($(HAVE_MNL),y)
+
+DCBOBJ = dcb.o \
+         dcb_app.o \
+         dcb_buffer.o \
+         dcb_dcbx.o \
+         dcb_ets.o \
+         dcb_maxrate.o \
+         dcb_pfc.o
+TARGETS += dcb
+LDLIBS += -lm
+
+endif
+
+all: $(TARGETS) $(LIBS)
+
+dcb: $(DCBOBJ) $(LIBNETLINK)
+	$(QUIET_LINK)$(CC) $^ $(LDFLAGS) $(LDLIBS) -o $@
+
+# Install every built target into $(DESTDIR)$(SBINDIR) with mode 0755.
+install: all
+	for i in $(TARGETS); \
+	do install -m 0755 $$i $(DESTDIR)$(SBINDIR); \
+	done
+
+clean:
+	rm -f $(DCBOBJ) $(TARGETS)
--- a/dcb/dcb.c
+++ b/dcb/dcb.c
@ -0,0 +1,611 @@
+// SPDX-License-Identifier: GPL-2.0+
+
+#include <inttypes.h>
+#include <stdio.h>
+#include <linux/dcbnl.h>
+#include <libmnl/libmnl.h>
+#include <getopt.h>
+
+#include "dcb.h"
+#include "mnl_utils.h"
+#include "namespace.h"
+#include "utils.h"
+#include "version.h"
+
+/* Allocate the netlink message buffer, open a NETLINK_ROUTE socket and
+ * start the JSON output context according to dcb->json_output.
+ *
+ * Returns 0 on success and -1 on failure; on failure a message has been
+ * printed with perror() and the buffer, if it was allocated, is freed.
+ */
+static int dcb_init(struct dcb *dcb)
+{
+	dcb->buf = malloc(MNL_SOCKET_BUFFER_SIZE);
+	if (!dcb->buf) {
+		perror("Netlink buffer allocation");
+		return -1;
+	}
+
+	dcb->nl = mnlu_socket_open(NETLINK_ROUTE);
+	if (!dcb->nl) {
+		perror("Open netlink socket");
+		free(dcb->buf);
+		return -1;
+	}
+
+	new_json_obj_plain(dcb->json_output);
+	return 0;
+}
+
+/* Undo dcb_init(): close the JSON output context, close the netlink
+ * socket and free the message buffer.
+ */
+static void dcb_fini(struct dcb *dcb)
+{
+	delete_json_obj_plain();
+	mnl_socket_close(dcb->nl);
+	free(dcb->buf);
+}
+
+/* Allocate a zero-initialized struct dcb.  Returns NULL when the
+ * allocation fails.
+ */
+static struct dcb *dcb_alloc(void)
+{
+	return calloc(1, sizeof(struct dcb));
+}
+
+/* Release a context obtained from dcb_alloc(). */
+static void dcb_free(struct dcb *dcb)
+{
+	free(dcb);
+}
+
+/* State shared with the attribute-parsing callbacks of a "get" request. */
+struct dcb_get_attribute {
+	struct dcb *dcb;
+	int attr;		/* attribute type being looked for */
+	void *payload;		/* payload of the matching attribute, NULL until found */
+	__u16 payload_len;	/* length of that payload */
+};
+
+/* Attribute callback: when @attr has the type requested in the
+ * struct dcb_get_attribute passed as @data, record its payload pointer
+ * and length and stop parsing; otherwise continue with the next one.
+ */
+static int dcb_get_attribute_attr_ieee_cb(const struct nlattr *attr, void *data)
+{
+	struct dcb_get_attribute *req = data;
+
+	if (mnl_attr_get_type(attr) == req->attr) {
+		req->payload = mnl_attr_get_payload(attr);
+		req->payload_len = mnl_attr_get_payload_len(attr);
+		return MNL_CB_STOP;
+	}
+
+	return MNL_CB_OK;
+}
+
+/* Top-level attribute callback: descend into the DCB_ATTR_IEEE nest and
+ * search it with dcb_get_attribute_attr_ieee_cb(); skip anything else.
+ */
+static int dcb_get_attribute_attr_cb(const struct nlattr *attr, void *data)
+{
+	if (mnl_attr_get_type(attr) == DCB_ATTR_IEEE)
+		return mnl_attr_parse_nested(attr,
+					     dcb_get_attribute_attr_ieee_cb,
+					     data);
+
+	return MNL_CB_OK;
+}
+
+/* Message callback for IEEE "get" replies: walk the attributes that
+ * follow the struct dcbmsg header with dcb_get_attribute_attr_cb().
+ */
+static int dcb_get_attribute_cb(const struct nlmsghdr *nlh, void *data)
+{
+	return mnl_attr_parse(nlh, sizeof(struct dcbmsg), dcb_get_attribute_attr_cb, data);
+}
+
+/* Message callback for "get" replies whose attribute sits directly at
+ * the top level of the message.
+ */
+static int dcb_get_attribute_bare_cb(const struct nlmsghdr *nlh, void *data)
+{
+	/* Bare attributes (e.g. DCB_ATTR_DCBX) are not wrapped inside an IEEE
+	 * container, so this does not have to go through unpacking in
+	 * dcb_get_attribute_attr_cb().
+	 */
+	return mnl_attr_parse(nlh, sizeof(struct dcbmsg),
+			      dcb_get_attribute_attr_ieee_cb, data);
+}
+
+/* State for parsing the reply to a "set" request. */
+struct dcb_set_attribute_response {
+	int response_attr;	/* attribute type that carries the status byte */
+};
+
+/* Attribute callback for "set" replies.  Attributes other than the
+ * expected response attribute are skipped.  The response attribute must
+ * carry exactly one byte; a non-zero byte is reported through strerror()
+ * and fails the parse, zero stops parsing successfully.
+ */
+static int dcb_set_attribute_attr_cb(const struct nlattr *attr, void *data)
+{
+	struct dcb_set_attribute_response *expected = data;
+	uint16_t payload_len;
+	uint8_t status;
+
+	if (mnl_attr_get_type(attr) != expected->response_attr)
+		return MNL_CB_OK;
+
+	payload_len = mnl_attr_get_payload_len(attr);
+	if (payload_len != 1) {
+		fprintf(stderr, "Response attribute expected to have size 1, not %d\n", payload_len);
+		return MNL_CB_ERROR;
+	}
+
+	status = mnl_attr_get_u8(attr);
+	if (status == 0)
+		return MNL_CB_STOP;
+
+	fprintf(stderr, "Error when attempting to set attribute: %s\n",
+		strerror(status));
+	return MNL_CB_ERROR;
+}
+
+/* Message callback for "set" replies: walk the attributes that follow
+ * the struct dcbmsg header with dcb_set_attribute_attr_cb().
+ */
+static int dcb_set_attribute_cb(const struct nlmsghdr *nlh, void *data)
+{
+	return mnl_attr_parse(nlh, sizeof(struct dcbmsg), dcb_set_attribute_attr_cb, data);
+}
+
+/* Send @nlh on the dcb netlink socket and process the replies carrying
+ * the same sequence number with @cb, using dcb->buf as receive buffer.
+ *
+ * Returns -1 if sending fails, otherwise whatever
+ * mnlu_socket_recv_run() returns.
+ */
+static int dcb_talk(struct dcb *dcb, struct nlmsghdr *nlh, mnl_cb_t cb, void *data)
+{
+	if (mnl_socket_sendto(dcb->nl, nlh, nlh->nlmsg_len) < 0) {
+		perror("mnl_socket_sendto");
+		return -1;
+	}
+
+	return mnlu_socket_recv_run(dcb->nl, nlh->nlmsg_seq,
+				    dcb->buf, MNL_SOCKET_BUFFER_SIZE,
+				    cb, data);
+}
+
+/* Build, in dcb->buf, a request of type @nlmsg_type whose struct dcbmsg
+ * header carries @dcb_cmd, and append the interface name @dev as
+ * DCB_ATTR_IFNAME.  Returns the message header for further attributes.
+ */
+static struct nlmsghdr *dcb_prepare(struct dcb *dcb, const char *dev,
+				    uint32_t nlmsg_type, uint8_t dcb_cmd)
+{
+	struct dcbmsg hdr = { .cmd = dcb_cmd };
+	struct nlmsghdr *msg = mnlu_msg_prepare(dcb->buf, nlmsg_type,
+						NLM_F_REQUEST,
+						&hdr, sizeof(hdr));
+
+	mnl_attr_put_strz(msg, DCB_ATTR_IFNAME, dev);
+	return msg;
+}
+
+/* Issue an RTM_GETDCB request with DCB command @command for @dev and
+ * look for attribute @attr in the reply using @get_attribute_cb.
+ *
+ * On success returns 0 and stores the attribute payload pointer and
+ * length in *@payload_p / *@payload_len_p.  Returns the dcb_talk() error
+ * if the exchange fails, or -ENOENT if the reply lacks the attribute.
+ */
+static int __dcb_get_attribute(struct dcb *dcb, int command,
+			       const char *dev, int attr,
+			       void **payload_p, __u16 *payload_len_p,
+			       int (*get_attribute_cb)(const struct nlmsghdr *nlh,
+						       void *data))
+{
+	struct dcb_get_attribute ga;
+	struct nlmsghdr *nlh;
+	int ret;
+
+	nlh = dcb_prepare(dcb, dev, RTM_GETDCB, command);
+
+	ga = (struct dcb_get_attribute) {
+		.dcb = dcb,
+		.attr = attr,
+		.payload = NULL,
+	};
+	ret = dcb_talk(dcb, nlh, get_attribute_cb, &ga);
+	if (ret) {
+		perror("Attribute read");
+		return ret;
+	}
+	if (ga.payload == NULL) {
+		/* Not a libc failure: errno is unrelated here, so perror()
+		 * would append a misleading error string.
+		 */
+		fprintf(stderr, "Attribute not found\n");
+		return -ENOENT;
+	}
+
+	*payload_p = ga.payload;
+	*payload_len_p = ga.payload_len;
+	return 0;
+}
+
+/* Fetch attribute @attr of @dev with DCB_CMD_IEEE_GET, searching inside
+ * the DCB_ATTR_IEEE nest of the reply.  See __dcb_get_attribute() for
+ * the outputs and return values.
+ */
+int dcb_get_attribute_va(struct dcb *dcb, const char *dev, int attr,
+			 void **payload_p, __u16 *payload_len_p)
+{
+	return __dcb_get_attribute(dcb, DCB_CMD_IEEE_GET, dev, attr,
+				   payload_p, payload_len_p,
+				   dcb_get_attribute_cb);
+}
+
+/* Fetch attribute @attr of @dev with the caller-chosen DCB command @cmd,
+ * searching the top level of the reply (no IEEE nest).  See
+ * __dcb_get_attribute() for the outputs and return values.
+ */
+int dcb_get_attribute_bare(struct dcb *dcb, int cmd, const char *dev, int attr,
+			   void **payload_p, __u16 *payload_len_p)
+{
+	return __dcb_get_attribute(dcb, cmd, dev, attr,
+				   payload_p, payload_len_p,
+				   dcb_get_attribute_bare_cb);
+}
+
+/* Fetch IEEE attribute @attr of @dev and copy it into @data.
+ *
+ * The attribute payload must be exactly @data_len bytes long; otherwise
+ * an error is printed and -EINVAL is returned.  Errors from
+ * dcb_get_attribute_va() are passed through.  Returns 0 on success.
+ */
+int dcb_get_attribute(struct dcb *dcb, const char *dev, int attr, void *data, size_t data_len)
+{
+	__u16 payload_len;
+	void *payload;
+	int ret;
+
+	ret = dcb_get_attribute_va(dcb, dev, attr, &payload, &payload_len);
+	if (ret)
+		return ret;
+
+	if (payload_len != data_len) {
+		/* data_len is a size_t, hence %zu rather than %zd. */
+		fprintf(stderr, "Wrong len %d, expected %zu\n", payload_len, data_len);
+		return -EINVAL;
+	}
+
+	memcpy(data, payload, data_len);
+	return 0;
+}
+
+/* Build an RTM_SETDCB request with DCB command @command for @dev, let
+ * @cb add the attributes to it, send it and check the status reported
+ * in attribute @response_attr of the reply.
+ *
+ * Returns 0 on success, or the non-zero result of @cb or dcb_talk().
+ */
+static int __dcb_set_attribute(struct dcb *dcb, int command, const char *dev,
+			       int (*cb)(struct dcb *, struct nlmsghdr *, void *),
+			       void *data, int response_attr)
+{
+	struct dcb_set_attribute_response expected = {
+		.response_attr = response_attr,
+	};
+	struct nlmsghdr *msg = dcb_prepare(dcb, dev, RTM_SETDCB, command);
+	int err;
+
+	err = cb(dcb, msg, data);
+	if (err)
+		return err;
+
+	err = dcb_talk(dcb, msg, dcb_set_attribute_cb, &expected);
+	if (err)
+		perror("Attribute write");
+
+	return err;
+}
+
+/* Wraps a caller-supplied attribute-filling callback and its argument
+ * so the function dcb_set_attribute_ieee_cb() can invoke it inside a
+ * DCB_ATTR_IEEE nest.
+ */
+struct dcb_set_attribute_ieee_cb {
+	int (*cb)(struct dcb *dcb, struct nlmsghdr *nlh, void *data);
+	void *data;
+};
+
+/* Open a DCB_ATTR_IEEE nest in @nlh, run the wrapped callback to fill
+ * it and close the nest.  If the callback fails, its error is returned
+ * and the nest is left unterminated.
+ */
+static int dcb_set_attribute_ieee_cb(struct dcb *dcb, struct nlmsghdr *nlh, void *data)
+{
+	struct dcb_set_attribute_ieee_cb *wrapped = data;
+	struct nlattr *ieee = mnl_attr_nest_start(nlh, DCB_ATTR_IEEE);
+	int err = wrapped->cb(dcb, nlh, wrapped->data);
+
+	if (err)
+		return err;
+
+	mnl_attr_nest_end(nlh, ieee);
+	return 0;
+}
+
+/* Send a "set" request with DCB command @command for @dev whose
+ * attributes are produced by @cb(@dcb, nlh, @data) inside a
+ * DCB_ATTR_IEEE nest.  The status is read from the DCB_ATTR_IEEE
+ * attribute of the reply.  Returns 0 on success, non-zero otherwise.
+ */
+int dcb_set_attribute_va(struct dcb *dcb, int command, const char *dev,
+			 int (*cb)(struct dcb *dcb, struct nlmsghdr *nlh, void *data),
+			 void *data)
+{
+	struct dcb_set_attribute_ieee_cb ieee_data = {
+		.cb = cb,
+		.data = data,
+	};
+
+	return __dcb_set_attribute(dcb, command, dev,
+				   &dcb_set_attribute_ieee_cb, &ieee_data,
+				   DCB_ATTR_IEEE);
+}
+
+/* Describes one attribute to be emitted by dcb_set_attribute_put(). */
+struct dcb_set_attribute {
+	int attr;		/* attribute type */
+	const void *data;	/* payload bytes */
+	size_t data_len;	/* payload length in bytes */
+};
+
+/* Append the attribute described by the struct dcb_set_attribute in
+ * @data to @nlh.  Always returns 0.
+ */
+static int dcb_set_attribute_put(struct dcb *dcb, struct nlmsghdr *nlh, void *data)
+{
+	const struct dcb_set_attribute *desc = data;
+
+	mnl_attr_put(nlh, desc->attr, desc->data_len, desc->data);
+
+	return 0;
+}
+
+/* Set IEEE attribute @attr of @dev to the @data_len bytes at @data,
+ * using DCB_CMD_IEEE_SET.  Returns 0 on success, non-zero otherwise.
+ */
+int dcb_set_attribute(struct dcb *dcb, const char *dev, int attr, const void *data, size_t data_len)
+{
+	struct dcb_set_attribute dsa = {
+		.attr = attr,
+		.data = data,
+		.data_len = data_len,
+	};
+
+	return dcb_set_attribute_va(dcb, DCB_CMD_IEEE_SET, dev,
+				    &dcb_set_attribute_put, &dsa);
+}
+
+/* Set attribute @attr of @dev with DCB command @command, placing the
+ * attribute at the top level of the request (no IEEE nest).  The status
+ * is read from attribute @response_attr of the reply.  Returns 0 on
+ * success, non-zero otherwise.
+ */
+int dcb_set_attribute_bare(struct dcb *dcb, int command, const char *dev,
+			   int attr, const void *data, size_t data_len,
+			   int response_attr)
+{
+	struct dcb_set_attribute dsa = {
+		.attr = attr,
+		.data = data,
+		.data_len = data_len,
+	};
+
+	return __dcb_set_attribute(dcb, command, dev,
+				   &dcb_set_attribute_put, &dsa, response_attr);
+}
+
+/* Print @size bytes of @array as "<index>:<value> " pairs through
+ * print_uint().  A per-element format string is built first so that the
+ * index is part of the text output.
+ */
+void dcb_print_array_u8(const __u8 *array, size_t size)
+{
+	SPRINT_BUF(b);
+	size_t i;
+
+	for (i = 0; i < size; i++) {
+		/* i is a size_t and the value is unsigned: %zu and %u. */
+		snprintf(b, sizeof(b), "%zu:%%u ", i);
+		print_uint(PRINT_ANY, NULL, b, array[i]);
+	}
+}
+
+/* Print @size 64-bit values of @array as "<index>:<value> " pairs
+ * through print_u64().
+ */
+void dcb_print_array_u64(const __u64 *array, size_t size)
+{
+	SPRINT_BUF(b);
+	size_t i;
+
+	for (i = 0; i < size; i++) {
+		/* i is a size_t, hence %zu rather than %zd. */
+		snprintf(b, sizeof(b), "%zu:%%" PRIu64 " ", i);
+		print_u64(PRINT_ANY, NULL, b, array[i]);
+	}
+}
+
+/* Print @size entries of @array as "<index>:<state> " pairs through
+ * print_on_off().
+ */
+void dcb_print_array_on_off(const __u8 *array, size_t size)
+{
+	SPRINT_BUF(b);
+	size_t i;
+
+	for (i = 0; i < size; i++) {
+		/* i is a size_t, hence %zu rather than %zd. */
+		snprintf(b, sizeof(b), "%zu:%%s ", i);
+		print_on_off(PRINT_ANY, NULL, b, array[i]);
+	}
+}
+
+/* Print @array_size entries of @array as "<index>:<keyword> " pairs.
+ * Each entry is used as an index into @kw (which has @kw_size slots);
+ * entries that are out of range or map to a NULL slot print as "???".
+ */
+void dcb_print_array_kw(const __u8 *array, size_t array_size,
+			const char *const kw[], size_t kw_size)
+{
+	SPRINT_BUF(b);
+	size_t i;
+
+	for (i = 0; i < array_size; i++) {
+		__u8 emt = array[i];
+
+		/* i is a size_t, hence %zu rather than %zd. */
+		snprintf(b, sizeof(b), "%zu:%%s ", i);
+		if (emt < kw_size && kw[emt])
+			print_string(PRINT_ANY, NULL, b, kw[emt]);
+		else
+			print_string(PRINT_ANY, NULL, b, "???");
+	}
+}
+
+/* Print a labelled array: in JSON mode open an array named @json_name,
+ * in plain-text mode print "@fp_name " first, then emit the @size
+ * elements of @array through @print_array and close the JSON array.
+ */
+void dcb_print_named_array(const char *json_name, const char *fp_name,
+			   const __u8 *array, size_t size,
+			   void (*print_array)(const __u8 *, size_t))
+{
+	open_json_array(PRINT_JSON, json_name);
+	print_string(PRINT_FP, NULL, "%s ", fp_name);
+	print_array(array, size);
+	close_json_array(PRINT_JSON, json_name);
+}
+
+/* Validate one KEY:VALUE mapping and store it through @set_array.
+ *
+ * @key must be in 0..@max_key, or (__u32) -1 meaning "all keys"; @value
+ * must be in 0..@max_value.  @what_key / @what_value name the two sides
+ * in error messages.  For "all", @set_array is called once for every key
+ * in 0..@max_key, otherwise once for @key.
+ *
+ * Returns 0 on success, -EINVAL if a bound is violated.
+ */
+int dcb_parse_mapping(const char *what_key, __u32 key, __u32 max_key,
+		      const char *what_value, __u64 value, __u64 max_value,
+		      void (*set_array)(__u32 index, __u64 value, void *data),
+		      void *set_array_data)
+{
+	bool is_all = key == (__u32) -1;
+
+	if (!is_all && key > max_key) {
+		/* max_key is unsigned, so print it with %u. */
+		fprintf(stderr, "In %s:%s mapping, %s is expected to be 0..%u\n",
+			what_key, what_value, what_key, max_key);
+		return -EINVAL;
+	}
+
+	if (value > max_value) {
+		/* __u64 is not unsigned long long on every ABI; cast to
+		 * match %llu.
+		 */
+		fprintf(stderr, "In %s:%s mapping, %s is expected to be 0..%llu\n",
+			what_key, what_value, what_value,
+			(unsigned long long) max_value);
+		return -EINVAL;
+	}
+
+	if (is_all) {
+		for (key = 0; key <= max_key; key++)
+			set_array(key, value, set_array_data);
+	} else {
+		set_array(key, value, set_array_data);
+	}
+
+	return 0;
+}
+
+/* dcb_parse_mapping() setter: treat @data as a __u8 array and store
+ * @value (truncated to 8 bits) at index @key.
+ */
+void dcb_set_u8(__u32 key, __u64 value, void *data)
+{
+	((__u8 *) data)[key] = value;
+}
+
+/* dcb_parse_mapping() setter: treat @data as a __u32 array and store
+ * @value (truncated to 32 bits) at index @key.
+ */
+void dcb_set_u32(__u32 key, __u64 value, void *data)
+{
+	((__u32 *) data)[key] = value;
+}
+
+/* dcb_parse_mapping() setter: treat @data as a __u64 array and store
+ * @value at index @key.
+ */
+void dcb_set_u64(__u32 key, __u64 value, void *data)
+{
+	((__u64 *) data)[key] = value;
+}
+
+/* Parse the leading "dev DEV" of a sub-command and hand the remaining
+ * arguments to @and_then.
+ *
+ * With no arguments or "help", @help is called and 0 is returned.  With
+ * "dev DEV", DEV is validated by check_ifname() and the result of
+ * @and_then(dcb, DEV, remaining argc, remaining argv) is returned.
+ * Anything else prints an error plus the help text and yields -EINVAL.
+ */
+int dcb_cmd_parse_dev(struct dcb *dcb, int argc, char **argv,
+		      int (*and_then)(struct dcb *dcb, const char *dev,
+				      int argc, char **argv),
+		      void (*help)(void))
+{
+	const char *dev;
+
+	if (!argc || matches(*argv, "help") == 0) {
+		help();
+		return 0;
+	} else if (matches(*argv, "dev") == 0) {
+		NEXT_ARG();
+		dev = *argv;
+		if (check_ifname(dev)) {
+			invarg("not a valid ifname", *argv);
+			return -EINVAL;
+		}
+		NEXT_ARG_FWD();
+		return and_then(dcb, dev, argc, argv);
+	} else {
+		/* Terminate the line so the help text that follows does not
+		 * run into the error message.
+		 */
+		fprintf(stderr, "Expected `dev DEV', not `%s'\n", *argv);
+		help();
+		return -EINVAL;
+	}
+}
+
+/* Write the top-level usage summary of the dcb tool to stderr. */
+static void dcb_help(void)
+{
+	static const char usage_text[] =
+		"Usage: dcb [ OPTIONS ] OBJECT { COMMAND | help }\n"
+		"       dcb [ -f | --force ] { -b | --batch } filename [ -n | --netns ] netnsname\n"
+		"where  OBJECT := { app | buffer | dcbx | ets | maxrate | pfc }\n"
+		"       OPTIONS := [ -V | --Version | -i | --iec | -j | --json\n"
+		"                  | -N | --Numeric | -p | --pretty\n"
+		"                  | -s | --statistics | -v | --verbose]\n";
+
+	fputs(usage_text, stderr);
+}
+
+/* Dispatch on the OBJECT word of the command line.
+ *
+ * No arguments or "help" prints the usage and returns 0.  Otherwise the
+ * first table entry that matches() *argv handles the remaining
+ * arguments; an unknown object yields -ENOENT.
+ */
+static int dcb_cmd(struct dcb *dcb, int argc, char **argv)
+{
+	/* Checked in order, so earlier entries win for ambiguous prefixes. */
+	static const struct {
+		const char *name;
+		int (*handler)(struct dcb *dcb, int argc, char **argv);
+	} objects[] = {
+		{ "app",	dcb_cmd_app },
+		{ "buffer",	dcb_cmd_buffer },
+		{ "dcbx",	dcb_cmd_dcbx },
+		{ "ets",	dcb_cmd_ets },
+		{ "maxrate",	dcb_cmd_maxrate },
+		{ "pfc",	dcb_cmd_pfc },
+	};
+	size_t i;
+
+	if (!argc || matches(*argv, "help") == 0) {
+		dcb_help();
+		return 0;
+	}
+
+	for (i = 0; i < sizeof(objects) / sizeof(objects[0]); i++) {
+		if (matches(*argv, objects[i].name) == 0)
+			return objects[i].handler(dcb, argc - 1, argv + 1);
+	}
+
+	fprintf(stderr, "Object \"%s\" is unknown\n", *argv);
+	return -ENOENT;
+}
+
+/* do_batch() callback: run one batch command through dcb_cmd(), with
+ * @data being the struct dcb context.
+ */
+static int dcb_batch_cmd(int argc, char *argv[], void *data)
+{
+	struct dcb *dcb = data;
+
+	return dcb_cmd(dcb, argc, argv);
+}
+
+/* Run the commands of batch file @name via do_batch(), passing @force
+ * through unchanged.
+ */
+static int dcb_batch(struct dcb *dcb, const char *name, bool force)
+{
+	return do_batch(name, force, dcb_batch_cmd, dcb);
+}
+
+/* Entry point: parse global options, initialize the dcb context and run
+ * either a batch file or the single command given on the command line.
+ * Returns EXIT_SUCCESS or EXIT_FAILURE.
+ */
+int main(int argc, char **argv)
+{
+	static const struct option long_options[] = {
+		{ "Version",		no_argument,		NULL, 'V' },
+		{ "force",		no_argument,		NULL, 'f' },
+		{ "batch",		required_argument,	NULL, 'b' },
+		{ "iec",		no_argument,		NULL, 'i' },
+		{ "json",		no_argument,		NULL, 'j' },
+		{ "Numeric",		no_argument,		NULL, 'N' },
+		{ "pretty",		no_argument,		NULL, 'p' },
+		{ "statistics",		no_argument,		NULL, 's' },
+		{ "netns",		required_argument,	NULL, 'n' },
+		{ "help",		no_argument,		NULL, 'h' },
+		{ NULL, 0, NULL, 0 }
+	};
+	const char *batch_file = NULL;
+	bool force = false;
+	struct dcb *dcb;
+	int opt;
+	int err;
+	int ret;
+
+	dcb = dcb_alloc();
+	if (!dcb) {
+		fprintf(stderr, "Failed to allocate memory for dcb\n");
+		return EXIT_FAILURE;
+	}
+
+	/* NOTE(review): 'v' is accepted by the option string (and -v is
+	 * advertised by dcb_help()), but there is no case for it below, so
+	 * it ends up in the default "Unknown option" branch - confirm intent.
+	 */
+	while ((opt = getopt_long(argc, argv, "b:fhijn:psvNV",
+				  long_options, NULL)) >= 0) {
+
+		switch (opt) {
+		case 'V':
+			printf("dcb utility, iproute2-%s\n", version);
+			ret = EXIT_SUCCESS;
+			goto dcb_free;
+		case 'f':
+			force = true;
+			break;
+		case 'b':
+			batch_file = optarg;
+			break;
+		case 'j':
+			dcb->json_output = true;
+			break;
+		case 'N':
+			dcb->numeric = true;
+			break;
+		case 'p':
+			pretty = true;
+			break;
+		case 's':
+			dcb->stats = true;
+			break;
+		case 'n':
+			if (netns_switch(optarg)) {
+				ret = EXIT_FAILURE;
+				goto dcb_free;
+			}
+			break;
+		case 'i':
+			dcb->use_iec = true;
+			break;
+		case 'h':
+			dcb_help();
+			ret = EXIT_SUCCESS;
+			goto dcb_free;
+		default:
+			fprintf(stderr, "Unknown option.\n");
+			dcb_help();
+			ret = EXIT_FAILURE;
+			goto dcb_free;
+		}
+	}
+
+	/* Skip past the options; what remains is OBJECT COMMAND ... */
+	argc -= optind;
+	argv += optind;
+
+	/* Initialized only after option parsing, so dcb->json_output is
+	 * already final when the JSON context is created.
+	 */
+	err = dcb_init(dcb);
+	if (err) {
+		ret = EXIT_FAILURE;
+		goto dcb_free;
+	}
+
+	if (batch_file)
+		err = dcb_batch(dcb, batch_file, force);
+	else
+		err = dcb_cmd(dcb, argc, argv);
+
+	if (err) {
+		ret = EXIT_FAILURE;
+		goto dcb_fini;
+	}
+
+	ret = EXIT_SUCCESS;
+
+	/* Cleanup labels unwind in reverse order of setup. */
+dcb_fini:
+	dcb_fini(dcb);
+dcb_free:
+	dcb_free(dcb);
+
+	return ret;
+}
--- a/dcb/dcb.h
+++ b/dcb/dcb.h
@ -0,0 +1,81 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __DCB_H__
+#define __DCB_H__ 1
+
+#include <libmnl/libmnl.h>
+#include <stdbool.h>
+#include <stddef.h>
+
+/* dcb.c */
+
+/* Per-invocation state of the dcb tool. */
+struct dcb {
+	char *buf;		/* netlink message buffer (MNL_SOCKET_BUFFER_SIZE bytes) */
+	struct mnl_socket *nl;	/* NETLINK_ROUTE socket */
+	bool json_output;	/* set by -j / --json */
+	bool stats;		/* set by -s / --statistics */
+	bool use_iec;		/* set by -i / --iec */
+	bool numeric;		/* set by -N / --Numeric */
+};
+
+int dcb_parse_mapping(const char *what_key, __u32 key, __u32 max_key,
+		      const char *what_value, __u64 value, __u64 max_value,
+		      void (*set_array)(__u32 index, __u64 value, void *data),
+		      void *set_array_data);
+int dcb_cmd_parse_dev(struct dcb *dcb, int argc, char **argv,
+		      int (*and_then)(struct dcb *dcb, const char *dev,
+				      int argc, char **argv),
+		      void (*help)(void));
+
+void dcb_set_u8(__u32 key, __u64 value, void *data);
+void dcb_set_u32(__u32 key, __u64 value, void *data);
+void dcb_set_u64(__u32 key, __u64 value, void *data);
+
+int dcb_get_attribute(struct dcb *dcb, const char *dev, int attr,
+		      void *data, size_t data_len);
+int dcb_set_attribute(struct dcb *dcb, const char *dev, int attr,
+		      const void *data, size_t data_len);
+int dcb_get_attribute_va(struct dcb *dcb, const char *dev, int attr,
+			 void **payload_p, __u16 *payload_len_p);
+int dcb_set_attribute_va(struct dcb *dcb, int command, const char *dev,
+			 int (*cb)(struct dcb *dcb, struct nlmsghdr *nlh, void *data),
+			 void *data);
+int dcb_get_attribute_bare(struct dcb *dcb, int cmd, const char *dev, int attr,
+			   void **payload_p, __u16 *payload_len_p);
+int dcb_set_attribute_bare(struct dcb *dcb, int command, const char *dev,
+			   int attr, const void *data, size_t data_len,
+			   int response_attr);
+
+void dcb_print_named_array(const char *json_name, const char *fp_name,
+			   const __u8 *array, size_t size,
+			   void (*print_array)(const __u8 *, size_t));
+void dcb_print_array_u8(const __u8 *array, size_t size);
+void dcb_print_array_u64(const __u64 *array, size_t size);
+void dcb_print_array_on_off(const __u8 *array, size_t size);
+void dcb_print_array_kw(const __u8 *array, size_t array_size,
+			const char *const kw[], size_t kw_size);
+
+/* dcb_app.c */
+
+int dcb_cmd_app(struct dcb *dcb, int argc, char **argv);
+
+/* dcb_buffer.c */
+
+int dcb_cmd_buffer(struct dcb *dcb, int argc, char **argv);
+
+/* dcb_dcbx.c */
+
+int dcb_cmd_dcbx(struct dcb *dcb, int argc, char **argv);
+
+/* dcb_ets.c */
+
+int dcb_cmd_ets(struct dcb *dcb, int argc, char **argv);
+
+/* dcb_maxrate.c */
+
+int dcb_cmd_maxrate(struct dcb *dcb, int argc, char **argv);
+
+/* dcb_pfc.c */
+
+int dcb_cmd_pfc(struct dcb *dcb, int argc, char **argv);
+
+#endif /* __DCB_H__ */
--- a/dcb/dcb_app.c
+++ b/dcb/dcb_app.c
@ -0,0 +1,795 @@
+// SPDX-License-Identifier: GPL-2.0+
+
+#include <errno.h>
+#include <inttypes.h>
+#include <stdio.h>
+#include <libmnl/libmnl.h>
+#include <linux/dcbnl.h>
+
+#include "dcb.h"
+#include "utils.h"
+#include "rt_names.h"
+
+/* Print the usage of "dcb app add/del/replace" to stderr. */
+static void dcb_app_help_add(void)
+{
+	/* dscp-prio takes DSCP:PRIO, matching the DSCP entry in the legend
+	 * below.
+	 */
+	fprintf(stderr,
+		"Usage: dcb app { add | del | replace } dev STRING\n"
+		"           [ default-prio PRIO ]\n"
+		"           [ ethtype-prio ET:PRIO ]\n"
+		"           [ stream-port-prio PORT:PRIO ]\n"
+		"           [ dgram-port-prio PORT:PRIO ]\n"
+		"           [ port-prio PORT:PRIO ]\n"
+		"           [ dscp-prio DSCP:PRIO ]\n"
+		"\n"
+		" where PRIO := { 0 .. 7 }\n"
+		"       ET := { 0x600 .. 0xffff }\n"
+		"       PORT := { 1 .. 65535 }\n"
+		"       DSCP := { 0 .. 63 }\n"
+		"\n"
+	);
+}
+
+/* Print the usage of "dcb app show/flush" to stderr. */
+static void dcb_app_help_show_flush(void)
+{
+	static const char usage_text[] =
+		"Usage: dcb app { show | flush } dev STRING\n"
+		"           [ default-prio ]\n"
+		"           [ ethtype-prio ]\n"
+		"           [ stream-port-prio ]\n"
+		"           [ dgram-port-prio ]\n"
+		"           [ port-prio ]\n"
+		"           [ dscp-prio ]\n"
+		"\n";
+
+	fputs(usage_text, stderr);
+}
+
+/* Print the complete "dcb app" help: the help line itself followed by
+ * the show/flush usage and the add/del/replace usage.
+ */
+static void dcb_app_help(void)
+{
+	fputs("Usage: dcb app help\n"
+	      "\n", stderr);
+
+	dcb_app_help_show_flush();
+	dcb_app_help_add();
+}
+
+/* Growable array of APP entries. */
+struct dcb_app_table {
+	struct dcb_app *apps;	/* heap array, grown with realloc() */
+	size_t n_apps;		/* number of valid entries in apps */
+};
+
+/* Free the entry array of @tab.  The table structure itself is not
+ * freed and its fields are left untouched.
+ */
+static void dcb_app_table_fini(struct dcb_app_table *tab)
+{
+	free(tab->apps);
+}
+
+/* Append a copy of @app to @tab, growing the array by one slot.
+ *
+ * Returns 0 on success.  On allocation failure prints an error, leaves
+ * the table unchanged and returns -ENOMEM.
+ */
+static int dcb_app_table_push(struct dcb_app_table *tab, struct dcb_app *app)
+{
+	size_t new_count = tab->n_apps + 1;
+	struct dcb_app *grown;
+
+	grown = realloc(tab->apps, new_count * sizeof(struct dcb_app));
+	if (!grown) {
+		perror("Cannot allocate APP table");
+		return -ENOMEM;
+	}
+
+	grown[new_count - 1] = *app;
+	tab->apps = grown;
+	tab->n_apps = new_count;
+	return 0;
+}
+
+/* Drop from @a every entry that also appears in @b with the same
+ * selector, protocol and priority.  Surviving entries keep their order
+ * and are compacted in place.
+ */
+static void dcb_app_table_remove_existing(struct dcb_app_table *a,
+					  const struct dcb_app_table *b)
+{
+	size_t kept = 0;
+	size_t i, j;
+
+	for (i = 0; i < a->n_apps; i++) {
+		const struct dcb_app *cur = &a->apps[i];
+
+		for (j = 0; j < b->n_apps; j++) {
+			const struct dcb_app *other = &b->apps[j];
+
+			if (cur->selector == other->selector &&
+			    cur->protocol == other->protocol &&
+			    cur->priority == other->priority)
+				break;
+		}
+
+		/* The scan ran off the end: no identical entry in b. */
+		if (j == b->n_apps)
+			a->apps[kept++] = *cur;
+	}
+
+	a->n_apps = kept;
+}
+
+/* Reduce @a to the entries that a "replace" with @b supersedes: those
+ * whose selector/protocol pair occurs in @b, but for which @b holds no
+ * entry with the same priority as well.  Entries are compacted in place
+ * and keep their order.
+ */
+static void dcb_app_table_remove_replaced(struct dcb_app_table *a,
+					  const struct dcb_app_table *b)
+{
+	size_t kept = 0;
+	size_t i, j;
+
+	for (i = 0; i < a->n_apps; i++) {
+		const struct dcb_app *cur = &a->apps[i];
+		bool key_in_b = false;
+		bool exact_in_b = false;
+
+		for (j = 0; j < b->n_apps; j++) {
+			const struct dcb_app *other = &b->apps[j];
+
+			if (cur->selector != other->selector ||
+			    cur->protocol != other->protocol)
+				continue;
+
+			key_in_b = true;
+			if (cur->priority == other->priority) {
+				exact_in_b = true;
+				break;
+			}
+		}
+
+		/* Whatever stays in A is going to be removed, so keep only
+		 * the APP entries whose sel/pid appears in B without a full
+		 * sel/pid/prio match.
+		 */
+		if (key_in_b && !exact_in_b)
+			a->apps[kept++] = *cur;
+	}
+
+	a->n_apps = kept;
+}
+
+/* Append every entry of @b to @a.  Stops at the first failing push and
+ * returns its error; entries appended before that stay in @a.  Returns
+ * 0 when all entries were copied.
+ */
+static int dcb_app_table_copy(struct dcb_app_table *a,
+			      const struct dcb_app_table *b)
+{
+	size_t idx = 0;
+
+	while (idx < b->n_apps) {
+		int err = dcb_app_table_push(a, &b->apps[idx++]);
+
+		if (err != 0)
+			return err;
+	}
+
+	return 0;
+}
+
+/* Order APP entries by protocol, then by priority.  The selector is not
+ * part of the ordering.  Returns <0, 0 or >0 in the manner of qsort()
+ * comparators.
+ */
+static int dcb_app_cmp(const struct dcb_app *a, const struct dcb_app *b)
+{
+	if (a->protocol != b->protocol)
+		return a->protocol < b->protocol ? -1 : 1;
+
+	return a->priority - b->priority;
+}
+
+/* qsort() adapter around dcb_app_cmp(). */
+static int dcb_app_cmp_cb(const void *a, const void *b)
+{
+	return dcb_app_cmp(a, b);
+}
+
+/* Sort the entries of @tab in place using the dcb_app_cmp() ordering. */
+static void dcb_app_table_sort(struct dcb_app_table *tab)
+{
+	qsort(tab->apps, tab->n_apps, sizeof(*tab->apps), dcb_app_cmp_cb);
+}
+
+/* Context handed to dcb_app_parse_mapping_cb(). */
+struct dcb_app_parse_mapping {
+	__u8 selector;			/* selector given to every created entry */
+	struct dcb_app_table *tab;	/* table that receives the entries */
+	int err;			/* first push error; non-zero stops further pushes */
+};
+
+/* dcb_parse_mapping() setter: push an APP entry with the context's
+ * selector, protocol @key and priority @value onto the context's table.
+ * Once a push has failed, pm->err stays set and later calls do nothing.
+ */
+static void dcb_app_parse_mapping_cb(__u32 key, __u64 value, void *data)
+{
+	struct dcb_app_parse_mapping *pm = data;
+	struct dcb_app entry = {
+		.selector = pm->selector,
+		.priority = value,
+		.protocol = key,
+	};
+
+	if (!pm->err)
+		pm->err = dcb_app_table_push(pm->tab, &entry);
+}
+
+/* Handle one ETHTYPE:PRIO mapping: @key is the EtherType, @value the
+ * priority as text.  EtherTypes below 0x600 and priorities that do not
+ * parse as a u8 are rejected with -EINVAL; the range checks and the
+ * table update are done by dcb_parse_mapping().
+ */
+static int dcb_app_parse_mapping_ethtype_prio(__u32 key, char *value, void *data)
+{
+	__u8 priority;
+
+	if (key < 0x600) {
+		fprintf(stderr, "Protocol IDs < 0x600 are reserved for EtherType\n");
+		return -EINVAL;
+	}
+
+	if (get_u8(&priority, value, 0) != 0)
+		return -EINVAL;
+
+	return dcb_parse_mapping("ETHTYPE", key, 0xffff,
+				 "PRIO", priority, IEEE_8021QAZ_MAX_TCS - 1,
+				 dcb_app_parse_mapping_cb, data);
+}
+
+/* Parse the key side of a DSCP mapping into *@key.
+ *
+ * @arg is first tried with parse_mapping_num_all().  If that fails it is
+ * looked up with rtnl_dsfield_a2n(); the resulting dsfield value must
+ * have its two low bits clear and is shifted right by two to obtain the
+ * DSCP.  Returns 0 on success, -1 on failure.
+ */
+static int dcb_app_parse_dscp(__u32 *key, const char *arg)
+{
+	if (parse_mapping_num_all(key, arg) != 0) {
+		if (rtnl_dsfield_a2n(key, arg) != 0)
+			return -1;
+
+		if ((*key & 0x03) != 0) {
+			fprintf(stderr, "The values `%s' uses non-DSCP bits.\n", arg);
+			return -1;
+		}
+
+		/* Convert from dsfield to DSCP by dropping the low bits. */
+		*key >>= 2;
+	}
+
+	return 0;
+}
+
+/* Handle one DSCP:PRIO mapping: @key is the DSCP (0..63), @value the
+ * priority as text.  Returns -EINVAL if the priority does not parse as
+ * a u8, otherwise the result of dcb_parse_mapping().
+ */
+static int dcb_app_parse_mapping_dscp_prio(__u32 key, char *value, void *data)
+{
+	__u8 priority;
+
+	if (get_u8(&priority, value, 0) != 0)
+		return -EINVAL;
+
+	return dcb_parse_mapping("DSCP", key, 63,
+				 "PRIO", priority, IEEE_8021QAZ_MAX_TCS - 1,
+				 dcb_app_parse_mapping_cb, data);
+}
+
+/* Handle one PORT:PRIO mapping: @key is the port, @value the priority
+ * as text.  Port 0 and priorities that do not parse as a u8 are rejected
+ * with -EINVAL; otherwise the result of dcb_parse_mapping() is returned.
+ */
+static int dcb_app_parse_mapping_port_prio(__u32 key, char *value, void *data)
+{
+	__u8 priority;
+
+	if (!key) {
+		fprintf(stderr, "Port ID of 0 is invalid\n");
+		return -EINVAL;
+	}
+
+	if (get_u8(&priority, value, 0) != 0)
+		return -EINVAL;
+
+	return dcb_parse_mapping("PORT", key, 0xffff,
+				 "PRIO", priority, IEEE_8021QAZ_MAX_TCS - 1,
+				 dcb_app_parse_mapping_cb, data);
+}
+
+/* Consume a run of default-priority arguments, pushing one EtherType-selector
+ * entry with protocol 0 per argument.
+ *
+ * Stops at the first argument that get_u8() rejects (returning 1) or at the
+ * first failed push (returning that error). On return *argcp / *argvp point at
+ * the argument where parsing stopped. Returns 0 when all arguments were
+ * consumed.
+ */
+static int dcb_app_parse_default_prio(int *argcp, char ***argvp, struct dcb_app_table *tab)
+{
+	char **argv = *argvp;
+	int argc = *argcp;
+	int ret = 0;
+
+	for (; argc > 0; argc--, argv++) {
+		struct dcb_app entry;
+		__u8 prio;
+
+		if (get_u8(&prio, *argv, 0)) {
+			ret = 1;
+			break;
+		}
+
+		entry = (struct dcb_app){
+			.selector = IEEE_8021QAZ_APP_SEL_ETHERTYPE,
+			.protocol = 0,
+			.priority = prio,
+		};
+
+		ret = dcb_app_table_push(tab, &entry);
+		if (ret != 0)
+			break;
+	}
+
+	*argvp = argv;
+	*argcp = argc;
+	return ret;
+}
+
+/* True for EtherType-selector entries with a non-zero protocol. */
+static bool dcb_app_is_ethtype(const struct dcb_app *app)
+{
+	return app->selector == IEEE_8021QAZ_APP_SEL_ETHERTYPE &&
+	       app->protocol != 0;
+}
+
+/* True for default-priority entries: EtherType selector with protocol 0. */
+static bool dcb_app_is_default(const struct dcb_app *app)
+{
+	return app->selector == IEEE_8021QAZ_APP_SEL_ETHERTYPE &&
+	       app->protocol == 0;
+}
+
+/* True for entries using the DSCP selector. */
+static bool dcb_app_is_dscp(const struct dcb_app *app)
+{
+	return app->selector == IEEE_8021QAZ_APP_SEL_DSCP;
+}
+
+/* True for entries using the STREAM selector. */
+static bool dcb_app_is_stream_port(const struct dcb_app *app)
+{
+	return app->selector == IEEE_8021QAZ_APP_SEL_STREAM;
+}
+
+/* True for entries using the DGRAM selector. */
+static bool dcb_app_is_dgram_port(const struct dcb_app *app)
+{
+	return app->selector == IEEE_8021QAZ_APP_SEL_DGRAM;
+}
+
+/* True for entries using the ANY selector (the "port-prio" keyword). */
+static bool dcb_app_is_port(const struct dcb_app *app)
+{
+	return app->selector == IEEE_8021QAZ_APP_SEL_ANY;
+}
+
+/* Print @protocol as a decimal key followed by ':'. */
+static int dcb_app_print_key_dec(__u16 protocol)
+{
+	return print_uint(PRINT_ANY, NULL, "%d:", protocol);
+}
+
+/* Print @protocol as a hexadecimal key followed by ':'. */
+static int dcb_app_print_key_hex(__u16 protocol)
+{
+	return print_uint(PRINT_ANY, NULL, "%x:", protocol);
+}
+
+/* Print a DSCP key followed by ':'. Outside of JSON output, the symbolic name
+ * returned by rtnl_dsfield_get_name() for the corresponding dsfield value is
+ * used when there is one; otherwise the key is printed as a decimal number.
+ */
+static int dcb_app_print_key_dscp(__u16 protocol)
+{
+	const char *dsfield_name = rtnl_dsfield_get_name(protocol << 2);
+
+	if (is_json_context() || dsfield_name == NULL)
+		return print_uint(PRINT_ANY, NULL, "%d:", protocol);
+
+	return print_string(PRINT_FP, NULL, "%s:", dsfield_name);
+}
+
+/* Print every entry of @tab accepted by @filter as a KEY:PRIO pair.
+ *
+ * @print_key renders the key part. The JSON array named @json_name and the
+ * plain-text heading @fp_name are emitted lazily, right before the first
+ * accepted entry, so nothing at all is printed when no entry matches.
+ */
+static void dcb_app_print_filtered(const struct dcb_app_table *tab,
+				   bool (*filter)(const struct dcb_app *),
+				   int (*print_key)(__u16 protocol),
+				   const char *json_name,
+				   const char *fp_name)
+{
+	bool header_done = false;
+	size_t idx;
+
+	for (idx = 0; idx < tab->n_apps; idx++) {
+		struct dcb_app *entry = tab->apps + idx;
+
+		if (!filter(entry))
+			continue;
+
+		if (!header_done) {
+			open_json_array(PRINT_JSON, json_name);
+			print_string(PRINT_FP, NULL, "%s ", fp_name);
+			header_done = true;
+		}
+
+		/* Each mapping is its own two-element JSON array. */
+		open_json_array(PRINT_JSON, NULL);
+		print_key(entry->protocol);
+		print_uint(PRINT_ANY, NULL, "%d ", entry->priority);
+		close_json_array(PRINT_JSON, NULL);
+	}
+
+	if (!header_done)
+		return;
+
+	close_json_array(PRINT_JSON, json_name);
+	print_nl();
+}
+
+/* Print the EtherType entries of @tab, with keys in hexadecimal. */
+static void dcb_app_print_ethtype_prio(const struct dcb_app_table *tab)
+{
+	dcb_app_print_filtered(tab, dcb_app_is_ethtype,  dcb_app_print_key_hex,
+			       "ethtype_prio", "ethtype-prio");
+}
+
+/* Print the DSCP entries of @tab. Keys are printed as decimal numbers when
+ * dcb->numeric is set, otherwise via dcb_app_print_key_dscp().
+ */
+static void dcb_app_print_dscp_prio(const struct dcb *dcb,
+				    const struct dcb_app_table *tab)
+{
+	dcb_app_print_filtered(tab, dcb_app_is_dscp,
+			       dcb->numeric ? dcb_app_print_key_dec
+					    : dcb_app_print_key_dscp,
+			       "dscp_prio", "dscp-prio");
+}
+
+/* Print the STREAM-selector entries of @tab, with keys in decimal. */
+static void dcb_app_print_stream_port_prio(const struct dcb_app_table *tab)
+{
+	dcb_app_print_filtered(tab, dcb_app_is_stream_port, dcb_app_print_key_dec,
+			       "stream_port_prio", "stream-port-prio");
+}
+
+/* Print the DGRAM-selector entries of @tab, with keys in decimal. */
+static void dcb_app_print_dgram_port_prio(const struct dcb_app_table *tab)
+{
+	dcb_app_print_filtered(tab, dcb_app_is_dgram_port, dcb_app_print_key_dec,
+			       "dgram_port_prio", "dgram-port-prio");
+}
+
+/* Print the ANY-selector entries of @tab, with keys in decimal. */
+static void dcb_app_print_port_prio(const struct dcb_app_table *tab)
+{
+	dcb_app_print_filtered(tab, dcb_app_is_port, dcb_app_print_key_dec,
+			       "port_prio", "port-prio");
+}
+
+/* Print the priorities of all default-priority entries in @tab as a flat
+ * list. The heading / JSON array is only emitted when at least one such entry
+ * exists.
+ */
+static void dcb_app_print_default_prio(const struct dcb_app_table *tab)
+{
+	bool header_done = false;
+	size_t idx;
+
+	for (idx = 0; idx < tab->n_apps; idx++) {
+		const struct dcb_app *entry = &tab->apps[idx];
+
+		if (!dcb_app_is_default(entry))
+			continue;
+
+		if (!header_done) {
+			open_json_array(PRINT_JSON, "default_prio");
+			print_string(PRINT_FP, NULL, "default-prio ", NULL);
+			header_done = true;
+		}
+		print_uint(PRINT_ANY, NULL, "%d ", entry->priority);
+	}
+
+	if (!header_done)
+		return;
+
+	close_json_array(PRINT_JSON, "default_prio");
+	print_nl();
+}
+
+/* Print every category of APP entry in @tab, one category after another. */
+static void dcb_app_print(const struct dcb *dcb, const struct dcb_app_table *tab)
+{
+	dcb_app_print_ethtype_prio(tab);
+	dcb_app_print_default_prio(tab);
+	dcb_app_print_dscp_prio(dcb, tab);
+	dcb_app_print_stream_port_prio(tab);
+	dcb_app_print_dgram_port_prio(tab);
+	dcb_app_print_port_prio(tab);
+}
+
+/* libmnl attribute callback for the contents of DCB_ATTR_IEEE_APP_TABLE.
+ *
+ * @attr: one nested attribute of the table.
+ * @data: the struct dcb_app_table that entries are appended to.
+ *
+ * Attributes of an unexpected type or with a too-short payload are reported
+ * on stderr and skipped. Returns MNL_CB_ERROR only when the entry cannot be
+ * pushed to the table, MNL_CB_OK otherwise.
+ */
+static int dcb_app_get_table_attr_cb(const struct nlattr *attr, void *data)
+{
+	struct dcb_app_table *tab = data;
+	struct dcb_app *app;
+	int ret;
+
+	if (mnl_attr_get_type(attr) != DCB_ATTR_IEEE_APP) {
+		fprintf(stderr, "Unknown attribute in DCB_ATTR_IEEE_APP_TABLE: %d\n",
+			mnl_attr_get_type(attr));
+		return MNL_CB_OK;
+	}
+	if (mnl_attr_get_payload_len(attr) < sizeof(struct dcb_app)) {
+		/* sizeof yields a size_t, which takes %zu (not the signed %zd). */
+		fprintf(stderr, "DCB_ATTR_IEEE_APP payload expected to have size %zu, not %d\n",
+			sizeof(struct dcb_app), mnl_attr_get_payload_len(attr));
+		return MNL_CB_OK;
+	}
+
+	app = mnl_attr_get_payload(attr);
+	ret = dcb_app_table_push(tab, app);
+	if (ret != 0)
+		return MNL_CB_ERROR;
+
+	return MNL_CB_OK;
+}
+
+/* Fetch the DCB_ATTR_IEEE_APP_TABLE attribute of @dev and append its entries
+ * to @tab. Returns 0 on success, the error from dcb_get_attribute_va() if the
+ * fetch fails, or -EINVAL if parsing the payload does not end with MNL_CB_OK.
+ */
+static int dcb_app_get(struct dcb *dcb, const char *dev, struct dcb_app_table *tab)
+{
+	uint16_t payload_len;
+	void *payload;
+	int ret;
+
+	ret = dcb_get_attribute_va(dcb, dev, DCB_ATTR_IEEE_APP_TABLE, &payload, &payload_len);
+	if (ret != 0)
+		return ret;
+
+	ret = mnl_attr_parse_payload(payload, payload_len, dcb_app_get_table_attr_cb, tab);
+	if (ret != MNL_CB_OK)
+		return -EINVAL;
+
+	return 0;
+}
+
+/* Context for dcb_app_add_del_cb(). */
+struct dcb_app_add_del {
+	/* Entries to place into the netlink message. */
+	const struct dcb_app_table *tab;
+	/* Optional predicate; when NULL every entry of tab is included. */
+	bool (*filter)(const struct dcb_app *app);
+};
+
+/* Message-building callback: emit a DCB_ATTR_IEEE_APP_TABLE nest into @nlh
+ * holding one DCB_ATTR_IEEE_APP attribute per selected table entry. @data is
+ * a struct dcb_app_add_del. Always returns 0.
+ */
+static int dcb_app_add_del_cb(struct dcb *dcb, struct nlmsghdr *nlh, void *data)
+{
+	struct dcb_app_add_del *ctx = data;
+	struct nlattr *table_nest;
+	size_t idx;
+
+	table_nest = mnl_attr_nest_start(nlh, DCB_ATTR_IEEE_APP_TABLE);
+
+	for (idx = 0; idx < ctx->tab->n_apps; idx++) {
+		const struct dcb_app *entry = &ctx->tab->apps[idx];
+
+		/* A filter, when present, may veto individual entries. */
+		if (ctx->filter != NULL && !ctx->filter(entry))
+			continue;
+
+		mnl_attr_put(nlh, DCB_ATTR_IEEE_APP, sizeof(*entry), entry);
+	}
+
+	mnl_attr_nest_end(nlh, table_nest);
+	return 0;
+}
+
+/* Send @command for @dev carrying the entries of @tab accepted by @filter
+ * (all entries when @filter is NULL). Nothing is sent, and 0 is returned,
+ * when @tab is empty.
+ */
+static int dcb_app_add_del(struct dcb *dcb, const char *dev, int command,
+			   const struct dcb_app_table *tab,
+			   bool (*filter)(const struct dcb_app *))
+{
+	struct dcb_app_add_del add_del = {
+		.tab = tab,
+		.filter = filter,
+	};
+
+	if (tab->n_apps == 0)
+		return 0;
+
+	return dcb_set_attribute_va(dcb, command, dev, dcb_app_add_del_cb, &add_del);
+}
+
+/* Parse the keyword/mapping arguments shared by the add, del and replace
+ * subcommands and collect the resulting APP entries in @tab.
+ *
+ * With no arguments, or on "help", the add usage is printed and 0 is
+ * returned. Returns -EINVAL for an unknown keyword, the parser's error for a
+ * bad mapping, or the recorded push error. Entries pushed to @tab before an
+ * error are left in place; releasing @tab is up to the caller.
+ */
+static int dcb_cmd_app_parse_add_del(struct dcb *dcb, const char *dev,
+				     int argc, char **argv, struct dcb_app_table *tab)
+{
+	struct dcb_app_parse_mapping pm = {
+		.tab = tab,
+	};
+	int ret;
+
+	if (!argc) {
+		dcb_app_help_add();
+		return 0;
+	}
+
+	do {
+		if (matches(*argv, "help") == 0) {
+			dcb_app_help_add();
+			return 0;
+		} else if (matches(*argv, "ethtype-prio") == 0) {
+			NEXT_ARG();
+			pm.selector = IEEE_8021QAZ_APP_SEL_ETHERTYPE;
+			ret = parse_mapping(&argc, &argv, false,
+					    &dcb_app_parse_mapping_ethtype_prio,
+					    &pm);
+		} else if (matches(*argv, "default-prio") == 0) {
+			NEXT_ARG();
+			/* Default priorities are bare numbers, not KEY:VALUE pairs. */
+			ret = dcb_app_parse_default_prio(&argc, &argv, pm.tab);
+			if (ret != 0) {
+				fprintf(stderr, "Invalid default priority %s\n", *argv);
+				return ret;
+			}
+		} else if (matches(*argv, "dscp-prio") == 0) {
+			NEXT_ARG();
+			pm.selector = IEEE_8021QAZ_APP_SEL_DSCP;
+			ret = parse_mapping_gen(&argc, &argv,
+						&dcb_app_parse_dscp,
+						&dcb_app_parse_mapping_dscp_prio,
+						&pm);
+		} else if (matches(*argv, "stream-port-prio") == 0) {
+			NEXT_ARG();
+			pm.selector = IEEE_8021QAZ_APP_SEL_STREAM;
+			ret = parse_mapping(&argc, &argv, false,
+					    &dcb_app_parse_mapping_port_prio,
+					    &pm);
+		} else if (matches(*argv, "dgram-port-prio") == 0) {
+			NEXT_ARG();
+			pm.selector = IEEE_8021QAZ_APP_SEL_DGRAM;
+			ret = parse_mapping(&argc, &argv, false,
+					    &dcb_app_parse_mapping_port_prio,
+					    &pm);
+		} else if (matches(*argv, "port-prio") == 0) {
+			NEXT_ARG();
+			pm.selector = IEEE_8021QAZ_APP_SEL_ANY;
+			ret = parse_mapping(&argc, &argv, false,
+					    &dcb_app_parse_mapping_port_prio,
+					    &pm);
+		} else {
+			fprintf(stderr, "What is \"%s\"?\n", *argv);
+			dcb_app_help_add();
+			return -EINVAL;
+		}
+
+		if (ret != 0) {
+			fprintf(stderr, "Invalid mapping %s\n", *argv);
+			return ret;
+		}
+		/* A failed table push is recorded by the mapping callback. */
+		if (pm.err)
+			return pm.err;
+	} while (argc > 0);
+
+	return 0;
+}
+
+/* "dcb app add": parse the requested mappings and send them to the kernel
+ * with DCB_CMD_IEEE_SET. Returns 0 on success or the first error hit.
+ */
+static int dcb_cmd_app_add(struct dcb *dcb, const char *dev, int argc, char **argv)
+{
+	struct dcb_app_table tab = {};
+	int ret;
+
+	ret = dcb_cmd_app_parse_add_del(dcb, dev, argc, argv, &tab);
+	if (ret == 0)
+		ret = dcb_app_add_del(dcb, dev, DCB_CMD_IEEE_SET, &tab, NULL);
+
+	/* The parser may have pushed entries before failing, so always
+	 * release the table, not just on the success path.
+	 */
+	dcb_app_table_fini(&tab);
+	return ret;
+}
+
+/* "dcb app del": parse the requested mappings and ask the kernel to remove
+ * them with DCB_CMD_IEEE_DEL. Returns 0 on success or the first error hit.
+ */
+static int dcb_cmd_app_del(struct dcb *dcb, const char *dev, int argc, char **argv)
+{
+	struct dcb_app_table tab = {};
+	int ret;
+
+	ret = dcb_cmd_app_parse_add_del(dcb, dev, argc, argv, &tab);
+	if (ret == 0)
+		ret = dcb_app_add_del(dcb, dev, DCB_CMD_IEEE_DEL, &tab, NULL);
+
+	/* The parser may have pushed entries before failing, so always
+	 * release the table, not just on the success path.
+	 */
+	dcb_app_table_fini(&tab);
+	return ret;
+}
+
+/* "dcb app show": fetch the APP table of @dev, sort it and print either all
+ * categories (no arguments) or only the categories named on the command line.
+ * Returns 0 on success, -EINVAL for an unknown keyword, or the error from
+ * fetching the table.
+ */
+static int dcb_cmd_app_show(struct dcb *dcb, const char *dev, int argc, char **argv)
+{
+	struct dcb_app_table tab = {};
+	int ret;
+
+	ret = dcb_app_get(dcb, dev, &tab);
+	if (ret != 0) {
+		/* Entries parsed before the failure must not be leaked. */
+		dcb_app_table_fini(&tab);
+		return ret;
+	}
+
+	dcb_app_table_sort(&tab);
+
+	open_json_object(NULL);
+
+	if (!argc) {
+		dcb_app_print(dcb, &tab);
+		goto out;
+	}
+
+	do {
+		if (matches(*argv, "help") == 0) {
+			dcb_app_help_show_flush();
+			goto out;
+		} else if (matches(*argv, "ethtype-prio") == 0) {
+			dcb_app_print_ethtype_prio(&tab);
+		} else if (matches(*argv, "dscp-prio") == 0) {
+			dcb_app_print_dscp_prio(dcb, &tab);
+		} else if (matches(*argv, "stream-port-prio") == 0) {
+			dcb_app_print_stream_port_prio(&tab);
+		} else if (matches(*argv, "dgram-port-prio") == 0) {
+			dcb_app_print_dgram_port_prio(&tab);
+		} else if (matches(*argv, "port-prio") == 0) {
+			dcb_app_print_port_prio(&tab);
+		} else if (matches(*argv, "default-prio") == 0) {
+			/* Tested last so that inputs accepted by the earlier
+			 * branches keep resolving to the same keyword.
+			 */
+			dcb_app_print_default_prio(&tab);
+		} else {
+			fprintf(stderr, "What is \"%s\"?\n", *argv);
+			dcb_app_help_show_flush();
+			ret = -EINVAL;
+			goto out;
+		}
+
+		NEXT_ARG_FWD();
+	} while (argc > 0);
+
+out:
+	close_json_object();
+	dcb_app_table_fini(&tab);
+	return ret;
+}
+
+/* "dcb app flush": fetch the APP table of @dev and delete either every entry
+ * (no arguments) or only the categories named on the command line.
+ * Returns 0 on success, -EINVAL for an unknown keyword, or the first error
+ * from fetching or deleting entries.
+ */
+static int dcb_cmd_app_flush(struct dcb *dcb, const char *dev, int argc, char **argv)
+{
+	struct dcb_app_table tab = {};
+	int ret;
+
+	ret = dcb_app_get(dcb, dev, &tab);
+	if (ret != 0)
+		goto out;
+
+	if (!argc) {
+		ret = dcb_app_add_del(dcb, dev, DCB_CMD_IEEE_DEL, &tab, NULL);
+		goto out;
+	}
+
+	do {
+		bool (*filter)(const struct dcb_app *);
+
+		if (matches(*argv, "help") == 0) {
+			dcb_app_help_show_flush();
+			goto out;
+		} else if (matches(*argv, "ethtype-prio") == 0) {
+			filter = &dcb_app_is_ethtype;
+		} else if (matches(*argv, "default-prio") == 0) {
+			filter = &dcb_app_is_default;
+		} else if (matches(*argv, "dscp-prio") == 0) {
+			filter = &dcb_app_is_dscp;
+		} else if (matches(*argv, "stream-port-prio") == 0) {
+			/* The port categories can be shown and added, so they
+			 * are accepted here too. They come after the existing
+			 * keywords so earlier matches are unaffected.
+			 */
+			filter = &dcb_app_is_stream_port;
+		} else if (matches(*argv, "dgram-port-prio") == 0) {
+			filter = &dcb_app_is_dgram_port;
+		} else if (matches(*argv, "port-prio") == 0) {
+			filter = &dcb_app_is_port;
+		} else {
+			fprintf(stderr, "What is \"%s\"?\n", *argv);
+			dcb_app_help_show_flush();
+			ret = -EINVAL;
+			goto out;
+		}
+
+		ret = dcb_app_add_del(dcb, dev, DCB_CMD_IEEE_DEL, &tab, filter);
+		if (ret != 0)
+			goto out;
+
+		NEXT_ARG_FWD();
+	} while (argc > 0);
+
+out:
+	/* Also reached when the fetch fails, to free partially read entries. */
+	dcb_app_table_fini(&tab);
+	return ret;
+}
+
+/* "dcb app replace": make the requested mappings current.
+ *
+ * The current table is fetched into @orig and the request parsed into @tab.
+ * Requested entries that are not already installed are added first; after
+ * that the installed entries superseded by the request are deleted.
+ * Returns 0 on success or the first error hit, so that failures are
+ * reported to the caller rather than masked.
+ */
+static int dcb_cmd_app_replace(struct dcb *dcb, const char *dev, int argc, char **argv)
+{
+	struct dcb_app_table orig = {};
+	struct dcb_app_table tab = {};
+	struct dcb_app_table new = {};
+	int ret;
+
+	ret = dcb_app_get(dcb, dev, &orig);
+	if (ret != 0)
+		goto out;
+
+	ret = dcb_cmd_app_parse_add_del(dcb, dev, argc, argv, &tab);
+	if (ret != 0)
+		goto out;
+
+	/* Attempts to add an existing entry would be rejected, so drop
+	 * these entries from tab.
+	 */
+	ret = dcb_app_table_copy(&new, &tab);
+	if (ret != 0)
+		goto out;
+	dcb_app_table_remove_existing(&new, &orig);
+
+	ret = dcb_app_add_del(dcb, dev, DCB_CMD_IEEE_SET, &new, NULL);
+	if (ret != 0) {
+		fprintf(stderr, "Could not add new APP entries\n");
+		goto out;
+	}
+
+	/* Remove the obsolete entries. */
+	dcb_app_table_remove_replaced(&orig, &tab);
+	ret = dcb_app_add_del(dcb, dev, DCB_CMD_IEEE_DEL, &orig, NULL);
+	if (ret != 0)
+		fprintf(stderr, "Could not remove replaced APP entries\n");
+
+out:
+	dcb_app_table_fini(&new);
+	dcb_app_table_fini(&tab);
+	dcb_app_table_fini(&orig);
+	return ret;
+}
+
+/* Entry point of "dcb app": dispatch on the subcommand keyword (show, flush,
+ * add, del, replace). With no arguments or "help" the usage is printed and 0
+ * is returned; an unknown keyword yields -EINVAL.
+ */
+int dcb_cmd_app(struct dcb *dcb, int argc, char **argv)
+{
+	if (!argc || matches(*argv, "help") == 0) {
+		dcb_app_help();
+		return 0;
+	} else if (matches(*argv, "show") == 0) {
+		NEXT_ARG_FWD();
+		return dcb_cmd_parse_dev(dcb, argc, argv,
+					 dcb_cmd_app_show, dcb_app_help_show_flush);
+	} else if (matches(*argv, "flush") == 0) {
+		NEXT_ARG_FWD();
+		return dcb_cmd_parse_dev(dcb, argc, argv,
+					 dcb_cmd_app_flush, dcb_app_help_show_flush);
+	} else if (matches(*argv, "add") == 0) {
+		NEXT_ARG_FWD();
+		return dcb_cmd_parse_dev(dcb, argc, argv,
+					 dcb_cmd_app_add, dcb_app_help_add);
+	} else if (matches(*argv, "del") == 0) {
+		NEXT_ARG_FWD();
+		return dcb_cmd_parse_dev(dcb, argc, argv,
+					 dcb_cmd_app_del, dcb_app_help_add);
+	} else if (matches(*argv, "replace") == 0) {
+		NEXT_ARG_FWD();
+		return dcb_cmd_parse_dev(dcb, argc, argv,
+					 dcb_cmd_app_replace, dcb_app_help_add);
+	} else {
+		fprintf(stderr, "What is \"%s\"?\n", *argv);
+		dcb_app_help();
+		return -EINVAL;
+	}
+}
--- a/dcb/dcb_buffer.c
+++ b/dcb/dcb_buffer.c
@ -0,0 +1,235 @@
+// SPDX-License-Identifier: GPL-2.0+
+
+#include <errno.h>
+#include <inttypes.h>
+#include <stdio.h>
+#include <linux/dcbnl.h>
+
+#include "dcb.h"
+#include "utils.h"
+
+/* Print the usage of "dcb buffer set" to stderr. */
+static void dcb_buffer_help_set(void)
+{
+	fprintf(stderr,
+		"Usage: dcb buffer set dev STRING\n"
+		"           [ prio-buffer PRIO-MAP ]\n"
+		"           [ buffer-size SIZE-MAP ]\n"
+		"\n"
+		" where PRIO-MAP := [ PRIO-MAP ] PRIO-MAPPING\n"
+		"       PRIO-MAPPING := { all | PRIO }:BUFFER\n"
+		"       SIZE-MAP := [ SIZE-MAP ] SIZE-MAPPING\n"
+		"       SIZE-MAPPING := { all | BUFFER }:INTEGER\n"
+		"       PRIO := { 0 .. 7 }\n"
+		"       BUFFER := { 0 .. 7 }\n"
+		"\n"
+	);
+}
+
+/* Print the usage of "dcb buffer show" to stderr. */
+static void dcb_buffer_help_show(void)
+{
+	fprintf(stderr,
+		"Usage: dcb buffer show dev STRING\n"
+		"           [ prio-buffer ] [ buffer-size ] [ total-size ]\n"
+		"\n"
+	);
+}
+
+/* Print the complete "dcb buffer" usage (help, show and set) to stderr. */
+static void dcb_buffer_help(void)
+{
+	fprintf(stderr,
+		"Usage: dcb buffer help\n"
+		"\n"
+	);
+	dcb_buffer_help_show();
+	dcb_buffer_help_set();
+}
+
+/* Parse one PRIO:BUFFER mapping into the prio2buffer array of the
+ * struct dcbnl_buffer passed in @data. The value must parse as a u8; bounds
+ * are passed on to dcb_parse_mapping().
+ */
+static int dcb_buffer_parse_mapping_prio_buffer(__u32 key, char *value, void *data)
+{
+	struct dcbnl_buffer *buffer = data;
+	__u8 buf;
+
+	if (get_u8(&buf, value, 0))
+		return -EINVAL;
+
+	return dcb_parse_mapping("PRIO", key, IEEE_8021Q_MAX_PRIORITIES - 1,
+				 "BUFFER", buf, DCBX_MAX_BUFFERS - 1,
+				 dcb_set_u8, buffer->prio2buffer);
+}
+
+/* Parse one BUFFER:SIZE mapping into the buffer_size array of the
+ * struct dcbnl_buffer passed in @data. The value is parsed with get_size();
+ * a value it rejects is reported on stderr and yields -EINVAL.
+ */
+static int dcb_buffer_parse_mapping_buffer_size(__u32 key, char *value, void *data)
+{
+	struct dcbnl_buffer *buffer = data;
+	unsigned int size;
+
+	if (get_size(&size, value)) {
+		fprintf(stderr, "%d:%s: Illegal value for buffer size\n", key, value);
+		return -EINVAL;
+	}
+
+	/* -1 as the value bound: presumably "no upper limit" — confirm
+	 * against dcb_parse_mapping().
+	 */
+	return dcb_parse_mapping("BUFFER", key, DCBX_MAX_BUFFERS - 1,
+				 "INTEGER", size, -1,
+				 dcb_set_u32, buffer->buffer_size);
+}
+
+/* Print the total_size field of @buffer. */
+static void dcb_buffer_print_total_size(const struct dcbnl_buffer *buffer)
+{
+	print_size(PRINT_ANY, "total_size", "total-size %s ", buffer->total_size);
+}
+
+/* Print the prio2buffer array of @buffer as a named array of u8 values. */
+static void dcb_buffer_print_prio_buffer(const struct dcbnl_buffer *buffer)
+{
+	dcb_print_named_array("prio_buffer", "prio-buffer",
+			      buffer->prio2buffer, ARRAY_SIZE(buffer->prio2buffer),
+			      dcb_print_array_u8);
+}
+
+/* Print the buffer_size array of @buffer as "INDEX:SIZE" pairs (a JSON array
+ * named "buffer_size" in JSON mode).
+ */
+static void dcb_buffer_print_buffer_size(const struct dcbnl_buffer *buffer)
+{
+	size_t size = ARRAY_SIZE(buffer->buffer_size);
+	SPRINT_BUF(b);
+	size_t i;
+
+	open_json_array(PRINT_JSON, "buffer_size");
+	print_string(PRINT_FP, NULL, "buffer-size ", NULL);
+
+	for (i = 0; i < size; i++) {
+		/* Build the per-element format "<i>:%s ". The index is a
+		 * size_t, so it is converted with %zu; the escaped %%s is
+		 * left for print_size() to fill in.
+		 */
+		snprintf(b, sizeof(b), "%zu:%%s ", i);
+		print_size(PRINT_ANY, NULL, b, buffer->buffer_size[i]);
+	}
+
+	close_json_array(PRINT_JSON, "buffer_size");
+}
+
+/* Print all buffer attributes, each followed by a line break. */
+static void dcb_buffer_print(const struct dcbnl_buffer *buffer)
+{
+	dcb_buffer_print_prio_buffer(buffer);
+	print_nl();
+
+	dcb_buffer_print_buffer_size(buffer);
+	print_nl();
+
+	dcb_buffer_print_total_size(buffer);
+	print_nl();
+}
+
+/* Read the DCB_ATTR_DCB_BUFFER attribute of @dev into @buffer. */
+static int dcb_buffer_get(struct dcb *dcb, const char *dev, struct dcbnl_buffer *buffer)
+{
+	return dcb_get_attribute(dcb, dev, DCB_ATTR_DCB_BUFFER, buffer, sizeof(*buffer));
+}
+
+/* Write @buffer to @dev as the DCB_ATTR_DCB_BUFFER attribute. */
+static int dcb_buffer_set(struct dcb *dcb, const char *dev, const struct dcbnl_buffer *buffer)
+{
+	return dcb_set_attribute(dcb, dev, DCB_ATTR_DCB_BUFFER, buffer, sizeof(*buffer));
+}
+
+/* "dcb buffer set": read the current buffer configuration of @dev, apply the
+ * prio-buffer / buffer-size mappings from the command line to it, and write
+ * the result back. With no arguments or on "help" the usage is printed and 0
+ * is returned without writing anything.
+ */
+static int dcb_cmd_buffer_set(struct dcb *dcb, const char *dev, int argc, char **argv)
+{
+	struct dcbnl_buffer buffer;
+	int ret;
+
+	if (!argc) {
+		dcb_buffer_help_set();
+		return 0;
+	}
+
+	ret = dcb_buffer_get(dcb, dev, &buffer);
+	if (ret)
+		return ret;
+
+	do {
+		if (matches(*argv, "help") == 0) {
+			dcb_buffer_help_set();
+			return 0;
+		} else if (matches(*argv, "prio-buffer") == 0) {
+			NEXT_ARG();
+			ret = parse_mapping(&argc, &argv, true,
+					    &dcb_buffer_parse_mapping_prio_buffer, &buffer);
+			if (ret) {
+				fprintf(stderr, "Invalid priority mapping %s\n", *argv);
+				return ret;
+			}
+			/* parse_mapping() is handed argc/argv to update, so
+			 * skip the NEXT_ARG_FWD() at the bottom of the loop.
+			 */
+			continue;
+		} else if (matches(*argv, "buffer-size") == 0) {
+			NEXT_ARG();
+			ret = parse_mapping(&argc, &argv, true,
+					    &dcb_buffer_parse_mapping_buffer_size, &buffer);
+			if (ret) {
+				fprintf(stderr, "Invalid buffer size mapping %s\n", *argv);
+				return ret;
+			}
+			continue;
+		} else {
+			fprintf(stderr, "What is \"%s\"?\n", *argv);
+			dcb_buffer_help_set();
+			return -EINVAL;
+		}
+
+		NEXT_ARG_FWD();
+	} while (argc > 0);
+
+	return dcb_buffer_set(dcb, dev, &buffer);
+}
+
+/* "dcb buffer show": read the buffer configuration of @dev and print either
+ * everything (no arguments) or only the attributes named on the command line.
+ * Returns 0 on success, -EINVAL for an unknown keyword, or the error from
+ * reading the attribute.
+ */
+static int dcb_cmd_buffer_show(struct dcb *dcb, const char *dev, int argc, char **argv)
+{
+	struct dcbnl_buffer buffer;
+	int ret;
+
+	ret = dcb_buffer_get(dcb, dev, &buffer);
+	if (ret)
+		return ret;
+
+	open_json_object(NULL);
+
+	if (!argc) {
+		dcb_buffer_print(&buffer);
+		goto out;
+	}
+
+	do {
+		if (matches(*argv, "help") == 0) {
+			dcb_buffer_help_show();
+			goto out;
+		} else if (matches(*argv, "prio-buffer") == 0) {
+			dcb_buffer_print_prio_buffer(&buffer);
+			print_nl();
+		} else if (matches(*argv, "buffer-size") == 0) {
+			dcb_buffer_print_buffer_size(&buffer);
+			print_nl();
+		} else if (matches(*argv, "total-size") == 0) {
+			dcb_buffer_print_total_size(&buffer);
+			print_nl();
+		} else {
+			fprintf(stderr, "What is \"%s\"?\n", *argv);
+			dcb_buffer_help_show();
+			ret = -EINVAL;
+			goto out;
+		}
+
+		NEXT_ARG_FWD();
+	} while (argc > 0);
+
+out:
+	/* Every exit after open_json_object() funnels through here so the
+	 * JSON object is always closed, including on help and on errors.
+	 */
+	close_json_object();
+	return ret;
+}
+
+/* Entry point of "dcb buffer": dispatch on the subcommand keyword (show,
+ * set). With no arguments or "help" the usage is printed and 0 is returned;
+ * an unknown keyword yields -EINVAL.
+ */
+int dcb_cmd_buffer(struct dcb *dcb, int argc, char **argv)
+{
+	if (!argc || matches(*argv, "help") == 0) {
+		dcb_buffer_help();
+		return 0;
+	} else if (matches(*argv, "show") == 0) {
+		NEXT_ARG_FWD();
+		return dcb_cmd_parse_dev(dcb, argc, argv,
+					 dcb_cmd_buffer_show, dcb_buffer_help_show);
+	} else if (matches(*argv, "set") == 0) {
+		NEXT_ARG_FWD();
+		return dcb_cmd_parse_dev(dcb, argc, argv,
+					 dcb_cmd_buffer_set, dcb_buffer_help_set);
+	} else {
+		fprintf(stderr, "What is \"%s\"?\n", *argv);
+		dcb_buffer_help();
+		return -EINVAL;
+	}
+}
--- a/dcb/dcb_dcbx.c
+++ b/dcb/dcb_dcbx.c
@ -0,0 +1,192 @@
+// SPDX-License-Identifier: GPL-2.0+
+
+#include <errno.h>
+#include <inttypes.h>
+#include <stdio.h>
+#include <linux/dcbnl.h>
+
+#include "dcb.h"
+#include "utils.h"
+
+/* Print the usage of "dcb dcbx set" to stderr. */
+static void dcb_dcbx_help_set(void)
+{
+	fprintf(stderr,
+		"Usage: dcb dcbx set dev STRING\n"
+		"           [ host | lld-managed ]\n"
+		"           [ cee | ieee ] [ static ]\n"
+		"\n"
+	);
+}
+
+/* Print the usage of "dcb dcbx show" to stderr. */
+static void dcb_dcbx_help_show(void)
+{
+	fprintf(stderr,
+		"Usage: dcb dcbx show dev STRING\n"
+		"\n"
+	);
+}
+
+/* Print the complete "dcb dcbx" usage (help, show and set) to stderr. */
+static void dcb_dcbx_help(void)
+{
+	fprintf(stderr,
+		"Usage: dcb dcbx help\n"
+		"\n"
+	);
+	dcb_dcbx_help_show();
+	dcb_dcbx_help_set();
+}
+
+/* Describes one DCBX capability bit and the names it is shown under. */
+struct dcb_dcbx_flag {
+	/* Bit value of the flag (one of the DCB_CAP_DCBX_* constants). */
+	__u8 value;
+	/* Keyword used on the command line and in plain-text output. */
+	const char *key_fp;
+	/* Key used in JSON output; when NULL, key_fp is used instead. */
+	const char *key_json;
+};
+
+/* Known DCBX flags. Entries without a third initializer leave key_json NULL,
+ * so their plain-text keyword doubles as the JSON key.
+ */
+static struct dcb_dcbx_flag dcb_dcbx_flags[] = {
+	{DCB_CAP_DCBX_HOST, "host"},
+	{DCB_CAP_DCBX_LLD_MANAGED, "lld-managed", "lld_managed"},
+	{DCB_CAP_DCBX_VER_CEE, "cee"},
+	{DCB_CAP_DCBX_VER_IEEE, "ieee"},
+	{DCB_CAP_DCBX_STATIC, "static"},
+};
+
+/* Print the flags set in @dcbx, lowest bit first. Each bit with an entry in
+ * dcb_dcbx_flags is printed by name (a true boolean in JSON output); bits
+ * without an entry are reported on stderr. A line break follows the flags.
+ */
+static void dcb_dcbx_print(__u8 dcbx)
+{
+	int bit;
+	int i;
+
+	for (bit = ffs(dcbx); bit != 0; bit = ffs(dcbx)) {
+		const struct dcb_dcbx_flag *known = NULL;
+		/* ffs() numbers bits from 1. */
+		const int mask = 1 << (bit - 1);
+
+		for (i = 0; i < ARRAY_SIZE(dcb_dcbx_flags); i++) {
+			if (dcb_dcbx_flags[i].value == mask) {
+				known = &dcb_dcbx_flags[i];
+				break;
+			}
+		}
+
+		if (known != NULL) {
+			print_bool(PRINT_JSON, known->key_json ?: known->key_fp,
+				   NULL, true);
+			print_string(PRINT_FP, NULL, "%s ", known->key_fp);
+		} else {
+			fprintf(stderr, "Unknown DCBX bit %#x.\n", mask);
+		}
+
+		/* Clear the handled bit so the next ffs() finds the following one. */
+		dcbx &= ~mask;
+	}
+
+	print_nl();
+}
+
+/* Read the DCB_ATTR_DCBX attribute of @dev into *@dcbx. The payload must be
+ * exactly one byte long; any other size is reported and yields -EINVAL.
+ */
+static int dcb_dcbx_get(struct dcb *dcb, const char *dev, __u8 *dcbx)
+{
+	__u16 payload_len;
+	void *payload;
+	int err;
+
+	err = dcb_get_attribute_bare(dcb, DCB_CMD_IEEE_GET, dev, DCB_ATTR_DCBX,
+				     &payload, &payload_len);
+	if (err != 0)
+		return err;
+
+	if (payload_len != 1) {
+		fprintf(stderr, "DCB_ATTR_DCBX payload has size %d, expected 1.\n",
+			payload_len);
+		return -EINVAL;
+	}
+	*dcbx = *(__u8 *) payload;
+	return 0;
+}
+
+/* Send @dcbx to @dev as a one-byte DCB_ATTR_DCBX attribute using the
+ * DCB_CMD_SDCBX command.
+ */
+static int dcb_dcbx_set(struct dcb *dcb, const char *dev, __u8 dcbx)
+{
+	return dcb_set_attribute_bare(dcb, DCB_CMD_SDCBX, dev, DCB_ATTR_DCBX,
+				      &dcbx, 1, DCB_ATTR_DCBX);
+}
+
+/* "dcb dcbx set": OR together the flags named on the command line and send
+ * the resulting mask to @dev. With no arguments or on "help" the usage is
+ * printed and 0 is returned; an unknown keyword yields -EINVAL.
+ */
+static int dcb_cmd_dcbx_set(struct dcb *dcb, const char *dev, int argc, char **argv)
+{
+	__u8 dcbx = 0;
+
+	if (!argc) {
+		dcb_dcbx_help_set();
+		return 0;
+	}
+
+	do {
+		const struct dcb_dcbx_flag *hit = NULL;
+		size_t idx;
+
+		if (matches(*argv, "help") == 0) {
+			dcb_dcbx_help_set();
+			return 0;
+		}
+
+		/* The first table entry matching the keyword wins. */
+		for (idx = 0; idx < ARRAY_SIZE(dcb_dcbx_flags); idx++) {
+			if (matches(*argv, dcb_dcbx_flags[idx].key_fp) == 0) {
+				hit = &dcb_dcbx_flags[idx];
+				break;
+			}
+		}
+
+		if (hit == NULL) {
+			fprintf(stderr, "What is \"%s\"?\n", *argv);
+			dcb_dcbx_help_set();
+			return -EINVAL;
+		}
+
+		dcbx |= hit->value;
+		NEXT_ARG_FWD();
+	} while (argc > 0);
+
+	return dcb_dcbx_set(dcb, dev, dcbx);
+}
+
+/* "dcb dcbx show": read the DCBX flags of @dev and print them. The command
+ * takes no options: a first argument of "help" prints the usage and returns
+ * 0, any other argument is rejected with -EINVAL.
+ */
+static int dcb_cmd_dcbx_show(struct dcb *dcb, const char *dev, int argc, char **argv)
+{
+	__u8 dcbx;
+	int ret;
+
+	ret = dcb_dcbx_get(dcb, dev, &dcbx);
+	if (ret != 0)
+		return ret;
+
+	/* Any argument at all ends the command, one way or the other. */
+	if (argc > 0) {
+		if (matches(*argv, "help") == 0) {
+			dcb_dcbx_help_show();
+			return 0;
+		}
+
+		fprintf(stderr, "What is \"%s\"?\n", *argv);
+		dcb_dcbx_help_show();
+		return -EINVAL;
+	}
+
+	open_json_object(NULL);
+	dcb_dcbx_print(dcbx);
+	close_json_object();
+	return 0;
+}
+
+/* Entry point of "dcb dcbx": dispatch on the subcommand keyword (show, set).
+ * With no arguments or "help" the usage is printed and 0 is returned; an
+ * unknown keyword yields -EINVAL.
+ */
+int dcb_cmd_dcbx(struct dcb *dcb, int argc, char **argv)
+{
+	if (!argc || matches(*argv, "help") == 0) {
+		dcb_dcbx_help();
+		return 0;
+	} else if (matches(*argv, "show") == 0) {
+		NEXT_ARG_FWD();
+		return dcb_cmd_parse_dev(dcb, argc, argv,
+					 dcb_cmd_dcbx_show, dcb_dcbx_help_show);
+	} else if (matches(*argv, "set") == 0) {
+		NEXT_ARG_FWD();
+		return dcb_cmd_parse_dev(dcb, argc, argv,
+					 dcb_cmd_dcbx_set, dcb_dcbx_help_set);
+	} else {
+		fprintf(stderr, "What is \"%s\"?\n", *argv);
+		dcb_dcbx_help();
+		return -EINVAL;
+	}
+}
--- a/dcb/dcb_ets.c
+++ b/dcb/dcb_ets.c
@ -0,0 +1,435 @@
+// SPDX-License-Identifier: GPL-2.0+
+
+#include <errno.h>
+#include <stdio.h>
+#include <linux/dcbnl.h>
+
+#include "dcb.h"
+#include "utils.h"
+
+/* Print the usage of "dcb ets set" to stderr. */
+static void dcb_ets_help_set(void)
+{
+	fprintf(stderr,
+		"Usage: dcb ets set dev STRING\n"
+		"           [ willing { on | off } ]\n"
+		"           [ { tc-tsa | reco-tc-tsa } TSA-MAP ]\n"
+		"           [ { pg-bw | tc-bw | reco-tc-bw } BW-MAP ]\n"
+		"           [ { prio-tc | reco-prio-tc } PRIO-MAP ]\n"
+		"\n"
+		" where TSA-MAP := [ TSA-MAP ] TSA-MAPPING\n"
+		"       TSA-MAPPING := { all | TC }:{ strict | cbs | ets | vendor }\n"
+		"       BW-MAP := [ BW-MAP ] BW-MAPPING\n"
+		"       BW-MAPPING := { all | TC }:INTEGER\n"
+		"       PRIO-MAP := [ PRIO-MAP ] PRIO-MAPPING\n"
+		"       PRIO-MAPPING := { all | PRIO }:TC\n"
+		"       TC := { 0 .. 7 }\n"
+		"       PRIO := { 0 .. 7 }\n"
+		"\n"
+	);
+}
+
+/* Print the usage of "dcb ets show" to stderr. */
+static void dcb_ets_help_show(void)
+{
+	fprintf(stderr,
+		"Usage: dcb ets show dev STRING\n"
+		"           [ willing ] [ ets-cap ] [ cbs ] [ tc-tsa ]\n"
+		"           [ reco-tc-tsa ] [ pg-bw ] [ tc-bw ] [ reco-tc-bw ]\n"
+		"           [ prio-tc ] [ reco-prio-tc ]\n"
+		"\n"
+	);
+}
+
+/* Print the complete "dcb ets" usage (help, show and set) to stderr. */
+static void dcb_ets_help(void)
+{
+	fprintf(stderr,
+		"Usage: dcb ets help\n"
+		"\n"
+	);
+	dcb_ets_help_show();
+	dcb_ets_help_set();
+}
+
+/* Keywords for the transmission selection algorithms, indexed by the
+ * IEEE_8021QAZ_TSA_* value. Used both for parsing and for printing.
+ */
+static const char *const tsa_names[] = {
+	[IEEE_8021QAZ_TSA_STRICT] = "strict",
+	[IEEE_8021QAZ_TSA_CB_SHAPER] = "cbs",
+	[IEEE_8021QAZ_TSA_ETS] = "ets",
+	[IEEE_8021QAZ_TSA_VENDOR] = "vendor",
+};
+
+/* Parse one TC:TSA mapping. The value is looked up in tsa_names with
+ * parse_one_of(); a failed lookup returns its error. The u8 array to update
+ * is passed in @data.
+ */
+static int dcb_ets_parse_mapping_tc_tsa(__u32 key, char *value, void *data)
+{
+	__u8 tsa;
+	int ret;
+
+	tsa = parse_one_of("TSA", value, tsa_names, ARRAY_SIZE(tsa_names), &ret);
+	if (ret)
+		return ret;
+
+	return dcb_parse_mapping("TC", key, IEEE_8021QAZ_MAX_TCS - 1,
+				 "TSA", tsa, -1U,
+				 dcb_set_u8, data);
+}
+
+/* Parse one TC:BW mapping. The value must parse as a u8; an upper bound of
+ * 100 is passed to dcb_parse_mapping(). The u8 array to update is passed in
+ * @data.
+ */
+static int dcb_ets_parse_mapping_tc_bw(__u32 key, char *value, void *data)
+{
+	__u8 bw;
+
+	if (get_u8(&bw, value, 0))
+		return -EINVAL;
+
+	return dcb_parse_mapping("TC", key, IEEE_8021QAZ_MAX_TCS - 1,
+				 "BW", bw, 100,
+				 dcb_set_u8, data);
+}
+
+/* Parse one PRIO:TC mapping. The value must parse as a u8. The key is a
+ * priority, so it is bounded by the number of priorities
+ * (IEEE_8021Q_MAX_PRIORITIES), while the value is bounded by the number of
+ * traffic classes. The u8 array to update is passed in @data.
+ */
+static int dcb_ets_parse_mapping_prio_tc(__u32 key, char *value, void *data)
+{
+	__u8 tc;
+
+	if (get_u8(&tc, value, 0))
+		return -EINVAL;
+
+	return dcb_parse_mapping("PRIO", key, IEEE_8021Q_MAX_PRIORITIES - 1,
+				 "TC", tc, IEEE_8021QAZ_MAX_TCS - 1,
+				 dcb_set_u8, data);
+}
+
+/* Print a u8 array of TSA values using the keywords in tsa_names. */
+static void dcb_print_array_tsa(const __u8 *array, size_t size)
+{
+	dcb_print_array_kw(array, size, tsa_names, ARRAY_SIZE(tsa_names));
+}
+
+/* Print the willing field of @ets as on/off. */
+static void dcb_ets_print_willing(const struct ieee_ets *ets)
+{
+	print_on_off(PRINT_ANY, "willing", "willing %s ", ets->willing);
+}
+
+/* Print the ets_cap field of @ets as a number. */
+static void dcb_ets_print_ets_cap(const struct ieee_ets *ets)
+{
+	print_uint(PRINT_ANY, "ets_cap", "ets-cap %d ", ets->ets_cap);
+}
+
+/* Print the cbs field of @ets as on/off. */
+static void dcb_ets_print_cbs(const struct ieee_ets *ets)
+{
+	print_on_off(PRINT_ANY, "cbs", "cbs %s ", ets->cbs);
+}
+
+/* Print the tc_tx_bw array of @ets under the name "tc-bw". */
+static void dcb_ets_print_tc_bw(const struct ieee_ets *ets)
+{
+	dcb_print_named_array("tc_bw", "tc-bw",
+			      ets->tc_tx_bw, ARRAY_SIZE(ets->tc_tx_bw),
+			      dcb_print_array_u8);
+}
+
+/* Print the tc_rx_bw array of @ets under the name "pg-bw". */
+static void dcb_ets_print_pg_bw(const struct ieee_ets *ets)
+{
+	dcb_print_named_array("pg_bw", "pg-bw",
+			      ets->tc_rx_bw, ARRAY_SIZE(ets->tc_rx_bw),
+			      dcb_print_array_u8);
+}
+
+/* Print the tc_tsa array of @ets using TSA keywords. */
+static void dcb_ets_print_tc_tsa(const struct ieee_ets *ets)
+{
+	dcb_print_named_array("tc_tsa", "tc-tsa",
+			      ets->tc_tsa, ARRAY_SIZE(ets->tc_tsa),
+			      dcb_print_array_tsa);
+}
+
+/* Print the prio_tc array of @ets. */
+static void dcb_ets_print_prio_tc(const struct ieee_ets *ets)
+{
+	dcb_print_named_array("prio_tc", "prio-tc",
+			      ets->prio_tc, ARRAY_SIZE(ets->prio_tc),
+			      dcb_print_array_u8);
+}
+
+/* Print the tc_reco_bw array of @ets under the name "reco-tc-bw". */
+static void dcb_ets_print_reco_tc_bw(const struct ieee_ets *ets)
+{
+	dcb_print_named_array("reco_tc_bw", "reco-tc-bw",
+			      ets->tc_reco_bw, ARRAY_SIZE(ets->tc_reco_bw),
+			      dcb_print_array_u8);
+}
+
+/* Print the tc_reco_tsa array of @ets using TSA keywords. */
+static void dcb_ets_print_reco_tc_tsa(const struct ieee_ets *ets)
+{
+	dcb_print_named_array("reco_tc_tsa", "reco-tc-tsa",
+			      ets->tc_reco_tsa, ARRAY_SIZE(ets->tc_reco_tsa),
+			      dcb_print_array_tsa);
+}
+
+/* Print the reco_prio_tc array of @ets. */
+static void dcb_ets_print_reco_prio_tc(const struct ieee_ets *ets)
+{
+	dcb_print_named_array("reco_prio_tc", "reco-prio-tc",
+			      ets->reco_prio_tc, ARRAY_SIZE(ets->reco_prio_tc),
+			      dcb_print_array_u8);
+}
+
+/* Print all ETS attributes. The three scalar fields share the first line;
+ * each array follows on a line of its own.
+ */
+static void dcb_ets_print(const struct ieee_ets *ets)
+{
+	dcb_ets_print_willing(ets);
+	dcb_ets_print_ets_cap(ets);
+	dcb_ets_print_cbs(ets);
+	print_nl();
+
+	dcb_ets_print_tc_bw(ets);
+	print_nl();
+
+	dcb_ets_print_pg_bw(ets);
+	print_nl();
+
+	dcb_ets_print_tc_tsa(ets);
+	print_nl();
+
+	dcb_ets_print_prio_tc(ets);
+	print_nl();
+
+	dcb_ets_print_reco_tc_bw(ets);
+	print_nl();
+
+	dcb_ets_print_reco_tc_tsa(ets);
+	print_nl();
+
+	dcb_ets_print_reco_prio_tc(ets);
+	print_nl();
+}
+
+/* Read the DCB_ATTR_IEEE_ETS attribute of @dev into @ets. */
+static int dcb_ets_get(struct dcb *dcb, const char *dev, struct ieee_ets *ets)
+{
+	return dcb_get_attribute(dcb, dev, DCB_ATTR_IEEE_ETS, ets, sizeof(*ets));
+}
+
+/* Check a per-TC bandwidth table against its TSA table.
+ *
+ * @bw:   bandwidth percentage per TC (IEEE_8021QAZ_MAX_TCS entries).
+ * @tsa:  transmission selection algorithm per TC.
+ * @what: name of the table, used in diagnostics.
+ *
+ * Returns 0 when the table is acceptable, otherwise prints a diagnostic and
+ * returns -EINVAL.
+ */
+static int dcb_ets_validate_bw(const __u8 bw[], const __u8 tsa[], const char *what)
+{
+	unsigned int total = 0;
+	bool has_ets = false;
+	unsigned int tc;
+
+	/* TC bandwidth is only intended for ETS, but 802.1Q-2018 only requires
+	 * that the sum be 100, and individual entries 0..100. It explicitly
+	 * notes that non-ETS TCs can have non-0 TC bandwidth during
+	 * reconfiguration.
+	 */
+	for (tc = 0; tc < IEEE_8021QAZ_MAX_TCS; tc++) {
+		/* Remember whether any TC uses ETS; only needed once all
+		 * entries have passed the range check.
+		 */
+		if (tsa[tc] == IEEE_8021QAZ_TSA_ETS)
+			has_ets = true;
+
+		if (bw[tc] > 100) {
+			fprintf(stderr, "%d%% for TC %d of %s is not a valid bandwidth percentage, expected 0..100%%\n",
+				bw[tc], tc, what);
+			return -EINVAL;
+		}
+		total += bw[tc];
+	}
+
+	/* A sum of 100 is what 802.1Q-2018 requires.
+	 *
+	 * But this requirement does not make sense for all-strict
+	 * configurations. Anything else than 0 does not make sense: either BW
+	 * has not been reconfigured for the all-strict allocation yet, at which
+	 * point we expect sum of 100. Or it has already been reconfigured, at
+	 * which point accept 0.
+	 */
+	if (total == 100 || (!has_ets && total == 0))
+		return 0;
+
+	fprintf(stderr, "Bandwidth percentages in %s sum to %d%%, expected %d%%\n",
+		what, total, has_ets ? 100 : 0);
+	return -EINVAL;
+}
+
+/* Validate the tc-bw and reco-tc-bw tables of @ets and, if both pass, write
+ * @ets to @dev as the DCB_ATTR_IEEE_ETS attribute. Returns -EINVAL when
+ * validation fails.
+ */
+static int dcb_ets_set(struct dcb *dcb, const char *dev, const struct ieee_ets *ets)
+{
+	/* Do not validate pg-bw, which is not standard and has unclear
+	 * meaning.
+	 */
+	if (dcb_ets_validate_bw(ets->tc_tx_bw, ets->tc_tsa, "tc-bw") ||
+	    dcb_ets_validate_bw(ets->tc_reco_bw, ets->tc_reco_tsa, "reco-tc-bw"))
+		return -EINVAL;
+
+	return dcb_set_attribute(dcb, dev, DCB_ATTR_IEEE_ETS, ets, sizeof(*ets));
+}
+
+/* "dcb ets set": read the current ETS configuration of @dev, apply the
+ * settings from the command line to it, and write the result back through
+ * dcb_ets_set(). On "help" the usage is printed and 0 is returned without
+ * writing anything.
+ */
+static int dcb_cmd_ets_set(struct dcb *dcb, const char *dev, int argc, char **argv)
+{
+	struct ieee_ets ets;
+	int ret;
+
+	/* NOTE(review): returns 1 here, whereas the buffer and dcbx set
+	 * commands return 0 in the same situation — confirm which is intended.
+	 */
+	if (!argc) {
+		dcb_ets_help_set();
+		return 1;
+	}
+
+	ret = dcb_ets_get(dcb, dev, &ets);
+	if (ret)
+		return ret;
+
+	do {
+		if (matches(*argv, "help") == 0) {
+			dcb_ets_help_set();
+			return 0;
+		} else if (matches(*argv, "willing") == 0) {
+			NEXT_ARG();
+			ets.willing = parse_on_off("willing", *argv, &ret);
+			if (ret)
+				return ret;
+		} else if (matches(*argv, "tc-tsa") == 0) {
+			NEXT_ARG();
+			ret = parse_mapping(&argc, &argv, true, &dcb_ets_parse_mapping_tc_tsa,
+					    ets.tc_tsa);
+			if (ret) {
+				fprintf(stderr, "Invalid tc-tsa mapping %s\n", *argv);
+				return ret;
+			}
+			/* parse_mapping() is handed argc/argv to update, so
+			 * skip the NEXT_ARG_FWD() at the bottom of the loop.
+			 */
+			continue;
+		} else if (matches(*argv, "reco-tc-tsa") == 0) {
+			NEXT_ARG();
+			ret = parse_mapping(&argc, &argv, true, &dcb_ets_parse_mapping_tc_tsa,
+					    ets.tc_reco_tsa);
+			if (ret) {
+				fprintf(stderr, "Invalid reco-tc-tsa mapping %s\n", *argv);
+				return ret;
+			}
+			continue;
+		} else if (matches(*argv, "tc-bw") == 0) {
+			NEXT_ARG();
+			ret = parse_mapping(&argc, &argv, true, &dcb_ets_parse_mapping_tc_bw,
+					    ets.tc_tx_bw);
+			if (ret) {
+				fprintf(stderr, "Invalid tc-bw mapping %s\n", *argv);
+				return ret;
+			}
+			continue;
+		} else if (matches(*argv, "pg-bw") == 0) {
+			NEXT_ARG();
+			ret = parse_mapping(&argc, &argv, true, &dcb_ets_parse_mapping_tc_bw,
+					    ets.tc_rx_bw);
+			if (ret) {
+				fprintf(stderr, "Invalid pg-bw mapping %s\n", *argv);
+				return ret;
+			}
+			continue;
+		} else if (matches(*argv, "reco-tc-bw") == 0) {
+			NEXT_ARG();
+			ret = parse_mapping(&argc, &argv, true, &dcb_ets_parse_mapping_tc_bw,
+					    ets.tc_reco_bw);
+			if (ret) {
+				fprintf(stderr, "Invalid reco-tc-bw mapping %s\n", *argv);
+				return ret;
+			}
+			continue;
+		} else if (matches(*argv, "prio-tc") == 0) {
+			NEXT_ARG();
+			ret = parse_mapping(&argc, &argv, true, &dcb_ets_parse_mapping_prio_tc,
+					    ets.prio_tc);
+			if (ret) {
+				fprintf(stderr, "Invalid prio-tc mapping %s\n", *argv);
+				return ret;
+			}
+			continue;
+		} else if (matches(*argv, "reco-prio-tc") == 0) {
+			NEXT_ARG();
+			ret = parse_mapping(&argc, &argv, true, &dcb_ets_parse_mapping_prio_tc,
+					    ets.reco_prio_tc);
+			if (ret) {
+				fprintf(stderr, "Invalid reco-prio-tc mapping %s\n", *argv);
+				return ret;
+			}
+			continue;
+		} else {
+			fprintf(stderr, "What is \"%s\"?\n", *argv);
+			dcb_ets_help_set();
+			return -EINVAL;
+		}
+
+		NEXT_ARG_FWD();
+	} while (argc > 0);
+
+	return dcb_ets_set(dcb, dev, &ets);
+}
+
+/* "dcb ets show": read the ETS configuration of @dev and print either
+ * everything (no arguments) or only the attributes named on the command line.
+ * Returns 0 on success, -EINVAL for an unknown keyword, or the error from
+ * reading the attribute.
+ */
+static int dcb_cmd_ets_show(struct dcb *dcb, const char *dev, int argc, char **argv)
+{
+	struct ieee_ets ets;
+	int ret;
+
+	ret = dcb_ets_get(dcb, dev, &ets);
+	if (ret)
+		return ret;
+
+	open_json_object(NULL);
+
+	if (!argc) {
+		dcb_ets_print(&ets);
+		goto out;
+	}
+
+	do {
+		if (matches(*argv, "help") == 0) {
+			dcb_ets_help_show();
+			goto out;
+		} else if (matches(*argv, "willing") == 0) {
+			dcb_ets_print_willing(&ets);
+			print_nl();
+		} else if (matches(*argv, "ets-cap") == 0) {
+			dcb_ets_print_ets_cap(&ets);
+			print_nl();
+		} else if (matches(*argv, "cbs") == 0) {
+			dcb_ets_print_cbs(&ets);
+			print_nl();
+		} else if (matches(*argv, "tc-tsa") == 0) {
+			dcb_ets_print_tc_tsa(&ets);
+			print_nl();
+		} else if (matches(*argv, "reco-tc-tsa") == 0) {
+			dcb_ets_print_reco_tc_tsa(&ets);
+			print_nl();
+		} else if (matches(*argv, "tc-bw") == 0) {
+			dcb_ets_print_tc_bw(&ets);
+			print_nl();
+		} else if (matches(*argv, "pg-bw") == 0) {
+			dcb_ets_print_pg_bw(&ets);
+			print_nl();
+		} else if (matches(*argv, "reco-tc-bw") == 0) {
+			dcb_ets_print_reco_tc_bw(&ets);
+			print_nl();
+		} else if (matches(*argv, "prio-tc") == 0) {
+			dcb_ets_print_prio_tc(&ets);
+			print_nl();
+		} else if (matches(*argv, "reco-prio-tc") == 0) {
+			dcb_ets_print_reco_prio_tc(&ets);
+			print_nl();
+		} else {
+			fprintf(stderr, "What is \"%s\"?\n", *argv);
+			dcb_ets_help_show();
+			ret = -EINVAL;
+			goto out;
+		}
+
+		NEXT_ARG_FWD();
+	} while (argc > 0);
+
+out:
+	/* Every exit after open_json_object() funnels through here so the
+	 * JSON object is always closed, including on help and on errors.
+	 */
+	close_json_object();
+	return ret;
+}
+
+/* Entry point of "dcb ets": dispatch on the subcommand keyword (show, set).
+ * With no arguments or "help" the usage is printed and 0 is returned; an
+ * unknown keyword yields -EINVAL.
+ */
+int dcb_cmd_ets(struct dcb *dcb, int argc, char **argv)
+{
+	if (!argc || matches(*argv, "help") == 0) {
+		dcb_ets_help();
+		return 0;
+	} else if (matches(*argv, "show") == 0) {
+		NEXT_ARG_FWD();
+		return dcb_cmd_parse_dev(dcb, argc, argv, dcb_cmd_ets_show, dcb_ets_help_show);
+	} else if (matches(*argv, "set") == 0) {
+		NEXT_ARG_FWD();
+		return dcb_cmd_parse_dev(dcb, argc, argv, dcb_cmd_ets_set, dcb_ets_help_set);
+	} else {
+		fprintf(stderr, "What is \"%s\"?\n", *argv);
+		dcb_ets_help();
+		return -EINVAL;
+	}
+}
--- a/dcb/dcb_maxrate.c
+++ b/dcb/dcb_maxrate.c
@ -0,0 +1,182 @@
+// SPDX-License-Identifier: GPL-2.0+
+
+#include <errno.h>
+#include <inttypes.h>
+#include <stdio.h>
+#include <linux/dcbnl.h>
+
+#include "dcb.h"
+#include "utils.h"
+
+/* Print the usage text for "dcb maxrate set" on stderr. */
+static void dcb_maxrate_help_set(void)
+{
+	static const char usage[] =
+		"Usage: dcb maxrate set dev STRING\n"
+		"           [ tc-maxrate RATE-MAP ]\n"
+		"\n"
+		" where RATE-MAP := [ RATE-MAP ] RATE-MAPPING\n"
+		"       RATE-MAPPING := { all | TC }:RATE\n"
+		"       TC := { 0 .. 7 }\n"
+		"\n";
+
+	fputs(usage, stderr);
+}
+
+/* Print the usage text for "dcb maxrate show" on stderr. */
+static void dcb_maxrate_help_show(void)
+{
+	static const char usage[] =
+		"Usage: dcb [ -i ] maxrate show dev STRING\n"
+		"           [ tc-maxrate ]\n"
+		"\n";
+
+	fputs(usage, stderr);
+}
+
+/* Print the complete "dcb maxrate" usage on stderr: the help line itself,
+ * followed by the "show" and "set" usage texts.
+ */
+static void dcb_maxrate_help(void)
+{
+	fputs("Usage: dcb maxrate help\n"
+	      "\n",
+	      stderr);
+
+	dcb_maxrate_help_show();
+	dcb_maxrate_help_set();
+}
+
+/* Mapping callback for "tc-maxrate": convert the textual RATE in value with
+ * get_rate64() and pass it, keyed by key, to dcb_parse_mapping(), which is
+ * given dcb_set_u64 and data as the setter and its target.
+ * Returns -EINVAL when the rate cannot be parsed, otherwise whatever
+ * dcb_parse_mapping() returns.
+ */
+static int dcb_maxrate_parse_mapping_tc_maxrate(__u32 key, char *value, void *data)
+{
+	__u64 parsed_rate;
+
+	if (get_rate64(&parsed_rate, value) != 0)
+		return -EINVAL;
+
+	/* The key is limited to the highest valid traffic-class index. */
+	return dcb_parse_mapping("TC", key, IEEE_8021QAZ_MAX_TCS - 1,
+				 "RATE", parsed_rate, -1,
+				 dcb_set_u64, data);
+}
+
+/* Print every entry of maxrate->tc_maxrate as "INDEX:RATE", preceded by the
+ * "tc-maxrate " label in plain-text output and wrapped in a "tc_maxrate"
+ * array in JSON output. dcb->use_iec is forwarded to print_rate().
+ */
+static void dcb_maxrate_print_tc_maxrate(struct dcb *dcb, const struct ieee_maxrate *maxrate)
+{
+	size_t size = ARRAY_SIZE(maxrate->tc_maxrate);
+	SPRINT_BUF(b);
+	size_t i;
+
+	open_json_array(PRINT_JSON, "tc_maxrate");
+	print_string(PRINT_FP, NULL, "tc-maxrate ", NULL);
+
+	for (i = 0; i < size; i++) {
+		/* Build the per-entry format string: the index is written now,
+		 * while the doubled %% leaves a literal "%s" in b for
+		 * print_rate() to fill in. i is a size_t, so the matching
+		 * conversion is %zu (%zd is for the signed counterpart).
+		 */
+		snprintf(b, sizeof(b), "%zu:%%s ", i);
+		print_rate(dcb->use_iec, PRINT_ANY, NULL, b, maxrate->tc_maxrate[i]);
+	}
+
+	close_json_array(PRINT_JSON, "tc_maxrate");
+}
+
+/* Default "show" output: print the tc-maxrate table followed by a line
+ * break.
+ */
+static void dcb_maxrate_print(struct dcb *dcb, const struct ieee_maxrate *maxrate)
+{
+	dcb_maxrate_print_tc_maxrate(dcb, maxrate);
+	print_nl();
+}
+
+/* Fetch the DCB_ATTR_IEEE_MAXRATE attribute of dev into *maxrate through
+ * dcb_get_attribute() and return that helper's result.
+ */
+static int dcb_maxrate_get(struct dcb *dcb, const char *dev, struct ieee_maxrate *maxrate)
+{
+	return dcb_get_attribute(dcb, dev, DCB_ATTR_IEEE_MAXRATE, maxrate, sizeof(*maxrate));
+}
+
+/* Send *maxrate to dev as the DCB_ATTR_IEEE_MAXRATE attribute through
+ * dcb_set_attribute() and return that helper's result.
+ */
+static int dcb_maxrate_set(struct dcb *dcb, const char *dev, const struct ieee_maxrate *maxrate)
+{
+	return dcb_set_attribute(dcb, dev, DCB_ATTR_IEEE_MAXRATE, maxrate, sizeof(*maxrate));
+}
+
+/* Handle "dcb maxrate set dev DEV ...": read the current ieee_maxrate from
+ * the device, apply every "tc-maxrate" mapping given on the command line and
+ * write the result back.
+ * Returns 0 after printing help, -EINVAL for an unknown keyword, and
+ * otherwise the result of the get, parse or set helper that ended the call.
+ */
+static int dcb_cmd_maxrate_set(struct dcb *dcb, const char *dev, int argc, char **argv)
+{
+	struct ieee_maxrate maxrate;
+	int ret;
+
+	/* Nothing to set: show the usage instead of touching the device. */
+	if (!argc) {
+		dcb_maxrate_help_set();
+		return 0;
+	}
+
+	/* Start from the device's current settings so that only the entries
+	 * named on the command line are changed.
+	 */
+	ret = dcb_maxrate_get(dcb, dev, &maxrate);
+	if (ret)
+		return ret;
+
+	do {
+		if (matches(*argv, "help") == 0) {
+			dcb_maxrate_help_set();
+			return 0;
+		} else if (matches(*argv, "tc-maxrate") == 0) {
+			NEXT_ARG();
+			ret = parse_mapping(&argc, &argv, true,
+					    &dcb_maxrate_parse_mapping_tc_maxrate, &maxrate);
+			if (ret) {
+				fprintf(stderr, "Invalid mapping %s\n", *argv);
+				return ret;
+			}
+			/* parse_mapping() was given &argc/&argv, so presumably it
+			 * has already advanced past what it consumed (confirm);
+			 * skip the NEXT_ARG_FWD() below.
+			 */
+			continue;
+		} else {
+			fprintf(stderr, "What is \"%s\"?\n", *argv);
+			dcb_maxrate_help_set();
+			return -EINVAL;
+		}
+
+		NEXT_ARG_FWD();
+	} while (argc > 0);
+
+	return dcb_maxrate_set(dcb, dev, &maxrate);
+}
+
+/* Handle "dcb maxrate show dev DEV [ tc-maxrate ]": fetch the ieee_maxrate
+ * attribute and print either everything (no further arguments) or the
+ * items named on the command line.
+ * Returns the error from dcb_maxrate_get() if the fetch fails, 0 on success
+ * or after printing help, and -EINVAL for an unknown keyword.
+ */
+static int dcb_cmd_maxrate_show(struct dcb *dcb, const char *dev, int argc, char **argv)
+{
+	struct ieee_maxrate maxrate;
+	int ret;
+
+	ret = dcb_maxrate_get(dcb, dev, &maxrate);
+	if (ret)
+		return ret;
+
+	/* From here on ret is 0 and every exit goes through "out" so that the
+	 * JSON object opened below is always closed again.
+	 */
+	open_json_object(NULL);
+
+	if (!argc) {
+		dcb_maxrate_print(dcb, &maxrate);
+		goto out;
+	}
+
+	do {
+		if (matches(*argv, "help") == 0) {
+			dcb_maxrate_help_show();
+			goto out;
+		} else if (matches(*argv, "tc-maxrate") == 0) {
+			dcb_maxrate_print_tc_maxrate(dcb, &maxrate);
+			print_nl();
+		} else {
+			fprintf(stderr, "What is \"%s\"?\n", *argv);
+			dcb_maxrate_help_show();
+			ret = -EINVAL;
+			goto out;
+		}
+
+		NEXT_ARG_FWD();
+	} while (argc > 0);
+
+out:
+	close_json_object();
+	return ret;
+}
+
+/* Entry point for "dcb maxrate": dispatch on the first remaining argument.
+ * With no argument, or with "help", the usage text is printed and 0 is
+ * returned. "show" and "set" hand the rest of the command line to
+ * dcb_cmd_parse_dev() together with the matching handler and help callback.
+ * Anything else is reported on stderr and yields -EINVAL.
+ */
+int dcb_cmd_maxrate(struct dcb *dcb, int argc, char **argv)
+{
+	if (!argc || matches(*argv, "help") == 0) {
+		dcb_maxrate_help();
+		return 0;
+	}
+
+	if (matches(*argv, "show") == 0) {
+		NEXT_ARG_FWD();
+		return dcb_cmd_parse_dev(dcb, argc, argv,
+					 dcb_cmd_maxrate_show, dcb_maxrate_help_show);
+	}
+
+	if (matches(*argv, "set") == 0) {
+		NEXT_ARG_FWD();
+		return dcb_cmd_parse_dev(dcb, argc, argv,
+					 dcb_cmd_maxrate_set, dcb_maxrate_help_set);
+	}
+
+	/* Unknown sub-command. */
+	fprintf(stderr, "What is \"%s\"?\n", *argv);
+	dcb_maxrate_help();
+	return -EINVAL;
+}
--- a/dcb/dcb_pfc.c
+++ b/dcb/dcb_pfc.c
@ -0,0 +1,286 @@
+// SPDX-License-Identifier: GPL-2.0+
+
+#include <errno.h>
+#include <stdio.h>
+#include <linux/dcbnl.h>
+
+#include "dcb.h"
+#include "utils.h"
+
+/* Print the usage text for "dcb pfc set" on stderr. */
+static void dcb_pfc_help_set(void)
+{
+	static const char usage[] =
+		"Usage: dcb pfc set dev STRING\n"
+		"           [ prio-pfc PFC-MAP ]\n"
+		"           [ macsec-bypass { on | off } ]\n"
+		"           [ delay INTEGER ]\n"
+		"\n"
+		" where PFC-MAP := [ PFC-MAP ] PFC-MAPPING\n"
+		"       PFC-MAPPING := { all | TC }:PFC\n"
+		"       TC := { 0 .. 7 }\n"
+		"       PFC := { on | off }\n"
+		"\n";
+
+	fputs(usage, stderr);
+}
+
+/* Print the usage text for "dcb pfc show" on stderr. */
+static void dcb_pfc_help_show(void)
+{
+	static const char usage[] =
+		"Usage: dcb [ -s ] pfc show dev STRING\n"
+		"           [ pfc-cap ] [ prio-pfc ] [ macsec-bypass ]\n"
+		"           [ delay ] [ requests ] [ indications ]\n"
+		"\n";
+
+	fputs(usage, stderr);
+}
+
+/* Print the complete "dcb pfc" usage on stderr: the help line itself,
+ * followed by the "show" and "set" usage texts.
+ */
+static void dcb_pfc_help(void)
+{
+	fputs("Usage: dcb pfc help\n"
+	      "\n",
+	      stderr);
+
+	dcb_pfc_help_show();
+	dcb_pfc_help_set();
+}
+
+/* Expand the pfc_en bitmask into array[]: element i becomes 1 when bit i of
+ * pfc_en is set and 0 otherwise.
+ */
+static void dcb_pfc_to_array(__u8 array[IEEE_8021QAZ_MAX_TCS], __u8 pfc_en)
+{
+	int bit = 0;
+
+	while (bit < IEEE_8021QAZ_MAX_TCS) {
+		array[bit] = (pfc_en >> bit) & 1;
+		bit++;
+	}
+}
+
+/* Collapse array[] back into a bitmask: bit i of *pfc_en_p is set exactly
+ * when array[i] is non-zero. The inverse of dcb_pfc_to_array().
+ */
+static void dcb_pfc_from_array(__u8 array[IEEE_8021QAZ_MAX_TCS], __u8 *pfc_en_p)
+{
+	__u8 mask = 0;
+	int bit;
+
+	for (bit = IEEE_8021QAZ_MAX_TCS - 1; bit >= 0; bit--)
+		mask |= array[bit] ? 1 << bit : 0;
+
+	*pfc_en_p = mask;
+}
+
+/* Mapping callback for "prio-pfc": parse value as on/off and store it for
+ * key in the pfc_en bitmask of the struct ieee_pfc passed in data.
+ * The bitmask is expanded into a per-entry array so that dcb_parse_mapping()
+ * can update it through dcb_set_u8, then folded back on success.
+ * Returns 0 on success, otherwise the error reported by parse_on_off() or
+ * dcb_parse_mapping(); pfc->pfc_en is left untouched on error.
+ */
+static int dcb_pfc_parse_mapping_prio_pfc(__u32 key, char *value, void *data)
+{
+	__u8 per_prio[IEEE_8021QAZ_MAX_TCS];
+	struct ieee_pfc *pfc = data;
+	bool on;
+	int err;
+
+	on = parse_on_off("PFC", value, &err);
+	if (err)
+		return err;
+
+	dcb_pfc_to_array(per_prio, pfc->pfc_en);
+
+	err = dcb_parse_mapping("PRIO", key, IEEE_8021QAZ_MAX_TCS - 1,
+				"PFC", on, -1,
+				dcb_set_u8, per_prio);
+	if (!err)
+		dcb_pfc_from_array(per_prio, &pfc->pfc_en);
+
+	return err;
+}
+
+/* Print pfc->pfc_cap as "pfc-cap N " (JSON key "pfc_cap").
+ * NOTE(review): the value goes through print_uint() but the plain-text
+ * format uses %d; confirm whether %u is the intended conversion.
+ */
+static void dcb_pfc_print_pfc_cap(const struct ieee_pfc *pfc)
+{
+	print_uint(PRINT_ANY, "pfc_cap", "pfc-cap %d ", pfc->pfc_cap);
+}
+
+/* Print pfc->mbc through print_on_off() as "macsec-bypass ... " (JSON key
+ * "macsec_bypass").
+ */
+static void dcb_pfc_print_macsec_bypass(const struct ieee_pfc *pfc)
+{
+	print_on_off(PRINT_ANY, "macsec_bypass", "macsec-bypass %s ", pfc->mbc);
+}
+
+/* Print pfc->delay as "delay N " (JSON key "delay").
+ * NOTE(review): the value goes through print_uint() but the plain-text
+ * format uses %d; confirm whether %u is the intended conversion.
+ */
+static void dcb_pfc_print_delay(const struct ieee_pfc *pfc)
+{
+	print_uint(PRINT_ANY, "delay", "delay %d ", pfc->delay);
+}
+
+/* Print the per-entry PFC state: expand the pfc->pfc_en bitmask into an
+ * array and hand it to dcb_print_named_array() with the on/off element
+ * printer, under the names "prio_pfc" / "prio-pfc".
+ */
+static void dcb_pfc_print_prio_pfc(const struct ieee_pfc *pfc)
+{
+	__u8 pfc_en[IEEE_8021QAZ_MAX_TCS];
+
+	dcb_pfc_to_array(pfc_en, pfc->pfc_en);
+	dcb_print_named_array("prio_pfc", "prio-pfc",
+			      pfc_en, ARRAY_SIZE(pfc_en), &dcb_print_array_on_off);
+}
+
+/* Print the pfc->requests array: a "requests" array in JSON output, the
+ * label "requests " followed by the values in plain-text output.
+ */
+static void dcb_pfc_print_requests(const struct ieee_pfc *pfc)
+{
+	open_json_array(PRINT_JSON, "requests");
+	print_string(PRINT_FP, NULL, "requests ", NULL);
+	dcb_print_array_u64(pfc->requests, ARRAY_SIZE(pfc->requests));
+	close_json_array(PRINT_JSON, "requests");
+}
+
+/* Print the pfc->indications array: an "indications" array in JSON output,
+ * the label "indications " followed by the values in plain-text output.
+ */
+static void dcb_pfc_print_indications(const struct ieee_pfc *pfc)
+{
+	open_json_array(PRINT_JSON, "indications");
+	print_string(PRINT_FP, NULL, "indications ", NULL);
+	dcb_print_array_u64(pfc->indications, ARRAY_SIZE(pfc->indications));
+	close_json_array(PRINT_JSON, "indications");
+}
+
+/* Default "show" output: one line with pfc-cap, macsec-bypass and delay,
+ * one line with the prio-pfc table and, only when dcb->stats is set, one
+ * line each for the requests and indications counters.
+ */
+static void dcb_pfc_print(const struct dcb *dcb, const struct ieee_pfc *pfc)
+{
+	dcb_pfc_print_pfc_cap(pfc);
+	dcb_pfc_print_macsec_bypass(pfc);
+	dcb_pfc_print_delay(pfc);
+	print_nl();
+
+	dcb_pfc_print_prio_pfc(pfc);
+	print_nl();
+
+	/* The counters are printed only when statistics were requested. */
+	if (!dcb->stats)
+		return;
+
+	dcb_pfc_print_requests(pfc);
+	print_nl();
+
+	dcb_pfc_print_indications(pfc);
+	print_nl();
+}
+
+/* Fetch the DCB_ATTR_IEEE_PFC attribute of dev into *pfc through
+ * dcb_get_attribute() and return that helper's result.
+ */
+static int dcb_pfc_get(struct dcb *dcb, const char *dev, struct ieee_pfc *pfc)
+{
+	return dcb_get_attribute(dcb, dev, DCB_ATTR_IEEE_PFC, pfc, sizeof(*pfc));
+}
+
+/* Send *pfc to dev as the DCB_ATTR_IEEE_PFC attribute through
+ * dcb_set_attribute() and return that helper's result.
+ */
+static int dcb_pfc_set(struct dcb *dcb, const char *dev, const struct ieee_pfc *pfc)
+{
+	return dcb_set_attribute(dcb, dev, DCB_ATTR_IEEE_PFC, pfc, sizeof(*pfc));
+}
+
+/* Handle "dcb pfc set dev DEV ...": read the current ieee_pfc from the
+ * device, apply the "prio-pfc", "macsec-bypass" and "delay" settings given
+ * on the command line and write the result back.
+ * Returns 0 after printing help, -EINVAL for an unknown keyword or an
+ * invalid delay, and otherwise the result of the get, parse or set helper
+ * that ended the call.
+ */
+static int dcb_cmd_pfc_set(struct dcb *dcb, const char *dev, int argc, char **argv)
+{
+	struct ieee_pfc pfc;
+	int ret;
+
+	/* Nothing to set: show the usage instead of touching the device. */
+	if (!argc) {
+		dcb_pfc_help_set();
+		return 0;
+	}
+
+	/* Start from the device's current settings so that only the fields
+	 * named on the command line are changed.
+	 */
+	ret = dcb_pfc_get(dcb, dev, &pfc);
+	if (ret)
+		return ret;
+
+	do {
+		if (matches(*argv, "help") == 0) {
+			dcb_pfc_help_set();
+			return 0;
+		} else if (matches(*argv, "prio-pfc") == 0) {
+			NEXT_ARG();
+			ret = parse_mapping(&argc, &argv, true,
+					    &dcb_pfc_parse_mapping_prio_pfc, &pfc);
+			if (ret) {
+				fprintf(stderr, "Invalid pfc mapping %s\n", *argv);
+				return ret;
+			}
+			/* parse_mapping() was given &argc/&argv, so presumably it
+			 * has already advanced past what it consumed (confirm);
+			 * skip the NEXT_ARG_FWD() below.
+			 */
+			continue;
+		} else if (matches(*argv, "macsec-bypass") == 0) {
+			NEXT_ARG();
+			pfc.mbc = parse_on_off("macsec-bypass", *argv, &ret);
+			if (ret)
+				return ret;
+		} else if (matches(*argv, "delay") == 0) {
+			NEXT_ARG();
+			/* Do not support the size notations for delay.
+			 * Delay is specified in "bit times", not bits, so
+			 * it is not applicable. At the same time it would
+			 * be confusing that 10Kbit does not mean 10240,
+			 * but 1280.
+			 */
+			if (get_u16(&pfc.delay, *argv, 0)) {
+				fprintf(stderr, "Invalid delay `%s', expected an integer 0..65535\n",
+					*argv);
+				return -EINVAL;
+			}
+		} else {
+			fprintf(stderr, "What is \"%s\"?\n", *argv);
+			dcb_pfc_help_set();
+			return -EINVAL;
+		}
+
+		NEXT_ARG_FWD();
+	} while (argc > 0);
+
+	return dcb_pfc_set(dcb, dev, &pfc);
+}
+
+/* Handle "dcb pfc show dev DEV [ ITEM... ]": fetch the ieee_pfc attribute
+ * and print either everything (no further arguments) or only the items
+ * named on the command line, each on its own line.
+ * Returns the error from dcb_pfc_get() if the fetch fails, 0 on success or
+ * after printing help, and -EINVAL for an unknown keyword.
+ */
+static int dcb_cmd_pfc_show(struct dcb *dcb, const char *dev, int argc, char **argv)
+{
+	struct ieee_pfc pfc;
+	int ret;
+
+	ret = dcb_pfc_get(dcb, dev, &pfc);
+	if (ret)
+		return ret;
+
+	/* From here on ret is 0 and every exit goes through "out" so that the
+	 * JSON object opened below is always closed again.
+	 */
+	open_json_object(NULL);
+
+	if (!argc) {
+		dcb_pfc_print(dcb, &pfc);
+		goto out;
+	}
+
+	do {
+		if (matches(*argv, "help") == 0) {
+			dcb_pfc_help_show();
+			goto out;
+		} else if (matches(*argv, "prio-pfc") == 0) {
+			dcb_pfc_print_prio_pfc(&pfc);
+			print_nl();
+		} else if (matches(*argv, "pfc-cap") == 0) {
+			dcb_pfc_print_pfc_cap(&pfc);
+			print_nl();
+		} else if (matches(*argv, "macsec-bypass") == 0) {
+			dcb_pfc_print_macsec_bypass(&pfc);
+			print_nl();
+		} else if (matches(*argv, "delay") == 0) {
+			dcb_pfc_print_delay(&pfc);
+			print_nl();
+		} else if (matches(*argv, "requests") == 0) {
+			dcb_pfc_print_requests(&pfc);
+			print_nl();
+		} else if (matches(*argv, "indications") == 0) {
+			dcb_pfc_print_indications(&pfc);
+			print_nl();
+		} else {
+			fprintf(stderr, "What is \"%s\"?\n", *argv);
+			dcb_pfc_help_show();
+			ret = -EINVAL;
+			goto out;
+		}
+
+		NEXT_ARG_FWD();
+	} while (argc > 0);
+
+out:
+	close_json_object();
+	return ret;
+}
+
+/* Entry point for "dcb pfc": dispatch on the first remaining argument.
+ * With no argument, or with "help", the usage text is printed and 0 is
+ * returned. "show" and "set" hand the rest of the command line to
+ * dcb_cmd_parse_dev() together with the matching handler and help callback.
+ * Anything else is reported on stderr and yields -EINVAL.
+ */
+int dcb_cmd_pfc(struct dcb *dcb, int argc, char **argv)
+{
+	if (!argc || matches(*argv, "help") == 0) {
+		dcb_pfc_help();
+		return 0;
+	}
+
+	if (matches(*argv, "show") == 0) {
+		NEXT_ARG_FWD();
+		return dcb_cmd_parse_dev(dcb, argc, argv,
+					 dcb_cmd_pfc_show, dcb_pfc_help_show);
+	}
+
+	if (matches(*argv, "set") == 0) {
+		NEXT_ARG_FWD();
+		return dcb_cmd_parse_dev(dcb, argc, argv,
+					 dcb_cmd_pfc_set, dcb_pfc_help_set);
+	}
+
+	/* Unknown sub-command. */
+	fprintf(stderr, "What is \"%s\"?\n", *argv);
+	dcb_pfc_help();
+	return -EINVAL;
+}
--- a/devlink/Makefile
+++ b/devlink/Makefile
@ -7,12 +7,13 @@ ifeq ($(HAVE_MNL),y)

 DEVLINKOBJ = devlink.o mnlg.o
 TARGETS += devlink
+LDLIBS += -lm

 endif

 all: $(TARGETS) $(LIBS)

-devlink: $(DEVLINKOBJ)
+devlink: $(DEVLINKOBJ) $(LIBNETLINK)
 	$(QUIET_LINK)$(CC) $^ $(LDFLAGS) $(LDLIBS) -o $@

 install: all
--- a/devlink/devlink.c
+++ b/devlink/devlink.c
--- a/devlink/mnlg.c
+++ b/devlink/mnlg.c
@ -14,11 +14,11 @@
 #include <string.h>
 #include <errno.h>
 #include <unistd.h>
-#include <time.h>
 #include <libmnl/libmnl.h>
 #include <linux/genetlink.h>

 #include "libnetlink.h"
+#include "mnl_utils.h"
 #include "utils.h"
 #include "mnlg.h"

@ -28,90 +28,13 @@ struct mnlg_socket {
 	uint32_t id;
 	uint8_t version;
 	unsigned int seq;
-	unsigned int portid;
 };

-static struct nlmsghdr *__mnlg_msg_prepare(struct mnlg_socket *nlg, uint8_t cmd,
-					   uint16_t flags, uint32_t id,
-					   uint8_t version)
-{
-	struct nlmsghdr *nlh;
-	struct genlmsghdr *genl;
-
-	nlh = mnl_nlmsg_put_header(nlg->buf);
-	nlh->nlmsg_type	= id;
-	nlh->nlmsg_flags = flags;
-	nlg->seq = time(NULL);
-	nlh->nlmsg_seq = nlg->seq;
-
-	genl = mnl_nlmsg_put_extra_header(nlh, sizeof(struct genlmsghdr));
-	genl->cmd = cmd;
-	genl->version = version;
-
-	return nlh;
-}
-
-struct nlmsghdr *mnlg_msg_prepare(struct mnlg_socket *nlg, uint8_t cmd,
-				  uint16_t flags)
-{
-	return __mnlg_msg_prepare(nlg, cmd, flags, nlg->id, nlg->version);
-}
-
-int mnlg_socket_send(struct mnlg_socket *nlg, const struct nlmsghdr *nlh)
+int mnlg_socket_send(struct mnlu_gen_socket *nlg, const struct nlmsghdr *nlh)
 {
 	return mnl_socket_sendto(nlg->nl, nlh, nlh->nlmsg_len);
 }

-static int mnlg_cb_noop(const struct nlmsghdr *nlh, void *data)
-{
-	return MNL_CB_OK;
-}
-
-static int mnlg_cb_error(const struct nlmsghdr *nlh, void *data)
-{
-	const struct nlmsgerr *err = mnl_nlmsg_get_payload(nlh);
-
-	/* Netlink subsystems returns the errno value with different signess */
-	if (err->error < 0)
-		errno = -err->error;
-	else
-		errno = err->error;
-
-	if (nl_dump_ext_ack(nlh, NULL))
-		return MNL_CB_ERROR;
-
-	return err->error == 0 ? MNL_CB_STOP : MNL_CB_ERROR;
-}
-
-static int mnlg_cb_stop(const struct nlmsghdr *nlh, void *data)
-{
-	return MNL_CB_STOP;
-}
-
-static mnl_cb_t mnlg_cb_array[NLMSG_MIN_TYPE] = {
-	[NLMSG_NOOP]	= mnlg_cb_noop,
-	[NLMSG_ERROR]	= mnlg_cb_error,
-	[NLMSG_DONE]	= mnlg_cb_stop,
-	[NLMSG_OVERRUN]	= mnlg_cb_noop,
-};
-
-int mnlg_socket_recv_run(struct mnlg_socket *nlg, mnl_cb_t data_cb, void *data)
-{
-	int err;
-
-	do {
-		err = mnl_socket_recvfrom(nlg->nl, nlg->buf,
-					  MNL_SOCKET_BUFFER_SIZE);
-		if (err <= 0)
-			break;
-		err = mnl_cb_run2(nlg->buf, err, nlg->seq, nlg->portid,
-				  data_cb, data, mnlg_cb_array,
-				  ARRAY_SIZE(mnlg_cb_array));
-	} while (err > 0);
-
-	return err;
-}
-
 struct group_info {
 	bool found;
 	uint32_t id;
@ -191,15 +114,17 @@ static int get_group_id_cb(const struct nlmsghdr *nlh, void *data)
 	return MNL_CB_OK;
 }

-int mnlg_socket_group_add(struct mnlg_socket *nlg, const char *group_name)
+int mnlg_socket_group_add(struct mnlu_gen_socket *nlg, const char *group_name)
 {
 	struct nlmsghdr *nlh;
 	struct group_info group_info;
 	int err;

-	nlh = __mnlg_msg_prepare(nlg, CTRL_CMD_GETFAMILY,
-				 NLM_F_REQUEST | NLM_F_ACK, GENL_ID_CTRL, 1);
-	mnl_attr_put_u16(nlh, CTRL_ATTR_FAMILY_ID, nlg->id);
+	nlh = _mnlu_gen_socket_cmd_prepare(nlg, CTRL_CMD_GETFAMILY,
+					   NLM_F_REQUEST | NLM_F_ACK,
+					   GENL_ID_CTRL, 1);
+
+	mnl_attr_put_u16(nlh, CTRL_ATTR_FAMILY_ID, nlg->family);

 	err = mnlg_socket_send(nlg, nlh);
 	if (err < 0)
@ -207,7 +132,7 @@ int mnlg_socket_group_add(struct mnlg_socket *nlg, const char *group_name)

 	group_info.found = false;
 	group_info.name = group_name;
-	err = mnlg_socket_recv_run(nlg, get_group_id_cb, &group_info);
+	err = mnlu_gen_socket_recv_run(nlg, get_group_id_cb, &group_info);
 	if (err < 0)
 		return err;

@ -224,92 +149,7 @@ int mnlg_socket_group_add(struct mnlg_socket *nlg, const char *group_name)
 	return 0;
 }

-static int get_family_id_attr_cb(const struct nlattr *attr, void *data)
+int mnlg_socket_get_fd(struct mnlu_gen_socket *nlg)
 {
-	const struct nlattr **tb = data;
-	int type = mnl_attr_get_type(attr);
-
-	if (mnl_attr_type_valid(attr, CTRL_ATTR_MAX) < 0)
-		return MNL_CB_ERROR;
-
-	if (type == CTRL_ATTR_FAMILY_ID &&
-	    mnl_attr_validate(attr, MNL_TYPE_U16) < 0)
-		return MNL_CB_ERROR;
-	tb[type] = attr;
-	return MNL_CB_OK;
-}
-
-static int get_family_id_cb(const struct nlmsghdr *nlh, void *data)
-{
-	uint32_t *p_id = data;
-	struct nlattr *tb[CTRL_ATTR_MAX + 1] = {};
-	struct genlmsghdr *genl = mnl_nlmsg_get_payload(nlh);
-
-	mnl_attr_parse(nlh, sizeof(*genl), get_family_id_attr_cb, tb);
-	if (!tb[CTRL_ATTR_FAMILY_ID])
-		return MNL_CB_ERROR;
-	*p_id = mnl_attr_get_u16(tb[CTRL_ATTR_FAMILY_ID]);
-	return MNL_CB_OK;
-}
-
-struct mnlg_socket *mnlg_socket_open(const char *family_name, uint8_t version)
-{
-	struct mnlg_socket *nlg;
-	struct nlmsghdr *nlh;
-	int one = 1;
-	int err;
-
-	nlg = malloc(sizeof(*nlg));
-	if (!nlg)
-		return NULL;
-
-	nlg->buf = malloc(MNL_SOCKET_BUFFER_SIZE);
-	if (!nlg->buf)
-		goto err_buf_alloc;
-
-	nlg->nl = mnl_socket_open(NETLINK_GENERIC);
-	if (!nlg->nl)
-		goto err_mnl_socket_open;
-
-	/* Older kernels may no support capped/extended ACK reporting */
-	mnl_socket_setsockopt(nlg->nl, NETLINK_CAP_ACK, &one, sizeof(one));
-	mnl_socket_setsockopt(nlg->nl, NETLINK_EXT_ACK, &one, sizeof(one));
-
-	err = mnl_socket_bind(nlg->nl, 0, MNL_SOCKET_AUTOPID);
-	if (err < 0)
-		goto err_mnl_socket_bind;
-
-	nlg->portid = mnl_socket_get_portid(nlg->nl);
-
-	nlh = __mnlg_msg_prepare(nlg, CTRL_CMD_GETFAMILY,
-				 NLM_F_REQUEST | NLM_F_ACK, GENL_ID_CTRL, 1);
-	mnl_attr_put_strz(nlh, CTRL_ATTR_FAMILY_NAME, family_name);
-
-	err = mnlg_socket_send(nlg, nlh);
-	if (err < 0)
-		goto err_mnlg_socket_send;
-
-	err = mnlg_socket_recv_run(nlg, get_family_id_cb, &nlg->id);
-	if (err < 0)
-		goto err_mnlg_socket_recv_run;
-
-	nlg->version = version;
-	return nlg;
-
-err_mnlg_socket_recv_run:
-err_mnlg_socket_send:
-err_mnl_socket_bind:
-	mnl_socket_close(nlg->nl);
-err_mnl_socket_open:
-	free(nlg->buf);
-err_buf_alloc:
-	free(nlg);
-	return NULL;
-}
-
-void mnlg_socket_close(struct mnlg_socket *nlg)
-{
-	mnl_socket_close(nlg->nl);
-	free(nlg->buf);
-	free(nlg);
+	return mnl_socket_get_fd(nlg->nl);
 }
--- a/devlink/mnlg.h
+++ b/devlink/mnlg.h
@ -14,14 +14,10 @@

 #include <libmnl/libmnl.h>

-struct mnlg_socket;
+struct mnlu_gen_socket;

-struct nlmsghdr *mnlg_msg_prepare(struct mnlg_socket *nlg, uint8_t cmd,
-				  uint16_t flags);
-int mnlg_socket_send(struct mnlg_socket *nlg, const struct nlmsghdr *nlh);
-int mnlg_socket_recv_run(struct mnlg_socket *nlg, mnl_cb_t data_cb, void *data);
-int mnlg_socket_group_add(struct mnlg_socket *nlg, const char *group_name);
-struct mnlg_socket *mnlg_socket_open(const char *family_name, uint8_t version);
-void mnlg_socket_close(struct mnlg_socket *nlg);
+int mnlg_socket_send(struct mnlu_gen_socket *nlg, const struct nlmsghdr *nlh);
+int mnlg_socket_group_add(struct mnlu_gen_socket *nlg, const char *group_name);
+int mnlg_socket_get_fd(struct mnlu_gen_socket *nlg);

 #endif /* _MNLG_H_ */
--- a/doc/actions/actions-general
+++ b/doc/actions/actions-general
@ -6,8 +6,8 @@ What is it?
 -----------

 An extension to the filtering/classification architecture of Linux Traffic
-Control. 
-Up to 2.6.8 the only action that could be "attached" to a filter was policing. 
+Control.
+Up to 2.6.8 the only action that could be "attached" to a filter was policing.
 i.e you could say something like:

 -----
@ -17,7 +17,7 @@ tc filter add dev lo parent ffff: protocol ip prio 10 u32 match ip src \

 which implies "if a packet is seen on the ingress of the lo device with
 a source IP address of 127.0.0.1/32 we give it a classification id  of 1:1 and
-we execute a policing action which rate limits its bandwidth utilization 
+we execute a policing action which rate limits its bandwidth utilization
 to 1.5Mbps".

 The new extensions allow for more than just policing actions to be added.
@ -29,9 +29,9 @@ syntax which will work fine. Of course to get the required effect you need
 both newer tc and kernel. If you are reading this you have the
 right tc ;->

-A side effect is that we can now get stateless firewalling to work with tc. 
+A side effect is that we can now get stateless firewalling to work with tc.
 Essentially this is now an alternative to iptables.
-I won't go into details of my dislike for iptables at times, but 
+I won't go into details of my dislike for iptables at times, but
 scalability is one of the main issues; however, if you need stateful
 classification - use netfilter (for now).

@ -61,7 +61,7 @@ tc filter add dev lo parent 1:0 protocol ip prio 10 u32 \
 match ip src 127.0.0.1/32 flowid 1:1 \
 action police mtu 4000 rate 1500kbit burst 90k

-" generic Actions" (gact) at the moment are: 
+" generic Actions" (gact) at the moment are:
 { drop, pass, reclassify, continue}
 (If you have others, no listed here give me a reason and we will add them)
 +drop says to drop the packet
@ -93,43 +93,43 @@ decimal 12, then use flowid 1:c.

 3) A feature i call pipe
 The motivation is derived from Unix pipe mechanism but applied to packets.
-Essentially take a matching packet and pass it through 
+Essentially take a matching packet and pass it through
 action1 | action2 | action3 etc.
 You could do something similar to this with the tc policer and the "continue"
-operator but this rather restricts it to just the policer and requires 
-multiple rules (and lookups, hence quiet inefficient); 
+operator but this rather restricts it to just the policer and requires
+multiple rules (and lookups, hence quite inefficient);

-as an example -- and please note that this is just an example _not_ The 
+as an example -- and please note that this is just an example _not_ The
 Word Youve Been Waiting For (yes i have had problems giving examples
 which ended becoming dogma in documents and people modifying them a little
-to look clever); 
+to look clever);

-i selected the metering rates to be small so that i can show better how 
+i selected the metering rates to be small so that i can show better how
 things work.
- 
-The script below does the following: 
- an incoming packet from 10.0.0.21 is first given a firewall mark of 1. 

- It is then metered to make sure it does not exceed its allocated rate of 
+The script below does the following:
+- an incoming packet from 10.0.0.21 is first given a firewall mark of 1.
+
+- It is then metered to make sure it does not exceed its allocated rate of
 1Kbps. If it doesn't exceed rate, this is where we terminate action execution.

- If it does exceed its rate, its "color" changes to a mark of 2 and it is 
+- If it does exceed its rate, its "color" changes to a mark of 2 and it is
 then passed through a second meter.

-The second meter is shared across all flows on that device [i am surpised 
-that this seems to be not a well know feature of the policer; Bert was telling 
+-The second meter is shared across all flows on that device [i am surprised
+that this seems to be not a well known feature of the policer; Bert was telling
 me that someone was writing a qdisc just to do sharing across multiple devices;
 it must be the summer heat again; weve had someone doing that every year around
-summer  -- the key to sharing is to use a operator "index" in your policer 
-rules (example "index 20"). All your rules have to use the same index to 
+summer  -- the key to sharing is to use a operator "index" in your policer
+rules (example "index 20"). All your rules have to use the same index to
 share.]
- 
+
 -If the second meter is exceeded the color of the flow changes further to 3.

 -We then pass the packet to another meter which is shared across all devices
 in the system. If this meter is exceeded we drop the packet.

-Note the mark can be used further up the system to do things like policy 
+Note the mark can be used further up the system to do things like policy
 or more interesting things on the egress.

 ------------------ cut here -------------------------------
@ -161,31 +161,31 @@ action ipt -j mark --set-mark 3 \
 # and then attempt to borrow from a meter used by all devices in the
 # system. Should this be exceeded, drop the packet on the floor.
 action police index 20 mtu 5000 rate 1kbit burst 90k drop
--------------------------------- 
+---------------------------------

-Now lets see the actions installed with 
+Now lets see the actions installed with
 "tc filter show parent ffff: dev eth0"

 -------- output -----------
 jroot# tc filter show parent ffff: dev eth0
-filter protocol ip pref 1 u32 
-filter protocol ip pref 1 u32 fh 800: ht divisor 1 
-filter protocol ip pref 1 u32 fh 800::800 order 2048 key ht 800 bkt 0 flowid 1:15 
+filter protocol ip pref 1 u32
+filter protocol ip pref 1 u32 fh 800: ht divisor 1
+filter protocol ip pref 1 u32 fh 800::800 order 2048 key ht 800 bkt 0 flowid 1:15

-   action order 1: tablename: mangle  hook: NF_IP_PRE_ROUTING 
+   action order 1: tablename: mangle  hook: NF_IP_PRE_ROUTING
        target MARK set 0x1  index 2

-   action order 2: police 1 action pipe rate 1Kbit burst 9Kb mtu 2Kb 
+   action order 2: police 1 action pipe rate 1Kbit burst 9Kb mtu 2Kb

-   action order 3: tablename: mangle  hook: NF_IP_PRE_ROUTING 
+   action order 3: tablename: mangle  hook: NF_IP_PRE_ROUTING
        target MARK set 0x2  index 1

-   action order 4: police 30 action pipe rate 1Kbit burst 10Kb mtu 5000b 
+   action order 4: police 30 action pipe rate 1Kbit burst 10Kb mtu 5000b

-   action order 5: tablename: mangle  hook: NF_IP_PRE_ROUTING 
+   action order 5: tablename: mangle  hook: NF_IP_PRE_ROUTING
        target MARK set 0x3  index 3

-   action order 6: police 20 action drop rate 1Kbit burst 90Kb mtu 5000b 
+   action order 6: police 20 action drop rate 1Kbit burst 90Kb mtu 5000b

  match 0a000015/ffffffff at 12
 -------------------------------
@ -209,31 +209,31 @@ Now lets take a look at the stats with "tc -s filter show parent ffff: dev eth0"

 --------------
 jroot# tc -s filter show parent ffff: dev eth0
-filter protocol ip pref 1 u32 
-filter protocol ip pref 1 u32 fh 800: ht divisor 1 
+filter protocol ip pref 1 u32
+filter protocol ip pref 1 u32 fh 800: ht divisor 1
 filter protocol ip pref 1 u32 fh 800::800 order 2048 key ht 800 bkt 0 flowid 1:1
-5 
+5

-   action order 1: tablename: mangle  hook: NF_IP_PRE_ROUTING 
+   action order 1: tablename: mangle  hook: NF_IP_PRE_ROUTING
        target MARK set 0x1  index 2
-         Sent 188832 bytes 2248 pkts (dropped 0, overlimits 0) 
+         Sent 188832 bytes 2248 pkts (dropped 0, overlimits 0)

-   action order 2: police 1 action pipe rate 1Kbit burst 9Kb mtu 2Kb 
-         Sent 188832 bytes 2248 pkts (dropped 0, overlimits 2122) 
+   action order 2: police 1 action pipe rate 1Kbit burst 9Kb mtu 2Kb
+         Sent 188832 bytes 2248 pkts (dropped 0, overlimits 2122)

-   action order 3: tablename: mangle  hook: NF_IP_PRE_ROUTING 
+   action order 3: tablename: mangle  hook: NF_IP_PRE_ROUTING
        target MARK set 0x2  index 1
-         Sent 178248 bytes 2122 pkts (dropped 0, overlimits 0) 
+         Sent 178248 bytes 2122 pkts (dropped 0, overlimits 0)

-   action order 4: police 30 action pipe rate 1Kbit burst 10Kb mtu 5000b 
-         Sent 178248 bytes 2122 pkts (dropped 0, overlimits 1945) 
+   action order 4: police 30 action pipe rate 1Kbit burst 10Kb mtu 5000b
+         Sent 178248 bytes 2122 pkts (dropped 0, overlimits 1945)

-   action order 5: tablename: mangle  hook: NF_IP_PRE_ROUTING 
+   action order 5: tablename: mangle  hook: NF_IP_PRE_ROUTING
        target MARK set 0x3  index 3
-         Sent 163380 bytes 1945 pkts (dropped 0, overlimits 0) 
+         Sent 163380 bytes 1945 pkts (dropped 0, overlimits 0)

-   action order 6: police 20 action drop rate 1Kbit burst 90Kb mtu 5000b 
-         Sent 163380 bytes 1945 pkts (dropped 0, overlimits 437) 
+   action order 6: police 20 action drop rate 1Kbit burst 90Kb mtu 5000b
+         Sent 163380 bytes 1945 pkts (dropped 0, overlimits 437)

  match 0a000015/ffffffff at 12
 -------------------------------
@ -254,4 +254,3 @@ At the moment the focus has been on getting the architecture in place.
 Expect new things in the spurious time i have to work on this
 (particularly around end of year when i have typically get time off
 from work).
-
--- a/doc/actions/gact-usage
+++ b/doc/actions/gact-usage
@ -1,16 +1,16 @@

 gact <ACTION> [RAND] [INDEX]

-Where: 
-	ACTION := reclassify | drop | continue | pass | ok 
+Where:
+	ACTION := reclassify | drop | continue | pass | ok
 	RAND := random <RANDTYPE> <ACTION> <VAL>
 	RANDTYPE := netrand | determ
        VAL : = value not exceeding 10000
        INDEX := index value used
-      
+
 ACTION semantics
 - pass and ok are equivalent to accept
- continue allows to restart classification lookup
+- continue allows one to restart classification lookup
 - drop drops packets
 - reclassify implies continue classification where we left off

@ -42,14 +42,14 @@ filter u32 fh 800::800 order 2048 key ht 800 bkt 0 flowid 1:16  (rule hit 32 suc
         random type none pass val 0
         index 1 ref 1 bind 1 installed 59 sec used 35 sec
         Sent 1680 bytes 20 pkts (dropped 20, overlimits 0 )
- 
+
 ----

 # example 2
 #allow 1 out 10 randomly using the netrand generator
 tc filter add dev eth0 parent ffff: protocol ip prio 6 u32 match ip src \
 10.0.0.9/32 flowid 1:16 action drop random netrand ok 10
- 
+
 ping -c 20 10.0.0.9

 ----
@ -59,14 +59,14 @@ filter protocol ip pref 6 u32 filter protocol ip pref 6 u32 fh 800: ht divisor 1
         random type netrand pass val 10
         index 5 ref 1 bind 1 installed 49 sec used 25 sec
         Sent 1680 bytes 20 pkts (dropped 16, overlimits 0 )
-                                                                                
+
 --------
 #alternative: deterministically accept every second packet
 tc filter add dev eth0 parent ffff: protocol ip prio 6 u32 match ip src \
 10.0.0.9/32 flowid 1:16 action drop random determ ok 2
-                                                                                
+
 ping -c 20 10.0.0.9
-                                                                                
+
 tc -s filter show parent ffff: dev eth0
 -----
 filter protocol ip pref 6 u32 filter protocol ip pref 6 u32 fh 800: ht divisor 1filter protocol ip pref 6 u32 fh 800::800 order 2048 key ht 800 bkt 0 flowid 1:16  (rule hit 20 success 20)
@ -76,4 +76,3 @@ filter protocol ip pref 6 u32 filter protocol ip pref 6 u32 fh 800: ht divisor 1
         index 4 ref 1 bind 1 installed 118 sec used 82 sec
         Sent 1680 bytes 20 pkts (dropped 10, overlimits 0 )
 -----
-
--- a/doc/actions/ifb-README
+++ b/doc/actions/ifb-README
@ -6,18 +6,18 @@ with a _lot_ less code.
 Known IMQ/IFB USES
 ------------------

-As far as i know the reasons listed below is why people use IMQ. 
+As far as i know the reasons listed below is why people use IMQ.
 It would be nice to know of anything else that i missed.

 1) qdiscs/policies that are per device as opposed to system wide.
 IFB allows for sharing.

 2) Allows for queueing incoming traffic for shaping instead of
-dropping. I am not aware of any study that shows policing is 
+dropping. I am not aware of any study that shows policing is
 worse than shaping in achieving the end goal of rate control.
 I would be interested if anyone is experimenting.

-3) Very interesting use: if you are serving p2p you may want to give 
+3) Very interesting use: if you are serving p2p you may want to give
 preference to your own locally originated traffic (when responses come back)
 vs someone using your system to do bittorent. So QoSing based on state
 comes in as the solution. What people did to achieve this was stick
@ -25,17 +25,17 @@ the IMQ somewhere prelocal hook.
 I think this is a pretty neat feature to have in Linux in general.
 (i.e not just for IMQ).
 But i won't go back to putting netfilter hooks in the device to satisfy
-this.  I also don't think its worth it hacking ifb some more to be 
+this.  I also don't think its worth it hacking ifb some more to be
 aware of say L3 info and play ip rule tricks to achieve this.
 --> Instead the plan is to have a conntrack related action. This action will
-selectively either query/create conntrack state on incoming packets. 
-Packets could then be redirected to ifb based on what happens -> eg 
-on incoming packets; if we find they are of known state we could send to 
+selectively either query/create conntrack state on incoming packets.
+Packets could then be redirected to ifb based on what happens -> eg
+on incoming packets; if we find they are of known state we could send to
 a different queue than one which didn't have existing state. This
 all however is dependent on whatever rules the admin enters.

 At the moment this 3rd function does not exist yet. I have decided that
-instead of sitting on the patch for another year, to release it and then 
+instead of sitting on the patch for another year, to release it and then
 if there is pressure i will add this feature.

 An example, to provide functionality that most people use IMQ for below:
@ -43,10 +43,10 @@ An example, to provide functionality that most people use IMQ for below:
 --------
 export TC="/sbin/tc"

-$TC qdisc add dev ifb0 root handle 1: prio 
+$TC qdisc add dev ifb0 root handle 1: prio
 $TC qdisc add dev ifb0 parent 1:1 handle 10: sfq
 $TC qdisc add dev ifb0 parent 1:2 handle 20: tbf rate 20kbit buffer 1600 limit 3000
-$TC qdisc add dev ifb0 parent 1:3 handle 30: sfq                                
+$TC qdisc add dev ifb0 parent 1:3 handle 30: sfq
 $TC filter add dev ifb0 protocol ip pref 1 parent 1: handle 1 fw classid 1:1
 $TC filter add dev ifb0 protocol ip pref 2 parent 1: handle 2 fw classid 1:2

@ -54,7 +54,7 @@ ifconfig ifb0 up

 $TC qdisc add dev eth0 ingress

-# redirect all IP packets arriving in eth0 to ifb0 
+# redirect all IP packets arriving in eth0 to ifb0
 # use mark 1 --> puts them onto class 1:1
 $TC filter add dev eth0 parent ffff: protocol ip prio 10 u32 \
 match u32 0 0 flowid 1:1 \
@ -77,44 +77,44 @@ PING 10.22 (10.0.0.22): 56 data bytes
 --- 10.22 ping statistics ---
 3 packets transmitted, 3 packets received, 0% packet loss
 round-trip min/avg/max = 0.6/1.3/2.8 ms
-[root@jzny action-tests]# 
+[root@jzny action-tests]#
 -----
 Now look at some stats:

 ---
 [root@jmandrake]:~# $TC -s filter show parent ffff: dev eth0
-filter protocol ip pref 10 u32 
-filter protocol ip pref 10 u32 fh 800: ht divisor 1 
-filter protocol ip pref 10 u32 fh 800::800 order 2048 key ht 800 bkt 0 flowid 1:1 
+filter protocol ip pref 10 u32
+filter protocol ip pref 10 u32 fh 800: ht divisor 1
+filter protocol ip pref 10 u32 fh 800::800 order 2048 key ht 800 bkt 0 flowid 1:1
  match 00000000/00000000 at 0
-        action order 1: tablename: mangle  hook: NF_IP_PRE_ROUTING 
-        target MARK set 0x1  
-        index 1 ref 1 bind 1 installed 4195sec  used 27sec 
-         Sent 252 bytes 3 pkts (dropped 0, overlimits 0) 
+        action order 1: tablename: mangle  hook: NF_IP_PRE_ROUTING
+        target MARK set 0x1
+        index 1 ref 1 bind 1 installed 4195sec  used 27sec
+         Sent 252 bytes 3 pkts (dropped 0, overlimits 0)

        action order 2: mirred (Egress Redirect to device ifb0) stolen
        index 1 ref 1 bind 1 installed 165 sec used 27 sec
-         Sent 252 bytes 3 pkts (dropped 0, overlimits 0) 
+         Sent 252 bytes 3 pkts (dropped 0, overlimits 0)

 [root@jmandrake]:~# $TC -s qdisc
-qdisc sfq 30: dev ifb0 limit 128p quantum 1514b 
- Sent 0 bytes 0 pkts (dropped 0, overlimits 0) 
-qdisc tbf 20: dev ifb0 rate 20Kbit burst 1575b lat 2147.5s 
- Sent 210 bytes 3 pkts (dropped 0, overlimits 0) 
-qdisc sfq 10: dev ifb0 limit 128p quantum 1514b 
- Sent 294 bytes 3 pkts (dropped 0, overlimits 0) 
+qdisc sfq 30: dev ifb0 limit 128p quantum 1514b
+ Sent 0 bytes 0 pkts (dropped 0, overlimits 0)
+qdisc tbf 20: dev ifb0 rate 20Kbit burst 1575b lat 2147.5s
+ Sent 210 bytes 3 pkts (dropped 0, overlimits 0)
+qdisc sfq 10: dev ifb0 limit 128p quantum 1514b
+ Sent 294 bytes 3 pkts (dropped 0, overlimits 0)
 qdisc prio 1: dev ifb0 bands 3 priomap  1 2 2 2 1 2 0 0 1 1 1 1 1 1 1 1
- Sent 504 bytes 6 pkts (dropped 0, overlimits 0) 
-qdisc ingress ffff: dev eth0 ---------------- 
- Sent 308 bytes 5 pkts (dropped 0, overlimits 0) 
+ Sent 504 bytes 6 pkts (dropped 0, overlimits 0)
+qdisc ingress ffff: dev eth0 ----------------
+ Sent 308 bytes 5 pkts (dropped 0, overlimits 0)

 [root@jmandrake]:~# ifconfig ifb0
-ifb0    Link encap:Ethernet  HWaddr 00:00:00:00:00:00  
+ifb0    Link encap:Ethernet  HWaddr 00:00:00:00:00:00
          inet6 addr: fe80::200:ff:fe00:0/64 Scope:Link
          UP BROADCAST RUNNING NOARP  MTU:1500  Metric:1
          RX packets:6 errors:0 dropped:3 overruns:0 frame:0
          TX packets:3 errors:0 dropped:0 overruns:0 carrier:0
-          collisions:0 txqueuelen:32 
+          collisions:0 txqueuelen:32
          RX bytes:504 (504.0 b)  TX bytes:252 (252.0 b)
 -----

--- a/doc/actions/mirred-usage
+++ b/doc/actions/mirred-usage
@ -7,10 +7,10 @@ flow to be mirrored. High end switches typically can select based
 on more than just a port (eg a 5 tuple classifier). They may also be
 capable of redirecting.

-Usage: 
+Usage:

-mirred <DIRECTION> <ACTION> [index INDEX] <dev DEVICENAME> 
-where: 
+mirred <DIRECTION> <ACTION> [index INDEX] <dev DEVICENAME>
+where:
 DIRECTION := <ingress | egress>
 ACTION := <mirror | redirect>
 INDEX is the specific policy instance id
@ -18,7 +18,7 @@ DEVICENAME is the devicename

 Direction:
 - Ingress is not supported at the moment. It will be in the
-future as well as mirror/redirecting to a socket. 
+future as well as mirror/redirecting to a socket.

 Action:
 - Mirror takes a copy of the packet and sends it to specified
@ -29,14 +29,14 @@ steals the packet and redirects to specified destination dev.
 What NOT to do if you don't want your machine to crash:
 ------------------------------------------------------

-Do not create loops! 
+Do not create loops!
 Loops are not hard to create in the egress qdiscs.

 Here are simple rules to follow if you don't want to get
 hurt:
 A) Do not have the same packet go to same netdevice twice
 in a single graph of policies. Your machine will just hang!
-This is design intent _not a bug_ to teach you some lessons. 
+This is design intent _not a bug_ to teach you some lessons.

 In the future if there are easy ways to do this in the kernel
 without affecting other packets not interested in this feature
@ -51,7 +51,7 @@ B) Do not redirect from one IFB device to another.
 Remember that IFB is a very specialized case of packet redirecting
 device. Instead of redirecting it puts packets at the exact spot
 on the stack it found them from.
-Redirecting from ifbX->ifbY will actually not crash your machine but your 
+Redirecting from ifbX->ifbY will actually not crash your machine but your
 packets will all be dropped (this is much simpler to detect
 and resolve and is only affecting users of ifb as opposed to the
 whole stack).
@ -64,7 +64,7 @@ Some examples:

 1) Mirror all packets arriving on eth0 to be sent out on eth1.
 You may have a sniffer or some accounting box hooked up on eth1.
- 
+
 ---
 tc qdisc add dev eth0 ingress
 tc filter add dev eth0 parent ffff: protocol ip prio 10 u32 \
@ -100,7 +100,7 @@ stack (i.e ping would work).
 3) Even more funky example:

 #
-#allow 1 out 10 packets on ingress of lo to randomly make it to the 
+#allow 1 out of 10 packets on ingress of lo to randomly make it to the
 # host A (Randomness uses the netrand generator)
 #
 ---
@ -111,9 +111,9 @@ action mirred egress mirror dev eth0
 ---

 4)
-# for packets from 10.0.0.9 going out on eth0 (could be local 
-# IP or something # we are forwarding) - 
-# if exceeding a 100Kbps rate, then redirect to eth1 
+# for packets from 10.0.0.9 going out on eth0 (could be local
+# IP or something # we are forwarding) -
+# if exceeding a 100Kbps rate, then redirect to eth1
 #

 ---
@ -158,7 +158,7 @@ Essentially a good debugging/logging interface (sort of like
 BSDs speacialized log device does without needing one).

 If you replace mirror with redirect, those packets will be
-blackholed and will never make it out. 
+blackholed and will never make it out.

 cheers,
 jamal
--- a/etc/iproute2/rt_protos
+++ b/etc/iproute2/rt_protos
@ -14,8 +14,10 @@
 13	dnrouted
 14	xorp
 15	ntk
-16      dhcp
+16	dhcp
+18	keepalived
 42	babel
+99	openr
 186	bgp
 187	isis
 188	ospf
--- a/examples/README.cbq
+++ b/examples/README.cbq
@ -1,122 +0,0 @@
-# CHANGES
-# -------
-# v0.3a2- fixed bug in "if" operator. Thanks kad@dgtu.donetsk.ua.
-# v0.3a-  added TIME parameter. Example:
-#         TIME=00:00-19:00;64Kbit/6Kbit
-#         So, between 00:00 and 19:00 RATE will be 64Kbit.
-#         Just start "cbq.init timecheck" periodically from cron (every 10
-#         minutes for example).
-#         !!! Anyway you MUST start "cbq.init start" for CBQ initialize.
-# v0.2 -  Some cosmetique changes. Now it more compatible with
-#         old bash version. Thanks to Stanislav V. Voronyi
-#         <stas@cnti.uanet.kharkov.ua>.
-# v0.1 -  First public release
-# 
-# README
-# ------
-# 
-# First of all - this is just a SIMPLE EXAMPLE of CBQ power.
-# Don't ask me "why" and "how" :)
-# 
-# This is an example of using CBQ (Class Based Queueing) and policy-based
-# filter for building smart ethernet shapers. All CBQ parameters are
-# correct only for ETHERNET (eth0,1,2..) linux interfaces. It works for
-# ARCNET too (just set bandwidth parameter to 2Mbit). It was tested
-# on 2.1.125-2.1.129 linux kernels (KSI linux, Nostromo version) and 
-# ip-route utility by A.Kuznetsov (iproute2-ss981101 version). 
-# You can download ip-route from ftp://ftp.inr.ac.ru/ip-routing or
-# get iproute2*.rpm (compiled with glibc) from ftp.ksi-linux.com.
-# 
-# 
-# HOW IT WORKS
-# 
-# Each shaper must be described by config file in $CBQ_PATH
-# (/etc/sysconfig/cbq/) directory - one config file for each CBQ shaper.
-# 
-# Some words about config file name:
-# Each shaper has its personal ID - two byte HEX number. Really ID is 
-# CBQ class.
-# So, filename looks like:
-# 
-# cbq-1280.My_first_shaper
-# ^^^ ^^^  ^^^^^^^^^^^^^
-#  |  |            |______ Shaper name - any word
-#  |  |___________________ ID (0000-FFFF), let ID looks like shaper's rate
-#  |______________________ Filename must begin from "cbq-" 
-# 
-# 
-# Config file describes shaper parameters and source[destination] 
-# address[port].
-# For example let's prepare /etc/sysconfig/cbq/cbq-1280.My_first_shaper:
-# 
-# ----------8<---------------------
-# DEVICE=eth0,10Mbit,1Mbit
-# RATE=128Kbit
-# WEIGHT=10Kbit
-# PRIO=5
-# RULE=192.168.1.0/24
-# ----------8<---------------------
-# 
-# This is minimal configuration, where:
-# DEVICE:  eth0   - device where we do control our traffic
-#          10Mbit - REAL ethernet card bandwidth
-#          1Mbit  - "weight" of :1 class (parent for all shapers for eth0),
-#                   as a rule of thumb weight=batdwidth/10.
-#          100Mbit adapter's example: DEVICE=eth0,100Mbit,10Mbit
-#          *** If you want to build more than one shaper per device it's
-#              enough to describe bandwidth and weight once  - cbq.init
-#              is smart :) You can put only 'DEVICE=eth0' into cbq-* 
-#              config file for eth0.
-# 
-# RATE:    Shaper's speed - Kbit,Mbit or bps (bytes per second)
-# 
-# WEIGHT:  "weight" of shaper (CBQ class). Like for DEVICE - approx. RATE/10
-# 
-# PRIO:    shaper's priority from 1 to 8 where 1 is the highest one.
-#          I do always use "5" for all my shapers.
-# 
-# RULE:    [source addr][:source port],[dest addr][:dest port]
-#          Some examples:
-# RULE=10.1.1.0/24:80         - all traffic for network 10.1.1.0 to port 80
-#                               will be shaped.
-# RULE=10.2.2.5               - shaper works only for IP address 10.2.2.5   
-# RULE=:25,10.2.2.128/25:5000 - all traffic from any address and port 25 to
-#                               address 10.2.2.128 - 10.2.2.255 and port 5000
-#                               will be shaped.
-# RULE=10.5.5.5:80,           - shaper active only for traffic from port 80 of
-#                               address 10.5.5.5
-# Multiple RULE fields per one config file are allowed. For example:
-# RULE=10.1.1.2:80
-# RULE=10.1.1.2:25
-# RULE=10.1.1.2:110
-# 
-# *** ATTENTION!!!
-# All shapers do work only for outgoing traffic!
-# So, if you want to build bidirectional shaper you must set it up for
-# both ethernet card. For example let's build shaper for our linux box like:
-# 
-#                     ---------             192.168.1.1
-# BACKBONE -----eth0-|  linux  |-eth1------*[our client]
-#                     ---------
-# 
-# Let all traffic from backbone to client will be shaped at 28Kbit and
-# traffic from client to backbone - at 128Kbit. We need two config files:
-# 
-# ---8<-----/etc/sysconfig/cbq/cbq-28.client-out----
-# DEVICE=eth1,10Mbit,1Mbit
-# RATE=28Kbit
-# WEIGHT=2Kbit
-# PRIO=5
-# RULE=192.168.1.1
-# ---8<---------------------------------------------
-# 
-# ---8<-----/etc/sysconfig/cbq/cbq-128.client-in----
-# DEVICE=eth0,10Mbit,1Mbit
-# RATE=128Kbit
-# WEIGHT=10Kbit
-# PRIO=5
-# RULE=192.168.1.1,
-# ---8<---------------------------------------------
-#                 ^pay attention to "," - this is source address!
-# 
-# Enjoy.
--- a/examples/SYN-DoS.rate.limit
+++ b/examples/SYN-DoS.rate.limit
@ -1,49 +0,0 @@
-#! /bin/sh -x
-#
-# sample script on using the ingress capabilities
-# this script shows how one can rate limit incoming SYNs
-# Useful for TCP-SYN attack protection. You can use
-# IPchains to have more powerful additions to the SYN (eg 
-# in addition the subnet)
-#
-#path to various utilities;
-#change to reflect yours.
-#
-IPROUTE=/root/DS-6-beta/iproute2-990530-dsing
-TC=$IPROUTE/tc/tc
-IP=$IPROUTE/ip/ip
-IPCHAINS=/root/DS-6-beta/ipchains-1.3.9/ipchains
-INDEV=eth2
-#
-# tag all incoming SYN packets through $INDEV as mark value 1
-############################################################ 
-$IPCHAINS -A input -i $INDEV -y -m 1
-############################################################ 
-#
-# install the ingress qdisc on the ingress interface
-############################################################ 
-$TC qdisc add dev $INDEV handle ffff: ingress
-############################################################ 
-
-#
-# 
-# SYN packets are 40 bytes (320 bits) so three SYNs equals
-# 960 bits (approximately 1kbit); so we rate limit below
-# the incoming SYNs to 3/sec (not very sueful really; but
-#serves to show the point - JHS
-############################################################ 
-$TC filter add dev $INDEV parent ffff: protocol ip prio 50 handle 1 fw \
-police rate 1kbit burst 40 mtu 9k drop flowid :1
-############################################################ 
-
-
-#
-echo "---- qdisc parameters Ingress  ----------"
-$TC qdisc ls dev $INDEV
-echo "---- Class parameters Ingress  ----------"
-$TC class ls dev $INDEV
-echo "---- filter parameters Ingress ----------"
-$TC filter ls dev $INDEV parent ffff:
-
-#deleting the ingress qdisc
-#$TC qdisc del $INDEV ingress
--- a/examples/bpf/README
+++ b/examples/bpf/README
@ -1,8 +1,18 @@
 eBPF toy code examples (running in kernel) to familiarize yourself
 with syntax and features:

- - bpf_shared.c		-> Ingress/egress map sharing example
- - bpf_tailcall.c	-> Using tail call chains
- - bpf_cyclic.c		-> Simple cycle as tail calls
+- BTF defined map examples
 - bpf_graft.c		-> Demo on altering runtime behaviour
- - bpf_map_in_map.c     -> Using map in map example
+ - bpf_shared.c 	-> Ingress/egress map sharing example
+ - bpf_map_in_map.c	-> Using map in map example
+
+- legacy struct bpf_elf_map defined map examples
+ - legacy/bpf_shared.c		-> Ingress/egress map sharing example
+ - legacy/bpf_tailcall.c	-> Using tail call chains
+ - legacy/bpf_cyclic.c		-> Simple cycle as tail calls
+ - legacy/bpf_graft.c		-> Demo on altering runtime behaviour
+ - legacy/bpf_map_in_map.c	-> Using map in map example
+
+Note: Users should use the new BTF way to define maps; the examples
+in the legacy folder, which use struct bpf_elf_map defined maps, are not
+recommended.
--- a/examples/bpf/bpf_graft.c
+++ b/examples/bpf/bpf_graft.c
@ -33,13 +33,13 @@
 *   [...]
 */

-struct bpf_elf_map __section_maps jmp_tc = {
-	.type		= BPF_MAP_TYPE_PROG_ARRAY,
-	.size_key	= sizeof(uint32_t),
-	.size_value	= sizeof(uint32_t),
-	.pinning	= PIN_GLOBAL_NS,
-	.max_elem	= 1,
-};
+struct {
+	__uint(type, BPF_MAP_TYPE_PROG_ARRAY);
+	__uint(key_size, sizeof(uint32_t));
+	__uint(value_size, sizeof(uint32_t));
+	__uint(max_entries, 1);
+	__uint(pinning, LIBBPF_PIN_BY_NAME);
+} jmp_tc __section(".maps");

 __section("aaa")
 int cls_aaa(struct __sk_buff *skb)
--- a/examples/bpf/bpf_map_in_map.c
+++ b/examples/bpf/bpf_map_in_map.c
@ -1,24 +1,23 @@
 #include "../../include/bpf_api.h"

-#define MAP_INNER_ID	42
+struct inner_map {
+	__uint(type, BPF_MAP_TYPE_ARRAY);
+	__uint(key_size, sizeof(uint32_t));
+	__uint(value_size, sizeof(uint32_t));
+	__uint(max_entries, 1);
+} map_inner __section(".maps");

-struct bpf_elf_map __section_maps map_inner = {
-	.type		= BPF_MAP_TYPE_ARRAY,
-	.size_key	= sizeof(uint32_t),
-	.size_value	= sizeof(uint32_t),
-	.id		= MAP_INNER_ID,
-	.inner_idx	= 0,
-	.pinning	= PIN_GLOBAL_NS,
-	.max_elem	= 1,
-};
-
-struct bpf_elf_map __section_maps map_outer = {
-	.type		= BPF_MAP_TYPE_ARRAY_OF_MAPS,
-	.size_key	= sizeof(uint32_t),
-	.size_value	= sizeof(uint32_t),
-	.inner_id	= MAP_INNER_ID,
-	.pinning	= PIN_GLOBAL_NS,
-	.max_elem	= 1,
+struct {
+	__uint(type, BPF_MAP_TYPE_ARRAY_OF_MAPS);
+	__uint(key_size, sizeof(uint32_t));
+	__uint(value_size, sizeof(uint32_t));
+	__uint(max_entries, 1);
+	__uint(pinning, LIBBPF_PIN_BY_NAME);
+	__array(values, struct inner_map);
+} map_outer __section(".maps") = {
+	.values = {
+		[0] = &map_inner,
+	},
 };

 __section("egress")
--- a/examples/bpf/bpf_shared.c
+++ b/examples/bpf/bpf_shared.c
@ -18,13 +18,13 @@
 * instance is being created.
 */

-struct bpf_elf_map __section_maps map_sh = {
-	.type		= BPF_MAP_TYPE_ARRAY,
-	.size_key	= sizeof(uint32_t),
-	.size_value	= sizeof(uint32_t),
-	.pinning	= PIN_OBJECT_NS, /* or PIN_GLOBAL_NS, or PIN_NONE */
-	.max_elem	= 1,
-};
+struct {
+	__uint(type, BPF_MAP_TYPE_ARRAY);
+	__uint(key_size, sizeof(uint32_t));
+	__uint(value_size, sizeof(uint32_t));
+	__uint(max_entries, 1);
+	__uint(pinning, LIBBPF_PIN_BY_NAME);	/* or LIBBPF_PIN_NONE */
+} map_sh __section(".maps");

 __section("egress")
 int emain(struct __sk_buff *skb)
--- a/examples/bpf/legacy/bpf_cyclic.c
+++ b/examples/bpf/legacy/bpf_cyclic.c
@ -1,4 +1,4 @@
-#include "../../include/bpf_api.h"
+#include "../../../include/bpf_api.h"

 /* Cyclic dependency example to test the kernel's runtime upper
 * bound on loops. Also demonstrates on how to use direct-actions,
--- a/examples/bpf/legacy/bpf_graft.c
+++ b/examples/bpf/legacy/bpf_graft.c
@ -0,0 +1,66 @@
+#include "../../../include/bpf_api.h"
+
+/* This example demonstrates how classifier run-time behaviour
+ * can be altered with tail calls. We start out with an empty
+ * jmp_tc array, then add section aaa to the array slot 0, and
+ * later on atomically replace it with section bbb. Note that
+ * as shown in other examples, the tc loader can prepopulate
+ * tail called sections, here we start out with an empty one
+ * on purpose to show it can also be done this way.
+ *
+ * tc filter add dev foo parent ffff: bpf obj graft.o
+ * tc exec bpf dbg
+ *   [...]
+ *   Socket Thread-20229 [001] ..s. 138993.003923: : fallthrough
+ *   <idle>-0            [001] ..s. 138993.202265: : fallthrough
+ *   Socket Thread-20229 [001] ..s. 138994.004149: : fallthrough
+ *   [...]
+ *
+ * tc exec bpf graft m:globals/jmp_tc key 0 obj graft.o sec aaa
+ * tc exec bpf dbg
+ *   [...]
+ *   Socket Thread-19818 [002] ..s. 139012.053587: : aaa
+ *   <idle>-0            [002] ..s. 139012.172359: : aaa
+ *   Socket Thread-19818 [001] ..s. 139012.173556: : aaa
+ *   [...]
+ *
+ * tc exec bpf graft m:globals/jmp_tc key 0 obj graft.o sec bbb
+ * tc exec bpf dbg
+ *   [...]
+ *   Socket Thread-19818 [002] ..s. 139022.102967: : bbb
+ *   <idle>-0            [002] ..s. 139022.155640: : bbb
+ *   Socket Thread-19818 [001] ..s. 139022.156730: : bbb
+ *   [...]
+ */
+
+struct bpf_elf_map __section_maps jmp_tc = {
+	.type		= BPF_MAP_TYPE_PROG_ARRAY,
+	.size_key	= sizeof(uint32_t),
+	.size_value	= sizeof(uint32_t),
+	.pinning	= PIN_GLOBAL_NS,
+	.max_elem	= 1,
+};
+
+__section("aaa")
+int cls_aaa(struct __sk_buff *skb)
+{
+	printt("aaa\n");
+	return TC_H_MAKE(1, 42);
+}
+
+__section("bbb")
+int cls_bbb(struct __sk_buff *skb)
+{
+	printt("bbb\n");
+	return TC_H_MAKE(1, 43);
+}
+
+__section_cls_entry
+int cls_entry(struct __sk_buff *skb)
+{
+	tail_call(skb, &jmp_tc, 0);
+	printt("fallthrough\n");
+	return BPF_H_DEFAULT;
+}
+
+BPF_LICENSE("GPL");
--- a/examples/bpf/legacy/bpf_map_in_map.c
+++ b/examples/bpf/legacy/bpf_map_in_map.c
@ -0,0 +1,56 @@
+#include "../../../include/bpf_api.h"
+
+#define MAP_INNER_ID	42
+
+struct bpf_elf_map __section_maps map_inner = {
+	.type		= BPF_MAP_TYPE_ARRAY,
+	.size_key	= sizeof(uint32_t),
+	.size_value	= sizeof(uint32_t),
+	.id		= MAP_INNER_ID,
+	.inner_idx	= 0,
+	.pinning	= PIN_GLOBAL_NS,
+	.max_elem	= 1,
+};
+
+struct bpf_elf_map __section_maps map_outer = {
+	.type		= BPF_MAP_TYPE_ARRAY_OF_MAPS,
+	.size_key	= sizeof(uint32_t),
+	.size_value	= sizeof(uint32_t),
+	.inner_id	= MAP_INNER_ID,
+	.pinning	= PIN_GLOBAL_NS,
+	.max_elem	= 1,
+};
+
+__section("egress")
+int emain(struct __sk_buff *skb)
+{
+	struct bpf_elf_map *map_inner;
+	int key = 0, *val;
+
+	map_inner = map_lookup_elem(&map_outer, &key);
+	if (map_inner) {
+		val = map_lookup_elem(map_inner, &key);
+		if (val)
+			lock_xadd(val, 1);
+	}
+
+	return BPF_H_DEFAULT;
+}
+
+__section("ingress")
+int imain(struct __sk_buff *skb)
+{
+	struct bpf_elf_map *map_inner;
+	int key = 0, *val;
+
+	map_inner = map_lookup_elem(&map_outer, &key);
+	if (map_inner) {
+		val = map_lookup_elem(map_inner, &key);
+		if (val)
+			printt("map val: %d\n", *val);
+	}
+
+	return BPF_H_DEFAULT;
+}
+
+BPF_LICENSE("GPL");
--- a/examples/bpf/legacy/bpf_shared.c
+++ b/examples/bpf/legacy/bpf_shared.c
@ -0,0 +1,53 @@
+#include "../../../include/bpf_api.h"
+
+/* Minimal, stand-alone toy map pinning example:
+ *
+ * clang -target bpf -O2 [...] -o bpf_shared.o -c bpf_shared.c
+ * tc filter add dev foo parent 1: bpf obj bpf_shared.o sec egress
+ * tc filter add dev foo parent ffff: bpf obj bpf_shared.o sec ingress
+ *
+ * Both classifiers will share the very same map instance in this example,
+ * so map content can be accessed from ingress *and* egress side!
+ *
+ * This example has a pinning of PIN_OBJECT_NS, so it's private and
+ * thus shared among various program sections within the object.
+ *
+ * A setting of PIN_GLOBAL_NS would place it into a global namespace,
+ * so that it can be shared among different object files. A setting
+ * of PIN_NONE (= 0) means no sharing, so for each tc invocation a new map
+ * instance is created.
+ */
+
+struct bpf_elf_map __section_maps map_sh = {
+	.type		= BPF_MAP_TYPE_ARRAY,
+	.size_key	= sizeof(uint32_t),
+	.size_value	= sizeof(uint32_t),
+	.pinning	= PIN_OBJECT_NS, /* or PIN_GLOBAL_NS, or PIN_NONE */
+	.max_elem	= 1,
+};
+
+__section("egress")
+int emain(struct __sk_buff *skb)
+{
+	int key = 0, *val;
+
+	val = map_lookup_elem(&map_sh, &key);
+	if (val)
+		lock_xadd(val, 1);
+
+	return BPF_H_DEFAULT;
+}
+
+__section("ingress")
+int imain(struct __sk_buff *skb)
+{
+	int key = 0, *val;
+
+	val = map_lookup_elem(&map_sh, &key);
+	if (val)
+		printt("map val: %d\n", *val);
+
+	return BPF_H_DEFAULT;
+}
+
+BPF_LICENSE("GPL");
--- a/examples/bpf/legacy/bpf_tailcall.c
+++ b/examples/bpf/legacy/bpf_tailcall.c
@ -1,5 +1,5 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-#include "../../include/bpf_api.h"
+#include "../../../include/bpf_api.h"

 #define ENTRY_INIT	3
 #define ENTRY_0		0
--- a/examples/cbqinit.eth1
+++ b/examples/cbqinit.eth1
@ -1,76 +0,0 @@
-#! /bin/sh
-
-TC=/home/root/tc
-IP=/home/root/ip
-DEVICE=eth1
-BANDWIDTH="bandwidth 10Mbit"
-
-# Attach CBQ on $DEVICE. It will have handle 1:.
-#   $BANDWIDTH is real $DEVICE bandwidth (10Mbit).
-#   avpkt is average packet size.
-#   mpu is minimal packet size.
-
-$TC qdisc add dev $DEVICE  root  handle 1:  cbq \
-$BANDWIDTH avpkt 1000 mpu 64
-
-# Create root class with classid 1:1. This step is not necessary.
-#   bandwidth is the same as on CBQ itself.
-#   rate == all the bandwidth
-#   allot is MTU + MAC header
-#   maxburst measure allowed class burstiness (please,read S.Floyd and VJ papers)
-#   est 1sec 8sec means, that kernel will evaluate average rate
-#                 on this class with period 1sec and time constant 8sec.
-#                 This rate is viewed with "tc -s class ls dev $DEVICE"
-
-$TC class add dev $DEVICE parent 1:0 classid :1 est 1sec 8sec cbq \
-$BANDWIDTH rate 10Mbit allot 1514 maxburst 50 avpkt 1000
-
-# Bulk.
-#    New parameters are: 
-#    weight, which is set to be proportional to
-#            "rate". It is not necessary, weight=1 will work as well.
-#    defmap and split say that best effort ttraffic, not classfied
-#            by another means will fall to this class.
-
-$TC class add dev $DEVICE parent 1:1 classid :2 est 1sec 8sec cbq \
-$BANDWIDTH rate 4Mbit allot 1514 weight 500Kbit \
-prio 6 maxburst 50 avpkt 1000 split 1:0 defmap ff3d
-
-# OPTIONAL.
-# Attach "sfq" qdisc to this class, quantum is MTU, perturb
-# gives period of hash function perturbation in seconds.
-#
-$TC qdisc add dev $DEVICE parent 1:2 sfq quantum 1514b perturb 15
-
-# Interactive-burst class
-
-$TC class add dev $DEVICE parent 1:1 classid :3 est 2sec 16sec cbq \
-$BANDWIDTH rate 1Mbit allot 1514 weight 100Kbit \
-prio 2 maxburst 100 avpkt 1000 split 1:0 defmap c0
-
-$TC qdisc add dev $DEVICE parent 1:3 sfq quantum 1514b perturb 15
-
-# Background.
-
-$TC class add dev $DEVICE parent 1:1 classid :4 est 1sec 8sec cbq \
-  $BANDWIDTH rate 100Kbit allot 1514 weight 10Mbit \
-  prio 7 maxburst 10 avpkt 1000 split 1:0 defmap 2
-
-$TC qdisc add dev $DEVICE parent 1:4 sfq quantum 1514b perturb 15
-
-# Realtime class for RSVP
-
-$TC class add dev $DEVICE parent 1:1 classid 1:7FFE cbq \
-rate 5Mbit $BANDWIDTH allot 1514b avpkt 1000 \
-maxburst 20
-
-# Reclassified realtime traffic
-#
-# New element: split is not 1:0, but 1:7FFE. It means,
-#     that only real-time packets, which violated policing filters
-#     or exceeded reshaping buffers will fall to it.
-
-$TC class add dev $DEVICE parent 1:7FFE classid 1:7FFF  est 4sec 32sec cbq \
-rate 1Mbit $BANDWIDTH allot 1514b avpkt 1000 weight 10Kbit \
-prio 6 maxburst 10 split 1:7FFE defmap ffff
-
--- a/examples/diffserv/Edge1
+++ b/examples/diffserv/Edge1
@ -1,68 +0,0 @@
-#! /bin/sh -x
-#
-# sample script on using the ingress capabilities
-# This script just tags on the ingress interfac using Ipchains
-# the result is used for fast classification and re-marking
-# on the egress interface
-#
-#path to various utilities;
-#change to reflect yours.
-#
-IPROUTE=/root/DS-6-beta/iproute2-990530-dsing
-TC=$IPROUTE/tc/tc
-IP=$IPROUTE/ip/ip
-IPCHAINS=/root/DS-6-beta/ipchains-1.3.9/ipchains
-INDEV=eth2
-EGDEV="dev eth1"
-#
-# tag all incoming packets from host 10.2.0.24 to value 1
-# tag all incoming packets from host 10.2.0.3 to value 2
-# tag the rest of incoming packets from subnet 10.2.0.0/24 to value 3
-#These values are used in the egress
-#
-############################################################ 
-$IPCHAINS -A input -s 10.2.0.4/24 -m 3
-$IPCHAINS -A input -i $INDEV -s 10.2.0.24 -m 1
-$IPCHAINS -A input -i $INDEV -s 10.2.0.3 -m 2
-
-######################## Egress side ########################
-
-
-# attach a dsmarker
-#
-$TC qdisc add $EGDEV handle 1:0 root dsmark indices 64 set_tc_index
-#
-# values of the DSCP to change depending on the class
-#
-#becomes EF
-$TC class change $EGDEV classid 1:1 dsmark mask 0x3 \
-       value 0xb8
-#becomes AF11
-$TC class change $EGDEV classid 1:2 dsmark mask 0x3 \
-       value 0x28
-#becomes AF21
-$TC class change $EGDEV classid 1:3 dsmark mask 0x3 \
-       value 0x48
-#
-#
-# The class mapping
-#
-$TC filter add $EGDEV parent 1:0 protocol ip prio 4 handle 1 fw classid 1:1
-$TC filter add $EGDEV parent 1:0 protocol ip prio 4 handle 2 fw classid 1:2
-$TC filter add $EGDEV parent 1:0 protocol ip prio 4 handle 3 fw classid 1:3
-#
-
-#
-echo "---- qdisc parameters Ingress  ----------"
-$TC qdisc ls dev $INDEV
-echo "---- Class parameters Ingress  ----------"
-$TC class ls dev $INDEV
-echo "---- filter parameters Ingress ----------"
-$TC filter ls dev $INDEV parent 1:0
-
-echo "---- qdisc parameters Egress  ----------"
-$TC qdisc ls $EGDEV
-echo "---- Class parameters Egress  ----------"
-$TC class ls $EGDEV
-echo "---- filter parameters Egress ----------"
-$TC filter ls $EGDEV parent 1:0
--- a/examples/diffserv/Edge2
+++ b/examples/diffserv/Edge2
@ -1,87 +0,0 @@
-#! /bin/sh -x
-#
-# sample script on using the ingress capabilities
-# This script tags the fwmark on the ingress interface using IPchains
-# the result is used first for policing on the Ingress interface then
-# for fast classification and re-marking
-# on the egress interface
-#
-#path to various utilities;
-#change to reflect yours.
-#
-IPROUTE=/root/DS-6-beta/iproute2-990530-dsing
-TC=$IPROUTE/tc/tc
-IP=$IPROUTE/ip/ip
-IPCHAINS=/root/DS-6-beta/ipchains-1.3.9/ipchains
-INDEV=eth2
-EGDEV="dev eth1"
-#
-# tag all incoming packets from host 10.2.0.24 to value 1
-# tag all incoming packets from host 10.2.0.3 to value 2
-# tag the rest of incoming packets from subnet 10.2.0.0/24 to value 3
-#These values are used in the egress
-############################################################ 
-$IPCHAINS -A input -s 10.2.0.0/24 -m 3
-$IPCHAINS -A input -i $INDEV -s 10.2.0.24 -m 1
-$IPCHAINS -A input -i $INDEV -s 10.2.0.3 -m 2
-############################################################ 
-#
-# install the ingress qdisc on the ingress interface
-############################################################ 
-$TC qdisc add dev $INDEV handle ffff: ingress
-############################################################ 
-
-#
-# attach a fw classifier to the ingress which polices anything marked
-# by ipchains to tag value 3 (The rest of the subnet packets -- not
-# tag 1 or 2) to not go beyond 1.5Mbps
-# Allow up to at least 60 packets to burst (assuming maximum packet 
-# size of # 1.5 KB) in the long run and up to about 6 packets in the
-# shot run
-
-############################################################ 
-$TC filter add dev $INDEV parent ffff: protocol ip prio 50 handle 3 fw \
-police rate 1500kbit burst 90k mtu 9k drop flowid :1
-############################################################ 
-
-######################## Egress side ########################
-
-
-# attach a dsmarker
-#
-$TC qdisc add $EGDEV handle 1:0 root dsmark indices 64
-#
-# values of the DSCP to change depending on the class
-#
-$TC class change $EGDEV classid 1:1 dsmark mask 0x3 \
-       value 0xb8
-$TC class change $EGDEV classid 1:2 dsmark mask 0x3 \
-       value 0x28
-$TC class change $EGDEV classid 1:3 dsmark mask 0x3 \
-       value 0x48
-#
-#
-# The class mapping
-#
-$TC filter add $EGDEV parent 1:0 protocol ip prio 4 handle 1 fw classid 1:1
-$TC filter add $EGDEV parent 1:0 protocol ip prio 4 handle 2 fw classid 1:2
-$TC filter add $EGDEV parent 1:0 protocol ip prio 4 handle 3 fw classid 1:3
-#
-
-#
-echo "---- qdisc parameters Ingress  ----------"
-$TC qdisc ls dev $INDEV
-echo "---- Class parameters Ingress  ----------"
-$TC class ls dev $INDEV
-echo "---- filter parameters Ingress ----------"
-$TC filter ls dev $INDEV parent ffff:
-
-echo "---- qdisc parameters Egress  ----------"
-$TC qdisc ls $EGDEV
-echo "---- Class parameters Egress  ----------"
-$TC class ls $EGDEV
-echo "---- filter parameters Egress ----------"
-$TC filter ls $EGDEV parent 1:0
-#
-#deleting the ingress qdisc
-#$TC qdisc del $DEV ingress
--- a/examples/diffserv/Edge31-ca-u32
+++ b/examples/diffserv/Edge31-ca-u32
@ -1,170 +0,0 @@
-#! /bin/sh -x
-#
-# sample script on using the ingress capabilities using u32 classifier
-# This script tags tcindex based on metering on the ingress 
-# interface the result is used for fast classification and re-marking
-# on the egress interface
-# This is an example of a color aware mode marker with PIR configured
-# based on draft-wahjak-mcm-00.txt (section 3.1)
-#
-# The colors are defined using the Diffserv Fields
-#path to various utilities;
-#change to reflect yours.
-#
-IPROUTE=/usr/src/iproute2-current
-TC=$IPROUTE/tc/tc
-IP=$IPROUTE/ip/ip
-INDEV=eth0
-EGDEV="dev eth1"
-CIR1=1500kbit
-CIR2=1000kbit
-
-#The CBS is about 60 MTU sized packets
-CBS1=90k
-CBS2=90k
-
-############################################################ 
-#
-# install the ingress qdisc on the ingress interface
-$TC qdisc add dev $INDEV handle ffff: ingress
-############################################################ 
-#
-# Create u32 filters 
-$TC filter add dev $INDEV parent ffff: protocol ip prio 4 handle 1: u32 \
-divisor 1
-############################################################ 
-
-# The meters: Note that we have shared meters in this case as identified
-# by the index parameter
-meter1=" police index 1 rate $CIR1 burst $CBS1 "
-meter2=" police index 2 rate $CIR2 burst $CBS1 "
-meter3=" police index 3 rate $CIR2 burst $CBS2 "
-meter4=" police index 4 rate $CIR1 burst $CBS2 "
-meter5=" police index 5 rate $CIR1 burst $CBS2 "
-
-# All packets are marked with a tcindex value which is used on the egress
-# tcindex 1 maps to AF41, 2->AF42, 3->AF43, 4->BE
-
-# *********************** AF41 *************************** 
-#AF41 (DSCP 0x22) is passed on with a tcindex value 1
-#if it doesn't exceed its CIR/CBS 
-#policer 1  is used.
-#
-$TC filter add dev $INDEV parent ffff: protocol ip prio 4 u32 \
-match ip tos 0x88 0xfc \
-$meter1 \
-continue flowid :1
-#
-# if it exceeds the above but not the extra rate/burst below, it gets a 
-# tcindex value  of 2
-# policer 2 is used
-#
-$TC filter add dev $INDEV parent ffff: protocol ip prio 5 u32 \
-match ip tos 0x88 0xfc \
-$meter2 \
-continue flowid :2
-#
-# if it exceeds the above but not the rule below, it gets a tcindex value
-# of 3 (policer 3)
-#
-$TC filter add dev $INDEV parent ffff: protocol ip prio 6 u32 \
-match ip tos 0x88 0xfc \
-$meter3 \
-drop flowid :3
-#
-
-# *********************** AF42 *************************** 
-#AF42 (DSCP 0x24) is passed on with a tcindex value 2
-#if it doesn't exceed its CIR/CBS 
-#policer 2 is used. Note that this is shared with the AF41
-#
-#
-$TC filter add dev $INDEV parent ffff: protocol ip prio 5 u32 \
-match ip tos 0x90 0xfc \
-$meter2 \
-continue flowid :2
-#
-# if it exceeds the above but not the rule below, it gets a tcindex value
-# of 3 (policer 3)
-#
-$TC filter add dev $INDEV parent ffff: protocol ip prio 6 u32 \
-match ip tos 0x90 0xfc \
-$meter3 \
-drop flowid :3
-#
-# *********************** AF43 *************************** 
-#
-#AF43 (DSCP 0x26) is passed on with a tcindex value 3
-#if it doesn't exceed its CIR/CBS
-#policer 3 is used. Note that this is shared with the AF41 and AF42
-#
-$TC filter add dev $INDEV parent ffff: protocol ip prio 6 u32 \
-match ip tos 0x98 0xfc \
-$meter3 \
-drop flowid :3
-#
-# *********************** BE *************************** 
-#
-# Anything else (not from the AF4*) gets discarded if it 
-# exceeds 1Mbps and by default goes to BE if it doesn't
-# Note that the BE class is also used by the AF4* in the worst
-# case
-#
-$TC filter add dev $INDEV parent ffff: protocol ip prio 7 u32 \
-match ip src 0/0\
-$meter4 \
-drop flowid :4
-
-######################## Egress side ########################
-
-# attach a dsmarker
-#
-$TC qdisc add $EGDEV handle 1:0 root dsmark indices 64
-#
-# values of the DSCP to change depending on the class
-#note that the ECN bits are masked out
-#
-#AF41 (0x88 is 0x22 shifted to the left by two bits)
-#
-$TC class change $EGDEV classid 1:1 dsmark mask 0x3 \
-       value 0x88
-#AF42 (0x90 is DSCP 0x24 shifted left by two bits)
-$TC class change $EGDEV classid 1:2 dsmark mask 0x3 \
-       value 0x90
-#AF43 (0x98 is DSCP 0x26 shifted left by two bits)
-$TC class change $EGDEV classid 1:3 dsmark mask 0x3 \
-       value 0x98
-#BE: class 1:4 (the tcindex handle 4 filter below maps to it), not 1:3
-$TC class change $EGDEV classid 1:4 dsmark mask 0x3 \
-       value 0x0
-#
-#
-# The class mapping
-#
-$TC filter add $EGDEV parent 1:0 protocol ip prio 1 \
-          handle 1 tcindex classid 1:1
-$TC filter add $EGDEV parent 1:0 protocol ip prio 1 \
-          handle 2 tcindex  classid 1:2
-$TC filter add $EGDEV parent 1:0 protocol ip prio 1 \
-          handle 3 tcindex  classid 1:3
-$TC filter add $EGDEV parent 1:0 protocol ip prio 1 \
-          handle 4 tcindex  classid 1:4
-#
-
-#
-echo "---- qdisc parameters Ingress  ----------"
-$TC qdisc ls dev $INDEV
-echo "---- Class parameters Ingress  ----------"
-$TC class ls dev $INDEV
-echo "---- filter parameters Ingress ----------"
-$TC filter ls dev $INDEV parent ffff:
-
-echo "---- qdisc parameters Egress  ----------"
-$TC qdisc ls $EGDEV
-echo "---- Class parameters Egress  ----------"
-$TC class ls $EGDEV
-echo "---- filter parameters Egress ----------"
-$TC filter ls $EGDEV parent 1:0
-#
-#deleting the ingress qdisc
-#$TC qdisc del $INDEV ingress
--- a/examples/diffserv/Edge31-cb-chains
+++ b/examples/diffserv/Edge31-cb-chains
@ -1,132 +0,0 @@
-#! /bin/sh -x
-#
-# sample script on using the ingress capabilities
-# This script fwmark tags(IPchains) based on metering on the ingress 
-# interface the result is used for fast classification and re-marking
-# on the egress interface
-# This is an example of a color blind mode marker with no PIR configured
-# based on draft-wahjak-mcm-00.txt (section 3.1)
-#
-#path to various utilities;
-#change to reflect yours.
-#
-IPROUTE=/root/DS-6-beta/iproute2-990530-dsing
-TC=$IPROUTE/tc/tc
-IP=$IPROUTE/ip/ip
-IPCHAINS=/root/DS-6-beta/ipchains-1.3.9/ipchains
-INDEV=eth2
-EGDEV="dev eth1"
-CIR1=1500kbit
-CIR2=1000kbit
-
-#The CBS is about 60 MTU sized packets
-CBS1=90k
-CBS2=90k
-
-meter1="police rate $CIR1 burst $CBS1 "
-meter2="police rate $CIR1 burst $CBS2 "
-meter3="police rate $CIR2 burst $CBS1 "
-meter4="police rate $CIR2 burst $CBS2 "
-meter5="police rate $CIR2 burst $CBS2 "
-#
-# tag the rest of incoming packets from subnet 10.2.0.0/24 to fw value 1
-# tag all incoming packets from any other subnet to fw tag 2
-############################################################ 
-$IPCHAINS -A input -i $INDEV -s 0/0 -m 2
-$IPCHAINS -A input -i $INDEV -s 10.2.0.0/24 -m 1
-#
-############################################################ 
-# install the ingress qdisc on the ingress interface
-$TC qdisc add dev $INDEV handle ffff: ingress
-#
-############################################################ 
-
-# All packets are marked with a tcindex value which is used on the egress
-# tcindex 1 maps to AF41, 2->AF42, 3->AF43, 4->BE
-#
-############################################################ 
-# 
-# anything with fw tag of 1 is passed on with a tcindex value 1
-#if it doesn't exceed its allocated rate (CIR/CBS)
-# 
-$TC filter add dev $INDEV parent ffff: protocol ip prio 4 handle 1 fw \
-$meter1 \
-continue flowid 4:1
-#
-# if it exceeds the above but not the extra rate/burst below, it gets a 
-#tcindex value  of 2
-#
-$TC filter add dev $INDEV parent ffff: protocol ip prio 5 handle 1 fw \
-$meter2 \
-continue flowid 4:2
-#
-# if it exceeds the above but not the rule below, it gets a tcindex value
-# of 3
-#
-$TC filter add dev $INDEV parent ffff: protocol ip prio 6 handle 1 fw \
-$meter3 \
-drop flowid 4:3
-#
-# Anything else (not from the subnet 10.2.0.0/24) gets discarded if it 
-# exceeds 1Mbps and by default goes to BE if it doesn't
-#
-$TC filter add dev $INDEV parent ffff: protocol ip prio 6 handle 2 fw \
-$meter5 \
-drop flowid 4:4
-
-
-######################## Egress side ########################
-
-
-# attach a dsmarker
-#
-$TC qdisc add $EGDEV handle 1:0 root dsmark indices 64
-#
-# values of the DSCP to change depending on the class
-#note that the ECN bits are masked out
-#
-#AF41 (0x88 is 0x22 shifted to the left by two bits)
-#
-$TC class change $EGDEV classid 1:1 dsmark mask 0x3 \
-       value 0x88
-#AF42
-$TC class change $EGDEV classid 1:2 dsmark mask 0x3 \
-       value 0x90
-#AF43
-$TC class change $EGDEV classid 1:3 dsmark mask 0x3 \
-       value 0x98
-#BE
-$TC class change $EGDEV classid 1:4 dsmark mask 0x3 \
-       value 0x0
-#
-#
-# The class mapping (using tcindex; could easily have
-# replaced it with the fw classifier instead)
-#
-$TC filter add $EGDEV parent 1:0 protocol ip prio 1 \
-          handle 1 tcindex classid 1:1
-$TC filter add $EGDEV parent 1:0 protocol ip prio 1 \
-          handle 2 tcindex  classid 1:2
-$TC filter add $EGDEV parent 1:0 protocol ip prio 1 \
-          handle 3 tcindex  classid 1:3
-$TC filter add $EGDEV parent 1:0 protocol ip prio 1 \
-          handle 4 tcindex  classid 1:4
-#
-
-#
-echo "---- qdisc parameters Ingress  ----------"
-$TC qdisc ls dev $INDEV
-echo "---- Class parameters Ingress  ----------"
-$TC class ls dev $INDEV
-echo "---- filter parameters Ingress ----------"
-$TC filter ls dev $INDEV parent ffff:
-
-echo "---- qdisc parameters Egress  ----------"
-$TC qdisc ls $EGDEV
-echo "---- Class parameters Egress  ----------"
-$TC class ls $EGDEV
-echo "---- filter parameters Egress ----------"
-$TC filter ls $EGDEV parent 1:0
-#
-#deleting the ingress qdisc
-#$TC qdisc del $INDEV ingress
--- a/examples/diffserv/Edge32-ca-u32
+++ b/examples/diffserv/Edge32-ca-u32
@ -1,198 +0,0 @@
-#! /bin/sh -x
-#
-# sample script on using the ingress capabilities using u32 classifier
-# This script tags tcindex based on metering on the ingress 
-# interface the result is used for fast classification and re-marking
-# on the egress interface
-# This is an example of a color aware mode marker with PIR configured
-# based on draft-wahjak-mcm-00.txt (section 3.2)
-#
-# The colors are defined using the Diffserv Fields
-#path to various utilities;
-#change to reflect yours.
-#
-IPROUTE=/root/DS-6-beta/iproute2-990530-dsing
-TC=$IPROUTE/tc/tc
-IP=$IPROUTE/ip/ip
-IPCHAINS=/root/DS-6-beta/ipchains-1.3.9/ipchains
-INDEV=eth2
-EGDEV="dev eth1"
-CIR1=1000kbit
-CIR2=500kbit
-# the PIR is what is in excess of the CIR
-PIR1=1000kbit
-PIR2=500kbit
-
-#The CBS is about 60 MTU sized packets
-CBS1=90k
-CBS2=90k
-#the EBS is about 20 max sized packets
-EBS1=30k
-EBS2=30k
-
-# The meters: Note that we have shared meters in this case as identified
-# by the index parameter
-meter1=" police index 1 rate $CIR1 burst $CBS1 "
-meter1a=" police index 2 rate $PIR1 burst $EBS1 "
-meter2=" police index 3 rate $CIR2 burst $CBS1 "
-meter2a=" police index 4 rate $PIR2 burst $EBS1 "
-meter3=" police index 5 rate $CIR2 burst $CBS2 "
-meter3a=" police index 6 rate $PIR2 burst $EBS2 "
-meter4=" police index 7 rate $CIR1 burst $CBS2 "
-
-############################################################ 
-#
-# install the ingress qdisc on the ingress interface
-$TC qdisc add dev $INDEV handle ffff: ingress
-############################################################ 
-#
-# All packets are marked with a tcindex value which is used on the egress
-# tcindex 1 maps to AF41, 2->AF42, 3->AF43, 4->BE
-#
-# *********************** AF41 *************************** 
-#AF41 (DSCP 0x22) is passed on with a tcindex value 1
-#if it doesn't exceed its CIR/CBS + PIR/EBS
-#policer 1  is used.
-#
-$TC filter add dev $INDEV parent ffff: protocol ip prio 1 u32 \
-match ip tos 0x88 0xfc \
-$meter1 \
-continue flowid :1
-$TC filter add dev $INDEV parent ffff: protocol ip prio 2 u32 \
-match ip tos 0x88 0xfc \
-$meter1a \
-continue flowid :1
-#
-# if it exceeds the above but not the extra rate/burst below, it gets a 
-# tcindex value  of 2
-# policer 2 is used
-#
-$TC filter add dev $INDEV parent ffff: protocol ip prio 3 u32 \
-match ip tos 0x88 0xfc \
-$meter2 \
-continue flowid :2
-$TC filter add dev $INDEV parent ffff: protocol ip prio 4 u32 \
-match ip tos 0x88 0xfc \
-$meter2a \
-continue flowid :2
-#
-# if it exceeds the above but not the rule below, it gets a tcindex value
-# of 3 (policer 3)
-#
-$TC filter add dev $INDEV parent ffff: protocol ip prio 5 u32 \
-match ip tos 0x88 0xfc \
-$meter3 \
-continue flowid :3
-$TC filter add dev $INDEV parent ffff: protocol ip prio 6 u32 \
-match ip tos 0x88 0xfc \
-$meter3a \
-drop flowid :3
-#
-# *********************** AF42 *************************** 
-#AF42 (DSCP 0x24) is passed on with a tcindex value 2
-#if it doesn't exceed its CIR/CBS + PIR/EBS
-#policer 2 is used. Note that this is shared with the AF41
-#
-#
-$TC filter add dev $INDEV parent ffff: protocol ip prio 8 u32 \
-match ip tos 0x90 0xfc \
-$meter2 \
-continue flowid :2
-$TC filter add dev $INDEV parent ffff: protocol ip prio 9 u32 \
-match ip tos 0x90 0xfc \
-$meter2a \
-continue flowid :2
-#
-# if it exceeds the above but not the rule below, it gets a tcindex value
-# of 3 (policer 3)
-#
-$TC filter add dev $INDEV parent ffff: protocol ip prio 10 u32 \
-match ip tos 0x90 0xfc \
-$meter3 \
-continue flowid :3
-$TC filter add dev $INDEV parent ffff: protocol ip prio 11 u32 \
-match ip tos 0x90 0xfc \
-$meter3a \
-drop flowid :3
-
-#
-# *********************** AF43 *************************** 
-#
-#AF43 (DSCP 0x26) is passed on with a tcindex value 3
-#if it doesn't exceed its CIR/CBS + PIR/EBS
-#policer 3 is used. Note that this is shared with the AF41 and AF42
-#
-$TC filter add dev $INDEV parent ffff: protocol ip prio 13 u32 \
-match ip tos 0x98 0xfc \
-$meter3 \
-continue flowid :3
-$TC filter add dev $INDEV parent ffff: protocol ip prio 14 u32 \
-match ip tos 0x98 0xfc \
-$meter3a \
-drop flowid :3
-#
-## *********************** BE *************************** 
-##
-## Anything else (not from the AF4*) gets discarded if it 
-## exceeds 1Mbps and by default goes to BE if it doesn't
-## Note that the BE class is also used by the AF4* in the worst
-## case
-##
-$TC filter add dev $INDEV parent ffff: protocol ip prio 16 u32 \
-match ip src 0/0\
-$meter4 \
-drop flowid :4
-
-######################## Egress side ########################
-
-# attach a dsmarker
-#
-$TC qdisc add $EGDEV handle 1:0 root dsmark indices 64
-#
-# values of the DSCP to change depending on the class
-#note that the ECN bits are masked out
-#
-#AF41 (0x88 is 0x22 shifted to the left by two bits)
-#
-$TC class change $EGDEV classid 1:1 dsmark mask 0x3 \
-       value 0x88
-#AF42 (0x90 is DSCP 0x24 shifted left by two bits)
-$TC class change $EGDEV classid 1:2 dsmark mask 0x3 \
-       value 0x90
-#AF43 (0x98 is DSCP 0x26 shifted left by two bits)
-$TC class change $EGDEV classid 1:3 dsmark mask 0x3 \
-       value 0x98
-#BE: class 1:4 (the tcindex handle 4 filter below maps to it), not 1:3
-$TC class change $EGDEV classid 1:4 dsmark mask 0x3 \
-       value 0x0
-#
-#
-# The class mapping
-#
-$TC filter add $EGDEV parent 1:0 protocol ip prio 1 \
-          handle 1 tcindex classid 1:1
-$TC filter add $EGDEV parent 1:0 protocol ip prio 1 \
-          handle 2 tcindex  classid 1:2
-$TC filter add $EGDEV parent 1:0 protocol ip prio 1 \
-          handle 3 tcindex  classid 1:3
-$TC filter add $EGDEV parent 1:0 protocol ip prio 1 \
-          handle 4 tcindex  classid 1:4
-#
-
-#
-echo "---- qdisc parameters Ingress  ----------"
-$TC qdisc ls dev $INDEV
-echo "---- Class parameters Ingress  ----------"
-$TC class ls dev $INDEV
-echo "---- filter parameters Ingress ----------"
-$TC filter ls dev $INDEV parent ffff:
-
-echo "---- qdisc parameters Egress  ----------"
-$TC qdisc ls $EGDEV
-echo "---- Class parameters Egress  ----------"
-$TC class ls $EGDEV
-echo "---- filter parameters Egress ----------"
-$TC filter ls $EGDEV parent 1:0
-#
-#deleting the ingress qdisc
-#$TC qdisc del $INDEV ingress
--- a/examples/diffserv/Edge32-cb-chains
+++ b/examples/diffserv/Edge32-cb-chains
@ -1,144 +0,0 @@
-#! /bin/sh -x
-#
-# sample script on using the ingress capabilities
-# This script fwmark tags(IPchains) based on metering on the ingress 
-# interface the result is used for fast classification and re-marking
-# on the egress interface
-# This is an example of a color blind mode marker with no PIR configured
-# based on draft-wahjak-mcm-00.txt (section 3.1)
-#
-#path to various utilities;
-#change to reflect yours.
-#
-IPROUTE=/root/DS-6-beta/iproute2-990530-dsing
-TC=$IPROUTE/tc/tc
-IP=$IPROUTE/ip/ip
-IPCHAINS=/root/DS-6-beta/ipchains-1.3.9/ipchains
-INDEV=eth2
-EGDEV="dev eth1"
-CIR1=1500kbit
-CIR2=500kbit
-
-#The CBS is about 60 MTU sized packets
-CBS1=90k
-CBS2=90k
-
-meter1="police rate $CIR1 burst $CBS1 "
-meter1a="police rate $CIR2 burst $CBS1 "
-meter2="police rate $CIR1 burst $CBS2 "
-meter2a="police rate $CIR2 burst $CBS2 "
-meter3="police rate $CIR2 burst $CBS1 "
-meter3a="police rate $CIR2 burst $CBS1 "
-meter4="police rate $CIR2 burst $CBS2 "
-meter5="police rate $CIR1 burst $CBS2 "
-#
-# tag the rest of incoming packets from subnet 10.2.0.0/24 to fw value 1
-# tag all incoming packets from any other subnet to fw tag 2
-############################################################ 
-$IPCHAINS -A input -i $INDEV -s 0/0 -m 2
-$IPCHAINS -A input -i $INDEV -s 10.2.0.0/24 -m 1
-#
-############################################################ 
-# install the ingress qdisc on the ingress interface
-$TC qdisc add dev $INDEV handle ffff: ingress
-#
-############################################################ 
-
-# All packets are marked with a tcindex value which is used on the egress
-# tcindex 1 maps to AF41, 2->AF42, 3->AF43, 4->BE
-#
-############################################################ 
-# 
-# anything with fw tag of 1 is passed on with a tcindex value 1
-#if it doesn't exceed its allocated rate (CIR/CBS)
-# 
-$TC filter add dev $INDEV parent ffff: protocol ip prio 1 handle 1 fw \
-$meter1 \
-continue flowid 4:1
-$TC filter add dev $INDEV parent ffff: protocol ip prio 2 handle 1 fw \
-$meter1a \
-continue flowid 4:1
-#
-# if it exceeds the above but not the extra rate/burst below, it gets a 
-#tcindex value  of 2
-#
-$TC filter add dev $INDEV parent ffff: protocol ip prio 3 handle 1 fw \
-$meter2 \
-continue flowid 4:2
-$TC filter add dev $INDEV parent ffff: protocol ip prio 4 handle 1 fw \
-$meter2a \
-continue flowid 4:2
-#
-# if it exceeds the above but not the rule below, it gets a tcindex value
-# of 3
-#
-$TC filter add dev $INDEV parent ffff: protocol ip prio 5 handle 1 fw \
-$meter3 \
-continue flowid 4:3
-$TC filter add dev $INDEV parent ffff: protocol ip prio 6 handle 1 fw \
-$meter3a \
-drop flowid 4:3
-#
-# Anything else (not from the subnet 10.2.0.0/24) gets discarded if it 
-# exceeds 1Mbps and by default goes to BE if it doesn't
-#
-$TC filter add dev $INDEV parent ffff: protocol ip prio 7 handle 2 fw \
-$meter5 \
-drop flowid 4:4
-
-
-######################## Egress side ########################
-
-
-# attach a dsmarker
-#
-$TC qdisc add $EGDEV handle 1:0 root dsmark indices 64
-#
-# values of the DSCP to change depending on the class
-#note that the ECN bits are masked out
-#
-#AF41 (0x88 is 0x22 shifted to the left by two bits)
-#
-$TC class change $EGDEV classid 1:1 dsmark mask 0x3 \
-       value 0x88
-#AF42
-$TC class change $EGDEV classid 1:2 dsmark mask 0x3 \
-       value 0x90
-#AF43
-$TC class change $EGDEV classid 1:3 dsmark mask 0x3 \
-       value 0x98
-#BE
-$TC class change $EGDEV classid 1:4 dsmark mask 0x3 \
-       value 0x0
-#
-#
-# The class mapping (using tcindex; could easily have
-# replaced it with the fw classifier instead)
-#
-$TC filter add $EGDEV parent 1:0 protocol ip prio 1 \
-          handle 1 tcindex classid 1:1
-$TC filter add $EGDEV parent 1:0 protocol ip prio 1 \
-          handle 2 tcindex  classid 1:2
-$TC filter add $EGDEV parent 1:0 protocol ip prio 1 \
-          handle 3 tcindex  classid 1:3
-$TC filter add $EGDEV parent 1:0 protocol ip prio 1 \
-          handle 4 tcindex  classid 1:4
-#
-
-#
-echo "---- qdisc parameters Ingress  ----------"
-$TC qdisc ls dev $INDEV
-echo "---- Class parameters Ingress  ----------"
-$TC class ls dev $INDEV
-echo "---- filter parameters Ingress ----------"
-$TC filter ls dev $INDEV parent ffff:
-
-echo "---- qdisc parameters Egress  ----------"
-$TC qdisc ls $EGDEV
-echo "---- Class parameters Egress  ----------"
-$TC class ls $EGDEV
-echo "---- filter parameters Egress ----------"
-$TC filter ls $EGDEV parent 1:0
-#
-#deleting the ingress qdisc
-#$TC qdisc del $INDEV ingress
--- a/examples/diffserv/Edge32-cb-u32
+++ b/examples/diffserv/Edge32-cb-u32
@ -1,145 +0,0 @@
-#! /bin/sh 
-#
-# sample script on using the ingress capabilities using u32 classifier
-# This script tags tcindex based on metering on the ingress 
-# interface the result is used for fast classification and re-marking
-# on the egress interface
-# This is an example of a color blind mode marker with PIR configured
-# based on draft-wahjak-mcm-00.txt (section 3.2)
-#
-#path to various utilities;
-#change to reflect yours.
-#
-IPROUTE=/root/DS-6-beta/iproute2-990530-dsing
-TC=$IPROUTE/tc/tc
-IP=$IPROUTE/ip/ip
-INDEV=eth2
-EGDEV="dev eth1"
-CIR1=1000kbit
-CIR2=1000kbit
-# The PIR is the excess (in addition to the CIR i.e if always
-# going to the PIR --> average rate is CIR+PIR)
-PIR1=1000kbit
-PIR2=500kbit
-
-#The CBS is about 60 MTU sized packets
-CBS1=90k
-CBS2=90k
-#the EBS is about 10 max sized packets
-EBS1=15k
-EBS2=15k
-# The meters
-meter1=" police rate $CIR1 burst $CBS1 "
-meter1a=" police rate $PIR1 burst $EBS1 "
-meter2=" police rate $CIR2 burst $CBS1 "
-meter2a="police rate $PIR2 burst $CBS1 "
-meter3=" police rate $CIR2 burst $CBS2 "
-meter3a=" police rate $PIR2 burst $EBS2 "
-meter4=" police rate $CIR1 burst $CBS2 "
-meter5=" police rate $CIR1 burst $CBS2 "
-
-
-# install the ingress qdisc on the ingress interface
-############################################################ 
-$TC qdisc add dev $INDEV handle ffff: ingress
-############################################################ 
-#
-############################################################ 
-
-# All packets are marked with a tcindex value which is used on the egress
-# NOTE: tcindex 1 maps to AF41, 2->AF42, 3->AF43, 4->BE
-# 
-#anything from subnet 10.2.0.0/24 is passed on with a tcindex value 1
-#if it doesn't exceed its CIR/CBS + PIR/EBS
-# 
-$TC filter add dev $INDEV parent ffff: protocol ip prio 1 u32 \
-match ip src 10.2.0.0/24 $meter1 \
-continue flowid :1
-$TC filter add dev $INDEV parent ffff: protocol ip prio 2 u32 \
-match ip src 10.2.0.0/24 $meter1a \
-continue flowid :1
-
-#
-# if it exceeds the above but not the extra rate/burst below, it gets a 
-#tcindex value  of 2
-#
-$TC filter add dev $INDEV parent ffff: protocol ip prio 3 u32 \
-match ip src 10.2.0.0/24 $meter2 \
-continue flowid :2
-$TC filter add dev $INDEV parent ffff: protocol ip prio 4 u32 \
-match ip src 10.2.0.0/24 $meter2a \
-continue flowid :2
-#
-# if it exceeds the above but not the rule below, it gets a tcindex value
-# of 3
-#
-$TC filter add dev $INDEV parent ffff: protocol ip prio 5 u32 \
-match ip src 10.2.0.0/24 $meter3 \
-continue flowid :3
-$TC filter add dev $INDEV parent ffff: protocol ip prio 6 u32 \
-match ip src 10.2.0.0/24 $meter3a \
-drop flowid :3
-#
-#
-# Anything else (not from the subnet 10.2.0.0/24) gets discarded if it 
-# exceeds 1Mbps and by default goes to BE if it doesn't
-#
-$TC filter add dev $INDEV parent ffff: protocol ip prio 7 u32 \
-match ip src 0/0 $meter5 \
-drop flowid :4
-
-
-######################## Egress side ########################
-
-
-# attach a dsmarker
-#
-$TC qdisc add $EGDEV handle 1:0 root dsmark indices 64
-#
-# values of the DSCP to change depending on the class
-#note that the ECN bits are masked out
-#
-#AF41 (0x88 is 0x22 shifted to the left by two bits)
-#
-$TC class change $EGDEV classid 1:1 dsmark mask 0x3 \
-       value 0x88
-#AF42 (0x90 is DSCP 0x24 shifted left by two bits)
-$TC class change $EGDEV classid 1:2 dsmark mask 0x3 \
-       value 0x90
-#AF43 (0x98 is DSCP 0x26 shifted left by two bits)
-$TC class change $EGDEV classid 1:3 dsmark mask 0x3 \
-       value 0x98
-#BE: class 1:4 (the tcindex handle 4 filter below maps to it), not 1:3
-$TC class change $EGDEV classid 1:4 dsmark mask 0x3 \
-       value 0x0
-#
-#
-# The class mapping
-#
-$TC filter add $EGDEV parent 1:0 protocol ip prio 1 \
-          handle 1 tcindex classid 1:1
-$TC filter add $EGDEV parent 1:0 protocol ip prio 1 \
-          handle 2 tcindex  classid 1:2
-$TC filter add $EGDEV parent 1:0 protocol ip prio 1 \
-          handle 3 tcindex  classid 1:3
-$TC filter add $EGDEV parent 1:0 protocol ip prio 1 \
-          handle 4 tcindex  classid 1:4
-#
-
-#
-echo "---- qdisc parameters Ingress  ----------"
-$TC qdisc ls dev $INDEV
-echo "---- Class parameters Ingress  ----------"
-$TC class ls dev $INDEV
-echo "---- filter parameters Ingress ----------"
-$TC filter ls dev $INDEV parent ffff:
-
-echo "---- qdisc parameters Egress  ----------"
-$TC qdisc ls $EGDEV
-echo "---- Class parameters Egress  ----------"
-$TC class ls $EGDEV
-echo "---- filter parameters Egress ----------"
-$TC filter ls $EGDEV parent 1:0
-#
-#deleting the ingress qdisc
-#$TC qdisc del $INDEV ingress
--- a/examples/diffserv/README
+++ b/examples/diffserv/README
@ -1,98 +0,0 @@
-
-Note all these are mere examples which can be customized to your needs
-
-AFCBQ
-----
-AF PHB built using CBQ, DSMARK,GRED (default in GRIO mode) ,RED for BE 
-and the tcindex classifier with some algorithmic mapping
-
-EFCBQ
-----
-EF PHB built using CBQ (for rate control and prioritization), 
-DSMARK( to remark DSCPs), tcindex  classifier and  RED for the BE
-traffic.
-
-EFPRIO
------
-EF PHB using the PRIO scheduler, Token Bucket to rate control EF,
-tcindex classifier, DSMARK to remark, and RED for the BE traffic
-
-EDGE scripts
-==============
-
-CB-3(1|2)-(u32/chains)
-======================
-
-
-The major differences are that the classifier is u32 on -u32 extension
-and IPchains on the chains extension. CB stands for color Blind
-and 31 is for the mode where only a CIR and CBS are defined whereas
-32 stands for a mode where a CIR/CBS + PIR/EBS are defined.
-
-Color Blind (CB)
-==========-----=
-We look at one special subnet that we are interested in for simplicity
-reasons to demonstrate the capability. We send the packets from that
-subnet to AF4*, BE or end up dropping depending on the metering results. 
-
-
-The algorithm overview is as follows:
-
-*classify:
-
-**case: subnet X
----------------
-  if !exceed meter1 tag as AF41
-	else
-	    if !exceed meter2  tag as AF42
-	        else
-		  if !exceed meter 3 tag as AF43
-		      else 
-			 drop 
-
-default case: Any other subnet
-------------------------------
-  if !exceed meter 5 tag as BE
-      else
-	 drop 
-
-
-On the Egress side, change the DSCPs of the packets to reflect AF4* and BE
-based on the tags from the ingress.
-
-------------------------------------------------------------
-
-Color Aware
-===========
-
-Define some meters with + policing and give them IDs eg
-
-meter1=police index 1 rate $CIR1 burst $CBS1  
-meter2=police index 2 rate $CIR2 burst $CBS2   etc 
-
-General overview:
-classify based on the DSCPs and use the policer ids to decide tagging
-
-
-*classify on ingress:
-
-switch (dscp) {
-    case AF41: /* tos&0xfc == 0x88 */
-	if (!exceed meter1) break;
-    case AF42: /* tos&0xfc == 0x90 */
-	if (!exceed meter2) {
-	    tag as AF42;
-	    break;
-	}
-    case AF43: /* tos&0xfc == 0x98 */
-	if (!exceed meter3) {
-	    tag as AF43;
-	    break;
-	} else
-	  drop;
-    default:
-	if (!exceed meter4) tag as BE;
-	else drop;
-}
-
-On the Egress side mark the proper AF tags
--- a/examples/diffserv/afcbq
+++ b/examples/diffserv/afcbq
@ -1,105 +0,0 @@
-#!/usr/bin/perl
-#
-#
-# AF using CBQ for a single interface eth0 
-# 4 AF classes using GRED and one BE using RED
-# Things you might want to change:
-#	- the device bandwidth (set at 10Mbits)
-#	- the bandwidth allocated for each AF class and the BE class	
-#	- the drop probability associated with each AF virtual queue
-#
-# AF DSCP values used (based on AF draft 04)
-# -----------------------------------------
-# AF DSCP values
-# AF1 1. 0x0a 2. 0x0c 3. 0x0e
-# AF2 1. 0x12 2. 0x14 3. 0x16
-# AF3 1. 0x1a 2. 0x1c 3. 0x1e
-# AF4 1. 0x22 2. 0x24 3. 0x26
-
-#
-# 
-# A simple DSCP-class relationship formula used to generate
-# values in the for loop of this script; $drop stands for the
-# DP
-#	$dscp = ($class*8+$drop*2)
-#
-#  if you use GRIO buffer sharing, then GRED priority is set as follows:
-#  $gprio=$drop+1; 
-#
-
-$TC = "/usr/src/iproute2-current/tc/tc";
-$DEV = "dev lo";
-$DEV = "dev eth1";
-$DEV = "dev eth0";
-# the BE-class number
-$beclass = "5";  
-
-#GRIO buffer sharing on or off?
-$GRIO = "";
-$GRIO = "grio";
-# The bandwidth of your device
-$linerate="10Mbit";
-# The BE and AF rates
-%rate_table=();
-$berate="1500Kbit";
-$rate_table{"AF1rate"}="1500Kbit";
-$rate_table{"AF2rate"}="1500Kbit";
-$rate_table{"AF3rate"}="1500Kbit";
-$rate_table{"AF4rate"}="1500Kbit";
-#
-#
-#
-print "\n# --- General setup  ---\n";
-print "$TC qdisc add $DEV handle 1:0 root dsmark indices 64 set_tc_index\n";
-print "$TC filter add $DEV parent 1:0 protocol ip prio 1 tcindex mask 0xfc " .
-   "shift 2 pass_on\n";
-   #"shift 2\n";
-print "$TC qdisc add $DEV parent 1:0 handle 2:0 cbq bandwidth $linerate ".
-  "cell 8 avpkt 1000 mpu 64\n";
-print "$TC filter add $DEV parent 2:0 protocol ip prio 1 tcindex ".
-  "mask 0xf0 shift 4 pass_on\n";
-for $class (1..4) {
-    print "\n# --- AF Class $class specific setup---\n";
-    $AFrate=sprintf("AF%drate",$class);
-    print "$TC class add $DEV parent 2:0 classid 2:$class cbq ".
-      "bandwidth $linerate rate $rate_table{$AFrate} avpkt 1000 prio ".
-      (6-$class)." bounded allot 1514 weight 1 maxburst 21\n";
-    print "$TC filter add $DEV parent 2:0 protocol ip prio 1 handle $class ".
-      "tcindex classid 2:$class\n";
-    print "$TC qdisc add $DEV parent 2:$class gred setup DPs 3 default 2 ".
-      "$GRIO\n";
-# 
-# per DP setup
-#
-    for $drop (1..3) {
-    print "\n# --- AF Class $class DP $drop---\n";
-	$dscp = $class*8+$drop*2;
-	$tcindex = sprintf("1%x%x",$class,$drop);
-	print "$TC filter add $DEV parent 1:0 protocol ip prio 1 ".
-	  "handle $dscp tcindex classid 1:$tcindex\n";
-	$prob = $drop*0.02;
-        if ($GRIO) {
-	$gprio = $drop+1;
-	print "$TC qdisc change $DEV parent 2:$class gred limit 60KB min 15KB ".
-	  "max 45KB burst 20 avpkt 1000 bandwidth $linerate DP $drop ".
-	  "probability $prob ".
-          "prio $gprio\n";
-        } else {
-	print "$TC qdisc change $DEV parent 2:$class gred limit 60KB min 15KB ".
-	  "max 45KB burst 20 avpkt 1000 bandwidth $linerate DP $drop ".
-	  "probability $prob \n";
-	}
-    }
-}
-#
-#
-print "\n#------BE Queue setup------\n";
-print "$TC filter add $DEV parent 1:0 protocol ip prio 2 ".
-          "handle 0 tcindex mask 0 classid 1:1\n";
-print "$TC class add $DEV parent 2:0 classid 2:$beclass cbq ".
-      "bandwidth $linerate rate $berate avpkt 1000 prio 6 " .
-      "bounded allot 1514 weight 1 maxburst 21 \n";
-print "$TC filter add $DEV parent 2:0 protocol ip prio 1 handle 0 tcindex ".
-  "classid 2:5\n";
-print "$TC qdisc add $DEV parent 2:5 red limit 60KB min 15KB max 45KB ".
-  "burst 20 avpkt 1000 bandwidth $linerate probability 0.4\n";
--- a/examples/diffserv/ef-prio
+++ b/examples/diffserv/ef-prio
@ -1,25 +0,0 @@
-#!/usr/bin/perl
-$TC = "/root/DS-6-beta/iproute2-990530-dsing/tc/tc";
-$DEV = "dev eth1";
-$efrate="1.5Mbit";
-$MTU="1.5kB";
-print "$TC qdisc add $DEV handle 1:0 root dsmark indices 64 set_tc_index\n";
-print "$TC filter add $DEV parent 1:0 protocol ip prio 1 tcindex ".
-  "mask 0xfc shift 2\n";
-print "$TC qdisc add $DEV parent 1:0 handle 2:0 prio\n";
-#
-# EF class: Maximum about one MTU sized packet allowed on the queue
-#
-print "$TC qdisc add $DEV parent 2:1 tbf rate $efrate burst $MTU limit 1.6kB\n";
-print "$TC filter add $DEV parent 2:0 protocol ip prio 1 ".
-	  "handle 0x2e tcindex classid 2:1 pass_on\n";
-#
-# BE class
-#
-print "#BE class(2:2) \n";
-print "$TC qdisc add $DEV parent 2:2 red limit 60KB ".
-	  "min 15KB max 45KB burst 20 avpkt 1000 bandwidth 10Mbit ".
-	  "probability 0.4\n";
-#
-print "$TC filter add $DEV parent 2:0 protocol ip prio 2 ".
-	  "handle 0 tcindex mask 0 classid 2:2 pass_on\n";
--- a/examples/diffserv/efcbq
+++ b/examples/diffserv/efcbq
@ -1,31 +0,0 @@
-#!/usr/bin/perl
-#
-$TC = "/root/DS-6-beta/iproute2-990530-dsing/tc/tc";
-$DEV = "dev eth1";
-print "$TC qdisc add $DEV handle 1:0 root dsmark indices 64 set_tc_index\n";
-print "$TC filter add $DEV parent 1:0 protocol ip prio 1 tcindex ".
-  "mask 0xfc shift 2\n";
-print "$TC qdisc add $DEV parent 1:0 handle 2:0 cbq bandwidth ".
-	"10Mbit cell 8 avpkt 1000 mpu 64\n";
-#
-# EF class
-#
-print "$TC class add $DEV parent 2:0 classid 2:1 cbq bandwidth ". 
-	"10Mbit rate 1500Kbit avpkt 1000 prio 1 bounded isolated ".
-	"allot 1514 weight 1 maxburst 10 \n";
-# packet fifo for EF?
-print "$TC qdisc add $DEV parent 2:1 pfifo limit 5\n";
-print "$TC filter add $DEV parent 2:0 protocol ip prio 1 ".
-	  "handle 0x2e tcindex classid 2:1 pass_on\n";
-#
-# BE class
-#
-print "#BE class(2:2) \n";
-print "$TC class add $DEV parent 2:0 classid 2:2 cbq bandwidth ". 
-	"10Mbit rate 5Mbit avpkt 1000 prio 7 allot 1514 weight 1 ".
-	"maxburst 21 borrow split 2:0 defmap 0xffff \n";
-print "$TC qdisc add $DEV parent 2:2 red limit 60KB ".
-	  "min 15KB max 45KB burst 20 avpkt 1000 bandwidth 10Mbit ".
-	  "probability 0.4\n";
-print "$TC filter add $DEV parent 2:0 protocol ip prio 2 ".
-	  "handle 0 tcindex mask 0 classid 2:2 pass_on\n";
--- a/examples/diffserv/regression-testing
+++ b/examples/diffserv/regression-testing
@ -1,125 +0,0 @@
-
-These were the tests done to validate the Diffserv scripts.
-This document will be updated continuously. If you do more
-thorough validation testing please post the details to the
-diffserv mailing list. 
-Nevertheless, these tests should serve for basic validation.
-
-AFCBQ, EFCBQ, EFPRIO
----------------------
-
-generate all possible DSCPs and observe that they 
-get sent to the proper classes. In the case of AF also
-to the correct Virtual Queues.
-
-Edge1
-----
-generate TOS values 0x0,0x10,0xbb each with IP addresses
-10.2.0.24 (mark 1), 10.2.0.3 (mark2) and 10.2.0.30 (mark 3)
-and observe that they get marked as expected.
-
-Edge2
-----
-
-Repeat the tests in Edge1
-ftp with data direction from 10.2.0.2
-	*observe that the metering/policing works correctly (and the marking
-	as well). In this case the mark used will be 3
-
-Edge31-cb-chains
----------------
-
-ftp with data direction from 10.2.0.2
-
-	*observe that the metering/policing works correctly (and the marking
-	as well). In this case the mark used will be 1. 
-
-	Metering: The data throughput should not exceed 2*CIR1 + 2*CIR2
-	which is roughly: 5mbps
-
-	Marking: the should be a variation of marked packets:
-	AF41(TOS=0x88) AF42(0x90) AF43(0x98) and BE (0x0)
-
-More tests required to see the interaction of several sources (other
-than subnet 10.2.0.0/24).
-
-Edge31-ca-u32
--------------
-
-Generate data using modified tcpblast from 10.2.0.2 (behind eth2) to the 
-discard port of 10.1.0.2 (behind eth1)
-
-1) generate with src tos = 0x88
-	Metering: Allocated throughput should not exceed 2*CIR1 + 2*CIR2
-	approximately 5mbps
-	Marking: Should vary between 0x88,0x90,0x98 and 0x0
-
-2) generate with src tos = 0x90
-	Metering: Allocated throughput should not exceed CIR1 + 2*CIR2
-	approximately 3.5mbps
-	Marking: Should vary between 0x90,0x98 and 0x0
-
-3) generate with src tos = 0x98
-	Metering: Allocated throughput should not exceed CIR1 + CIR2
-	approximately 2.5mbps
-	Marking: Should vary between 0x98 and 0x0
-
-4) generate with src tos any other than the above
-	Metering: Allocated throughput should not exceed CIR1 
-	approximately 1.5mbps
-	Marking: Should be consistent at 0x0
-
-TODO: Testing on how each color shares when all 4 types of packets
-are going through the edge device
-
-Edge32-cb-u32, Edge32-cb-chains
-------------------------------
-
-ftp with data direction from 10.2.0.2
-
-	*observe that the metering/policing works correctly (and the marking
-	as well). 
-
-	Metering: 
-        The data throughput should not exceed 2*CIR1 + 2*CIR2
-	+ 2*PIR2 + PIR1 for u32 which is roughly: 6mbps
-        The data throughput should not exceed 2*CIR1 + 5*CIR2
-	for chains which is roughly: 6mbps
-
-	Marking: the should be a variation of marked packets:
-	AF41(TOS=0x88) AF42(0x90) AF43(0x98) and BE (0x0)
-
-TODO:
-More tests required to see the interaction of several sources (other
-than subnet 10.2.0.0/24).
-More tests needed to capture stats on how many times the CIR was exceeded
-but the data was not remarked etc.
-
-Edge32-ca-u32
--------------
-
-Generate data using modified tcpblast from 10.2.0.2 (behind eth2) to the 
-discard port of 10.1.0.2 (behind eth1)
-
-1) generate with src tos = 0x88
-	Metering: Allocated throughput should not exceed 2*CIR1 + 2*CIR2
-	+PIR1 -- approximately 4mbps
-	Marking: Should vary between 0x88,0x90,0x98 and 0x0
-
-2) generate with src tos = 0x90
-	Metering: Allocated throughput should not exceed CIR1 + 2*CIR2
-	+ 2* PIR2 approximately 3mbps
-	Marking: Should vary between 0x90,0x98 and 0x0
-
-3) generate with src tos = 0x98
-	Metering: Allocated throughput should not exceed PIR1+ CIR1 + CIR2
-	approximately 2.5mbps
-	Marking: Should vary between 0x98 and 0x0
-
-4) generate with src tos any other than the above
-	Metering: Allocated throughput should not exceed CIR1 
-	approximately 1mbps
-	Marking: Should be consistent at 0x0
-
-TODO: Testing on how each color shares when all 4 types of packets
-are going through the edge device
--- a/examples/gaiconf
+++ b/examples/gaiconf
@ -1,134 +0,0 @@
-#!/bin/sh
-
-#
-# Setup address label from /etc/gai.conf
-#
-# Written by YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>, 2010.
-#
-
-IP=ip
-DEFAULT_GAICONF=/etc/gai.conf
-verbose=
-debug=
-
-function run ()
-{
-	if [ x"$verbose" != x"" ]; then
-		echo "$@"
-	fi
-	if [ x"$debug" = x"" ]; then
-		"$@"
-	fi
-}
-
-function do_load_config ()
-{
-	file=$1; shift
-	flush=1
-	cat $file | while read command prefix label; do
-		if [ x"$command" = x"#label" ]; then
-			if [ ${flush} = 1 ]; then
-				run ${IP} -6 addrlabel flush
-				flush=0
-			fi
-			run ${IP} -6 addrlabel add prefix $prefix label $label
-		fi
-	done
-}
-
-function do_list_config ()
-{
-	${IP} -6 addrlabel list | while read p pfx l lbl; do
-		echo label ${pfx} ${lbl}
-	done
-}
-
-function help ()
-{
-	echo "Usage: $0 [-v] {--list | --config [ ${DEFAULT_GAICONF} ] | --default}"
-	exit 1
-}
-
-TEMP=`getopt -o c::dlv -l config::,default,list,verbose -n gaiconf -- "$@"`
-
-if [ $? != 0 ]; then
-	echo "Terminating..." >&2
-	exit 1
-fi
-
-TEMPFILE=`mktemp`
-
-eval set -- "$TEMP"
-
-while true ; do
-	case "$1" in
-		-c|--config)
-			if [ x"$cmd" != x"" ]; then
-				help
-			fi
-			case "$2" in
-			"")	gai_conf="${DEFAULT_GAICONF}"
-				shift 2
-				;;
-			*)	gai_conf="$2"
-				shift 2
-			esac
-			cmd=config
-			;;
-		-d|--default)
-			if [ x"$cmd" != x"" ]; then
-				help
-			fi
-			gai_conf=${TEMPFILE}
-			cmd=config
-			;;
-		-l|--list)
-			if [ x"$cmd" != x"" ]; then
-				help
-			fi
-			cmd=list
-			shift
-			;;
-		-v)
-			verbose=1
-			shift
-			;;
-		--)
-			shift;
-			break
-			;;
-		*)
-			echo "Internal error!" >&2
-			exit 1
-			;;
-	esac
-done
-
-case "$cmd" in
-	config)
-		if [ x"$gai_conf" = x"${TEMPFILE}" ]; then
-			sed -e 's/^[[:space:]]*//' <<END_OF_DEFAULT >${TEMPFILE}
-				label ::1/128       0
-				label ::/0          1
-				label 2002::/16     2
-				label ::/96         3
-				label ::ffff:0:0/96 4
-				label fec0::/10     5
-				label fc00::/7      6
-				label 2001:0::/32   7
-END_OF_DEFAULT
-		fi
-		do_load_config "$gai_conf"
-		;;
-	list)
-		do_list_config
-		;;
-	*)
-		help
-		;;
-esac
-
-rm -f "${TEMPFILE}"
-
-exit 0
-
--- a/genl/ctrl.c
+++ b/genl/ctrl.c
@ -28,13 +28,15 @@
 static int usage(void)
 {
 	fprintf(stderr,"Usage: ctrl <CMD>\n" \
-		       "CMD   := get <PARMS> | list | monitor\n" \
+		       "CMD   := get <PARMS> | list | monitor | policy <PARMS>\n" \
 		       "PARMS := name <name> | id <id>\n" \
 		       "Examples:\n" \
 		       "\tctrl ls\n" \
 		       "\tctrl monitor\n" \
 		       "\tctrl get name foobar\n" \
-		       "\tctrl get id 0xF\n");
+		       "\tctrl get id 0xF\n"
+		       "\tctrl policy name foobar\n"
+		       "\tctrl policy id 0xF\n");
 	return -1;
 }

@ -123,7 +125,8 @@ static int print_ctrl(struct rtnl_ctrl_data *ctrl,
 	    ghdr->cmd != CTRL_CMD_DELFAMILY &&
 	    ghdr->cmd != CTRL_CMD_NEWFAMILY &&
 	    ghdr->cmd != CTRL_CMD_NEWMCAST_GRP &&
-	    ghdr->cmd != CTRL_CMD_DELMCAST_GRP) {
+	    ghdr->cmd != CTRL_CMD_DELMCAST_GRP &&
+	    ghdr->cmd != CTRL_CMD_GETPOLICY) {
 		fprintf(stderr, "Unknown controller command %d\n", ghdr->cmd);
 		return 0;
 	}
@ -136,7 +139,7 @@ static int print_ctrl(struct rtnl_ctrl_data *ctrl,
 	}

 	attrs = (struct rtattr *) ((char *) ghdr + GENL_HDRLEN);
-	parse_rtattr(tb, CTRL_ATTR_MAX, attrs, len);
+	parse_rtattr_flags(tb, CTRL_ATTR_MAX, attrs, len, NLA_F_NESTED);

 	if (tb[CTRL_ATTR_FAMILY_NAME]) {
 		char *name = RTA_DATA(tb[CTRL_ATTR_FAMILY_NAME]);
@ -159,6 +162,36 @@ static int print_ctrl(struct rtnl_ctrl_data *ctrl,
 		__u32 *ma = RTA_DATA(tb[CTRL_ATTR_MAXATTR]);
 		fprintf(fp, " max attribs: %d ",*ma);
 	}
+	if (tb[CTRL_ATTR_OP_POLICY]) {
+		const struct rtattr *pos;
+
+		rtattr_for_each_nested(pos, tb[CTRL_ATTR_OP_POLICY]) {
+			struct rtattr *ptb[CTRL_ATTR_POLICY_DUMP_MAX + 1];
+			struct rtattr *pattrs = RTA_DATA(pos);
+			int plen = RTA_PAYLOAD(pos);
+
+			parse_rtattr_flags(ptb, CTRL_ATTR_POLICY_DUMP_MAX,
+					   pattrs, plen, NLA_F_NESTED);
+
+			fprintf(fp, " op %d policies:",
+				pos->rta_type & ~NLA_F_NESTED);
+
+			if (ptb[CTRL_ATTR_POLICY_DO]) {
+				__u32 *v = RTA_DATA(ptb[CTRL_ATTR_POLICY_DO]);
+
+				fprintf(fp, " do=%d", *v);
+			}
+
+			if (ptb[CTRL_ATTR_POLICY_DUMP]) {
+				__u32 *v = RTA_DATA(ptb[CTRL_ATTR_POLICY_DUMP]);
+
+				fprintf(fp, " dump=%d", *v);
+			}
+		}
+	}
+	if (tb[CTRL_ATTR_POLICY])
+		nl_print_policy(tb[CTRL_ATTR_POLICY], fp);
+
 	/* end of family definitions .. */
 	fprintf(fp,"\n");
 	if (tb[CTRL_ATTR_OPS]) {
@ -235,7 +268,9 @@ static int ctrl_list(int cmd, int argc, char **argv)
 		exit(1);
 	}

-	if (cmd == CTRL_CMD_GETFAMILY) {
+	if (cmd == CTRL_CMD_GETFAMILY || cmd == CTRL_CMD_GETPOLICY) {
+		req.g.cmd = cmd;
+
 		if (argc != 2) {
 			fprintf(stderr, "Wrong number of params\n");
 			return -1;
@ -260,7 +295,9 @@ static int ctrl_list(int cmd, int argc, char **argv)
 			fprintf(stderr, "Wrong params\n");
 			goto ctrl_done;
 		}
+	}

+	if (cmd == CTRL_CMD_GETFAMILY) {
 		if (rtnl_talk(&rth, nlh, &answer) < 0) {
 			fprintf(stderr, "Error talking to the kernel\n");
 			goto ctrl_done;
@ -273,7 +310,7 @@ static int ctrl_list(int cmd, int argc, char **argv)

 	}

-	if (cmd == CTRL_CMD_UNSPEC) {
+	if (cmd == CTRL_CMD_UNSPEC || cmd == CTRL_CMD_GETPOLICY) {
 		nlh->nlmsg_flags = NLM_F_ROOT|NLM_F_MATCH|NLM_F_REQUEST;
 		nlh->nlmsg_seq = rth.dump = ++rth.seq;

@ -324,6 +361,8 @@ static int parse_ctrl(struct genl_util *a, int argc, char **argv)
 	    matches(*argv, "show") == 0 ||
 	    matches(*argv, "lst") == 0)
 		return ctrl_list(CTRL_CMD_UNSPEC, argc-1, argv+1);
+	if (matches(*argv, "policy") == 0)
+		return ctrl_list(CTRL_CMD_GETPOLICY, argc-1, argv+1);
 	if (matches(*argv, "help") == 0)
 		return usage();

--- a/genl/genl.c
+++ b/genl/genl.c
@ -22,7 +22,7 @@
 #include <errno.h>
 #include <linux/netlink.h>
 #include <linux/rtnetlink.h> /* until we put our own header */
-#include "SNAPSHOT.h"
+#include "version.h"
 #include "utils.h"
 #include "genl_utils.h"

@ -118,7 +118,7 @@ int main(int argc, char **argv)
 		} else if (matches(argv[1], "-raw") == 0) {
 			++show_raw;
 		} else if (matches(argv[1], "-Version") == 0) {
-			printf("genl utility, iproute2-ss%s\n", SNAPSHOT);
+			printf("genl utility, iproute2-%s\n", version);
 			exit(0);
 		} else if (matches(argv[1], "-help") == 0) {
 			usage();
--- a/genl/genl_utils.h
+++ b/genl/genl_utils.h
@ -2,11 +2,10 @@
 #ifndef _TC_UTIL_H_
 #define _TC_UTIL_H_ 1

+#include <linux/genetlink.h>
 #include "utils.h"
-#include "linux/genetlink.h"

-struct genl_util
-{
+struct genl_util {
 	struct  genl_util *next;
 	char	name[16];
 	int	(*parse_genlopt)(struct genl_util *fu, int argc, char **argv);
--- a/include/SNAPSHOT.h
+++ b/include/SNAPSHOT.h
@ -1 +0,0 @@
-static const char SNAPSHOT[] = "190107";
--- a/include/bpf_api.h
+++ b/include/bpf_api.h
@ -19,6 +19,19 @@

 #include "bpf_elf.h"

+/** libbpf pin type. */
+enum libbpf_pin_type {
+	LIBBPF_PIN_NONE,
+	/* PIN_BY_NAME: pin maps by name (in /sys/fs/bpf by default) */
+	LIBBPF_PIN_BY_NAME,
+};
+
+/** Type helper macros. */
+
+#define __uint(name, val) int (*name)[val]
+#define __type(name, val) typeof(val) *name
+#define __array(name, val) typeof(val) *name[]
+
 /** Misc macros. */

 #ifndef __stringify
--- a/include/bpf_util.h
+++ b/include/bpf_util.h
@ -272,14 +272,18 @@ const char *bpf_prog_to_default_section(enum bpf_prog_type type);
 int bpf_graft_map(const char *map_path, uint32_t *key, int argc, char **argv);
 int bpf_trace_pipe(void);

-void bpf_print_ops(FILE *f, struct rtattr *bpf_ops, __u16 len);
+void bpf_print_ops(struct rtattr *bpf_ops, __u16 len);

-int bpf_prog_load(enum bpf_prog_type type, const struct bpf_insn *insns,
-		  size_t size_insns, const char *license, char *log,
-		  size_t size_log);
+int bpf_prog_load_dev(enum bpf_prog_type type, const struct bpf_insn *insns,
+		      size_t size_insns, const char *license, __u32 ifindex,
+		      char *log, size_t size_log);
+int bpf_program_load(enum bpf_prog_type type, const struct bpf_insn *insns,
+		     size_t size_insns, const char *license, char *log,
+		     size_t size_log);

 int bpf_prog_attach_fd(int prog_fd, int target_fd, enum bpf_attach_type type);
 int bpf_prog_detach_fd(int target_fd, enum bpf_attach_type type);
+int bpf_program_attach(int prog_fd, int target_fd, enum bpf_attach_type type);

 int bpf_dump_prog_info(FILE *f, uint32_t id);

@ -287,6 +291,16 @@ int bpf_dump_prog_info(FILE *f, uint32_t id);
 int bpf_send_map_fds(const char *path, const char *obj);
 int bpf_recv_map_fds(const char *path, int *fds, struct bpf_map_aux *aux,
 		     unsigned int entries);
+#ifdef HAVE_LIBBPF
+int iproute2_bpf_elf_ctx_init(struct bpf_cfg_in *cfg);
+int iproute2_bpf_fetch_ancillary(void);
+int iproute2_get_root_path(char *root_path, size_t len);
+bool iproute2_is_pin_map(const char *libbpf_map_name, char *pathname);
+bool iproute2_is_map_in_map(const char *libbpf_map_name, struct bpf_elf_map *imap,
+			    struct bpf_elf_map *omap, char *omap_name);
+int iproute2_find_map_name_by_id(unsigned int map_id, char *name);
+int iproute2_load_libbpf(struct bpf_cfg_in *cfg);
+#endif /* HAVE_LIBBPF */
 #else
 static inline int bpf_send_map_fds(const char *path, const char *obj)
 {
@ -299,5 +313,15 @@ static inline int bpf_recv_map_fds(const char *path, int *fds,
 {
 	return -1;
 }
+#ifdef HAVE_LIBBPF
+static inline int iproute2_load_libbpf(struct bpf_cfg_in *cfg)
+{
+	fprintf(stderr, "No ELF library support compiled in.\n");
+	return -1;
+}
+#endif /* HAVE_LIBBPF */
 #endif /* HAVE_ELF */
+
+const char *get_libbpf_version(void);
+
 #endif /* __BPF_UTIL__ */
--- a/include/cg_map.h
+++ b/include/cg_map.h
@ -0,0 +1,6 @@
+#ifndef __CG_MAP_H__
+#define __CG_MAP_H__
+
+const char *cg_id_to_path(__u64 id);
+
+#endif /* __CG_MAP_H__ */
--- a/include/iptables.h
+++ b/include/iptables.h
@ -12,7 +12,7 @@ extern int do_command4(int argc, char *argv[], char **table,
 		      struct xtc_handle **handle, bool restore);
 extern int delete_chain4(const xt_chainlabel chain, int verbose,
 			struct xtc_handle *handle);
-extern int flush_entries4(const xt_chainlabel chain, int verbose, 
+extern int flush_entries4(const xt_chainlabel chain, int verbose,
 			struct xtc_handle *handle);
 extern int for_each_chain4(int (*fn)(const xt_chainlabel, int, struct xtc_handle *),
 		int verbose, int builtinstoo, struct xtc_handle *handle);
--- a/include/json_print.h
+++ b/include/json_print.h
@ -15,6 +15,9 @@
 #include "json_writer.h"
 #include "color.h"

+#define _IS_JSON_CONTEXT(type) (is_json_context() && (type & PRINT_JSON || type & PRINT_ANY))
+#define _IS_FP_CONTEXT(type)   (!is_json_context() && (type & PRINT_FP || type & PRINT_ANY))
+
 json_writer_t *get_json_writer(void);

 /*
@ -31,11 +34,11 @@ enum output_type {

 void new_json_obj(int json);
 void delete_json_obj(void);
+void new_json_obj_plain(int json);
+void delete_json_obj_plain(void);

 bool is_json_context(void);

-void fflush_fp(void);
-
 void open_json_object(const char *str);
 void close_json_object(void);
 void open_json_array(enum output_type type, const char *delim);
@ -44,32 +47,61 @@ void close_json_array(enum output_type type, const char *delim);
 void print_nl(void);

 #define _PRINT_FUNC(type_name, type)					\
-	void print_color_##type_name(enum output_type t,		\
-				     enum color_attr color,		\
-				     const char *key,			\
-				     const char *fmt,			\
-				     type value);			\
+	int print_color_##type_name(enum output_type t,			\
+				    enum color_attr color,		\
+				    const char *key,			\
+				    const char *fmt,			\
+				    type value);			\
 									\
-	static inline void print_##type_name(enum output_type t,	\
-					     const char *key,		\
-					     const char *fmt,		\
-					     type value)		\
+	static inline int print_##type_name(enum output_type t,		\
+					    const char *key,		\
+					    const char *fmt,		\
+					    type value)			\
 	{								\
-		print_color_##type_name(t, COLOR_NONE, key, fmt, value);	\
+		return print_color_##type_name(t, COLOR_NONE, key, fmt,	\
+					       value);			\
 	}
-_PRINT_FUNC(int, int);
-_PRINT_FUNC(s64, int64_t);
-_PRINT_FUNC(bool, bool);
-_PRINT_FUNC(null, const char*);
-_PRINT_FUNC(string, const char*);
-_PRINT_FUNC(uint, unsigned int);
-_PRINT_FUNC(u64, uint64_t);
-_PRINT_FUNC(hu, unsigned short);
-_PRINT_FUNC(hex, unsigned int);
-_PRINT_FUNC(0xhex, unsigned long long);
-_PRINT_FUNC(luint, unsigned long);
-_PRINT_FUNC(lluint, unsigned long long);
-_PRINT_FUNC(float, double);
+
+/* These functions return 0 if printing to a JSON context, number of
+ * characters printed otherwise (as calculated by printf(3)).
+ */
+_PRINT_FUNC(int, int)
+_PRINT_FUNC(s64, int64_t)
+_PRINT_FUNC(bool, bool)
+_PRINT_FUNC(on_off, bool)
+_PRINT_FUNC(null, const char*)
+_PRINT_FUNC(string, const char*)
+_PRINT_FUNC(uint, unsigned int)
+_PRINT_FUNC(size, __u32)
+_PRINT_FUNC(u64, uint64_t)
+_PRINT_FUNC(hhu, unsigned char)
+_PRINT_FUNC(hu, unsigned short)
+_PRINT_FUNC(hex, unsigned int)
+_PRINT_FUNC(0xhex, unsigned long long)
+_PRINT_FUNC(luint, unsigned long)
+_PRINT_FUNC(lluint, unsigned long long)
+_PRINT_FUNC(float, double)
+_PRINT_FUNC(tv, const struct timeval *)
 #undef _PRINT_FUNC

+#define _PRINT_NAME_VALUE_FUNC(type_name, type, format_char)		  \
+	void print_##type_name##_name_value(const char *name, type value) \
+
+_PRINT_NAME_VALUE_FUNC(uint, unsigned int, u);
+_PRINT_NAME_VALUE_FUNC(string, const char*, s);
+#undef _PRINT_NAME_VALUE_FUNC
+
+int print_color_rate(bool use_iec, enum output_type t, enum color_attr color,
+		     const char *key, const char *fmt, unsigned long long rate);
+
+static inline int print_rate(bool use_iec, enum output_type t,
+			     const char *key, const char *fmt,
+			     unsigned long long rate)
+{
+	return print_color_rate(use_iec, t, COLOR_NONE, key, fmt, rate);
+}
+
+/* A backdoor to the size formatter. Please use print_size() instead. */
+char *sprint_size(__u32 sz, char *buf);
+
 #endif /* _JSON_PRINT_H_ */
--- a/include/json_writer.h
+++ b/include/json_writer.h
@ -38,6 +38,7 @@ void jsonw_float_fmt(json_writer_t *self, const char *fmt, double num);
 void jsonw_uint(json_writer_t *self, unsigned int number);
 void jsonw_u64(json_writer_t *self, uint64_t number);
 void jsonw_xint(json_writer_t *self, uint64_t number);
+void jsonw_hhu(json_writer_t *self, unsigned char num);
 void jsonw_hu(json_writer_t *self, unsigned short number);
 void jsonw_int(json_writer_t *self, int number);
 void jsonw_s64(json_writer_t *self, int64_t number);
@ -52,6 +53,7 @@ void jsonw_float_field(json_writer_t *self, const char *prop, double num);
 void jsonw_uint_field(json_writer_t *self, const char *prop, unsigned int num);
 void jsonw_u64_field(json_writer_t *self, const char *prop, uint64_t num);
 void jsonw_xint_field(json_writer_t *self, const char *prop, uint64_t num);
+void jsonw_hhu_field(json_writer_t *self, const char *prop, unsigned char num);
 void jsonw_hu_field(json_writer_t *self, const char *prop, unsigned short num);
 void jsonw_int_field(json_writer_t *self, const char *prop, int num);
 void jsonw_s64_field(json_writer_t *self, const char *prop, int64_t num);
--- a/include/libgenl.h
+++ b/include/libgenl.h
@ -21,6 +21,7 @@ struct {								\
 	},								\
 }

+int genl_add_mcast_grp(struct rtnl_handle *grth, __u16 genl_family, const char *group);
 int genl_resolve_family(struct rtnl_handle *grth, const char *family);
 int genl_init_handle(struct rtnl_handle *grth, const char *family,
 		     int *genl_family);
--- a/include/libnetlink.h
+++ b/include/libnetlink.h
@ -23,6 +23,7 @@ struct rtnl_handle {
 	FILE		       *dump_fp;
 #define RTNL_HANDLE_F_LISTEN_ALL_NSID		0x01
 #define RTNL_HANDLE_F_SUPPRESS_NLERR		0x02
+#define RTNL_HANDLE_F_STRICT_CHK		0x04
 	int			flags;
 };

@ -44,26 +45,33 @@ int rtnl_open(struct rtnl_handle *rth, unsigned int subscriptions)
 int rtnl_open_byproto(struct rtnl_handle *rth, unsigned int subscriptions,
 			     int protocol)
 	__attribute__((warn_unused_result));
-
+int rtnl_add_nl_group(struct rtnl_handle *rth, unsigned int group)
+	__attribute__((warn_unused_result));
 void rtnl_close(struct rtnl_handle *rth);
+void rtnl_set_strict_dump(struct rtnl_handle *rth);

-int rtnl_addrdump_req(struct rtnl_handle *rth, int family)
+typedef int (*req_filter_fn_t)(struct nlmsghdr *nlh, int reqlen);
+
+int rtnl_addrdump_req(struct rtnl_handle *rth, int family,
+		      req_filter_fn_t filter_fn)
 	__attribute__((warn_unused_result));
 int rtnl_addrlbldump_req(struct rtnl_handle *rth, int family)
 	__attribute__((warn_unused_result));
-int rtnl_routedump_req(struct rtnl_handle *rth, int family)
+int rtnl_routedump_req(struct rtnl_handle *rth, int family,
+		       req_filter_fn_t filter_fn)
 	__attribute__((warn_unused_result));
 int rtnl_ruledump_req(struct rtnl_handle *rth, int family)
 	__attribute__((warn_unused_result));
-int rtnl_neighdump_req(struct rtnl_handle *rth, int family)
+int rtnl_neighdump_req(struct rtnl_handle *rth, int family,
+		       req_filter_fn_t filter_fn)
 	__attribute__((warn_unused_result));
 int rtnl_neightbldump_req(struct rtnl_handle *rth, int family)
 	__attribute__((warn_unused_result));
 int rtnl_mdbdump_req(struct rtnl_handle *rth, int family)
 	__attribute__((warn_unused_result));
-int rtnl_netconfdump_req(struct rtnl_handle *rth, int family)
+int rtnl_brvlandump_req(struct rtnl_handle *rth, int family, __u32 dump_flags)
 	__attribute__((warn_unused_result));
-int rtnl_nsiddump_req(struct rtnl_handle *rth, int family)
+int rtnl_netconfdump_req(struct rtnl_handle *rth, int family)
 	__attribute__((warn_unused_result));

 int rtnl_linkdump_req(struct rtnl_handle *rth, int fam)
@ -71,11 +79,15 @@ int rtnl_linkdump_req(struct rtnl_handle *rth, int fam)
 int rtnl_linkdump_req_filter(struct rtnl_handle *rth, int fam, __u32 filt_mask)
 	__attribute__((warn_unused_result));

-typedef int (*req_filter_fn_t)(struct nlmsghdr *nlh, int reqlen);
-
 int rtnl_linkdump_req_filter_fn(struct rtnl_handle *rth, int fam,
 				req_filter_fn_t fn)
 	__attribute__((warn_unused_result));
+int rtnl_fdb_linkdump_req_filter_fn(struct rtnl_handle *rth,
+				    req_filter_fn_t filter_fn)
+	__attribute__((warn_unused_result));
+int rtnl_nsiddump_req_filter_fn(struct rtnl_handle *rth, int family,
+				req_filter_fn_t filter_fn)
+	__attribute__((warn_unused_result));
 int rtnl_statsdump_req_filter(struct rtnl_handle *rth, int fam, __u32 filt_mask)
 	__attribute__((warn_unused_result));
 int rtnl_dump_request(struct rtnl_handle *rth, int type, void *req,
@ -84,12 +96,40 @@ int rtnl_dump_request(struct rtnl_handle *rth, int type, void *req,
 int rtnl_dump_request_n(struct rtnl_handle *rth, struct nlmsghdr *n)
 	__attribute__((warn_unused_result));

+int rtnl_nexthopdump_req(struct rtnl_handle *rth, int family,
+			 req_filter_fn_t filter_fn)
+	__attribute__((warn_unused_result));
+int rtnl_nexthop_bucket_dump_req(struct rtnl_handle *rth, int family,
+				 req_filter_fn_t filter_fn)
+	__attribute__((warn_unused_result));
+
 struct rtnl_ctrl_data {
 	int	nsid;
 };

 typedef int (*rtnl_filter_t)(struct nlmsghdr *n, void *);

+/**
+ * rtnl error handler called from
+ *      rtnl_dump_done()
+ *      rtnl_dump_error()
+ *
+ * Return value is a bitmask of the following values:
+ * RTNL_LET_NLERR
+ *      error handled as usual
+ * RTNL_SUPPRESS_NLMSG_DONE_NLERR
+ *      error in nlmsg_type == NLMSG_DONE will be suppressed
+ * RTNL_SUPPRESS_NLMSG_ERROR_NLERR
+ *      error in nlmsg_type == NLMSG_ERROR will be suppressed
+ *      and nlmsg will be skipped
+ * RTNL_SUPPRESS_NLERR - suppress error in both previous cases
+ */
+#define RTNL_LET_NLERR				0x01
+#define RTNL_SUPPRESS_NLMSG_DONE_NLERR		0x02
+#define RTNL_SUPPRESS_NLMSG_ERROR_NLERR		0x04
+#define RTNL_SUPPRESS_NLERR			0x06
+typedef int (*rtnl_err_hndlr_t)(struct nlmsghdr *n, void *);
+
 typedef int (*rtnl_listen_filter_t)(struct rtnl_ctrl_data *,
 				    struct nlmsghdr *n, void *);

@ -99,6 +139,8 @@ typedef int (*nl_ext_ack_fn_t)(const char *errmsg, uint32_t off,
 struct rtnl_dump_filter_arg {
 	rtnl_filter_t filter;
 	void *arg1;
+	rtnl_err_hndlr_t errhndlr;
+	void *arg2;
 	__u16 nc_flags;
 };

@ -107,6 +149,15 @@ int rtnl_dump_filter_nc(struct rtnl_handle *rth,
 			void *arg, __u16 nc_flags);
 #define rtnl_dump_filter(rth, filter, arg) \
 	rtnl_dump_filter_nc(rth, filter, arg, 0)
+int rtnl_dump_filter_errhndlr_nc(struct rtnl_handle *rth,
+				 rtnl_filter_t filter,
+				 void *arg1,
+				 rtnl_err_hndlr_t errhndlr,
+				 void *arg2,
+				 __u16 nc_flags);
+#define rtnl_dump_filter_errhndlr(rth, filter, farg, errhndlr, earg) \
+	rtnl_dump_filter_errhndlr_nc(rth, filter, farg, errhndlr, earg, 0)
+
 int rtnl_talk(struct rtnl_handle *rtnl, struct nlmsghdr *n,
 	      struct nlmsghdr **answer)
 	__attribute__((warn_unused_result));
@ -121,6 +172,7 @@ int rtnl_send(struct rtnl_handle *rth, const void *buf, int)
 int rtnl_send_check(struct rtnl_handle *rth, const void *buf, int)
 	__attribute__((warn_unused_result));
 int nl_dump_ext_ack(const struct nlmsghdr *nlh, nl_ext_ack_fn_t errfn);
+int nl_dump_ext_ack_done(const struct nlmsghdr *nlh, int error);

 int addattr(struct nlmsghdr *n, int maxlen, int type);
 int addattr8(struct nlmsghdr *n, int maxlen, int type, __u8 data);
@ -158,7 +210,8 @@ int rta_nest_end(struct rtattr *rta, struct rtattr *nest);
 				    RTA_ALIGN((rta)->rta_len)))

 #define parse_rtattr_nested(tb, max, rta) \
-	(parse_rtattr((tb), (max), RTA_DATA(rta), RTA_PAYLOAD(rta)))
+	(parse_rtattr_flags((tb), (max), RTA_DATA(rta), RTA_PAYLOAD(rta), \
+			    NLA_F_NESTED))

 #define parse_rtattr_one_nested(type, rta) \
 	(parse_rtattr_one(type, RTA_DATA(rta), RTA_PAYLOAD(rta)))
@ -264,8 +317,20 @@ int rtnl_from_file(FILE *, rtnl_listen_filter_t handler,
 	((struct rtattr *)(((char *)(r)) + NLMSG_ALIGN(sizeof(struct if_stats_msg))))
 #endif

+#ifndef BRVLAN_RTA
+#define BRVLAN_RTA(r) \
+	((struct rtattr *)(((char *)(r)) + NLMSG_ALIGN(sizeof(struct br_vlan_msg))))
+#endif
+
 /* User defined nlmsg_type which is used mostly for logging netlink
 * messages from dump file */
 #define NLMSG_TSTAMP	15

+#define rtattr_for_each_nested(attr, nest) \
+	for ((attr) = (void *)RTA_DATA(nest); \
+	     RTA_OK(attr, RTA_PAYLOAD(nest) - ((char *)(attr) - (char *)RTA_DATA((nest)))); \
+	     (attr) = RTA_TAIL((attr)))
+
+void nl_print_policy(const struct rtattr *attr, FILE *fp);
+
 #endif /* __LIBNETLINK_H__ */
--- a/include/ll_map.h
+++ b/include/ll_map.h
@ -9,6 +9,7 @@ unsigned ll_name_to_index(const char *name);
 const char *ll_index_to_name(unsigned idx);
 int ll_index_to_type(unsigned idx);
 int ll_index_to_flags(unsigned idx);
+void ll_drop_by_index(unsigned index);
 unsigned namehash(const char *str);

 const char *ll_idx_n2a(unsigned int idx);
--- a/include/mnl_utils.h
+++ b/include/mnl_utils.h
@ -0,0 +1,33 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __MNL_UTILS_H__
+#define __MNL_UTILS_H__ 1
+
+struct mnlu_gen_socket {
+	struct mnl_socket *nl;
+	char *buf;
+	uint32_t family;
+	unsigned int seq;
+	uint8_t version;
+};
+
+int mnlu_gen_socket_open(struct mnlu_gen_socket *nlg, const char *family_name,
+			 uint8_t version);
+void mnlu_gen_socket_close(struct mnlu_gen_socket *nlg);
+struct nlmsghdr *
+_mnlu_gen_socket_cmd_prepare(struct mnlu_gen_socket *nlg,
+			     uint8_t cmd, uint16_t flags,
+			     uint32_t id, uint8_t version);
+struct nlmsghdr *mnlu_gen_socket_cmd_prepare(struct mnlu_gen_socket *nlg,
+					     uint8_t cmd, uint16_t flags);
+int mnlu_gen_socket_sndrcv(struct mnlu_gen_socket *nlg, const struct nlmsghdr *nlh,
+			   mnl_cb_t data_cb, void *data);
+
+struct mnl_socket *mnlu_socket_open(int bus);
+struct nlmsghdr *mnlu_msg_prepare(void *buf, uint32_t nlmsg_type, uint16_t flags,
+				  void *extra_header, size_t extra_header_size);
+int mnlu_socket_recv_run(struct mnl_socket *nl, unsigned int seq, void *buf, size_t buf_size,
+			 mnl_cb_t cb, void *data);
+int mnlu_gen_socket_recv_run(struct mnlu_gen_socket *nlg, mnl_cb_t cb,
+			     void *data);
+
+#endif /* __MNL_UTILS_H__ */
--- a/include/rt_names.h
+++ b/include/rt_names.h
@ -9,6 +9,7 @@ const char *rtnl_rtscope_n2a(int id, char *buf, int len);
 const char *rtnl_rttable_n2a(__u32 id, char *buf, int len);
 const char *rtnl_rtrealm_n2a(int id, char *buf, int len);
 const char *rtnl_dsfield_n2a(int id, char *buf, int len);
+const char *rtnl_dsfield_get_name(int id);
 const char *rtnl_group_n2a(int id, char *buf, int len);

 int rtnl_rtprot_a2n(__u32 *id, const char *arg);
@ -33,4 +34,9 @@ int ll_proto_a2n(unsigned short *id, const char *buf);
 const char *nl_proto_n2a(int id, char *buf, int len);
 int nl_proto_a2n(__u32 *id, const char *arg);

+int protodown_reason_a2n(__u32 *id, const char *arg);
+int protodown_reason_n2a(int id, char *buf, int len);
+
+extern int numeric;
+
 #endif
--- a/include/uapi/asm-generic/sockios.h
+++ b/include/uapi/asm-generic/sockios.h
@ -0,0 +1,14 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef __ASM_GENERIC_SOCKIOS_H
+#define __ASM_GENERIC_SOCKIOS_H
+
+/* Socket-level I/O control calls. */
+#define FIOSETOWN	0x8901
+#define SIOCSPGRP	0x8902
+#define FIOGETOWN	0x8903
+#define SIOCGPGRP	0x8904
+#define SIOCATMARK	0x8905
+#define SIOCGSTAMP_OLD	0x8906		/* Get stamp (timeval) */
+#define SIOCGSTAMPNS_OLD 0x8907		/* Get stamp (timespec) */
+
+#endif /* __ASM_GENERIC_SOCKIOS_H */
--- a/include/uapi/linux/amt.h
+++ b/include/uapi/linux/amt.h
@ -0,0 +1,62 @@
+/* SPDX-License-Identifier: GPL-2.0-only WITH Linux-syscall-note */
+/*
+ * Copyright (c) 2021 Taehee Yoo <ap420073@gmail.com>
+ */
+#ifndef _AMT_H_
+#define _AMT_H_
+
+enum ifla_amt_mode {
+	/* The AMT interface operates in Gateway mode.
+	 * Gateway mode encapsulates IGMP/MLD traffic and decapsulates
+	 * multicast traffic.
+	 */
+	AMT_MODE_GATEWAY = 0,
+	/* The AMT interface operates in Relay mode.
+	 * Relay mode encapsulates multicast traffic and decapsulates
+	 * IGMP/MLD traffic.
+	 */
+	AMT_MODE_RELAY,
+	__AMT_MODE_MAX,
+};
+
+#define AMT_MODE_MAX (__AMT_MODE_MAX - 1)
+
+enum {
+	IFLA_AMT_UNSPEC,
+	/* This attribute specifies the mode: either Gateway or Relay. */
+	IFLA_AMT_MODE,
+	/* This attribute specifies the Relay port.
+	 * When the AMT interface is created in Gateway mode, this attribute
+	 * specifies the relay (remote) port.
+	 * When the AMT interface is created in Relay mode, this attribute is
+	 * used as the local port.
+	 */
+	IFLA_AMT_RELAY_PORT,
+	/* This attribute specifies the Gateway port.
+	 * When the AMT interface is created in Gateway mode, this attribute
+	 * is used as the local port.
+	 * When the AMT interface is created in Relay mode, it is not used.
+	 */
+	IFLA_AMT_GATEWAY_PORT,
+	/* This attribute specifies the physical device. */
+	IFLA_AMT_LINK,
+	/* This attribute specifies the local IP address. */
+	IFLA_AMT_LOCAL_IP,
+	/* This attribute specifies the Relay IP address.
+	 * So, this is not used by the Relay.
+	 */
+	IFLA_AMT_REMOTE_IP,
+	/* This attribute specifies the Discovery IP address.
+	 * When the Gateway starts, it sends a discovery message to find the
+	 * Relay's IP address.
+	 * So, this is not used by the Relay.
+	 */
+	IFLA_AMT_DISCOVERY_IP,
+	/* This attribute specifies the maximum number of tunnels. */
+	IFLA_AMT_MAX_TUNNELS,
+	__IFLA_AMT_MAX,
+};
+
+#define IFLA_AMT_MAX (__IFLA_AMT_MAX - 1)
+
+#endif /* _AMT_H_ */
--- a/include/uapi/linux/atmioc.h
+++ b/include/uapi/linux/atmioc.h
@ -5,7 +5,7 @@


 /*
- * See http://icawww1.epfl.ch/linux-atm/magic.html for the complete list of
+ * See https://icawww1.epfl.ch/linux-atm/magic.html for the complete list of
 * "magic" ioctl numbers.
 */

--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
--- a/include/uapi/linux/btf.h
+++ b/include/uapi/linux/btf.h
@ -22,9 +22,9 @@ struct btf_header {
 };

 /* Max # of type identifier */
-#define BTF_MAX_TYPE	0x0000ffff
+#define BTF_MAX_TYPE	0x000fffff
 /* Max offset into the string section */
-#define BTF_MAX_NAME_OFFSET	0x0000ffff
+#define BTF_MAX_NAME_OFFSET	0x00ffffff
 /* Max # of struct/union/enum members or func args */
 #define BTF_MAX_VLEN	0xffff

@ -34,13 +34,16 @@ struct btf_type {
 	 * bits  0-15: vlen (e.g. # of struct's members)
 	 * bits 16-23: unused
 	 * bits 24-27: kind (e.g. int, ptr, array...etc)
-	 * bits 28-31: unused
+	 * bits 28-30: unused
+	 * bit     31: kind_flag, currently used by
+	 *             struct, union and fwd
 	 */
 	__u32 info;
-	/* "size" is used by INT, ENUM, STRUCT and UNION.
+	/* "size" is used by INT, ENUM, STRUCT, UNION and DATASEC.
 	 * "size" tells the size of the type it is describing.
 	 *
-	 * "type" is used by PTR, TYPEDEF, VOLATILE, CONST and RESTRICT.
+	 * "type" is used by PTR, TYPEDEF, VOLATILE, CONST, RESTRICT,
+	 * FUNC, FUNC_PROTO, VAR and DECL_TAG.
 	 * "type" is a type_id referring to another type.
 	 */
 	union {
@ -49,23 +52,33 @@ struct btf_type {
 	};
 };

-#define BTF_INFO_KIND(info)	(((info) >> 24) & 0x0f)
+#define BTF_INFO_KIND(info)	(((info) >> 24) & 0x1f)
 #define BTF_INFO_VLEN(info)	((info) & 0xffff)
+#define BTF_INFO_KFLAG(info)	((info) >> 31)

-#define BTF_KIND_UNKN		0	/* Unknown	*/
-#define BTF_KIND_INT		1	/* Integer	*/
-#define BTF_KIND_PTR		2	/* Pointer	*/
-#define BTF_KIND_ARRAY		3	/* Array	*/
-#define BTF_KIND_STRUCT		4	/* Struct	*/
-#define BTF_KIND_UNION		5	/* Union	*/
-#define BTF_KIND_ENUM		6	/* Enumeration	*/
-#define BTF_KIND_FWD		7	/* Forward	*/
-#define BTF_KIND_TYPEDEF	8	/* Typedef	*/
-#define BTF_KIND_VOLATILE	9	/* Volatile	*/
-#define BTF_KIND_CONST		10	/* Const	*/
-#define BTF_KIND_RESTRICT	11	/* Restrict	*/
-#define BTF_KIND_MAX		11
-#define NR_BTF_KINDS		12
+enum {
+	BTF_KIND_UNKN		= 0,	/* Unknown	*/
+	BTF_KIND_INT		= 1,	/* Integer	*/
+	BTF_KIND_PTR		= 2,	/* Pointer	*/
+	BTF_KIND_ARRAY		= 3,	/* Array	*/
+	BTF_KIND_STRUCT		= 4,	/* Struct	*/
+	BTF_KIND_UNION		= 5,	/* Union	*/
+	BTF_KIND_ENUM		= 6,	/* Enumeration	*/
+	BTF_KIND_FWD		= 7,	/* Forward	*/
+	BTF_KIND_TYPEDEF	= 8,	/* Typedef	*/
+	BTF_KIND_VOLATILE	= 9,	/* Volatile	*/
+	BTF_KIND_CONST		= 10,	/* Const	*/
+	BTF_KIND_RESTRICT	= 11,	/* Restrict	*/
+	BTF_KIND_FUNC		= 12,	/* Function	*/
+	BTF_KIND_FUNC_PROTO	= 13,	/* Function Proto	*/
+	BTF_KIND_VAR		= 14,	/* Variable	*/
+	BTF_KIND_DATASEC	= 15,	/* Section	*/
+	BTF_KIND_FLOAT		= 16,	/* Floating point	*/
+	BTF_KIND_DECL_TAG	= 17,	/* Decl Tag */
+
+	NR_BTF_KINDS,
+	BTF_KIND_MAX		= NR_BTF_KINDS - 1,
+};

 /* For some specific BTF_KIND, "struct btf_type" is immediately
 * followed by extra data.
@ -75,7 +88,7 @@ struct btf_type {
 * is the 32 bits arrangement:
 */
 #define BTF_INT_ENCODING(VAL)	(((VAL) & 0x0f000000) >> 24)
-#define BTF_INT_OFFSET(VAL)	(((VAL  & 0x00ff0000)) >> 16)
+#define BTF_INT_OFFSET(VAL)	(((VAL) & 0x00ff0000) >> 16)
 #define BTF_INT_BITS(VAL)	((VAL)  & 0x000000ff)

 /* Attributes stored in the BTF_INT_ENCODING */
@ -107,7 +120,69 @@ struct btf_array {
 struct btf_member {
 	__u32	name_off;
 	__u32	type;
-	__u32	offset;	/* offset in bits */
+	/* If the type info kind_flag is set, the btf_member offset
+	 * contains both member bitfield size and bit offset. The
+	 * bitfield size is set for bitfield members. If the type
+	 * info kind_flag is not set, the offset contains only bit
+	 * offset.
+	 */
+	__u32	offset;
+};
+
+/* If the struct/union type info kind_flag is set, the
+ * following two macros are used to access bitfield_size
+ * and bit_offset from btf_member.offset.
+ */
+#define BTF_MEMBER_BITFIELD_SIZE(val)	((val) >> 24)
+#define BTF_MEMBER_BIT_OFFSET(val)	((val) & 0xffffff)
+
+/* BTF_KIND_FUNC_PROTO is followed by multiple "struct btf_param".
+ * The exact number of btf_param is stored in the vlen (of the
+ * info in "struct btf_type").
+ */
+struct btf_param {
+	__u32	name_off;
+	__u32	type;
+};
+
+enum {
+	BTF_VAR_STATIC = 0,
+	BTF_VAR_GLOBAL_ALLOCATED = 1,
+	BTF_VAR_GLOBAL_EXTERN = 2,
+};
+
+enum btf_func_linkage {
+	BTF_FUNC_STATIC = 0,
+	BTF_FUNC_GLOBAL = 1,
+	BTF_FUNC_EXTERN = 2,
+};
+
+/* BTF_KIND_VAR is followed by a single "struct btf_var" to describe
+ * additional information related to the variable such as its linkage.
+ */
+struct btf_var {
+	__u32	linkage;
+};
+
+/* BTF_KIND_DATASEC is followed by multiple "struct btf_var_secinfo"
+ * to describe all BTF_KIND_VAR types it contains along with its
+ * in-section offset as well as size.
+ */
+struct btf_var_secinfo {
+	__u32	type;
+	__u32	offset;
+	__u32	size;
+};
+
+/* BTF_KIND_DECL_TAG is followed by a single "struct btf_decl_tag" to describe
+ * additional information related to the tag applied location.
+ * If component_idx == -1, the tag is applied to a struct, union,
+ * variable or function. Otherwise, it is applied to a struct/union
+ * member or a func argument, and component_idx indicates which member
+ * or argument (0 ... vlen-1).
+ */
+struct btf_decl_tag {
+       __s32   component_idx;
 };

 #endif /* __LINUX_BTF_H__ */
--- a/include/uapi/linux/can.h
+++ b/include/uapi/linux/can.h
@ -1,4 +1,4 @@
-/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause) */
+/* SPDX-License-Identifier: ((GPL-2.0-only WITH Linux-syscall-note) OR BSD-3-Clause) */
 /*
 * linux/can.h
 *
@ -84,6 +84,7 @@ typedef __u32 can_err_mask_t;

 /* CAN payload length and DLC definitions according to ISO 11898-1 */
 #define CAN_MAX_DLC 8
+#define CAN_MAX_RAW_DLC 15
 #define CAN_MAX_DLEN 8

 /* CAN FD payload length and DLC definitions according to ISO 11898-7 */
@ -91,30 +92,39 @@ typedef __u32 can_err_mask_t;
 #define CANFD_MAX_DLEN 64

 /**
- * struct can_frame - basic CAN frame structure
- * @can_id:  CAN ID of the frame and CAN_*_FLAG flags, see canid_t definition
- * @can_dlc: frame payload length in byte (0 .. 8) aka data length code
- *           N.B. the DLC field from ISO 11898-1 Chapter 8.4.2.3 has a 1:1
- *           mapping of the 'data length code' to the real payload length
- * @__pad:   padding
- * @__res0:  reserved / padding
- * @__res1:  reserved / padding
- * @data:    CAN frame payload (up to 8 byte)
+ * struct can_frame - Classical CAN frame structure (aka CAN 2.0B)
+ * @can_id:   CAN ID of the frame and CAN_*_FLAG flags, see canid_t definition
+ * @len:      CAN frame payload length in byte (0 .. 8)
+ * @can_dlc:  deprecated name for CAN frame payload length in byte (0 .. 8)
+ * @__pad:    padding
+ * @__res0:   reserved / padding
+ * @len8_dlc: optional DLC value (9 .. 15) at 8 byte payload length
+ *            len8_dlc contains values from 9 .. 15 when the payload length is
+ *            8 bytes but the DLC value (see ISO 11898-1) is greater than 8.
+ *            CAN_CTRLMODE_CC_LEN8_DLC flag has to be enabled in CAN driver.
+ * @data:     CAN frame payload (up to 8 byte)
 */
 struct can_frame {
 	canid_t can_id;  /* 32 bit CAN_ID + EFF/RTR/ERR flags */
-	__u8    can_dlc; /* frame payload length in byte (0 .. CAN_MAX_DLEN) */
-	__u8    __pad;   /* padding */
-	__u8    __res0;  /* reserved / padding */
-	__u8    __res1;  /* reserved / padding */
-	__u8    data[CAN_MAX_DLEN] __attribute__((aligned(8)));
+	union {
+		/* CAN frame payload length in byte (0 .. CAN_MAX_DLEN)
+		 * was previously named can_dlc so we need to carry that
+		 * name for legacy support
+		 */
+		__u8 len;
+		__u8 can_dlc; /* deprecated */
+	} __attribute__((packed)); /* disable padding added in some ABIs */
+	__u8 __pad; /* padding */
+	__u8 __res0; /* reserved / padding */
+	__u8 len8_dlc; /* optional DLC for 8 byte payload length (9 .. 15) */
+	__u8 data[CAN_MAX_DLEN] __attribute__((aligned(8)));
 };

 /*
 * defined bits for canfd_frame.flags
 *
- * The use of struct canfd_frame implies the Extended Data Length (EDL) bit to
- * be set in the CAN frame bitstream on the wire. The EDL bit switch turns
+ * The use of struct canfd_frame implies the FD Frame (FDF) bit to
+ * be set in the CAN frame bitstream on the wire. The FDF bit switch turns
 * the CAN controllers bitstream processor into the CAN FD mode which creates
 * two new options within the CAN FD frame specification:
 *
@ -125,9 +135,18 @@ struct can_frame {
 * controller only the CANFD_BRS bit is relevant for real CAN controllers when
 * building a CAN FD frame for transmission. Setting the CANFD_ESI bit can make
 * sense for virtual CAN interfaces to test applications with echoed frames.
+ *
+ * The struct can_frame and struct canfd_frame intentionally share the same
+ * layout to be able to write CAN frame content into a CAN FD frame structure.
+ * When this is done the former differentiation via CAN_MTU / CANFD_MTU gets
+ * lost. CANFD_FDF allows programmers to mark CAN FD frames in the case of
+ * using struct canfd_frame for mixed CAN / CAN FD content (dual use).
+ * N.B. the Kernel APIs do NOT provide mixed CAN / CAN FD content inside of
+ * struct canfd_frame therefore the CANFD_FDF flag is disregarded by Linux.
 */
 #define CANFD_BRS 0x01 /* bit rate switch (second bitrate for payload data) */
 #define CANFD_ESI 0x02 /* error state indicator of the transmitting node */
+#define CANFD_FDF 0x04 /* mark CAN FD for dual use of struct canfd_frame */

 /**
 * struct canfd_frame - CAN flexible data rate frame structure
@ -157,7 +176,8 @@ struct canfd_frame {
 #define CAN_TP20	4 /* VAG Transport Protocol v2.0 */
 #define CAN_MCNET	5 /* Bosch MCNet */
 #define CAN_ISOTP	6 /* ISO 15765-2 Transport Protocol */
-#define CAN_NPROTO	7
+#define CAN_J1939	7 /* SAE J1939 */
+#define CAN_NPROTO	8

 #define SOL_CAN_BASE 100

@ -174,6 +194,23 @@ struct sockaddr_can {
 		/* transport protocol class address information (e.g. ISOTP) */
 		struct { canid_t rx_id, tx_id; } tp;

+		/* J1939 address information */
+		struct {
+			/* 8 byte name when using dynamic addressing */
+			__u64 name;
+
+			/* pgn:
+			 * 8 bit: PS in PDU2 case, else 0
+			 * 8 bit: PF
+			 * 1 bit: DP
+			 * 1 bit: reserved
+			 */
+			__u32 pgn;
+
+			/* 1 byte address */
+			__u8 addr;
+		} j1939;
+
 		/* reserved for future CAN protocols address information */
 	} can_addr;
 };
--- a/include/uapi/linux/can/netlink.h
+++ b/include/uapi/linux/can/netlink.h
@ -1,4 +1,4 @@
-/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/* SPDX-License-Identifier: GPL-2.0-only WITH Linux-syscall-note */
 /*
 * linux/can/netlink.h
 *
@ -40,15 +40,15 @@ struct can_bittiming {
 };

 /*
- * CAN harware-dependent bit-timing constant
+ * CAN hardware-dependent bit-timing constant
 *
 * Used for calculating and checking bit-timing parameters
 */
 struct can_bittiming_const {
 	char name[16];		/* Name of the CAN controller hardware */
-	__u32 tseg1_min;	/* Time segement 1 = prop_seg + phase_seg1 */
+	__u32 tseg1_min;	/* Time segment 1 = prop_seg + phase_seg1 */
 	__u32 tseg1_max;
-	__u32 tseg2_min;	/* Time segement 2 = phase_seg2 */
+	__u32 tseg2_min;	/* Time segment 2 = phase_seg2 */
 	__u32 tseg2_max;
 	__u32 sjw_max;		/* Synchronisation jump width */
 	__u32 brp_min;		/* Bit-rate prescaler */
@ -100,6 +100,9 @@ struct can_ctrlmode {
 #define CAN_CTRLMODE_FD			0x20	/* CAN FD mode */
 #define CAN_CTRLMODE_PRESUME_ACK	0x40	/* Ignore missing CAN ACKs */
 #define CAN_CTRLMODE_FD_NON_ISO		0x80	/* CAN FD in non-ISO mode */
+#define CAN_CTRLMODE_CC_LEN8_DLC	0x100	/* Classic CAN DLC option */
+#define CAN_CTRLMODE_TDC_AUTO		0x200	/* CAN transceiver automatically calculates TDCV */
+#define CAN_CTRLMODE_TDC_MANUAL		0x400	/* TDCV is manually set up by user */

 /*
 * CAN device statistics
@ -133,10 +136,35 @@ enum {
 	IFLA_CAN_BITRATE_CONST,
 	IFLA_CAN_DATA_BITRATE_CONST,
 	IFLA_CAN_BITRATE_MAX,
-	__IFLA_CAN_MAX
+	IFLA_CAN_TDC,
+
+	/* add new constants above here */
+	__IFLA_CAN_MAX,
+	IFLA_CAN_MAX = __IFLA_CAN_MAX - 1
 };

-#define IFLA_CAN_MAX	(__IFLA_CAN_MAX - 1)
+/*
+ * CAN FD Transmitter Delay Compensation (TDC)
+ *
+ * Please refer to struct can_tdc_const and can_tdc in
+ * include/linux/can/bittiming.h for further details.
+ */
+enum {
+	IFLA_CAN_TDC_UNSPEC,
+	IFLA_CAN_TDC_TDCV_MIN,	/* u32 */
+	IFLA_CAN_TDC_TDCV_MAX,	/* u32 */
+	IFLA_CAN_TDC_TDCO_MIN,	/* u32 */
+	IFLA_CAN_TDC_TDCO_MAX,	/* u32 */
+	IFLA_CAN_TDC_TDCF_MIN,	/* u32 */
+	IFLA_CAN_TDC_TDCF_MAX,	/* u32 */
+	IFLA_CAN_TDC_TDCV,	/* u32 */
+	IFLA_CAN_TDC_TDCO,	/* u32 */
+	IFLA_CAN_TDC_TDCF,	/* u32 */
+
+	/* add new constants above here */
+	__IFLA_CAN_TDC,
+	IFLA_CAN_TDC_MAX = __IFLA_CAN_TDC - 1
+};

 /* u16 termination range: 1..65535 Ohms */
 #define CAN_TERMINATION_DISABLED 0
--- a/include/uapi/linux/can/vxcan.h
+++ b/include/uapi/linux/can/vxcan.h
@ -1,4 +1,4 @@
-/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/* SPDX-License-Identifier: GPL-2.0-only WITH Linux-syscall-note */
 #ifndef _CAN_VXCAN_H
 #define _CAN_VXCAN_H

--- a/include/uapi/linux/const.h
+++ b/include/uapi/linux/const.h
@ -0,0 +1,36 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/* const.h: Macros for dealing with constants.  */
+
+#ifndef _LINUX_CONST_H
+#define _LINUX_CONST_H
+
+/* Some constant macros are used in both assembler and
+ * C code.  Therefore we cannot annotate them always with
+ * 'UL' and other type specifiers unilaterally.  We
+ * use the following macros to deal with this.
+ *
+ * Similarly, _AT() will cast an expression with a type in C, but
+ * leave it unchanged in asm.
+ */
+
+#ifdef __ASSEMBLY__
+#define _AC(X,Y)	X
+#define _AT(T,X)	X
+#else
+#define __AC(X,Y)	(X##Y)
+#define _AC(X,Y)	__AC(X,Y)
+#define _AT(T,X)	((T)(X))
+#endif
+
+#define _UL(x)		(_AC(x, UL))
+#define _ULL(x)		(_AC(x, ULL))
+
+#define _BITUL(x)	(_UL(1) << (x))
+#define _BITULL(x)	(_ULL(1) << (x))
+
+#define __ALIGN_KERNEL(x, a)		__ALIGN_KERNEL_MASK(x, (typeof(x))(a) - 1)
+#define __ALIGN_KERNEL_MASK(x, mask)	(((x) + (mask)) & ~(mask))
+
+#define __KERNEL_DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))
+
+#endif /* _LINUX_CONST_H */
--- a/include/uapi/linux/dcbnl.h
+++ b/include/uapi/linux/dcbnl.h
@ -0,0 +1,769 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ * Copyright (c) 2008-2011, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ *
+ * Author: Lucy Liu <lucy.liu@intel.com>
+ */
+
+#ifndef __LINUX_DCBNL_H__
+#define __LINUX_DCBNL_H__
+
+#include <linux/types.h>
+
+/* IEEE 802.1Qaz std supported values */
+#define IEEE_8021QAZ_MAX_TCS	8
+
+#define IEEE_8021QAZ_TSA_STRICT		0
+#define IEEE_8021QAZ_TSA_CB_SHAPER	1
+#define IEEE_8021QAZ_TSA_ETS		2
+#define IEEE_8021QAZ_TSA_VENDOR		255
+
+/* This structure contains the IEEE 802.1Qaz ETS managed object
+ *
+ * @willing: willing bit in ETS configuration TLV
+ * @ets_cap: indicates supported capacity of ets feature
+ * @cbs: credit based shaper ets algorithm supported
+ * @tc_tx_bw: tc tx bandwidth indexed by traffic class
+ * @tc_rx_bw: tc rx bandwidth indexed by traffic class
+ * @tc_tsa: TSA Assignment table, indexed by traffic class
+ * @prio_tc: priority assignment table mapping 8021Qp to traffic class
+ * @tc_reco_bw: recommended tc bandwidth indexed by traffic class for TLV
+ * @tc_reco_tsa: recommended TSA assignment indexed by traffic class for TLV
+ * @reco_prio_tc: recommended priority to traffic class mapping for TLV
+ *
+ * Recommended values are used to set fields in the ETS recommendation TLV
+ * with hardware offloaded LLDP.
+ *
+ * ----
+ *  TSA Assignment 8 bit identifiers
+ *	0	strict priority
+ *	1	credit-based shaper
+ *	2	enhanced transmission selection
+ *	3-254	reserved
+ *	255	vendor specific
+ */
+struct ieee_ets {
+	__u8	willing;
+	__u8	ets_cap;
+	__u8	cbs;
+	__u8	tc_tx_bw[IEEE_8021QAZ_MAX_TCS];
+	__u8	tc_rx_bw[IEEE_8021QAZ_MAX_TCS];
+	__u8	tc_tsa[IEEE_8021QAZ_MAX_TCS];
+	__u8	prio_tc[IEEE_8021QAZ_MAX_TCS];
+	__u8	tc_reco_bw[IEEE_8021QAZ_MAX_TCS];
+	__u8	tc_reco_tsa[IEEE_8021QAZ_MAX_TCS];
+	__u8	reco_prio_tc[IEEE_8021QAZ_MAX_TCS];
+};
+
+/* This structure contains rate limit extension to the IEEE 802.1Qaz ETS
+ * managed object.
+ * Values are 64 bits long and specified in Kbps to enable usage over both
+ * slow and very fast networks.
+ *
+ * @tc_maxrate: maximal tc tx bandwidth indexed by traffic class
+ */
+struct ieee_maxrate {
+	__u64	tc_maxrate[IEEE_8021QAZ_MAX_TCS];
+};
+
+enum dcbnl_cndd_states {
+	DCB_CNDD_RESET = 0,
+	DCB_CNDD_EDGE,
+	DCB_CNDD_INTERIOR,
+	DCB_CNDD_INTERIOR_READY,
+};
+
+/* This structure contains the IEEE 802.1Qau QCN managed object.
+ *
+ *@rpg_enable: enable QCN RP
+ *@rppp_max_rps: maximum number of RPs allowed for this CNPV on this port
+ *@rpg_time_reset: time between rate increases if no CNMs received.
+ *		   given in u-seconds
+ *@rpg_byte_reset: transmitted data between rate increases if no CNMs received.
+ *		   given in Bytes
+ *@rpg_threshold: The number of times rpByteStage or rpTimeStage can count
+ *		   before RP rate control state machine advances states
+ *@rpg_max_rate: the maximum rate, in Mbits per second,
+ *		 at which an RP can transmit
+ *@rpg_ai_rate: The rate, in Mbits per second,
+ *		used to increase rpTargetRate in the RPR_ACTIVE_INCREASE
+ *@rpg_hai_rate: The rate, in Mbits per second,
+ *		 used to increase rpTargetRate in the RPR_HYPER_INCREASE state
+ *@rpg_gd: Upon CNM receive, flow rate is limited to (Fb/Gd)*CurrentRate.
+ *	   rpgGd is given as log2(Gd), where Gd may only be powers of 2
+ *@rpg_min_dec_fac: The minimum factor by which the current transmit rate
+ *		    can be changed by reception of a CNM.
+ *		    value is given as percentage (1-100)
+ *@rpg_min_rate: The minimum value, in bits per second, for rate to limit
+ *@cndd_state_machine: The state of the congestion notification domain
+ *		       defense state machine, as defined by IEEE 802.1Qau
+ *		       section 32.1.1. In the interior ready state,
+ *		       the QCN capable hardware may add CN-TAG TLV to the
+ *		       outgoing traffic, to specifically identify outgoing
+ *		       flows.
+ */
+
+struct ieee_qcn {
+	__u8 rpg_enable[IEEE_8021QAZ_MAX_TCS];
+	__u32 rppp_max_rps[IEEE_8021QAZ_MAX_TCS];
+	__u32 rpg_time_reset[IEEE_8021QAZ_MAX_TCS];
+	__u32 rpg_byte_reset[IEEE_8021QAZ_MAX_TCS];
+	__u32 rpg_threshold[IEEE_8021QAZ_MAX_TCS];
+	__u32 rpg_max_rate[IEEE_8021QAZ_MAX_TCS];
+	__u32 rpg_ai_rate[IEEE_8021QAZ_MAX_TCS];
+	__u32 rpg_hai_rate[IEEE_8021QAZ_MAX_TCS];
+	__u32 rpg_gd[IEEE_8021QAZ_MAX_TCS];
+	__u32 rpg_min_dec_fac[IEEE_8021QAZ_MAX_TCS];
+	__u32 rpg_min_rate[IEEE_8021QAZ_MAX_TCS];
+	__u32 cndd_state_machine[IEEE_8021QAZ_MAX_TCS];
+};
+
+/* This structure contains the IEEE 802.1Qau QCN statistics.
+ *
+ *@rppp_rp_centiseconds: the number of RP-centiseconds accumulated
+ *			 by RPs at this priority level on this Port
+ *@rppp_created_rps: number of active RPs(flows) that react to CNMs
+ */
+
+struct ieee_qcn_stats {
+	__u64 rppp_rp_centiseconds[IEEE_8021QAZ_MAX_TCS];
+	__u32 rppp_created_rps[IEEE_8021QAZ_MAX_TCS];
+};
+
+/* This structure contains the IEEE 802.1Qaz PFC managed object
+ *
+ * @pfc_cap: Indicates the number of traffic classes on the local device
+ *	     that may simultaneously have PFC enabled.
+ * @pfc_en: bitmap indicating pfc enabled traffic classes
+ * @mbc: enable macsec bypass capability
+ * @delay: the allowance made for a round-trip propagation delay of the
+ *	   link in bits.
+ * @requests: count of the sent pfc frames
+ * @indications: count of the received pfc frames
+ */
+struct ieee_pfc {
+	__u8	pfc_cap;
+	__u8	pfc_en;
+	__u8	mbc;
+	__u16	delay;
+	__u64	requests[IEEE_8021QAZ_MAX_TCS];
+	__u64	indications[IEEE_8021QAZ_MAX_TCS];
+};
+
+#define IEEE_8021Q_MAX_PRIORITIES 8
+#define DCBX_MAX_BUFFERS  8
+struct dcbnl_buffer {
+	/* priority to buffer mapping */
+	__u8    prio2buffer[IEEE_8021Q_MAX_PRIORITIES];
+	/* buffer size in Bytes */
+	__u32   buffer_size[DCBX_MAX_BUFFERS];
+	__u32   total_size;
+};
+
+/* CEE DCBX std supported values */
+#define CEE_DCBX_MAX_PGS	8
+#define CEE_DCBX_MAX_PRIO	8
+
+/**
+ * struct cee_pg - CEE Priority-Group managed object
+ *
+ * @willing: willing bit in the PG tlv
+ * @error: error bit in the PG tlv
+ * @pg_en: enable bit of the PG feature
+ * @tcs_supported: number of traffic classes supported
+ * @pg_bw: bandwidth percentage for each priority group
+ * @prio_pg: priority to PG mapping indexed by priority
+ */
+struct cee_pg {
+	__u8    willing;
+	__u8    error;
+	__u8    pg_en;
+	__u8    tcs_supported;
+	__u8    pg_bw[CEE_DCBX_MAX_PGS];
+	__u8    prio_pg[CEE_DCBX_MAX_PGS];
+};
+
+/**
+ * struct cee_pfc - CEE PFC managed object
+ *
+ * @willing: willing bit in the PFC tlv
+ * @error: error bit in the PFC tlv
+ * @pfc_en: bitmap indicating pfc enabled traffic classes
+ * @tcs_supported: number of traffic classes supported
+ */
+struct cee_pfc {
+	__u8    willing;
+	__u8    error;
+	__u8    pfc_en;
+	__u8    tcs_supported;
+};
+
+/* IEEE 802.1Qaz std supported values */
+#define IEEE_8021QAZ_APP_SEL_ETHERTYPE	1
+#define IEEE_8021QAZ_APP_SEL_STREAM	2
+#define IEEE_8021QAZ_APP_SEL_DGRAM	3
+#define IEEE_8021QAZ_APP_SEL_ANY	4
+#define IEEE_8021QAZ_APP_SEL_DSCP       5
+
+/* This structure contains the IEEE 802.1Qaz APP managed object. This
+ * object is also used for the CEE std as well.
+ *
+ * @selector: protocol identifier type
+ * @protocol: protocol of type indicated
+ * @priority: 3-bit unsigned integer indicating priority for IEEE
+ *            8-bit 802.1p user priority bitmap for CEE
+ *
+ * ----
+ *  Selector field values for IEEE 802.1Qaz
+ *	0	Reserved
+ *	1	Ethertype
+ *	2	Well known port number over TCP or SCTP
+ *	3	Well known port number over UDP or DCCP
+ *	4	Well known port number over TCP, SCTP, UDP, or DCCP
+ *	5	Differentiated Services Code Point (DSCP) value
+ *	6-7	Reserved
+ *
+ *  Selector field values for CEE
+ *	0	Ethertype
+ *	1	Well known port number over TCP or UDP
+ *	2-3	Reserved
+ */
+struct dcb_app {
+	__u8	selector;
+	__u8	priority;
+	__u16	protocol;
+};
+
+/**
+ * struct dcb_peer_app_info - APP feature information sent by the peer
+ *
+ * @willing: willing bit in the peer APP tlv
+ * @error: error bit in the peer APP tlv
+ *
+ * In addition to this information the full peer APP tlv also contains
+ * a table of 'app_count' APP objects defined above.
+ */
+struct dcb_peer_app_info {
+	__u8	willing;
+	__u8	error;
+};
+
+struct dcbmsg {
+	__u8               dcb_family;
+	__u8               cmd;
+	__u16              dcb_pad;
+};
+
+/**
+ * enum dcbnl_commands - supported DCB commands
+ *
+ * @DCB_CMD_UNDEFINED: unspecified command to catch errors
+ * @DCB_CMD_GSTATE: request the state of DCB in the device
+ * @DCB_CMD_SSTATE: set the state of DCB in the device
+ * @DCB_CMD_PGTX_GCFG: request the priority group configuration for Tx
+ * @DCB_CMD_PGTX_SCFG: set the priority group configuration for Tx
+ * @DCB_CMD_PGRX_GCFG: request the priority group configuration for Rx
+ * @DCB_CMD_PGRX_SCFG: set the priority group configuration for Rx
+ * @DCB_CMD_PFC_GCFG: request the priority flow control configuration
+ * @DCB_CMD_PFC_SCFG: set the priority flow control configuration
+ * @DCB_CMD_SET_ALL: apply all changes to the underlying device
+ * @DCB_CMD_GPERM_HWADDR: get the permanent MAC address of the underlying
+ *                        device.  Only useful when using bonding.
+ * @DCB_CMD_GCAP: request the DCB capabilities of the device
+ * @DCB_CMD_GNUMTCS: get the number of traffic classes currently supported
+ * @DCB_CMD_SNUMTCS: set the number of traffic classes
+ * @DCB_CMD_BCN_GCFG: get backward congestion notification configuration
+ * @DCB_CMD_BCN_SCFG: set backward congestion notification configuration.
+ * @DCB_CMD_GAPP: get application protocol configuration
+ * @DCB_CMD_SAPP: set application protocol configuration
+ * @DCB_CMD_IEEE_SET: set IEEE 802.1Qaz configuration
+ * @DCB_CMD_IEEE_GET: get IEEE 802.1Qaz configuration
+ * @DCB_CMD_GDCBX: get DCBX engine configuration
+ * @DCB_CMD_SDCBX: set DCBX engine configuration
+ * @DCB_CMD_GFEATCFG: get DCBX features flags
+ * @DCB_CMD_SFEATCFG: set DCBX features negotiation flags
+ * @DCB_CMD_CEE_GET: get CEE aggregated configuration
+ * @DCB_CMD_IEEE_DEL: delete IEEE 802.1Qaz configuration
+ */
+enum dcbnl_commands {
+	DCB_CMD_UNDEFINED,
+
+	DCB_CMD_GSTATE,
+	DCB_CMD_SSTATE,
+
+	DCB_CMD_PGTX_GCFG,
+	DCB_CMD_PGTX_SCFG,
+	DCB_CMD_PGRX_GCFG,
+	DCB_CMD_PGRX_SCFG,
+
+	DCB_CMD_PFC_GCFG,
+	DCB_CMD_PFC_SCFG,
+
+	DCB_CMD_SET_ALL,
+
+	DCB_CMD_GPERM_HWADDR,
+
+	DCB_CMD_GCAP,
+
+	DCB_CMD_GNUMTCS,
+	DCB_CMD_SNUMTCS,
+
+	DCB_CMD_PFC_GSTATE,
+	DCB_CMD_PFC_SSTATE,
+
+	DCB_CMD_BCN_GCFG,
+	DCB_CMD_BCN_SCFG,
+
+	DCB_CMD_GAPP,
+	DCB_CMD_SAPP,
+
+	DCB_CMD_IEEE_SET,
+	DCB_CMD_IEEE_GET,
+
+	DCB_CMD_GDCBX,
+	DCB_CMD_SDCBX,
+
+	DCB_CMD_GFEATCFG,
+	DCB_CMD_SFEATCFG,
+
+	DCB_CMD_CEE_GET,
+	DCB_CMD_IEEE_DEL,
+
+	__DCB_CMD_ENUM_MAX,
+	DCB_CMD_MAX = __DCB_CMD_ENUM_MAX - 1,
+};
+
+/**
+ * enum dcbnl_attrs - DCB top-level netlink attributes
+ *
+ * @DCB_ATTR_UNDEFINED: unspecified attribute to catch errors
+ * @DCB_ATTR_IFNAME: interface name of the underlying device (NLA_STRING)
+ * @DCB_ATTR_STATE: enable state of DCB in the device (NLA_U8)
+ * @DCB_ATTR_PFC_STATE: enable state of PFC in the device (NLA_U8)
+ * @DCB_ATTR_PFC_CFG: priority flow control configuration (NLA_NESTED)
+ * @DCB_ATTR_NUM_TC: number of traffic classes supported in the device (NLA_U8)
+ * @DCB_ATTR_PG_CFG: priority group configuration (NLA_NESTED)
+ * @DCB_ATTR_SET_ALL: bool to commit changes to hardware or not (NLA_U8)
+ * @DCB_ATTR_PERM_HWADDR: MAC address of the physical device (NLA_NESTED)
+ * @DCB_ATTR_CAP: DCB capabilities of the device (NLA_NESTED)
+ * @DCB_ATTR_NUMTCS: number of traffic classes supported (NLA_NESTED)
+ * @DCB_ATTR_BCN: backward congestion notification configuration (NLA_NESTED)
+ * @DCB_ATTR_IEEE: IEEE 802.1Qaz supported attributes (NLA_NESTED)
+ * @DCB_ATTR_DCBX: DCBX engine configuration in the device (NLA_U8)
+ * @DCB_ATTR_FEATCFG: DCBX features flags (NLA_NESTED)
+ * @DCB_ATTR_CEE: CEE std supported attributes (NLA_NESTED)
+ */
+enum dcbnl_attrs {
+	DCB_ATTR_UNDEFINED,
+
+	DCB_ATTR_IFNAME,
+	DCB_ATTR_STATE,
+	DCB_ATTR_PFC_STATE,
+	DCB_ATTR_PFC_CFG,
+	DCB_ATTR_NUM_TC,
+	DCB_ATTR_PG_CFG,
+	DCB_ATTR_SET_ALL,
+	DCB_ATTR_PERM_HWADDR,
+	DCB_ATTR_CAP,
+	DCB_ATTR_NUMTCS,
+	DCB_ATTR_BCN,
+	DCB_ATTR_APP,
+
+	/* IEEE std attributes */
+	DCB_ATTR_IEEE,
+
+	DCB_ATTR_DCBX,
+	DCB_ATTR_FEATCFG,
+
+	/* CEE nested attributes */
+	DCB_ATTR_CEE,
+
+	__DCB_ATTR_ENUM_MAX,
+	DCB_ATTR_MAX = __DCB_ATTR_ENUM_MAX - 1,
+};
+
+/**
+ * enum ieee_attrs - IEEE 802.1Qaz get/set attributes
+ *
+ * @DCB_ATTR_IEEE_UNSPEC: unspecified
+ * @DCB_ATTR_IEEE_ETS: negotiated ETS configuration
+ * @DCB_ATTR_IEEE_PFC: negotiated PFC configuration
+ * @DCB_ATTR_IEEE_APP_TABLE: negotiated APP configuration
+ * @DCB_ATTR_IEEE_PEER_ETS: peer ETS configuration - get only
+ * @DCB_ATTR_IEEE_PEER_PFC: peer PFC configuration - get only
+ * @DCB_ATTR_IEEE_PEER_APP: peer APP tlv - get only
+ */
+enum ieee_attrs {
+	DCB_ATTR_IEEE_UNSPEC,
+	DCB_ATTR_IEEE_ETS,
+	DCB_ATTR_IEEE_PFC,
+	DCB_ATTR_IEEE_APP_TABLE,
+	DCB_ATTR_IEEE_PEER_ETS,
+	DCB_ATTR_IEEE_PEER_PFC,
+	DCB_ATTR_IEEE_PEER_APP,
+	DCB_ATTR_IEEE_MAXRATE,
+	DCB_ATTR_IEEE_QCN,
+	DCB_ATTR_IEEE_QCN_STATS,
+	DCB_ATTR_DCB_BUFFER,
+	__DCB_ATTR_IEEE_MAX
+};
+#define DCB_ATTR_IEEE_MAX (__DCB_ATTR_IEEE_MAX - 1)
+
+enum ieee_attrs_app {
+	DCB_ATTR_IEEE_APP_UNSPEC,
+	DCB_ATTR_IEEE_APP,
+	__DCB_ATTR_IEEE_APP_MAX
+};
+#define DCB_ATTR_IEEE_APP_MAX (__DCB_ATTR_IEEE_APP_MAX - 1)
+
+/**
+ * enum cee_attrs - CEE DCBX get attributes.
+ *
+ * @DCB_ATTR_CEE_UNSPEC: unspecified
+ * @DCB_ATTR_CEE_PEER_PG: peer PG configuration - get only
+ * @DCB_ATTR_CEE_PEER_PFC: peer PFC configuration - get only
+ * @DCB_ATTR_CEE_PEER_APP_TABLE: peer APP tlv - get only
+ * @DCB_ATTR_CEE_TX_PG: TX PG configuration (DCB_CMD_PGTX_GCFG)
+ * @DCB_ATTR_CEE_RX_PG: RX PG configuration (DCB_CMD_PGRX_GCFG)
+ * @DCB_ATTR_CEE_PFC: PFC configuration (DCB_CMD_PFC_GCFG)
+ * @DCB_ATTR_CEE_APP_TABLE: APP configuration (multi DCB_CMD_GAPP)
+ * @DCB_ATTR_CEE_FEAT: DCBX features flags (DCB_CMD_GFEATCFG)
+ *
+ * An aggregated collection of the cee std negotiated parameters.
+ */
+enum cee_attrs {
+	DCB_ATTR_CEE_UNSPEC,
+	DCB_ATTR_CEE_PEER_PG,
+	DCB_ATTR_CEE_PEER_PFC,
+	DCB_ATTR_CEE_PEER_APP_TABLE,
+	DCB_ATTR_CEE_TX_PG,
+	DCB_ATTR_CEE_RX_PG,
+	DCB_ATTR_CEE_PFC,
+	DCB_ATTR_CEE_APP_TABLE,
+	DCB_ATTR_CEE_FEAT,
+	__DCB_ATTR_CEE_MAX
+};
+#define DCB_ATTR_CEE_MAX (__DCB_ATTR_CEE_MAX - 1)
+
+enum peer_app_attr {
+	DCB_ATTR_CEE_PEER_APP_UNSPEC,
+	DCB_ATTR_CEE_PEER_APP_INFO,
+	DCB_ATTR_CEE_PEER_APP,
+	__DCB_ATTR_CEE_PEER_APP_MAX
+};
+#define DCB_ATTR_CEE_PEER_APP_MAX (__DCB_ATTR_CEE_PEER_APP_MAX - 1)
+
+enum cee_attrs_app {
+	DCB_ATTR_CEE_APP_UNSPEC,
+	DCB_ATTR_CEE_APP,
+	__DCB_ATTR_CEE_APP_MAX
+};
+#define DCB_ATTR_CEE_APP_MAX (__DCB_ATTR_CEE_APP_MAX - 1)
+
+/**
+ * enum dcbnl_pfc_up_attrs - DCB Priority Flow Control user priority nested attrs
+ *
+ * @DCB_PFC_UP_ATTR_UNDEFINED: unspecified attribute to catch errors
+ * @DCB_PFC_UP_ATTR_0: Priority Flow Control value for User Priority 0 (NLA_U8)
+ * @DCB_PFC_UP_ATTR_1: Priority Flow Control value for User Priority 1 (NLA_U8)
+ * @DCB_PFC_UP_ATTR_2: Priority Flow Control value for User Priority 2 (NLA_U8)
+ * @DCB_PFC_UP_ATTR_3: Priority Flow Control value for User Priority 3 (NLA_U8)
+ * @DCB_PFC_UP_ATTR_4: Priority Flow Control value for User Priority 4 (NLA_U8)
+ * @DCB_PFC_UP_ATTR_5: Priority Flow Control value for User Priority 5 (NLA_U8)
+ * @DCB_PFC_UP_ATTR_6: Priority Flow Control value for User Priority 6 (NLA_U8)
+ * @DCB_PFC_UP_ATTR_7: Priority Flow Control value for User Priority 7 (NLA_U8)
+ * @DCB_PFC_UP_ATTR_MAX: highest attribute number currently defined
+ * @DCB_PFC_UP_ATTR_ALL: apply to all priority flow control attrs (NLA_FLAG)
+ *
+ */
+enum dcbnl_pfc_up_attrs {
+	DCB_PFC_UP_ATTR_UNDEFINED,
+
+	DCB_PFC_UP_ATTR_0,
+	DCB_PFC_UP_ATTR_1,
+	DCB_PFC_UP_ATTR_2,
+	DCB_PFC_UP_ATTR_3,
+	DCB_PFC_UP_ATTR_4,
+	DCB_PFC_UP_ATTR_5,
+	DCB_PFC_UP_ATTR_6,
+	DCB_PFC_UP_ATTR_7,
+	DCB_PFC_UP_ATTR_ALL,
+
+	__DCB_PFC_UP_ATTR_ENUM_MAX,
+	DCB_PFC_UP_ATTR_MAX = __DCB_PFC_UP_ATTR_ENUM_MAX - 1,
+};
+
+/**
+ * enum dcbnl_pg_attrs - DCB Priority Group attributes
+ *
+ * @DCB_PG_ATTR_UNDEFINED: unspecified attribute to catch errors
+ * @DCB_PG_ATTR_TC_0: Priority Group Traffic Class 0 configuration (NLA_NESTED)
+ * @DCB_PG_ATTR_TC_1: Priority Group Traffic Class 1 configuration (NLA_NESTED)
+ * @DCB_PG_ATTR_TC_2: Priority Group Traffic Class 2 configuration (NLA_NESTED)
+ * @DCB_PG_ATTR_TC_3: Priority Group Traffic Class 3 configuration (NLA_NESTED)
+ * @DCB_PG_ATTR_TC_4: Priority Group Traffic Class 4 configuration (NLA_NESTED)
+ * @DCB_PG_ATTR_TC_5: Priority Group Traffic Class 5 configuration (NLA_NESTED)
+ * @DCB_PG_ATTR_TC_6: Priority Group Traffic Class 6 configuration (NLA_NESTED)
+ * @DCB_PG_ATTR_TC_7: Priority Group Traffic Class 7 configuration (NLA_NESTED)
+ * @DCB_PG_ATTR_TC_MAX: highest attribute number currently defined
+ * @DCB_PG_ATTR_TC_ALL: apply to all traffic classes (NLA_NESTED)
+ * @DCB_PG_ATTR_BW_ID_0: Percent of link bandwidth for Priority Group 0 (NLA_U8)
+ * @DCB_PG_ATTR_BW_ID_1: Percent of link bandwidth for Priority Group 1 (NLA_U8)
+ * @DCB_PG_ATTR_BW_ID_2: Percent of link bandwidth for Priority Group 2 (NLA_U8)
+ * @DCB_PG_ATTR_BW_ID_3: Percent of link bandwidth for Priority Group 3 (NLA_U8)
+ * @DCB_PG_ATTR_BW_ID_4: Percent of link bandwidth for Priority Group 4 (NLA_U8)
+ * @DCB_PG_ATTR_BW_ID_5: Percent of link bandwidth for Priority Group 5 (NLA_U8)
+ * @DCB_PG_ATTR_BW_ID_6: Percent of link bandwidth for Priority Group 6 (NLA_U8)
+ * @DCB_PG_ATTR_BW_ID_7: Percent of link bandwidth for Priority Group 7 (NLA_U8)
+ * @DCB_PG_ATTR_BW_ID_MAX: highest attribute number currently defined
+ * @DCB_PG_ATTR_BW_ID_ALL: apply to all priority groups (NLA_FLAG)
+ *
+ */
+enum dcbnl_pg_attrs {
+	DCB_PG_ATTR_UNDEFINED,
+
+	DCB_PG_ATTR_TC_0,
+	DCB_PG_ATTR_TC_1,
+	DCB_PG_ATTR_TC_2,
+	DCB_PG_ATTR_TC_3,
+	DCB_PG_ATTR_TC_4,
+	DCB_PG_ATTR_TC_5,
+	DCB_PG_ATTR_TC_6,
+	DCB_PG_ATTR_TC_7,
+	DCB_PG_ATTR_TC_MAX,
+	DCB_PG_ATTR_TC_ALL,
+
+	DCB_PG_ATTR_BW_ID_0,
+	DCB_PG_ATTR_BW_ID_1,
+	DCB_PG_ATTR_BW_ID_2,
+	DCB_PG_ATTR_BW_ID_3,
+	DCB_PG_ATTR_BW_ID_4,
+	DCB_PG_ATTR_BW_ID_5,
+	DCB_PG_ATTR_BW_ID_6,
+	DCB_PG_ATTR_BW_ID_7,
+	DCB_PG_ATTR_BW_ID_MAX,
+	DCB_PG_ATTR_BW_ID_ALL,
+
+	__DCB_PG_ATTR_ENUM_MAX,
+	DCB_PG_ATTR_MAX = __DCB_PG_ATTR_ENUM_MAX - 1,
+};
+
+/**
+ * enum dcbnl_tc_attrs - DCB Traffic Class attributes
+ *
+ * @DCB_TC_ATTR_PARAM_UNDEFINED: unspecified attribute to catch errors
+ * @DCB_TC_ATTR_PARAM_PGID: (NLA_U8) Priority group the traffic class belongs to
+ *                          Valid values are:  0-7
+ * @DCB_TC_ATTR_PARAM_UP_MAPPING: (NLA_U8) Traffic class to user priority map
+ *                                Some devices may not support changing the
+ *                                user priority map of a TC.
+ * @DCB_TC_ATTR_PARAM_STRICT_PRIO: (NLA_U8) Strict priority setting
+ *                                 0 - none
+ *                                 1 - group strict
+ *                                 2 - link strict
+ * @DCB_TC_ATTR_PARAM_BW_PCT: optional - (NLA_U8) If supported by the device and
+ *                            not configured to use link strict priority,
+ *                            this is the percentage of bandwidth of the
+ *                            priority group this traffic class belongs to
+ * @DCB_TC_ATTR_PARAM_ALL: (NLA_FLAG) all traffic class parameters
+ *
+ */
+enum dcbnl_tc_attrs {
+	DCB_TC_ATTR_PARAM_UNDEFINED,
+
+	DCB_TC_ATTR_PARAM_PGID,
+	DCB_TC_ATTR_PARAM_UP_MAPPING,
+	DCB_TC_ATTR_PARAM_STRICT_PRIO,
+	DCB_TC_ATTR_PARAM_BW_PCT,
+	DCB_TC_ATTR_PARAM_ALL,
+
+	__DCB_TC_ATTR_PARAM_ENUM_MAX,
+	DCB_TC_ATTR_PARAM_MAX = __DCB_TC_ATTR_PARAM_ENUM_MAX - 1,
+};
+
+/**
+ * enum dcbnl_cap_attrs - DCB Capability attributes
+ *
+ * @DCB_CAP_ATTR_UNDEFINED: unspecified attribute to catch errors
+ * @DCB_CAP_ATTR_ALL: (NLA_FLAG) all capability parameters
+ * @DCB_CAP_ATTR_PG: (NLA_U8) device supports Priority Groups
+ * @DCB_CAP_ATTR_PFC: (NLA_U8) device supports Priority Flow Control
+ * @DCB_CAP_ATTR_UP2TC: (NLA_U8) device supports user priority to
+ *                               traffic class mapping
+ * @DCB_CAP_ATTR_PG_TCS: (NLA_U8) bitmap where each bit represents a
+ *                                number of traffic classes the device
+ *                                can be configured to use for Priority Groups
+ * @DCB_CAP_ATTR_PFC_TCS: (NLA_U8) bitmap where each bit represents a
+ *                                 number of traffic classes the device can be
+ *                                 configured to use for Priority Flow Control
+ * @DCB_CAP_ATTR_GSP: (NLA_U8) device supports group strict priority
+ * @DCB_CAP_ATTR_BCN: (NLA_U8) device supports Backwards Congestion
+ *                             Notification
+ * @DCB_CAP_ATTR_DCBX: (NLA_U8) device supports DCBX engine
+ *
+ */
+enum dcbnl_cap_attrs {
+	DCB_CAP_ATTR_UNDEFINED,
+	DCB_CAP_ATTR_ALL,
+	DCB_CAP_ATTR_PG,
+	DCB_CAP_ATTR_PFC,
+	DCB_CAP_ATTR_UP2TC,
+	DCB_CAP_ATTR_PG_TCS,
+	DCB_CAP_ATTR_PFC_TCS,
+	DCB_CAP_ATTR_GSP,
+	DCB_CAP_ATTR_BCN,
+	DCB_CAP_ATTR_DCBX,
+
+	__DCB_CAP_ATTR_ENUM_MAX,
+	DCB_CAP_ATTR_MAX = __DCB_CAP_ATTR_ENUM_MAX - 1,
+};
+
+/**
+ * DCBX capability flags
+ *
+ * @DCB_CAP_DCBX_HOST: DCBX negotiation is performed by the host LLDP agent.
+ *                     'set' routines are used to configure the device with
+ *                     the negotiated parameters
+ *
+ * @DCB_CAP_DCBX_LLD_MANAGED: DCBX negotiation is not performed in the host but
+ *                            by another entity
+ *                            'get' routines are used to retrieve the
+ *                            negotiated parameters
+ *                            'set' routines can be used to set the initial
+ *                            negotiation configuration
+ *
+ * @DCB_CAP_DCBX_VER_CEE: for a non-host DCBX engine, indicates the engine
+ *                        supports the CEE protocol flavor
+ *
+ * @DCB_CAP_DCBX_VER_IEEE: for a non-host DCBX engine, indicates the engine
+ *                         supports the IEEE protocol flavor
+ *
+ * @DCB_CAP_DCBX_STATIC: for a non-host DCBX engine, indicates the engine
+ *                       supports static configuration (i.e. no actual
+ *                       negotiation is performed; negotiated parameters equal
+ *                       the initial configuration)
+ *
+ */
+#define DCB_CAP_DCBX_HOST		0x01
+#define DCB_CAP_DCBX_LLD_MANAGED	0x02
+#define DCB_CAP_DCBX_VER_CEE		0x04
+#define DCB_CAP_DCBX_VER_IEEE		0x08
+#define DCB_CAP_DCBX_STATIC		0x10
+
+/**
+ * enum dcbnl_numtcs_attrs - number of traffic classes
+ *
+ * @DCB_NUMTCS_ATTR_UNDEFINED: unspecified attribute to catch errors
+ * @DCB_NUMTCS_ATTR_ALL: (NLA_FLAG) all traffic class attributes
+ * @DCB_NUMTCS_ATTR_PG: (NLA_U8) number of traffic classes used for
+ *                               priority groups
+ * @DCB_NUMTCS_ATTR_PFC: (NLA_U8) number of traffic classes which can
+ *                                support priority flow control
+ */
+enum dcbnl_numtcs_attrs {
+	DCB_NUMTCS_ATTR_UNDEFINED,
+	DCB_NUMTCS_ATTR_ALL,
+	DCB_NUMTCS_ATTR_PG,
+	DCB_NUMTCS_ATTR_PFC,
+
+	__DCB_NUMTCS_ATTR_ENUM_MAX,
+	DCB_NUMTCS_ATTR_MAX = __DCB_NUMTCS_ATTR_ENUM_MAX - 1,
+};
+
+enum dcbnl_bcn_attrs{	/* DCB BCN attribute ids; NOTE(review): BCN presumably = Backwards Congestion Notification, cf. DCB_CAP_ATTR_BCN - confirm */
+	DCB_BCN_ATTR_UNDEFINED = 0,	/* explicit 0; the attributes below number upward from 1 */
+
+	DCB_BCN_ATTR_RP_0,	/* RP_0..RP_7: eight consecutive per-index entries; NOTE(review): meaning of "RP" is not visible in this file - confirm */
+	DCB_BCN_ATTR_RP_1,
+	DCB_BCN_ATTR_RP_2,
+	DCB_BCN_ATTR_RP_3,
+	DCB_BCN_ATTR_RP_4,
+	DCB_BCN_ATTR_RP_5,
+	DCB_BCN_ATTR_RP_6,
+	DCB_BCN_ATTR_RP_7,
+	DCB_BCN_ATTR_RP_ALL,	/* NOTE(review): presumably "apply to all RP_n", like the documented *_ALL entries of the sibling enums - confirm */
+
+	DCB_BCN_ATTR_BCNA_0,
+	DCB_BCN_ATTR_BCNA_1,
+	DCB_BCN_ATTR_ALPHA,
+	DCB_BCN_ATTR_BETA,
+	DCB_BCN_ATTR_GD,
+	DCB_BCN_ATTR_GI,
+	DCB_BCN_ATTR_TMAX,
+	DCB_BCN_ATTR_TD,
+	DCB_BCN_ATTR_RMIN,
+	DCB_BCN_ATTR_W,
+	DCB_BCN_ATTR_RD,
+	DCB_BCN_ATTR_RU,
+	DCB_BCN_ATTR_WRTT,
+	DCB_BCN_ATTR_RI,
+	DCB_BCN_ATTR_C,
+	DCB_BCN_ATTR_ALL,	/* NOTE(review): presumably selects all BCN attributes - confirm */
+
+	__DCB_BCN_ATTR_ENUM_MAX,	/* sentinel: one past the last attribute; keep last */
+	DCB_BCN_ATTR_MAX = __DCB_BCN_ATTR_ENUM_MAX - 1,	/* highest attribute number currently defined */
+};
+
+/**
+ * enum dcb_general_attr_values - general DCB attribute values
+ *
+ * @DCB_ATTR_VALUE_UNDEFINED: value used to indicate an attribute is not supported
+ *
+ */
+enum dcb_general_attr_values {
+	DCB_ATTR_VALUE_UNDEFINED = 0xff
+};
+
+#define DCB_APP_IDTYPE_ETHTYPE	0x00	/* NOTE(review): presumably a DCB_APP_ATTR_IDTYPE value meaning the ID is an EtherType - confirm */
+#define DCB_APP_IDTYPE_PORTNUM	0x01	/* NOTE(review): presumably a DCB_APP_ATTR_IDTYPE value meaning the ID is a port number - confirm */
+enum dcbnl_app_attrs {	/* DCB application attribute ids */
+	DCB_APP_ATTR_UNDEFINED,	/* 0; cf. the other *_UNDEFINED entries in this file: "unspecified attribute to catch errors" */
+
+	DCB_APP_ATTR_IDTYPE,
+	DCB_APP_ATTR_ID,
+	DCB_APP_ATTR_PRIORITY,
+
+	__DCB_APP_ATTR_ENUM_MAX,	/* sentinel: one past the last attribute; keep last */
+	DCB_APP_ATTR_MAX = __DCB_APP_ATTR_ENUM_MAX - 1,	/* highest attribute number currently defined */
+};
+
+/**
+ * enum dcbnl_featcfg_attrs - features configuration flags
+ *
+ * @DCB_FEATCFG_ATTR_UNDEFINED: unspecified attribute to catch errors
+ * @DCB_FEATCFG_ATTR_ALL: (NLA_FLAG) all features configuration attributes
+ * @DCB_FEATCFG_ATTR_PG: (NLA_U8) configuration flags for priority groups
+ * @DCB_FEATCFG_ATTR_PFC: (NLA_U8) configuration flags for priority
+ *                                 flow control
+ * @DCB_FEATCFG_ATTR_APP: (NLA_U8) configuration flags for application TLV
+ *
+ */
+#define DCB_FEATCFG_ERROR	0x01	/* error in feature resolution */
+#define DCB_FEATCFG_ENABLE	0x02	/* enable feature */
+#define DCB_FEATCFG_WILLING	0x04	/* feature is willing */
+#define DCB_FEATCFG_ADVERTISE	0x08	/* advertise feature */
+enum dcbnl_featcfg_attrs {
+	DCB_FEATCFG_ATTR_UNDEFINED,
+	DCB_FEATCFG_ATTR_ALL,
+	DCB_FEATCFG_ATTR_PG,
+	DCB_FEATCFG_ATTR_PFC,
+	DCB_FEATCFG_ATTR_APP,
+
+	__DCB_FEATCFG_ATTR_ENUM_MAX,	/* sentinel: one past the last attribute; keep last */
+	DCB_FEATCFG_ATTR_MAX = __DCB_FEATCFG_ATTR_ENUM_MAX - 1,	/* highest attribute number currently defined */
+};
+
+#endif /* __LINUX_DCBNL_H__ */
--- a/include/uapi/linux/devlink.h
+++ b/include/uapi/linux/devlink.h
@ -13,6 +13,8 @@
 #ifndef _LINUX_DEVLINK_H_
 #define _LINUX_DEVLINK_H_

+#include <linux/const.h>
+
 #define DEVLINK_GENL_NAME "devlink"
 #define DEVLINK_GENL_VERSION 0x1
 #define DEVLINK_GENL_MCGRP_CONFIG_NAME "config"
@ -89,6 +91,46 @@ enum devlink_command {
 	DEVLINK_CMD_REGION_DEL,
 	DEVLINK_CMD_REGION_READ,

+	DEVLINK_CMD_PORT_PARAM_GET,	/* can dump */
+	DEVLINK_CMD_PORT_PARAM_SET,
+	DEVLINK_CMD_PORT_PARAM_NEW,
+	DEVLINK_CMD_PORT_PARAM_DEL,
+
+	DEVLINK_CMD_INFO_GET,		/* can dump */
+
+	DEVLINK_CMD_HEALTH_REPORTER_GET,
+	DEVLINK_CMD_HEALTH_REPORTER_SET,
+	DEVLINK_CMD_HEALTH_REPORTER_RECOVER,
+	DEVLINK_CMD_HEALTH_REPORTER_DIAGNOSE,
+	DEVLINK_CMD_HEALTH_REPORTER_DUMP_GET,
+	DEVLINK_CMD_HEALTH_REPORTER_DUMP_CLEAR,
+
+	DEVLINK_CMD_FLASH_UPDATE,
+	DEVLINK_CMD_FLASH_UPDATE_END,		/* notification only */
+	DEVLINK_CMD_FLASH_UPDATE_STATUS,	/* notification only */
+
+	DEVLINK_CMD_TRAP_GET,		/* can dump */
+	DEVLINK_CMD_TRAP_SET,
+	DEVLINK_CMD_TRAP_NEW,
+	DEVLINK_CMD_TRAP_DEL,
+
+	DEVLINK_CMD_TRAP_GROUP_GET,	/* can dump */
+	DEVLINK_CMD_TRAP_GROUP_SET,
+	DEVLINK_CMD_TRAP_GROUP_NEW,
+	DEVLINK_CMD_TRAP_GROUP_DEL,
+
+	DEVLINK_CMD_TRAP_POLICER_GET,	/* can dump */
+	DEVLINK_CMD_TRAP_POLICER_SET,
+	DEVLINK_CMD_TRAP_POLICER_NEW,
+	DEVLINK_CMD_TRAP_POLICER_DEL,
+
+	DEVLINK_CMD_HEALTH_REPORTER_TEST,
+
+	DEVLINK_CMD_RATE_GET,		/* can dump */
+	DEVLINK_CMD_RATE_SET,
+	DEVLINK_CMD_RATE_NEW,
+	DEVLINK_CMD_RATE_DEL,
+
 	/* add new commands above here */
 	__DEVLINK_CMD_MAX,
 	DEVLINK_CMD_MAX = __DEVLINK_CMD_MAX - 1
@ -151,6 +193,27 @@ enum devlink_port_flavour {
 	DEVLINK_PORT_FLAVOUR_DSA, /* Distributed switch architecture
 				   * interconnect port.
 				   */
+	DEVLINK_PORT_FLAVOUR_PCI_PF, /* Represents eswitch port for
+				      * the PCI PF. It is an internal
+				      * port that faces the PCI PF.
+				      */
+	DEVLINK_PORT_FLAVOUR_PCI_VF, /* Represents eswitch port
+				      * for the PCI VF. It is an internal
+				      * port that faces the PCI VF.
+				      */
+	DEVLINK_PORT_FLAVOUR_VIRTUAL, /* Any virtual port facing the user. */
+	DEVLINK_PORT_FLAVOUR_UNUSED, /* Port which exists in the switch, but
+				      * is not used in any way.
+				      */
+	DEVLINK_PORT_FLAVOUR_PCI_SF, /* Represents eswitch port
+				      * for the PCI SF. It is an internal
+				      * port that faces the PCI SF.
+				      */
+};
+
+enum devlink_rate_type {
+	DEVLINK_RATE_TYPE_LEAF,
+	DEVLINK_RATE_TYPE_NODE,
 };

 enum devlink_param_cmode {
@ -163,6 +226,118 @@ enum devlink_param_cmode {
 	DEVLINK_PARAM_CMODE_MAX = __DEVLINK_PARAM_CMODE_MAX - 1
 };

+enum devlink_param_fw_load_policy_value {
+	DEVLINK_PARAM_FW_LOAD_POLICY_VALUE_DRIVER,
+	DEVLINK_PARAM_FW_LOAD_POLICY_VALUE_FLASH,
+	DEVLINK_PARAM_FW_LOAD_POLICY_VALUE_DISK,
+	DEVLINK_PARAM_FW_LOAD_POLICY_VALUE_UNKNOWN,
+};
+
+enum devlink_param_reset_dev_on_drv_probe_value {
+	DEVLINK_PARAM_RESET_DEV_ON_DRV_PROBE_VALUE_UNKNOWN,
+	DEVLINK_PARAM_RESET_DEV_ON_DRV_PROBE_VALUE_ALWAYS,
+	DEVLINK_PARAM_RESET_DEV_ON_DRV_PROBE_VALUE_NEVER,
+	DEVLINK_PARAM_RESET_DEV_ON_DRV_PROBE_VALUE_DISK,
+};
+
+enum {
+	DEVLINK_ATTR_STATS_RX_PACKETS,		/* u64 */
+	DEVLINK_ATTR_STATS_RX_BYTES,		/* u64 */
+	DEVLINK_ATTR_STATS_RX_DROPPED,		/* u64 */
+
+	__DEVLINK_ATTR_STATS_MAX,
+	DEVLINK_ATTR_STATS_MAX = __DEVLINK_ATTR_STATS_MAX - 1
+};
+
+/* Specify what sections of a flash component can be overwritten when
+ * performing an update. Overwriting of firmware binary sections is always
+ * implicitly assumed to be allowed.
+ *
+ * Each section must be documented in
+ * Documentation/networking/devlink/devlink-flash.rst
+ *
+ */
+enum {
+	DEVLINK_FLASH_OVERWRITE_SETTINGS_BIT,
+	DEVLINK_FLASH_OVERWRITE_IDENTIFIERS_BIT,
+
+	__DEVLINK_FLASH_OVERWRITE_MAX_BIT,
+	DEVLINK_FLASH_OVERWRITE_MAX_BIT = __DEVLINK_FLASH_OVERWRITE_MAX_BIT - 1
+};
+
+#define DEVLINK_FLASH_OVERWRITE_SETTINGS _BITUL(DEVLINK_FLASH_OVERWRITE_SETTINGS_BIT)
+#define DEVLINK_FLASH_OVERWRITE_IDENTIFIERS _BITUL(DEVLINK_FLASH_OVERWRITE_IDENTIFIERS_BIT)
+
+#define DEVLINK_SUPPORTED_FLASH_OVERWRITE_SECTIONS \
+	(_BITUL(__DEVLINK_FLASH_OVERWRITE_MAX_BIT) - 1)
+
+/**
+ * enum devlink_trap_action - Packet trap action.
+ * @DEVLINK_TRAP_ACTION_DROP: Packet is dropped by the device and a copy is not
+ *                            sent to the CPU.
+ * @DEVLINK_TRAP_ACTION_TRAP: The sole copy of the packet is sent to the CPU.
+ * @DEVLINK_TRAP_ACTION_MIRROR: Packet is forwarded by the device and a copy is
+ *                              sent to the CPU.
+ */
+enum devlink_trap_action {
+	DEVLINK_TRAP_ACTION_DROP,
+	DEVLINK_TRAP_ACTION_TRAP,
+	DEVLINK_TRAP_ACTION_MIRROR,
+};
+
+/**
+ * enum devlink_trap_type - Packet trap type.
+ * @DEVLINK_TRAP_TYPE_DROP: Trap reason is a drop. Trapped packets are only
+ *                          processed by devlink and not injected to the
+ *                          kernel's Rx path.
+ * @DEVLINK_TRAP_TYPE_EXCEPTION: Trap reason is an exception. Packet was not
+ *                               forwarded as intended due to an exception
+ *                               (e.g., missing neighbour entry) and trapped to
+ *                               control plane for resolution. Trapped packets
+ *                               are processed by devlink and injected to
+ *                               the kernel's Rx path.
+ * @DEVLINK_TRAP_TYPE_CONTROL: Packet was trapped because it is required for
+ *                             the correct functioning of the control plane.
+ *                             For example, an ARP request packet. Trapped
+ *                             packets are injected to the kernel's Rx path,
+ *                             but not reported to drop monitor.
+ */
+enum devlink_trap_type {
+	DEVLINK_TRAP_TYPE_DROP,
+	DEVLINK_TRAP_TYPE_EXCEPTION,
+	DEVLINK_TRAP_TYPE_CONTROL,
+};
+
+enum {
+	/* Trap can report input port as metadata */
+	DEVLINK_ATTR_TRAP_METADATA_TYPE_IN_PORT,
+	/* Trap can report flow action cookie as metadata */
+	DEVLINK_ATTR_TRAP_METADATA_TYPE_FA_COOKIE,
+};
+
+enum devlink_reload_action {
+	DEVLINK_RELOAD_ACTION_UNSPEC,
+	DEVLINK_RELOAD_ACTION_DRIVER_REINIT,	/* Driver entities re-instantiation */
+	DEVLINK_RELOAD_ACTION_FW_ACTIVATE,	/* FW activate */
+
+	/* Add new reload actions above */
+	__DEVLINK_RELOAD_ACTION_MAX,
+	DEVLINK_RELOAD_ACTION_MAX = __DEVLINK_RELOAD_ACTION_MAX - 1
+};
+
+enum devlink_reload_limit {
+	DEVLINK_RELOAD_LIMIT_UNSPEC,	/* unspecified, no constraints */
+	DEVLINK_RELOAD_LIMIT_NO_RESET,	/* No reset allowed, no down time allowed,
+					 * no link flap and no configuration is lost.
+					 */
+
+	/* Add new reload limit above */
+	__DEVLINK_RELOAD_LIMIT_MAX,
+	DEVLINK_RELOAD_LIMIT_MAX = __DEVLINK_RELOAD_LIMIT_MAX - 1
+};
+
+#define DEVLINK_RELOAD_LIMITS_VALID_MASK (_BITUL(__DEVLINK_RELOAD_LIMIT_MAX) - 1)
+
 enum devlink_attr {
 	/* don't change the order or add anything between, this is ABI! */
 	DEVLINK_ATTR_UNSPEC,
@ -280,6 +455,104 @@ enum devlink_attr {
 	DEVLINK_ATTR_REGION_CHUNK_ADDR,         /* u64 */
 	DEVLINK_ATTR_REGION_CHUNK_LEN,          /* u64 */

+	DEVLINK_ATTR_INFO_DRIVER_NAME,		/* string */
+	DEVLINK_ATTR_INFO_SERIAL_NUMBER,	/* string */
+	DEVLINK_ATTR_INFO_VERSION_FIXED,	/* nested */
+	DEVLINK_ATTR_INFO_VERSION_RUNNING,	/* nested */
+	DEVLINK_ATTR_INFO_VERSION_STORED,	/* nested */
+	DEVLINK_ATTR_INFO_VERSION_NAME,		/* string */
+	DEVLINK_ATTR_INFO_VERSION_VALUE,	/* string */
+
+	DEVLINK_ATTR_SB_POOL_CELL_SIZE,		/* u32 */
+
+	DEVLINK_ATTR_FMSG,			/* nested */
+	DEVLINK_ATTR_FMSG_OBJ_NEST_START,	/* flag */
+	DEVLINK_ATTR_FMSG_PAIR_NEST_START,	/* flag */
+	DEVLINK_ATTR_FMSG_ARR_NEST_START,	/* flag */
+	DEVLINK_ATTR_FMSG_NEST_END,		/* flag */
+	DEVLINK_ATTR_FMSG_OBJ_NAME,		/* string */
+	DEVLINK_ATTR_FMSG_OBJ_VALUE_TYPE,	/* u8 */
+	DEVLINK_ATTR_FMSG_OBJ_VALUE_DATA,	/* dynamic */
+
+	DEVLINK_ATTR_HEALTH_REPORTER,			/* nested */
+	DEVLINK_ATTR_HEALTH_REPORTER_NAME,		/* string */
+	DEVLINK_ATTR_HEALTH_REPORTER_STATE,		/* u8 */
+	DEVLINK_ATTR_HEALTH_REPORTER_ERR_COUNT,		/* u64 */
+	DEVLINK_ATTR_HEALTH_REPORTER_RECOVER_COUNT,	/* u64 */
+	DEVLINK_ATTR_HEALTH_REPORTER_DUMP_TS,		/* u64 */
+	DEVLINK_ATTR_HEALTH_REPORTER_GRACEFUL_PERIOD,	/* u64 */
+	DEVLINK_ATTR_HEALTH_REPORTER_AUTO_RECOVER,	/* u8 */
+
+	DEVLINK_ATTR_FLASH_UPDATE_FILE_NAME,	/* string */
+	DEVLINK_ATTR_FLASH_UPDATE_COMPONENT,	/* string */
+	DEVLINK_ATTR_FLASH_UPDATE_STATUS_MSG,	/* string */
+	DEVLINK_ATTR_FLASH_UPDATE_STATUS_DONE,	/* u64 */
+	DEVLINK_ATTR_FLASH_UPDATE_STATUS_TOTAL,	/* u64 */
+
+	DEVLINK_ATTR_PORT_PCI_PF_NUMBER,	/* u16 */
+	DEVLINK_ATTR_PORT_PCI_VF_NUMBER,	/* u16 */
+
+	DEVLINK_ATTR_STATS,				/* nested */
+
+	DEVLINK_ATTR_TRAP_NAME,				/* string */
+	/* enum devlink_trap_action */
+	DEVLINK_ATTR_TRAP_ACTION,			/* u8 */
+	/* enum devlink_trap_type */
+	DEVLINK_ATTR_TRAP_TYPE,				/* u8 */
+	DEVLINK_ATTR_TRAP_GENERIC,			/* flag */
+	DEVLINK_ATTR_TRAP_METADATA,			/* nested */
+	DEVLINK_ATTR_TRAP_GROUP_NAME,			/* string */
+
+	DEVLINK_ATTR_RELOAD_FAILED,			/* u8 0 or 1 */
+
+	DEVLINK_ATTR_HEALTH_REPORTER_DUMP_TS_NS,	/* u64 */
+
+	DEVLINK_ATTR_NETNS_FD,			/* u32 */
+	DEVLINK_ATTR_NETNS_PID,			/* u32 */
+	DEVLINK_ATTR_NETNS_ID,			/* u32 */
+
+	DEVLINK_ATTR_HEALTH_REPORTER_AUTO_DUMP,	/* u8 */
+
+	DEVLINK_ATTR_TRAP_POLICER_ID,			/* u32 */
+	DEVLINK_ATTR_TRAP_POLICER_RATE,			/* u64 */
+	DEVLINK_ATTR_TRAP_POLICER_BURST,		/* u64 */
+
+	DEVLINK_ATTR_PORT_FUNCTION,			/* nested */
+
+	DEVLINK_ATTR_INFO_BOARD_SERIAL_NUMBER,	/* string */
+
+	DEVLINK_ATTR_PORT_LANES,			/* u32 */
+	DEVLINK_ATTR_PORT_SPLITTABLE,			/* u8 */
+
+	DEVLINK_ATTR_PORT_EXTERNAL,		/* u8 */
+	DEVLINK_ATTR_PORT_CONTROLLER_NUMBER,	/* u32 */
+
+	DEVLINK_ATTR_FLASH_UPDATE_STATUS_TIMEOUT,	/* u64 */
+	DEVLINK_ATTR_FLASH_UPDATE_OVERWRITE_MASK,	/* bitfield32 */
+
+	DEVLINK_ATTR_RELOAD_ACTION,		/* u8 */
+	DEVLINK_ATTR_RELOAD_ACTIONS_PERFORMED,	/* bitfield32 */
+	DEVLINK_ATTR_RELOAD_LIMITS,		/* bitfield32 */
+
+	DEVLINK_ATTR_DEV_STATS,			/* nested */
+	DEVLINK_ATTR_RELOAD_STATS,		/* nested */
+	DEVLINK_ATTR_RELOAD_STATS_ENTRY,	/* nested */
+	DEVLINK_ATTR_RELOAD_STATS_LIMIT,	/* u8 */
+	DEVLINK_ATTR_RELOAD_STATS_VALUE,	/* u32 */
+	DEVLINK_ATTR_REMOTE_RELOAD_STATS,	/* nested */
+	DEVLINK_ATTR_RELOAD_ACTION_INFO,        /* nested */
+	DEVLINK_ATTR_RELOAD_ACTION_STATS,       /* nested */
+
+	DEVLINK_ATTR_PORT_PCI_SF_NUMBER,	/* u32 */
+
+	DEVLINK_ATTR_RATE_TYPE,			/* u16 */
+	DEVLINK_ATTR_RATE_TX_SHARE,		/* u64 */
+	DEVLINK_ATTR_RATE_TX_MAX,		/* u64 */
+	DEVLINK_ATTR_RATE_NODE_NAME,		/* string */
+	DEVLINK_ATTR_RATE_PARENT_NODE_NAME,	/* string */
+
+	DEVLINK_ATTR_REGION_MAX_SNAPSHOTS,	/* u32 */
+
 	/* add new attributes above here, update the policy in devlink.c */

 	__DEVLINK_ATTR_MAX,
@ -326,4 +599,32 @@ enum devlink_resource_unit {
 	DEVLINK_RESOURCE_UNIT_ENTRY,
 };

+enum devlink_port_function_attr {	/* NOTE(review): presumably the attributes nested inside DEVLINK_ATTR_PORT_FUNCTION - confirm */
+	DEVLINK_PORT_FUNCTION_ATTR_UNSPEC,
+	DEVLINK_PORT_FUNCTION_ATTR_HW_ADDR,	/* binary */
+	DEVLINK_PORT_FN_ATTR_STATE,	/* u8; NOTE(review): presumably an enum devlink_port_fn_state value - confirm */
+	DEVLINK_PORT_FN_ATTR_OPSTATE,	/* u8; NOTE(review): presumably an enum devlink_port_fn_opstate value - confirm */
+
+	__DEVLINK_PORT_FUNCTION_ATTR_MAX,	/* sentinel: one past the last attribute; keep last */
+	DEVLINK_PORT_FUNCTION_ATTR_MAX = __DEVLINK_PORT_FUNCTION_ATTR_MAX - 1	/* highest attribute number defined */
+};
+
+enum devlink_port_fn_state {
+	DEVLINK_PORT_FN_STATE_INACTIVE,
+	DEVLINK_PORT_FN_STATE_ACTIVE,
+};
+
+/**
+ * enum devlink_port_fn_opstate - indicates operational state of the function
+ * @DEVLINK_PORT_FN_OPSTATE_ATTACHED: Driver is attached to the function.
+ * For graceful tear down of the function, after inactivation of the
+ * function, user should wait for operational state to turn DETACHED.
+ * @DEVLINK_PORT_FN_OPSTATE_DETACHED: Driver is detached from the function.
+ * It is safe to delete the port.
+ */
+enum devlink_port_fn_opstate {
+	DEVLINK_PORT_FN_OPSTATE_DETACHED,
+	DEVLINK_PORT_FN_OPSTATE_ATTACHED,
+};
+
 #endif /* _LINUX_DEVLINK_H_ */
--- a/include/uapi/linux/elf-em.h
+++ b/include/uapi/linux/elf-em.h
@ -34,15 +34,23 @@
 #define EM_M32R		88	/* Renesas M32R */
 #define EM_MN10300	89	/* Panasonic/MEI MN10300, AM33 */
 #define EM_OPENRISC     92     /* OpenRISC 32-bit embedded processor */
+#define EM_ARCOMPACT	93	/* ARCompact processor */
+#define EM_XTENSA	94	/* Tensilica Xtensa Architecture */
 #define EM_BLACKFIN     106     /* ADI Blackfin Processor */
+#define EM_UNICORE	110	/* UniCore-32 */
 #define EM_ALTERA_NIOS2	113	/* Altera Nios II soft-core processor */
 #define EM_TI_C6000	140	/* TI C6X DSPs */
+#define EM_HEXAGON	164	/* QUALCOMM Hexagon */
+#define EM_NDS32	167	/* Andes Technology compact code size
+				   embedded RISC processor family */
 #define EM_AARCH64	183	/* ARM 64 bit */
 #define EM_TILEPRO	188	/* Tilera TILEPro */
 #define EM_MICROBLAZE	189	/* Xilinx MicroBlaze */
 #define EM_TILEGX	191	/* Tilera TILE-Gx */
+#define EM_ARCV2	195	/* ARCv2 Cores */
 #define EM_RISCV	243	/* RISC-V */
 #define EM_BPF		247	/* Linux BPF - in-kernel virtual machine */
+#define EM_CSKY		252	/* C-SKY */
 #define EM_FRV		0x5441	/* Fujitsu FR-V */

 /*
--- a/include/uapi/linux/fou.h
+++ b/include/uapi/linux/fou.h
@ -16,6 +16,12 @@ enum {
 	FOU_ATTR_IPPROTO,			/* u8 */
 	FOU_ATTR_TYPE,				/* u8 */
 	FOU_ATTR_REMCSUM_NOPARTIAL,		/* flag */
+	FOU_ATTR_LOCAL_V4,			/* u32 */
+	FOU_ATTR_LOCAL_V6,			/* in6_addr */
+	FOU_ATTR_PEER_V4,			/* u32 */
+	FOU_ATTR_PEER_V6,			/* in6_addr */
+	FOU_ATTR_PEER_PORT,			/* u16 */
+	FOU_ATTR_IFINDEX,			/* s32 */

 	__FOU_ATTR_MAX,
 };
--- a/include/uapi/linux/gen_stats.h
+++ b/include/uapi/linux/gen_stats.h
@ -13,6 +13,7 @@ enum {
 	TCA_STATS_RATE_EST64,
 	TCA_STATS_PAD,
 	TCA_STATS_BASIC_HW,
+	TCA_STATS_PKT64,
 	__TCA_STATS_MAX,
 };
 #define TCA_STATS_MAX (__TCA_STATS_MAX - 1)
@ -26,10 +27,6 @@ struct gnet_stats_basic {
 	__u64	bytes;
 	__u32	packets;
 };
-struct gnet_stats_basic_packed {
-	__u64	bytes;
-	__u32	packets;
-} __attribute__ ((packed));

 /**
 * struct gnet_stats_rate_est - rate estimator
--- a/include/uapi/linux/genetlink.h
+++ b/include/uapi/linux/genetlink.h
@ -48,6 +48,7 @@ enum {
 	CTRL_CMD_NEWMCAST_GRP,
 	CTRL_CMD_DELMCAST_GRP,
 	CTRL_CMD_GETMCAST_GRP, /* unused */
+	CTRL_CMD_GETPOLICY,
 	__CTRL_CMD_MAX,
 };

@ -62,6 +63,9 @@ enum {
 	CTRL_ATTR_MAXATTR,
 	CTRL_ATTR_OPS,
 	CTRL_ATTR_MCAST_GROUPS,
+	CTRL_ATTR_POLICY,
+	CTRL_ATTR_OP_POLICY,
+	CTRL_ATTR_OP,
 	__CTRL_ATTR_MAX,
 };

@ -83,6 +87,15 @@ enum {
 	__CTRL_ATTR_MCAST_GRP_MAX,
 };

+enum {
+	CTRL_ATTR_POLICY_UNSPEC,
+	CTRL_ATTR_POLICY_DO,
+	CTRL_ATTR_POLICY_DUMP,
+
+	__CTRL_ATTR_POLICY_DUMP_MAX,
+	CTRL_ATTR_POLICY_DUMP_MAX = __CTRL_ATTR_POLICY_DUMP_MAX - 1
+};
+
 #define CTRL_ATTR_MCAST_GRP_MAX (__CTRL_ATTR_MCAST_GRP_MAX - 1)


--- a/include/uapi/linux/hdlc/ioctl.h
+++ b/include/uapi/linux/hdlc/ioctl.h
@ -79,6 +79,15 @@ typedef struct {
    unsigned int timeout;
 } cisco_proto;

+typedef struct {
+	unsigned short dce; /* 1 for DCE (network side) operation */
+	unsigned int modulo; /* modulo (8 = basic / 128 = extended) */
+	unsigned int window; /* frame window size */
+	unsigned int t1; /* timeout t1 */
+	unsigned int t2; /* timeout t2 */
+	unsigned int n2; /* frame retry counter */
+} x25_hdlc_proto;
+
 /* PPP doesn't need any info now - supply length = 0 to ioctl */

 #endif /* __ASSEMBLY__ */
--- a/include/uapi/linux/icmpv6.h
+++ b/include/uapi/linux/icmpv6.h
@ -68,6 +68,7 @@ struct icmp6hdr {
 #define icmp6_mtu		icmp6_dataun.un_data32[0]
 #define icmp6_unused		icmp6_dataun.un_data32[0]
 #define icmp6_maxdelay		icmp6_dataun.un_data16[0]
+#define icmp6_datagram_len	icmp6_dataun.un_data8[0]
 #define icmp6_router		icmp6_dataun.u_nd_advt.router
 #define icmp6_solicited		icmp6_dataun.u_nd_advt.solicited
 #define icmp6_override		icmp6_dataun.u_nd_advt.override
@ -90,6 +91,8 @@ struct icmp6hdr {
 #define ICMPV6_TIME_EXCEED		3
 #define ICMPV6_PARAMPROB		4

+#define ICMPV6_ERRMSG_MAX       127
+
 #define ICMPV6_INFOMSG_MASK		0x80

 #define ICMPV6_ECHO_REQUEST		128
@ -108,6 +111,10 @@ struct icmp6hdr {
 #define ICMPV6_MOBILE_PREFIX_SOL	146
 #define ICMPV6_MOBILE_PREFIX_ADV	147

+#define ICMPV6_MRDISC_ADV		151
+
+#define ICMPV6_MSG_MAX          255
+
 /*
 *	Codes for Destination Unreachable
 */
@ -131,7 +138,11 @@ struct icmp6hdr {
 #define ICMPV6_HDR_FIELD		0
 #define ICMPV6_UNK_NEXTHDR		1
 #define ICMPV6_UNK_OPTION		2
+#define ICMPV6_HDR_INCOMP		3

+/* Codes for EXT_ECHO (PROBE) */
+#define ICMPV6_EXT_ECHO_REQUEST		160
+#define ICMPV6_EXT_ECHO_REPLY		161
 /*
 *	constants for (set|get)sockopt
 */
--- a/include/uapi/linux/if.h
+++ b/include/uapi/linux/if.h
@ -31,6 +31,7 @@
 #define	IFNAMSIZ	16
 #endif /* __UAPI_DEF_IF_IFNAMSIZ */
 #define	IFALIASZ	256
+#define	ALTIFNAMSIZ	128
 #include <linux/hdlc/ioctl.h>

 /* For glibc compatibility. An empty enum does not compile. */
@ -175,6 +176,7 @@ enum {
 enum {
 	IF_LINK_MODE_DEFAULT,
 	IF_LINK_MODE_DORMANT,	/* limit upward transition to dormant */
+	IF_LINK_MODE_TESTING,	/* limit upward transition to testing */
 };

 /*
@ -210,6 +212,7 @@ struct if_settings {
 		fr_proto		*fr;
 		fr_proto_pvc		*fr_pvc;
 		fr_proto_pvc_info	*fr_pvc_info;
+		x25_hdlc_proto		*x25;

 		/* interface settings */
 		sync_serial_settings	*sync;
--- a/include/uapi/linux/if_alg.h
+++ b/include/uapi/linux/if_alg.h
@ -24,6 +24,22 @@ struct sockaddr_alg {
 	__u8	salg_name[64];
 };

+/*
+ * Linux v4.12 and later removed the 64-byte limit on salg_name[]; it's now an
+ * arbitrary-length field.  We had to keep the original struct above for source
+ * compatibility with existing userspace programs, though.  Use the new struct
+ * below if support for very long algorithm names is needed.  To do this,
+ * allocate 'sizeof(struct sockaddr_alg_new) + strlen(algname) + 1' bytes, and
+ * copy algname (including the null terminator) into salg_name.
+ */
+struct sockaddr_alg_new {
+	__u16	salg_family;
+	__u8	salg_type[14];
+	__u32	salg_feat;
+	__u32	salg_mask;
+	__u8	salg_name[];
+};
+
 struct af_alg_iv {
 	__u32	ivlen;
 	__u8	iv[0];
@ -35,6 +51,7 @@ struct af_alg_iv {
 #define ALG_SET_OP			3
 #define ALG_SET_AEAD_ASSOCLEN		4
 #define ALG_SET_AEAD_AUTHSIZE		5
+#define ALG_SET_DRBG_ENTROPY		6

 /* Operations */
 #define ALG_OP_DECRYPT			0
--- a/include/uapi/linux/if_arp.h
+++ b/include/uapi/linux/if_arp.h
@ -54,6 +54,7 @@
 #define ARPHRD_X25	271		/* CCITT X.25			*/
 #define ARPHRD_HWX25	272		/* Boards with X.25 in firmware	*/
 #define ARPHRD_CAN	280		/* Controller Area Network      */
+#define ARPHRD_MCTP	290
 #define ARPHRD_PPP	512
 #define ARPHRD_CISCO	513		/* Cisco HDLC	 		*/
 #define ARPHRD_HDLC	ARPHRD_CISCO
--- a/include/uapi/linux/if_bonding.h
+++ b/include/uapi/linux/if_bonding.h
@ -94,6 +94,17 @@
 #define BOND_XMIT_POLICY_LAYER23	2 /* layer 2+3 (IP ^ MAC) */
 #define BOND_XMIT_POLICY_ENCAP23	3 /* encapsulated layer 2+3 */
 #define BOND_XMIT_POLICY_ENCAP34	4 /* encapsulated layer 3+4 */
+#define BOND_XMIT_POLICY_VLAN_SRCMAC	5 /* vlan + source MAC */
+
+/* 802.3ad port state definitions (43.4.2.2 in the 802.3ad standard) */
+#define LACP_STATE_LACP_ACTIVITY   0x1
+#define LACP_STATE_LACP_TIMEOUT    0x2
+#define LACP_STATE_AGGREGATION     0x4
+#define LACP_STATE_SYNCHRONIZATION 0x8
+#define LACP_STATE_COLLECTING      0x10
+#define LACP_STATE_DISTRIBUTING    0x20
+#define LACP_STATE_DEFAULTED       0x40
+#define LACP_STATE_EXPIRED         0x80

 typedef struct ifbond {
 	__s32 bond_mode;
@ -117,15 +128,28 @@ struct ad_info {
 	__u8 partner_system[ETH_ALEN];
 };

+/* Embedded inside LINK_XSTATS_TYPE_BOND */
+enum {
+	BOND_XSTATS_UNSPEC,
+	BOND_XSTATS_3AD,	/* container for the BOND_3AD_STAT_* attributes */
+	__BOND_XSTATS_MAX	/* sentinel: one past the last attribute; keep last */
+};
+#define BOND_XSTATS_MAX (__BOND_XSTATS_MAX - 1)	/* highest BOND_XSTATS_* value */
+
+/* Embedded inside BOND_XSTATS_3AD */
+enum {
+	BOND_3AD_STAT_LACPDU_RX,	/* first enumerator is 0: this enum has no *_UNSPEC entry */
+	BOND_3AD_STAT_LACPDU_TX,
+	BOND_3AD_STAT_LACPDU_UNKNOWN_RX,
+	BOND_3AD_STAT_LACPDU_ILLEGAL_RX,
+	BOND_3AD_STAT_MARKER_RX,
+	BOND_3AD_STAT_MARKER_TX,
+	BOND_3AD_STAT_MARKER_RESP_RX,
+	BOND_3AD_STAT_MARKER_RESP_TX,
+	BOND_3AD_STAT_MARKER_UNKNOWN_RX,
+	BOND_3AD_STAT_PAD,	/* NOTE(review): presumably a netlink padding attribute, not a counter - confirm */
+	__BOND_3AD_STAT_MAX	/* sentinel: one past the last attribute; keep last */
+};
+#define BOND_3AD_STAT_MAX (__BOND_3AD_STAT_MAX - 1)	/* highest BOND_3AD_STAT_* value */
+
 #endif /* _LINUX_IF_BONDING_H */
-
-/*
- * Local variables:
- *  version-control: t
- *  kept-new-versions: 5
- *  c-indent-level: 8
- *  c-basic-offset: 8
- *  tab-width: 8
- * End:
- */
-
--- a/include/uapi/linux/if_bridge.h
+++ b/include/uapi/linux/if_bridge.h
@ -120,6 +120,8 @@ enum {
 	IFLA_BRIDGE_MODE,
 	IFLA_BRIDGE_VLAN_INFO,
 	IFLA_BRIDGE_VLAN_TUNNEL_INFO,
+	IFLA_BRIDGE_MRP,
+	IFLA_BRIDGE_CFM,
 	__IFLA_BRIDGE_MAX,
 };
 #define IFLA_BRIDGE_MAX (__IFLA_BRIDGE_MAX - 1)
@ -130,6 +132,7 @@ enum {
 #define BRIDGE_VLAN_INFO_RANGE_BEGIN	(1<<3) /* VLAN is start of vlan range */
 #define BRIDGE_VLAN_INFO_RANGE_END	(1<<4) /* VLAN is end of vlan range */
 #define BRIDGE_VLAN_INFO_BRENTRY	(1<<5) /* Global bridge VLAN entry */
+#define BRIDGE_VLAN_INFO_ONLY_OPTS	(1<<6) /* Skip create/delete/flags */

 struct bridge_vlan_info {
 	__u16 flags;
@ -156,6 +159,415 @@ struct bridge_vlan_xstats {
 	__u32 pad2;
 };

+enum {	/* NOTE(review): presumably nested under IFLA_BRIDGE_MRP - confirm */
+	IFLA_BRIDGE_MRP_UNSPEC,
+	IFLA_BRIDGE_MRP_INSTANCE,
+	IFLA_BRIDGE_MRP_PORT_STATE,
+	IFLA_BRIDGE_MRP_PORT_ROLE,
+	IFLA_BRIDGE_MRP_RING_STATE,
+	IFLA_BRIDGE_MRP_RING_ROLE,
+	IFLA_BRIDGE_MRP_START_TEST,
+	IFLA_BRIDGE_MRP_INFO,
+	IFLA_BRIDGE_MRP_IN_ROLE,
+	IFLA_BRIDGE_MRP_IN_STATE,
+	IFLA_BRIDGE_MRP_START_IN_TEST,
+	__IFLA_BRIDGE_MRP_MAX,	/* sentinel, not a real value; keep last */
+};
+
+#define IFLA_BRIDGE_MRP_MAX (__IFLA_BRIDGE_MRP_MAX - 1)	/* == IFLA_BRIDGE_MRP_START_IN_TEST */
+
+enum {	/* NOTE(review): presumably nested under IFLA_BRIDGE_MRP_INSTANCE - confirm */
+	IFLA_BRIDGE_MRP_INSTANCE_UNSPEC,
+	IFLA_BRIDGE_MRP_INSTANCE_RING_ID,
+	IFLA_BRIDGE_MRP_INSTANCE_P_IFINDEX,
+	IFLA_BRIDGE_MRP_INSTANCE_S_IFINDEX,
+	IFLA_BRIDGE_MRP_INSTANCE_PRIO,
+	__IFLA_BRIDGE_MRP_INSTANCE_MAX,	/* sentinel, not a real value; keep last */
+};
+
+#define IFLA_BRIDGE_MRP_INSTANCE_MAX (__IFLA_BRIDGE_MRP_INSTANCE_MAX - 1)	/* == ..._INSTANCE_PRIO */
+
+enum {	/* NOTE(review): presumably nested under IFLA_BRIDGE_MRP_PORT_STATE - confirm */
+	IFLA_BRIDGE_MRP_PORT_STATE_UNSPEC,
+	IFLA_BRIDGE_MRP_PORT_STATE_STATE,
+	__IFLA_BRIDGE_MRP_PORT_STATE_MAX,	/* sentinel, not a real value; keep last */
+};
+
+#define IFLA_BRIDGE_MRP_PORT_STATE_MAX (__IFLA_BRIDGE_MRP_PORT_STATE_MAX - 1)	/* == ..._PORT_STATE_STATE */
+
+enum {	/* NOTE(review): presumably nested under IFLA_BRIDGE_MRP_PORT_ROLE - confirm */
+	IFLA_BRIDGE_MRP_PORT_ROLE_UNSPEC,
+	IFLA_BRIDGE_MRP_PORT_ROLE_ROLE,
+	__IFLA_BRIDGE_MRP_PORT_ROLE_MAX,	/* sentinel, not a real value; keep last */
+};
+
+#define IFLA_BRIDGE_MRP_PORT_ROLE_MAX (__IFLA_BRIDGE_MRP_PORT_ROLE_MAX - 1)	/* == ..._PORT_ROLE_ROLE */
+
+enum {	/* NOTE(review): presumably nested under IFLA_BRIDGE_MRP_RING_STATE - confirm */
+	IFLA_BRIDGE_MRP_RING_STATE_UNSPEC,
+	IFLA_BRIDGE_MRP_RING_STATE_RING_ID,
+	IFLA_BRIDGE_MRP_RING_STATE_STATE,
+	__IFLA_BRIDGE_MRP_RING_STATE_MAX,	/* sentinel, not a real value; keep last */
+};
+
+#define IFLA_BRIDGE_MRP_RING_STATE_MAX (__IFLA_BRIDGE_MRP_RING_STATE_MAX - 1)	/* == ..._RING_STATE_STATE */
+
+enum {	/* NOTE(review): presumably nested under IFLA_BRIDGE_MRP_RING_ROLE - confirm */
+	IFLA_BRIDGE_MRP_RING_ROLE_UNSPEC,
+	IFLA_BRIDGE_MRP_RING_ROLE_RING_ID,
+	IFLA_BRIDGE_MRP_RING_ROLE_ROLE,
+	__IFLA_BRIDGE_MRP_RING_ROLE_MAX,	/* sentinel, not a real value; keep last */
+};
+
+#define IFLA_BRIDGE_MRP_RING_ROLE_MAX (__IFLA_BRIDGE_MRP_RING_ROLE_MAX - 1)	/* == ..._RING_ROLE_ROLE */
+
+enum {	/* NOTE(review): presumably nested under IFLA_BRIDGE_MRP_START_TEST - confirm */
+	IFLA_BRIDGE_MRP_START_TEST_UNSPEC,
+	IFLA_BRIDGE_MRP_START_TEST_RING_ID,
+	IFLA_BRIDGE_MRP_START_TEST_INTERVAL,
+	IFLA_BRIDGE_MRP_START_TEST_MAX_MISS,
+	IFLA_BRIDGE_MRP_START_TEST_PERIOD,
+	IFLA_BRIDGE_MRP_START_TEST_MONITOR,
+	__IFLA_BRIDGE_MRP_START_TEST_MAX,	/* sentinel, not a real value; keep last */
+};
+
+#define IFLA_BRIDGE_MRP_START_TEST_MAX (__IFLA_BRIDGE_MRP_START_TEST_MAX - 1)	/* == ..._START_TEST_MONITOR */
+
+enum {	/* NOTE(review): presumably nested under IFLA_BRIDGE_MRP_INFO - confirm */
+	IFLA_BRIDGE_MRP_INFO_UNSPEC,
+	IFLA_BRIDGE_MRP_INFO_RING_ID,
+	IFLA_BRIDGE_MRP_INFO_P_IFINDEX,
+	IFLA_BRIDGE_MRP_INFO_S_IFINDEX,
+	IFLA_BRIDGE_MRP_INFO_PRIO,
+	IFLA_BRIDGE_MRP_INFO_RING_STATE,
+	IFLA_BRIDGE_MRP_INFO_RING_ROLE,
+	IFLA_BRIDGE_MRP_INFO_TEST_INTERVAL,
+	IFLA_BRIDGE_MRP_INFO_TEST_MAX_MISS,
+	IFLA_BRIDGE_MRP_INFO_TEST_MONITOR,
+	IFLA_BRIDGE_MRP_INFO_I_IFINDEX,
+	IFLA_BRIDGE_MRP_INFO_IN_STATE,
+	IFLA_BRIDGE_MRP_INFO_IN_ROLE,
+	IFLA_BRIDGE_MRP_INFO_IN_TEST_INTERVAL,
+	IFLA_BRIDGE_MRP_INFO_IN_TEST_MAX_MISS,
+	__IFLA_BRIDGE_MRP_INFO_MAX,	/* sentinel, not a real value; keep last */
+};
+
+#define IFLA_BRIDGE_MRP_INFO_MAX (__IFLA_BRIDGE_MRP_INFO_MAX - 1)	/* == ..._INFO_IN_TEST_MAX_MISS */
+
+enum {	/* NOTE(review): presumably nested under IFLA_BRIDGE_MRP_IN_STATE - confirm */
+	IFLA_BRIDGE_MRP_IN_STATE_UNSPEC,
+	IFLA_BRIDGE_MRP_IN_STATE_IN_ID,
+	IFLA_BRIDGE_MRP_IN_STATE_STATE,
+	__IFLA_BRIDGE_MRP_IN_STATE_MAX,	/* sentinel, not a real value; keep last */
+};
+
+#define IFLA_BRIDGE_MRP_IN_STATE_MAX (__IFLA_BRIDGE_MRP_IN_STATE_MAX - 1)	/* == ..._IN_STATE_STATE */
+
+enum {	/* NOTE(review): presumably nested under IFLA_BRIDGE_MRP_IN_ROLE - confirm */
+	IFLA_BRIDGE_MRP_IN_ROLE_UNSPEC,
+	IFLA_BRIDGE_MRP_IN_ROLE_RING_ID,
+	IFLA_BRIDGE_MRP_IN_ROLE_IN_ID,
+	IFLA_BRIDGE_MRP_IN_ROLE_ROLE,
+	IFLA_BRIDGE_MRP_IN_ROLE_I_IFINDEX,
+	__IFLA_BRIDGE_MRP_IN_ROLE_MAX,	/* sentinel, not a real value; keep last */
+};
+
+#define IFLA_BRIDGE_MRP_IN_ROLE_MAX (__IFLA_BRIDGE_MRP_IN_ROLE_MAX - 1)	/* == ..._IN_ROLE_I_IFINDEX */
+
+enum {	/* NOTE(review): presumably nested under IFLA_BRIDGE_MRP_START_IN_TEST - confirm */
+	IFLA_BRIDGE_MRP_START_IN_TEST_UNSPEC,
+	IFLA_BRIDGE_MRP_START_IN_TEST_IN_ID,
+	IFLA_BRIDGE_MRP_START_IN_TEST_INTERVAL,
+	IFLA_BRIDGE_MRP_START_IN_TEST_MAX_MISS,
+	IFLA_BRIDGE_MRP_START_IN_TEST_PERIOD,
+	__IFLA_BRIDGE_MRP_START_IN_TEST_MAX,	/* sentinel, not a real value; keep last */
+};
+
+#define IFLA_BRIDGE_MRP_START_IN_TEST_MAX (__IFLA_BRIDGE_MRP_START_IN_TEST_MAX - 1)	/* == ..._START_IN_TEST_PERIOD */
+
+struct br_mrp_instance {	/* same four fields as IFLA_BRIDGE_MRP_INSTANCE_*, in the same order */
+	__u32 ring_id;
+	__u32 p_ifindex;
+	__u32 s_ifindex;
+	__u16 prio;	/* NOTE(review): 16-bit tail after 32-bit members; trailing padding presumably follows - confirm */
+};
+
+struct br_mrp_ring_state {	/* same fields as IFLA_BRIDGE_MRP_RING_STATE_{RING_ID,STATE} */
+	__u32 ring_id;
+	__u32 ring_state;	/* NOTE(review): set of valid state values is not defined in this header - confirm */
+};
+
+struct br_mrp_ring_role {	/* same fields as IFLA_BRIDGE_MRP_RING_ROLE_{RING_ID,ROLE} */
+	__u32 ring_id;
+	__u32 ring_role;	/* NOTE(review): set of valid role values is not defined in this header - confirm */
+};
+
+struct br_mrp_start_test {	/* same five fields as IFLA_BRIDGE_MRP_START_TEST_*, in the same order */
+	__u32 ring_id;
+	__u32 interval;	/* NOTE(review): time unit is not stated in this header - confirm */
+	__u32 max_miss;
+	__u32 period;	/* NOTE(review): time unit is not stated in this header - confirm */
+	__u32 monitor;
+};
+
+struct br_mrp_in_state {	/* field order (state, id) is the reverse of IFLA_BRIDGE_MRP_IN_STATE_{IN_ID,STATE} */
+	__u32 in_state;
+	__u16 in_id;	/* NOTE(review): 16-bit tail after a 32-bit member; trailing padding presumably follows - confirm */
+};
+
+struct br_mrp_in_role {	/* same fields as IFLA_BRIDGE_MRP_IN_ROLE_*, but with in_id moved last */
+	__u32 ring_id;
+	__u32 in_role;
+	__u32 i_ifindex;
+	__u16 in_id;	/* NOTE(review): 16-bit tail after 32-bit members; trailing padding presumably follows - confirm */
+};
+
+struct br_mrp_start_in_test {	/* same fields as IFLA_BRIDGE_MRP_START_IN_TEST_*, but with in_id moved last */
+	__u32 interval;	/* NOTE(review): time unit is not stated in this header - confirm */
+	__u32 max_miss;
+	__u32 period;	/* NOTE(review): time unit is not stated in this header - confirm */
+	__u16 in_id;	/* NOTE(review): 16-bit tail after 32-bit members; trailing padding presumably follows - confirm */
+};
+
+enum {	/* NOTE(review): presumably nested under IFLA_BRIDGE_CFM - confirm */
+	IFLA_BRIDGE_CFM_UNSPEC,
+	IFLA_BRIDGE_CFM_MEP_CREATE,
+	IFLA_BRIDGE_CFM_MEP_DELETE,
+	IFLA_BRIDGE_CFM_MEP_CONFIG,
+	IFLA_BRIDGE_CFM_CC_CONFIG,
+	IFLA_BRIDGE_CFM_CC_PEER_MEP_ADD,
+	IFLA_BRIDGE_CFM_CC_PEER_MEP_REMOVE,
+	IFLA_BRIDGE_CFM_CC_RDI,
+	IFLA_BRIDGE_CFM_CC_CCM_TX,
+	IFLA_BRIDGE_CFM_MEP_CREATE_INFO,
+	IFLA_BRIDGE_CFM_MEP_CONFIG_INFO,
+	IFLA_BRIDGE_CFM_CC_CONFIG_INFO,
+	IFLA_BRIDGE_CFM_CC_RDI_INFO,
+	IFLA_BRIDGE_CFM_CC_CCM_TX_INFO,
+	IFLA_BRIDGE_CFM_CC_PEER_MEP_INFO,
+	IFLA_BRIDGE_CFM_MEP_STATUS_INFO,
+	IFLA_BRIDGE_CFM_CC_PEER_STATUS_INFO,
+	__IFLA_BRIDGE_CFM_MAX,	/* sentinel, not a real value; keep last */
+};
+
+#define IFLA_BRIDGE_CFM_MAX (__IFLA_BRIDGE_CFM_MAX - 1)	/* == IFLA_BRIDGE_CFM_CC_PEER_STATUS_INFO */
+
+enum {	/* NOTE(review): presumably nested under IFLA_BRIDGE_CFM_MEP_CREATE - confirm */
+	IFLA_BRIDGE_CFM_MEP_CREATE_UNSPEC,
+	IFLA_BRIDGE_CFM_MEP_CREATE_INSTANCE,
+	IFLA_BRIDGE_CFM_MEP_CREATE_DOMAIN,
+	IFLA_BRIDGE_CFM_MEP_CREATE_DIRECTION,
+	IFLA_BRIDGE_CFM_MEP_CREATE_IFINDEX,
+	__IFLA_BRIDGE_CFM_MEP_CREATE_MAX,	/* sentinel, not a real value; keep last */
+};
+
+#define IFLA_BRIDGE_CFM_MEP_CREATE_MAX (__IFLA_BRIDGE_CFM_MEP_CREATE_MAX - 1)	/* == ..._MEP_CREATE_IFINDEX */
+
+enum {	/* NOTE(review): presumably nested under IFLA_BRIDGE_CFM_MEP_DELETE - confirm */
+	IFLA_BRIDGE_CFM_MEP_DELETE_UNSPEC,
+	IFLA_BRIDGE_CFM_MEP_DELETE_INSTANCE,
+	__IFLA_BRIDGE_CFM_MEP_DELETE_MAX,	/* sentinel, not a real value; keep last */
+};
+
+#define IFLA_BRIDGE_CFM_MEP_DELETE_MAX (__IFLA_BRIDGE_CFM_MEP_DELETE_MAX - 1)	/* == ..._MEP_DELETE_INSTANCE */
+
+enum {	/* NOTE(review): presumably nested under IFLA_BRIDGE_CFM_MEP_CONFIG - confirm */
+	IFLA_BRIDGE_CFM_MEP_CONFIG_UNSPEC,
+	IFLA_BRIDGE_CFM_MEP_CONFIG_INSTANCE,
+	IFLA_BRIDGE_CFM_MEP_CONFIG_UNICAST_MAC,
+	IFLA_BRIDGE_CFM_MEP_CONFIG_MDLEVEL,
+	IFLA_BRIDGE_CFM_MEP_CONFIG_MEPID,
+	__IFLA_BRIDGE_CFM_MEP_CONFIG_MAX,	/* sentinel, not a real value; keep last */
+};
+
+#define IFLA_BRIDGE_CFM_MEP_CONFIG_MAX (__IFLA_BRIDGE_CFM_MEP_CONFIG_MAX - 1)	/* == ..._MEP_CONFIG_MEPID */
+
+enum {	/* NOTE(review): presumably nested under IFLA_BRIDGE_CFM_CC_CONFIG - confirm */
+	IFLA_BRIDGE_CFM_CC_CONFIG_UNSPEC,
+	IFLA_BRIDGE_CFM_CC_CONFIG_INSTANCE,
+	IFLA_BRIDGE_CFM_CC_CONFIG_ENABLE,
+	IFLA_BRIDGE_CFM_CC_CONFIG_EXP_INTERVAL,
+	IFLA_BRIDGE_CFM_CC_CONFIG_EXP_MAID,
+	__IFLA_BRIDGE_CFM_CC_CONFIG_MAX,	/* sentinel, not a real value; keep last */
+};
+
+#define IFLA_BRIDGE_CFM_CC_CONFIG_MAX (__IFLA_BRIDGE_CFM_CC_CONFIG_MAX - 1)	/* == ..._CC_CONFIG_EXP_MAID */
+
+enum {	/* NOTE(review): presumably shared by IFLA_BRIDGE_CFM_CC_PEER_MEP_ADD and _REMOVE - confirm */
+	IFLA_BRIDGE_CFM_CC_PEER_MEP_UNSPEC,
+	IFLA_BRIDGE_CFM_CC_PEER_MEP_INSTANCE,
+	IFLA_BRIDGE_CFM_CC_PEER_MEPID,	/* note: spelled ..._PEER_MEPID, not ..._PEER_MEP_ID */
+	__IFLA_BRIDGE_CFM_CC_PEER_MEP_MAX,	/* sentinel, not a real value; keep last */
+};
+
+#define IFLA_BRIDGE_CFM_CC_PEER_MEP_MAX (__IFLA_BRIDGE_CFM_CC_PEER_MEP_MAX - 1)	/* == IFLA_BRIDGE_CFM_CC_PEER_MEPID */
+
+enum {	/* NOTE(review): presumably nested under IFLA_BRIDGE_CFM_CC_RDI - confirm */
+	IFLA_BRIDGE_CFM_CC_RDI_UNSPEC,
+	IFLA_BRIDGE_CFM_CC_RDI_INSTANCE,
+	IFLA_BRIDGE_CFM_CC_RDI_RDI,
+	__IFLA_BRIDGE_CFM_CC_RDI_MAX,	/* sentinel, not a real value; keep last */
+};
+
+#define IFLA_BRIDGE_CFM_CC_RDI_MAX (__IFLA_BRIDGE_CFM_CC_RDI_MAX - 1)	/* == IFLA_BRIDGE_CFM_CC_RDI_RDI */
+
+enum {	/* NOTE(review): presumably nested under IFLA_BRIDGE_CFM_CC_CCM_TX - confirm */
+	IFLA_BRIDGE_CFM_CC_CCM_TX_UNSPEC,
+	IFLA_BRIDGE_CFM_CC_CCM_TX_INSTANCE,
+	IFLA_BRIDGE_CFM_CC_CCM_TX_DMAC,
+	IFLA_BRIDGE_CFM_CC_CCM_TX_SEQ_NO_UPDATE,
+	IFLA_BRIDGE_CFM_CC_CCM_TX_PERIOD,
+	IFLA_BRIDGE_CFM_CC_CCM_TX_IF_TLV,
+	IFLA_BRIDGE_CFM_CC_CCM_TX_IF_TLV_VALUE,
+	IFLA_BRIDGE_CFM_CC_CCM_TX_PORT_TLV,
+	IFLA_BRIDGE_CFM_CC_CCM_TX_PORT_TLV_VALUE,
+	__IFLA_BRIDGE_CFM_CC_CCM_TX_MAX,	/* sentinel, not a real value; keep last */
+};
+
+#define IFLA_BRIDGE_CFM_CC_CCM_TX_MAX (__IFLA_BRIDGE_CFM_CC_CCM_TX_MAX - 1)	/* == ..._CCM_TX_PORT_TLV_VALUE */
+
+enum {	/* NOTE(review): presumably nested under IFLA_BRIDGE_CFM_MEP_STATUS_INFO - confirm */
+	IFLA_BRIDGE_CFM_MEP_STATUS_UNSPEC,
+	IFLA_BRIDGE_CFM_MEP_STATUS_INSTANCE,
+	IFLA_BRIDGE_CFM_MEP_STATUS_OPCODE_UNEXP_SEEN,
+	IFLA_BRIDGE_CFM_MEP_STATUS_VERSION_UNEXP_SEEN,
+	IFLA_BRIDGE_CFM_MEP_STATUS_RX_LEVEL_LOW_SEEN,
+	__IFLA_BRIDGE_CFM_MEP_STATUS_MAX,	/* sentinel, not a real value; keep last */
+};
+
+#define IFLA_BRIDGE_CFM_MEP_STATUS_MAX (__IFLA_BRIDGE_CFM_MEP_STATUS_MAX - 1)	/* == ..._RX_LEVEL_LOW_SEEN */
+
+enum {	/* NOTE(review): presumably nested under IFLA_BRIDGE_CFM_CC_PEER_STATUS_INFO - confirm */
+	IFLA_BRIDGE_CFM_CC_PEER_STATUS_UNSPEC,
+	IFLA_BRIDGE_CFM_CC_PEER_STATUS_INSTANCE,
+	IFLA_BRIDGE_CFM_CC_PEER_STATUS_PEER_MEPID,
+	IFLA_BRIDGE_CFM_CC_PEER_STATUS_CCM_DEFECT,
+	IFLA_BRIDGE_CFM_CC_PEER_STATUS_RDI,
+	IFLA_BRIDGE_CFM_CC_PEER_STATUS_PORT_TLV_VALUE,
+	IFLA_BRIDGE_CFM_CC_PEER_STATUS_IF_TLV_VALUE,
+	IFLA_BRIDGE_CFM_CC_PEER_STATUS_SEEN,
+	IFLA_BRIDGE_CFM_CC_PEER_STATUS_TLV_SEEN,
+	IFLA_BRIDGE_CFM_CC_PEER_STATUS_SEQ_UNEXP_SEEN,
+	__IFLA_BRIDGE_CFM_CC_PEER_STATUS_MAX,	/* sentinel, not a real value; keep last */
+};
+
+#define IFLA_BRIDGE_CFM_CC_PEER_STATUS_MAX (__IFLA_BRIDGE_CFM_CC_PEER_STATUS_MAX - 1)	/* == ..._SEQ_UNEXP_SEEN */
+
+struct bridge_stp_xstats {	/* six 64-bit counters; NOTE(review): presumably the payload of BRIDGE_XSTATS_STP - confirm */
+	__u64 transition_blk;
+	__u64 transition_fwd;
+	__u64 rx_bpdu;
+	__u64 tx_bpdu;
+	__u64 rx_tcn;
+	__u64 tx_tcn;
+};
+
+/* Bridge vlan RTM header */
+struct br_vlan_msg {	/* 1 + 1 + 2 + 4 bytes, each member naturally aligned */
+	__u8 family;
+	__u8 reserved1;	/* NOTE(review): reserved fields presumably must be zero - confirm */
+	__u16 reserved2;
+	__u32 ifindex;
+};
+
+enum {
+	BRIDGE_VLANDB_DUMP_UNSPEC,
+	BRIDGE_VLANDB_DUMP_FLAGS,	/* carries the BRIDGE_VLANDB_DUMPF_* bits defined below */
+	__BRIDGE_VLANDB_DUMP_MAX,	/* sentinel, not a real value; keep last */
+};
+#define BRIDGE_VLANDB_DUMP_MAX (__BRIDGE_VLANDB_DUMP_MAX - 1)	/* == BRIDGE_VLANDB_DUMP_FLAGS */
+
+/* flags used in BRIDGE_VLANDB_DUMP_FLAGS attribute to affect dumps */
+#define BRIDGE_VLANDB_DUMPF_STATS	(1 << 0) /* Include stats in the dump */
+#define BRIDGE_VLANDB_DUMPF_GLOBAL	(1 << 1) /* Dump global vlan options only */
+
+/* Bridge vlan RTM attributes
+ * [BRIDGE_VLANDB_ENTRY] = {
+ *     [BRIDGE_VLANDB_ENTRY_INFO]
+ *     ...
+ * }
+ * [BRIDGE_VLANDB_GLOBAL_OPTIONS] = {
+ *     [BRIDGE_VLANDB_GOPTS_ID]
+ *     ...
+ * }
+ */
+enum {
+	BRIDGE_VLANDB_UNSPEC,
+	BRIDGE_VLANDB_ENTRY,	/* nests BRIDGE_VLANDB_ENTRY_* (see layout comment above) */
+	BRIDGE_VLANDB_GLOBAL_OPTIONS,	/* nests BRIDGE_VLANDB_GOPTS_* (see layout comment above) */
+	__BRIDGE_VLANDB_MAX,	/* sentinel, not a real value; keep last */
+};
+#define BRIDGE_VLANDB_MAX (__BRIDGE_VLANDB_MAX - 1)	/* == BRIDGE_VLANDB_GLOBAL_OPTIONS */
+
+enum {	/* attributes nested inside BRIDGE_VLANDB_ENTRY */
+	BRIDGE_VLANDB_ENTRY_UNSPEC,
+	BRIDGE_VLANDB_ENTRY_INFO,
+	BRIDGE_VLANDB_ENTRY_RANGE,
+	BRIDGE_VLANDB_ENTRY_STATE,
+	BRIDGE_VLANDB_ENTRY_TUNNEL_INFO,	/* nests BRIDGE_VLANDB_TINFO_* */
+	BRIDGE_VLANDB_ENTRY_STATS,	/* nests BRIDGE_VLANDB_STATS_* */
+	BRIDGE_VLANDB_ENTRY_MCAST_ROUTER,
+	__BRIDGE_VLANDB_ENTRY_MAX,	/* sentinel, not a real value; keep last */
+};
+#define BRIDGE_VLANDB_ENTRY_MAX (__BRIDGE_VLANDB_ENTRY_MAX - 1)	/* == BRIDGE_VLANDB_ENTRY_MCAST_ROUTER */
+
+/* [BRIDGE_VLANDB_ENTRY] = {
+ *     [BRIDGE_VLANDB_ENTRY_TUNNEL_INFO] = {
+ *         [BRIDGE_VLANDB_TINFO_ID]
+ *         ...
+ *     }
+ * }
+ */
+enum {
+	BRIDGE_VLANDB_TINFO_UNSPEC,
+	BRIDGE_VLANDB_TINFO_ID,
+	BRIDGE_VLANDB_TINFO_CMD,
+	__BRIDGE_VLANDB_TINFO_MAX,	/* sentinel, not a real value; keep last */
+};
+#define BRIDGE_VLANDB_TINFO_MAX (__BRIDGE_VLANDB_TINFO_MAX - 1)	/* == BRIDGE_VLANDB_TINFO_CMD */
+
+/* [BRIDGE_VLANDB_ENTRY] = {
+ *     [BRIDGE_VLANDB_ENTRY_STATS] = {
+ *         [BRIDGE_VLANDB_STATS_RX_BYTES]
+ *         ...
+ *     }
+ *     ...
+ * }
+ */
+enum {
+	BRIDGE_VLANDB_STATS_UNSPEC,
+	BRIDGE_VLANDB_STATS_RX_BYTES,
+	BRIDGE_VLANDB_STATS_RX_PACKETS,
+	BRIDGE_VLANDB_STATS_TX_BYTES,
+	BRIDGE_VLANDB_STATS_TX_PACKETS,
+	BRIDGE_VLANDB_STATS_PAD,	/* NOTE(review): presumably an alignment-only attribute, never carries data - confirm */
+	__BRIDGE_VLANDB_STATS_MAX,	/* sentinel, not a real value; keep last */
+};
+#define BRIDGE_VLANDB_STATS_MAX (__BRIDGE_VLANDB_STATS_MAX - 1)	/* == BRIDGE_VLANDB_STATS_PAD */
+
+enum {	/* attributes nested inside BRIDGE_VLANDB_GLOBAL_OPTIONS */
+	BRIDGE_VLANDB_GOPTS_UNSPEC,
+	BRIDGE_VLANDB_GOPTS_ID,
+	BRIDGE_VLANDB_GOPTS_RANGE,
+	BRIDGE_VLANDB_GOPTS_MCAST_SNOOPING,
+	BRIDGE_VLANDB_GOPTS_MCAST_IGMP_VERSION,
+	BRIDGE_VLANDB_GOPTS_MCAST_MLD_VERSION,
+	BRIDGE_VLANDB_GOPTS_MCAST_LAST_MEMBER_CNT,
+	BRIDGE_VLANDB_GOPTS_MCAST_STARTUP_QUERY_CNT,
+	BRIDGE_VLANDB_GOPTS_MCAST_LAST_MEMBER_INTVL,
+	BRIDGE_VLANDB_GOPTS_PAD,	/* sits mid-list; values are positional, so it must not be moved */
+	BRIDGE_VLANDB_GOPTS_MCAST_MEMBERSHIP_INTVL,
+	BRIDGE_VLANDB_GOPTS_MCAST_QUERIER_INTVL,
+	BRIDGE_VLANDB_GOPTS_MCAST_QUERY_INTVL,
+	BRIDGE_VLANDB_GOPTS_MCAST_QUERY_RESPONSE_INTVL,
+	BRIDGE_VLANDB_GOPTS_MCAST_STARTUP_QUERY_INTVL,
+	BRIDGE_VLANDB_GOPTS_MCAST_QUERIER,
+	BRIDGE_VLANDB_GOPTS_MCAST_ROUTER_PORTS,
+	BRIDGE_VLANDB_GOPTS_MCAST_QUERIER_STATE,
+	__BRIDGE_VLANDB_GOPTS_MAX	/* sentinel, not a real value; keep last */
+};
+#define BRIDGE_VLANDB_GOPTS_MAX (__BRIDGE_VLANDB_GOPTS_MAX - 1)	/* == ..._GOPTS_MCAST_QUERIER_STATE */
+
 /* Bridge multicast database attributes
 * [MDBA_MDB] = {
 *     [MDBA_MDB_ENTRY] = {
@ -198,10 +610,33 @@ enum {
 enum {
 	MDBA_MDB_EATTR_UNSPEC,
 	MDBA_MDB_EATTR_TIMER,
+	MDBA_MDB_EATTR_SRC_LIST,	/* NOTE(review): presumably nests MDBA_MDB_SRCLIST_* - confirm */
+	MDBA_MDB_EATTR_GROUP_MODE,
+	MDBA_MDB_EATTR_SOURCE,
+	MDBA_MDB_EATTR_RTPROT,	/* new entries are appended so existing values keep their numbers */
 	__MDBA_MDB_EATTR_MAX
 };
 #define MDBA_MDB_EATTR_MAX (__MDBA_MDB_EATTR_MAX - 1)

+/* per mdb entry source */
+enum {
+	MDBA_MDB_SRCLIST_UNSPEC,
+	MDBA_MDB_SRCLIST_ENTRY,	/* nests MDBA_MDB_SRCATTR_* (per the comment on that enum) */
+	__MDBA_MDB_SRCLIST_MAX	/* sentinel, not a real value; keep last */
+};
+#define MDBA_MDB_SRCLIST_MAX (__MDBA_MDB_SRCLIST_MAX - 1)	/* == MDBA_MDB_SRCLIST_ENTRY */
+
+/* per mdb entry per source attributes
+ * these are embedded in MDBA_MDB_SRCLIST_ENTRY
+ */
+enum {
+	MDBA_MDB_SRCATTR_UNSPEC,
+	MDBA_MDB_SRCATTR_ADDRESS,
+	MDBA_MDB_SRCATTR_TIMER,
+	__MDBA_MDB_SRCATTR_MAX	/* sentinel, not a real value; keep last */
+};
+#define MDBA_MDB_SRCATTR_MAX (__MDBA_MDB_SRCATTR_MAX - 1)	/* == MDBA_MDB_SRCATTR_TIMER */
+
 /* multicast router types */
 enum {
 	MDB_RTR_TYPE_DISABLED,
@ -222,6 +657,9 @@ enum {
 	MDBA_ROUTER_PATTR_UNSPEC,
 	MDBA_ROUTER_PATTR_TIMER,
 	MDBA_ROUTER_PATTR_TYPE,
+	MDBA_ROUTER_PATTR_INET_TIMER,
+	MDBA_ROUTER_PATTR_INET6_TIMER,
+	MDBA_ROUTER_PATTR_VID,
 	__MDBA_ROUTER_PATTR_MAX
 };
 #define MDBA_ROUTER_PATTR_MAX (__MDBA_ROUTER_PATTR_MAX - 1)
@ -237,12 +675,16 @@ struct br_mdb_entry {
 #define MDB_PERMANENT 1
 	__u8 state;
 #define MDB_FLAGS_OFFLOAD	(1 << 0)
+#define MDB_FLAGS_FAST_LEAVE	(1 << 1)
+#define MDB_FLAGS_STAR_EXCL	(1 << 2)
+#define MDB_FLAGS_BLOCKED	(1 << 3)
 	__u8 flags;
 	__u16 vid;
 	struct {
 		union {
 			__be32	ip4;
 			struct in6_addr ip6;
+			unsigned char mac_addr[ETH_ALEN];
 		} u;
 		__be16		proto;
 	} addr;
@ -251,16 +693,30 @@ struct br_mdb_entry {
 enum {
 	MDBA_SET_ENTRY_UNSPEC,
 	MDBA_SET_ENTRY,
+	MDBA_SET_ENTRY_ATTRS,	/* nests MDBE_ATTR_* (see layout comment below) */
 	__MDBA_SET_ENTRY_MAX,
 };
 #define MDBA_SET_ENTRY_MAX (__MDBA_SET_ENTRY_MAX - 1)

+/* [MDBA_SET_ENTRY_ATTRS] = {
+ *    [MDBE_ATTR_xxx]
+ *    ...
+ * }
+ */
+enum {
+	MDBE_ATTR_UNSPEC,
+	MDBE_ATTR_SOURCE,
+	__MDBE_ATTR_MAX,	/* sentinel, not a real value; keep last */
+};
+#define MDBE_ATTR_MAX (__MDBE_ATTR_MAX - 1)	/* == MDBE_ATTR_SOURCE */
+
 /* Embedded inside LINK_XSTATS_TYPE_BRIDGE */
 enum {
 	BRIDGE_XSTATS_UNSPEC,
 	BRIDGE_XSTATS_VLAN,
 	BRIDGE_XSTATS_MCAST,
 	BRIDGE_XSTATS_PAD,
+	BRIDGE_XSTATS_STP,	/* appended after _PAD so existing values keep their numbers; NOTE(review): presumably carries struct bridge_stp_xstats - confirm */
 	__BRIDGE_XSTATS_MAX
 };
 #define BRIDGE_XSTATS_MAX (__BRIDGE_XSTATS_MAX - 1)
@ -292,4 +748,40 @@ struct br_mcast_stats {
 	__u64 mcast_bytes[BR_MCAST_DIR_SIZE];
 	__u64 mcast_packets[BR_MCAST_DIR_SIZE];
 };
+
+/* bridge boolean options
+ * BR_BOOLOPT_NO_LL_LEARN - disable learning from link-local packets
+ * BR_BOOLOPT_MCAST_VLAN_SNOOPING - control vlan multicast snooping
+ *
+ * IMPORTANT: if adding a new option do not forget to handle
+ *            it in br_boolopt_toggle/get and bridge sysfs
+ */
+enum br_boolopt_id {
+	BR_BOOLOPT_NO_LL_LEARN,	/* == 0 */
+	BR_BOOLOPT_MCAST_VLAN_SNOOPING,	/* == 1 */
+	BR_BOOLOPT_MAX	/* number of options defined above, not an option itself; keep last */
+};
+
+/* struct br_boolopt_multi - change multiple bridge boolean options
+ *
+ * @optval: new option values (bit per option)
+ * @optmask: options to change (bit per option)
+ */
+struct br_boolopt_multi {	/* NOTE(review): bit N presumably corresponds to enum br_boolopt_id value N - confirm */
+	__u32 optval;
+	__u32 optmask;	/* 32 bits wide, so at most 32 options can be addressed */
+};
+
+enum {
+	BRIDGE_QUERIER_UNSPEC,
+	BRIDGE_QUERIER_IP_ADDRESS,
+	BRIDGE_QUERIER_IP_PORT,
+	BRIDGE_QUERIER_IP_OTHER_TIMER,
+	BRIDGE_QUERIER_PAD,	/* sits between the IP_* and IPV6_* groups; values are positional, do not move */
+	BRIDGE_QUERIER_IPV6_ADDRESS,
+	BRIDGE_QUERIER_IPV6_PORT,
+	BRIDGE_QUERIER_IPV6_OTHER_TIMER,
+	__BRIDGE_QUERIER_MAX	/* sentinel, not a real value; keep last */
+};
+#define BRIDGE_QUERIER_MAX (__BRIDGE_QUERIER_MAX - 1)	/* == BRIDGE_QUERIER_IPV6_OTHER_TIMER */
 #endif /* _LINUX_IF_BRIDGE_H */
--- a/include/uapi/linux/if_ether.h
+++ b/include/uapi/linux/if_ether.h
@ -86,17 +86,21 @@
 					 * over Ethernet
 					 */
 #define ETH_P_PAE	0x888E		/* Port Access Entity (IEEE 802.1X) */
+#define ETH_P_REALTEK	0x8899          /* Multiple proprietary protocols */
 #define ETH_P_AOE	0x88A2		/* ATA over Ethernet		*/
 #define ETH_P_8021AD	0x88A8          /* 802.1ad Service VLAN		*/
 #define ETH_P_802_EX1	0x88B5		/* 802.1 Local Experimental 1.  */
 #define ETH_P_PREAUTH	0x88C7		/* 802.11 Preauthentication */
 #define ETH_P_TIPC	0x88CA		/* TIPC 			*/
+#define ETH_P_LLDP	0x88CC		/* Link Layer Discovery Protocol */
+#define ETH_P_MRP	0x88E3		/* Media Redundancy Protocol	*/
 #define ETH_P_MACSEC	0x88E5		/* 802.1ae MACsec */
 #define ETH_P_8021AH	0x88E7          /* 802.1ah Backbone Service Tag */
 #define ETH_P_MVRP	0x88F5          /* 802.1Q MVRP                  */
 #define ETH_P_1588	0x88F7		/* IEEE 1588 Timesync */
 #define ETH_P_NCSI	0x88F8		/* NCSI protocol		*/
 #define ETH_P_PRP	0x88FB		/* IEC 62439-3 PRP/HSRv0	*/
+#define ETH_P_CFM	0x8902		/* Connectivity Fault Management */
 #define ETH_P_FCOE	0x8906		/* Fibre Channel over Ethernet  */
 #define ETH_P_IBOE	0x8915		/* Infiniband over Ethernet	*/
 #define ETH_P_TDLS	0x890D          /* TDLS */
@ -109,10 +113,11 @@
 #define ETH_P_QINQ2	0x9200		/* deprecated QinQ VLAN [ NOT AN OFFICIALLY REGISTERED ID ] */
 #define ETH_P_QINQ3	0x9300		/* deprecated QinQ VLAN [ NOT AN OFFICIALLY REGISTERED ID ] */
 #define ETH_P_EDSA	0xDADA		/* Ethertype DSA [ NOT AN OFFICIALLY REGISTERED ID ] */
+#define ETH_P_DSA_8021Q	0xDADB		/* Fake VLAN Header for DSA [ NOT AN OFFICIALLY REGISTERED ID ] */
 #define ETH_P_IFE	0xED3E		/* ForCES inter-FE LFB type */
 #define ETH_P_AF_IUCV   0xFBFB		/* IBM af_iucv [ NOT AN OFFICIALLY REGISTERED ID ] */

-#define ETH_P_802_3_MIN	0x0600		/* If the value in the ethernet type is less than this value
+#define ETH_P_802_3_MIN	0x0600		/* If the value in the ethernet type is greater than or equal to this value
 					 * then the frame is Ethernet II. Else it is 802.3 */

 /*
@ -147,6 +152,9 @@
 #define ETH_P_MAP	0x00F9		/* Qualcomm multiplexing and
 					 * aggregation protocol
 					 */
+#define ETH_P_MCTP	0x00FA		/* Management component transport
+					 * protocol packets
+					 */

 /*
 *	This is an Ethernet frame header.
--- a/Show More
+++ b/Show More