Compare commits
5 Commits
main
...
iproute2-4
| Author | SHA1 | Date |
|---|---|---|
|
|
5e155b73f7 | |
|
|
e278010416 | |
|
|
0b60e8c016 | |
|
|
6b40ba172e | |
|
|
90f34ef5c9 |
130
.clang-format
130
.clang-format
|
|
@ -1,130 +0,0 @@
|
|||
# SPDX-License-Identifier: GPL-2.0
|
||||
#
|
||||
# clang-format configuration file. Intended for clang-format >= 4.
|
||||
#
|
||||
# For more information, see:
|
||||
#
|
||||
# Documentation/process/clang-format.rst
|
||||
# https://clang.llvm.org/docs/ClangFormat.html
|
||||
# https://clang.llvm.org/docs/ClangFormatStyleOptions.html
|
||||
#
|
||||
---
|
||||
AccessModifierOffset: -4
|
||||
AlignAfterOpenBracket: Align
|
||||
AlignConsecutiveAssignments: false
|
||||
AlignConsecutiveDeclarations: false
|
||||
#AlignEscapedNewlines: Left # Unknown to clang-format-4.0
|
||||
AlignOperands: true
|
||||
AlignTrailingComments: false
|
||||
AllowAllParametersOfDeclarationOnNextLine: false
|
||||
AllowShortBlocksOnASingleLine: false
|
||||
AllowShortCaseLabelsOnASingleLine: false
|
||||
AllowShortFunctionsOnASingleLine: None
|
||||
AllowShortIfStatementsOnASingleLine: false
|
||||
AllowShortLoopsOnASingleLine: false
|
||||
AlwaysBreakAfterDefinitionReturnType: None
|
||||
AlwaysBreakAfterReturnType: None
|
||||
AlwaysBreakBeforeMultilineStrings: false
|
||||
AlwaysBreakTemplateDeclarations: false
|
||||
BinPackArguments: true
|
||||
BinPackParameters: true
|
||||
BraceWrapping:
|
||||
AfterClass: false
|
||||
AfterControlStatement: false
|
||||
AfterEnum: false
|
||||
AfterFunction: true
|
||||
AfterNamespace: true
|
||||
AfterObjCDeclaration: false
|
||||
AfterStruct: false
|
||||
AfterUnion: false
|
||||
#AfterExternBlock: false # Unknown to clang-format-5.0
|
||||
BeforeCatch: false
|
||||
BeforeElse: false
|
||||
IndentBraces: false
|
||||
#SplitEmptyFunction: true # Unknown to clang-format-4.0
|
||||
#SplitEmptyRecord: true # Unknown to clang-format-4.0
|
||||
#SplitEmptyNamespace: true # Unknown to clang-format-4.0
|
||||
BreakBeforeBinaryOperators: None
|
||||
BreakBeforeBraces: Custom
|
||||
#BreakBeforeInheritanceComma: false # Unknown to clang-format-4.0
|
||||
BreakBeforeTernaryOperators: false
|
||||
BreakConstructorInitializersBeforeComma: false
|
||||
#BreakConstructorInitializers: BeforeComma # Unknown to clang-format-4.0
|
||||
BreakAfterJavaFieldAnnotations: false
|
||||
BreakStringLiterals: false
|
||||
ColumnLimit: 80
|
||||
CommentPragmas: '^ IWYU pragma:'
|
||||
#CompactNamespaces: false # Unknown to clang-format-4.0
|
||||
ConstructorInitializerAllOnOneLineOrOnePerLine: false
|
||||
ConstructorInitializerIndentWidth: 8
|
||||
ContinuationIndentWidth: 8
|
||||
Cpp11BracedListStyle: false
|
||||
DerivePointerAlignment: false
|
||||
DisableFormat: false
|
||||
ExperimentalAutoDetectBinPacking: false
|
||||
#FixNamespaceComments: false # Unknown to clang-format-4.0
|
||||
|
||||
# Taken from:
|
||||
# git grep -h '^#define [^[:space:]]*for_each[^[:space:]]*(' include/ \
|
||||
# | sed "s,^#define \([^[:space:]]*for_each[^[:space:]]*\)(.*$, - '\1'," \
|
||||
# | sort | uniq
|
||||
ForEachMacros:
|
||||
- 'list_for_each_entry'
|
||||
- 'list_for_each_entry_safe'
|
||||
- 'mnl_attr_for_each_nested'
|
||||
- 'hlist_for_each'
|
||||
- 'hlist_for_each_safe'
|
||||
- 'hlist_for_each_entry'
|
||||
|
||||
#IncludeBlocks: Preserve # Unknown to clang-format-5.0
|
||||
IncludeCategories:
|
||||
- Regex: '.*'
|
||||
Priority: 1
|
||||
IncludeIsMainRegex: '(Test)?$'
|
||||
IndentCaseLabels: false
|
||||
#IndentPPDirectives: None # Unknown to clang-format-5.0
|
||||
IndentWidth: 8
|
||||
IndentWrappedFunctionNames: false
|
||||
JavaScriptQuotes: Leave
|
||||
JavaScriptWrapImports: true
|
||||
KeepEmptyLinesAtTheStartOfBlocks: false
|
||||
MacroBlockBegin: ''
|
||||
MacroBlockEnd: ''
|
||||
MaxEmptyLinesToKeep: 1
|
||||
NamespaceIndentation: Inner
|
||||
#ObjCBinPackProtocolList: Auto # Unknown to clang-format-5.0
|
||||
ObjCBlockIndentWidth: 8
|
||||
ObjCSpaceAfterProperty: true
|
||||
ObjCSpaceBeforeProtocolList: true
|
||||
|
||||
# Taken from git's rules
|
||||
#PenaltyBreakAssignment: 10 # Unknown to clang-format-4.0
|
||||
PenaltyBreakBeforeFirstCallParameter: 30
|
||||
PenaltyBreakComment: 10
|
||||
PenaltyBreakFirstLessLess: 0
|
||||
PenaltyBreakString: 10
|
||||
PenaltyExcessCharacter: 100
|
||||
PenaltyReturnTypeOnItsOwnLine: 60
|
||||
|
||||
PointerAlignment: Right
|
||||
ReflowComments: false
|
||||
SortIncludes: false
|
||||
#SortUsingDeclarations: false # Unknown to clang-format-4.0
|
||||
SpaceAfterCStyleCast: false
|
||||
SpaceAfterTemplateKeyword: true
|
||||
SpaceBeforeAssignmentOperators: true
|
||||
#SpaceBeforeCtorInitializerColon: true # Unknown to clang-format-5.0
|
||||
#SpaceBeforeInheritanceColon: true # Unknown to clang-format-5.0
|
||||
SpaceBeforeParens: ControlStatements
|
||||
#SpaceBeforeRangeBasedForLoopColon: true # Unknown to clang-format-5.0
|
||||
SpaceInEmptyParentheses: false
|
||||
SpacesBeforeTrailingComments: 1
|
||||
SpacesInAngles: false
|
||||
SpacesInContainerLiterals: false
|
||||
SpacesInCStyleCastParentheses: false
|
||||
SpacesInParentheses: false
|
||||
SpacesInSquareBrackets: false
|
||||
Standard: Cpp03
|
||||
TabWidth: 8
|
||||
UseTab: Always
|
||||
...
|
||||
|
|
@ -1,7 +1,6 @@
|
|||
# locally generated
|
||||
Config
|
||||
static-syms.h
|
||||
config.*
|
||||
Config
|
||||
*.o
|
||||
*.a
|
||||
*.so
|
||||
|
|
@ -11,7 +10,6 @@ config.*
|
|||
# cscope
|
||||
cscope.*
|
||||
ncscope.*
|
||||
tags
|
||||
TAGS
|
||||
|
||||
# git files that we don't want to ignore even if they are dot-files
|
||||
|
|
@ -37,5 +35,12 @@ series
|
|||
# tests
|
||||
testsuite/results
|
||||
testsuite/iproute2/iproute2-this
|
||||
testsuite/tools/generate_nlmsg
|
||||
testsuite/tests/ip/link/dev_wo_vf_rate.nl
|
||||
|
||||
# doc files generated at runtime
|
||||
doc/*.aux
|
||||
doc/*.log
|
||||
doc/*.toc
|
||||
doc/*.ps
|
||||
doc/*.dvi
|
||||
doc/*.html
|
||||
doc/*.pdf
|
||||
|
|
|
|||
22
.mailmap
22
.mailmap
|
|
@ -1,22 +0,0 @@
|
|||
#
|
||||
# This list is used by git-shortlog to fix a few botched name translations
|
||||
# in the git archive, either because the author's full name was messed up
|
||||
# and/or not always written the same way, making contributions from the
|
||||
# same person appearing not to be so or badly displayed.
|
||||
#
|
||||
# Format
|
||||
# Full name <goodaddress> <badaddress>
|
||||
Steve Wise <larrystevenwise@gmail.com> <swise@opengridcomputing.com>
|
||||
Steve Wise <larrystevenwise@gmail.com> <swise@chelsio.com>
|
||||
|
||||
Stephen Hemminger <stephen@networkplumber.org> <sthemmin@microsoft.com>
|
||||
Stephen Hemminger <stephen@networkplumber.org> <shemming@brocade.com>
|
||||
Stephen Hemminger <stephen@networkplumber.org> <stephen.hemminger@vyatta.com>
|
||||
Stephen Hemminger <stephen@networkplumber.org> <shemminger@vyatta.com>
|
||||
Stephen Hemminger <stephen@networkplumber.org> <shemminger>
|
||||
Stephen Hemminger <stephen@networkplumber.org> <shemminger@linux-foundation.org>
|
||||
Stephen Hemminger <stephen@networkplumber.org> <shemminger@osdl.org>
|
||||
Stephen Hemminger <stephen@networkplumber.org> <osdl.org!shemminger>
|
||||
Stephen Hemminger <stephen@networkplumber.org> <osdl.net!shemminger>
|
||||
|
||||
David Ahern <dsahern@gmail.com> <dsa@cumulusnetworks.com>
|
||||
112
Makefile
112
Makefile
|
|
@ -1,31 +1,12 @@
|
|||
# SPDX-License-Identifier: GPL-2.0
|
||||
# Top level Makefile for iproute2
|
||||
|
||||
-include config.mk
|
||||
|
||||
ifeq ("$(origin V)", "command line")
|
||||
VERBOSE = $(V)
|
||||
endif
|
||||
ifndef VERBOSE
|
||||
VERBOSE = 0
|
||||
endif
|
||||
|
||||
ifeq ($(VERBOSE),0)
|
||||
MAKEFLAGS += --no-print-directory
|
||||
endif
|
||||
|
||||
PREFIX?=/usr
|
||||
LIBDIR?=$(PREFIX)/lib
|
||||
SBINDIR?=/sbin
|
||||
CONFDIR?=/etc/iproute2
|
||||
NETNS_RUN_DIR?=/var/run/netns
|
||||
NETNS_ETC_DIR?=/etc/netns
|
||||
DATADIR?=$(PREFIX)/share
|
||||
HDRDIR?=$(PREFIX)/include/iproute2
|
||||
DOCDIR?=$(DATADIR)/doc/iproute2
|
||||
MANDIR?=$(DATADIR)/man
|
||||
ARPDDIR?=/var/lib/arpd
|
||||
KERNEL_INCLUDE?=/usr/include
|
||||
BASH_COMPDIR?=$(DATADIR)/bash-completion/completions
|
||||
|
||||
# Path to db_185.h include
|
||||
DBM_INCLUDE:=$(DESTDIR)/usr/include
|
||||
|
|
@ -37,101 +18,70 @@ ifneq ($(SHARED_LIBS),y)
|
|||
DEFINES+= -DNO_SHARED_LIBS
|
||||
endif
|
||||
|
||||
DEFINES+=-DCONFDIR=\"$(CONFDIR)\" \
|
||||
-DNETNS_RUN_DIR=\"$(NETNS_RUN_DIR)\" \
|
||||
-DNETNS_ETC_DIR=\"$(NETNS_ETC_DIR)\"
|
||||
DEFINES+=-DCONFDIR=\"$(CONFDIR)\"
|
||||
|
||||
#options for AX.25
|
||||
ADDLIB+=ax25_ntop.o
|
||||
#options for decnet
|
||||
ADDLIB+=dnet_ntop.o dnet_pton.o
|
||||
|
||||
#options for ROSE
|
||||
ADDLIB+=rose_ntop.o
|
||||
#options for ipx
|
||||
ADDLIB+=ipx_ntop.o ipx_pton.o
|
||||
|
||||
#options for mpls
|
||||
ADDLIB+=mpls_ntop.o mpls_pton.o
|
||||
|
||||
#options for NETROM
|
||||
ADDLIB+=netrom_ntop.o
|
||||
|
||||
CC := gcc
|
||||
HOSTCC ?= $(CC)
|
||||
CC = gcc
|
||||
HOSTCC = gcc
|
||||
DEFINES += -D_GNU_SOURCE
|
||||
# Turn on transparent support for LFS
|
||||
DEFINES += -D_FILE_OFFSET_BITS=64 -D_LARGEFILE_SOURCE -D_LARGEFILE64_SOURCE
|
||||
CCOPTS = -O2 -pipe
|
||||
CCOPTS = -O2
|
||||
WFLAGS := -Wall -Wstrict-prototypes -Wmissing-prototypes
|
||||
WFLAGS += -Wmissing-declarations -Wold-style-definition -Wformat=2
|
||||
|
||||
CFLAGS := $(WFLAGS) $(CCOPTS) -I../include -I../include/uapi $(DEFINES) $(CFLAGS)
|
||||
CFLAGS := $(WFLAGS) $(CCOPTS) -I../include $(DEFINES) $(CFLAGS)
|
||||
YACCFLAGS = -d -t -v
|
||||
|
||||
SUBDIRS=lib ip tc bridge misc netem genl tipc devlink rdma dcb man vdpa
|
||||
SUBDIRS=lib ip tc bridge misc netem genl tipc man
|
||||
|
||||
LIBNETLINK=../lib/libutil.a ../lib/libnetlink.a
|
||||
LIBNETLINK=../lib/libnetlink.a ../lib/libutil.a
|
||||
LDLIBS += $(LIBNETLINK)
|
||||
|
||||
all: config.mk
|
||||
all: Config
|
||||
@set -e; \
|
||||
for i in $(SUBDIRS); \
|
||||
do echo; echo $$i; $(MAKE) -C $$i; done
|
||||
do $(MAKE) $(MFLAGS) -C $$i; done
|
||||
|
||||
.PHONY: clean clobber distclean check cscope version
|
||||
|
||||
help:
|
||||
@echo "Make Targets:"
|
||||
@echo " all - build binaries"
|
||||
@echo " clean - remove products of build"
|
||||
@echo " distclean - remove configuration and build"
|
||||
@echo " install - install binaries on local machine"
|
||||
@echo " check - run tests"
|
||||
@echo " cscope - build cscope database"
|
||||
@echo " version - update version"
|
||||
@echo ""
|
||||
@echo "Make Arguments:"
|
||||
@echo " V=[0|1] - set build verbosity level"
|
||||
|
||||
config.mk:
|
||||
@if [ ! -f config.mk -o configure -nt config.mk ]; then \
|
||||
sh configure $(KERNEL_INCLUDE); \
|
||||
fi
|
||||
Config:
|
||||
sh configure $(KERNEL_INCLUDE)
|
||||
|
||||
install: all
|
||||
install -m 0755 -d $(DESTDIR)$(SBINDIR)
|
||||
install -m 0755 -d $(DESTDIR)$(CONFDIR)
|
||||
install -m 0755 -d $(DESTDIR)$(ARPDDIR)
|
||||
install -m 0755 -d $(DESTDIR)$(HDRDIR)
|
||||
@for i in $(SUBDIRS); do $(MAKE) -C $$i install; done
|
||||
install -m 0755 -d $(DESTDIR)$(DOCDIR)/examples
|
||||
install -m 0755 -d $(DESTDIR)$(DOCDIR)/examples/diffserv
|
||||
install -m 0644 README.iproute2+tc $(shell find examples -maxdepth 1 -type f) \
|
||||
$(DESTDIR)$(DOCDIR)/examples
|
||||
install -m 0644 $(shell find examples/diffserv -maxdepth 1 -type f) \
|
||||
$(DESTDIR)$(DOCDIR)/examples/diffserv
|
||||
@for i in $(SUBDIRS) doc; do $(MAKE) -C $$i install; done
|
||||
install -m 0644 $(shell find etc/iproute2 -maxdepth 1 -type f) $(DESTDIR)$(CONFDIR)
|
||||
install -m 0755 -d $(DESTDIR)$(BASH_COMPDIR)
|
||||
install -m 0644 bash-completion/tc $(DESTDIR)$(BASH_COMPDIR)
|
||||
install -m 0644 bash-completion/devlink $(DESTDIR)$(BASH_COMPDIR)
|
||||
install -m 0644 include/bpf_elf.h $(DESTDIR)$(HDRDIR)
|
||||
|
||||
version:
|
||||
echo "static const char version[] = \""`git describe --tags --long`"\";" \
|
||||
> include/version.h
|
||||
snapshot:
|
||||
echo "static const char SNAPSHOT[] = \""`date +%y%m%d`"\";" \
|
||||
> include/SNAPSHOT.h
|
||||
|
||||
clean:
|
||||
@for i in $(SUBDIRS) testsuite; \
|
||||
do $(MAKE) -C $$i clean; done
|
||||
@for i in $(SUBDIRS) doc; \
|
||||
do $(MAKE) $(MFLAGS) -C $$i clean; done
|
||||
|
||||
clobber:
|
||||
touch config.mk
|
||||
$(MAKE) clean
|
||||
rm -f config.mk cscope.*
|
||||
touch Config
|
||||
$(MAKE) $(MFLAGS) clean
|
||||
rm -f Config cscope.*
|
||||
|
||||
distclean: clobber
|
||||
|
||||
check: all
|
||||
$(MAKE) -C testsuite
|
||||
$(MAKE) -C testsuite alltests
|
||||
@if command -v man >/dev/null 2>&1; then \
|
||||
echo "Checking manpages for syntax errors..."; \
|
||||
$(MAKE) -C man check; \
|
||||
else \
|
||||
echo "man not installed, skipping checks for syntax errors."; \
|
||||
fi
|
||||
|
||||
cscope:
|
||||
cscope -b -q -R -Iinclude -sip -slib -smisc -snetem -stc
|
||||
|
||||
|
|
|
|||
33
README
33
README
|
|
@ -1,39 +1,40 @@
|
|||
This is a set of utilities for Linux networking.
|
||||
|
||||
Information:
|
||||
https://wiki.linuxfoundation.org/networking/iproute2
|
||||
http://www.linuxfoundation.org/collaborate/workgroups/networking/iproute2
|
||||
|
||||
Download:
|
||||
http://www.kernel.org/pub/linux/utils/net/iproute2/
|
||||
|
||||
Stable version repository:
|
||||
git://git.kernel.org/pub/scm/network/iproute2/iproute2.git
|
||||
|
||||
Development repository:
|
||||
git://git.kernel.org/pub/scm/network/iproute2/iproute2-next.git
|
||||
Repository:
|
||||
git://git.kernel.org/pub/scm/linux/kernel/git/shemminger/iproute2.git
|
||||
|
||||
How to compile this.
|
||||
--------------------
|
||||
1. libdbm
|
||||
|
||||
arpd needs to have the berkeleydb development libraries. For Debian
|
||||
users this is the package with a name like libdbX.X-dev.
|
||||
arpd needs to have the db4 development libraries. For Debian
|
||||
users this is the package with a name like libdb4.x-dev.
|
||||
DBM_INCLUDE points to the directory with db_185.h which
|
||||
is the include file used by arpd to get to the old format Berkeley
|
||||
database routines. Often this is in the db-devel package.
|
||||
|
||||
2. make
|
||||
|
||||
The makefile will automatically build a config.mk file which
|
||||
contains definitions of libraries that may or may not be available
|
||||
on the system such as: ATM, ELF, MNL, and SELINUX.
|
||||
The makefile will automatically build a Config file which
|
||||
contains whether or not ATM is available, etc.
|
||||
|
||||
3. include/uapi
|
||||
3. To make documentation, cd to doc/ directory , then
|
||||
look at start of Makefile and set correct values for
|
||||
PAGESIZE=a4 , ie: a4 , letter ... (string)
|
||||
PAGESPERPAGE=2 , ie: 1 , 2 ... (numeric)
|
||||
and run make there. It assumes that latex, dvips and psnup
|
||||
are in your path.
|
||||
|
||||
This package includes matching sanitized kernel headers because
|
||||
the build environment may not have up to date versions. See Makefile
|
||||
if you have special requirements and need to point at different
|
||||
kernel include files.
|
||||
4. This package includes matching sanitized kernel headers because
|
||||
the build environment may not have up to date versions. See Makefile
|
||||
if you have special requirements and need to point at different
|
||||
kernel include files.
|
||||
|
||||
Stephen Hemminger
|
||||
stephen@networkplumber.org
|
||||
|
|
|
|||
|
|
@ -0,0 +1,33 @@
|
|||
|
||||
Here are a few quick points about DECnet support...
|
||||
|
||||
o iproute2 is the tool of choice for configuring the DECnet support for
|
||||
Linux. For many features, it is the only tool which can be used to
|
||||
configure them.
|
||||
|
||||
o No name resolution is available as yet, all addresses must be
|
||||
entered numerically.
|
||||
|
||||
o Remember to set the hardware address of the interface using:
|
||||
|
||||
ip link set ethX address xx:xx:xx:xx:xx:xx
|
||||
(where xx:xx:xx:xx:xx:xx is the MAC address for your DECnet node
|
||||
address)
|
||||
|
||||
if your Ethernet card won't listen to more than one unicast
|
||||
mac address at once. If the Linux DECnet stack doesn't talk to
|
||||
any other DECnet nodes, then check this with tcpdump and if it's
|
||||
a problem, change the mac address (but do this _before_ starting
|
||||
any other network protocol on the interface)
|
||||
|
||||
o Whilst you can use ip addr add to add more than one DECnet address to an
|
||||
interface, don't expect addresses which are not the same as the
|
||||
kernel's node address to work properly with 2.4 kernels. This should
|
||||
be fine with 2.6 kernels as the routing code has been extensively
|
||||
modified and improved.
|
||||
|
||||
o The DECnet support is currently self contained. It does not depend on
|
||||
the libdnet library.
|
||||
|
||||
Steve Whitehouse <steve@chygwyn.com>
|
||||
|
||||
17
README.devel
17
README.devel
|
|
@ -4,15 +4,12 @@ development. Most new features require a kernel and a utility component.
|
|||
Please submit both to the Linux networking mailing list
|
||||
<netdev@vger.kernel.org>
|
||||
|
||||
The current source for the stable version is in the git repository:
|
||||
git://git.kernel.org/pub/scm/network/iproute2/iproute2.git
|
||||
The current source is in the git repository:
|
||||
git://git.kernel.org/pub/scm/linux/kernel/git/shemminger/iproute2.git
|
||||
|
||||
The development git repository is available at the following address:
|
||||
git://git.kernel.org/pub/scm/network/iproute2/iproute2-next.git
|
||||
The master branch contains the source corresponding to the current
|
||||
code in the mainline Linux kernel (ie follows Linus). The net-next
|
||||
branch is a temporary branch that tracks the code intended for the
|
||||
next release; it corresponds with networking development branch in
|
||||
the kernel.
|
||||
|
||||
The stable repository contains the source corresponding to the
|
||||
current code in the Linux networking tree (net), which in turn is
|
||||
aligned on the mainline Linux kernel (ie follows Linus).
|
||||
The iproute2-next repository tracks the code intended for the next
|
||||
release; it corresponds with networking development tree (net-next)
|
||||
in the kernel.
|
||||
|
|
|
|||
|
|
@ -0,0 +1,95 @@
|
|||
I. About the distribution tables
|
||||
|
||||
The table used for "synthesizing" the distribution is essentially a scaled,
|
||||
translated, inverse to the cumulative distribution function.
|
||||
|
||||
Here's how to think about it: Let F() be the cumulative distribution
|
||||
function for a probability distribution X. We'll assume we've scaled
|
||||
things so that X has mean 0 and standard deviation 1, though that's not
|
||||
so important here. Then:
|
||||
|
||||
F(x) = P(X <= x) = \int_{-inf}^x f
|
||||
|
||||
where f is the probability density function.
|
||||
|
||||
F is monotonically increasing, so has an inverse function G, with domain
|
||||
0 to 1. Here, G(t) = the x such that P(X <= x) = t. (In general, G may
|
||||
have singularities if X has point masses, i.e., points x such that
|
||||
P(X = x) > 0.)
|
||||
|
||||
Now we create a tabular representation of G as follows: Choose some table
|
||||
size N, and for the ith entry, put in G(i/N). Let's call this table T.
|
||||
|
||||
The claim now is, I can create a (discrete) random variable Y whose
|
||||
distribution has the same approximate "shape" as X, simply by letting
|
||||
Y = T(U), where U is a discrete uniform random variable with range 1 to N.
|
||||
To see this, it's enough to show that Y's cumulative distribution function,
|
||||
(let's call it H), is a discrete approximation to F. But
|
||||
|
||||
H(x) = P(Y <= x)
|
||||
= (# of entries in T <= x) / N -- as Y chosen uniformly from T
|
||||
= i/N, where i is the largest integer such that G(i/N) <= x
|
||||
= i/N, where i is the largest integer such that i/N <= F(x)
|
||||
-- since G and F are inverse functions (and F is
|
||||
increasing)
|
||||
= floor(N*F(x))/N
|
||||
|
||||
as desired.
|
||||
|
||||
II. How to create distribution tables (in theory)
|
||||
|
||||
How can we create this table in practice? In some cases, F may have a
|
||||
simple expression which allows evaluating its inverse directly. The
|
||||
Pareto distribution is one example of this. In other cases, and
|
||||
especially for matching an experimentally observed distribution, it's
|
||||
easiest simply to create a table for F and "invert" it. Here, we give
|
||||
a concrete example, namely how the new "experimental" distribution was
|
||||
created.
|
||||
|
||||
1. Collect enough data points to characterize the distribution. Here, I
|
||||
collected 25,000 "ping" roundtrip times to a "distant" point (time.nist.gov).
|
||||
That's far more data than is really necessary, but it was fairly painless to
|
||||
collect it, so...
|
||||
|
||||
2. Normalize the data so that it has mean 0 and standard deviation 1.
|
||||
|
||||
3. Determine the cumulative distribution. The code I wrote creates a table
|
||||
covering the range -10 to +10, with granularity .00005. Obviously, this
|
||||
is absurdly over-precise, but since it's a one-time only computation, I
|
||||
figured it hardly mattered.
|
||||
|
||||
4. Invert the table: for each table entry F(x) = y, make the y*TABLESIZE
|
||||
(here, 4096) entry be x*TABLEFACTOR (here, 8192). This creates a table
|
||||
for the ("normalized") inverse of size TABLESIZE, covering its domain 0
|
||||
to 1 with granularity 1/TABLESIZE. Note that even with the granularity
|
||||
used in creating the table for F, it's possible not all the entries in
|
||||
the table for G will be filled in. So, make a pass through the
|
||||
inverse's table, filling in any missing entries by linear interpolation.
|
||||
|
||||
III. How to create distribution tables (in practice)
|
||||
|
||||
If you want to do all this yourself, I've provided several tools to help:
|
||||
|
||||
1. maketable does the steps 2-4 above, and then generates the appropriate
|
||||
header file. So if you have your own time distribution, you can generate
|
||||
the header simply by:
|
||||
|
||||
maketable < time.values > header.h
|
||||
|
||||
2. As explained in the other README file, the somewhat sleazy way I have
|
||||
of generating correlated values needs correction. You can generate your
|
||||
own correction tables by compiling makesigtable and makemutable with
|
||||
your header file. Check the Makefile to see how this is done.
|
||||
|
||||
3. Warning: maketable, makesigtable and especially makemutable do
|
||||
enormous amounts of floating point arithmetic. Don't try running
|
||||
these on an old 486. (NIST Net itself will run fine on such a
|
||||
system, since in operation, it just needs to do a few simple integral
|
||||
calculations. But getting there takes some work.)
|
||||
|
||||
4. The tables produced are all normalized for mean 0 and standard
|
||||
deviation 1. How do you know what values to use for real? Here, I've
|
||||
provided a simple "stats" utility. Give it a series of floating point
|
||||
values, and it will return their mean (mu), standard deviation (sigma),
|
||||
and correlation coefficient (rho). You can then plug these values
|
||||
directly into NIST Net.
|
||||
|
|
@ -0,0 +1,123 @@
|
|||
iproute2+tc*
|
||||
|
||||
This is the first release of the Linux traffic control engine.
|
||||
|
||||
|
||||
NOTES.
|
||||
* The csz scheduler is inoperative at the moment, and probably
|
||||
never will be repaired but replaced with h-pfq scheduler.
|
||||
* To use "fw" classifier you will need ipfwchains patch.
|
||||
* No manual available. Ask me, if you have problems (only try to guess
|
||||
answer yourself at first 8)).
|
||||
|
||||
|
||||
Micro-manual how to start it the first time
|
||||
-------------------------------------------
|
||||
|
||||
A. Attach CBQ to eth1:
|
||||
|
||||
tc qdisc add dev eth1 root handle 1: cbq bandwidth 10Mbit allot 1514 cell 8 \
|
||||
avpkt 1000 mpu 64
|
||||
|
||||
B. Add root class:
|
||||
|
||||
tc class add dev eth1 parent 1:0 classid 1:1 cbq bandwidth 10Mbit rate 10Mbit \
|
||||
allot 1514 cell 8 weight 1Mbit prio 8 maxburst 20 avpkt 1000
|
||||
|
||||
C. Add default interactive class:
|
||||
|
||||
tc class add dev eth1 parent 1:1 classid 1:2 cbq bandwidth 10Mbit rate 1Mbit \
|
||||
allot 1514 cell 8 weight 100Kbit prio 3 maxburst 20 avpkt 1000 split 1:0 \
|
||||
defmap c0
|
||||
|
||||
D. Add default class:
|
||||
|
||||
tc class add dev eth1 parent 1:1 classid 1:3 cbq bandwidth 10Mbit rate 8Mbit \
|
||||
allot 1514 cell 8 weight 800Kbit prio 7 maxburst 20 avpkt 1000 split 1:0 \
|
||||
defmap 3f
|
||||
|
||||
etc. etc. etc. Well, it is enough to start 8) The rest can be guessed 8)
|
||||
See also the more elaborate example, ready to start rsvpd,
|
||||
in rsvp/cbqinit.eth1.
|
||||
|
||||
|
||||
Terminology and advice about setting CBQ parameters may be found in Sally Floyd's
|
||||
papers.
|
||||
|
||||
|
||||
Pairs X:Y are class handles, X:0 are qdisc handles.
|
||||
weight should be proportional to rate for leaf classes
|
||||
(I chose it ten times smaller, but that is not necessary)
|
||||
|
||||
defmap is a bitmap of the logical priorities served by this class.
|
||||
|
||||
E. Other qdiscs are simpler. For example, let's attach TBF to class 1:2
|
||||
|
||||
tc qdisc add dev eth1 parent 1:2 tbf rate 64Kbit buffer 5Kb/8 limit 10Kb
|
||||
|
||||
F. Look at all that we created:
|
||||
|
||||
tc qdisc ls dev eth1
|
||||
tc class ls dev eth1
|
||||
|
||||
G. Install "route" classifier on root of cbq and map destination from realm
|
||||
1 to class 1:2
|
||||
|
||||
tc filter add dev eth1 parent 1:0 protocol ip prio 100 route to 1 classid 1:2
|
||||
|
||||
H. Assign routes to 10.11.12.0/24 to realm 1
|
||||
|
||||
ip route add 10.11.12.0/24 dev eth1 via whatever realm 1
|
||||
|
||||
etc. The same thing can be made with rules.
|
||||
I have not tested ipchains yet, but they should work too.
|
||||
|
||||
|
||||
Setup and code example of BPF classifier and action can be found under
|
||||
examples/bpf/, which should explain everything for getting started.
|
||||
|
||||
|
||||
Setup of rsvp and u32 classifiers is more hairy.
|
||||
If you read RSVP specs, you will understand how rsvp classifier
|
||||
works easily. As for u32... here is an example:
|
||||
|
||||
|
||||
#! /bin/sh
|
||||
|
||||
TC=/home/root/tc
|
||||
|
||||
# Setup classifier root on eth1 root (it is cbq)
|
||||
$TC filter add dev eth1 parent 1:0 prio 5 protocol ip u32
|
||||
|
||||
# Create hash table of 256 slots with ID 1:
|
||||
$TC filter add dev eth1 parent 1:0 prio 5 handle 1: u32 divisor 256
|
||||
|
||||
# Add to 6th slot of hash table rule to select tcp/telnet to 193.233.7.75
|
||||
# direct it to class 1:4 and prescribe to fall to best effort,
|
||||
# if traffic violates TBF (32kbit,5K)
|
||||
$TC filter add dev eth1 parent 1:0 prio 5 u32 ht 1:6: \
|
||||
match ip dst 193.233.7.75 \
|
||||
match tcp dst 0x17 0xffff \
|
||||
flowid 1:4 \
|
||||
police rate 32kbit buffer 5kb/8 mpu 64 mtu 1514 index 1
|
||||
|
||||
# Add to 1st slot of hash table rule to select icmp to 193.233.7.75
|
||||
# direct it to class 1:4 and prescribe to fall to best effort,
|
||||
# if traffic violates TBF (10kbit,5K)
|
||||
$TC filter add dev eth1 parent 1:0 prio 5 u32 ht 1:: \
|
||||
sample ip protocol 1 0xff \
|
||||
match ip dst 193.233.7.75 \
|
||||
flowid 1:4 \
|
||||
police rate 10kbit buffer 5kb/8 mpu 64 mtu 1514 index 2
|
||||
|
||||
# Lookup hash table, if it is not a fragmented frame
|
||||
# Use protocol as hash key
|
||||
$TC filter add dev eth1 parent 1:0 prio 5 handle ::1 u32 ht 800:: \
|
||||
match ip nofrag \
|
||||
offset mask 0x0F00 shift 6 \
|
||||
hashkey mask 0x00ff0000 at 8 \
|
||||
link 1:
|
||||
|
||||
|
||||
Alexey Kuznetsov
|
||||
kuznet@ms2.inr.ac.ru
|
||||
|
|
@ -0,0 +1,81 @@
|
|||
lnstat - linux networking statistics
|
||||
(C) 2004 Harald Welte <laforge@gnumonks.org>
|
||||
======================================================================
|
||||
|
||||
This tool is a generalized and more feature-complete replacement for the old
|
||||
'rtstat' program.
|
||||
|
||||
In addition to routing cache statistics, it supports any kind of statistics
|
||||
the linux kernel exports via a file in /proc/net/stat. In a stock 2.6.9
|
||||
kernel, this is
|
||||
per-protocol neighbour cache statistics
|
||||
(ipv4, ipv6, atm, decnet)
|
||||
routing cache statistics
|
||||
(ipv4)
|
||||
connection tracking statistics
|
||||
(ipv4)
|
||||
|
||||
Please note that lnstat will adapt to any additional statistics that might be
|
||||
added to the kernel at some later point
|
||||
|
||||
I personally always like examples more than any reference documentation, so I
|
||||
list the following examples. If somebody wants to do a manpage, feel free
|
||||
to send me a patch :)
|
||||
|
||||
EXAMPLES:
|
||||
|
||||
In order to get a list of supported statistics files, you can run
|
||||
|
||||
lnstat -d
|
||||
|
||||
It will display something like
|
||||
|
||||
/proc/net/stat/arp_cache:
|
||||
1: entries
|
||||
2: allocs
|
||||
3: destroys
|
||||
[...]
|
||||
/proc/net/stat/rt_cache:
|
||||
1: entries
|
||||
2: in_hit
|
||||
3: in_slow_tot
|
||||
|
||||
You can now select the files/keys you are interested in by something like
|
||||
|
||||
lnstat -k arp_cache:entries,rt_cache:in_hit,arp_cache:destroys
|
||||
|
||||
arp_cach|rt_cache|arp_cach|
|
||||
entries| in_hit|destroys|
|
||||
6| 6| 0|
|
||||
6| 0| 0|
|
||||
6| 2| 0|
|
||||
|
||||
|
||||
You can specify the interval (e.g. 10 seconds) by:
|
||||
|
||||
lnstat -i 10
|
||||
|
||||
You can specify to only use one particular statistics file:
|
||||
|
||||
lnstat -f ip_conntrack
|
||||
|
||||
You can specify individual field widths
|
||||
|
||||
lnstat -k arp_cache:entries,rt_cache:entries -w 20,8
|
||||
|
||||
You can specify not to print a header at all
|
||||
|
||||
lnstat -s 0
|
||||
|
||||
You can specify to print a header only at start of the program
|
||||
|
||||
lnstat -s 1
|
||||
|
||||
You can specify to print a header at start and every 20 lines:
|
||||
|
||||
lnstat -s 20
|
||||
|
||||
You can specify the number of samples you want to take (e.g. 5):
|
||||
|
||||
lnstat -c 5
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
|
|
@ -1,809 +0,0 @@
|
|||
# tc(8) completion -*- shell-script -*-
|
||||
# Copyright 2016 6WIND S.A.
|
||||
# Copyright 2016 Quentin Monnet <quentin.monnet@6wind.com>
|
||||
|
||||
QDISC_KIND=' choke codel bfifo pfifo pfifo_head_drop fq fq_codel gred hhf \
|
||||
mqprio multiq netem pfifo_fast pie fq_pie red rr sfb sfq tbf atm \
|
||||
cbq drr dsmark hfsc htb prio qfq '
|
||||
FILTER_KIND=' basic bpf cgroup flow flower fw route rsvp tcindex u32 matchall '
|
||||
ACTION_KIND=' gact mirred bpf sample '
|
||||
|
||||
# Takes a list of words in argument; each one of them is added to COMPREPLY if
|
||||
# it is not already present on the command line. Returns no value.
|
||||
_tc_once_attr()
|
||||
{
|
||||
local w subcword found
|
||||
for w in $*; do
|
||||
found=0
|
||||
for (( subcword=3; subcword < ${#words[@]}-1; subcword++ )); do
|
||||
if [[ $w == ${words[subcword]} ]]; then
|
||||
found=1
|
||||
break
|
||||
fi
|
||||
done
|
||||
[[ $found -eq 0 ]] && \
|
||||
COMPREPLY+=( $( compgen -W "$w" -- "$cur" ) )
|
||||
done
|
||||
}
|
||||
|
||||
# Takes a list of words in argument; each one of them is added to COMPREPLY if
|
||||
# it is not already present on the command line from the provided index. Returns
|
||||
# no value.
|
||||
_tc_once_attr_from()
|
||||
{
|
||||
local w subcword found from=$1
|
||||
shift
|
||||
for w in $*; do
|
||||
found=0
|
||||
for (( subcword=$from; subcword < ${#words[@]}-1; subcword++ )); do
|
||||
if [[ $w == ${words[subcword]} ]]; then
|
||||
found=1
|
||||
break
|
||||
fi
|
||||
done
|
||||
[[ $found -eq 0 ]] && \
|
||||
COMPREPLY+=( $( compgen -W "$w" -- "$cur" ) )
|
||||
done
|
||||
}
|
||||
|
||||
# Takes a list of words in argument; adds them all to COMPREPLY if none of them
|
||||
# is already present on the command line. Returns no value.
|
||||
_tc_one_of_list()
|
||||
{
|
||||
local w subcword
|
||||
for w in $*; do
|
||||
for (( subcword=3; subcword < ${#words[@]}-1; subcword++ )); do
|
||||
[[ $w == ${words[subcword]} ]] && return 1
|
||||
done
|
||||
done
|
||||
COMPREPLY+=( $( compgen -W "$*" -- "$cur" ) )
|
||||
}
|
||||
|
||||
# Takes a list of words in argument; adds them all to COMPREPLY if none of them
|
||||
# is already present on the command line from the provided index. Returns no
|
||||
# value.
|
||||
_tc_one_of_list_from()
|
||||
{
|
||||
local w subcword from=$1
|
||||
shift
|
||||
for w in $*; do
|
||||
for (( subcword=$from; subcword < ${#words[@]}-1; subcword++ )); do
|
||||
[[ $w == ${words[subcword]} ]] && return 1
|
||||
done
|
||||
done
|
||||
COMPREPLY+=( $( compgen -W "$*" -- "$cur" ) )
|
||||
}
|
||||
|
||||
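# Prints "$cur" (only if it is purely numeric) then "${value}arg1 ${value}arg2 ...", where $value is the leading digits of $cur; returns 1 if $cur does not start with a digit.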
|
||||
_tc_expand_units()
|
||||
{
|
||||
[[ $cur =~ ^[0-9]+ ]] || return 1
|
||||
local value=${cur%%[^0-9]*}
|
||||
[[ $cur == $value ]] && echo $cur
|
||||
echo ${@/#/$value}
|
||||
}
|
||||
|
||||
# Complete based on given word, usually $prev (or possibly the word before),
|
||||
# for when an argument or an option name has but a few possible arguments (so
|
||||
# tc does not take particular commands into account here).
|
||||
# Returns 0 if completion should stop after running this function, 1 otherwise.
|
||||
_tc_direct_complete()
|
||||
{
|
||||
case $1 in
|
||||
# Command options
|
||||
dev)
|
||||
_available_interfaces
|
||||
return 0
|
||||
;;
|
||||
classid)
|
||||
return 0
|
||||
;;
|
||||
estimator)
|
||||
local list=$( _tc_expand_units 'secs' 'msecs' 'usecs' )
|
||||
COMPREPLY+=( $( compgen -W "$list" -- "$cur" ) )
|
||||
return 0
|
||||
;;
|
||||
handle)
|
||||
return 0
|
||||
;;
|
||||
parent|flowid)
|
||||
local i iface ids cmd
|
||||
for (( i=3; i < ${#words[@]}-2; i++ )); do
|
||||
[[ ${words[i]} == dev ]] && iface=${words[i+1]}
|
||||
break
|
||||
done
|
||||
for cmd in qdisc class; do
|
||||
if [[ -n $iface ]]; then
|
||||
ids+=$( tc $cmd show dev $iface 2>/dev/null | \
|
||||
cut -d\ -f 3 )" "
|
||||
else
|
||||
ids+=$( tc $cmd show 2>/dev/null | cut -d\ -f 3 )
|
||||
fi
|
||||
done
|
||||
[[ $ids != " " ]] && \
|
||||
COMPREPLY+=( $( compgen -W "$ids" -- "$cur" ) )
|
||||
return 0
|
||||
;;
|
||||
protocol) # list comes from lib/ll_proto.c
|
||||
COMPREPLY+=( $( compgen -W ' 802.1Q 802.1ad 802_2 802_3 LLDP aarp \
|
||||
all aoe arp atalk atmfate atmmpoa ax25 bpq can control cust \
|
||||
ddcmp dec diag dna_dl dna_rc dna_rt econet ieeepup ieeepupat \
|
||||
ip ipv4 ipv6 ipx irda lat localtalk loop mobitex ppp_disc \
|
||||
ppp_mp ppp_ses ppptalk pup pupat rarp sca snap tipc tr_802_2 \
|
||||
wan_ppp x25' -- "$cur" ) )
|
||||
return 0
|
||||
;;
|
||||
prio)
|
||||
return 0
|
||||
;;
|
||||
stab)
|
||||
COMPREPLY+=( $( compgen -W 'mtu tsize mpu overhead
|
||||
linklayer' -- "$cur" ) )
|
||||
;;
|
||||
|
||||
# Qdiscs and classes options
|
||||
alpha|bands|beta|buckets|corrupt|debug|decrement|default|\
|
||||
default_index|depth|direct_qlen|divisor|duplicate|ewma|flow_limit|\
|
||||
flows|hh_limit|increment|indices|linklayer|non_hh_weight|num_tc|\
|
||||
penalty_burst|penalty_rate|prio|priomap|probability|queues|r2q|\
|
||||
reorder|vq|vqs)
|
||||
return 0
|
||||
;;
|
||||
setup)
|
||||
COMPREPLY+=( $( compgen -W 'vqs' -- "$cur" ) )
|
||||
return 0
|
||||
;;
|
||||
hw)
|
||||
COMPREPLY+=( $( compgen -W '1 0' -- "$cur" ) )
|
||||
return 0
|
||||
;;
|
||||
distribution)
|
||||
COMPREPLY+=( $( compgen -W 'uniform normal pareto
|
||||
paretonormal' -- "$cur" ) )
|
||||
return 0
|
||||
;;
|
||||
loss)
|
||||
COMPREPLY+=( $( compgen -W 'random state gmodel' -- "$cur" ) )
|
||||
return 0
|
||||
;;
|
||||
|
||||
# Qdiscs and classes options options
|
||||
gap|gmodel|state)
|
||||
return 0
|
||||
;;
|
||||
|
||||
# Filters options
|
||||
map)
|
||||
COMPREPLY+=( $( compgen -W 'key' -- "$cur" ) )
|
||||
return 0
|
||||
;;
|
||||
hash)
|
||||
COMPREPLY+=( $( compgen -W 'keys' -- "$cur" ) )
|
||||
return 0
|
||||
;;
|
||||
indev)
|
||||
_available_interfaces
|
||||
return 0
|
||||
;;
|
||||
eth_type)
|
||||
COMPREPLY+=( $( compgen -W 'ipv4 ipv6' -- "$cur" ) )
|
||||
return 0
|
||||
;;
|
||||
ip_proto)
|
||||
COMPREPLY+=( $( compgen -W 'tcp udp' -- "$cur" ) )
|
||||
return 0
|
||||
;;
|
||||
|
||||
# Filters options options
|
||||
key|keys)
|
||||
[[ ${words[@]} =~ graft ]] && return 1
|
||||
COMPREPLY+=( $( compgen -W 'src dst proto proto-src proto-dst iif \
|
||||
priority mark nfct nfct-src nfct-dst nfct-proto-src \
|
||||
nfct-proto-dst rt-classid sk-uid sk-gid vlan-tag rxhash' -- \
|
||||
"$cur" ) )
|
||||
return 0
|
||||
;;
|
||||
|
||||
# BPF options - used for filters, actions, and exec
|
||||
export|bytecode|bytecode-file|object-file)
|
||||
_filedir
|
||||
return 0
|
||||
;;
|
||||
object-pinned|graft) # Pinned object is probably under /sys/fs/bpf/
|
||||
[[ -n "$cur" ]] && _filedir && return 0
|
||||
COMPREPLY=( $( compgen -G "/sys/fs/bpf/*" -- "$cur" ) ) || _filedir
|
||||
compopt -o nospace
|
||||
return 0
|
||||
;;
|
||||
section)
|
||||
if (type objdump > /dev/null 2>&1) ; then
|
||||
local fword objfile section_list
|
||||
for (( fword=3; fword < ${#words[@]}-3; fword++ )); do
|
||||
if [[ ${words[fword]} == object-file ]]; then
|
||||
objfile=${words[fword+1]}
|
||||
break
|
||||
fi
|
||||
done
|
||||
section_list=$( objdump -h $objfile 2>/dev/null | \
|
||||
sed -n 's/^ *[0-9]\+ \([^ ]*\) *.*/\1/p' )
|
||||
COMPREPLY+=( $( compgen -W "$section_list" -- "$cur" ) )
|
||||
fi
|
||||
return 0
|
||||
;;
|
||||
import|run)
|
||||
_filedir
|
||||
return 0
|
||||
;;
|
||||
type)
|
||||
COMPREPLY+=( $( compgen -W 'cls act' -- "$cur" ) )
|
||||
return 0
|
||||
;;
|
||||
|
||||
# Actions options
|
||||
random)
|
||||
_tc_one_of_list 'netrand determ'
|
||||
return 0
|
||||
;;
|
||||
|
||||
# Units for option arguments
|
||||
bandwidth|maxrate|peakrate|rate)
|
||||
local list=$( _tc_expand_units 'bit' \
|
||||
'kbit' 'kibit' 'kbps' 'kibps' \
|
||||
'mbit' 'mibit' 'mbps' 'mibps' \
|
||||
'gbit' 'gibit' 'gbps' 'gibps' \
|
||||
'tbit' 'tibit' 'tbps' 'tibps' )
|
||||
COMPREPLY+=( $( compgen -W "$list" -- "$cur" ) )
|
||||
;;
|
||||
admit_bytes|avpkt|burst|cell|initial_quantum|limit|max|min|mtu|mpu|\
|
||||
overhead|quantum|redflowlist)
|
||||
local list=$( _tc_expand_units \
|
||||
'b' 'kbit' 'k' 'mbit' 'm' 'gbit' 'g' )
|
||||
COMPREPLY+=( $( compgen -W "$list" -- "$cur" ) )
|
||||
;;
|
||||
db|delay|evict_timeout|interval|latency|perturb|rehash|reset_timeout|\
|
||||
target|tupdate)
|
||||
local list=$( _tc_expand_units 'secs' 'msecs' 'usecs' )
|
||||
COMPREPLY+=( $( compgen -W "$list" -- "$cur" ) )
|
||||
;;
|
||||
esac
|
||||
return 1
|
||||
}
|
||||
|
||||
# Complete with options names for qdiscs. Each qdisc has its own set of options
|
||||
# and it seems we cannot really parse it from anywhere, so we add it manually
|
||||
# in this function.
|
||||
# Returns 0 if completion should stop after running this function, 1 otherwise.
|
||||
_tc_qdisc_options()
|
||||
{
|
||||
case $1 in
|
||||
choke)
|
||||
_tc_once_attr 'limit bandwidth ecn min max burst'
|
||||
return 0
|
||||
;;
|
||||
codel)
|
||||
_tc_once_attr 'limit target interval'
|
||||
_tc_one_of_list 'ecn noecn'
|
||||
return 0
|
||||
;;
|
||||
bfifo|pfifo|pfifo_head_drop)
|
||||
_tc_once_attr 'limit'
|
||||
return 0
|
||||
;;
|
||||
fq)
|
||||
_tc_once_attr 'limit flow_limit quantum initial_quantum maxrate \
|
||||
buckets'
|
||||
_tc_one_of_list 'pacing nopacing'
|
||||
return 0
|
||||
;;
|
||||
fq_codel)
|
||||
_tc_once_attr 'limit flows target interval quantum'
|
||||
_tc_one_of_list 'ecn noecn'
|
||||
return 0
|
||||
;;
|
||||
gred)
|
||||
_tc_once_attr 'setup vqs default grio vq prio limit min max avpkt \
|
||||
burst probability bandwidth ecn harddrop'
|
||||
return 0
|
||||
;;
|
||||
hhf)
|
||||
_tc_once_attr 'limit quantum hh_limit reset_timeout admit_bytes \
|
||||
evict_timeout non_hh_weight'
|
||||
return 0
|
||||
;;
|
||||
mqprio)
|
||||
_tc_once_attr 'num_tc map queues hw'
|
||||
return 0
|
||||
;;
|
||||
netem)
|
||||
_tc_once_attr 'delay distribution corrupt duplicate loss ecn \
|
||||
reorder rate'
|
||||
return 0
|
||||
;;
|
||||
pie)
|
||||
_tc_once_attr 'limit target tupdate alpha beta'
|
||||
_tc_one_of_list 'bytemode nobytemode'
|
||||
_tc_one_of_list 'ecn noecn'
|
||||
_tc_one_of_list 'dq_rate_estimator no_dq_rate_estimator'
|
||||
return 0
|
||||
;;
|
||||
fq_pie)
|
||||
_tc_once_attr 'limit flows target tupdate \
|
||||
alpha beta quantum memory_limit ecn_prob'
|
||||
_tc_one_of_list 'ecn noecn'
|
||||
_tc_one_of_list 'bytemode nobytemode'
|
||||
_tc_one_of_list 'dq_rate_estimator no_dq_rate_estimator'
|
||||
return 0
|
||||
;;
|
||||
red)
|
||||
_tc_once_attr 'limit min max avpkt burst adaptive probability \
|
||||
bandwidth ecn harddrop'
|
||||
return 0
|
||||
;;
|
||||
rr|prio)
|
||||
_tc_once_attr 'bands priomap multiqueue'
|
||||
return 0
|
||||
;;
|
||||
sfb)
|
||||
_tc_once_attr 'rehash db limit max target increment decrement \
|
||||
penalty_rate penalty_burst'
|
||||
return 0
|
||||
;;
|
||||
sfq)
|
||||
_tc_once_attr 'limit perturb quantum divisor flows depth headdrop \
|
||||
redflowlimit min max avpkt burst probability ecn harddrop'
|
||||
return 0
|
||||
;;
|
||||
tbf)
|
||||
_tc_once_attr 'limit burst rate mtu peakrate latency overhead \
|
||||
linklayer'
|
||||
return 0
|
||||
;;
|
||||
cbq)
|
||||
_tc_once_attr 'bandwidth avpkt mpu cell ewma'
|
||||
return 0
|
||||
;;
|
||||
dsmark)
|
||||
_tc_once_attr 'indices default_index set_tc_index'
|
||||
return 0
|
||||
;;
|
||||
hfsc)
|
||||
_tc_once_attr 'default'
|
||||
return 0
|
||||
;;
|
||||
htb)
|
||||
_tc_once_attr 'default r2q direct_qlen debug'
|
||||
return 0
|
||||
;;
|
||||
multiq|pfifo_fast|atm|drr|qfq)
|
||||
return 0
|
||||
;;
|
||||
esac
|
||||
return 1
|
||||
}
|
||||
|
||||
# Complete with options names for BPF filters or actions.
|
||||
# Returns 0 if completion should stop after running this function, 1 otherwise.
|
||||
_tc_bpf_options()
|
||||
{
|
||||
[[ ${words[${#words[@]}-3]} == object-file ]] && \
|
||||
_tc_once_attr 'section export'
|
||||
[[ ${words[${#words[@]}-5]} == object-file ]] && \
|
||||
[[ ${words[${#words[@]}-3]} =~ (section|export) ]] && \
|
||||
_tc_once_attr 'section export'
|
||||
_tc_one_of_list 'bytecode bytecode-file object-file object-pinned'
|
||||
_tc_once_attr 'verbose index direct-action action classid'
|
||||
return 0
|
||||
}
|
||||
|
||||
# Complete with options names for filter actions.
|
||||
# This function is recursive, thus allowing multiple action statements to be
|
||||
# parsed.
|
||||
# Returns 0 if completion should stop after running this function, 1 otherwise.
|
||||
_tc_filter_action_options()
|
||||
{
|
||||
for ((acwd=$1; acwd < ${#words[@]}-1; acwd++));
|
||||
do
|
||||
if [[ action == ${words[acwd]} ]]; then
|
||||
_tc_filter_action_options $((acwd+1)) && return 0
|
||||
fi
|
||||
done
|
||||
|
||||
local action acwd
|
||||
for ((acwd=$1; acwd < ${#words[@]}-1; acwd++)); do
|
||||
if [[ $ACTION_KIND =~ ' '${words[acwd]}' ' ]]; then
|
||||
_tc_one_of_list_from $acwd action
|
||||
_tc_action_options $acwd && return 0
|
||||
fi
|
||||
done
|
||||
_tc_one_of_list_from $acwd $ACTION_KIND
|
||||
return 0
|
||||
}
|
||||
|
||||
# Complete with options names for filters.
|
||||
# Returns 0 if completion should stop after running this function, 1 otherwise.
|
||||
_tc_filter_options()
|
||||
{
|
||||
|
||||
for ((acwd=$1; acwd < ${#words[@]}-1; acwd++));
|
||||
do
|
||||
if [[ action == ${words[acwd]} ]]; then
|
||||
_tc_filter_action_options $((acwd+1)) && return 0
|
||||
fi
|
||||
done
|
||||
|
||||
filter=${words[$1]}
|
||||
case $filter in
|
||||
basic)
|
||||
_tc_once_attr 'match action classid'
|
||||
return 0
|
||||
;;
|
||||
bpf)
|
||||
_tc_bpf_options
|
||||
return 0
|
||||
;;
|
||||
cgroup)
|
||||
_tc_once_attr 'match action'
|
||||
return 0
|
||||
;;
|
||||
flow)
|
||||
local i
|
||||
for (( i=5; i < ${#words[@]}-1; i++ )); do
|
||||
if [[ ${words[i]} =~ ^keys?$ ]]; then
|
||||
_tc_direct_complete 'key'
|
||||
COMPREPLY+=( $( compgen -W 'or and xor rshift addend' -- \
|
||||
"$cur" ) )
|
||||
break
|
||||
fi
|
||||
done
|
||||
_tc_once_attr 'map hash divisor baseclass match action'
|
||||
return 0
|
||||
;;
|
||||
matchall)
|
||||
_tc_once_attr 'action classid skip_sw skip_hw'
|
||||
return 0
|
||||
;;
|
||||
flower)
|
||||
_tc_once_attr 'action classid indev dst_mac src_mac eth_type \
|
||||
ip_proto dst_ip src_ip dst_port src_port'
|
||||
return 0
|
||||
;;
|
||||
fw)
|
||||
_tc_once_attr 'action classid'
|
||||
return 0
|
||||
;;
|
||||
route)
|
||||
_tc_one_of_list 'from fromif'
|
||||
_tc_once_attr 'to classid action'
|
||||
return 0
|
||||
;;
|
||||
rsvp)
|
||||
_tc_once_attr 'ipproto session sender classid action tunnelid \
|
||||
tunnel flowlabel spi/ah spi/esp u8 u16 u32'
|
||||
[[ ${words[${#words[@]}-3]} == tunnel ]] && \
|
||||
COMPREPLY+=( $( compgen -W 'skip' -- "$cur" ) )
|
||||
[[ ${words[${#words[@]}-3]} =~ u(8|16|32) ]] && \
|
||||
COMPREPLY+=( $( compgen -W 'mask' -- "$cur" ) )
|
||||
[[ ${words[${#words[@]}-3]} == mask ]] && \
|
||||
COMPREPLY+=( $( compgen -W 'at' -- "$cur" ) )
|
||||
return 0
|
||||
;;
|
||||
tcindex)
|
||||
_tc_once_attr 'hash mask shift classid action'
|
||||
_tc_one_of_list 'pass_on fall_through'
|
||||
return 0
|
||||
;;
|
||||
u32)
|
||||
_tc_once_attr 'match link classid action offset ht hashkey sample'
|
||||
COMPREPLY+=( $( compgen -W 'ip ip6 udp tcp icmp u8 u16 u32 mark \
|
||||
divisor' -- "$cur" ) )
|
||||
return 0
|
||||
;;
|
||||
esac
|
||||
return 1
|
||||
}
|
||||
|
||||
# Complete with options names for actions.
|
||||
# Returns 0 if completion should stop after running this function, 1 otherwise.
|
||||
_tc_action_options()
|
||||
{
|
||||
local from=$1
|
||||
local action=${words[from]}
|
||||
case $action in
|
||||
bpf)
|
||||
_tc_bpf_options
|
||||
return 0
|
||||
;;
|
||||
mirred)
|
||||
_tc_one_of_list_from $from 'ingress egress'
|
||||
_tc_one_of_list_from $from 'mirror redirect'
|
||||
_tc_once_attr_from $from 'index dev'
|
||||
return 0
|
||||
;;
|
||||
sample)
|
||||
_tc_once_attr_from $from 'rate'
|
||||
_tc_once_attr_from $from 'trunc'
|
||||
_tc_once_attr_from $from 'group'
|
||||
return 0
|
||||
;;
|
||||
gact)
|
||||
_tc_one_of_list_from $from 'reclassify drop continue pass'
|
||||
_tc_once_attr_from $from 'random'
|
||||
return 0
|
||||
;;
|
||||
esac
|
||||
return 1
|
||||
}
|
||||
|
||||
# Complete with options names for exec.
|
||||
# Returns 0 if completion should stop after running this function, 1 otherwise.
|
||||
_tc_exec_options()
|
||||
{
|
||||
case $1 in
|
||||
import)
|
||||
[[ ${words[${#words[@]}-3]} == import ]] && \
|
||||
_tc_once_attr 'run'
|
||||
return 0
|
||||
;;
|
||||
graft)
|
||||
COMPREPLY+=( $( compgen -W 'key type' -- "$cur" ) )
|
||||
[[ ${words[${#words[@]}-3]} == object-file ]] && \
|
||||
_tc_once_attr 'type'
|
||||
_tc_bpf_options
|
||||
return 0
|
||||
;;
|
||||
esac
|
||||
return 1
|
||||
}
|
||||
|
||||
# Main completion function
|
||||
# Logic is as follows:
|
||||
# 1. Check if previous word is a global option; if so, propose arguments.
|
||||
# 2. Check if current word is a global option; if so, propose completion.
|
||||
# 3. Check for the presence of a main command (qdisc|class|filter|...). If
|
||||
# there is one, first call _tc_direct_complete to see if previous word is
|
||||
# waiting for a particular completion. If so, propose completion and exit.
|
||||
# 4. Extract main command and -- if available -- its subcommand
|
||||
# (add|delete|show|...).
|
||||
# 5. Propose completion based on main and sub- command in use. Additional
|
||||
# functions may be called for qdiscs, classes or filter options.
|
||||
_tc()
|
||||
{
|
||||
local cur prev words cword
|
||||
_init_completion || return
|
||||
|
||||
case $prev in
|
||||
-V|-Version)
|
||||
return 0
|
||||
;;
|
||||
-b|-batch|-cf|-conf)
|
||||
_filedir
|
||||
return 0
|
||||
;;
|
||||
-force)
|
||||
COMPREPLY=( $( compgen -W '-batch' -- "$cur" ) )
|
||||
return 0
|
||||
;;
|
||||
-nm|name)
|
||||
[[ -r /etc/iproute2/tc_cls ]] || \
|
||||
COMPREPLY=( $( compgen -W '-conf' -- "$cur" ) )
|
||||
return 0
|
||||
;;
|
||||
-n|-net|-netns)
|
||||
local nslist=$( ip netns list 2>/dev/null )
|
||||
COMPREPLY+=( $( compgen -W "$nslist" -- "$cur" ) )
|
||||
return 0
|
||||
;;
|
||||
-tshort)
|
||||
_tc_once_attr '-statistics'
|
||||
COMPREPLY+=( $( compgen -W 'monitor' -- "$cur" ) )
|
||||
return 0
|
||||
;;
|
||||
-timestamp)
|
||||
_tc_once_attr '-statistics -tshort'
|
||||
COMPREPLY+=( $( compgen -W 'monitor' -- "$cur" ) )
|
||||
return 0
|
||||
;;
|
||||
esac
|
||||
|
||||
# Search for main commands
|
||||
local subcword cmd subcmd
|
||||
for (( subcword=1; subcword < ${#words[@]}-1; subcword++ )); do
|
||||
[[ ${words[subcword]} == -b?(atch) ]] && return 0
|
||||
[[ -n $cmd ]] && subcmd=${words[subcword]} && break
|
||||
[[ ${words[subcword]} != -* && \
|
||||
${words[subcword-1]} != -@(n?(et?(ns))|c?(on)f) ]] && \
|
||||
cmd=${words[subcword]}
|
||||
done
|
||||
|
||||
if [[ -z $cmd ]]; then
|
||||
case $cur in
|
||||
-*)
|
||||
local c='-Version -statistics -details -raw -pretty \
|
||||
-iec -graphe -batch -name -netns -timestamp'
|
||||
[[ $cword -eq 1 ]] && c+=' -force'
|
||||
COMPREPLY=( $( compgen -W "$c" -- "$cur" ) )
|
||||
return 0
|
||||
;;
|
||||
*)
|
||||
COMPREPLY=( $( compgen -W "help $( tc help 2>&1 | \
|
||||
command sed \
|
||||
-e '/OBJECT := /!d' \
|
||||
-e 's/.*{//' \
|
||||
-e 's/}.*//' \
|
||||
-e \ 's/|//g' )" -- "$cur" ) )
|
||||
return 0
|
||||
;;
|
||||
esac
|
||||
fi
|
||||
|
||||
[[ $subcmd == help ]] && return 0
|
||||
|
||||
# For this set of commands we may create COMPREPLY just by analysing the
|
||||
# previous word, if it expects a specific list of options or values.
|
||||
if [[ $cmd =~ (qdisc|class|filter|action|exec) ]]; then
|
||||
_tc_direct_complete $prev && return 0
|
||||
if [[ ${words[${#words[@]}-3]} == estimator ]]; then
|
||||
local list=$( _tc_expand_units 'secs' 'msecs' 'usecs' )
|
||||
COMPREPLY+=( $( compgen -W "$list" -- "$cur" ) ) && return 0
|
||||
fi
|
||||
fi
|
||||
|
||||
# Completion depends on main command and subcommand in use.
|
||||
case $cmd in
|
||||
qdisc)
|
||||
case $subcmd in
|
||||
add|change|replace|link|del|delete)
|
||||
if [[ $(($cword-$subcword)) -eq 1 ]]; then
|
||||
COMPREPLY=( $( compgen -W 'dev' -- "$cur" ) )
|
||||
return 0
|
||||
fi
|
||||
local qdisc qdwd
|
||||
for ((qdwd=$subcword; qdwd < ${#words[@]}-1; qdwd++)); do
|
||||
if [[ $QDISC_KIND =~ ' '${words[qdwd]}' ' ]]; then
|
||||
qdisc=${words[qdwd]}
|
||||
_tc_qdisc_options $qdisc && return 0
|
||||
fi
|
||||
done
|
||||
_tc_one_of_list $QDISC_KIND
|
||||
_tc_one_of_list 'root ingress parent clsact'
|
||||
_tc_once_attr 'handle estimator stab'
|
||||
;;
|
||||
show)
|
||||
_tc_once_attr 'dev'
|
||||
_tc_one_of_list 'ingress clsact'
|
||||
_tc_once_attr '-statistics -details -raw -pretty -iec \
|
||||
-graph -name'
|
||||
;;
|
||||
help)
|
||||
return 0
|
||||
;;
|
||||
*)
|
||||
[[ $cword -eq $subcword ]] && \
|
||||
COMPREPLY=( $( compgen -W 'help add delete change \
|
||||
replace link show' -- "$cur" ) )
|
||||
;;
|
||||
esac
|
||||
;;
|
||||
|
||||
class)
|
||||
case $subcmd in
|
||||
add|change|replace|del|delete)
|
||||
if [[ $(($cword-$subcword)) -eq 1 ]]; then
|
||||
COMPREPLY=( $( compgen -W 'dev' -- "$cur" ) )
|
||||
return 0
|
||||
fi
|
||||
local qdisc qdwd
|
||||
for ((qdwd=$subcword; qdwd < ${#words[@]}-1; qdwd++)); do
|
||||
if [[ $QDISC_KIND =~ ' '${words[qdwd]}' ' ]]; then
|
||||
qdisc=${words[qdwd]}
|
||||
_tc_qdisc_options $qdisc && return 0
|
||||
fi
|
||||
done
|
||||
_tc_one_of_list $QDISC_KIND
|
||||
_tc_one_of_list 'root parent'
|
||||
_tc_once_attr 'classid'
|
||||
;;
|
||||
show)
|
||||
_tc_once_attr 'dev'
|
||||
_tc_one_of_list 'root parent'
|
||||
_tc_once_attr '-statistics -details -raw -pretty -iec \
|
||||
-graph -name'
|
||||
;;
|
||||
help)
|
||||
return 0
|
||||
;;
|
||||
*)
|
||||
[[ $cword -eq $subcword ]] && \
|
||||
COMPREPLY=( $( compgen -W 'help add delete change \
|
||||
replace show' -- "$cur" ) )
|
||||
;;
|
||||
esac
|
||||
;;
|
||||
|
||||
filter)
|
||||
case $subcmd in
|
||||
add|change|replace|del|delete)
|
||||
if [[ $(($cword-$subcword)) -eq 1 ]]; then
|
||||
COMPREPLY=( $( compgen -W 'dev' -- "$cur" ) )
|
||||
return 0
|
||||
fi
|
||||
local filter fltwd
|
||||
for ((fltwd=$subcword; fltwd < ${#words[@]}-1; fltwd++));
|
||||
do
|
||||
if [[ $FILTER_KIND =~ ' '${words[fltwd]}' ' ]]; then
|
||||
_tc_filter_options $fltwd && return 0
|
||||
fi
|
||||
done
|
||||
_tc_one_of_list $FILTER_KIND
|
||||
_tc_one_of_list 'root ingress egress parent'
|
||||
_tc_once_attr 'handle estimator pref protocol'
|
||||
;;
|
||||
show)
|
||||
_tc_once_attr 'dev'
|
||||
_tc_one_of_list 'root ingress egress parent'
|
||||
_tc_once_attr '-statistics -details -raw -pretty -iec \
|
||||
-graph -name'
|
||||
;;
|
||||
help)
|
||||
return 0
|
||||
;;
|
||||
*)
|
||||
[[ $cword -eq $subcword ]] && \
|
||||
COMPREPLY=( $( compgen -W 'help add delete change \
|
||||
replace show' -- "$cur" ) )
|
||||
;;
|
||||
esac
|
||||
;;
|
||||
|
||||
action)
|
||||
case $subcmd in
|
||||
add|change|replace)
|
||||
local action acwd
|
||||
for ((acwd=$subcword; acwd < ${#words[@]}-1; acwd++)); do
|
||||
if [[ $ACTION_KIND =~ ' '${words[acwd]}' ' ]]; then
|
||||
_tc_action_options $acwd && return 0
|
||||
fi
|
||||
done
|
||||
_tc_one_of_list $ACTION_KIND
|
||||
;;
|
||||
get|del|delete)
|
||||
_tc_once_attr 'index'
|
||||
;;
|
||||
lst|list|flush|show)
|
||||
_tc_one_of_list $ACTION_KIND
|
||||
;;
|
||||
*)
|
||||
[[ $cword -eq $subcword ]] && \
|
||||
COMPREPLY=( $( compgen -W 'help add delete change \
|
||||
replace show list flush action' -- "$cur" ) )
|
||||
;;
|
||||
esac
|
||||
;;
|
||||
|
||||
monitor)
|
||||
COMPREPLY=( $( compgen -W 'help' -- "$cur" ) )
|
||||
;;
|
||||
|
||||
exec)
|
||||
case $subcmd in
|
||||
bpf)
|
||||
local excmd exwd EXEC_KIND=' import debug graft '
|
||||
for ((exwd=$subcword; exwd < ${#words[@]}-1; exwd++)); do
|
||||
if [[ $EXEC_KIND =~ ' '${words[exwd]}' ' ]]; then
|
||||
excmd=${words[exwd]}
|
||||
_tc_exec_options $excmd && return 0
|
||||
fi
|
||||
done
|
||||
_tc_one_of_list $EXEC_KIND
|
||||
;;
|
||||
*)
|
||||
[[ $cword -eq $subcword ]] && \
|
||||
COMPREPLY=( $( compgen -W 'bpf' -- "$cur" ) )
|
||||
;;
|
||||
esac
|
||||
;;
|
||||
esac
|
||||
} &&
|
||||
complete -F _tc tc
|
||||
|
||||
# ex: ts=4 sw=4 et filetype=sh
|
||||
|
|
@ -1,15 +1,18 @@
|
|||
# SPDX-License-Identifier: GPL-2.0
|
||||
BROBJ = bridge.o fdb.o monitor.o link.o mdb.o vlan.o
|
||||
|
||||
include ../config.mk
|
||||
include ../Config
|
||||
|
||||
ifeq ($(IP_CONFIG_SETNS),y)
|
||||
CFLAGS += -DHAVE_SETNS
|
||||
endif
|
||||
|
||||
all: bridge
|
||||
|
||||
bridge: $(BROBJ) $(LIBNETLINK)
|
||||
$(QUIET_LINK)$(CC) $^ $(LDFLAGS) $(LDLIBS) -o $@
|
||||
bridge: $(BROBJ) $(LIBNETLINK)
|
||||
|
||||
install: all
|
||||
install -m 0755 bridge $(DESTDIR)$(SBINDIR)
|
||||
|
||||
clean:
|
||||
rm -f $(BROBJ) bridge
|
||||
|
||||
|
|
|
|||
|
|
@ -1,31 +1,20 @@
|
|||
/* SPDX-License-Identifier: GPL-2.0 */
|
||||
extern int print_linkinfo(const struct sockaddr_nl *who,
|
||||
struct nlmsghdr *n,
|
||||
void *arg);
|
||||
extern int print_fdb(const struct sockaddr_nl *who,
|
||||
struct nlmsghdr *n, void *arg);
|
||||
extern int print_mdb(const struct sockaddr_nl *who,
|
||||
struct nlmsghdr *n, void *arg);
|
||||
|
||||
#define MDB_RTA(r) \
|
||||
((struct rtattr *)(((char *)(r)) + RTA_ALIGN(sizeof(struct br_mdb_entry))))
|
||||
|
||||
#define MDB_RTR_RTA(r) \
|
||||
((struct rtattr *)(((char *)(r)) + RTA_ALIGN(sizeof(__u32))))
|
||||
|
||||
void print_vlan_info(struct rtattr *tb, int ifindex);
|
||||
int print_linkinfo(struct nlmsghdr *n, void *arg);
|
||||
int print_mdb_mon(struct nlmsghdr *n, void *arg);
|
||||
int print_fdb(struct nlmsghdr *n, void *arg);
|
||||
void print_stp_state(__u8 state);
|
||||
int parse_stp_state(const char *arg);
|
||||
int print_vlan_rtm(struct nlmsghdr *n, void *arg, bool monitor,
|
||||
bool global_only);
|
||||
void br_print_router_port_stats(struct rtattr *pattr);
|
||||
|
||||
int do_fdb(int argc, char **argv);
|
||||
int do_mdb(int argc, char **argv);
|
||||
int do_monitor(int argc, char **argv);
|
||||
int do_vlan(int argc, char **argv);
|
||||
int do_link(int argc, char **argv);
|
||||
extern int do_fdb(int argc, char **argv);
|
||||
extern int do_mdb(int argc, char **argv);
|
||||
extern int do_monitor(int argc, char **argv);
|
||||
extern int do_vlan(int argc, char **argv);
|
||||
extern int do_link(int argc, char **argv);
|
||||
|
||||
extern int preferred_family;
|
||||
extern int show_stats;
|
||||
extern int show_details;
|
||||
extern int timestamp;
|
||||
extern int compress_vlans;
|
||||
extern int json;
|
||||
extern struct rtnl_handle rth;
|
||||
|
|
|
|||
|
|
@ -1,4 +1,3 @@
|
|||
/* SPDX-License-Identifier: GPL-2.0 */
|
||||
/*
|
||||
* Get/set/delete bridge with netlink
|
||||
*
|
||||
|
|
@ -10,25 +9,21 @@
|
|||
#include <unistd.h>
|
||||
#include <sys/socket.h>
|
||||
#include <string.h>
|
||||
#include <errno.h>
|
||||
|
||||
#include "version.h"
|
||||
#include "SNAPSHOT.h"
|
||||
#include "utils.h"
|
||||
#include "br_common.h"
|
||||
#include "namespace.h"
|
||||
#include "color.h"
|
||||
|
||||
struct rtnl_handle rth = { .fd = -1 };
|
||||
int preferred_family = AF_UNSPEC;
|
||||
int oneline;
|
||||
int resolve_hosts;
|
||||
int oneline = 0;
|
||||
int show_stats;
|
||||
int show_details;
|
||||
static int color;
|
||||
int compress_vlans;
|
||||
int json;
|
||||
int timestamp;
|
||||
static const char *batch_file;
|
||||
int force;
|
||||
char * _SL_ = NULL;
|
||||
|
||||
static void usage(void) __attribute__((noreturn));
|
||||
|
||||
|
|
@ -36,11 +31,10 @@ static void usage(void)
|
|||
{
|
||||
fprintf(stderr,
|
||||
"Usage: bridge [ OPTIONS ] OBJECT { COMMAND | help }\n"
|
||||
" bridge [ -force ] -batch filename\n"
|
||||
"where OBJECT := { link | fdb | mdb | vlan | monitor }\n"
|
||||
" OPTIONS := { -V[ersion] | -s[tatistics] | -d[etails] |\n"
|
||||
" -o[neline] | -t[imestamp] | -n[etns] name |\n"
|
||||
" -c[ompressvlans] -color -p[retty] -j[son] }\n");
|
||||
" -c[ompressvlans] }\n");
|
||||
exit(-1);
|
||||
}
|
||||
|
||||
|
|
@ -54,9 +48,9 @@ static const struct cmd {
|
|||
const char *cmd;
|
||||
int (*func)(int argc, char **argv);
|
||||
} cmds[] = {
|
||||
{ "link", do_link },
|
||||
{ "fdb", do_fdb },
|
||||
{ "mdb", do_mdb },
|
||||
{ "link", do_link },
|
||||
{ "fdb", do_fdb },
|
||||
{ "mdb", do_mdb },
|
||||
{ "vlan", do_vlan },
|
||||
{ "monitor", do_monitor },
|
||||
{ "help", do_help },
|
||||
|
|
@ -72,40 +66,16 @@ static int do_cmd(const char *argv0, int argc, char **argv)
|
|||
return c->func(argc-1, argv+1);
|
||||
}
|
||||
|
||||
fprintf(stderr,
|
||||
"Object \"%s\" is unknown, try \"bridge help\".\n", argv0);
|
||||
fprintf(stderr, "Object \"%s\" is unknown, try \"bridge help\".\n", argv0);
|
||||
return -1;
|
||||
}
|
||||
|
||||
static int br_batch_cmd(int argc, char *argv[], void *data)
|
||||
{
|
||||
return do_cmd(argv[0], argc, argv);
|
||||
}
|
||||
|
||||
static int batch(const char *name)
|
||||
{
|
||||
int ret;
|
||||
|
||||
if (rtnl_open(&rth, 0) < 0) {
|
||||
fprintf(stderr, "Cannot open rtnetlink\n");
|
||||
return EXIT_FAILURE;
|
||||
}
|
||||
|
||||
rtnl_set_strict_dump(&rth);
|
||||
|
||||
ret = do_batch(name, force, br_batch_cmd, NULL);
|
||||
|
||||
rtnl_close(&rth);
|
||||
return ret;
|
||||
}
|
||||
|
||||
int
|
||||
main(int argc, char **argv)
|
||||
{
|
||||
while (argc > 1) {
|
||||
const char *opt = argv[1];
|
||||
|
||||
if (strcmp(opt, "--") == 0) {
|
||||
char *opt = argv[1];
|
||||
if (strcmp(opt,"--") == 0) {
|
||||
argc--; argv++;
|
||||
break;
|
||||
}
|
||||
|
|
@ -117,7 +87,7 @@ main(int argc, char **argv)
|
|||
if (matches(opt, "-help") == 0) {
|
||||
usage();
|
||||
} else if (matches(opt, "-Version") == 0) {
|
||||
printf("bridge utility, %s\n", version);
|
||||
printf("bridge utility, 0.0\n");
|
||||
exit(0);
|
||||
} else if (matches(opt, "-stats") == 0 ||
|
||||
matches(opt, "-statistics") == 0) {
|
||||
|
|
@ -128,7 +98,7 @@ main(int argc, char **argv)
|
|||
++oneline;
|
||||
} else if (matches(opt, "-timestamp") == 0) {
|
||||
++timestamp;
|
||||
} else if (matches(opt, "-family") == 0) {
|
||||
} else if (matches(opt, "-family") == 0) {
|
||||
argc--;
|
||||
argv++;
|
||||
if (argc <= 1)
|
||||
|
|
@ -149,42 +119,20 @@ main(int argc, char **argv)
|
|||
NEXT_ARG();
|
||||
if (netns_switch(argv[1]))
|
||||
exit(-1);
|
||||
} else if (matches_color(opt, &color)) {
|
||||
} else if (matches(opt, "-compressvlans") == 0) {
|
||||
++compress_vlans;
|
||||
} else if (matches(opt, "-force") == 0) {
|
||||
++force;
|
||||
} else if (matches(opt, "-json") == 0) {
|
||||
++json;
|
||||
} else if (matches(opt, "-pretty") == 0) {
|
||||
++pretty;
|
||||
} else if (matches(opt, "-batch") == 0) {
|
||||
argc--;
|
||||
argv++;
|
||||
if (argc <= 1)
|
||||
usage();
|
||||
batch_file = argv[1];
|
||||
} else {
|
||||
fprintf(stderr,
|
||||
"Option \"%s\" is unknown, try \"bridge help\".\n",
|
||||
opt);
|
||||
fprintf(stderr, "Option \"%s\" is unknown, try \"bridge help\".\n", opt);
|
||||
exit(-1);
|
||||
}
|
||||
argc--; argv++;
|
||||
}
|
||||
|
||||
_SL_ = oneline ? "\\" : "\n";
|
||||
|
||||
check_enable_color(color, json);
|
||||
|
||||
if (batch_file)
|
||||
return batch(batch_file);
|
||||
_SL_ = oneline ? "\\" : "\n" ;
|
||||
|
||||
if (rtnl_open(&rth, 0) < 0)
|
||||
exit(1);
|
||||
|
||||
rtnl_set_strict_dump(&rth);
|
||||
|
||||
if (argc > 1)
|
||||
return do_cmd(argv[1], argc-1, argv+1);
|
||||
|
||||
|
|
|
|||
502
bridge/fdb.c
502
bridge/fdb.c
|
|
@ -1,4 +1,3 @@
|
|||
/* SPDX-License-Identifier: GPL-2.0 */
|
||||
/*
|
||||
* Get/set/delete fdb table with netlink
|
||||
*
|
||||
|
|
@ -22,33 +21,24 @@
|
|||
#include <linux/neighbour.h>
|
||||
#include <string.h>
|
||||
#include <limits.h>
|
||||
#include <stdbool.h>
|
||||
|
||||
#include "json_print.h"
|
||||
#include "libnetlink.h"
|
||||
#include "br_common.h"
|
||||
#include "rt_names.h"
|
||||
#include "utils.h"
|
||||
|
||||
static unsigned int filter_index, filter_dynamic, filter_master,
|
||||
filter_state, filter_vlan;
|
||||
static unsigned int filter_index;
|
||||
|
||||
static void usage(void)
|
||||
{
|
||||
fprintf(stderr,
|
||||
"Usage: bridge fdb { add | append | del | replace } ADDR dev DEV\n"
|
||||
" [ self ] [ master ] [ use ] [ router ] [ extern_learn ]\n"
|
||||
" [ sticky ] [ local | static | dynamic ] [ vlan VID ]\n"
|
||||
" { [ dst IPADDR ] [ port PORT] [ vni VNI ] | [ nhid NHID ] }\n"
|
||||
" [ via DEV ] [ src_vni VNI ]\n"
|
||||
" bridge fdb [ show [ br BRDEV ] [ brport DEV ] [ vlan VID ]\n"
|
||||
" [ state STATE ] [ dynamic ] ]\n"
|
||||
" bridge fdb get [ to ] LLADDR [ br BRDEV ] { brport | dev } DEV\n"
|
||||
" [ vlan VID ] [ vni VNI ] [ self ] [ master ] [ dynamic ]\n");
|
||||
fprintf(stderr, "Usage: bridge fdb { add | append | del | replace } ADDR dev DEV {self|master} [ temp ]\n"
|
||||
" [router] [ dst IPADDR] [ vlan VID ]\n"
|
||||
" [ port PORT] [ vni VNI ] [via DEV]\n");
|
||||
fprintf(stderr, " bridge fdb {show} [ br BRDEV ] [ brport DEV ]\n");
|
||||
exit(-1);
|
||||
}
|
||||
|
||||
static const char *state_n2a(unsigned int s)
|
||||
static const char *state_n2a(unsigned s)
|
||||
{
|
||||
static char buf[32];
|
||||
|
||||
|
|
@ -64,87 +54,21 @@ static const char *state_n2a(unsigned int s)
|
|||
if (s & NUD_REACHABLE)
|
||||
return "";
|
||||
|
||||
if (is_json_context())
|
||||
sprintf(buf, "%#x", s);
|
||||
else
|
||||
sprintf(buf, "state=%#x", s);
|
||||
sprintf(buf, "state=%#x", s);
|
||||
return buf;
|
||||
}
|
||||
|
||||
static int state_a2n(unsigned int *s, const char *arg)
|
||||
{
|
||||
if (matches(arg, "permanent") == 0)
|
||||
*s = NUD_PERMANENT;
|
||||
else if (matches(arg, "static") == 0 || matches(arg, "temp") == 0)
|
||||
*s = NUD_NOARP;
|
||||
else if (matches(arg, "stale") == 0)
|
||||
*s = NUD_STALE;
|
||||
else if (matches(arg, "reachable") == 0 || matches(arg, "dynamic") == 0)
|
||||
*s = NUD_REACHABLE;
|
||||
else if (strcmp(arg, "all") == 0)
|
||||
*s = ~0;
|
||||
else if (get_unsigned(s, arg, 0))
|
||||
return -1;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void fdb_print_flags(FILE *fp, unsigned int flags)
|
||||
{
|
||||
open_json_array(PRINT_JSON,
|
||||
is_json_context() ? "flags" : "");
|
||||
|
||||
if (flags & NTF_SELF)
|
||||
print_string(PRINT_ANY, NULL, "%s ", "self");
|
||||
|
||||
if (flags & NTF_ROUTER)
|
||||
print_string(PRINT_ANY, NULL, "%s ", "router");
|
||||
|
||||
if (flags & NTF_EXT_LEARNED)
|
||||
print_string(PRINT_ANY, NULL, "%s ", "extern_learn");
|
||||
|
||||
if (flags & NTF_OFFLOADED)
|
||||
print_string(PRINT_ANY, NULL, "%s ", "offload");
|
||||
|
||||
if (flags & NTF_MASTER)
|
||||
print_string(PRINT_ANY, NULL, "%s ", "master");
|
||||
|
||||
if (flags & NTF_STICKY)
|
||||
print_string(PRINT_ANY, NULL, "%s ", "sticky");
|
||||
|
||||
close_json_array(PRINT_JSON, NULL);
|
||||
}
|
||||
|
||||
static void fdb_print_stats(FILE *fp, const struct nda_cacheinfo *ci)
|
||||
{
|
||||
static int hz;
|
||||
|
||||
if (!hz)
|
||||
hz = get_user_hz();
|
||||
|
||||
if (is_json_context()) {
|
||||
print_uint(PRINT_JSON, "used", NULL,
|
||||
ci->ndm_used / hz);
|
||||
print_uint(PRINT_JSON, "updated", NULL,
|
||||
ci->ndm_updated / hz);
|
||||
} else {
|
||||
fprintf(fp, "used %d/%d ", ci->ndm_used / hz,
|
||||
ci->ndm_updated / hz);
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
int print_fdb(struct nlmsghdr *n, void *arg)
|
||||
int print_fdb(const struct sockaddr_nl *who, struct nlmsghdr *n, void *arg)
|
||||
{
|
||||
FILE *fp = arg;
|
||||
struct ndmsg *r = NLMSG_DATA(n);
|
||||
int len = n->nlmsg_len;
|
||||
struct rtattr *tb[NDA_MAX+1];
|
||||
__u16 vid = 0;
|
||||
struct rtattr * tb[NDA_MAX+1];
|
||||
|
||||
if (n->nlmsg_type != RTM_NEWNEIGH && n->nlmsg_type != RTM_DELNEIGH) {
|
||||
fprintf(stderr, "Not RTM_NEWNEIGH: %08x %08x %08x\n",
|
||||
n->nlmsg_len, n->nlmsg_type, n->nlmsg_flags);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
|
@ -160,165 +84,102 @@ int print_fdb(struct nlmsghdr *n, void *arg)
|
|||
if (filter_index && filter_index != r->ndm_ifindex)
|
||||
return 0;
|
||||
|
||||
if (filter_state && !(r->ndm_state & filter_state))
|
||||
return 0;
|
||||
|
||||
parse_rtattr(tb, NDA_MAX, NDA_RTA(r),
|
||||
n->nlmsg_len - NLMSG_LENGTH(sizeof(*r)));
|
||||
|
||||
if (tb[NDA_VLAN])
|
||||
vid = rta_getattr_u16(tb[NDA_VLAN]);
|
||||
|
||||
if (filter_vlan && filter_vlan != vid)
|
||||
return 0;
|
||||
|
||||
if (filter_dynamic && (r->ndm_state & NUD_PERMANENT))
|
||||
return 0;
|
||||
|
||||
open_json_object(NULL);
|
||||
if (n->nlmsg_type == RTM_DELNEIGH)
|
||||
print_bool(PRINT_ANY, "deleted", "Deleted ", true);
|
||||
fprintf(fp, "Deleted ");
|
||||
|
||||
if (tb[NDA_LLADDR]) {
|
||||
const char *lladdr;
|
||||
SPRINT_BUF(b1);
|
||||
|
||||
lladdr = ll_addr_n2a(RTA_DATA(tb[NDA_LLADDR]),
|
||||
RTA_PAYLOAD(tb[NDA_LLADDR]),
|
||||
ll_index_to_type(r->ndm_ifindex),
|
||||
b1, sizeof(b1));
|
||||
|
||||
print_color_string(PRINT_ANY, COLOR_MAC,
|
||||
"mac", "%s ", lladdr);
|
||||
fprintf(fp, "%s ",
|
||||
ll_addr_n2a(RTA_DATA(tb[NDA_LLADDR]),
|
||||
RTA_PAYLOAD(tb[NDA_LLADDR]),
|
||||
ll_index_to_type(r->ndm_ifindex),
|
||||
b1, sizeof(b1)));
|
||||
}
|
||||
|
||||
if (!filter_index && r->ndm_ifindex) {
|
||||
print_string(PRINT_FP, NULL, "dev ", NULL);
|
||||
|
||||
print_color_string(PRINT_ANY, COLOR_IFNAME,
|
||||
"ifname", "%s ",
|
||||
ll_index_to_name(r->ndm_ifindex));
|
||||
}
|
||||
if (!filter_index && r->ndm_ifindex)
|
||||
fprintf(fp, "dev %s ", ll_index_to_name(r->ndm_ifindex));
|
||||
|
||||
if (tb[NDA_DST]) {
|
||||
SPRINT_BUF(abuf);
|
||||
int family = AF_INET;
|
||||
const char *dst;
|
||||
|
||||
if (RTA_PAYLOAD(tb[NDA_DST]) == sizeof(struct in6_addr))
|
||||
family = AF_INET6;
|
||||
|
||||
dst = format_host(family,
|
||||
RTA_PAYLOAD(tb[NDA_DST]),
|
||||
RTA_DATA(tb[NDA_DST]));
|
||||
|
||||
print_string(PRINT_FP, NULL, "dst ", NULL);
|
||||
|
||||
print_color_string(PRINT_ANY,
|
||||
ifa_family_color(family),
|
||||
"dst", "%s ", dst);
|
||||
fprintf(fp, "dst %s ",
|
||||
format_host(family,
|
||||
RTA_PAYLOAD(tb[NDA_DST]),
|
||||
RTA_DATA(tb[NDA_DST]),
|
||||
abuf, sizeof(abuf)));
|
||||
}
|
||||
|
||||
if (vid)
|
||||
print_uint(PRINT_ANY,
|
||||
"vlan", "vlan %hu ", vid);
|
||||
if (tb[NDA_VLAN]) {
|
||||
__u16 vid = rta_getattr_u16(tb[NDA_VLAN]);
|
||||
fprintf(fp, "vlan %hu ", vid);
|
||||
}
|
||||
|
||||
if (tb[NDA_PORT])
|
||||
print_uint(PRINT_ANY,
|
||||
"port", "port %u ",
|
||||
rta_getattr_be16(tb[NDA_PORT]));
|
||||
|
||||
fprintf(fp, "port %d ", ntohs(rta_getattr_u16(tb[NDA_PORT])));
|
||||
if (tb[NDA_VNI])
|
||||
print_uint(PRINT_ANY,
|
||||
"vni", "vni %u ",
|
||||
rta_getattr_u32(tb[NDA_VNI]));
|
||||
|
||||
if (tb[NDA_SRC_VNI])
|
||||
print_uint(PRINT_ANY,
|
||||
"src_vni", "src_vni %u ",
|
||||
rta_getattr_u32(tb[NDA_SRC_VNI]));
|
||||
|
||||
fprintf(fp, "vni %d ", rta_getattr_u32(tb[NDA_VNI]));
|
||||
if (tb[NDA_IFINDEX]) {
|
||||
unsigned int ifindex = rta_getattr_u32(tb[NDA_IFINDEX]);
|
||||
|
||||
if (tb[NDA_LINK_NETNSID])
|
||||
print_uint(PRINT_ANY,
|
||||
"viaIfIndex", "via ifindex %u ",
|
||||
ifindex);
|
||||
else
|
||||
print_string(PRINT_ANY,
|
||||
"viaIf", "via %s ",
|
||||
ll_index_to_name(ifindex));
|
||||
if (ifindex) {
|
||||
char ifname[IF_NAMESIZE];
|
||||
|
||||
if (!tb[NDA_LINK_NETNSID] &&
|
||||
if_indextoname(ifindex, ifname))
|
||||
fprintf(fp, "via %s ", ifname);
|
||||
else
|
||||
fprintf(fp, "via ifindex %u ", ifindex);
|
||||
}
|
||||
}
|
||||
|
||||
if (tb[NDA_NH_ID])
|
||||
print_uint(PRINT_ANY, "nhid", "nhid %u ",
|
||||
rta_getattr_u32(tb[NDA_NH_ID]));
|
||||
|
||||
if (tb[NDA_LINK_NETNSID])
|
||||
print_uint(PRINT_ANY,
|
||||
"linkNetNsId", "link-netnsid %d ",
|
||||
rta_getattr_u32(tb[NDA_LINK_NETNSID]));
|
||||
|
||||
if (show_stats && tb[NDA_CACHEINFO])
|
||||
fdb_print_stats(fp, RTA_DATA(tb[NDA_CACHEINFO]));
|
||||
|
||||
fdb_print_flags(fp, r->ndm_flags);
|
||||
fprintf(fp, "link-netnsid %d ",
|
||||
rta_getattr_u32(tb[NDA_LINK_NETNSID]));
|
||||
|
||||
if (show_stats && tb[NDA_CACHEINFO]) {
|
||||
struct nda_cacheinfo *ci = RTA_DATA(tb[NDA_CACHEINFO]);
|
||||
int hz = get_user_hz();
|
||||
|
||||
fprintf(fp, "used %d/%d ", ci->ndm_used/hz,
|
||||
ci->ndm_updated/hz);
|
||||
}
|
||||
if (r->ndm_flags & NTF_SELF)
|
||||
fprintf(fp, "self ");
|
||||
if (tb[NDA_MASTER])
|
||||
print_string(PRINT_ANY, "master", "master %s ",
|
||||
ll_index_to_name(rta_getattr_u32(tb[NDA_MASTER])));
|
||||
|
||||
print_string(PRINT_ANY, "state", "%s\n",
|
||||
state_n2a(r->ndm_state));
|
||||
close_json_object();
|
||||
fflush(fp);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int fdb_linkdump_filter(struct nlmsghdr *nlh, int reqlen)
|
||||
{
|
||||
int err;
|
||||
|
||||
if (filter_index) {
|
||||
struct ifinfomsg *ifm = NLMSG_DATA(nlh);
|
||||
|
||||
ifm->ifi_index = filter_index;
|
||||
}
|
||||
|
||||
if (filter_master) {
|
||||
err = addattr32(nlh, reqlen, IFLA_MASTER, filter_master);
|
||||
if (err)
|
||||
return err;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int fdb_dump_filter(struct nlmsghdr *nlh, int reqlen)
|
||||
{
|
||||
int err;
|
||||
|
||||
if (filter_index) {
|
||||
struct ndmsg *ndm = NLMSG_DATA(nlh);
|
||||
|
||||
ndm->ndm_ifindex = filter_index;
|
||||
}
|
||||
|
||||
if (filter_master) {
|
||||
err = addattr32(nlh, reqlen, NDA_MASTER, filter_master);
|
||||
if (err)
|
||||
return err;
|
||||
}
|
||||
fprintf(fp, "master %s ",
|
||||
ll_index_to_name(rta_getattr_u32(tb[NDA_MASTER])));
|
||||
else if (r->ndm_flags & NTF_MASTER)
|
||||
fprintf(fp, "master ");
|
||||
if (r->ndm_flags & NTF_ROUTER)
|
||||
fprintf(fp, "router ");
|
||||
if (r->ndm_flags & NTF_EXT_LEARNED)
|
||||
fprintf(fp, "offload ");
|
||||
|
||||
fprintf(fp, "%s\n", state_n2a(r->ndm_state));
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int fdb_show(int argc, char **argv)
|
||||
{
|
||||
struct {
|
||||
struct nlmsghdr n;
|
||||
struct ifinfomsg ifm;
|
||||
char buf[256];
|
||||
} req;
|
||||
|
||||
char *filter_dev = NULL;
|
||||
char *br = NULL;
|
||||
int rc;
|
||||
int msg_size = sizeof(struct ifinfomsg);
|
||||
|
||||
memset(&req, 0, sizeof(req));
|
||||
req.n.nlmsg_len = NLMSG_LENGTH(sizeof(struct ifinfomsg));
|
||||
req.ifm.ifi_family = PF_BRIDGE;
|
||||
|
||||
while (argc > 0) {
|
||||
if ((strcmp(*argv, "brport") == 0) || strcmp(*argv, "dev") == 0) {
|
||||
|
|
@ -327,20 +188,6 @@ static int fdb_show(int argc, char **argv)
|
|||
} else if (strcmp(*argv, "br") == 0) {
|
||||
NEXT_ARG();
|
||||
br = *argv;
|
||||
} else if (strcmp(*argv, "vlan") == 0) {
|
||||
NEXT_ARG();
|
||||
if (filter_vlan)
|
||||
duparg("vlan", *argv);
|
||||
filter_vlan = atoi(*argv);
|
||||
} else if (strcmp(*argv, "state") == 0) {
|
||||
unsigned int state;
|
||||
|
||||
NEXT_ARG();
|
||||
if (state_a2n(&state, *argv))
|
||||
invarg("invalid state", *argv);
|
||||
filter_state |= state;
|
||||
} else if (strcmp(*argv, "dynamic") == 0) {
|
||||
filter_dynamic = 1;
|
||||
} else {
|
||||
if (matches(*argv, "help") == 0)
|
||||
usage();
|
||||
|
|
@ -350,37 +197,34 @@ static int fdb_show(int argc, char **argv)
|
|||
|
||||
if (br) {
|
||||
int br_ifindex = ll_name_to_index(br);
|
||||
|
||||
if (br_ifindex == 0) {
|
||||
fprintf(stderr, "Cannot find bridge device \"%s\"\n", br);
|
||||
return -1;
|
||||
}
|
||||
filter_master = br_ifindex;
|
||||
addattr32(&req.n, sizeof(req), IFLA_MASTER, br_ifindex);
|
||||
msg_size += RTA_LENGTH(4);
|
||||
}
|
||||
|
||||
/*we'll keep around filter_dev for older kernels */
|
||||
if (filter_dev) {
|
||||
filter_index = ll_name_to_index(filter_dev);
|
||||
if (!filter_index)
|
||||
return nodev(filter_dev);
|
||||
filter_index = if_nametoindex(filter_dev);
|
||||
if (filter_index == 0) {
|
||||
fprintf(stderr, "Cannot find device \"%s\"\n",
|
||||
filter_dev);
|
||||
return -1;
|
||||
}
|
||||
req.ifm.ifi_index = filter_index;
|
||||
}
|
||||
|
||||
if (rth.flags & RTNL_HANDLE_F_STRICT_CHK)
|
||||
rc = rtnl_neighdump_req(&rth, PF_BRIDGE, fdb_dump_filter);
|
||||
else
|
||||
rc = rtnl_fdb_linkdump_req_filter_fn(&rth, fdb_linkdump_filter);
|
||||
if (rc < 0) {
|
||||
if (rtnl_dump_request(&rth, RTM_GETNEIGH, &req.ifm, msg_size) < 0) {
|
||||
perror("Cannot send dump request");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
new_json_obj(json);
|
||||
if (rtnl_dump_filter(&rth, print_fdb, stdout) < 0) {
|
||||
fprintf(stderr, "Dump terminated\n");
|
||||
exit(1);
|
||||
}
|
||||
delete_json_obj();
|
||||
fflush(stdout);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
|
@ -388,16 +232,10 @@ static int fdb_show(int argc, char **argv)
|
|||
static int fdb_modify(int cmd, int flags, int argc, char **argv)
|
||||
{
|
||||
struct {
|
||||
struct nlmsghdr n;
|
||||
struct ndmsg ndm;
|
||||
char buf[256];
|
||||
} req = {
|
||||
.n.nlmsg_len = NLMSG_LENGTH(sizeof(struct ndmsg)),
|
||||
.n.nlmsg_flags = NLM_F_REQUEST | flags,
|
||||
.n.nlmsg_type = cmd,
|
||||
.ndm.ndm_family = PF_BRIDGE,
|
||||
.ndm.ndm_state = NUD_NOARP,
|
||||
};
|
||||
struct nlmsghdr n;
|
||||
struct ndmsg ndm;
|
||||
char buf[256];
|
||||
} req;
|
||||
char *addr = NULL;
|
||||
char *d = NULL;
|
||||
char abuf[ETH_ALEN];
|
||||
|
|
@ -405,11 +243,17 @@ static int fdb_modify(int cmd, int flags, int argc, char **argv)
|
|||
inet_prefix dst;
|
||||
unsigned long port = 0;
|
||||
unsigned long vni = ~0;
|
||||
unsigned long src_vni = ~0;
|
||||
unsigned int via = 0;
|
||||
char *endptr;
|
||||
short vid = -1;
|
||||
__u32 nhid = 0;
|
||||
|
||||
memset(&req, 0, sizeof(req));
|
||||
|
||||
req.n.nlmsg_len = NLMSG_LENGTH(sizeof(struct ndmsg));
|
||||
req.n.nlmsg_flags = NLM_F_REQUEST|flags;
|
||||
req.n.nlmsg_type = cmd;
|
||||
req.ndm.ndm_family = PF_BRIDGE;
|
||||
req.ndm.ndm_state = NUD_NOARP;
|
||||
|
||||
while (argc > 0) {
|
||||
if (strcmp(*argv, "dev") == 0) {
|
||||
|
|
@ -421,10 +265,6 @@ static int fdb_modify(int cmd, int flags, int argc, char **argv)
|
|||
duparg2("dst", *argv);
|
||||
get_addr(&dst, *argv, preferred_family);
|
||||
dst_ok = 1;
|
||||
} else if (strcmp(*argv, "nhid") == 0) {
|
||||
NEXT_ARG();
|
||||
if (get_u32(&nhid, *argv, 0))
|
||||
invarg("\"id\" value is invalid\n", *argv);
|
||||
} else if (strcmp(*argv, "port") == 0) {
|
||||
|
||||
NEXT_ARG();
|
||||
|
|
@ -444,47 +284,31 @@ static int fdb_modify(int cmd, int flags, int argc, char **argv)
|
|||
if ((endptr && *endptr) ||
|
||||
(vni >> 24) || vni == ULONG_MAX)
|
||||
invarg("invalid VNI\n", *argv);
|
||||
} else if (strcmp(*argv, "src_vni") == 0) {
|
||||
NEXT_ARG();
|
||||
src_vni = strtoul(*argv, &endptr, 0);
|
||||
if ((endptr && *endptr) ||
|
||||
(src_vni >> 24) || src_vni == ULONG_MAX)
|
||||
invarg("invalid src VNI\n", *argv);
|
||||
} else if (strcmp(*argv, "via") == 0) {
|
||||
NEXT_ARG();
|
||||
via = ll_name_to_index(*argv);
|
||||
if (!via)
|
||||
exit(nodev(*argv));
|
||||
via = if_nametoindex(*argv);
|
||||
if (via == 0)
|
||||
invarg("invalid device\n", *argv);
|
||||
} else if (strcmp(*argv, "self") == 0) {
|
||||
req.ndm.ndm_flags |= NTF_SELF;
|
||||
} else if (matches(*argv, "master") == 0) {
|
||||
req.ndm.ndm_flags |= NTF_MASTER;
|
||||
} else if (matches(*argv, "router") == 0) {
|
||||
req.ndm.ndm_flags |= NTF_ROUTER;
|
||||
} else if (matches(*argv, "local") == 0 ||
|
||||
} else if (matches(*argv, "local") == 0||
|
||||
matches(*argv, "permanent") == 0) {
|
||||
req.ndm.ndm_state |= NUD_PERMANENT;
|
||||
} else if (matches(*argv, "temp") == 0 ||
|
||||
matches(*argv, "static") == 0) {
|
||||
} else if (matches(*argv, "temp") == 0) {
|
||||
req.ndm.ndm_state |= NUD_REACHABLE;
|
||||
} else if (matches(*argv, "dynamic") == 0) {
|
||||
req.ndm.ndm_state |= NUD_REACHABLE;
|
||||
req.ndm.ndm_state &= ~NUD_NOARP;
|
||||
} else if (matches(*argv, "vlan") == 0) {
|
||||
if (vid >= 0)
|
||||
duparg2("vlan", *argv);
|
||||
NEXT_ARG();
|
||||
vid = atoi(*argv);
|
||||
} else if (matches(*argv, "use") == 0) {
|
||||
req.ndm.ndm_flags |= NTF_USE;
|
||||
} else if (matches(*argv, "extern_learn") == 0) {
|
||||
req.ndm.ndm_flags |= NTF_EXT_LEARNED;
|
||||
} else if (matches(*argv, "sticky") == 0) {
|
||||
req.ndm.ndm_flags |= NTF_STICKY;
|
||||
} else {
|
||||
if (strcmp(*argv, "to") == 0)
|
||||
if (strcmp(*argv, "to") == 0) {
|
||||
NEXT_ARG();
|
||||
|
||||
}
|
||||
if (matches(*argv, "help") == 0)
|
||||
usage();
|
||||
if (addr)
|
||||
|
|
@ -499,11 +323,6 @@ static int fdb_modify(int cmd, int flags, int argc, char **argv)
|
|||
return -1;
|
||||
}
|
||||
|
||||
if (nhid && (dst_ok || port || vni != ~0)) {
|
||||
fprintf(stderr, "dst, port, vni are mutually exclusive with nhid\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
/* Assume self */
|
||||
if (!(req.ndm.ndm_flags&(NTF_SELF|NTF_MASTER)))
|
||||
req.ndm.ndm_flags |= NTF_SELF;
|
||||
|
|
@ -525,8 +344,6 @@ static int fdb_modify(int cmd, int flags, int argc, char **argv)
|
|||
|
||||
if (vid >= 0)
|
||||
addattr16(&req.n, sizeof(req), NDA_VLAN, vid);
|
||||
if (nhid > 0)
|
||||
addattr32(&req.n, sizeof(req), NDA_NH_ID, nhid);
|
||||
|
||||
if (port) {
|
||||
unsigned short dport;
|
||||
|
|
@ -536,132 +353,17 @@ static int fdb_modify(int cmd, int flags, int argc, char **argv)
|
|||
}
|
||||
if (vni != ~0)
|
||||
addattr32(&req.n, sizeof(req), NDA_VNI, vni);
|
||||
if (src_vni != ~0)
|
||||
addattr32(&req.n, sizeof(req), NDA_SRC_VNI, src_vni);
|
||||
if (via)
|
||||
addattr32(&req.n, sizeof(req), NDA_IFINDEX, via);
|
||||
|
||||
req.ndm.ndm_ifindex = ll_name_to_index(d);
|
||||
if (!req.ndm.ndm_ifindex)
|
||||
return nodev(d);
|
||||
|
||||
if (rtnl_talk(&rth, &req.n, NULL) < 0)
|
||||
return -1;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int fdb_get(int argc, char **argv)
|
||||
{
|
||||
struct {
|
||||
struct nlmsghdr n;
|
||||
struct ndmsg ndm;
|
||||
char buf[1024];
|
||||
} req = {
|
||||
.n.nlmsg_len = NLMSG_LENGTH(sizeof(struct ndmsg)),
|
||||
.n.nlmsg_flags = NLM_F_REQUEST,
|
||||
.n.nlmsg_type = RTM_GETNEIGH,
|
||||
.ndm.ndm_family = AF_BRIDGE,
|
||||
};
|
||||
char *d = NULL, *br = NULL;
|
||||
struct nlmsghdr *answer;
|
||||
unsigned long vni = ~0;
|
||||
char abuf[ETH_ALEN];
|
||||
int br_ifindex = 0;
|
||||
char *addr = NULL;
|
||||
short vlan = -1;
|
||||
char *endptr;
|
||||
|
||||
while (argc > 0) {
|
||||
if ((strcmp(*argv, "brport") == 0) || strcmp(*argv, "dev") == 0) {
|
||||
NEXT_ARG();
|
||||
d = *argv;
|
||||
} else if (strcmp(*argv, "br") == 0) {
|
||||
NEXT_ARG();
|
||||
br = *argv;
|
||||
} else if (strcmp(*argv, "dev") == 0) {
|
||||
NEXT_ARG();
|
||||
d = *argv;
|
||||
} else if (strcmp(*argv, "vni") == 0) {
|
||||
NEXT_ARG();
|
||||
vni = strtoul(*argv, &endptr, 0);
|
||||
if ((endptr && *endptr) ||
|
||||
(vni >> 24) || vni == ULONG_MAX)
|
||||
invarg("invalid VNI\n", *argv);
|
||||
} else if (strcmp(*argv, "self") == 0) {
|
||||
req.ndm.ndm_flags |= NTF_SELF;
|
||||
} else if (matches(*argv, "master") == 0) {
|
||||
req.ndm.ndm_flags |= NTF_MASTER;
|
||||
} else if (matches(*argv, "vlan") == 0) {
|
||||
if (vlan >= 0)
|
||||
duparg2("vlan", *argv);
|
||||
NEXT_ARG();
|
||||
vlan = atoi(*argv);
|
||||
} else if (matches(*argv, "dynamic") == 0) {
|
||||
filter_dynamic = 1;
|
||||
} else {
|
||||
if (strcmp(*argv, "to") == 0)
|
||||
NEXT_ARG();
|
||||
|
||||
if (matches(*argv, "help") == 0)
|
||||
usage();
|
||||
if (addr)
|
||||
duparg2("to", *argv);
|
||||
addr = *argv;
|
||||
}
|
||||
argc--; argv++;
|
||||
}
|
||||
|
||||
if ((d == NULL && br == NULL) || addr == NULL) {
|
||||
fprintf(stderr, "Device or master and address are required arguments.\n");
|
||||
if (req.ndm.ndm_ifindex == 0) {
|
||||
fprintf(stderr, "Cannot find device \"%s\"\n", d);
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (sscanf(addr, "%hhx:%hhx:%hhx:%hhx:%hhx:%hhx",
|
||||
abuf, abuf+1, abuf+2,
|
||||
abuf+3, abuf+4, abuf+5) != 6) {
|
||||
fprintf(stderr, "Invalid mac address %s\n", addr);
|
||||
if (rtnl_talk(&rth, &req.n, NULL, 0) < 0)
|
||||
return -1;
|
||||
}
|
||||
|
||||
addattr_l(&req.n, sizeof(req), NDA_LLADDR, abuf, ETH_ALEN);
|
||||
|
||||
if (vlan >= 0)
|
||||
addattr16(&req.n, sizeof(req), NDA_VLAN, vlan);
|
||||
|
||||
if (vni != ~0)
|
||||
addattr32(&req.n, sizeof(req), NDA_VNI, vni);
|
||||
|
||||
if (d) {
|
||||
req.ndm.ndm_ifindex = ll_name_to_index(d);
|
||||
if (!req.ndm.ndm_ifindex) {
|
||||
fprintf(stderr, "Cannot find device \"%s\"\n", d);
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
if (br) {
|
||||
br_ifindex = ll_name_to_index(br);
|
||||
if (!br_ifindex) {
|
||||
fprintf(stderr, "Cannot find bridge device \"%s\"\n", br);
|
||||
return -1;
|
||||
}
|
||||
addattr32(&req.n, sizeof(req), NDA_MASTER, br_ifindex);
|
||||
}
|
||||
|
||||
if (rtnl_talk(&rth, &req.n, &answer) < 0)
|
||||
return -2;
|
||||
|
||||
/*
|
||||
* Initialize a json_writer and open an array object
|
||||
* if -json was specified.
|
||||
*/
|
||||
new_json_obj(json);
|
||||
if (print_fdb(answer, stdout) < 0) {
|
||||
fprintf(stderr, "An error :-)\n");
|
||||
return -1;
|
||||
}
|
||||
delete_json_obj();
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
|
@ -679,8 +381,6 @@ int do_fdb(int argc, char **argv)
|
|||
return fdb_modify(RTM_NEWNEIGH, NLM_F_CREATE|NLM_F_REPLACE, argc-1, argv+1);
|
||||
if (matches(*argv, "delete") == 0)
|
||||
return fdb_modify(RTM_DELNEIGH, 0, argc-1, argv+1);
|
||||
if (matches(*argv, "get") == 0)
|
||||
return fdb_get(argc-1, argv+1);
|
||||
if (matches(*argv, "show") == 0 ||
|
||||
matches(*argv, "lst") == 0 ||
|
||||
matches(*argv, "list") == 0)
|
||||
|
|
|
|||
499
bridge/link.c
499
bridge/link.c
|
|
@ -1,4 +1,3 @@
|
|||
/* SPDX-License-Identifier: GPL-2.0 */
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
|
|
@ -12,14 +11,13 @@
|
|||
#include <string.h>
|
||||
#include <stdbool.h>
|
||||
|
||||
#include "json_print.h"
|
||||
#include "libnetlink.h"
|
||||
#include "utils.h"
|
||||
#include "br_common.h"
|
||||
|
||||
static unsigned int filter_index;
|
||||
|
||||
static const char *stp_states[] = {
|
||||
static const char *port_states[] = {
|
||||
[BR_STATE_DISABLED] = "disabled",
|
||||
[BR_STATE_LISTENING] = "listening",
|
||||
[BR_STATE_LEARNING] = "learning",
|
||||
|
|
@ -27,21 +25,17 @@ static const char *stp_states[] = {
|
|||
[BR_STATE_BLOCKING] = "blocking",
|
||||
};
|
||||
|
||||
static const char *hw_mode[] = {
|
||||
"VEB", "VEPA"
|
||||
};
|
||||
extern char *if_indextoname (unsigned int __ifindex, char *__ifname);
|
||||
|
||||
static void print_link_flags(FILE *fp, unsigned int flags, unsigned int mdown)
|
||||
static void print_link_flags(FILE *fp, unsigned flags)
|
||||
{
|
||||
open_json_array(PRINT_ANY, is_json_context() ? "flags" : "<");
|
||||
fprintf(fp, "<");
|
||||
if (flags & IFF_UP && !(flags & IFF_RUNNING))
|
||||
print_string(PRINT_ANY, NULL,
|
||||
flags ? "%s," : "%s", "NO-CARRIER");
|
||||
fprintf(fp, "NO-CARRIER%s", flags ? "," : "");
|
||||
flags &= ~IFF_RUNNING;
|
||||
|
||||
#define _PF(f) if (flags&IFF_##f) { \
|
||||
flags &= ~IFF_##f ; \
|
||||
print_string(PRINT_ANY, NULL, flags ? "%s," : "%s", #f); }
|
||||
#define _PF(f) if (flags&IFF_##f) { \
|
||||
flags &= ~IFF_##f ; \
|
||||
fprintf(fp, #f "%s", flags ? "," : ""); }
|
||||
_PF(LOOPBACK);
|
||||
_PF(BROADCAST);
|
||||
_PF(POINTOPOINT);
|
||||
|
|
@ -61,159 +55,61 @@ static void print_link_flags(FILE *fp, unsigned int flags, unsigned int mdown)
|
|||
_PF(DORMANT);
|
||||
_PF(ECHO);
|
||||
#undef _PF
|
||||
if (flags)
|
||||
print_hex(PRINT_ANY, NULL, "%x", flags);
|
||||
if (mdown)
|
||||
print_string(PRINT_ANY, NULL, ",%s", "M-DOWN");
|
||||
close_json_array(PRINT_ANY, "> ");
|
||||
if (flags)
|
||||
fprintf(fp, "%x", flags);
|
||||
fprintf(fp, "> ");
|
||||
}
|
||||
|
||||
void print_stp_state(__u8 state)
|
||||
static const char *oper_states[] = {
|
||||
"UNKNOWN", "NOTPRESENT", "DOWN", "LOWERLAYERDOWN",
|
||||
"TESTING", "DORMANT", "UP"
|
||||
};
|
||||
|
||||
static const char *hw_mode[] = {"VEB", "VEPA"};
|
||||
|
||||
static void print_operstate(FILE *f, __u8 state)
|
||||
{
|
||||
if (state >= sizeof(oper_states)/sizeof(oper_states[0]))
|
||||
fprintf(f, "state %#x ", state);
|
||||
else
|
||||
fprintf(f, "state %s ", oper_states[state]);
|
||||
}
|
||||
|
||||
static void print_portstate(FILE *f, __u8 state)
|
||||
{
|
||||
if (state <= BR_STATE_BLOCKING)
|
||||
print_string(PRINT_ANY, "state",
|
||||
"state %s ", stp_states[state]);
|
||||
fprintf(f, "state %s ", port_states[state]);
|
||||
else
|
||||
print_uint(PRINT_ANY, "state",
|
||||
"state (%d) ", state);
|
||||
fprintf(f, "state (%d) ", state);
|
||||
}
|
||||
|
||||
int parse_stp_state(const char *arg)
|
||||
static void print_onoff(FILE *f, char *flag, __u8 val)
|
||||
{
|
||||
size_t nstates = ARRAY_SIZE(stp_states);
|
||||
int state;
|
||||
|
||||
for (state = 0; state < nstates; state++)
|
||||
if (strcmp(stp_states[state], arg) == 0)
|
||||
break;
|
||||
|
||||
if (state == nstates)
|
||||
state = -1;
|
||||
|
||||
return state;
|
||||
fprintf(f, "%s %s ", flag, val ? "on" : "off");
|
||||
}
|
||||
|
||||
static void print_hwmode(__u16 mode)
|
||||
static void print_hwmode(FILE *f, __u16 mode)
|
||||
{
|
||||
if (mode >= ARRAY_SIZE(hw_mode))
|
||||
print_0xhex(PRINT_ANY, "hwmode",
|
||||
"hwmode %#llx ", mode);
|
||||
if (mode >= sizeof(hw_mode)/sizeof(hw_mode[0]))
|
||||
fprintf(f, "hwmode %#hx ", mode);
|
||||
else
|
||||
print_string(PRINT_ANY, "hwmode",
|
||||
"hwmode %s ", hw_mode[mode]);
|
||||
fprintf(f, "hwmode %s ", hw_mode[mode]);
|
||||
}
|
||||
|
||||
static void print_protinfo(FILE *fp, struct rtattr *attr)
|
||||
{
|
||||
if (attr->rta_type & NLA_F_NESTED) {
|
||||
struct rtattr *prtb[IFLA_BRPORT_MAX + 1];
|
||||
|
||||
parse_rtattr_nested(prtb, IFLA_BRPORT_MAX, attr);
|
||||
|
||||
if (prtb[IFLA_BRPORT_STATE])
|
||||
print_stp_state(rta_getattr_u8(prtb[IFLA_BRPORT_STATE]));
|
||||
|
||||
if (prtb[IFLA_BRPORT_PRIORITY])
|
||||
print_uint(PRINT_ANY, "priority",
|
||||
"priority %u ",
|
||||
rta_getattr_u16(prtb[IFLA_BRPORT_PRIORITY]));
|
||||
|
||||
if (prtb[IFLA_BRPORT_COST])
|
||||
print_uint(PRINT_ANY, "cost",
|
||||
"cost %u ",
|
||||
rta_getattr_u32(prtb[IFLA_BRPORT_COST]));
|
||||
|
||||
if (!show_details)
|
||||
return;
|
||||
|
||||
if (!is_json_context())
|
||||
fprintf(fp, "%s ", _SL_);
|
||||
|
||||
if (prtb[IFLA_BRPORT_MODE])
|
||||
print_on_off(PRINT_ANY, "hairpin", "hairpin %s ",
|
||||
rta_getattr_u8(prtb[IFLA_BRPORT_MODE]));
|
||||
if (prtb[IFLA_BRPORT_GUARD])
|
||||
print_on_off(PRINT_ANY, "guard", "guard %s ",
|
||||
rta_getattr_u8(prtb[IFLA_BRPORT_GUARD]));
|
||||
if (prtb[IFLA_BRPORT_PROTECT])
|
||||
print_on_off(PRINT_ANY, "root_block", "root_block %s ",
|
||||
rta_getattr_u8(prtb[IFLA_BRPORT_PROTECT]));
|
||||
if (prtb[IFLA_BRPORT_FAST_LEAVE])
|
||||
print_on_off(PRINT_ANY, "fastleave", "fastleave %s ",
|
||||
rta_getattr_u8(prtb[IFLA_BRPORT_FAST_LEAVE]));
|
||||
if (prtb[IFLA_BRPORT_LEARNING])
|
||||
print_on_off(PRINT_ANY, "learning", "learning %s ",
|
||||
rta_getattr_u8(prtb[IFLA_BRPORT_LEARNING]));
|
||||
if (prtb[IFLA_BRPORT_LEARNING_SYNC])
|
||||
print_on_off(PRINT_ANY, "learning_sync", "learning_sync %s ",
|
||||
rta_getattr_u8(prtb[IFLA_BRPORT_LEARNING_SYNC]));
|
||||
if (prtb[IFLA_BRPORT_UNICAST_FLOOD])
|
||||
print_on_off(PRINT_ANY, "flood", "flood %s ",
|
||||
rta_getattr_u8(prtb[IFLA_BRPORT_UNICAST_FLOOD]));
|
||||
if (prtb[IFLA_BRPORT_MCAST_FLOOD])
|
||||
print_on_off(PRINT_ANY, "mcast_flood", "mcast_flood %s ",
|
||||
rta_getattr_u8(prtb[IFLA_BRPORT_MCAST_FLOOD]));
|
||||
if (prtb[IFLA_BRPORT_MCAST_TO_UCAST])
|
||||
print_on_off(PRINT_ANY, "mcast_to_unicast", "mcast_to_unicast %s ",
|
||||
rta_getattr_u8(prtb[IFLA_BRPORT_MCAST_TO_UCAST]));
|
||||
if (prtb[IFLA_BRPORT_NEIGH_SUPPRESS])
|
||||
print_on_off(PRINT_ANY, "neigh_suppress", "neigh_suppress %s ",
|
||||
rta_getattr_u8(prtb[IFLA_BRPORT_NEIGH_SUPPRESS]));
|
||||
if (prtb[IFLA_BRPORT_VLAN_TUNNEL])
|
||||
print_on_off(PRINT_ANY, "vlan_tunnel", "vlan_tunnel %s ",
|
||||
rta_getattr_u8(prtb[IFLA_BRPORT_VLAN_TUNNEL]));
|
||||
|
||||
if (prtb[IFLA_BRPORT_BACKUP_PORT]) {
|
||||
int ifidx;
|
||||
|
||||
ifidx = rta_getattr_u32(prtb[IFLA_BRPORT_BACKUP_PORT]);
|
||||
print_string(PRINT_ANY,
|
||||
"backup_port", "backup_port %s ",
|
||||
ll_index_to_name(ifidx));
|
||||
}
|
||||
|
||||
if (prtb[IFLA_BRPORT_ISOLATED])
|
||||
print_on_off(PRINT_ANY, "isolated", "isolated %s ",
|
||||
rta_getattr_u8(prtb[IFLA_BRPORT_ISOLATED]));
|
||||
} else
|
||||
print_stp_state(rta_getattr_u8(attr));
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* This is reported by HW devices that have some bridging
|
||||
* capabilities.
|
||||
*/
|
||||
static void print_af_spec(struct rtattr *attr, int ifindex)
|
||||
{
|
||||
struct rtattr *aftb[IFLA_BRIDGE_MAX+1];
|
||||
|
||||
parse_rtattr_nested(aftb, IFLA_BRIDGE_MAX, attr);
|
||||
|
||||
if (aftb[IFLA_BRIDGE_MODE])
|
||||
print_hwmode(rta_getattr_u16(aftb[IFLA_BRIDGE_MODE]));
|
||||
|
||||
if (!show_details)
|
||||
return;
|
||||
|
||||
if (aftb[IFLA_BRIDGE_VLAN_INFO])
|
||||
print_vlan_info(aftb[IFLA_BRIDGE_VLAN_INFO], ifindex);
|
||||
}
|
||||
|
||||
int print_linkinfo(struct nlmsghdr *n, void *arg)
|
||||
int print_linkinfo(const struct sockaddr_nl *who,
|
||||
struct nlmsghdr *n, void *arg)
|
||||
{
|
||||
FILE *fp = arg;
|
||||
struct ifinfomsg *ifi = NLMSG_DATA(n);
|
||||
struct rtattr *tb[IFLA_MAX+1];
|
||||
unsigned int m_flag = 0;
|
||||
int len = n->nlmsg_len;
|
||||
const char *name;
|
||||
struct ifinfomsg *ifi = NLMSG_DATA(n);
|
||||
struct rtattr * tb[IFLA_MAX+1];
|
||||
char b1[IFNAMSIZ];
|
||||
|
||||
len -= NLMSG_LENGTH(sizeof(*ifi));
|
||||
if (len < 0) {
|
||||
fprintf(stderr, "Message too short!\n");
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
if (!(ifi->ifi_family == AF_BRIDGE || ifi->ifi_family == AF_UNSPEC))
|
||||
return 0;
|
||||
|
|
@ -223,87 +119,146 @@ int print_linkinfo(struct nlmsghdr *n, void *arg)
|
|||
|
||||
parse_rtattr_flags(tb, IFLA_MAX, IFLA_RTA(ifi), len, NLA_F_NESTED);
|
||||
|
||||
name = get_ifname_rta(ifi->ifi_index, tb[IFLA_IFNAME]);
|
||||
if (!name)
|
||||
if (tb[IFLA_IFNAME] == NULL) {
|
||||
fprintf(stderr, "BUG: nil ifname\n");
|
||||
return -1;
|
||||
|
||||
open_json_object(NULL);
|
||||
if (n->nlmsg_type == RTM_DELLINK)
|
||||
print_bool(PRINT_ANY, "deleted", "Deleted ", true);
|
||||
|
||||
print_int(PRINT_ANY, "ifindex", "%d: ", ifi->ifi_index);
|
||||
m_flag = print_name_and_link("%s: ", name, tb);
|
||||
print_link_flags(fp, ifi->ifi_flags, m_flag);
|
||||
|
||||
if (tb[IFLA_MTU])
|
||||
print_int(PRINT_ANY,
|
||||
"mtu", "mtu %u ",
|
||||
rta_getattr_u32(tb[IFLA_MTU]));
|
||||
|
||||
if (tb[IFLA_MASTER]) {
|
||||
int master = rta_getattr_u32(tb[IFLA_MASTER]);
|
||||
|
||||
print_string(PRINT_ANY, "master", "master %s ",
|
||||
ll_index_to_name(master));
|
||||
}
|
||||
|
||||
if (tb[IFLA_PROTINFO])
|
||||
print_protinfo(fp, tb[IFLA_PROTINFO]);
|
||||
if (n->nlmsg_type == RTM_DELLINK)
|
||||
fprintf(fp, "Deleted ");
|
||||
|
||||
if (tb[IFLA_AF_SPEC])
|
||||
print_af_spec(tb[IFLA_AF_SPEC], ifi->ifi_index);
|
||||
fprintf(fp, "%d: %s ", ifi->ifi_index,
|
||||
tb[IFLA_IFNAME] ? rta_getattr_str(tb[IFLA_IFNAME]) : "<nil>");
|
||||
|
||||
print_string(PRINT_FP, NULL, "%s", "\n");
|
||||
close_json_object();
|
||||
if (tb[IFLA_OPERSTATE])
|
||||
print_operstate(fp, rta_getattr_u8(tb[IFLA_OPERSTATE]));
|
||||
|
||||
if (tb[IFLA_LINK]) {
|
||||
SPRINT_BUF(b1);
|
||||
int iflink = rta_getattr_u32(tb[IFLA_LINK]);
|
||||
if (iflink == 0)
|
||||
fprintf(fp, "@NONE: ");
|
||||
else
|
||||
fprintf(fp, "@%s: ",
|
||||
if_indextoname(iflink, b1));
|
||||
} else
|
||||
fprintf(fp, ": ");
|
||||
|
||||
print_link_flags(fp, ifi->ifi_flags);
|
||||
|
||||
if (tb[IFLA_MTU])
|
||||
fprintf(fp, "mtu %u ", rta_getattr_u32(tb[IFLA_MTU]));
|
||||
|
||||
if (tb[IFLA_MASTER])
|
||||
fprintf(fp, "master %s ",
|
||||
if_indextoname(rta_getattr_u32(tb[IFLA_MASTER]), b1));
|
||||
|
||||
if (tb[IFLA_PROTINFO]) {
|
||||
if (tb[IFLA_PROTINFO]->rta_type & NLA_F_NESTED) {
|
||||
struct rtattr *prtb[IFLA_BRPORT_MAX+1];
|
||||
|
||||
parse_rtattr_nested(prtb, IFLA_BRPORT_MAX,
|
||||
tb[IFLA_PROTINFO]);
|
||||
|
||||
if (prtb[IFLA_BRPORT_STATE])
|
||||
print_portstate(fp,
|
||||
rta_getattr_u8(prtb[IFLA_BRPORT_STATE]));
|
||||
if (prtb[IFLA_BRPORT_PRIORITY])
|
||||
fprintf(fp, "priority %hu ",
|
||||
rta_getattr_u16(prtb[IFLA_BRPORT_PRIORITY]));
|
||||
if (prtb[IFLA_BRPORT_COST])
|
||||
fprintf(fp, "cost %u ",
|
||||
rta_getattr_u32(prtb[IFLA_BRPORT_COST]));
|
||||
|
||||
if (show_details) {
|
||||
fprintf(fp, "%s ", _SL_);
|
||||
|
||||
if (prtb[IFLA_BRPORT_MODE])
|
||||
print_onoff(fp, "hairpin",
|
||||
rta_getattr_u8(prtb[IFLA_BRPORT_MODE]));
|
||||
if (prtb[IFLA_BRPORT_GUARD])
|
||||
print_onoff(fp, "guard",
|
||||
rta_getattr_u8(prtb[IFLA_BRPORT_GUARD]));
|
||||
if (prtb[IFLA_BRPORT_PROTECT])
|
||||
print_onoff(fp, "root_block",
|
||||
rta_getattr_u8(prtb[IFLA_BRPORT_PROTECT]));
|
||||
if (prtb[IFLA_BRPORT_FAST_LEAVE])
|
||||
print_onoff(fp, "fastleave",
|
||||
rta_getattr_u8(prtb[IFLA_BRPORT_FAST_LEAVE]));
|
||||
if (prtb[IFLA_BRPORT_LEARNING])
|
||||
print_onoff(fp, "learning",
|
||||
rta_getattr_u8(prtb[IFLA_BRPORT_LEARNING]));
|
||||
if (prtb[IFLA_BRPORT_LEARNING_SYNC])
|
||||
print_onoff(fp, "learning_sync",
|
||||
rta_getattr_u8(prtb[IFLA_BRPORT_LEARNING_SYNC]));
|
||||
if (prtb[IFLA_BRPORT_UNICAST_FLOOD])
|
||||
print_onoff(fp, "flood",
|
||||
rta_getattr_u8(prtb[IFLA_BRPORT_UNICAST_FLOOD]));
|
||||
}
|
||||
} else
|
||||
print_portstate(fp, rta_getattr_u8(tb[IFLA_PROTINFO]));
|
||||
}
|
||||
|
||||
if (tb[IFLA_AF_SPEC]) {
|
||||
/* This is reported by HW devices that have some bridging
|
||||
* capabilities.
|
||||
*/
|
||||
struct rtattr *aftb[IFLA_BRIDGE_MAX+1];
|
||||
|
||||
parse_rtattr_nested(aftb, IFLA_BRIDGE_MAX, tb[IFLA_AF_SPEC]);
|
||||
|
||||
if (aftb[IFLA_BRIDGE_MODE])
|
||||
print_hwmode(fp, rta_getattr_u16(aftb[IFLA_BRIDGE_MODE]));
|
||||
}
|
||||
|
||||
fprintf(fp, "\n");
|
||||
fflush(fp);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void usage(void)
|
||||
{
|
||||
fprintf(stderr,
|
||||
"Usage: bridge link set dev DEV [ cost COST ] [ priority PRIO ] [ state STATE ]\n"
|
||||
" [ guard {on | off} ]\n"
|
||||
" [ hairpin {on | off} ]\n"
|
||||
" [ fastleave {on | off} ]\n"
|
||||
" [ root_block {on | off} ]\n"
|
||||
" [ learning {on | off} ]\n"
|
||||
" [ learning_sync {on | off} ]\n"
|
||||
" [ flood {on | off} ]\n"
|
||||
" [ mcast_flood {on | off} ]\n"
|
||||
" [ mcast_to_unicast {on | off} ]\n"
|
||||
" [ neigh_suppress {on | off} ]\n"
|
||||
" [ vlan_tunnel {on | off} ]\n"
|
||||
" [ isolated {on | off} ]\n"
|
||||
" [ hwmode {vepa | veb} ]\n"
|
||||
" [ backup_port DEVICE ] [ nobackup_port ]\n"
|
||||
" [ self ] [ master ]\n"
|
||||
" bridge link show [dev DEV]\n");
|
||||
fprintf(stderr, "Usage: bridge link set dev DEV [ cost COST ] [ priority PRIO ] [ state STATE ]\n");
|
||||
fprintf(stderr, " [ guard {on | off} ]\n");
|
||||
fprintf(stderr, " [ hairpin {on | off} ] \n");
|
||||
fprintf(stderr, " [ fastleave {on | off} ]\n");
|
||||
fprintf(stderr, " [ root_block {on | off} ]\n");
|
||||
fprintf(stderr, " [ learning {on | off} ]\n");
|
||||
fprintf(stderr, " [ learning_sync {on | off} ]\n");
|
||||
fprintf(stderr, " [ flood {on | off} ]\n");
|
||||
fprintf(stderr, " [ hwmode {vepa | veb} ]\n");
|
||||
fprintf(stderr, " [ self ] [ master ]\n");
|
||||
fprintf(stderr, " bridge link show [dev DEV]\n");
|
||||
exit(-1);
|
||||
}
|
||||
|
||||
/*
 * Parse the "on"/"off" keyword given for the option named by arg.
 *
 * On "on" *attr is set to 1, on "off" it is set to 0, and true is returned.
 * Any other value leaves *attr untouched, prints an error naming arg on
 * stderr and returns false.
 */
static bool on_off(char *arg, __s8 *attr, char *val)
{
	const bool enable = strcmp(val, "on") == 0;

	/* Neither keyword matched: report it and let the caller bail out. */
	if (!enable && strcmp(val, "off") != 0) {
		fprintf(stderr,
			"Error: argument of \"%s\" must be \"on\" or \"off\"\n",
			arg);
		return false;
	}

	*attr = enable ? 1 : 0;
	return true;
}
|
||||
|
||||
static int brlink_modify(int argc, char **argv)
|
||||
{
|
||||
struct {
|
||||
struct nlmsghdr n;
|
||||
struct ifinfomsg ifm;
|
||||
char buf[512];
|
||||
} req = {
|
||||
.n.nlmsg_len = NLMSG_LENGTH(sizeof(struct ifinfomsg)),
|
||||
.n.nlmsg_flags = NLM_F_REQUEST,
|
||||
.n.nlmsg_type = RTM_SETLINK,
|
||||
.ifm.ifi_family = PF_BRIDGE,
|
||||
};
|
||||
} req;
|
||||
char *d = NULL;
|
||||
int backup_port_idx = -1;
|
||||
__s8 neigh_suppress = -1;
|
||||
__s8 learning = -1;
|
||||
__s8 learning_sync = -1;
|
||||
__s8 flood = -1;
|
||||
__s8 vlan_tunnel = -1;
|
||||
__s8 mcast_flood = -1;
|
||||
__s8 mcast_to_unicast = -1;
|
||||
__s8 isolated = -1;
|
||||
__s8 hairpin = -1;
|
||||
__s8 bpdu_guard = -1;
|
||||
__s8 fast_leave = -1;
|
||||
|
|
@ -314,7 +269,13 @@ static int brlink_modify(int argc, char **argv)
|
|||
__s16 mode = -1;
|
||||
__u16 flags = 0;
|
||||
struct rtattr *nest;
|
||||
int ret;
|
||||
|
||||
memset(&req, 0, sizeof(req));
|
||||
|
||||
req.n.nlmsg_len = NLMSG_LENGTH(sizeof(struct ifinfomsg));
|
||||
req.n.nlmsg_flags = NLM_F_REQUEST;
|
||||
req.n.nlmsg_type = RTM_SETLINK;
|
||||
req.ifm.ifi_family = PF_BRIDGE;
|
||||
|
||||
while (argc > 0) {
|
||||
if (strcmp(*argv, "dev") == 0) {
|
||||
|
|
@ -322,49 +283,32 @@ static int brlink_modify(int argc, char **argv)
|
|||
d = *argv;
|
||||
} else if (strcmp(*argv, "guard") == 0) {
|
||||
NEXT_ARG();
|
||||
bpdu_guard = parse_on_off("guard", *argv, &ret);
|
||||
if (ret)
|
||||
return ret;
|
||||
if (!on_off("guard", &bpdu_guard, *argv))
|
||||
return -1;
|
||||
} else if (strcmp(*argv, "hairpin") == 0) {
|
||||
NEXT_ARG();
|
||||
hairpin = parse_on_off("hairpin", *argv, &ret);
|
||||
if (ret)
|
||||
return ret;
|
||||
if (!on_off("hairping", &hairpin, *argv))
|
||||
return -1;
|
||||
} else if (strcmp(*argv, "fastleave") == 0) {
|
||||
NEXT_ARG();
|
||||
fast_leave = parse_on_off("fastleave", *argv, &ret);
|
||||
if (ret)
|
||||
return ret;
|
||||
if (!on_off("fastleave", &fast_leave, *argv))
|
||||
return -1;
|
||||
} else if (strcmp(*argv, "root_block") == 0) {
|
||||
NEXT_ARG();
|
||||
root_block = parse_on_off("root_block", *argv, &ret);
|
||||
if (ret)
|
||||
return ret;
|
||||
if (!on_off("root_block", &root_block, *argv))
|
||||
return -1;
|
||||
} else if (strcmp(*argv, "learning") == 0) {
|
||||
NEXT_ARG();
|
||||
learning = parse_on_off("learning", *argv, &ret);
|
||||
if (ret)
|
||||
return ret;
|
||||
if (!on_off("learning", &learning, *argv))
|
||||
return -1;
|
||||
} else if (strcmp(*argv, "learning_sync") == 0) {
|
||||
NEXT_ARG();
|
||||
learning_sync = parse_on_off("learning_sync", *argv, &ret);
|
||||
if (ret)
|
||||
return ret;
|
||||
if (!on_off("learning_sync", &learning_sync, *argv))
|
||||
return -1;
|
||||
} else if (strcmp(*argv, "flood") == 0) {
|
||||
NEXT_ARG();
|
||||
flood = parse_on_off("flood", *argv, &ret);
|
||||
if (ret)
|
||||
return ret;
|
||||
} else if (strcmp(*argv, "mcast_flood") == 0) {
|
||||
NEXT_ARG();
|
||||
mcast_flood = parse_on_off("mcast_flood", *argv, &ret);
|
||||
if (ret)
|
||||
return ret;
|
||||
} else if (strcmp(*argv, "mcast_to_unicast") == 0) {
|
||||
NEXT_ARG();
|
||||
mcast_to_unicast = parse_on_off("mcast_to_unicast", *argv, &ret);
|
||||
if (ret)
|
||||
return ret;
|
||||
if (!on_off("flood", &flood, *argv))
|
||||
return -1;
|
||||
} else if (strcmp(*argv, "cost") == 0) {
|
||||
NEXT_ARG();
|
||||
cost = atoi(*argv);
|
||||
|
|
@ -374,11 +318,13 @@ static int brlink_modify(int argc, char **argv)
|
|||
} else if (strcmp(*argv, "state") == 0) {
|
||||
NEXT_ARG();
|
||||
char *endptr;
|
||||
|
||||
size_t nstates = sizeof(port_states) / sizeof(*port_states);
|
||||
state = strtol(*argv, &endptr, 10);
|
||||
if (!(**argv != '\0' && *endptr == '\0')) {
|
||||
state = parse_stp_state(*argv);
|
||||
if (state == -1) {
|
||||
for (state = 0; state < nstates; state++)
|
||||
if (strcmp(port_states[state], *argv) == 0)
|
||||
break;
|
||||
if (state == nstates) {
|
||||
fprintf(stderr,
|
||||
"Error: invalid STP port state\n");
|
||||
return -1;
|
||||
|
|
@ -393,38 +339,14 @@ static int brlink_modify(int argc, char **argv)
|
|||
mode = BRIDGE_MODE_VEB;
|
||||
else {
|
||||
fprintf(stderr,
|
||||
"Mode argument must be \"vepa\" or \"veb\".\n");
|
||||
"Mode argument must be \"vepa\" or "
|
||||
"\"veb\".\n");
|
||||
return -1;
|
||||
}
|
||||
} else if (strcmp(*argv, "self") == 0) {
|
||||
flags |= BRIDGE_FLAGS_SELF;
|
||||
} else if (strcmp(*argv, "master") == 0) {
|
||||
flags |= BRIDGE_FLAGS_MASTER;
|
||||
} else if (strcmp(*argv, "neigh_suppress") == 0) {
|
||||
NEXT_ARG();
|
||||
neigh_suppress = parse_on_off("neigh_suppress", *argv, &ret);
|
||||
if (ret)
|
||||
return ret;
|
||||
} else if (strcmp(*argv, "vlan_tunnel") == 0) {
|
||||
NEXT_ARG();
|
||||
vlan_tunnel = parse_on_off("vlan_tunnel", *argv, &ret);
|
||||
if (ret)
|
||||
return ret;
|
||||
} else if (strcmp(*argv, "isolated") == 0) {
|
||||
NEXT_ARG();
|
||||
isolated = parse_on_off("isolated", *argv, &ret);
|
||||
if (ret)
|
||||
return ret;
|
||||
} else if (strcmp(*argv, "backup_port") == 0) {
|
||||
NEXT_ARG();
|
||||
backup_port_idx = ll_name_to_index(*argv);
|
||||
if (!backup_port_idx) {
|
||||
fprintf(stderr, "Error: device %s does not exist\n",
|
||||
*argv);
|
||||
return -1;
|
||||
}
|
||||
} else if (strcmp(*argv, "nobackup_port") == 0) {
|
||||
backup_port_idx = 0;
|
||||
} else {
|
||||
usage();
|
||||
}
|
||||
|
|
@ -459,12 +381,6 @@ static int brlink_modify(int argc, char **argv)
|
|||
addattr8(&req.n, sizeof(req), IFLA_BRPORT_PROTECT, root_block);
|
||||
if (flood >= 0)
|
||||
addattr8(&req.n, sizeof(req), IFLA_BRPORT_UNICAST_FLOOD, flood);
|
||||
if (mcast_flood >= 0)
|
||||
addattr8(&req.n, sizeof(req), IFLA_BRPORT_MCAST_FLOOD,
|
||||
mcast_flood);
|
||||
if (mcast_to_unicast >= 0)
|
||||
addattr8(&req.n, sizeof(req), IFLA_BRPORT_MCAST_TO_UCAST,
|
||||
mcast_to_unicast);
|
||||
if (learning >= 0)
|
||||
addattr8(&req.n, sizeof(req), IFLA_BRPORT_LEARNING, learning);
|
||||
if (learning_sync >= 0)
|
||||
|
|
@ -480,19 +396,6 @@ static int brlink_modify(int argc, char **argv)
|
|||
if (state >= 0)
|
||||
addattr8(&req.n, sizeof(req), IFLA_BRPORT_STATE, state);
|
||||
|
||||
if (neigh_suppress != -1)
|
||||
addattr8(&req.n, sizeof(req), IFLA_BRPORT_NEIGH_SUPPRESS,
|
||||
neigh_suppress);
|
||||
if (vlan_tunnel != -1)
|
||||
addattr8(&req.n, sizeof(req), IFLA_BRPORT_VLAN_TUNNEL,
|
||||
vlan_tunnel);
|
||||
if (isolated != -1)
|
||||
addattr8(&req.n, sizeof(req), IFLA_BRPORT_ISOLATED, isolated);
|
||||
|
||||
if (backup_port_idx != -1)
|
||||
addattr32(&req.n, sizeof(req), IFLA_BRPORT_BACKUP_PORT,
|
||||
backup_port_idx);
|
||||
|
||||
addattr_nest_end(&req.n, nest);
|
||||
|
||||
/* IFLA_AF_SPEC nested attribute. Contains IFLA_BRIDGE_FLAGS that
|
||||
|
|
@ -512,7 +415,7 @@ static int brlink_modify(int argc, char **argv)
|
|||
addattr_nest_end(&req.n, nest);
|
||||
}
|
||||
|
||||
if (rtnl_talk(&rth, &req.n, NULL) < 0)
|
||||
if (rtnl_talk(&rth, &req.n, NULL, 0) < 0)
|
||||
return -1;
|
||||
|
||||
return 0;
|
||||
|
|
@ -533,34 +436,22 @@ static int brlink_show(int argc, char **argv)
|
|||
}
|
||||
|
||||
if (filter_dev) {
|
||||
filter_index = ll_name_to_index(filter_dev);
|
||||
if (!filter_index)
|
||||
return nodev(filter_dev);
|
||||
}
|
||||
|
||||
if (show_details) {
|
||||
if (rtnl_linkdump_req_filter(&rth, PF_BRIDGE,
|
||||
(compress_vlans ?
|
||||
RTEXT_FILTER_BRVLAN_COMPRESSED :
|
||||
RTEXT_FILTER_BRVLAN)) < 0) {
|
||||
perror("Cannon send dump request");
|
||||
exit(1);
|
||||
}
|
||||
} else {
|
||||
if (rtnl_linkdump_req(&rth, PF_BRIDGE) < 0) {
|
||||
perror("Cannon send dump request");
|
||||
exit(1);
|
||||
if ((filter_index = ll_name_to_index(filter_dev)) == 0) {
|
||||
fprintf(stderr, "Cannot find device \"%s\"\n",
|
||||
filter_dev);
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
new_json_obj(json);
|
||||
if (rtnl_wilddump_request(&rth, PF_BRIDGE, RTM_GETLINK) < 0) {
|
||||
perror("Cannon send dump request");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
if (rtnl_dump_filter(&rth, print_linkinfo, stdout) < 0) {
|
||||
fprintf(stderr, "Dump terminated\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
delete_json_obj();
|
||||
fflush(stdout);
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
|
|
|||
511
bridge/mdb.c
511
bridge/mdb.c
|
|
@ -1,4 +1,3 @@
|
|||
/* SPDX-License-Identifier: GPL-2.0 */
|
||||
/*
|
||||
* Get mdb table with netlink
|
||||
*/
|
||||
|
|
@ -16,306 +15,77 @@
|
|||
#include <arpa/inet.h>
|
||||
|
||||
#include "libnetlink.h"
|
||||
#include "utils.h"
|
||||
#include "br_common.h"
|
||||
#include "rt_names.h"
|
||||
#include "json_print.h"
|
||||
#include "utils.h"
|
||||
|
||||
#ifndef MDBA_RTA
|
||||
#define MDBA_RTA(r) \
|
||||
((struct rtattr *)(((char *)(r)) + NLMSG_ALIGN(sizeof(struct br_port_msg))))
|
||||
((struct rtattr*)(((char*)(r)) + NLMSG_ALIGN(sizeof(struct br_port_msg))))
|
||||
#endif
|
||||
|
||||
static unsigned int filter_index, filter_vlan;
|
||||
static unsigned int filter_index;
|
||||
|
||||
static void usage(void)
|
||||
{
|
||||
fprintf(stderr,
|
||||
"Usage: bridge mdb { add | del } dev DEV port PORT grp GROUP [src SOURCE] [permanent | temp] [vid VID]\n"
|
||||
" bridge mdb {show} [ dev DEV ] [ vid VID ]\n");
|
||||
fprintf(stderr, "Usage: bridge mdb { add | del } dev DEV port PORT grp GROUP [permanent | temp]\n");
|
||||
fprintf(stderr, " bridge mdb {show} [ dev DEV ]\n");
|
||||
exit(-1);
|
||||
}
|
||||
|
||||
static bool is_temp_mcast_rtr(__u8 type)
|
||||
static void br_print_router_ports(FILE *f, struct rtattr *attr)
|
||||
{
|
||||
return type == MDB_RTR_TYPE_TEMP_QUERY || type == MDB_RTR_TYPE_TEMP;
|
||||
}
|
||||
|
||||
/*
 * Render a timer value as "<seconds>.<hundredths>".
 *
 * ticks is converted to a struct timeval by __jiffies_to_tv(). A non-zero
 * align pads the seconds field to a minimum width of four characters.
 *
 * The text lives in a static buffer that the next call overwrites, so the
 * result has to be consumed before format_timer() is called again.
 */
static const char *format_timer(__u32 ticks, int align)
{
	static char tbuf[32];
	struct timeval tv;
	unsigned long secs, centisecs;

	__jiffies_to_tv(&tv, ticks);
	secs = (unsigned long)tv.tv_sec;
	/* microseconds -> hundredths of a second */
	centisecs = (unsigned long)tv.tv_usec / 10000;

	if (!align)
		snprintf(tbuf, sizeof(tbuf), "%lu.%.2lu", secs, centisecs);
	else
		snprintf(tbuf, sizeof(tbuf), "%4lu.%.2lu", secs, centisecs);

	return tbuf;
}
|
||||
|
||||
void br_print_router_port_stats(struct rtattr *pattr)
|
||||
{
|
||||
struct rtattr *tb[MDBA_ROUTER_PATTR_MAX + 1];
|
||||
|
||||
parse_rtattr(tb, MDBA_ROUTER_PATTR_MAX, MDB_RTR_RTA(RTA_DATA(pattr)),
|
||||
RTA_PAYLOAD(pattr) - RTA_ALIGN(sizeof(uint32_t)));
|
||||
|
||||
if (tb[MDBA_ROUTER_PATTR_TIMER]) {
|
||||
__u32 timer = rta_getattr_u32(tb[MDBA_ROUTER_PATTR_TIMER]);
|
||||
|
||||
print_string(PRINT_ANY, "timer", " %s",
|
||||
format_timer(timer, 1));
|
||||
}
|
||||
|
||||
if (tb[MDBA_ROUTER_PATTR_TYPE]) {
|
||||
__u8 type = rta_getattr_u8(tb[MDBA_ROUTER_PATTR_TYPE]);
|
||||
|
||||
print_string(PRINT_ANY, "type", " %s",
|
||||
is_temp_mcast_rtr(type) ? "temp" : "permanent");
|
||||
}
|
||||
}
|
||||
|
||||
static void br_print_router_ports(FILE *f, struct rtattr *attr,
|
||||
const char *brifname)
|
||||
{
|
||||
int rem = RTA_PAYLOAD(attr);
|
||||
struct rtattr *i;
|
||||
|
||||
if (is_json_context())
|
||||
open_json_array(PRINT_JSON, brifname);
|
||||
else if (!show_stats)
|
||||
fprintf(f, "router ports on %s: ", brifname);
|
||||
|
||||
for (i = RTA_DATA(attr); RTA_OK(i, rem); i = RTA_NEXT(i, rem)) {
|
||||
uint32_t *port_ifindex = RTA_DATA(i);
|
||||
const char *port_ifname = ll_index_to_name(*port_ifindex);
|
||||
|
||||
if (is_json_context()) {
|
||||
open_json_object(NULL);
|
||||
print_string(PRINT_JSON, "port", NULL, port_ifname);
|
||||
|
||||
if (show_stats)
|
||||
br_print_router_port_stats(i);
|
||||
close_json_object();
|
||||
} else if (show_stats) {
|
||||
fprintf(f, "router ports on %s: %s",
|
||||
brifname, port_ifname);
|
||||
|
||||
br_print_router_port_stats(i);
|
||||
fprintf(f, "\n");
|
||||
} else {
|
||||
fprintf(f, "%s ", port_ifname);
|
||||
}
|
||||
}
|
||||
|
||||
if (!show_stats)
|
||||
print_nl();
|
||||
|
||||
close_json_array(PRINT_JSON, NULL);
|
||||
}
|
||||
|
||||
static void print_src_entry(struct rtattr *src_attr, int af, const char *sep)
|
||||
{
|
||||
struct rtattr *stb[MDBA_MDB_SRCATTR_MAX + 1];
|
||||
SPRINT_BUF(abuf);
|
||||
const char *addr;
|
||||
__u32 timer_val;
|
||||
|
||||
parse_rtattr_nested(stb, MDBA_MDB_SRCATTR_MAX, src_attr);
|
||||
if (!stb[MDBA_MDB_SRCATTR_ADDRESS] || !stb[MDBA_MDB_SRCATTR_TIMER])
|
||||
return;
|
||||
|
||||
addr = inet_ntop(af, RTA_DATA(stb[MDBA_MDB_SRCATTR_ADDRESS]), abuf,
|
||||
sizeof(abuf));
|
||||
if (!addr)
|
||||
return;
|
||||
timer_val = rta_getattr_u32(stb[MDBA_MDB_SRCATTR_TIMER]);
|
||||
|
||||
open_json_object(NULL);
|
||||
print_string(PRINT_FP, NULL, "%s", sep);
|
||||
print_color_string(PRINT_ANY, ifa_family_color(af),
|
||||
"address", "%s", addr);
|
||||
print_string(PRINT_ANY, "timer", "/%s", format_timer(timer_val, 0));
|
||||
close_json_object();
|
||||
}
|
||||
|
||||
static void print_mdb_entry(FILE *f, int ifindex, const struct br_mdb_entry *e,
|
||||
struct nlmsghdr *n, struct rtattr **tb)
|
||||
{
|
||||
const void *grp, *src;
|
||||
const char *addr;
|
||||
SPRINT_BUF(abuf);
|
||||
const char *dev;
|
||||
int af;
|
||||
|
||||
if (filter_vlan && e->vid != filter_vlan)
|
||||
return;
|
||||
|
||||
if (!e->addr.proto) {
|
||||
af = AF_PACKET;
|
||||
grp = &e->addr.u.mac_addr;
|
||||
} else if (e->addr.proto == htons(ETH_P_IP)) {
|
||||
af = AF_INET;
|
||||
grp = &e->addr.u.ip4;
|
||||
} else {
|
||||
af = AF_INET6;
|
||||
grp = &e->addr.u.ip6;
|
||||
}
|
||||
dev = ll_index_to_name(ifindex);
|
||||
|
||||
open_json_object(NULL);
|
||||
|
||||
print_int(PRINT_JSON, "index", NULL, ifindex);
|
||||
print_color_string(PRINT_ANY, COLOR_IFNAME, "dev", "dev %s", dev);
|
||||
print_string(PRINT_ANY, "port", " port %s",
|
||||
ll_index_to_name(e->ifindex));
|
||||
|
||||
/* The ETH_ALEN argument is ignored for all cases but AF_PACKET */
|
||||
addr = rt_addr_n2a_r(af, ETH_ALEN, grp, abuf, sizeof(abuf));
|
||||
if (!addr)
|
||||
return;
|
||||
|
||||
print_color_string(PRINT_ANY, ifa_family_color(af),
|
||||
"grp", " grp %s", addr);
|
||||
|
||||
if (tb && tb[MDBA_MDB_EATTR_SOURCE]) {
|
||||
src = (const void *)RTA_DATA(tb[MDBA_MDB_EATTR_SOURCE]);
|
||||
print_color_string(PRINT_ANY, ifa_family_color(af),
|
||||
"src", " src %s",
|
||||
inet_ntop(af, src, abuf, sizeof(abuf)));
|
||||
}
|
||||
print_string(PRINT_ANY, "state", " %s",
|
||||
(e->state & MDB_PERMANENT) ? "permanent" : "temp");
|
||||
if (show_details && tb) {
|
||||
if (tb[MDBA_MDB_EATTR_GROUP_MODE]) {
|
||||
__u8 mode = rta_getattr_u8(tb[MDBA_MDB_EATTR_GROUP_MODE]);
|
||||
|
||||
print_string(PRINT_ANY, "filter_mode", " filter_mode %s",
|
||||
mode == MCAST_INCLUDE ? "include" :
|
||||
"exclude");
|
||||
}
|
||||
if (tb[MDBA_MDB_EATTR_SRC_LIST]) {
|
||||
struct rtattr *i, *attr = tb[MDBA_MDB_EATTR_SRC_LIST];
|
||||
const char *sep = " ";
|
||||
int rem;
|
||||
|
||||
open_json_array(PRINT_ANY, is_json_context() ?
|
||||
"source_list" :
|
||||
" source_list");
|
||||
rem = RTA_PAYLOAD(attr);
|
||||
for (i = RTA_DATA(attr); RTA_OK(i, rem);
|
||||
i = RTA_NEXT(i, rem)) {
|
||||
print_src_entry(i, af, sep);
|
||||
sep = ",";
|
||||
}
|
||||
close_json_array(PRINT_JSON, NULL);
|
||||
}
|
||||
if (tb[MDBA_MDB_EATTR_RTPROT]) {
|
||||
__u8 rtprot = rta_getattr_u8(tb[MDBA_MDB_EATTR_RTPROT]);
|
||||
SPRINT_BUF(rtb);
|
||||
|
||||
print_string(PRINT_ANY, "protocol", " proto %s ",
|
||||
rtnl_rtprot_n2a(rtprot, rtb, sizeof(rtb)));
|
||||
}
|
||||
}
|
||||
|
||||
open_json_array(PRINT_JSON, "flags");
|
||||
if (e->flags & MDB_FLAGS_OFFLOAD)
|
||||
print_string(PRINT_ANY, NULL, " %s", "offload");
|
||||
if (e->flags & MDB_FLAGS_FAST_LEAVE)
|
||||
print_string(PRINT_ANY, NULL, " %s", "fast_leave");
|
||||
if (e->flags & MDB_FLAGS_STAR_EXCL)
|
||||
print_string(PRINT_ANY, NULL, " %s", "added_by_star_ex");
|
||||
if (e->flags & MDB_FLAGS_BLOCKED)
|
||||
print_string(PRINT_ANY, NULL, " %s", "blocked");
|
||||
close_json_array(PRINT_JSON, NULL);
|
||||
|
||||
if (e->vid)
|
||||
print_uint(PRINT_ANY, "vid", " vid %u", e->vid);
|
||||
|
||||
if (show_stats && tb && tb[MDBA_MDB_EATTR_TIMER]) {
|
||||
__u32 timer = rta_getattr_u32(tb[MDBA_MDB_EATTR_TIMER]);
|
||||
|
||||
print_string(PRINT_ANY, "timer", " %s",
|
||||
format_timer(timer, 1));
|
||||
}
|
||||
|
||||
print_nl();
|
||||
close_json_object();
|
||||
}
|
||||
|
||||
static void br_print_mdb_entry(FILE *f, int ifindex, struct rtattr *attr,
|
||||
struct nlmsghdr *n)
|
||||
{
|
||||
struct rtattr *etb[MDBA_MDB_EATTR_MAX + 1];
|
||||
struct br_mdb_entry *e;
|
||||
uint32_t *port_ifindex;
|
||||
struct rtattr *i;
|
||||
int rem;
|
||||
|
||||
rem = RTA_PAYLOAD(attr);
|
||||
for (i = RTA_DATA(attr); RTA_OK(i, rem); i = RTA_NEXT(i, rem)) {
|
||||
e = RTA_DATA(i);
|
||||
parse_rtattr_flags(etb, MDBA_MDB_EATTR_MAX, MDB_RTA(RTA_DATA(i)),
|
||||
RTA_PAYLOAD(i) - RTA_ALIGN(sizeof(*e)),
|
||||
NLA_F_NESTED);
|
||||
print_mdb_entry(f, ifindex, e, n, etb);
|
||||
port_ifindex = RTA_DATA(i);
|
||||
fprintf(f, "%s ", ll_index_to_name(*port_ifindex));
|
||||
}
|
||||
|
||||
fprintf(f, "\n");
|
||||
}
|
||||
|
||||
static void print_mdb_entries(FILE *fp, struct nlmsghdr *n,
|
||||
int ifindex, struct rtattr *mdb)
|
||||
static void print_mdb_entry(FILE *f, int ifindex, struct br_mdb_entry *e)
|
||||
{
|
||||
SPRINT_BUF(abuf);
|
||||
|
||||
if (e->addr.proto == htons(ETH_P_IP))
|
||||
fprintf(f, "dev %s port %s grp %s %s\n", ll_index_to_name(ifindex),
|
||||
ll_index_to_name(e->ifindex),
|
||||
inet_ntop(AF_INET, &e->addr.u.ip4, abuf, sizeof(abuf)),
|
||||
(e->state & MDB_PERMANENT) ? "permanent" : "temp");
|
||||
else
|
||||
fprintf(f, "dev %s port %s grp %s %s\n", ll_index_to_name(ifindex),
|
||||
ll_index_to_name(e->ifindex),
|
||||
inet_ntop(AF_INET6, &e->addr.u.ip6, abuf, sizeof(abuf)),
|
||||
(e->state & MDB_PERMANENT) ? "permanent" : "temp");
|
||||
}
|
||||
|
||||
static void br_print_mdb_entry(FILE *f, int ifindex, struct rtattr *attr)
|
||||
{
|
||||
int rem = RTA_PAYLOAD(mdb);
|
||||
struct rtattr *i;
|
||||
int rem;
|
||||
struct br_mdb_entry *e;
|
||||
|
||||
for (i = RTA_DATA(mdb); RTA_OK(i, rem); i = RTA_NEXT(i, rem))
|
||||
br_print_mdb_entry(fp, ifindex, i, n);
|
||||
}
|
||||
|
||||
static void print_router_entries(FILE *fp, struct nlmsghdr *n,
|
||||
int ifindex, struct rtattr *router)
|
||||
{
|
||||
const char *brifname = ll_index_to_name(ifindex);
|
||||
|
||||
if (n->nlmsg_type == RTM_GETMDB) {
|
||||
if (show_details)
|
||||
br_print_router_ports(fp, router, brifname);
|
||||
} else {
|
||||
struct rtattr *i = RTA_DATA(router);
|
||||
uint32_t *port_ifindex = RTA_DATA(i);
|
||||
const char *port_name = ll_index_to_name(*port_ifindex);
|
||||
|
||||
if (is_json_context()) {
|
||||
open_json_array(PRINT_JSON, brifname);
|
||||
open_json_object(NULL);
|
||||
|
||||
print_string(PRINT_JSON, "port", NULL,
|
||||
port_name);
|
||||
close_json_object();
|
||||
close_json_array(PRINT_JSON, NULL);
|
||||
} else {
|
||||
fprintf(fp, "router port dev %s master %s\n",
|
||||
port_name, brifname);
|
||||
}
|
||||
rem = RTA_PAYLOAD(attr);
|
||||
for (i = RTA_DATA(attr); RTA_OK(i, rem); i = RTA_NEXT(i, rem)) {
|
||||
e = RTA_DATA(i);
|
||||
print_mdb_entry(f, ifindex, e);
|
||||
}
|
||||
}
|
||||
|
||||
static int __parse_mdb_nlmsg(struct nlmsghdr *n, struct rtattr **tb)
|
||||
int print_mdb(const struct sockaddr_nl *who, struct nlmsghdr *n, void *arg)
|
||||
{
|
||||
FILE *fp = arg;
|
||||
struct br_port_msg *r = NLMSG_DATA(n);
|
||||
int len = n->nlmsg_len;
|
||||
struct rtattr * tb[MDBA_MAX+1];
|
||||
|
||||
if (n->nlmsg_type != RTM_GETMDB &&
|
||||
n->nlmsg_type != RTM_NEWMDB &&
|
||||
n->nlmsg_type != RTM_DELMDB) {
|
||||
fprintf(stderr,
|
||||
"Not RTM_GETMDB, RTM_NEWMDB or RTM_DELMDB: %08x %08x %08x\n",
|
||||
if (n->nlmsg_type != RTM_GETMDB && n->nlmsg_type != RTM_NEWMDB && n->nlmsg_type != RTM_DELMDB) {
|
||||
fprintf(stderr, "Not RTM_GETMDB, RTM_NEWMDB or RTM_DELMDB: %08x %08x %08x\n",
|
||||
n->nlmsg_len, n->nlmsg_type, n->nlmsg_flags);
|
||||
|
||||
return 0;
|
||||
|
|
@ -332,62 +102,20 @@ static int __parse_mdb_nlmsg(struct nlmsghdr *n, struct rtattr **tb)
|
|||
|
||||
parse_rtattr(tb, MDBA_MAX, MDBA_RTA(r), n->nlmsg_len - NLMSG_LENGTH(sizeof(*r)));
|
||||
|
||||
return 1;
|
||||
}
|
||||
if (tb[MDBA_MDB]) {
|
||||
struct rtattr *i;
|
||||
int rem = RTA_PAYLOAD(tb[MDBA_MDB]);
|
||||
|
||||
static int print_mdbs(struct nlmsghdr *n, void *arg)
|
||||
{
|
||||
struct br_port_msg *r = NLMSG_DATA(n);
|
||||
struct rtattr *tb[MDBA_MAX+1];
|
||||
FILE *fp = arg;
|
||||
int ret;
|
||||
for (i = RTA_DATA(tb[MDBA_MDB]); RTA_OK(i, rem); i = RTA_NEXT(i, rem))
|
||||
br_print_mdb_entry(fp, r->ifindex, i);
|
||||
}
|
||||
|
||||
ret = __parse_mdb_nlmsg(n, tb);
|
||||
if (ret != 1)
|
||||
return ret;
|
||||
|
||||
if (tb[MDBA_MDB])
|
||||
print_mdb_entries(fp, n, r->ifindex, tb[MDBA_MDB]);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int print_rtrs(struct nlmsghdr *n, void *arg)
|
||||
{
|
||||
struct br_port_msg *r = NLMSG_DATA(n);
|
||||
struct rtattr *tb[MDBA_MAX+1];
|
||||
FILE *fp = arg;
|
||||
int ret;
|
||||
|
||||
ret = __parse_mdb_nlmsg(n, tb);
|
||||
if (ret != 1)
|
||||
return ret;
|
||||
|
||||
if (tb[MDBA_ROUTER])
|
||||
print_router_entries(fp, n, r->ifindex, tb[MDBA_ROUTER]);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int print_mdb_mon(struct nlmsghdr *n, void *arg)
|
||||
{
|
||||
struct br_port_msg *r = NLMSG_DATA(n);
|
||||
struct rtattr *tb[MDBA_MAX+1];
|
||||
FILE *fp = arg;
|
||||
int ret;
|
||||
|
||||
ret = __parse_mdb_nlmsg(n, tb);
|
||||
if (ret != 1)
|
||||
return ret;
|
||||
|
||||
if (n->nlmsg_type == RTM_DELMDB)
|
||||
print_bool(PRINT_ANY, "deleted", "Deleted ", true);
|
||||
|
||||
if (tb[MDBA_MDB])
|
||||
print_mdb_entries(fp, n, r->ifindex, tb[MDBA_MDB]);
|
||||
|
||||
if (tb[MDBA_ROUTER])
|
||||
print_router_entries(fp, n, r->ifindex, tb[MDBA_ROUTER]);
|
||||
if (tb[MDBA_ROUTER]) {
|
||||
if (show_details) {
|
||||
fprintf(fp, "router ports on %s: ", ll_index_to_name(r->ifindex));
|
||||
br_print_router_ports(fp, tb[MDBA_ROUTER]);
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
|
@ -402,91 +130,49 @@ static int mdb_show(int argc, char **argv)
|
|||
if (filter_dev)
|
||||
duparg("dev", *argv);
|
||||
filter_dev = *argv;
|
||||
} else if (strcmp(*argv, "vid") == 0) {
|
||||
NEXT_ARG();
|
||||
if (filter_vlan)
|
||||
duparg("vid", *argv);
|
||||
filter_vlan = atoi(*argv);
|
||||
}
|
||||
argc--; argv++;
|
||||
}
|
||||
|
||||
if (filter_dev) {
|
||||
filter_index = ll_name_to_index(filter_dev);
|
||||
if (!filter_index)
|
||||
return nodev(filter_dev);
|
||||
filter_index = if_nametoindex(filter_dev);
|
||||
if (filter_index == 0) {
|
||||
fprintf(stderr, "Cannot find device \"%s\"\n",
|
||||
filter_dev);
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
new_json_obj(json);
|
||||
open_json_object(NULL);
|
||||
|
||||
/* get mdb entries */
|
||||
if (rtnl_mdbdump_req(&rth, PF_BRIDGE) < 0) {
|
||||
if (rtnl_wilddump_request(&rth, PF_BRIDGE, RTM_GETMDB) < 0) {
|
||||
perror("Cannot send dump request");
|
||||
return -1;
|
||||
}
|
||||
|
||||
open_json_array(PRINT_JSON, "mdb");
|
||||
if (rtnl_dump_filter(&rth, print_mdbs, stdout) < 0) {
|
||||
if (rtnl_dump_filter(&rth, print_mdb, stdout) < 0) {
|
||||
fprintf(stderr, "Dump terminated\n");
|
||||
return -1;
|
||||
}
|
||||
close_json_array(PRINT_JSON, NULL);
|
||||
|
||||
/* get router ports */
|
||||
if (rtnl_mdbdump_req(&rth, PF_BRIDGE) < 0) {
|
||||
perror("Cannot send dump request");
|
||||
return -1;
|
||||
}
|
||||
|
||||
open_json_object("router");
|
||||
if (rtnl_dump_filter(&rth, print_rtrs, stdout) < 0) {
|
||||
fprintf(stderr, "Dump terminated\n");
|
||||
return -1;
|
||||
}
|
||||
close_json_object();
|
||||
|
||||
close_json_object();
|
||||
delete_json_obj();
|
||||
fflush(stdout);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int mdb_parse_grp(const char *grp, struct br_mdb_entry *e)
|
||||
{
|
||||
if (inet_pton(AF_INET, grp, &e->addr.u.ip4)) {
|
||||
e->addr.proto = htons(ETH_P_IP);
|
||||
return 0;
|
||||
}
|
||||
if (inet_pton(AF_INET6, grp, &e->addr.u.ip6)) {
|
||||
e->addr.proto = htons(ETH_P_IPV6);
|
||||
return 0;
|
||||
}
|
||||
if (ll_addr_a2n((char *)e->addr.u.mac_addr, sizeof(e->addr.u.mac_addr),
|
||||
grp) == ETH_ALEN) {
|
||||
e->addr.proto = 0;
|
||||
return 0;
|
||||
}
|
||||
|
||||
return -1;
|
||||
}
|
||||
|
||||
static int mdb_modify(int cmd, int flags, int argc, char **argv)
|
||||
{
|
||||
struct {
|
||||
struct nlmsghdr n;
|
||||
struct nlmsghdr n;
|
||||
struct br_port_msg bpm;
|
||||
char buf[1024];
|
||||
} req = {
|
||||
.n.nlmsg_len = NLMSG_LENGTH(sizeof(struct br_port_msg)),
|
||||
.n.nlmsg_flags = NLM_F_REQUEST | flags,
|
||||
.n.nlmsg_type = cmd,
|
||||
.bpm.family = PF_BRIDGE,
|
||||
};
|
||||
char *d = NULL, *p = NULL, *grp = NULL, *src = NULL;
|
||||
struct br_mdb_entry entry = {};
|
||||
short vid = 0;
|
||||
char buf[1024];
|
||||
} req;
|
||||
struct br_mdb_entry entry;
|
||||
char *d = NULL, *p = NULL, *grp = NULL;
|
||||
|
||||
memset(&req, 0, sizeof(req));
|
||||
memset(&entry, 0, sizeof(entry));
|
||||
|
||||
req.n.nlmsg_len = NLMSG_LENGTH(sizeof(struct br_port_msg));
|
||||
req.n.nlmsg_flags = NLM_F_REQUEST|flags;
|
||||
req.n.nlmsg_type = cmd;
|
||||
req.bpm.family = PF_BRIDGE;
|
||||
|
||||
while (argc > 0) {
|
||||
if (strcmp(*argv, "dev") == 0) {
|
||||
|
|
@ -503,12 +189,6 @@ static int mdb_modify(int cmd, int flags, int argc, char **argv)
|
|||
entry.state |= MDB_PERMANENT;
|
||||
} else if (strcmp(*argv, "temp") == 0) {
|
||||
;/* nothing */
|
||||
} else if (strcmp(*argv, "vid") == 0) {
|
||||
NEXT_ARG();
|
||||
vid = atoi(*argv);
|
||||
} else if (strcmp(*argv, "src") == 0) {
|
||||
NEXT_ARG();
|
||||
src = *argv;
|
||||
} else {
|
||||
if (matches(*argv, "help") == 0)
|
||||
usage();
|
||||
|
|
@ -522,40 +202,29 @@ static int mdb_modify(int cmd, int flags, int argc, char **argv)
|
|||
}
|
||||
|
||||
req.bpm.ifindex = ll_name_to_index(d);
|
||||
if (!req.bpm.ifindex)
|
||||
return nodev(d);
|
||||
|
||||
entry.ifindex = ll_name_to_index(p);
|
||||
if (!entry.ifindex)
|
||||
return nodev(p);
|
||||
|
||||
if (mdb_parse_grp(grp, &entry)) {
|
||||
fprintf(stderr, "Invalid address \"%s\"\n", grp);
|
||||
if (req.bpm.ifindex == 0) {
|
||||
fprintf(stderr, "Cannot find device \"%s\"\n", d);
|
||||
return -1;
|
||||
}
|
||||
|
||||
entry.vid = vid;
|
||||
addattr_l(&req.n, sizeof(req), MDBA_SET_ENTRY, &entry, sizeof(entry));
|
||||
if (src) {
|
||||
struct rtattr *nest = addattr_nest(&req.n, sizeof(req),
|
||||
MDBA_SET_ENTRY_ATTRS);
|
||||
struct in6_addr src_ip6;
|
||||
__be32 src_ip4;
|
||||
|
||||
nest->rta_type |= NLA_F_NESTED;
|
||||
if (!inet_pton(AF_INET, src, &src_ip4)) {
|
||||
if (!inet_pton(AF_INET6, src, &src_ip6)) {
|
||||
fprintf(stderr, "Invalid source address \"%s\"\n", src);
|
||||
return -1;
|
||||
}
|
||||
addattr_l(&req.n, sizeof(req), MDBE_ATTR_SOURCE, &src_ip6, sizeof(src_ip6));
|
||||
} else {
|
||||
addattr32(&req.n, sizeof(req), MDBE_ATTR_SOURCE, src_ip4);
|
||||
}
|
||||
addattr_nest_end(&req.n, nest);
|
||||
entry.ifindex = ll_name_to_index(p);
|
||||
if (entry.ifindex == 0) {
|
||||
fprintf(stderr, "Cannot find device \"%s\"\n", p);
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (rtnl_talk(&rth, &req.n, NULL) < 0)
|
||||
if (!inet_pton(AF_INET, grp, &entry.addr.u.ip4)) {
|
||||
if (!inet_pton(AF_INET6, grp, &entry.addr.u.ip6)) {
|
||||
fprintf(stderr, "Invalid address \"%s\"\n", grp);
|
||||
return -1;
|
||||
} else
|
||||
entry.addr.proto = htons(ETH_P_IPV6);
|
||||
} else
|
||||
entry.addr.proto = htons(ETH_P_IP);
|
||||
|
||||
addattr_l(&req.n, sizeof(req), MDBA_SET_ENTRY, &entry, sizeof(entry));
|
||||
|
||||
if (rtnl_talk(&rth, &req.n, NULL, 0) < 0)
|
||||
return -1;
|
||||
|
||||
return 0;
|
||||
|
|
|
|||
|
|
@ -27,15 +27,15 @@
|
|||
|
||||
|
||||
static void usage(void) __attribute__((noreturn));
|
||||
static int prefix_banner;
|
||||
int prefix_banner;
|
||||
|
||||
static void usage(void)
|
||||
{
|
||||
fprintf(stderr, "Usage: bridge monitor [file | link | fdb | mdb | vlan | all]\n");
|
||||
fprintf(stderr, "Usage: bridge monitor [file | link | fdb | mdb | all]\n");
|
||||
exit(-1);
|
||||
}
|
||||
|
||||
static int accept_msg(struct rtnl_ctrl_data *ctrl,
|
||||
static int accept_msg(const struct sockaddr_nl *who,
|
||||
struct nlmsghdr *n, void *arg)
|
||||
{
|
||||
FILE *fp = arg;
|
||||
|
|
@ -49,30 +49,24 @@ static int accept_msg(struct rtnl_ctrl_data *ctrl,
|
|||
if (prefix_banner)
|
||||
fprintf(fp, "[LINK]");
|
||||
|
||||
return print_linkinfo(n, arg);
|
||||
return print_linkinfo(who, n, arg);
|
||||
|
||||
case RTM_NEWNEIGH:
|
||||
case RTM_DELNEIGH:
|
||||
if (prefix_banner)
|
||||
fprintf(fp, "[NEIGH]");
|
||||
return print_fdb(n, arg);
|
||||
return print_fdb(who, n, arg);
|
||||
|
||||
case RTM_NEWMDB:
|
||||
case RTM_DELMDB:
|
||||
if (prefix_banner)
|
||||
fprintf(fp, "[MDB]");
|
||||
return print_mdb_mon(n, arg);
|
||||
return print_mdb(who, n, arg);
|
||||
|
||||
case NLMSG_TSTAMP:
|
||||
print_nlmsg_timestamp(fp, n);
|
||||
return 0;
|
||||
|
||||
case RTM_NEWVLAN:
|
||||
case RTM_DELVLAN:
|
||||
if (prefix_banner)
|
||||
fprintf(fp, "[VLAN]");
|
||||
return print_vlan_rtm(n, arg, true, false);
|
||||
|
||||
default:
|
||||
return 0;
|
||||
}
|
||||
|
|
@ -81,11 +75,10 @@ static int accept_msg(struct rtnl_ctrl_data *ctrl,
|
|||
int do_monitor(int argc, char **argv)
|
||||
{
|
||||
char *file = NULL;
|
||||
unsigned int groups = ~RTMGRP_TC;
|
||||
int llink = 0;
|
||||
int lneigh = 0;
|
||||
int lmdb = 0;
|
||||
int lvlan = 0;
|
||||
unsigned groups = ~RTMGRP_TC;
|
||||
int llink=0;
|
||||
int lneigh=0;
|
||||
int lmdb=0;
|
||||
|
||||
rtnl_close(&rth);
|
||||
|
||||
|
|
@ -94,7 +87,7 @@ int do_monitor(int argc, char **argv)
|
|||
NEXT_ARG();
|
||||
file = *argv;
|
||||
} else if (matches(*argv, "link") == 0) {
|
||||
llink = 1;
|
||||
llink=1;
|
||||
groups = 0;
|
||||
} else if (matches(*argv, "fdb") == 0) {
|
||||
lneigh = 1;
|
||||
|
|
@ -102,13 +95,9 @@ int do_monitor(int argc, char **argv)
|
|||
} else if (matches(*argv, "mdb") == 0) {
|
||||
lmdb = 1;
|
||||
groups = 0;
|
||||
} else if (matches(*argv, "vlan") == 0) {
|
||||
lvlan = 1;
|
||||
groups = 0;
|
||||
} else if (strcmp(*argv, "all") == 0) {
|
||||
groups = ~RTMGRP_TC;
|
||||
lvlan = 1;
|
||||
prefix_banner = 1;
|
||||
prefix_banner=1;
|
||||
} else if (matches(*argv, "help") == 0) {
|
||||
usage();
|
||||
} else {
|
||||
|
|
@ -132,7 +121,6 @@ int do_monitor(int argc, char **argv)
|
|||
if (file) {
|
||||
FILE *fp;
|
||||
int err;
|
||||
|
||||
fp = fopen(file, "r");
|
||||
if (fp == NULL) {
|
||||
perror("Cannot fopen");
|
||||
|
|
@ -145,12 +133,6 @@ int do_monitor(int argc, char **argv)
|
|||
|
||||
if (rtnl_open(&rth, groups) < 0)
|
||||
exit(1);
|
||||
|
||||
if (lvlan && rtnl_add_nl_group(&rth, RTNLGRP_BRVLAN) < 0) {
|
||||
fprintf(stderr, "Failed to add bridge vlan group to list\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
ll_init_map(&rth);
|
||||
|
||||
if (rtnl_listen(&rth, accept_msg, stdout) < 0)
|
||||
|
|
@ -158,3 +140,4 @@ int do_monitor(int argc, char **argv)
|
|||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
|
|
|||
1270
bridge/vlan.c
1270
bridge/vlan.c
File diff suppressed because it is too large
Load Diff
|
|
@ -1,28 +1,38 @@
|
|||
#!/bin/sh
|
||||
# SPDX-License-Identifier: GPL-2.0
|
||||
#! /bin/bash
|
||||
# This is not an autoconf generated configure
|
||||
|
||||
INCLUDE="$PWD/include"
|
||||
PREFIX="/usr"
|
||||
LIBDIR="\${prefix}/lib"
|
||||
|
||||
# Output file which is input to Makefile
|
||||
CONFIG=config.mk
|
||||
#
|
||||
INCLUDE=${1:-"$PWD/include"}
|
||||
|
||||
# Make a temp directory in build tree.
|
||||
TMPDIR=$(mktemp -d config.XXXXXX)
|
||||
trap 'status=$?; rm -rf $TMPDIR; exit $status' EXIT HUP INT QUIT TERM
|
||||
|
||||
check_prog()
|
||||
{
|
||||
echo -n "$2"
|
||||
command -v $1 >/dev/null 2>&1 && (echo "$3:=y" >> Config; echo "yes") || (echo "no"; return 1)
|
||||
}
|
||||
|
||||
check_docs()
|
||||
{
|
||||
if check_prog latex " latex: " HAVE_LATEX; then
|
||||
check_prog pdflatex " pdflatex: " HAVE_PDFLATEX || echo " WARNING: no PDF docs can be built from LaTeX files"
|
||||
check_prog sgml2latex " sgml2latex: " HAVE_SGML2LATEX || echo " WARNING: no LaTeX files can be build from SGML files"
|
||||
else
|
||||
echo " WARNING: no docs can be built from LaTeX files"
|
||||
fi
|
||||
|
||||
check_prog sgml2html " sgml2html: " HAVE_SGML2HTML || echo " WARNING: no HTML docs can be built from SGML"
|
||||
}
|
||||
|
||||
check_toolchain()
|
||||
{
|
||||
: ${PKG_CONFIG:=pkg-config}
|
||||
: ${AR=ar}
|
||||
: ${CC=gcc}
|
||||
: ${YACC=bison}
|
||||
echo "PKG_CONFIG:=${PKG_CONFIG}" >>$CONFIG
|
||||
echo "AR:=${AR}" >>$CONFIG
|
||||
echo "CC:=${CC}" >>$CONFIG
|
||||
echo "YACC:=${YACC}" >>$CONFIG
|
||||
echo "PKG_CONFIG:=${PKG_CONFIG}" >>Config
|
||||
echo "AR:=${AR}" >>Config
|
||||
echo "CC:=${CC}" >>Config
|
||||
}
|
||||
|
||||
check_atm()
|
||||
|
|
@ -36,8 +46,10 @@ int main(int argc, char **argv) {
|
|||
}
|
||||
EOF
|
||||
|
||||
if $CC -I$INCLUDE -o $TMPDIR/atmtest $TMPDIR/atmtest.c -latm >/dev/null 2>&1; then
|
||||
echo "TC_CONFIG_ATM:=y" >>$CONFIG
|
||||
$CC -I$INCLUDE -o $TMPDIR/atmtest $TMPDIR/atmtest.c -latm >/dev/null 2>&1
|
||||
if [ $? -eq 0 ]
|
||||
then
|
||||
echo "TC_CONFIG_ATM:=y" >>Config
|
||||
echo yes
|
||||
else
|
||||
echo no
|
||||
|
|
@ -45,13 +57,6 @@ EOF
|
|||
rm -f $TMPDIR/atmtest.c $TMPDIR/atmtest
|
||||
}
|
||||
|
||||
check_xtables()
|
||||
{
|
||||
if ! ${PKG_CONFIG} xtables --exists; then
|
||||
echo "TC_CONFIG_NO_XT:=y" >>$CONFIG
|
||||
fi
|
||||
}
|
||||
|
||||
check_xt()
|
||||
{
|
||||
#check if we have xtables from iptables >= 1.4.5.
|
||||
|
|
@ -75,8 +80,9 @@ int main(int argc, char **argv)
|
|||
EOF
|
||||
|
||||
if $CC -I$INCLUDE $IPTC -o $TMPDIR/ipttest $TMPDIR/ipttest.c $IPTL \
|
||||
$(${PKG_CONFIG} xtables --cflags --libs) -ldl >/dev/null 2>&1; then
|
||||
echo "TC_CONFIG_XT:=y" >>$CONFIG
|
||||
$(${PKG_CONFIG} xtables --cflags --libs) -ldl >/dev/null 2>&1
|
||||
then
|
||||
echo "TC_CONFIG_XT:=y" >>Config
|
||||
echo "using xtables"
|
||||
fi
|
||||
rm -f $TMPDIR/ipttest.c $TMPDIR/ipttest
|
||||
|
|
@ -84,10 +90,13 @@ EOF
|
|||
|
||||
check_xt_old()
|
||||
{
|
||||
# bail if previous XT checks has already succeeded.
|
||||
grep -q TC_CONFIG_XT $CONFIG && return
|
||||
# bail if previous XT checks has already succeded.
|
||||
if grep -q TC_CONFIG_XT Config
|
||||
then
|
||||
return
|
||||
fi
|
||||
|
||||
#check if we don't need our internal header ..
|
||||
#check if we dont need our internal header ..
|
||||
cat >$TMPDIR/ipttest.c <<EOF
|
||||
#include <xtables.h>
|
||||
char *lib_dir;
|
||||
|
|
@ -109,8 +118,10 @@ int main(int argc, char **argv) {
|
|||
|
||||
EOF
|
||||
|
||||
if $CC -I$INCLUDE $IPTC -o $TMPDIR/ipttest $TMPDIR/ipttest.c $IPTL -ldl >/dev/null 2>&1; then
|
||||
echo "TC_CONFIG_XT_OLD:=y" >>$CONFIG
|
||||
$CC -I$INCLUDE $IPTC -o $TMPDIR/ipttest $TMPDIR/ipttest.c $IPTL -ldl >/dev/null 2>&1
|
||||
if [ $? -eq 0 ]
|
||||
then
|
||||
echo "TC_CONFIG_XT_OLD:=y" >>Config
|
||||
echo "using old xtables (no need for xt-internal.h)"
|
||||
fi
|
||||
rm -f $TMPDIR/ipttest.c $TMPDIR/ipttest
|
||||
|
|
@ -118,8 +129,11 @@ EOF
|
|||
|
||||
check_xt_old_internal_h()
|
||||
{
|
||||
# bail if previous XT checks has already succeeded.
|
||||
grep -q TC_CONFIG_XT $CONFIG && return
|
||||
# bail if previous XT checks has already succeded.
|
||||
if grep -q TC_CONFIG_XT Config
|
||||
then
|
||||
return
|
||||
fi
|
||||
|
||||
#check if we need our own internal.h
|
||||
cat >$TMPDIR/ipttest.c <<EOF
|
||||
|
|
@ -143,25 +157,20 @@ int main(int argc, char **argv) {
|
|||
}
|
||||
|
||||
EOF
|
||||
if $CC -I$INCLUDE $IPTC -o $TMPDIR/ipttest $TMPDIR/ipttest.c $IPTL -ldl >/dev/null 2>&1; then
|
||||
$CC -I$INCLUDE $IPTC -o $TMPDIR/ipttest $TMPDIR/ipttest.c $IPTL -ldl >/dev/null 2>&1
|
||||
|
||||
if [ $? -eq 0 ]
|
||||
then
|
||||
echo "using old xtables with xt-internal.h"
|
||||
echo "TC_CONFIG_XT_OLD_H:=y" >>$CONFIG
|
||||
echo "TC_CONFIG_XT_OLD_H:=y" >>Config
|
||||
fi
|
||||
rm -f $TMPDIR/ipttest.c $TMPDIR/ipttest
|
||||
}
|
||||
|
||||
check_lib_dir()
|
||||
{
|
||||
LIBDIR=$(echo $LIBDIR | sed "s|\${prefix}|$PREFIX|")
|
||||
|
||||
echo -n "lib directory: "
|
||||
echo "$LIBDIR"
|
||||
echo "LIBDIR:=$LIBDIR" >> $CONFIG
|
||||
}
|
||||
|
||||
check_ipt()
|
||||
{
|
||||
if ! grep TC_CONFIG_XT $CONFIG > /dev/null; then
|
||||
if ! grep TC_CONFIG_XT Config > /dev/null
|
||||
then
|
||||
echo "using iptables"
|
||||
fi
|
||||
}
|
||||
|
|
@ -171,16 +180,16 @@ check_ipt_lib_dir()
|
|||
IPT_LIB_DIR=$(${PKG_CONFIG} --variable=xtlibdir xtables)
|
||||
if [ -n "$IPT_LIB_DIR" ]; then
|
||||
echo $IPT_LIB_DIR
|
||||
echo "IPT_LIB_DIR:=$IPT_LIB_DIR" >> $CONFIG
|
||||
echo "IPT_LIB_DIR:=$IPT_LIB_DIR" >> Config
|
||||
return
|
||||
fi
|
||||
|
||||
for dir in /lib /usr/lib /usr/local/lib; do
|
||||
for file in "xtables" "iptables"; do
|
||||
file="$dir/$file/lib*t_*so"
|
||||
for dir in /lib /usr/lib /usr/local/lib
|
||||
do
|
||||
for file in $dir/{xtables,iptables}/lib*t_*so ; do
|
||||
if [ -f $file ]; then
|
||||
echo ${file%/*}
|
||||
echo "IPT_LIB_DIR:=${file%/*}" >> $CONFIG
|
||||
echo "IPT_LIB_DIR:=${file%/*}" >> Config
|
||||
return
|
||||
fi
|
||||
done
|
||||
|
|
@ -198,41 +207,17 @@ int main(int argc, char **argv)
|
|||
return 0;
|
||||
}
|
||||
EOF
|
||||
if $CC -I$INCLUDE -o $TMPDIR/setnstest $TMPDIR/setnstest.c >/dev/null 2>&1; then
|
||||
echo "IP_CONFIG_SETNS:=y" >>$CONFIG
|
||||
$CC -I$INCLUDE -o $TMPDIR/setnstest $TMPDIR/setnstest.c >/dev/null 2>&1
|
||||
if [ $? -eq 0 ]
|
||||
then
|
||||
echo "IP_CONFIG_SETNS:=y" >>Config
|
||||
echo "yes"
|
||||
echo "CFLAGS += -DHAVE_SETNS" >>$CONFIG
|
||||
else
|
||||
echo "no"
|
||||
fi
|
||||
rm -f $TMPDIR/setnstest.c $TMPDIR/setnstest
|
||||
}
|
||||
|
||||
check_name_to_handle_at()
|
||||
{
|
||||
cat >$TMPDIR/name_to_handle_at_test.c <<EOF
|
||||
#define _GNU_SOURCE
|
||||
#include <sys/types.h>
|
||||
#include <sys/stat.h>
|
||||
#include <fcntl.h>
|
||||
int main(int argc, char **argv)
|
||||
{
|
||||
struct file_handle *fhp;
|
||||
int mount_id, flags, dirfd;
|
||||
char *pathname;
|
||||
name_to_handle_at(dirfd, pathname, fhp, &mount_id, flags);
|
||||
return 0;
|
||||
}
|
||||
EOF
|
||||
if $CC -I$INCLUDE -o $TMPDIR/name_to_handle_at_test $TMPDIR/name_to_handle_at_test.c >/dev/null 2>&1; then
|
||||
echo "yes"
|
||||
echo "CFLAGS += -DHAVE_HANDLE_AT" >>$CONFIG
|
||||
else
|
||||
echo "no"
|
||||
fi
|
||||
rm -f $TMPDIR/name_to_handle_at_test.c $TMPDIR/name_to_handle_at_test
|
||||
}
|
||||
|
||||
check_ipset()
|
||||
{
|
||||
cat >$TMPDIR/ipsettest.c <<EOF
|
||||
|
|
@ -244,7 +229,7 @@ typedef unsigned short ip_set_id_t;
|
|||
#include <linux/netfilter/xt_set.h>
|
||||
|
||||
struct xt_set_info info;
|
||||
#if IPSET_PROTOCOL == 6 || IPSET_PROTOCOL == 7
|
||||
#if IPSET_PROTOCOL == 6
|
||||
int main(void)
|
||||
{
|
||||
return IPSET_MAXNAMELEN;
|
||||
|
|
@ -254,8 +239,9 @@ int main(void)
|
|||
#endif
|
||||
EOF
|
||||
|
||||
if $CC -I$INCLUDE -o $TMPDIR/ipsettest $TMPDIR/ipsettest.c >/dev/null 2>&1; then
|
||||
echo "TC_CONFIG_IPSET:=y" >>$CONFIG
|
||||
if $CC -I$INCLUDE -o $TMPDIR/ipsettest $TMPDIR/ipsettest.c >/dev/null 2>&1
|
||||
then
|
||||
echo "TC_CONFIG_IPSET:=y" >>Config
|
||||
echo "yes"
|
||||
else
|
||||
echo "no"
|
||||
|
|
@ -265,131 +251,34 @@ EOF
|
|||
|
||||
check_elf()
|
||||
{
|
||||
if ${PKG_CONFIG} libelf --exists; then
|
||||
echo "HAVE_ELF:=y" >>$CONFIG
|
||||
echo "yes"
|
||||
cat >$TMPDIR/elftest.c <<EOF
|
||||
#include <libelf.h>
|
||||
#include <gelf.h>
|
||||
int main(void)
|
||||
{
|
||||
Elf_Scn *scn;
|
||||
GElf_Shdr shdr;
|
||||
return elf_version(EV_CURRENT);
|
||||
}
|
||||
EOF
|
||||
|
||||
echo 'CFLAGS += -DHAVE_ELF' `${PKG_CONFIG} libelf --cflags` >> $CONFIG
|
||||
echo 'LDLIBS += ' `${PKG_CONFIG} libelf --libs` >>$CONFIG
|
||||
if $CC -I$INCLUDE -o $TMPDIR/elftest $TMPDIR/elftest.c -lelf >/dev/null 2>&1
|
||||
then
|
||||
echo "TC_CONFIG_ELF:=y" >>Config
|
||||
echo "yes"
|
||||
else
|
||||
echo "no"
|
||||
fi
|
||||
}
|
||||
|
||||
have_libbpf_basic()
|
||||
{
|
||||
cat >$TMPDIR/libbpf_test.c <<EOF
|
||||
#include <bpf/libbpf.h>
|
||||
int main(int argc, char **argv) {
|
||||
bpf_program__set_autoload(NULL, false);
|
||||
bpf_map__ifindex(NULL);
|
||||
bpf_map__set_pin_path(NULL, NULL);
|
||||
bpf_object__open_file(NULL, NULL);
|
||||
return 0;
|
||||
}
|
||||
EOF
|
||||
|
||||
$CC -o $TMPDIR/libbpf_test $TMPDIR/libbpf_test.c $LIBBPF_CFLAGS $LIBBPF_LDLIBS >/dev/null 2>&1
|
||||
local ret=$?
|
||||
|
||||
rm -f $TMPDIR/libbpf_test.c $TMPDIR/libbpf_test
|
||||
return $ret
|
||||
}
|
||||
|
||||
have_libbpf_sec_name()
|
||||
{
|
||||
cat >$TMPDIR/libbpf_sec_test.c <<EOF
|
||||
#include <bpf/libbpf.h>
|
||||
int main(int argc, char **argv) {
|
||||
void *ptr;
|
||||
bpf_program__section_name(NULL);
|
||||
return 0;
|
||||
}
|
||||
EOF
|
||||
|
||||
$CC -o $TMPDIR/libbpf_sec_test $TMPDIR/libbpf_sec_test.c $LIBBPF_CFLAGS $LIBBPF_LDLIBS >/dev/null 2>&1
|
||||
local ret=$?
|
||||
|
||||
rm -f $TMPDIR/libbpf_sec_test.c $TMPDIR/libbpf_sec_test
|
||||
return $ret
|
||||
}
|
||||
|
||||
check_force_libbpf_on()
|
||||
{
|
||||
# if set LIBBPF_FORCE=on but no libbpf support, just exist the config
|
||||
# process to make sure we don't build without libbpf.
|
||||
if [ "$LIBBPF_FORCE" = on ]; then
|
||||
echo " LIBBPF_FORCE=on set, but couldn't find a usable libbpf"
|
||||
exit 1
|
||||
fi
|
||||
}
|
||||
|
||||
check_libbpf()
|
||||
{
|
||||
# if set LIBBPF_FORCE=off, disable libbpf entirely
|
||||
if [ "$LIBBPF_FORCE" = off ]; then
|
||||
echo "no"
|
||||
return
|
||||
fi
|
||||
|
||||
if ! ${PKG_CONFIG} libbpf --exists && [ -z "$LIBBPF_DIR" ] ; then
|
||||
echo "no"
|
||||
check_force_libbpf_on
|
||||
return
|
||||
fi
|
||||
|
||||
if [ $(uname -m) = x86_64 ]; then
|
||||
local LIBBPF_LIBDIR="${LIBBPF_DIR}/usr/lib64"
|
||||
else
|
||||
local LIBBPF_LIBDIR="${LIBBPF_DIR}/usr/lib"
|
||||
fi
|
||||
|
||||
if [ -n "$LIBBPF_DIR" ]; then
|
||||
LIBBPF_CFLAGS="-I${LIBBPF_DIR}/usr/include"
|
||||
LIBBPF_LDLIBS="${LIBBPF_LIBDIR}/libbpf.a -lz -lelf"
|
||||
LIBBPF_VERSION=$(PKG_CONFIG_LIBDIR=${LIBBPF_LIBDIR}/pkgconfig ${PKG_CONFIG} libbpf --modversion)
|
||||
else
|
||||
LIBBPF_CFLAGS=$(${PKG_CONFIG} libbpf --cflags)
|
||||
LIBBPF_LDLIBS=$(${PKG_CONFIG} libbpf --libs)
|
||||
LIBBPF_VERSION=$(${PKG_CONFIG} libbpf --modversion)
|
||||
fi
|
||||
|
||||
if ! have_libbpf_basic; then
|
||||
echo "no"
|
||||
echo " libbpf version $LIBBPF_VERSION is too low, please update it to at least 0.1.0"
|
||||
check_force_libbpf_on
|
||||
return
|
||||
else
|
||||
echo "HAVE_LIBBPF:=y" >> $CONFIG
|
||||
echo 'CFLAGS += -DHAVE_LIBBPF ' $LIBBPF_CFLAGS >> $CONFIG
|
||||
echo "CFLAGS += -DLIBBPF_VERSION=\\\"$LIBBPF_VERSION\\\"" >> $CONFIG
|
||||
echo 'LDLIBS += ' $LIBBPF_LDLIBS >> $CONFIG
|
||||
|
||||
if [ -z "$LIBBPF_DIR" ]; then
|
||||
echo "CFLAGS += -DLIBBPF_DYNAMIC" >> $CONFIG
|
||||
fi
|
||||
fi
|
||||
|
||||
# bpf_program__title() is deprecated since libbpf 0.2.0, use
|
||||
# bpf_program__section_name() instead if we support
|
||||
if have_libbpf_sec_name; then
|
||||
echo "HAVE_LIBBPF_SECTION_NAME:=y" >> $CONFIG
|
||||
echo 'CFLAGS += -DHAVE_LIBBPF_SECTION_NAME ' >> $CONFIG
|
||||
fi
|
||||
|
||||
echo "yes"
|
||||
echo " libbpf version $LIBBPF_VERSION"
|
||||
rm -f $TMPDIR/elftest.c $TMPDIR/elftest
|
||||
}
|
||||
|
||||
check_selinux()
|
||||
# SELinux is a compile time option in the ss utility
|
||||
{
|
||||
if ${PKG_CONFIG} libselinux --exists; then
|
||||
echo "HAVE_SELINUX:=y" >>$CONFIG
|
||||
if ${PKG_CONFIG} libselinux --exists
|
||||
then
|
||||
echo "HAVE_SELINUX:=y" >>Config
|
||||
echo "yes"
|
||||
|
||||
echo 'LDLIBS +=' `${PKG_CONFIG} --libs libselinux` >>$CONFIG
|
||||
echo 'CFLAGS += -DHAVE_SELINUX' `${PKG_CONFIG} --cflags libselinux` >>$CONFIG
|
||||
else
|
||||
echo "no"
|
||||
fi
|
||||
|
|
@ -397,174 +286,16 @@ check_selinux()
|
|||
|
||||
check_mnl()
|
||||
{
|
||||
if ${PKG_CONFIG} libmnl --exists; then
|
||||
echo "HAVE_MNL:=y" >>$CONFIG
|
||||
echo "yes"
|
||||
|
||||
echo 'CFLAGS += -DHAVE_LIBMNL' `${PKG_CONFIG} libmnl --cflags` >>$CONFIG
|
||||
echo 'LDLIBS +=' `${PKG_CONFIG} libmnl --libs` >> $CONFIG
|
||||
if ${PKG_CONFIG} libmnl --exists
|
||||
then
|
||||
echo "HAVE_MNL:=y" >>Config
|
||||
echo -n "yes"
|
||||
else
|
||||
echo "no"
|
||||
echo -n "no"
|
||||
fi
|
||||
}
|
||||
|
||||
check_berkeley_db()
|
||||
{
|
||||
cat >$TMPDIR/dbtest.c <<EOF
|
||||
#include <fcntl.h>
|
||||
#include <stdlib.h>
|
||||
#include <db_185.h>
|
||||
int main(int argc, char **argv) {
|
||||
dbopen("/tmp/xxx_test_db.db", O_CREAT|O_RDWR, 0644, DB_HASH, NULL);
|
||||
return 0;
|
||||
}
|
||||
EOF
|
||||
if $CC -I$INCLUDE -o $TMPDIR/dbtest $TMPDIR/dbtest.c -ldb >/dev/null 2>&1; then
|
||||
echo "HAVE_BERKELEY_DB:=y" >>$CONFIG
|
||||
echo "yes"
|
||||
else
|
||||
echo "no"
|
||||
fi
|
||||
rm -f $TMPDIR/dbtest.c $TMPDIR/dbtest
|
||||
}
|
||||
|
||||
check_strlcpy()
|
||||
{
|
||||
cat >$TMPDIR/strtest.c <<EOF
|
||||
#include <string.h>
|
||||
int main(int argc, char **argv) {
|
||||
char dst[10];
|
||||
strlcpy(dst, "test", sizeof(dst));
|
||||
return 0;
|
||||
}
|
||||
EOF
|
||||
if $CC -I$INCLUDE -o $TMPDIR/strtest $TMPDIR/strtest.c >/dev/null 2>&1; then
|
||||
echo "no"
|
||||
else
|
||||
if ${PKG_CONFIG} libbsd --exists; then
|
||||
echo 'CFLAGS += -DHAVE_LIBBSD' `${PKG_CONFIG} libbsd --cflags` >>$CONFIG
|
||||
echo 'LDLIBS +=' `${PKG_CONFIG} libbsd --libs` >> $CONFIG
|
||||
echo "no"
|
||||
else
|
||||
echo 'CFLAGS += -DNEED_STRLCPY' >>$CONFIG
|
||||
echo "yes"
|
||||
fi
|
||||
fi
|
||||
rm -f $TMPDIR/strtest.c $TMPDIR/strtest
|
||||
}
|
||||
|
||||
check_cap()
|
||||
{
|
||||
if ${PKG_CONFIG} libcap --exists; then
|
||||
echo "HAVE_CAP:=y" >>$CONFIG
|
||||
echo "yes"
|
||||
|
||||
echo 'CFLAGS += -DHAVE_LIBCAP' `${PKG_CONFIG} libcap --cflags` >>$CONFIG
|
||||
echo 'LDLIBS +=' `${PKG_CONFIG} libcap --libs` >> $CONFIG
|
||||
else
|
||||
echo "no"
|
||||
fi
|
||||
}
|
||||
|
||||
quiet_config()
|
||||
{
|
||||
cat <<EOF
|
||||
# user can control verbosity similar to kernel builds (e.g., V=1)
|
||||
ifeq ("\$(origin V)", "command line")
|
||||
VERBOSE = \$(V)
|
||||
endif
|
||||
ifndef VERBOSE
|
||||
VERBOSE = 0
|
||||
endif
|
||||
ifeq (\$(VERBOSE),1)
|
||||
Q =
|
||||
else
|
||||
Q = @
|
||||
endif
|
||||
|
||||
ifeq (\$(VERBOSE), 0)
|
||||
QUIET_CC = @echo ' CC '\$@;
|
||||
QUIET_AR = @echo ' AR '\$@;
|
||||
QUIET_LINK = @echo ' LINK '\$@;
|
||||
QUIET_YACC = @echo ' YACC '\$@;
|
||||
QUIET_LEX = @echo ' LEX '\$@;
|
||||
endif
|
||||
EOF
|
||||
}
|
||||
|
||||
usage()
|
||||
{
|
||||
cat <<EOF
|
||||
Usage: $0 [OPTIONS]
|
||||
--include_dir <dir> Path to iproute2 include dir
|
||||
--libdir <dir> Path to iproute2 lib dir
|
||||
--libbpf_dir <dir> Path to libbpf DESTDIR
|
||||
--libbpf_force <on|off> Enable/disable libbpf by force. Available options:
|
||||
on: require link against libbpf, quit config if no libbpf support
|
||||
off: disable libbpf probing
|
||||
--prefix <dir> Path prefix of the lib files to install
|
||||
-h | --help Show this usage info
|
||||
EOF
|
||||
exit $1
|
||||
}
|
||||
|
||||
# Compat with the old INCLUDE path setting method.
|
||||
if [ $# -eq 1 ] && [ "$(echo $1 | cut -c 1)" != '-' ]; then
|
||||
INCLUDE="$1"
|
||||
else
|
||||
while [ "$#" -gt 0 ]; do
|
||||
case "$1" in
|
||||
--include_dir)
|
||||
shift
|
||||
INCLUDE="$1" ;;
|
||||
--include_dir=*)
|
||||
INCLUDE="${1#*=}" ;;
|
||||
--libdir)
|
||||
shift
|
||||
LIBDIR="$1" ;;
|
||||
--libdir=*)
|
||||
LIBDIR="${1#*=}" ;;
|
||||
--libbpf_dir)
|
||||
shift
|
||||
LIBBPF_DIR="$1" ;;
|
||||
--libbpf_dir=*)
|
||||
LIBBPF_DIR="${1#*=}" ;;
|
||||
--libbpf_force)
|
||||
shift
|
||||
LIBBPF_FORCE="$1" ;;
|
||||
--libbpf_force=*)
|
||||
LIBBPF_FORCE="${1#*=}" ;;
|
||||
--prefix)
|
||||
shift
|
||||
PREFIX="$1" ;;
|
||||
--prefix=*)
|
||||
PREFIX="${1#*=}" ;;
|
||||
-h | --help)
|
||||
usage 0 ;;
|
||||
--*)
|
||||
;;
|
||||
*)
|
||||
usage 1 ;;
|
||||
esac
|
||||
[ "$#" -gt 0 ] && shift
|
||||
done
|
||||
fi
|
||||
|
||||
[ -d "$INCLUDE" ] || usage 1
|
||||
if [ "${LIBBPF_DIR-unused}" != "unused" ]; then
|
||||
[ -d "$LIBBPF_DIR" ] || usage 1
|
||||
fi
|
||||
if [ "${LIBBPF_FORCE-unused}" != "unused" ]; then
|
||||
if [ "$LIBBPF_FORCE" != 'on' ] && [ "$LIBBPF_FORCE" != 'off' ]; then
|
||||
usage 1
|
||||
fi
|
||||
fi
|
||||
[ -z "$PREFIX" ] && usage 1
|
||||
[ -z "$LIBDIR" ] && usage 1
|
||||
|
||||
echo "# Generated config based on" $INCLUDE >$CONFIG
|
||||
quiet_config >> $CONFIG
|
||||
|
||||
echo "# Generated config based on" $INCLUDE >Config
|
||||
check_toolchain
|
||||
|
||||
echo "TC schedulers"
|
||||
|
|
@ -572,52 +303,31 @@ echo "TC schedulers"
|
|||
echo -n " ATM "
|
||||
check_atm
|
||||
|
||||
check_xtables
|
||||
if ! grep -q TC_CONFIG_NO_XT $CONFIG; then
|
||||
echo -n " IPT "
|
||||
check_xt
|
||||
check_xt_old
|
||||
check_xt_old_internal_h
|
||||
check_ipt
|
||||
echo -n " IPT "
|
||||
check_xt
|
||||
check_xt_old
|
||||
check_xt_old_internal_h
|
||||
check_ipt
|
||||
|
||||
echo -n " IPSET "
|
||||
check_ipset
|
||||
fi
|
||||
echo -n " IPSET "
|
||||
check_ipset
|
||||
|
||||
echo
|
||||
check_lib_dir
|
||||
if ! grep -q TC_CONFIG_NO_XT $CONFIG; then
|
||||
echo -n "iptables modules directory: "
|
||||
check_ipt_lib_dir
|
||||
fi
|
||||
echo -n -e "\niptables modules directory: "
|
||||
check_ipt_lib_dir
|
||||
|
||||
echo -n "libc has setns: "
|
||||
check_setns
|
||||
|
||||
echo -n "libc has name_to_handle_at: "
|
||||
check_name_to_handle_at
|
||||
|
||||
echo -n "SELinux support: "
|
||||
check_selinux
|
||||
|
||||
echo -n "libbpf support: "
|
||||
check_libbpf
|
||||
|
||||
echo -n "ELF support: "
|
||||
check_elf
|
||||
|
||||
echo -n "libmnl support: "
|
||||
check_mnl
|
||||
echo " (required by tipc)"
|
||||
|
||||
echo -n "Berkeley DB: "
|
||||
check_berkeley_db
|
||||
|
||||
echo -n "need for strlcpy: "
|
||||
check_strlcpy
|
||||
|
||||
echo -n "libcap support: "
|
||||
check_cap
|
||||
|
||||
echo >> $CONFIG
|
||||
echo "%.o: %.c" >> $CONFIG
|
||||
echo ' $(QUIET_CC)$(CC) $(CFLAGS) $(EXTRA_CFLAGS) $(CPPFLAGS) -c -o $@ $<' >> $CONFIG
|
||||
echo -e "\nDocs"
|
||||
check_docs
|
||||
echo ""
|
||||
|
|
|
|||
|
|
@ -1 +0,0 @@
|
|||
dcb
|
||||
31
dcb/Makefile
31
dcb/Makefile
|
|
@ -1,31 +0,0 @@
|
|||
# SPDX-License-Identifier: GPL-2.0
|
||||
include ../config.mk
|
||||
|
||||
TARGETS :=
|
||||
|
||||
ifeq ($(HAVE_MNL),y)
|
||||
|
||||
DCBOBJ = dcb.o \
|
||||
dcb_app.o \
|
||||
dcb_buffer.o \
|
||||
dcb_dcbx.o \
|
||||
dcb_ets.o \
|
||||
dcb_maxrate.o \
|
||||
dcb_pfc.o
|
||||
TARGETS += dcb
|
||||
LDLIBS += -lm
|
||||
|
||||
endif
|
||||
|
||||
all: $(TARGETS) $(LIBS)
|
||||
|
||||
dcb: $(DCBOBJ) $(LIBNETLINK)
|
||||
$(QUIET_LINK)$(CC) $^ $(LDFLAGS) $(LDLIBS) -o $@
|
||||
|
||||
install: all
|
||||
for i in $(TARGETS); \
|
||||
do install -m 0755 $$i $(DESTDIR)$(SBINDIR); \
|
||||
done
|
||||
|
||||
clean:
|
||||
rm -f $(DCBOBJ) $(TARGETS)
|
||||
611
dcb/dcb.c
611
dcb/dcb.c
|
|
@ -1,611 +0,0 @@
|
|||
// SPDX-License-Identifier: GPL-2.0+
|
||||
|
||||
#include <inttypes.h>
|
||||
#include <stdio.h>
|
||||
#include <linux/dcbnl.h>
|
||||
#include <libmnl/libmnl.h>
|
||||
#include <getopt.h>
|
||||
|
||||
#include "dcb.h"
|
||||
#include "mnl_utils.h"
|
||||
#include "namespace.h"
|
||||
#include "utils.h"
|
||||
#include "version.h"
|
||||
|
||||
static int dcb_init(struct dcb *dcb)
|
||||
{
|
||||
dcb->buf = malloc(MNL_SOCKET_BUFFER_SIZE);
|
||||
if (dcb->buf == NULL) {
|
||||
perror("Netlink buffer allocation");
|
||||
return -1;
|
||||
}
|
||||
|
||||
dcb->nl = mnlu_socket_open(NETLINK_ROUTE);
|
||||
if (dcb->nl == NULL) {
|
||||
perror("Open netlink socket");
|
||||
goto err_socket_open;
|
||||
}
|
||||
|
||||
new_json_obj_plain(dcb->json_output);
|
||||
return 0;
|
||||
|
||||
err_socket_open:
|
||||
free(dcb->buf);
|
||||
return -1;
|
||||
}
|
||||
|
||||
static void dcb_fini(struct dcb *dcb)
|
||||
{
|
||||
delete_json_obj_plain();
|
||||
mnl_socket_close(dcb->nl);
|
||||
free(dcb->buf);
|
||||
}
|
||||
|
||||
static struct dcb *dcb_alloc(void)
|
||||
{
|
||||
struct dcb *dcb;
|
||||
|
||||
dcb = calloc(1, sizeof(*dcb));
|
||||
if (!dcb)
|
||||
return NULL;
|
||||
return dcb;
|
||||
}
|
||||
|
||||
static void dcb_free(struct dcb *dcb)
|
||||
{
|
||||
free(dcb);
|
||||
}
|
||||
|
||||
struct dcb_get_attribute {
|
||||
struct dcb *dcb;
|
||||
int attr;
|
||||
void *payload;
|
||||
__u16 payload_len;
|
||||
};
|
||||
|
||||
static int dcb_get_attribute_attr_ieee_cb(const struct nlattr *attr, void *data)
|
||||
{
|
||||
struct dcb_get_attribute *ga = data;
|
||||
|
||||
if (mnl_attr_get_type(attr) != ga->attr)
|
||||
return MNL_CB_OK;
|
||||
|
||||
ga->payload = mnl_attr_get_payload(attr);
|
||||
ga->payload_len = mnl_attr_get_payload_len(attr);
|
||||
return MNL_CB_STOP;
|
||||
}
|
||||
|
||||
static int dcb_get_attribute_attr_cb(const struct nlattr *attr, void *data)
|
||||
{
|
||||
if (mnl_attr_get_type(attr) != DCB_ATTR_IEEE)
|
||||
return MNL_CB_OK;
|
||||
|
||||
return mnl_attr_parse_nested(attr, dcb_get_attribute_attr_ieee_cb, data);
|
||||
}
|
||||
|
||||
static int dcb_get_attribute_cb(const struct nlmsghdr *nlh, void *data)
|
||||
{
|
||||
return mnl_attr_parse(nlh, sizeof(struct dcbmsg), dcb_get_attribute_attr_cb, data);
|
||||
}
|
||||
|
||||
static int dcb_get_attribute_bare_cb(const struct nlmsghdr *nlh, void *data)
|
||||
{
|
||||
/* Bare attributes (e.g. DCB_ATTR_DCBX) are not wrapped inside an IEEE
|
||||
* container, so this does not have to go through unpacking in
|
||||
* dcb_get_attribute_attr_cb().
|
||||
*/
|
||||
return mnl_attr_parse(nlh, sizeof(struct dcbmsg),
|
||||
dcb_get_attribute_attr_ieee_cb, data);
|
||||
}
|
||||
|
||||
struct dcb_set_attribute_response {
|
||||
int response_attr;
|
||||
};
|
||||
|
||||
static int dcb_set_attribute_attr_cb(const struct nlattr *attr, void *data)
|
||||
{
|
||||
struct dcb_set_attribute_response *resp = data;
|
||||
uint16_t len;
|
||||
uint8_t err;
|
||||
|
||||
if (mnl_attr_get_type(attr) != resp->response_attr)
|
||||
return MNL_CB_OK;
|
||||
|
||||
len = mnl_attr_get_payload_len(attr);
|
||||
if (len != 1) {
|
||||
fprintf(stderr, "Response attribute expected to have size 1, not %d\n", len);
|
||||
return MNL_CB_ERROR;
|
||||
}
|
||||
|
||||
err = mnl_attr_get_u8(attr);
|
||||
if (err) {
|
||||
fprintf(stderr, "Error when attempting to set attribute: %s\n",
|
||||
strerror(err));
|
||||
return MNL_CB_ERROR;
|
||||
}
|
||||
|
||||
return MNL_CB_STOP;
|
||||
}
|
||||
|
||||
static int dcb_set_attribute_cb(const struct nlmsghdr *nlh, void *data)
|
||||
{
|
||||
return mnl_attr_parse(nlh, sizeof(struct dcbmsg), dcb_set_attribute_attr_cb, data);
|
||||
}
|
||||
|
||||
static int dcb_talk(struct dcb *dcb, struct nlmsghdr *nlh, mnl_cb_t cb, void *data)
|
||||
{
|
||||
int ret;
|
||||
|
||||
ret = mnl_socket_sendto(dcb->nl, nlh, nlh->nlmsg_len);
|
||||
if (ret < 0) {
|
||||
perror("mnl_socket_sendto");
|
||||
return -1;
|
||||
}
|
||||
|
||||
return mnlu_socket_recv_run(dcb->nl, nlh->nlmsg_seq, dcb->buf, MNL_SOCKET_BUFFER_SIZE,
|
||||
cb, data);
|
||||
}
|
||||
|
||||
static struct nlmsghdr *dcb_prepare(struct dcb *dcb, const char *dev,
|
||||
uint32_t nlmsg_type, uint8_t dcb_cmd)
|
||||
{
|
||||
struct dcbmsg dcbm = {
|
||||
.cmd = dcb_cmd,
|
||||
};
|
||||
struct nlmsghdr *nlh;
|
||||
|
||||
nlh = mnlu_msg_prepare(dcb->buf, nlmsg_type, NLM_F_REQUEST, &dcbm, sizeof(dcbm));
|
||||
mnl_attr_put_strz(nlh, DCB_ATTR_IFNAME, dev);
|
||||
return nlh;
|
||||
}
|
||||
|
||||
static int __dcb_get_attribute(struct dcb *dcb, int command,
|
||||
const char *dev, int attr,
|
||||
void **payload_p, __u16 *payload_len_p,
|
||||
int (*get_attribute_cb)(const struct nlmsghdr *nlh,
|
||||
void *data))
|
||||
{
|
||||
struct dcb_get_attribute ga;
|
||||
struct nlmsghdr *nlh;
|
||||
int ret;
|
||||
|
||||
nlh = dcb_prepare(dcb, dev, RTM_GETDCB, command);
|
||||
|
||||
ga = (struct dcb_get_attribute) {
|
||||
.dcb = dcb,
|
||||
.attr = attr,
|
||||
.payload = NULL,
|
||||
};
|
||||
ret = dcb_talk(dcb, nlh, get_attribute_cb, &ga);
|
||||
if (ret) {
|
||||
perror("Attribute read");
|
||||
return ret;
|
||||
}
|
||||
if (ga.payload == NULL) {
|
||||
perror("Attribute not found");
|
||||
return -ENOENT;
|
||||
}
|
||||
|
||||
*payload_p = ga.payload;
|
||||
*payload_len_p = ga.payload_len;
|
||||
return 0;
|
||||
}
|
||||
|
||||
int dcb_get_attribute_va(struct dcb *dcb, const char *dev, int attr,
|
||||
void **payload_p, __u16 *payload_len_p)
|
||||
{
|
||||
return __dcb_get_attribute(dcb, DCB_CMD_IEEE_GET, dev, attr,
|
||||
payload_p, payload_len_p,
|
||||
dcb_get_attribute_cb);
|
||||
}
|
||||
|
||||
int dcb_get_attribute_bare(struct dcb *dcb, int cmd, const char *dev, int attr,
|
||||
void **payload_p, __u16 *payload_len_p)
|
||||
{
|
||||
return __dcb_get_attribute(dcb, cmd, dev, attr,
|
||||
payload_p, payload_len_p,
|
||||
dcb_get_attribute_bare_cb);
|
||||
}
|
||||
|
||||
int dcb_get_attribute(struct dcb *dcb, const char *dev, int attr, void *data, size_t data_len)
|
||||
{
|
||||
__u16 payload_len;
|
||||
void *payload;
|
||||
int ret;
|
||||
|
||||
ret = dcb_get_attribute_va(dcb, dev, attr, &payload, &payload_len);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
if (payload_len != data_len) {
|
||||
fprintf(stderr, "Wrong len %d, expected %zd\n", payload_len, data_len);
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
memcpy(data, payload, data_len);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int __dcb_set_attribute(struct dcb *dcb, int command, const char *dev,
|
||||
int (*cb)(struct dcb *, struct nlmsghdr *, void *),
|
||||
void *data, int response_attr)
|
||||
{
|
||||
struct dcb_set_attribute_response resp = {
|
||||
.response_attr = response_attr,
|
||||
};
|
||||
struct nlmsghdr *nlh;
|
||||
int ret;
|
||||
|
||||
nlh = dcb_prepare(dcb, dev, RTM_SETDCB, command);
|
||||
|
||||
ret = cb(dcb, nlh, data);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
ret = dcb_talk(dcb, nlh, dcb_set_attribute_cb, &resp);
|
||||
if (ret) {
|
||||
perror("Attribute write");
|
||||
return ret;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
struct dcb_set_attribute_ieee_cb {
|
||||
int (*cb)(struct dcb *dcb, struct nlmsghdr *nlh, void *data);
|
||||
void *data;
|
||||
};
|
||||
|
||||
static int dcb_set_attribute_ieee_cb(struct dcb *dcb, struct nlmsghdr *nlh, void *data)
|
||||
{
|
||||
struct dcb_set_attribute_ieee_cb *ieee_data = data;
|
||||
struct nlattr *nest;
|
||||
int ret;
|
||||
|
||||
nest = mnl_attr_nest_start(nlh, DCB_ATTR_IEEE);
|
||||
ret = ieee_data->cb(dcb, nlh, ieee_data->data);
|
||||
if (ret)
|
||||
return ret;
|
||||
mnl_attr_nest_end(nlh, nest);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int dcb_set_attribute_va(struct dcb *dcb, int command, const char *dev,
|
||||
int (*cb)(struct dcb *dcb, struct nlmsghdr *nlh, void *data),
|
||||
void *data)
|
||||
{
|
||||
struct dcb_set_attribute_ieee_cb ieee_data = {
|
||||
.cb = cb,
|
||||
.data = data,
|
||||
};
|
||||
|
||||
return __dcb_set_attribute(dcb, command, dev,
|
||||
&dcb_set_attribute_ieee_cb, &ieee_data,
|
||||
DCB_ATTR_IEEE);
|
||||
}
|
||||
|
||||
struct dcb_set_attribute {
|
||||
int attr;
|
||||
const void *data;
|
||||
size_t data_len;
|
||||
};
|
||||
|
||||
static int dcb_set_attribute_put(struct dcb *dcb, struct nlmsghdr *nlh, void *data)
|
||||
{
|
||||
struct dcb_set_attribute *dsa = data;
|
||||
|
||||
mnl_attr_put(nlh, dsa->attr, dsa->data_len, dsa->data);
|
||||
return 0;
|
||||
}
|
||||
|
||||
int dcb_set_attribute(struct dcb *dcb, const char *dev, int attr, const void *data, size_t data_len)
|
||||
{
|
||||
struct dcb_set_attribute dsa = {
|
||||
.attr = attr,
|
||||
.data = data,
|
||||
.data_len = data_len,
|
||||
};
|
||||
|
||||
return dcb_set_attribute_va(dcb, DCB_CMD_IEEE_SET, dev,
|
||||
&dcb_set_attribute_put, &dsa);
|
||||
}
|
||||
|
||||
int dcb_set_attribute_bare(struct dcb *dcb, int command, const char *dev,
|
||||
int attr, const void *data, size_t data_len,
|
||||
int response_attr)
|
||||
{
|
||||
struct dcb_set_attribute dsa = {
|
||||
.attr = attr,
|
||||
.data = data,
|
||||
.data_len = data_len,
|
||||
};
|
||||
|
||||
return __dcb_set_attribute(dcb, command, dev,
|
||||
&dcb_set_attribute_put, &dsa, response_attr);
|
||||
}
|
||||
|
||||
void dcb_print_array_u8(const __u8 *array, size_t size)
|
||||
{
|
||||
SPRINT_BUF(b);
|
||||
size_t i;
|
||||
|
||||
for (i = 0; i < size; i++) {
|
||||
snprintf(b, sizeof(b), "%zd:%%d ", i);
|
||||
print_uint(PRINT_ANY, NULL, b, array[i]);
|
||||
}
|
||||
}
|
||||
|
||||
void dcb_print_array_u64(const __u64 *array, size_t size)
|
||||
{
|
||||
SPRINT_BUF(b);
|
||||
size_t i;
|
||||
|
||||
for (i = 0; i < size; i++) {
|
||||
snprintf(b, sizeof(b), "%zd:%%" PRIu64 " ", i);
|
||||
print_u64(PRINT_ANY, NULL, b, array[i]);
|
||||
}
|
||||
}
|
||||
|
||||
void dcb_print_array_on_off(const __u8 *array, size_t size)
|
||||
{
|
||||
SPRINT_BUF(b);
|
||||
size_t i;
|
||||
|
||||
for (i = 0; i < size; i++) {
|
||||
snprintf(b, sizeof(b), "%zd:%%s ", i);
|
||||
print_on_off(PRINT_ANY, NULL, b, array[i]);
|
||||
}
|
||||
}
|
||||
|
||||
void dcb_print_array_kw(const __u8 *array, size_t array_size,
|
||||
const char *const kw[], size_t kw_size)
|
||||
{
|
||||
SPRINT_BUF(b);
|
||||
size_t i;
|
||||
|
||||
for (i = 0; i < array_size; i++) {
|
||||
__u8 emt = array[i];
|
||||
|
||||
snprintf(b, sizeof(b), "%zd:%%s ", i);
|
||||
if (emt < kw_size && kw[emt])
|
||||
print_string(PRINT_ANY, NULL, b, kw[emt]);
|
||||
else
|
||||
print_string(PRINT_ANY, NULL, b, "???");
|
||||
}
|
||||
}
|
||||
|
||||
void dcb_print_named_array(const char *json_name, const char *fp_name,
|
||||
const __u8 *array, size_t size,
|
||||
void (*print_array)(const __u8 *, size_t))
|
||||
{
|
||||
open_json_array(PRINT_JSON, json_name);
|
||||
print_string(PRINT_FP, NULL, "%s ", fp_name);
|
||||
print_array(array, size);
|
||||
close_json_array(PRINT_JSON, json_name);
|
||||
}
|
||||
|
||||
/* Validate one KEY:VALUE mapping and store it through @set_array.
 *
 * @what_key / @what_value: names used in diagnostics.
 * @key:       index to set, or (__u32) -1 meaning "every index 0..@max_key".
 * @max_key:   largest permitted index (inclusive).
 * @value:     value to store; must not exceed @max_value.
 * @set_array: called as set_array(index, value, @set_array_data) for each
 *             index being set.
 *
 * Returns 0 on success, or -EINVAL (after printing a message to stderr) when
 * the key or the value is out of range. @set_array is not called on error.
 */
int dcb_parse_mapping(const char *what_key, __u32 key, __u32 max_key,
		      const char *what_value, __u64 value, __u64 max_value,
		      void (*set_array)(__u32 index, __u64 value, void *data),
		      void *set_array_data)
{
	bool is_all = key == (__u32) -1;

	if (!is_all && key > max_key) {
		/* max_key is unsigned: print it with %u rather than %d. */
		fprintf(stderr, "In %s:%s mapping, %s is expected to be 0..%u\n",
			what_key, what_value, what_key, max_key);
		return -EINVAL;
	}

	if (value > max_value) {
		/* __u64 is not "unsigned long long" on every ABI; cast for %llu. */
		fprintf(stderr, "In %s:%s mapping, %s is expected to be 0..%llu\n",
			what_key, what_value, what_value,
			(unsigned long long) max_value);
		return -EINVAL;
	}

	if (!is_all) {
		set_array(key, value, set_array_data);
		return 0;
	}

	/* Stop on equality instead of testing "key <= max_key", which could
	 * never become false if max_key were the largest __u32.
	 */
	for (key = 0; ; key++) {
		set_array(key, value, set_array_data);
		if (key == max_key)
			break;
	}

	return 0;
}
|
||||
|
||||
/* dcb_parse_mapping() setter: treat @data as a __u8 array and store @value
 * (implicitly narrowed to 8 bits) at index @key.
 */
void dcb_set_u8(__u32 key, __u64 value, void *data)
{
	((__u8 *) data)[key] = value;
}
|
||||
|
||||
/* dcb_parse_mapping() setter: treat @data as a __u32 array and store @value
 * (implicitly narrowed to 32 bits) at index @key.
 */
void dcb_set_u32(__u32 key, __u64 value, void *data)
{
	((__u32 *) data)[key] = value;
}
|
||||
|
||||
/* dcb_parse_mapping() setter: treat @data as a __u64 array and store @value
 * at index @key.
 */
void dcb_set_u64(__u32 key, __u64 value, void *data)
{
	((__u64 *) data)[key] = value;
}
|
||||
|
||||
/* Parse the leading "dev DEV" of a sub-command and dispatch to @and_then.
 *
 * - No arguments, or "help": call @help and return 0.
 * - "dev DEV ...": validate DEV with check_ifname(), then return
 *   @and_then(dcb, DEV, remaining argc, remaining argv).
 * - Anything else: report the unexpected word, call @help, return -EINVAL.
 */
int dcb_cmd_parse_dev(struct dcb *dcb, int argc, char **argv,
		      int (*and_then)(struct dcb *dcb, const char *dev,
				      int argc, char **argv),
		      void (*help)(void))
{
	const char *dev;

	if (!argc || matches(*argv, "help") == 0) {
		help();
		return 0;
	}

	if (matches(*argv, "dev") != 0) {
		/* Terminate the diagnostic so the help text starts on its own line. */
		fprintf(stderr, "Expected `dev DEV', not `%s'\n", *argv);
		help();
		return -EINVAL;
	}

	NEXT_ARG();
	dev = *argv;
	if (check_ifname(dev)) {
		invarg("not a valid ifname", *argv);
		return -EINVAL;
	}

	NEXT_ARG_FWD();
	return and_then(dcb, dev, argc, argv);
}
|
||||
|
||||
/* Print the top-level usage summary of the dcb tool to stderr. */
static void dcb_help(void)
{
	static const char usage[] =
		"Usage: dcb [ OPTIONS ] OBJECT { COMMAND | help }\n"
		" dcb [ -f | --force ] { -b | --batch } filename [ -n | --netns ] netnsname\n"
		"where OBJECT := { app | buffer | dcbx | ets | maxrate | pfc }\n"
		" OPTIONS := [ -V | --Version | -i | --iec | -j | --json\n"
		" | -N | --Numeric | -p | --pretty\n"
		" | -s | --statistics | -v | --verbose]\n";

	fputs(usage, stderr);
}
|
||||
|
||||
static int dcb_cmd(struct dcb *dcb, int argc, char **argv)
|
||||
{
|
||||
if (!argc || matches(*argv, "help") == 0) {
|
||||
dcb_help();
|
||||
return 0;
|
||||
} else if (matches(*argv, "app") == 0) {
|
||||
return dcb_cmd_app(dcb, argc - 1, argv + 1);
|
||||
} else if (matches(*argv, "buffer") == 0) {
|
||||
return dcb_cmd_buffer(dcb, argc - 1, argv + 1);
|
||||
} else if (matches(*argv, "dcbx") == 0) {
|
||||
return dcb_cmd_dcbx(dcb, argc - 1, argv + 1);
|
||||
} else if (matches(*argv, "ets") == 0) {
|
||||
return dcb_cmd_ets(dcb, argc - 1, argv + 1);
|
||||
} else if (matches(*argv, "maxrate") == 0) {
|
||||
return dcb_cmd_maxrate(dcb, argc - 1, argv + 1);
|
||||
} else if (matches(*argv, "pfc") == 0) {
|
||||
return dcb_cmd_pfc(dcb, argc - 1, argv + 1);
|
||||
}
|
||||
|
||||
fprintf(stderr, "Object \"%s\" is unknown\n", *argv);
|
||||
return -ENOENT;
|
||||
}
|
||||
|
||||
/* do_batch() callback: run one batch line through dcb_cmd(), with @data
 * being the struct dcb passed to do_batch().
 */
static int dcb_batch_cmd(int argc, char *argv[], void *data)
{
	return dcb_cmd((struct dcb *) data, argc, argv);
}
|
||||
|
||||
static int dcb_batch(struct dcb *dcb, const char *name, bool force)
|
||||
{
|
||||
return do_batch(name, force, dcb_batch_cmd, dcb);
|
||||
}
|
||||
|
||||
int main(int argc, char **argv)
|
||||
{
|
||||
static const struct option long_options[] = {
|
||||
{ "Version", no_argument, NULL, 'V' },
|
||||
{ "force", no_argument, NULL, 'f' },
|
||||
{ "batch", required_argument, NULL, 'b' },
|
||||
{ "iec", no_argument, NULL, 'i' },
|
||||
{ "json", no_argument, NULL, 'j' },
|
||||
{ "Numeric", no_argument, NULL, 'N' },
|
||||
{ "pretty", no_argument, NULL, 'p' },
|
||||
{ "statistics", no_argument, NULL, 's' },
|
||||
{ "netns", required_argument, NULL, 'n' },
|
||||
{ "help", no_argument, NULL, 'h' },
|
||||
{ NULL, 0, NULL, 0 }
|
||||
};
|
||||
const char *batch_file = NULL;
|
||||
bool force = false;
|
||||
struct dcb *dcb;
|
||||
int opt;
|
||||
int err;
|
||||
int ret;
|
||||
|
||||
dcb = dcb_alloc();
|
||||
if (!dcb) {
|
||||
fprintf(stderr, "Failed to allocate memory for dcb\n");
|
||||
return EXIT_FAILURE;
|
||||
}
|
||||
|
||||
while ((opt = getopt_long(argc, argv, "b:fhijn:psvNV",
|
||||
long_options, NULL)) >= 0) {
|
||||
|
||||
switch (opt) {
|
||||
case 'V':
|
||||
printf("dcb utility, iproute2-%s\n", version);
|
||||
ret = EXIT_SUCCESS;
|
||||
goto dcb_free;
|
||||
case 'f':
|
||||
force = true;
|
||||
break;
|
||||
case 'b':
|
||||
batch_file = optarg;
|
||||
break;
|
||||
case 'j':
|
||||
dcb->json_output = true;
|
||||
break;
|
||||
case 'N':
|
||||
dcb->numeric = true;
|
||||
break;
|
||||
case 'p':
|
||||
pretty = true;
|
||||
break;
|
||||
case 's':
|
||||
dcb->stats = true;
|
||||
break;
|
||||
case 'n':
|
||||
if (netns_switch(optarg)) {
|
||||
ret = EXIT_FAILURE;
|
||||
goto dcb_free;
|
||||
}
|
||||
break;
|
||||
case 'i':
|
||||
dcb->use_iec = true;
|
||||
break;
|
||||
case 'h':
|
||||
dcb_help();
|
||||
ret = EXIT_SUCCESS;
|
||||
goto dcb_free;
|
||||
default:
|
||||
fprintf(stderr, "Unknown option.\n");
|
||||
dcb_help();
|
||||
ret = EXIT_FAILURE;
|
||||
goto dcb_free;
|
||||
}
|
||||
}
|
||||
|
||||
argc -= optind;
|
||||
argv += optind;
|
||||
|
||||
err = dcb_init(dcb);
|
||||
if (err) {
|
||||
ret = EXIT_FAILURE;
|
||||
goto dcb_free;
|
||||
}
|
||||
|
||||
if (batch_file)
|
||||
err = dcb_batch(dcb, batch_file, force);
|
||||
else
|
||||
err = dcb_cmd(dcb, argc, argv);
|
||||
|
||||
if (err) {
|
||||
ret = EXIT_FAILURE;
|
||||
goto dcb_fini;
|
||||
}
|
||||
|
||||
ret = EXIT_SUCCESS;
|
||||
|
||||
dcb_fini:
|
||||
dcb_fini(dcb);
|
||||
dcb_free:
|
||||
dcb_free(dcb);
|
||||
|
||||
return ret;
|
||||
}
|
||||
81
dcb/dcb.h
81
dcb/dcb.h
|
|
@ -1,81 +0,0 @@
|
|||
/* SPDX-License-Identifier: GPL-2.0 */
|
||||
#ifndef __DCB_H__
|
||||
#define __DCB_H__ 1
|
||||
|
||||
#include <libmnl/libmnl.h>
|
||||
#include <stdbool.h>
|
||||
#include <stddef.h>
|
||||
|
||||
/* dcb.c */
|
||||
|
||||
/* Per-invocation context shared by all dcb sub-commands. */
struct dcb {
|
||||
/* Message buffer; NOTE(review): allocation and size are not visible here. */
char *buf;
|
||||
/* libmnl socket handle. */
struct mnl_socket *nl;
|
||||
/* Set by -j / --json. */
bool json_output;
|
||||
/* Set by -s / --statistics. */
bool stats;
|
||||
/* Set by -i / --iec. */
bool use_iec;
|
||||
/* Set by -N / --Numeric. */
bool numeric;
|
||||
};
|
||||
|
||||
int dcb_parse_mapping(const char *what_key, __u32 key, __u32 max_key,
|
||||
const char *what_value, __u64 value, __u64 max_value,
|
||||
void (*set_array)(__u32 index, __u64 value, void *data),
|
||||
void *set_array_data);
|
||||
int dcb_cmd_parse_dev(struct dcb *dcb, int argc, char **argv,
|
||||
int (*and_then)(struct dcb *dcb, const char *dev,
|
||||
int argc, char **argv),
|
||||
void (*help)(void));
|
||||
|
||||
void dcb_set_u8(__u32 key, __u64 value, void *data);
|
||||
void dcb_set_u32(__u32 key, __u64 value, void *data);
|
||||
void dcb_set_u64(__u32 key, __u64 value, void *data);
|
||||
|
||||
int dcb_get_attribute(struct dcb *dcb, const char *dev, int attr,
|
||||
void *data, size_t data_len);
|
||||
int dcb_set_attribute(struct dcb *dcb, const char *dev, int attr,
|
||||
const void *data, size_t data_len);
|
||||
int dcb_get_attribute_va(struct dcb *dcb, const char *dev, int attr,
|
||||
void **payload_p, __u16 *payload_len_p);
|
||||
int dcb_set_attribute_va(struct dcb *dcb, int command, const char *dev,
|
||||
int (*cb)(struct dcb *dcb, struct nlmsghdr *nlh, void *data),
|
||||
void *data);
|
||||
int dcb_get_attribute_bare(struct dcb *dcb, int cmd, const char *dev, int attr,
|
||||
void **payload_p, __u16 *payload_len_p);
|
||||
int dcb_set_attribute_bare(struct dcb *dcb, int command, const char *dev,
|
||||
int attr, const void *data, size_t data_len,
|
||||
int response_attr);
|
||||
|
||||
void dcb_print_named_array(const char *json_name, const char *fp_name,
|
||||
const __u8 *array, size_t size,
|
||||
void (*print_array)(const __u8 *, size_t));
|
||||
void dcb_print_array_u8(const __u8 *array, size_t size);
|
||||
void dcb_print_array_u64(const __u64 *array, size_t size);
|
||||
void dcb_print_array_on_off(const __u8 *array, size_t size);
|
||||
void dcb_print_array_kw(const __u8 *array, size_t array_size,
|
||||
const char *const kw[], size_t kw_size);
|
||||
|
||||
/* dcb_app.c */
|
||||
|
||||
int dcb_cmd_app(struct dcb *dcb, int argc, char **argv);
|
||||
|
||||
/* dcb_buffer.c */
|
||||
|
||||
int dcb_cmd_buffer(struct dcb *dcb, int argc, char **argv);
|
||||
|
||||
/* dcb_dcbx.c */
|
||||
|
||||
int dcb_cmd_dcbx(struct dcb *dcb, int argc, char **argv);
|
||||
|
||||
/* dcb_ets.c */
|
||||
|
||||
int dcb_cmd_ets(struct dcb *dcb, int argc, char **argv);
|
||||
|
||||
/* dcb_maxrate.c */
|
||||
|
||||
int dcb_cmd_maxrate(struct dcb *dcb, int argc, char **argv);
|
||||
|
||||
/* dcb_pfc.c */
|
||||
|
||||
int dcb_cmd_pfc(struct dcb *dcb, int argc, char **argv);
|
||||
|
||||
#endif /* __DCB_H__ */
|
||||
795
dcb/dcb_app.c
795
dcb/dcb_app.c
|
|
@ -1,795 +0,0 @@
|
|||
// SPDX-License-Identifier: GPL-2.0+
|
||||
|
||||
#include <errno.h>
|
||||
#include <inttypes.h>
|
||||
#include <stdio.h>
|
||||
#include <libmnl/libmnl.h>
|
||||
#include <linux/dcbnl.h>
|
||||
|
||||
#include "dcb.h"
|
||||
#include "utils.h"
|
||||
#include "rt_names.h"
|
||||
|
||||
/* Print usage for "dcb app add | del | replace" to stderr. */
static void dcb_app_help_add(void)
{
	static const char usage[] =
		"Usage: dcb app { add | del | replace } dev STRING\n"
		" [ default-prio PRIO ]\n"
		" [ ethtype-prio ET:PRIO ]\n"
		" [ stream-port-prio PORT:PRIO ]\n"
		" [ dgram-port-prio PORT:PRIO ]\n"
		" [ port-prio PORT:PRIO ]\n"
		" [ dscp-prio INTEGER:PRIO ]\n"
		"\n"
		" where PRIO := { 0 .. 7 }\n"
		" ET := { 0x600 .. 0xffff }\n"
		" PORT := { 1 .. 65535 }\n"
		" DSCP := { 0 .. 63 }\n"
		"\n";

	fputs(usage, stderr);
}
|
||||
|
||||
/* Print usage for "dcb app show | flush" to stderr. */
static void dcb_app_help_show_flush(void)
{
	static const char usage[] =
		"Usage: dcb app { show | flush } dev STRING\n"
		" [ default-prio ]\n"
		" [ ethtype-prio ]\n"
		" [ stream-port-prio ]\n"
		" [ dgram-port-prio ]\n"
		" [ port-prio ]\n"
		" [ dscp-prio ]\n"
		"\n";

	fputs(usage, stderr);
}
|
||||
|
||||
/* Print the complete "dcb app" help: the help line itself, followed by the
 * show/flush usage and then the add/del/replace usage.
 */
static void dcb_app_help(void)
{
	fputs("Usage: dcb app help\n"
	      "\n", stderr);

	dcb_app_help_show_flush();
	dcb_app_help_add();
}
|
||||
|
||||
/* Growable array of APP entries. A zero-initialised table is empty;
 * release it with dcb_app_table_fini().
 */
struct dcb_app_table {
|
||||
/* Heap array grown by dcb_app_table_push(); NULL while empty. */
struct dcb_app *apps;
|
||||
/* Number of valid entries in @apps. */
size_t n_apps;
|
||||
};
|
||||
|
||||
static void dcb_app_table_fini(struct dcb_app_table *tab)
|
||||
{
|
||||
free(tab->apps);
|
||||
}
|
||||
|
||||
static int dcb_app_table_push(struct dcb_app_table *tab, struct dcb_app *app)
|
||||
{
|
||||
struct dcb_app *apps = realloc(tab->apps, (tab->n_apps + 1) * sizeof(*tab->apps));
|
||||
|
||||
if (apps == NULL) {
|
||||
perror("Cannot allocate APP table");
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
tab->apps = apps;
|
||||
tab->apps[tab->n_apps++] = *app;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void dcb_app_table_remove_existing(struct dcb_app_table *a,
|
||||
const struct dcb_app_table *b)
|
||||
{
|
||||
size_t ia, ja;
|
||||
size_t ib;
|
||||
|
||||
for (ia = 0, ja = 0; ia < a->n_apps; ia++) {
|
||||
struct dcb_app *aa = &a->apps[ia];
|
||||
bool found = false;
|
||||
|
||||
for (ib = 0; ib < b->n_apps; ib++) {
|
||||
const struct dcb_app *ab = &b->apps[ib];
|
||||
|
||||
if (aa->selector == ab->selector &&
|
||||
aa->protocol == ab->protocol &&
|
||||
aa->priority == ab->priority) {
|
||||
found = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (!found)
|
||||
a->apps[ja++] = *aa;
|
||||
}
|
||||
|
||||
a->n_apps = ja;
|
||||
}
|
||||
|
||||
static void dcb_app_table_remove_replaced(struct dcb_app_table *a,
|
||||
const struct dcb_app_table *b)
|
||||
{
|
||||
size_t ia, ja;
|
||||
size_t ib;
|
||||
|
||||
for (ia = 0, ja = 0; ia < a->n_apps; ia++) {
|
||||
struct dcb_app *aa = &a->apps[ia];
|
||||
bool present = false;
|
||||
bool found = false;
|
||||
|
||||
for (ib = 0; ib < b->n_apps; ib++) {
|
||||
const struct dcb_app *ab = &b->apps[ib];
|
||||
|
||||
if (aa->selector == ab->selector &&
|
||||
aa->protocol == ab->protocol)
|
||||
present = true;
|
||||
else
|
||||
continue;
|
||||
|
||||
if (aa->priority == ab->priority) {
|
||||
found = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
/* Entries that remain in A will be removed, so keep in the
|
||||
* table only APP entries whose sel/pid is mentioned in B,
|
||||
* but that do not have the full sel/pid/prio match.
|
||||
*/
|
||||
if (present && !found)
|
||||
a->apps[ja++] = *aa;
|
||||
}
|
||||
|
||||
a->n_apps = ja;
|
||||
}
|
||||
|
||||
static int dcb_app_table_copy(struct dcb_app_table *a,
|
||||
const struct dcb_app_table *b)
|
||||
{
|
||||
size_t i;
|
||||
int ret;
|
||||
|
||||
for (i = 0; i < b->n_apps; i++) {
|
||||
ret = dcb_app_table_push(a, &b->apps[i]);
|
||||
if (ret != 0)
|
||||
return ret;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Order two APP entries by protocol first, then by priority. The selector
 * is not considered. Returns <0, 0 or >0 in the manner of strcmp().
 */
static int dcb_app_cmp(const struct dcb_app *a, const struct dcb_app *b)
{
	if (a->protocol != b->protocol)
		return a->protocol < b->protocol ? -1 : 1;

	return a->priority - b->priority;
}
|
||||
|
||||
/* qsort() adapter around dcb_app_cmp(); both arguments point to
 * struct dcb_app elements.
 */
static int dcb_app_cmp_cb(const void *a, const void *b)
{
	const struct dcb_app *lhs = a;
	const struct dcb_app *rhs = b;

	return dcb_app_cmp(lhs, rhs);
}
|
||||
|
||||
static void dcb_app_table_sort(struct dcb_app_table *tab)
|
||||
{
|
||||
qsort(tab->apps, tab->n_apps, sizeof(*tab->apps), dcb_app_cmp_cb);
|
||||
}
|
||||
|
||||
/* State shared between the KEY:PRIO parsers and dcb_app_parse_mapping_cb(). */
struct dcb_app_parse_mapping {
|
||||
/* Selector stamped on every entry created from the current keyword. */
__u8 selector;
|
||||
/* Table that receives the parsed entries. */
struct dcb_app_table *tab;
|
||||
/* First push error; once non-zero, further entries are ignored. */
int err;
|
||||
};
|
||||
|
||||
/* dcb_parse_mapping() setter: turn one (@key, @value) pair into an APP entry
 * with the selector from @data (a struct dcb_app_parse_mapping) and push it
 * onto that structure's table.
 *
 * A push failure is recorded in ->err; once set, later calls do nothing.
 */
static void dcb_app_parse_mapping_cb(__u32 key, __u64 value, void *data)
{
	struct dcb_app_parse_mapping *pm = data;
	struct dcb_app entry;

	if (pm->err)
		return;

	entry = (struct dcb_app) {
		.selector = pm->selector,
		.priority = value,
		.protocol = key,
	};

	pm->err = dcb_app_table_push(pm->tab, &entry);
}
|
||||
|
||||
/* Mapping callback for "ethtype-prio ET:PRIO".
 *
 * Rejects keys below 0x600 and values that get_u8() cannot parse with
 * -EINVAL; otherwise defers to dcb_parse_mapping() with the key limited to
 * 0xffff and the priority to IEEE_8021QAZ_MAX_TCS - 1.
 */
static int dcb_app_parse_mapping_ethtype_prio(__u32 key, char *value, void *data)
{
	__u8 prio;

	if (key < 0x600) {
		fprintf(stderr, "Protocol IDs < 0x600 are reserved for EtherType\n");
		return -EINVAL;
	}

	if (get_u8(&prio, value, 0) != 0)
		return -EINVAL;

	return dcb_parse_mapping("ETHTYPE", key, 0xffff,
				 "PRIO", prio, IEEE_8021QAZ_MAX_TCS - 1,
				 dcb_app_parse_mapping_cb, data);
}
|
||||
|
||||
/* Key parser for "dscp-prio": accept whatever parse_mapping_num_all()
 * accepts, or else a name known to rtnl_dsfield_a2n().
 *
 * A named value is a dsfield; it must have its two low bits clear and is
 * shifted right by two to obtain the DSCP. Returns 0 on success, -1 on error.
 */
static int dcb_app_parse_dscp(__u32 *key, const char *arg)
{
	if (!parse_mapping_num_all(key, arg))
		return 0;

	if (rtnl_dsfield_a2n(key, arg))
		return -1;

	if ((*key & 0x03) != 0) {
		fprintf(stderr, "The values `%s' uses non-DSCP bits.\n", arg);
		return -1;
	}

	/* dsfield -> DSCP. */
	*key = *key >> 2;
	return 0;
}
|
||||
|
||||
/* Mapping callback for "dscp-prio DSCP:PRIO".
 *
 * Returns -EINVAL if @value is not parseable by get_u8(); otherwise defers
 * to dcb_parse_mapping() with the key limited to 63 and the priority to
 * IEEE_8021QAZ_MAX_TCS - 1.
 */
static int dcb_app_parse_mapping_dscp_prio(__u32 key, char *value, void *data)
{
	__u8 prio;

	if (get_u8(&prio, value, 0) != 0)
		return -EINVAL;

	return dcb_parse_mapping("DSCP", key, 63,
				 "PRIO", prio, IEEE_8021QAZ_MAX_TCS - 1,
				 dcb_app_parse_mapping_cb, data);
}
|
||||
|
||||
/* Mapping callback shared by the three "*port-prio PORT:PRIO" keywords.
 *
 * Rejects port 0 and values that get_u8() cannot parse with -EINVAL;
 * otherwise defers to dcb_parse_mapping() with the key limited to 0xffff and
 * the priority to IEEE_8021QAZ_MAX_TCS - 1.
 */
static int dcb_app_parse_mapping_port_prio(__u32 key, char *value, void *data)
{
	__u8 prio;

	if (!key) {
		fprintf(stderr, "Port ID of 0 is invalid\n");
		return -EINVAL;
	}

	if (get_u8(&prio, value, 0) != 0)
		return -EINVAL;

	return dcb_parse_mapping("PORT", key, 0xffff,
				 "PRIO", prio, IEEE_8021QAZ_MAX_TCS - 1,
				 dcb_app_parse_mapping_cb, data);
}
|
||||
|
||||
static int dcb_app_parse_default_prio(int *argcp, char ***argvp, struct dcb_app_table *tab)
|
||||
{
|
||||
int argc = *argcp;
|
||||
char **argv = *argvp;
|
||||
int ret = 0;
|
||||
|
||||
while (argc > 0) {
|
||||
struct dcb_app app;
|
||||
__u8 prio;
|
||||
|
||||
if (get_u8(&prio, *argv, 0)) {
|
||||
ret = 1;
|
||||
break;
|
||||
}
|
||||
|
||||
app = (struct dcb_app){
|
||||
.selector = IEEE_8021QAZ_APP_SEL_ETHERTYPE,
|
||||
.protocol = 0,
|
||||
.priority = prio,
|
||||
};
|
||||
ret = dcb_app_table_push(tab, &app);
|
||||
if (ret != 0)
|
||||
break;
|
||||
|
||||
argc--, argv++;
|
||||
}
|
||||
|
||||
*argcp = argc;
|
||||
*argvp = argv;
|
||||
return ret;
|
||||
}
|
||||
|
||||
/* True for EtherType-selector entries with a non-zero protocol. */
static bool dcb_app_is_ethtype(const struct dcb_app *app)
{
	if (app->selector != IEEE_8021QAZ_APP_SEL_ETHERTYPE)
		return false;

	return app->protocol != 0;
}
|
||||
|
||||
/* True for default-priority entries: EtherType selector with protocol 0. */
static bool dcb_app_is_default(const struct dcb_app *app)
{
	if (app->selector != IEEE_8021QAZ_APP_SEL_ETHERTYPE)
		return false;

	return app->protocol == 0;
}
|
||||
|
||||
/* True for entries using the DSCP selector. */
static bool dcb_app_is_dscp(const struct dcb_app *app)
{
	const __u8 sel = app->selector;

	return sel == IEEE_8021QAZ_APP_SEL_DSCP;
}
|
||||
|
||||
/* True for entries using the STREAM selector (stream-port-prio). */
static bool dcb_app_is_stream_port(const struct dcb_app *app)
{
	const __u8 sel = app->selector;

	return sel == IEEE_8021QAZ_APP_SEL_STREAM;
}
|
||||
|
||||
/* True for entries using the DGRAM selector (dgram-port-prio). */
static bool dcb_app_is_dgram_port(const struct dcb_app *app)
{
	const __u8 sel = app->selector;

	return sel == IEEE_8021QAZ_APP_SEL_DGRAM;
}
|
||||
|
||||
/* True for entries using the ANY selector (port-prio). */
static bool dcb_app_is_port(const struct dcb_app *app)
{
	const __u8 sel = app->selector;

	return sel == IEEE_8021QAZ_APP_SEL_ANY;
}
|
||||
|
||||
/* Print an APP key as "<decimal>:" and return print_uint()'s result. */
static int dcb_app_print_key_dec(__u16 protocol)
{
	int printed = print_uint(PRINT_ANY, NULL, "%d:", protocol);

	return printed;
}
|
||||
|
||||
/* Print an APP key as "<hex>:" and return print_uint()'s result. */
static int dcb_app_print_key_hex(__u16 protocol)
{
	int printed = print_uint(PRINT_ANY, NULL, "%x:", protocol);

	return printed;
}
|
||||
|
||||
/* Print a DSCP key, preferring its symbolic name in plain-text output.
 *
 * The name is looked up for the DSCP shifted left by two. In JSON context,
 * or when no name is known, the key is printed numerically as "<decimal>:".
 */
static int dcb_app_print_key_dscp(__u16 protocol)
{
	const char *name = rtnl_dsfield_get_name(protocol << 2);

	if (is_json_context() || name == NULL)
		return print_uint(PRINT_ANY, NULL, "%d:", protocol);

	return print_string(PRINT_FP, NULL, "%s:", name);
}
|
||||
|
||||
static void dcb_app_print_filtered(const struct dcb_app_table *tab,
|
||||
bool (*filter)(const struct dcb_app *),
|
||||
int (*print_key)(__u16 protocol),
|
||||
const char *json_name,
|
||||
const char *fp_name)
|
||||
{
|
||||
bool first = true;
|
||||
size_t i;
|
||||
|
||||
for (i = 0; i < tab->n_apps; i++) {
|
||||
struct dcb_app *app = &tab->apps[i];
|
||||
|
||||
if (!filter(app))
|
||||
continue;
|
||||
if (first) {
|
||||
open_json_array(PRINT_JSON, json_name);
|
||||
print_string(PRINT_FP, NULL, "%s ", fp_name);
|
||||
first = false;
|
||||
}
|
||||
|
||||
open_json_array(PRINT_JSON, NULL);
|
||||
print_key(app->protocol);
|
||||
print_uint(PRINT_ANY, NULL, "%d ", app->priority);
|
||||
close_json_array(PRINT_JSON, NULL);
|
||||
}
|
||||
|
||||
if (!first) {
|
||||
close_json_array(PRINT_JSON, json_name);
|
||||
print_nl();
|
||||
}
|
||||
}
|
||||
|
||||
static void dcb_app_print_ethtype_prio(const struct dcb_app_table *tab)
|
||||
{
|
||||
dcb_app_print_filtered(tab, dcb_app_is_ethtype, dcb_app_print_key_hex,
|
||||
"ethtype_prio", "ethtype-prio");
|
||||
}
|
||||
|
||||
static void dcb_app_print_dscp_prio(const struct dcb *dcb,
|
||||
const struct dcb_app_table *tab)
|
||||
{
|
||||
dcb_app_print_filtered(tab, dcb_app_is_dscp,
|
||||
dcb->numeric ? dcb_app_print_key_dec
|
||||
: dcb_app_print_key_dscp,
|
||||
"dscp_prio", "dscp-prio");
|
||||
}
|
||||
|
||||
static void dcb_app_print_stream_port_prio(const struct dcb_app_table *tab)
|
||||
{
|
||||
dcb_app_print_filtered(tab, dcb_app_is_stream_port, dcb_app_print_key_dec,
|
||||
"stream_port_prio", "stream-port-prio");
|
||||
}
|
||||
|
||||
static void dcb_app_print_dgram_port_prio(const struct dcb_app_table *tab)
|
||||
{
|
||||
dcb_app_print_filtered(tab, dcb_app_is_dgram_port, dcb_app_print_key_dec,
|
||||
"dgram_port_prio", "dgram-port-prio");
|
||||
}
|
||||
|
||||
static void dcb_app_print_port_prio(const struct dcb_app_table *tab)
|
||||
{
|
||||
dcb_app_print_filtered(tab, dcb_app_is_port, dcb_app_print_key_dec,
|
||||
"port_prio", "port-prio");
|
||||
}
|
||||
|
||||
static void dcb_app_print_default_prio(const struct dcb_app_table *tab)
|
||||
{
|
||||
bool first = true;
|
||||
size_t i;
|
||||
|
||||
for (i = 0; i < tab->n_apps; i++) {
|
||||
if (!dcb_app_is_default(&tab->apps[i]))
|
||||
continue;
|
||||
if (first) {
|
||||
open_json_array(PRINT_JSON, "default_prio");
|
||||
print_string(PRINT_FP, NULL, "default-prio ", NULL);
|
||||
first = false;
|
||||
}
|
||||
print_uint(PRINT_ANY, NULL, "%d ", tab->apps[i].priority);
|
||||
}
|
||||
|
||||
if (!first) {
|
||||
close_json_array(PRINT_JSON, "default_prio");
|
||||
print_nl();
|
||||
}
|
||||
}
|
||||
|
||||
/* Print every APP group of @tab, in this fixed order: ethtype, default,
 * dscp, stream-port, dgram-port, port.
 */
static void dcb_app_print(const struct dcb *dcb, const struct dcb_app_table *tab)
{
	/* EtherType-selector groups. */
	dcb_app_print_ethtype_prio(tab);
	dcb_app_print_default_prio(tab);

	/* DSCP group; the only one that depends on dcb->numeric. */
	dcb_app_print_dscp_prio(dcb, tab);

	/* Port-based groups. */
	dcb_app_print_stream_port_prio(tab);
	dcb_app_print_dgram_port_prio(tab);
	dcb_app_print_port_prio(tab);
}
|
||||
|
||||
static int dcb_app_get_table_attr_cb(const struct nlattr *attr, void *data)
|
||||
{
|
||||
struct dcb_app_table *tab = data;
|
||||
struct dcb_app *app;
|
||||
int ret;
|
||||
|
||||
if (mnl_attr_get_type(attr) != DCB_ATTR_IEEE_APP) {
|
||||
fprintf(stderr, "Unknown attribute in DCB_ATTR_IEEE_APP_TABLE: %d\n",
|
||||
mnl_attr_get_type(attr));
|
||||
return MNL_CB_OK;
|
||||
}
|
||||
if (mnl_attr_get_payload_len(attr) < sizeof(struct dcb_app)) {
|
||||
fprintf(stderr, "DCB_ATTR_IEEE_APP payload expected to have size %zd, not %d\n",
|
||||
sizeof(struct dcb_app), mnl_attr_get_payload_len(attr));
|
||||
return MNL_CB_OK;
|
||||
}
|
||||
|
||||
app = mnl_attr_get_payload(attr);
|
||||
ret = dcb_app_table_push(tab, app);
|
||||
if (ret != 0)
|
||||
return MNL_CB_ERROR;
|
||||
|
||||
return MNL_CB_OK;
|
||||
}
|
||||
|
||||
static int dcb_app_get(struct dcb *dcb, const char *dev, struct dcb_app_table *tab)
|
||||
{
|
||||
uint16_t payload_len;
|
||||
void *payload;
|
||||
int ret;
|
||||
|
||||
ret = dcb_get_attribute_va(dcb, dev, DCB_ATTR_IEEE_APP_TABLE, &payload, &payload_len);
|
||||
if (ret != 0)
|
||||
return ret;
|
||||
|
||||
ret = mnl_attr_parse_payload(payload, payload_len, dcb_app_get_table_attr_cb, tab);
|
||||
if (ret != MNL_CB_OK)
|
||||
return -EINVAL;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Argument bundle for dcb_app_add_del_cb(). */
struct dcb_app_add_del {
|
||||
/* Entries to place in the request; not modified. */
const struct dcb_app_table *tab;
|
||||
/* Optional predicate; when NULL every entry of @tab is sent. */
bool (*filter)(const struct dcb_app *app);
|
||||
};
|
||||
|
||||
static int dcb_app_add_del_cb(struct dcb *dcb, struct nlmsghdr *nlh, void *data)
|
||||
{
|
||||
struct dcb_app_add_del *add_del = data;
|
||||
struct nlattr *nest;
|
||||
size_t i;
|
||||
|
||||
nest = mnl_attr_nest_start(nlh, DCB_ATTR_IEEE_APP_TABLE);
|
||||
|
||||
for (i = 0; i < add_del->tab->n_apps; i++) {
|
||||
const struct dcb_app *app = &add_del->tab->apps[i];
|
||||
|
||||
if (add_del->filter == NULL || add_del->filter(app))
|
||||
mnl_attr_put(nlh, DCB_ATTR_IEEE_APP, sizeof(*app), app);
|
||||
}
|
||||
|
||||
mnl_attr_nest_end(nlh, nest);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int dcb_app_add_del(struct dcb *dcb, const char *dev, int command,
|
||||
const struct dcb_app_table *tab,
|
||||
bool (*filter)(const struct dcb_app *))
|
||||
{
|
||||
struct dcb_app_add_del add_del = {
|
||||
.tab = tab,
|
||||
.filter = filter,
|
||||
};
|
||||
|
||||
if (tab->n_apps == 0)
|
||||
return 0;
|
||||
|
||||
return dcb_set_attribute_va(dcb, command, dev, dcb_app_add_del_cb, &add_del);
|
||||
}
|
||||
|
||||
static int dcb_cmd_app_parse_add_del(struct dcb *dcb, const char *dev,
|
||||
int argc, char **argv, struct dcb_app_table *tab)
|
||||
{
|
||||
struct dcb_app_parse_mapping pm = {
|
||||
.tab = tab,
|
||||
};
|
||||
int ret;
|
||||
|
||||
if (!argc) {
|
||||
dcb_app_help_add();
|
||||
return 0;
|
||||
}
|
||||
|
||||
do {
|
||||
if (matches(*argv, "help") == 0) {
|
||||
dcb_app_help_add();
|
||||
return 0;
|
||||
} else if (matches(*argv, "ethtype-prio") == 0) {
|
||||
NEXT_ARG();
|
||||
pm.selector = IEEE_8021QAZ_APP_SEL_ETHERTYPE;
|
||||
ret = parse_mapping(&argc, &argv, false,
|
||||
&dcb_app_parse_mapping_ethtype_prio,
|
||||
&pm);
|
||||
} else if (matches(*argv, "default-prio") == 0) {
|
||||
NEXT_ARG();
|
||||
ret = dcb_app_parse_default_prio(&argc, &argv, pm.tab);
|
||||
if (ret != 0) {
|
||||
fprintf(stderr, "Invalid default priority %s\n", *argv);
|
||||
return ret;
|
||||
}
|
||||
} else if (matches(*argv, "dscp-prio") == 0) {
|
||||
NEXT_ARG();
|
||||
pm.selector = IEEE_8021QAZ_APP_SEL_DSCP;
|
||||
ret = parse_mapping_gen(&argc, &argv,
|
||||
&dcb_app_parse_dscp,
|
||||
&dcb_app_parse_mapping_dscp_prio,
|
||||
&pm);
|
||||
} else if (matches(*argv, "stream-port-prio") == 0) {
|
||||
NEXT_ARG();
|
||||
pm.selector = IEEE_8021QAZ_APP_SEL_STREAM;
|
||||
ret = parse_mapping(&argc, &argv, false,
|
||||
&dcb_app_parse_mapping_port_prio,
|
||||
&pm);
|
||||
} else if (matches(*argv, "dgram-port-prio") == 0) {
|
||||
NEXT_ARG();
|
||||
pm.selector = IEEE_8021QAZ_APP_SEL_DGRAM;
|
||||
ret = parse_mapping(&argc, &argv, false,
|
||||
&dcb_app_parse_mapping_port_prio,
|
||||
&pm);
|
||||
} else if (matches(*argv, "port-prio") == 0) {
|
||||
NEXT_ARG();
|
||||
pm.selector = IEEE_8021QAZ_APP_SEL_ANY;
|
||||
ret = parse_mapping(&argc, &argv, false,
|
||||
&dcb_app_parse_mapping_port_prio,
|
||||
&pm);
|
||||
} else {
|
||||
fprintf(stderr, "What is \"%s\"?\n", *argv);
|
||||
dcb_app_help_add();
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
if (ret != 0) {
|
||||
fprintf(stderr, "Invalid mapping %s\n", *argv);
|
||||
return ret;
|
||||
}
|
||||
if (pm.err)
|
||||
return pm.err;
|
||||
} while (argc > 0);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int dcb_cmd_app_add(struct dcb *dcb, const char *dev, int argc, char **argv)
|
||||
{
|
||||
struct dcb_app_table tab = {};
|
||||
int ret;
|
||||
|
||||
ret = dcb_cmd_app_parse_add_del(dcb, dev, argc, argv, &tab);
|
||||
if (ret != 0)
|
||||
return ret;
|
||||
|
||||
ret = dcb_app_add_del(dcb, dev, DCB_CMD_IEEE_SET, &tab, NULL);
|
||||
dcb_app_table_fini(&tab);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int dcb_cmd_app_del(struct dcb *dcb, const char *dev, int argc, char **argv)
|
||||
{
|
||||
struct dcb_app_table tab = {};
|
||||
int ret;
|
||||
|
||||
ret = dcb_cmd_app_parse_add_del(dcb, dev, argc, argv, &tab);
|
||||
if (ret != 0)
|
||||
return ret;
|
||||
|
||||
ret = dcb_app_add_del(dcb, dev, DCB_CMD_IEEE_DEL, &tab, NULL);
|
||||
dcb_app_table_fini(&tab);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int dcb_cmd_app_show(struct dcb *dcb, const char *dev, int argc, char **argv)
|
||||
{
|
||||
struct dcb_app_table tab = {};
|
||||
int ret;
|
||||
|
||||
ret = dcb_app_get(dcb, dev, &tab);
|
||||
if (ret != 0)
|
||||
return ret;
|
||||
|
||||
dcb_app_table_sort(&tab);
|
||||
|
||||
open_json_object(NULL);
|
||||
|
||||
if (!argc) {
|
||||
dcb_app_print(dcb, &tab);
|
||||
goto out;
|
||||
}
|
||||
|
||||
do {
|
||||
if (matches(*argv, "help") == 0) {
|
||||
dcb_app_help_show_flush();
|
||||
goto out;
|
||||
} else if (matches(*argv, "ethtype-prio") == 0) {
|
||||
dcb_app_print_ethtype_prio(&tab);
|
||||
} else if (matches(*argv, "dscp-prio") == 0) {
|
||||
dcb_app_print_dscp_prio(dcb, &tab);
|
||||
} else if (matches(*argv, "stream-port-prio") == 0) {
|
||||
dcb_app_print_stream_port_prio(&tab);
|
||||
} else if (matches(*argv, "dgram-port-prio") == 0) {
|
||||
dcb_app_print_dgram_port_prio(&tab);
|
||||
} else if (matches(*argv, "port-prio") == 0) {
|
||||
dcb_app_print_port_prio(&tab);
|
||||
} else {
|
||||
fprintf(stderr, "What is \"%s\"?\n", *argv);
|
||||
dcb_app_help_show_flush();
|
||||
ret = -EINVAL;
|
||||
goto out;
|
||||
}
|
||||
|
||||
NEXT_ARG_FWD();
|
||||
} while (argc > 0);
|
||||
|
||||
out:
|
||||
close_json_object();
|
||||
dcb_app_table_fini(&tab);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int dcb_cmd_app_flush(struct dcb *dcb, const char *dev, int argc, char **argv)
|
||||
{
|
||||
struct dcb_app_table tab = {};
|
||||
int ret;
|
||||
|
||||
ret = dcb_app_get(dcb, dev, &tab);
|
||||
if (ret != 0)
|
||||
return ret;
|
||||
|
||||
if (!argc) {
|
||||
ret = dcb_app_add_del(dcb, dev, DCB_CMD_IEEE_DEL, &tab, NULL);
|
||||
goto out;
|
||||
}
|
||||
|
||||
do {
|
||||
if (matches(*argv, "help") == 0) {
|
||||
dcb_app_help_show_flush();
|
||||
goto out;
|
||||
} else if (matches(*argv, "ethtype-prio") == 0) {
|
||||
ret = dcb_app_add_del(dcb, dev, DCB_CMD_IEEE_DEL, &tab,
|
||||
&dcb_app_is_ethtype);
|
||||
if (ret != 0)
|
||||
goto out;
|
||||
} else if (matches(*argv, "default-prio") == 0) {
|
||||
ret = dcb_app_add_del(dcb, dev, DCB_CMD_IEEE_DEL, &tab,
|
||||
&dcb_app_is_default);
|
||||
if (ret != 0)
|
||||
goto out;
|
||||
} else if (matches(*argv, "dscp-prio") == 0) {
|
||||
ret = dcb_app_add_del(dcb, dev, DCB_CMD_IEEE_DEL, &tab,
|
||||
&dcb_app_is_dscp);
|
||||
if (ret != 0)
|
||||
goto out;
|
||||
} else {
|
||||
fprintf(stderr, "What is \"%s\"?\n", *argv);
|
||||
dcb_app_help_show_flush();
|
||||
ret = -EINVAL;
|
||||
goto out;
|
||||
}
|
||||
|
||||
NEXT_ARG_FWD();
|
||||
} while (argc > 0);
|
||||
|
||||
out:
|
||||
dcb_app_table_fini(&tab);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int dcb_cmd_app_replace(struct dcb *dcb, const char *dev, int argc, char **argv)
|
||||
{
|
||||
struct dcb_app_table orig = {};
|
||||
struct dcb_app_table tab = {};
|
||||
struct dcb_app_table new = {};
|
||||
int ret;
|
||||
|
||||
ret = dcb_app_get(dcb, dev, &orig);
|
||||
if (ret != 0)
|
||||
return ret;
|
||||
|
||||
ret = dcb_cmd_app_parse_add_del(dcb, dev, argc, argv, &tab);
|
||||
if (ret != 0)
|
||||
goto out;
|
||||
|
||||
/* Attempts to add an existing entry would be rejected, so drop
|
||||
* these entries from tab.
|
||||
*/
|
||||
ret = dcb_app_table_copy(&new, &tab);
|
||||
if (ret != 0)
|
||||
goto out;
|
||||
dcb_app_table_remove_existing(&new, &orig);
|
||||
|
||||
ret = dcb_app_add_del(dcb, dev, DCB_CMD_IEEE_SET, &new, NULL);
|
||||
if (ret != 0) {
|
||||
fprintf(stderr, "Could not add new APP entries\n");
|
||||
goto out;
|
||||
}
|
||||
|
||||
/* Remove the obsolete entries. */
|
||||
dcb_app_table_remove_replaced(&orig, &tab);
|
||||
ret = dcb_app_add_del(dcb, dev, DCB_CMD_IEEE_DEL, &orig, NULL);
|
||||
if (ret != 0) {
|
||||
fprintf(stderr, "Could not remove replaced APP entries\n");
|
||||
goto out;
|
||||
}
|
||||
|
||||
out:
|
||||
dcb_app_table_fini(&new);
|
||||
dcb_app_table_fini(&tab);
|
||||
dcb_app_table_fini(&orig);
|
||||
return 0;
|
||||
}
|
||||
|
||||
int dcb_cmd_app(struct dcb *dcb, int argc, char **argv)
|
||||
{
|
||||
if (!argc || matches(*argv, "help") == 0) {
|
||||
dcb_app_help();
|
||||
return 0;
|
||||
} else if (matches(*argv, "show") == 0) {
|
||||
NEXT_ARG_FWD();
|
||||
return dcb_cmd_parse_dev(dcb, argc, argv,
|
||||
dcb_cmd_app_show, dcb_app_help_show_flush);
|
||||
} else if (matches(*argv, "flush") == 0) {
|
||||
NEXT_ARG_FWD();
|
||||
return dcb_cmd_parse_dev(dcb, argc, argv,
|
||||
dcb_cmd_app_flush, dcb_app_help_show_flush);
|
||||
} else if (matches(*argv, "add") == 0) {
|
||||
NEXT_ARG_FWD();
|
||||
return dcb_cmd_parse_dev(dcb, argc, argv,
|
||||
dcb_cmd_app_add, dcb_app_help_add);
|
||||
} else if (matches(*argv, "del") == 0) {
|
||||
NEXT_ARG_FWD();
|
||||
return dcb_cmd_parse_dev(dcb, argc, argv,
|
||||
dcb_cmd_app_del, dcb_app_help_add);
|
||||
} else if (matches(*argv, "replace") == 0) {
|
||||
NEXT_ARG_FWD();
|
||||
return dcb_cmd_parse_dev(dcb, argc, argv,
|
||||
dcb_cmd_app_replace, dcb_app_help_add);
|
||||
} else {
|
||||
fprintf(stderr, "What is \"%s\"?\n", *argv);
|
||||
dcb_app_help();
|
||||
return -EINVAL;
|
||||
}
|
||||
}
|
||||
235
dcb/dcb_buffer.c
235
dcb/dcb_buffer.c
|
|
@ -1,235 +0,0 @@
|
|||
// SPDX-License-Identifier: GPL-2.0+
|
||||
|
||||
#include <errno.h>
|
||||
#include <inttypes.h>
|
||||
#include <stdio.h>
|
||||
#include <linux/dcbnl.h>
|
||||
|
||||
#include "dcb.h"
|
||||
#include "utils.h"
|
||||
|
||||
/* Write the usage text for "dcb buffer set" to stderr. */
static void dcb_buffer_help_set(void)
{
	static const char usage[] =
		"Usage: dcb buffer set dev STRING\n"
		" [ prio-buffer PRIO-MAP ]\n"
		" [ buffer-size SIZE-MAP ]\n"
		"\n"
		" where PRIO-MAP := [ PRIO-MAP ] PRIO-MAPPING\n"
		" PRIO-MAPPING := { all | PRIO }:BUFFER\n"
		" SIZE-MAP := [ SIZE-MAP ] SIZE-MAPPING\n"
		" SIZE-MAPPING := { all | BUFFER }:INTEGER\n"
		" PRIO := { 0 .. 7 }\n"
		" BUFFER := { 0 .. 7 }\n"
		"\n";

	fputs(usage, stderr);
}
|
||||
|
||||
/* Write the usage text for "dcb buffer show" to stderr. */
static void dcb_buffer_help_show(void)
{
	static const char usage[] =
		"Usage: dcb buffer show dev STRING\n"
		" [ prio-buffer ] [ buffer-size ] [ total-size ]\n"
		"\n";

	fputs(usage, stderr);
}
|
||||
|
||||
/* Write the full "dcb buffer" usage to stderr: the help line followed by
 * the show and set usage texts.
 */
static void dcb_buffer_help(void)
{
	fputs("Usage: dcb buffer help\n"
	      "\n",
	      stderr);

	dcb_buffer_help_show();
	dcb_buffer_help_set();
}
|
||||
|
||||
/* parse_mapping() callback for "prio-buffer": parse value as a u8 buffer
 * index and hand the PRIO:BUFFER pair to dcb_parse_mapping(), which stores
 * it in the prio2buffer array of the struct dcbnl_buffer passed as data.
 * Returns -EINVAL when value is not a valid u8.
 */
static int dcb_buffer_parse_mapping_prio_buffer(__u32 key, char *value, void *data)
{
	struct dcbnl_buffer *cfg = data;
	__u8 buffer_idx;

	if (get_u8(&buffer_idx, value, 0) != 0)
		return -EINVAL;

	return dcb_parse_mapping("PRIO", key, IEEE_8021Q_MAX_PRIORITIES - 1,
				 "BUFFER", buffer_idx, DCBX_MAX_BUFFERS - 1,
				 dcb_set_u8, cfg->prio2buffer);
}
|
||||
|
||||
/* parse_mapping() callback for "buffer-size": parse value with get_size()
 * and hand the BUFFER:SIZE pair to dcb_parse_mapping(), which stores it in
 * the buffer_size array of the struct dcbnl_buffer passed as data.
 * Returns -EINVAL, after printing a diagnostic, when value is not a valid
 * size.
 */
static int dcb_buffer_parse_mapping_buffer_size(__u32 key, char *value, void *data)
{
	struct dcbnl_buffer *buffer = data;
	unsigned int size;

	if (get_size(&size, value)) {
		/* key is unsigned, so print it with %u rather than %d. */
		fprintf(stderr, "%u:%s: Illegal value for buffer size\n", key, value);
		return -EINVAL;
	}

	return dcb_parse_mapping("BUFFER", key, DCBX_MAX_BUFFERS - 1,
				 "INTEGER", size, -1,
				 dcb_set_u32, buffer->buffer_size);
}
|
||||
|
||||
static void dcb_buffer_print_total_size(const struct dcbnl_buffer *buffer)
|
||||
{
|
||||
print_size(PRINT_ANY, "total_size", "total-size %s ", buffer->total_size);
|
||||
}
|
||||
|
||||
static void dcb_buffer_print_prio_buffer(const struct dcbnl_buffer *buffer)
|
||||
{
|
||||
dcb_print_named_array("prio_buffer", "prio-buffer",
|
||||
buffer->prio2buffer, ARRAY_SIZE(buffer->prio2buffer),
|
||||
dcb_print_array_u8);
|
||||
}
|
||||
|
||||
static void dcb_buffer_print_buffer_size(const struct dcbnl_buffer *buffer)
|
||||
{
|
||||
size_t size = ARRAY_SIZE(buffer->buffer_size);
|
||||
SPRINT_BUF(b);
|
||||
size_t i;
|
||||
|
||||
open_json_array(PRINT_JSON, "buffer_size");
|
||||
print_string(PRINT_FP, NULL, "buffer-size ", NULL);
|
||||
|
||||
for (i = 0; i < size; i++) {
|
||||
snprintf(b, sizeof(b), "%zd:%%s ", i);
|
||||
print_size(PRINT_ANY, NULL, b, buffer->buffer_size[i]);
|
||||
}
|
||||
|
||||
close_json_array(PRINT_JSON, "buffer_size");
|
||||
}
|
||||
|
||||
static void dcb_buffer_print(const struct dcbnl_buffer *buffer)
|
||||
{
|
||||
dcb_buffer_print_prio_buffer(buffer);
|
||||
print_nl();
|
||||
|
||||
dcb_buffer_print_buffer_size(buffer);
|
||||
print_nl();
|
||||
|
||||
dcb_buffer_print_total_size(buffer);
|
||||
print_nl();
|
||||
}
|
||||
|
||||
static int dcb_buffer_get(struct dcb *dcb, const char *dev, struct dcbnl_buffer *buffer)
|
||||
{
|
||||
return dcb_get_attribute(dcb, dev, DCB_ATTR_DCB_BUFFER, buffer, sizeof(*buffer));
|
||||
}
|
||||
|
||||
static int dcb_buffer_set(struct dcb *dcb, const char *dev, const struct dcbnl_buffer *buffer)
|
||||
{
|
||||
return dcb_set_attribute(dcb, dev, DCB_ATTR_DCB_BUFFER, buffer, sizeof(*buffer));
|
||||
}
|
||||
|
||||
static int dcb_cmd_buffer_set(struct dcb *dcb, const char *dev, int argc, char **argv)
|
||||
{
|
||||
struct dcbnl_buffer buffer;
|
||||
int ret;
|
||||
|
||||
if (!argc) {
|
||||
dcb_buffer_help_set();
|
||||
return 0;
|
||||
}
|
||||
|
||||
ret = dcb_buffer_get(dcb, dev, &buffer);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
do {
|
||||
if (matches(*argv, "help") == 0) {
|
||||
dcb_buffer_help_set();
|
||||
return 0;
|
||||
} else if (matches(*argv, "prio-buffer") == 0) {
|
||||
NEXT_ARG();
|
||||
ret = parse_mapping(&argc, &argv, true,
|
||||
&dcb_buffer_parse_mapping_prio_buffer, &buffer);
|
||||
if (ret) {
|
||||
fprintf(stderr, "Invalid priority mapping %s\n", *argv);
|
||||
return ret;
|
||||
}
|
||||
continue;
|
||||
} else if (matches(*argv, "buffer-size") == 0) {
|
||||
NEXT_ARG();
|
||||
ret = parse_mapping(&argc, &argv, true,
|
||||
&dcb_buffer_parse_mapping_buffer_size, &buffer);
|
||||
if (ret) {
|
||||
fprintf(stderr, "Invalid buffer size mapping %s\n", *argv);
|
||||
return ret;
|
||||
}
|
||||
continue;
|
||||
} else {
|
||||
fprintf(stderr, "What is \"%s\"?\n", *argv);
|
||||
dcb_buffer_help_set();
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
NEXT_ARG_FWD();
|
||||
} while (argc > 0);
|
||||
|
||||
return dcb_buffer_set(dcb, dev, &buffer);
|
||||
}
|
||||
|
||||
static int dcb_cmd_buffer_show(struct dcb *dcb, const char *dev, int argc, char **argv)
|
||||
{
|
||||
struct dcbnl_buffer buffer;
|
||||
int ret;
|
||||
|
||||
ret = dcb_buffer_get(dcb, dev, &buffer);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
open_json_object(NULL);
|
||||
|
||||
if (!argc) {
|
||||
dcb_buffer_print(&buffer);
|
||||
goto out;
|
||||
}
|
||||
|
||||
do {
|
||||
if (matches(*argv, "help") == 0) {
|
||||
dcb_buffer_help_show();
|
||||
return 0;
|
||||
} else if (matches(*argv, "prio-buffer") == 0) {
|
||||
dcb_buffer_print_prio_buffer(&buffer);
|
||||
print_nl();
|
||||
} else if (matches(*argv, "buffer-size") == 0) {
|
||||
dcb_buffer_print_buffer_size(&buffer);
|
||||
print_nl();
|
||||
} else if (matches(*argv, "total-size") == 0) {
|
||||
dcb_buffer_print_total_size(&buffer);
|
||||
print_nl();
|
||||
} else {
|
||||
fprintf(stderr, "What is \"%s\"?\n", *argv);
|
||||
dcb_buffer_help_show();
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
NEXT_ARG_FWD();
|
||||
} while (argc > 0);
|
||||
|
||||
out:
|
||||
close_json_object();
|
||||
return 0;
|
||||
}
|
||||
|
||||
int dcb_cmd_buffer(struct dcb *dcb, int argc, char **argv)
|
||||
{
|
||||
if (!argc || matches(*argv, "help") == 0) {
|
||||
dcb_buffer_help();
|
||||
return 0;
|
||||
} else if (matches(*argv, "show") == 0) {
|
||||
NEXT_ARG_FWD();
|
||||
return dcb_cmd_parse_dev(dcb, argc, argv,
|
||||
dcb_cmd_buffer_show, dcb_buffer_help_show);
|
||||
} else if (matches(*argv, "set") == 0) {
|
||||
NEXT_ARG_FWD();
|
||||
return dcb_cmd_parse_dev(dcb, argc, argv,
|
||||
dcb_cmd_buffer_set, dcb_buffer_help_set);
|
||||
} else {
|
||||
fprintf(stderr, "What is \"%s\"?\n", *argv);
|
||||
dcb_buffer_help();
|
||||
return -EINVAL;
|
||||
}
|
||||
}
|
||||
192
dcb/dcb_dcbx.c
192
dcb/dcb_dcbx.c
|
|
@ -1,192 +0,0 @@
|
|||
// SPDX-License-Identifier: GPL-2.0+
|
||||
|
||||
#include <errno.h>
|
||||
#include <inttypes.h>
|
||||
#include <stdio.h>
|
||||
#include <linux/dcbnl.h>
|
||||
|
||||
#include "dcb.h"
|
||||
#include "utils.h"
|
||||
|
||||
/* Write the usage text for "dcb dcbx set" to stderr. */
static void dcb_dcbx_help_set(void)
{
	static const char usage[] =
		"Usage: dcb dcbx set dev STRING\n"
		" [ host | lld-managed ]\n"
		" [ cee | ieee ] [ static ]\n"
		"\n";

	fputs(usage, stderr);
}
|
||||
|
||||
/* Write the usage text for "dcb dcbx show" to stderr. */
static void dcb_dcbx_help_show(void)
{
	static const char usage[] =
		"Usage: dcb dcbx show dev STRING\n"
		"\n";

	fputs(usage, stderr);
}
|
||||
|
||||
/* Write the full "dcb dcbx" usage to stderr: the help line followed by the
 * show and set usage texts.
 */
static void dcb_dcbx_help(void)
{
	fputs("Usage: dcb dcbx help\n"
	      "\n",
	      stderr);

	dcb_dcbx_help_show();
	dcb_dcbx_help_set();
}
|
||||
|
||||
struct dcb_dcbx_flag {
|
||||
__u8 value;
|
||||
const char *key_fp;
|
||||
const char *key_json;
|
||||
};
|
||||
|
||||
static struct dcb_dcbx_flag dcb_dcbx_flags[] = {
|
||||
{DCB_CAP_DCBX_HOST, "host"},
|
||||
{DCB_CAP_DCBX_LLD_MANAGED, "lld-managed", "lld_managed"},
|
||||
{DCB_CAP_DCBX_VER_CEE, "cee"},
|
||||
{DCB_CAP_DCBX_VER_IEEE, "ieee"},
|
||||
{DCB_CAP_DCBX_STATIC, "static"},
|
||||
};
|
||||
|
||||
/* Print the name of every bit set in dcbx, lowest bit first, followed by a
 * newline.
 *
 * Each known bit is emitted as a JSON boolean (key_json, or key_fp when
 * key_json is NULL) and as a plain text keyword. Bits with no entry in
 * dcb_dcbx_flags are reported on stderr.
 */
static void dcb_dcbx_print(__u8 dcbx)
{
	int bit;

	/* ffs() is 1-based and returns 0 once no bits remain. */
	while ((bit = ffs(dcbx))) {
		/* Unsigned mask: avoids signed shifts and matches %#x below. */
		const unsigned int mask = 1U << (bit - 1);
		bool found = false;
		size_t i;

		for (i = 0; i < ARRAY_SIZE(dcb_dcbx_flags); i++) {
			const struct dcb_dcbx_flag *flag = &dcb_dcbx_flags[i];

			if (flag->value == mask) {
				print_bool(PRINT_JSON, flag->key_json ?: flag->key_fp,
					   NULL, true);
				print_string(PRINT_FP, NULL, "%s ", flag->key_fp);
				found = true;
				break;
			}
		}

		if (!found)
			fprintf(stderr, "Unknown DCBX bit %#x.\n", mask);

		/* Clear the handled bit so the loop makes progress. */
		dcbx &= ~mask;
	}

	print_nl();
}
|
||||
|
||||
/* Read the DCB_ATTR_DCBX attribute of dev into *dcbx.
 *
 * Returns 0 on success, the error from dcb_get_attribute_bare(), or -EINVAL
 * when the returned payload is not exactly one byte long.
 */
static int dcb_dcbx_get(struct dcb *dcb, const char *dev, __u8 *dcbx)
{
	__u16 len;
	void *data;
	int ret;

	ret = dcb_get_attribute_bare(dcb, DCB_CMD_IEEE_GET, dev, DCB_ATTR_DCBX,
				     &data, &len);
	if (ret != 0)
		return ret;

	if (len == 1) {
		*dcbx = *(__u8 *) data;
		return 0;
	}

	fprintf(stderr, "DCB_ATTR_DCBX payload has size %d, expected 1.\n",
		len);
	return -EINVAL;
}
|
||||
|
||||
/* Send dcbx as a one-byte DCB_ATTR_DCBX attribute in a DCB_CMD_SDCBX
 * request; returns the result of dcb_set_attribute_bare().
 */
static int dcb_dcbx_set(struct dcb *dcb, const char *dev, __u8 dcbx)
{
	return dcb_set_attribute_bare(dcb, DCB_CMD_SDCBX, dev,
				      DCB_ATTR_DCBX, &dcbx, 1,
				      DCB_ATTR_DCBX);
}
|
||||
|
||||
static int dcb_cmd_dcbx_set(struct dcb *dcb, const char *dev, int argc, char **argv)
|
||||
{
|
||||
__u8 dcbx = 0;
|
||||
__u8 i;
|
||||
|
||||
if (!argc) {
|
||||
dcb_dcbx_help_set();
|
||||
return 0;
|
||||
}
|
||||
|
||||
do {
|
||||
if (matches(*argv, "help") == 0) {
|
||||
dcb_dcbx_help_set();
|
||||
return 0;
|
||||
}
|
||||
|
||||
for (i = 0; i < ARRAY_SIZE(dcb_dcbx_flags); i++) {
|
||||
struct dcb_dcbx_flag *flag = &dcb_dcbx_flags[i];
|
||||
|
||||
if (matches(*argv, flag->key_fp) == 0) {
|
||||
dcbx |= flag->value;
|
||||
NEXT_ARG_FWD();
|
||||
goto next;
|
||||
}
|
||||
}
|
||||
|
||||
fprintf(stderr, "What is \"%s\"?\n", *argv);
|
||||
dcb_dcbx_help_set();
|
||||
return -EINVAL;
|
||||
|
||||
next:
|
||||
;
|
||||
} while (argc > 0);
|
||||
|
||||
return dcb_dcbx_set(dcb, dev, dcbx);
|
||||
}
|
||||
|
||||
static int dcb_cmd_dcbx_show(struct dcb *dcb, const char *dev, int argc, char **argv)
|
||||
{
|
||||
__u8 dcbx;
|
||||
int ret;
|
||||
|
||||
ret = dcb_dcbx_get(dcb, dev, &dcbx);
|
||||
if (ret != 0)
|
||||
return ret;
|
||||
|
||||
while (argc > 0) {
|
||||
if (matches(*argv, "help") == 0) {
|
||||
dcb_dcbx_help_show();
|
||||
return 0;
|
||||
} else {
|
||||
fprintf(stderr, "What is \"%s\"?\n", *argv);
|
||||
dcb_dcbx_help_show();
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
NEXT_ARG_FWD();
|
||||
}
|
||||
|
||||
open_json_object(NULL);
|
||||
dcb_dcbx_print(dcbx);
|
||||
close_json_object();
|
||||
return 0;
|
||||
}
|
||||
|
||||
int dcb_cmd_dcbx(struct dcb *dcb, int argc, char **argv)
|
||||
{
|
||||
if (!argc || matches(*argv, "help") == 0) {
|
||||
dcb_dcbx_help();
|
||||
return 0;
|
||||
} else if (matches(*argv, "show") == 0) {
|
||||
NEXT_ARG_FWD();
|
||||
return dcb_cmd_parse_dev(dcb, argc, argv,
|
||||
dcb_cmd_dcbx_show, dcb_dcbx_help_show);
|
||||
} else if (matches(*argv, "set") == 0) {
|
||||
NEXT_ARG_FWD();
|
||||
return dcb_cmd_parse_dev(dcb, argc, argv,
|
||||
dcb_cmd_dcbx_set, dcb_dcbx_help_set);
|
||||
} else {
|
||||
fprintf(stderr, "What is \"%s\"?\n", *argv);
|
||||
dcb_dcbx_help();
|
||||
return -EINVAL;
|
||||
}
|
||||
}
|
||||
435
dcb/dcb_ets.c
435
dcb/dcb_ets.c
|
|
@ -1,435 +0,0 @@
|
|||
// SPDX-License-Identifier: GPL-2.0+
|
||||
|
||||
#include <errno.h>
|
||||
#include <stdio.h>
|
||||
#include <linux/dcbnl.h>
|
||||
|
||||
#include "dcb.h"
|
||||
#include "utils.h"
|
||||
|
||||
/* Write the usage text for "dcb ets set" to stderr. */
static void dcb_ets_help_set(void)
{
	static const char usage[] =
		"Usage: dcb ets set dev STRING\n"
		" [ willing { on | off } ]\n"
		" [ { tc-tsa | reco-tc-tsa } TSA-MAP ]\n"
		" [ { pg-bw | tc-bw | reco-tc-bw } BW-MAP ]\n"
		" [ { prio-tc | reco-prio-tc } PRIO-MAP ]\n"
		"\n"
		" where TSA-MAP := [ TSA-MAP ] TSA-MAPPING\n"
		" TSA-MAPPING := { all | TC }:{ strict | cbs | ets | vendor }\n"
		" BW-MAP := [ BW-MAP ] BW-MAPPING\n"
		" BW-MAPPING := { all | TC }:INTEGER\n"
		" PRIO-MAP := [ PRIO-MAP ] PRIO-MAPPING\n"
		" PRIO-MAPPING := { all | PRIO }:TC\n"
		" TC := { 0 .. 7 }\n"
		" PRIO := { 0 .. 7 }\n"
		"\n";

	fputs(usage, stderr);
}
|
||||
|
||||
/* Write the usage text for "dcb ets show" to stderr. */
static void dcb_ets_help_show(void)
{
	static const char usage[] =
		"Usage: dcb ets show dev STRING\n"
		" [ willing ] [ ets-cap ] [ cbs ] [ tc-tsa ]\n"
		" [ reco-tc-tsa ] [ pg-bw ] [ tc-bw ] [ reco-tc-bw ]\n"
		" [ prio-tc ] [ reco-prio-tc ]\n"
		"\n";

	fputs(usage, stderr);
}
|
||||
|
||||
/* Write the full "dcb ets" usage to stderr: the help line followed by the
 * show and set usage texts.
 */
static void dcb_ets_help(void)
{
	fputs("Usage: dcb ets help\n"
	      "\n",
	      stderr);

	dcb_ets_help_show();
	dcb_ets_help_set();
}
|
||||
|
||||
static const char *const tsa_names[] = {
|
||||
[IEEE_8021QAZ_TSA_STRICT] = "strict",
|
||||
[IEEE_8021QAZ_TSA_CB_SHAPER] = "cbs",
|
||||
[IEEE_8021QAZ_TSA_ETS] = "ets",
|
||||
[IEEE_8021QAZ_TSA_VENDOR] = "vendor",
|
||||
};
|
||||
|
||||
/* parse_mapping() callback for TSA maps: translate value to a TSA number
 * with parse_one_of() over tsa_names and pass the TC:TSA pair to
 * dcb_parse_mapping(), which stores it in the u8 array given as data.
 * Returns the parse_one_of() error when value is not a known keyword.
 */
static int dcb_ets_parse_mapping_tc_tsa(__u32 key, char *value, void *data)
{
	int err;
	__u8 tsa = parse_one_of("TSA", value, tsa_names, ARRAY_SIZE(tsa_names), &err);

	if (err != 0)
		return err;

	return dcb_parse_mapping("TC", key, IEEE_8021QAZ_MAX_TCS - 1,
				 "TSA", tsa, -1U,
				 dcb_set_u8, data);
}
|
||||
|
||||
/* parse_mapping() callback for bandwidth maps: parse value as a u8 and pass
 * the TC:BW pair (BW limited to 100) to dcb_parse_mapping(), which stores it
 * in the u8 array given as data. Returns -EINVAL when value is not a valid
 * u8.
 */
static int dcb_ets_parse_mapping_tc_bw(__u32 key, char *value, void *data)
{
	__u8 percent;

	if (get_u8(&percent, value, 0) != 0)
		return -EINVAL;

	return dcb_parse_mapping("TC", key, IEEE_8021QAZ_MAX_TCS - 1,
				 "BW", percent, 100,
				 dcb_set_u8, data);
}
|
||||
|
||||
static int dcb_ets_parse_mapping_prio_tc(unsigned int key, char *value, void *data)
|
||||
{
|
||||
__u8 tc;
|
||||
|
||||
if (get_u8(&tc, value, 0))
|
||||
return -EINVAL;
|
||||
|
||||
return dcb_parse_mapping("PRIO", key, IEEE_8021QAZ_MAX_TCS - 1,
|
||||
"TC", tc, IEEE_8021QAZ_MAX_TCS - 1,
|
||||
dcb_set_u8, data);
|
||||
}
|
||||
|
||||
static void dcb_print_array_tsa(const __u8 *array, size_t size)
|
||||
{
|
||||
dcb_print_array_kw(array, size, tsa_names, ARRAY_SIZE(tsa_names));
|
||||
}
|
||||
|
||||
static void dcb_ets_print_willing(const struct ieee_ets *ets)
|
||||
{
|
||||
print_on_off(PRINT_ANY, "willing", "willing %s ", ets->willing);
|
||||
}
|
||||
|
||||
static void dcb_ets_print_ets_cap(const struct ieee_ets *ets)
|
||||
{
|
||||
print_uint(PRINT_ANY, "ets_cap", "ets-cap %d ", ets->ets_cap);
|
||||
}
|
||||
|
||||
static void dcb_ets_print_cbs(const struct ieee_ets *ets)
|
||||
{
|
||||
print_on_off(PRINT_ANY, "cbs", "cbs %s ", ets->cbs);
|
||||
}
|
||||
|
||||
static void dcb_ets_print_tc_bw(const struct ieee_ets *ets)
|
||||
{
|
||||
dcb_print_named_array("tc_bw", "tc-bw",
|
||||
ets->tc_tx_bw, ARRAY_SIZE(ets->tc_tx_bw),
|
||||
dcb_print_array_u8);
|
||||
}
|
||||
|
||||
static void dcb_ets_print_pg_bw(const struct ieee_ets *ets)
|
||||
{
|
||||
dcb_print_named_array("pg_bw", "pg-bw",
|
||||
ets->tc_rx_bw, ARRAY_SIZE(ets->tc_rx_bw),
|
||||
dcb_print_array_u8);
|
||||
}
|
||||
|
||||
static void dcb_ets_print_tc_tsa(const struct ieee_ets *ets)
|
||||
{
|
||||
dcb_print_named_array("tc_tsa", "tc-tsa",
|
||||
ets->tc_tsa, ARRAY_SIZE(ets->tc_tsa),
|
||||
dcb_print_array_tsa);
|
||||
}
|
||||
|
||||
static void dcb_ets_print_prio_tc(const struct ieee_ets *ets)
|
||||
{
|
||||
dcb_print_named_array("prio_tc", "prio-tc",
|
||||
ets->prio_tc, ARRAY_SIZE(ets->prio_tc),
|
||||
dcb_print_array_u8);
|
||||
}
|
||||
|
||||
static void dcb_ets_print_reco_tc_bw(const struct ieee_ets *ets)
|
||||
{
|
||||
dcb_print_named_array("reco_tc_bw", "reco-tc-bw",
|
||||
ets->tc_reco_bw, ARRAY_SIZE(ets->tc_reco_bw),
|
||||
dcb_print_array_u8);
|
||||
}
|
||||
|
||||
static void dcb_ets_print_reco_tc_tsa(const struct ieee_ets *ets)
|
||||
{
|
||||
dcb_print_named_array("reco_tc_tsa", "reco-tc-tsa",
|
||||
ets->tc_reco_tsa, ARRAY_SIZE(ets->tc_reco_tsa),
|
||||
dcb_print_array_tsa);
|
||||
}
|
||||
|
||||
static void dcb_ets_print_reco_prio_tc(const struct ieee_ets *ets)
|
||||
{
|
||||
dcb_print_named_array("reco_prio_tc", "reco-prio-tc",
|
||||
ets->reco_prio_tc, ARRAY_SIZE(ets->reco_prio_tc),
|
||||
dcb_print_array_u8);
|
||||
}
|
||||
|
||||
static void dcb_ets_print(const struct ieee_ets *ets)
|
||||
{
|
||||
dcb_ets_print_willing(ets);
|
||||
dcb_ets_print_ets_cap(ets);
|
||||
dcb_ets_print_cbs(ets);
|
||||
print_nl();
|
||||
|
||||
dcb_ets_print_tc_bw(ets);
|
||||
print_nl();
|
||||
|
||||
dcb_ets_print_pg_bw(ets);
|
||||
print_nl();
|
||||
|
||||
dcb_ets_print_tc_tsa(ets);
|
||||
print_nl();
|
||||
|
||||
dcb_ets_print_prio_tc(ets);
|
||||
print_nl();
|
||||
|
||||
dcb_ets_print_reco_tc_bw(ets);
|
||||
print_nl();
|
||||
|
||||
dcb_ets_print_reco_tc_tsa(ets);
|
||||
print_nl();
|
||||
|
||||
dcb_ets_print_reco_prio_tc(ets);
|
||||
print_nl();
|
||||
}
|
||||
|
||||
static int dcb_ets_get(struct dcb *dcb, const char *dev, struct ieee_ets *ets)
|
||||
{
|
||||
return dcb_get_attribute(dcb, dev, DCB_ATTR_IEEE_ETS, ets, sizeof(*ets));
|
||||
}
|
||||
|
||||
/* Check a per-TC bandwidth table against its TSA table.
 *
 * bw and tsa each hold IEEE_8021QAZ_MAX_TCS entries; what names the table in
 * diagnostics. Returns 0 when every entry is within 0..100 and the entries
 * sum to 100, or sum to 0 while no TC uses the ETS algorithm. Otherwise
 * prints a diagnostic to stderr and returns -EINVAL.
 */
static int dcb_ets_validate_bw(const __u8 bw[], const __u8 tsa[], const char *what)
{
	bool has_ets = false;
	unsigned int total = 0;
	unsigned int tc;

	for (tc = 0; tc < IEEE_8021QAZ_MAX_TCS; tc++) {
		if (tsa[tc] == IEEE_8021QAZ_TSA_ETS) {
			has_ets = true;
			break;
		}
	}

	/* TC bandwidth is only intended for ETS, but 802.1Q-2018 only requires
	 * that the sum be 100, and individual entries 0..100. It explicitly
	 * notes that non-ETS TCs can have non-0 TC bandwidth during
	 * reconfiguration.
	 */
	for (tc = 0; tc < IEEE_8021QAZ_MAX_TCS; tc++) {
		if (bw[tc] > 100) {
			/* tc is unsigned, hence %u; bw[tc] promotes to int. */
			fprintf(stderr, "%d%% for TC %u of %s is not a valid bandwidth percentage, expected 0..100%%\n",
				bw[tc], tc, what);
			return -EINVAL;
		}
		total += bw[tc];
	}

	/* This is what 802.1Q-2018 requires. */
	if (total == 100)
		return 0;

	/* But this requirement does not make sense for all-strict
	 * configurations. Anything else than 0 does not make sense: either BW
	 * has not been reconfigured for the all-strict allocation yet, at which
	 * point we expect sum of 100. Or it has already been reconfigured, at
	 * which point accept 0.
	 */
	if (!has_ets && total == 0)
		return 0;

	fprintf(stderr, "Bandwidth percentages in %s sum to %u%%, expected %d%%\n",
		what, total, has_ets ? 100 : 0);
	return -EINVAL;
}
|
||||
|
||||
static int dcb_ets_set(struct dcb *dcb, const char *dev, const struct ieee_ets *ets)
|
||||
{
|
||||
/* Do not validate pg-bw, which is not standard and has unclear
|
||||
* meaning.
|
||||
*/
|
||||
if (dcb_ets_validate_bw(ets->tc_tx_bw, ets->tc_tsa, "tc-bw") ||
|
||||
dcb_ets_validate_bw(ets->tc_reco_bw, ets->tc_reco_tsa, "reco-tc-bw"))
|
||||
return -EINVAL;
|
||||
|
||||
return dcb_set_attribute(dcb, dev, DCB_ATTR_IEEE_ETS, ets, sizeof(*ets));
|
||||
}
|
||||
|
||||
static int dcb_cmd_ets_set(struct dcb *dcb, const char *dev, int argc, char **argv)
|
||||
{
|
||||
struct ieee_ets ets;
|
||||
int ret;
|
||||
|
||||
if (!argc) {
|
||||
dcb_ets_help_set();
|
||||
return 1;
|
||||
}
|
||||
|
||||
ret = dcb_ets_get(dcb, dev, &ets);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
do {
|
||||
if (matches(*argv, "help") == 0) {
|
||||
dcb_ets_help_set();
|
||||
return 0;
|
||||
} else if (matches(*argv, "willing") == 0) {
|
||||
NEXT_ARG();
|
||||
ets.willing = parse_on_off("willing", *argv, &ret);
|
||||
if (ret)
|
||||
return ret;
|
||||
} else if (matches(*argv, "tc-tsa") == 0) {
|
||||
NEXT_ARG();
|
||||
ret = parse_mapping(&argc, &argv, true, &dcb_ets_parse_mapping_tc_tsa,
|
||||
ets.tc_tsa);
|
||||
if (ret) {
|
||||
fprintf(stderr, "Invalid tc-tsa mapping %s\n", *argv);
|
||||
return ret;
|
||||
}
|
||||
continue;
|
||||
} else if (matches(*argv, "reco-tc-tsa") == 0) {
|
||||
NEXT_ARG();
|
||||
ret = parse_mapping(&argc, &argv, true, &dcb_ets_parse_mapping_tc_tsa,
|
||||
ets.tc_reco_tsa);
|
||||
if (ret) {
|
||||
fprintf(stderr, "Invalid reco-tc-tsa mapping %s\n", *argv);
|
||||
return ret;
|
||||
}
|
||||
continue;
|
||||
} else if (matches(*argv, "tc-bw") == 0) {
|
||||
NEXT_ARG();
|
||||
ret = parse_mapping(&argc, &argv, true, &dcb_ets_parse_mapping_tc_bw,
|
||||
ets.tc_tx_bw);
|
||||
if (ret) {
|
||||
fprintf(stderr, "Invalid tc-bw mapping %s\n", *argv);
|
||||
return ret;
|
||||
}
|
||||
continue;
|
||||
} else if (matches(*argv, "pg-bw") == 0) {
|
||||
NEXT_ARG();
|
||||
ret = parse_mapping(&argc, &argv, true, &dcb_ets_parse_mapping_tc_bw,
|
||||
ets.tc_rx_bw);
|
||||
if (ret) {
|
||||
fprintf(stderr, "Invalid pg-bw mapping %s\n", *argv);
|
||||
return ret;
|
||||
}
|
||||
continue;
|
||||
} else if (matches(*argv, "reco-tc-bw") == 0) {
|
||||
NEXT_ARG();
|
||||
ret = parse_mapping(&argc, &argv, true, &dcb_ets_parse_mapping_tc_bw,
|
||||
ets.tc_reco_bw);
|
||||
if (ret) {
|
||||
fprintf(stderr, "Invalid reco-tc-bw mapping %s\n", *argv);
|
||||
return ret;
|
||||
}
|
||||
continue;
|
||||
} else if (matches(*argv, "prio-tc") == 0) {
|
||||
NEXT_ARG();
|
||||
ret = parse_mapping(&argc, &argv, true, &dcb_ets_parse_mapping_prio_tc,
|
||||
ets.prio_tc);
|
||||
if (ret) {
|
||||
fprintf(stderr, "Invalid prio-tc mapping %s\n", *argv);
|
||||
return ret;
|
||||
}
|
||||
continue;
|
||||
} else if (matches(*argv, "reco-prio-tc") == 0) {
|
||||
NEXT_ARG();
|
||||
ret = parse_mapping(&argc, &argv, true, &dcb_ets_parse_mapping_prio_tc,
|
||||
ets.reco_prio_tc);
|
||||
if (ret) {
|
||||
fprintf(stderr, "Invalid reco-prio-tc mapping %s\n", *argv);
|
||||
return ret;
|
||||
}
|
||||
continue;
|
||||
} else {
|
||||
fprintf(stderr, "What is \"%s\"?\n", *argv);
|
||||
dcb_ets_help_set();
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
NEXT_ARG_FWD();
|
||||
} while (argc > 0);
|
||||
|
||||
return dcb_ets_set(dcb, dev, &ets);
|
||||
}
|
||||
|
||||
static int dcb_cmd_ets_show(struct dcb *dcb, const char *dev, int argc, char **argv)
|
||||
{
|
||||
struct ieee_ets ets;
|
||||
int ret;
|
||||
|
||||
ret = dcb_ets_get(dcb, dev, &ets);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
open_json_object(NULL);
|
||||
|
||||
if (!argc) {
|
||||
dcb_ets_print(&ets);
|
||||
goto out;
|
||||
}
|
||||
|
||||
do {
|
||||
if (matches(*argv, "help") == 0) {
|
||||
dcb_ets_help_show();
|
||||
return 0;
|
||||
} else if (matches(*argv, "willing") == 0) {
|
||||
dcb_ets_print_willing(&ets);
|
||||
print_nl();
|
||||
} else if (matches(*argv, "ets-cap") == 0) {
|
||||
dcb_ets_print_ets_cap(&ets);
|
||||
print_nl();
|
||||
} else if (matches(*argv, "cbs") == 0) {
|
||||
dcb_ets_print_cbs(&ets);
|
||||
print_nl();
|
||||
} else if (matches(*argv, "tc-tsa") == 0) {
|
||||
dcb_ets_print_tc_tsa(&ets);
|
||||
print_nl();
|
||||
} else if (matches(*argv, "reco-tc-tsa") == 0) {
|
||||
dcb_ets_print_reco_tc_tsa(&ets);
|
||||
print_nl();
|
||||
} else if (matches(*argv, "tc-bw") == 0) {
|
||||
dcb_ets_print_tc_bw(&ets);
|
||||
print_nl();
|
||||
} else if (matches(*argv, "pg-bw") == 0) {
|
||||
dcb_ets_print_pg_bw(&ets);
|
||||
print_nl();
|
||||
} else if (matches(*argv, "reco-tc-bw") == 0) {
|
||||
dcb_ets_print_reco_tc_bw(&ets);
|
||||
print_nl();
|
||||
} else if (matches(*argv, "prio-tc") == 0) {
|
||||
dcb_ets_print_prio_tc(&ets);
|
||||
print_nl();
|
||||
} else if (matches(*argv, "reco-prio-tc") == 0) {
|
||||
dcb_ets_print_reco_prio_tc(&ets);
|
||||
print_nl();
|
||||
} else {
|
||||
fprintf(stderr, "What is \"%s\"?\n", *argv);
|
||||
dcb_ets_help_show();
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
NEXT_ARG_FWD();
|
||||
} while (argc > 0);
|
||||
|
||||
out:
|
||||
close_json_object();
|
||||
return 0;
|
||||
}
|
||||
|
||||
int dcb_cmd_ets(struct dcb *dcb, int argc, char **argv)
|
||||
{
|
||||
if (!argc || matches(*argv, "help") == 0) {
|
||||
dcb_ets_help();
|
||||
return 0;
|
||||
} else if (matches(*argv, "show") == 0) {
|
||||
NEXT_ARG_FWD();
|
||||
return dcb_cmd_parse_dev(dcb, argc, argv, dcb_cmd_ets_show, dcb_ets_help_show);
|
||||
} else if (matches(*argv, "set") == 0) {
|
||||
NEXT_ARG_FWD();
|
||||
return dcb_cmd_parse_dev(dcb, argc, argv, dcb_cmd_ets_set, dcb_ets_help_set);
|
||||
} else {
|
||||
fprintf(stderr, "What is \"%s\"?\n", *argv);
|
||||
dcb_ets_help();
|
||||
return -EINVAL;
|
||||
}
|
||||
}
|
||||
|
|
@ -1,182 +0,0 @@
|
|||
// SPDX-License-Identifier: GPL-2.0+
|
||||
|
||||
#include <errno.h>
|
||||
#include <inttypes.h>
|
||||
#include <stdio.h>
|
||||
#include <linux/dcbnl.h>
|
||||
|
||||
#include "dcb.h"
|
||||
#include "utils.h"
|
||||
|
||||
/* Write the usage text for "dcb maxrate set" to stderr. */
static void dcb_maxrate_help_set(void)
{
	static const char usage[] =
		"Usage: dcb maxrate set dev STRING\n"
		" [ tc-maxrate RATE-MAP ]\n"
		"\n"
		" where RATE-MAP := [ RATE-MAP ] RATE-MAPPING\n"
		" RATE-MAPPING := { all | TC }:RATE\n"
		" TC := { 0 .. 7 }\n"
		"\n";

	fputs(usage, stderr);
}
|
||||
|
||||
/* Write the usage text for "dcb maxrate show" to stderr. */
static void dcb_maxrate_help_show(void)
{
	static const char usage[] =
		"Usage: dcb [ -i ] maxrate show dev STRING\n"
		" [ tc-maxrate ]\n"
		"\n";

	fputs(usage, stderr);
}
|
||||
|
||||
/* Write the full "dcb maxrate" usage to stderr: the help line followed by
 * the show and set usage texts.
 */
static void dcb_maxrate_help(void)
{
	fputs("Usage: dcb maxrate help\n"
	      "\n",
	      stderr);

	dcb_maxrate_help_show();
	dcb_maxrate_help_set();
}
|
||||
|
||||
/* parse_mapping() callback for "tc-maxrate": parse value with get_rate64()
 * and pass the TC:RATE pair to dcb_parse_mapping(), which stores it through
 * dcb_set_u64 into data. Returns -EINVAL when value is not a valid rate.
 */
static int dcb_maxrate_parse_mapping_tc_maxrate(__u32 key, char *value, void *data)
{
	__u64 bits_per_sec;

	if (get_rate64(&bits_per_sec, value) != 0)
		return -EINVAL;

	return dcb_parse_mapping("TC", key, IEEE_8021QAZ_MAX_TCS - 1,
				 "RATE", bits_per_sec, -1,
				 dcb_set_u64, data);
}
|
||||
|
||||
static void dcb_maxrate_print_tc_maxrate(struct dcb *dcb, const struct ieee_maxrate *maxrate)
|
||||
{
|
||||
size_t size = ARRAY_SIZE(maxrate->tc_maxrate);
|
||||
SPRINT_BUF(b);
|
||||
size_t i;
|
||||
|
||||
open_json_array(PRINT_JSON, "tc_maxrate");
|
||||
print_string(PRINT_FP, NULL, "tc-maxrate ", NULL);
|
||||
|
||||
for (i = 0; i < size; i++) {
|
||||
snprintf(b, sizeof(b), "%zd:%%s ", i);
|
||||
print_rate(dcb->use_iec, PRINT_ANY, NULL, b, maxrate->tc_maxrate[i]);
|
||||
}
|
||||
|
||||
close_json_array(PRINT_JSON, "tc_maxrate");
|
||||
}
|
||||
|
||||
/* Print all maxrate attributes; currently just the tc-maxrate line. */
static void dcb_maxrate_print(struct dcb *dcb, const struct ieee_maxrate *maxrate)
{
	/* One attribute per line, terminated by print_nl(). */
	dcb_maxrate_print_tc_maxrate(dcb, maxrate);

	print_nl();
}
|
||||
|
||||
static int dcb_maxrate_get(struct dcb *dcb, const char *dev, struct ieee_maxrate *maxrate)
|
||||
{
|
||||
return dcb_get_attribute(dcb, dev, DCB_ATTR_IEEE_MAXRATE, maxrate, sizeof(*maxrate));
|
||||
}
|
||||
|
||||
static int dcb_maxrate_set(struct dcb *dcb, const char *dev, const struct ieee_maxrate *maxrate)
|
||||
{
|
||||
return dcb_set_attribute(dcb, dev, DCB_ATTR_IEEE_MAXRATE, maxrate, sizeof(*maxrate));
|
||||
}
|
||||
|
||||
static int dcb_cmd_maxrate_set(struct dcb *dcb, const char *dev, int argc, char **argv)
|
||||
{
|
||||
struct ieee_maxrate maxrate;
|
||||
int ret;
|
||||
|
||||
if (!argc) {
|
||||
dcb_maxrate_help_set();
|
||||
return 0;
|
||||
}
|
||||
|
||||
ret = dcb_maxrate_get(dcb, dev, &maxrate);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
do {
|
||||
if (matches(*argv, "help") == 0) {
|
||||
dcb_maxrate_help_set();
|
||||
return 0;
|
||||
} else if (matches(*argv, "tc-maxrate") == 0) {
|
||||
NEXT_ARG();
|
||||
ret = parse_mapping(&argc, &argv, true,
|
||||
&dcb_maxrate_parse_mapping_tc_maxrate, &maxrate);
|
||||
if (ret) {
|
||||
fprintf(stderr, "Invalid mapping %s\n", *argv);
|
||||
return ret;
|
||||
}
|
||||
continue;
|
||||
} else {
|
||||
fprintf(stderr, "What is \"%s\"?\n", *argv);
|
||||
dcb_maxrate_help_set();
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
NEXT_ARG_FWD();
|
||||
} while (argc > 0);
|
||||
|
||||
return dcb_maxrate_set(dcb, dev, &maxrate);
|
||||
}
|
||||
|
||||
static int dcb_cmd_maxrate_show(struct dcb *dcb, const char *dev, int argc, char **argv)
|
||||
{
|
||||
struct ieee_maxrate maxrate;
|
||||
int ret;
|
||||
|
||||
ret = dcb_maxrate_get(dcb, dev, &maxrate);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
open_json_object(NULL);
|
||||
|
||||
if (!argc) {
|
||||
dcb_maxrate_print(dcb, &maxrate);
|
||||
goto out;
|
||||
}
|
||||
|
||||
do {
|
||||
if (matches(*argv, "help") == 0) {
|
||||
dcb_maxrate_help_show();
|
||||
return 0;
|
||||
} else if (matches(*argv, "tc-maxrate") == 0) {
|
||||
dcb_maxrate_print_tc_maxrate(dcb, &maxrate);
|
||||
print_nl();
|
||||
} else {
|
||||
fprintf(stderr, "What is \"%s\"?\n", *argv);
|
||||
dcb_maxrate_help_show();
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
NEXT_ARG_FWD();
|
||||
} while (argc > 0);
|
||||
|
||||
out:
|
||||
close_json_object();
|
||||
return 0;
|
||||
}
|
||||
|
||||
int dcb_cmd_maxrate(struct dcb *dcb, int argc, char **argv)
|
||||
{
|
||||
if (!argc || matches(*argv, "help") == 0) {
|
||||
dcb_maxrate_help();
|
||||
return 0;
|
||||
} else if (matches(*argv, "show") == 0) {
|
||||
NEXT_ARG_FWD();
|
||||
return dcb_cmd_parse_dev(dcb, argc, argv,
|
||||
dcb_cmd_maxrate_show, dcb_maxrate_help_show);
|
||||
} else if (matches(*argv, "set") == 0) {
|
||||
NEXT_ARG_FWD();
|
||||
return dcb_cmd_parse_dev(dcb, argc, argv,
|
||||
dcb_cmd_maxrate_set, dcb_maxrate_help_set);
|
||||
} else {
|
||||
fprintf(stderr, "What is \"%s\"?\n", *argv);
|
||||
dcb_maxrate_help();
|
||||
return -EINVAL;
|
||||
}
|
||||
}
|
||||
286
dcb/dcb_pfc.c
286
dcb/dcb_pfc.c
|
|
@ -1,286 +0,0 @@
|
|||
// SPDX-License-Identifier: GPL-2.0+
|
||||
|
||||
#include <errno.h>
|
||||
#include <stdio.h>
|
||||
#include <linux/dcbnl.h>
|
||||
|
||||
#include "dcb.h"
|
||||
#include "utils.h"
|
||||
|
||||
/* Write the usage text of "dcb pfc set" to stderr. */
static void dcb_pfc_help_set(void)
{
	static const char usage[] =
		"Usage: dcb pfc set dev STRING\n"
		" [ prio-pfc PFC-MAP ]\n"
		" [ macsec-bypass { on | off } ]\n"
		" [ delay INTEGER ]\n"
		"\n"
		" where PFC-MAP := [ PFC-MAP ] PFC-MAPPING\n"
		" PFC-MAPPING := { all | TC }:PFC\n"
		" TC := { 0 .. 7 }\n"
		" PFC := { on | off }\n"
		"\n";

	fputs(usage, stderr);
}
|
||||
|
||||
/* Write the usage text of "dcb pfc show" to stderr. */
static void dcb_pfc_help_show(void)
{
	static const char usage[] =
		"Usage: dcb [ -s ] pfc show dev STRING\n"
		" [ pfc-cap ] [ prio-pfc ] [ macsec-bypass ]\n"
		" [ delay ] [ requests ] [ indications ]\n"
		"\n";

	fputs(usage, stderr);
}
|
||||
|
||||
/*
 * Write the complete "dcb pfc" usage to stderr: the line for "help" itself,
 * followed by the "show" and then the "set" usage.
 */
static void dcb_pfc_help(void)
{
	fputs("Usage: dcb pfc help\n"
	      "\n",
	      stderr);

	dcb_pfc_help_show();
	dcb_pfc_help_set();
}
|
||||
|
||||
/*
 * Expand the bitmask pfc_en into array: element N becomes 1 when bit N of
 * pfc_en is set and 0 otherwise, for N in [0, IEEE_8021QAZ_MAX_TCS).
 */
static void dcb_pfc_to_array(__u8 array[IEEE_8021QAZ_MAX_TCS], __u8 pfc_en)
{
	int bit;

	for (bit = 0; bit < IEEE_8021QAZ_MAX_TCS; bit++)
		array[bit] = (pfc_en >> bit) & 1;
}
|
||||
|
||||
/*
 * Inverse of dcb_pfc_to_array(): fold array back into a bitmask, setting
 * bit N for every non-zero element N, and store the result in *pfc_en_p.
 */
static void dcb_pfc_from_array(__u8 array[IEEE_8021QAZ_MAX_TCS], __u8 *pfc_en_p)
{
	__u8 mask = 0;
	int bit;

	for (bit = 0; bit < IEEE_8021QAZ_MAX_TCS; bit++)
		mask |= array[bit] ? 1 << bit : 0;

	*pfc_en_p = mask;
}
|
||||
|
||||
/*
 * parse_mapping() callback for one "prio-pfc" KEY:VALUE pair.
 *
 * value is interpreted with parse_on_off(); the resulting flag is applied
 * to the per-priority view of pfc->pfc_en through dcb_parse_mapping(), and
 * the bitmask in the struct ieee_pfc passed as data is updated on success.
 *
 * Returns 0 on success, otherwise the error reported by parse_on_off() or
 * dcb_parse_mapping(); pfc->pfc_en is left untouched in that case.
 */
static int dcb_pfc_parse_mapping_prio_pfc(__u32 key, char *value, void *data)
{
	__u8 per_prio[IEEE_8021QAZ_MAX_TCS];
	struct ieee_pfc *pfc = data;
	bool on;
	int err;

	on = parse_on_off("PFC", value, &err);
	if (err)
		return err;

	/* Work on an array copy so that the setter can address one entry. */
	dcb_pfc_to_array(per_prio, pfc->pfc_en);

	err = dcb_parse_mapping("PRIO", key, IEEE_8021QAZ_MAX_TCS - 1,
				"PFC", on, -1,
				dcb_set_u8, per_prio);
	if (err)
		return err;

	dcb_pfc_from_array(per_prio, &pfc->pfc_en);
	return 0;
}
|
||||
|
||||
static void dcb_pfc_print_pfc_cap(const struct ieee_pfc *pfc)
|
||||
{
|
||||
print_uint(PRINT_ANY, "pfc_cap", "pfc-cap %d ", pfc->pfc_cap);
|
||||
}
|
||||
|
||||
static void dcb_pfc_print_macsec_bypass(const struct ieee_pfc *pfc)
|
||||
{
|
||||
print_on_off(PRINT_ANY, "macsec_bypass", "macsec-bypass %s ", pfc->mbc);
|
||||
}
|
||||
|
||||
static void dcb_pfc_print_delay(const struct ieee_pfc *pfc)
|
||||
{
|
||||
print_uint(PRINT_ANY, "delay", "delay %d ", pfc->delay);
|
||||
}
|
||||
|
||||
static void dcb_pfc_print_prio_pfc(const struct ieee_pfc *pfc)
|
||||
{
|
||||
__u8 pfc_en[IEEE_8021QAZ_MAX_TCS];
|
||||
|
||||
dcb_pfc_to_array(pfc_en, pfc->pfc_en);
|
||||
dcb_print_named_array("prio_pfc", "prio-pfc",
|
||||
pfc_en, ARRAY_SIZE(pfc_en), &dcb_print_array_on_off);
|
||||
}
|
||||
|
||||
static void dcb_pfc_print_requests(const struct ieee_pfc *pfc)
|
||||
{
|
||||
open_json_array(PRINT_JSON, "requests");
|
||||
print_string(PRINT_FP, NULL, "requests ", NULL);
|
||||
dcb_print_array_u64(pfc->requests, ARRAY_SIZE(pfc->requests));
|
||||
close_json_array(PRINT_JSON, "requests");
|
||||
}
|
||||
|
||||
static void dcb_pfc_print_indications(const struct ieee_pfc *pfc)
|
||||
{
|
||||
open_json_array(PRINT_JSON, "indications");
|
||||
print_string(PRINT_FP, NULL, "indications ", NULL);
|
||||
dcb_print_array_u64(pfc->indications, ARRAY_SIZE(pfc->indications));
|
||||
close_json_array(PRINT_JSON, "indications");
|
||||
}
|
||||
|
||||
static void dcb_pfc_print(const struct dcb *dcb, const struct ieee_pfc *pfc)
|
||||
{
|
||||
dcb_pfc_print_pfc_cap(pfc);
|
||||
dcb_pfc_print_macsec_bypass(pfc);
|
||||
dcb_pfc_print_delay(pfc);
|
||||
print_nl();
|
||||
|
||||
dcb_pfc_print_prio_pfc(pfc);
|
||||
print_nl();
|
||||
|
||||
if (dcb->stats) {
|
||||
dcb_pfc_print_requests(pfc);
|
||||
print_nl();
|
||||
|
||||
dcb_pfc_print_indications(pfc);
|
||||
print_nl();
|
||||
}
|
||||
}
|
||||
|
||||
static int dcb_pfc_get(struct dcb *dcb, const char *dev, struct ieee_pfc *pfc)
|
||||
{
|
||||
return dcb_get_attribute(dcb, dev, DCB_ATTR_IEEE_PFC, pfc, sizeof(*pfc));
|
||||
}
|
||||
|
||||
static int dcb_pfc_set(struct dcb *dcb, const char *dev, const struct ieee_pfc *pfc)
|
||||
{
|
||||
return dcb_set_attribute(dcb, dev, DCB_ATTR_IEEE_PFC, pfc, sizeof(*pfc));
|
||||
}
|
||||
|
||||
static int dcb_cmd_pfc_set(struct dcb *dcb, const char *dev, int argc, char **argv)
|
||||
{
|
||||
struct ieee_pfc pfc;
|
||||
int ret;
|
||||
|
||||
if (!argc) {
|
||||
dcb_pfc_help_set();
|
||||
return 0;
|
||||
}
|
||||
|
||||
ret = dcb_pfc_get(dcb, dev, &pfc);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
do {
|
||||
if (matches(*argv, "help") == 0) {
|
||||
dcb_pfc_help_set();
|
||||
return 0;
|
||||
} else if (matches(*argv, "prio-pfc") == 0) {
|
||||
NEXT_ARG();
|
||||
ret = parse_mapping(&argc, &argv, true,
|
||||
&dcb_pfc_parse_mapping_prio_pfc, &pfc);
|
||||
if (ret) {
|
||||
fprintf(stderr, "Invalid pfc mapping %s\n", *argv);
|
||||
return ret;
|
||||
}
|
||||
continue;
|
||||
} else if (matches(*argv, "macsec-bypass") == 0) {
|
||||
NEXT_ARG();
|
||||
pfc.mbc = parse_on_off("macsec-bypass", *argv, &ret);
|
||||
if (ret)
|
||||
return ret;
|
||||
} else if (matches(*argv, "delay") == 0) {
|
||||
NEXT_ARG();
|
||||
/* Do not support the size notations for delay.
|
||||
* Delay is specified in "bit times", not bits, so
|
||||
* it is not applicable. At the same time it would
|
||||
* be confusing that 10Kbit does not mean 10240,
|
||||
* but 1280.
|
||||
*/
|
||||
if (get_u16(&pfc.delay, *argv, 0)) {
|
||||
fprintf(stderr, "Invalid delay `%s', expected an integer 0..65535\n",
|
||||
*argv);
|
||||
return -EINVAL;
|
||||
}
|
||||
} else {
|
||||
fprintf(stderr, "What is \"%s\"?\n", *argv);
|
||||
dcb_pfc_help_set();
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
NEXT_ARG_FWD();
|
||||
} while (argc > 0);
|
||||
|
||||
return dcb_pfc_set(dcb, dev, &pfc);
|
||||
}
|
||||
|
||||
static int dcb_cmd_pfc_show(struct dcb *dcb, const char *dev, int argc, char **argv)
|
||||
{
|
||||
struct ieee_pfc pfc;
|
||||
int ret;
|
||||
|
||||
ret = dcb_pfc_get(dcb, dev, &pfc);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
open_json_object(NULL);
|
||||
|
||||
if (!argc) {
|
||||
dcb_pfc_print(dcb, &pfc);
|
||||
goto out;
|
||||
}
|
||||
|
||||
do {
|
||||
if (matches(*argv, "help") == 0) {
|
||||
dcb_pfc_help_show();
|
||||
return 0;
|
||||
} else if (matches(*argv, "prio-pfc") == 0) {
|
||||
dcb_pfc_print_prio_pfc(&pfc);
|
||||
print_nl();
|
||||
} else if (matches(*argv, "pfc-cap") == 0) {
|
||||
dcb_pfc_print_pfc_cap(&pfc);
|
||||
print_nl();
|
||||
} else if (matches(*argv, "macsec-bypass") == 0) {
|
||||
dcb_pfc_print_macsec_bypass(&pfc);
|
||||
print_nl();
|
||||
} else if (matches(*argv, "delay") == 0) {
|
||||
dcb_pfc_print_delay(&pfc);
|
||||
print_nl();
|
||||
} else if (matches(*argv, "requests") == 0) {
|
||||
dcb_pfc_print_requests(&pfc);
|
||||
print_nl();
|
||||
} else if (matches(*argv, "indications") == 0) {
|
||||
dcb_pfc_print_indications(&pfc);
|
||||
print_nl();
|
||||
} else {
|
||||
fprintf(stderr, "What is \"%s\"?\n", *argv);
|
||||
dcb_pfc_help_show();
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
NEXT_ARG_FWD();
|
||||
} while (argc > 0);
|
||||
|
||||
out:
|
||||
close_json_object();
|
||||
return 0;
|
||||
}
|
||||
|
||||
int dcb_cmd_pfc(struct dcb *dcb, int argc, char **argv)
|
||||
{
|
||||
if (!argc || matches(*argv, "help") == 0) {
|
||||
dcb_pfc_help();
|
||||
return 0;
|
||||
} else if (matches(*argv, "show") == 0) {
|
||||
NEXT_ARG_FWD();
|
||||
return dcb_cmd_parse_dev(dcb, argc, argv,
|
||||
dcb_cmd_pfc_show, dcb_pfc_help_show);
|
||||
} else if (matches(*argv, "set") == 0) {
|
||||
NEXT_ARG_FWD();
|
||||
return dcb_cmd_parse_dev(dcb, argc, argv,
|
||||
dcb_cmd_pfc_set, dcb_pfc_help_set);
|
||||
} else {
|
||||
fprintf(stderr, "What is \"%s\"?\n", *argv);
|
||||
dcb_pfc_help();
|
||||
return -EINVAL;
|
||||
}
|
||||
}
|
||||
|
|
@ -1 +0,0 @@
|
|||
devlink
|
||||
|
|
@ -1,25 +0,0 @@
|
|||
# SPDX-License-Identifier: GPL-2.0
|
||||
include ../config.mk
|
||||
|
||||
TARGETS :=
|
||||
|
||||
ifeq ($(HAVE_MNL),y)
|
||||
|
||||
DEVLINKOBJ = devlink.o mnlg.o
|
||||
TARGETS += devlink
|
||||
LDLIBS += -lm
|
||||
|
||||
endif
|
||||
|
||||
all: $(TARGETS) $(LIBS)
|
||||
|
||||
devlink: $(DEVLINKOBJ) $(LIBNETLINK)
|
||||
$(QUIET_LINK)$(CC) $^ $(LDFLAGS) $(LDLIBS) -o $@
|
||||
|
||||
install: all
|
||||
for i in $(TARGETS); \
|
||||
do install -m 0755 $$i $(DESTDIR)$(SBINDIR); \
|
||||
done
|
||||
|
||||
clean:
|
||||
rm -f $(DEVLINKOBJ) $(TARGETS)
|
||||
9189
devlink/devlink.c
9189
devlink/devlink.c
File diff suppressed because it is too large
Load Diff
155
devlink/mnlg.c
155
devlink/mnlg.c
|
|
@ -1,155 +0,0 @@
|
|||
/*
|
||||
* mnlg.c Generic Netlink helpers for libmnl
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public License
|
||||
* as published by the Free Software Foundation; either version
|
||||
* 2 of the License, or (at your option) any later version.
|
||||
*
|
||||
* Authors: Jiri Pirko <jiri@mellanox.com>
|
||||
*/
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <stdbool.h>
|
||||
#include <string.h>
|
||||
#include <errno.h>
|
||||
#include <unistd.h>
|
||||
#include <libmnl/libmnl.h>
|
||||
#include <linux/genetlink.h>
|
||||
|
||||
#include "libnetlink.h"
|
||||
#include "mnl_utils.h"
|
||||
#include "utils.h"
|
||||
#include "mnlg.h"
|
||||
|
||||
/*
 * Generic netlink socket bookkeeping.
 * NOTE(review): none of the functions visible in this file use this type;
 * they all operate on struct mnlu_gen_socket - confirm whether it is still
 * needed.
 */
struct mnlg_socket {
	struct mnl_socket *nl;
	char *buf;
	uint32_t id;
	uint8_t version;
	unsigned int seq;
};
|
||||
|
||||
int mnlg_socket_send(struct mnlu_gen_socket *nlg, const struct nlmsghdr *nlh)
|
||||
{
|
||||
return mnl_socket_sendto(nlg->nl, nlh, nlh->nlmsg_len);
|
||||
}
|
||||
|
||||
/* State shared between mnlg_socket_group_add() and its receive callbacks. */
struct group_info {
	bool found;		/* set once a group called name was seen */
	uint32_t id;		/* id of that group; only meaningful when found */
	const char *name;	/* multicast group name being looked up */
};
|
||||
|
||||
static int parse_mc_grps_cb(const struct nlattr *attr, void *data)
|
||||
{
|
||||
const struct nlattr **tb = data;
|
||||
int type = mnl_attr_get_type(attr);
|
||||
|
||||
if (mnl_attr_type_valid(attr, CTRL_ATTR_MCAST_GRP_MAX) < 0)
|
||||
return MNL_CB_OK;
|
||||
|
||||
switch (type) {
|
||||
case CTRL_ATTR_MCAST_GRP_ID:
|
||||
if (mnl_attr_validate(attr, MNL_TYPE_U32) < 0)
|
||||
return MNL_CB_ERROR;
|
||||
break;
|
||||
case CTRL_ATTR_MCAST_GRP_NAME:
|
||||
if (mnl_attr_validate(attr, MNL_TYPE_STRING) < 0)
|
||||
return MNL_CB_ERROR;
|
||||
break;
|
||||
}
|
||||
tb[type] = attr;
|
||||
return MNL_CB_OK;
|
||||
}
|
||||
|
||||
static void parse_genl_mc_grps(struct nlattr *nested,
|
||||
struct group_info *group_info)
|
||||
{
|
||||
struct nlattr *pos;
|
||||
const char *name;
|
||||
|
||||
mnl_attr_for_each_nested(pos, nested) {
|
||||
struct nlattr *tb[CTRL_ATTR_MCAST_GRP_MAX + 1] = {};
|
||||
|
||||
mnl_attr_parse_nested(pos, parse_mc_grps_cb, tb);
|
||||
if (!tb[CTRL_ATTR_MCAST_GRP_NAME] ||
|
||||
!tb[CTRL_ATTR_MCAST_GRP_ID])
|
||||
continue;
|
||||
|
||||
name = mnl_attr_get_str(tb[CTRL_ATTR_MCAST_GRP_NAME]);
|
||||
if (strcmp(name, group_info->name) != 0)
|
||||
continue;
|
||||
|
||||
group_info->id = mnl_attr_get_u32(tb[CTRL_ATTR_MCAST_GRP_ID]);
|
||||
group_info->found = true;
|
||||
}
|
||||
}
|
||||
|
||||
static int get_group_id_attr_cb(const struct nlattr *attr, void *data)
|
||||
{
|
||||
const struct nlattr **tb = data;
|
||||
int type = mnl_attr_get_type(attr);
|
||||
|
||||
if (mnl_attr_type_valid(attr, CTRL_ATTR_MAX) < 0)
|
||||
return MNL_CB_ERROR;
|
||||
|
||||
if (type == CTRL_ATTR_MCAST_GROUPS &&
|
||||
mnl_attr_validate(attr, MNL_TYPE_NESTED) < 0)
|
||||
return MNL_CB_ERROR;
|
||||
tb[type] = attr;
|
||||
return MNL_CB_OK;
|
||||
}
|
||||
|
||||
static int get_group_id_cb(const struct nlmsghdr *nlh, void *data)
|
||||
{
|
||||
struct group_info *group_info = data;
|
||||
struct nlattr *tb[CTRL_ATTR_MAX + 1] = {};
|
||||
struct genlmsghdr *genl = mnl_nlmsg_get_payload(nlh);
|
||||
|
||||
mnl_attr_parse(nlh, sizeof(*genl), get_group_id_attr_cb, tb);
|
||||
if (!tb[CTRL_ATTR_MCAST_GROUPS])
|
||||
return MNL_CB_ERROR;
|
||||
parse_genl_mc_grps(tb[CTRL_ATTR_MCAST_GROUPS], group_info);
|
||||
return MNL_CB_OK;
|
||||
}
|
||||
|
||||
int mnlg_socket_group_add(struct mnlu_gen_socket *nlg, const char *group_name)
|
||||
{
|
||||
struct nlmsghdr *nlh;
|
||||
struct group_info group_info;
|
||||
int err;
|
||||
|
||||
nlh = _mnlu_gen_socket_cmd_prepare(nlg, CTRL_CMD_GETFAMILY,
|
||||
NLM_F_REQUEST | NLM_F_ACK,
|
||||
GENL_ID_CTRL, 1);
|
||||
|
||||
mnl_attr_put_u16(nlh, CTRL_ATTR_FAMILY_ID, nlg->family);
|
||||
|
||||
err = mnlg_socket_send(nlg, nlh);
|
||||
if (err < 0)
|
||||
return err;
|
||||
|
||||
group_info.found = false;
|
||||
group_info.name = group_name;
|
||||
err = mnlu_gen_socket_recv_run(nlg, get_group_id_cb, &group_info);
|
||||
if (err < 0)
|
||||
return err;
|
||||
|
||||
if (!group_info.found) {
|
||||
errno = ENOENT;
|
||||
return -1;
|
||||
}
|
||||
|
||||
err = mnl_socket_setsockopt(nlg->nl, NETLINK_ADD_MEMBERSHIP,
|
||||
&group_info.id, sizeof(group_info.id));
|
||||
if (err < 0)
|
||||
return err;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int mnlg_socket_get_fd(struct mnlu_gen_socket *nlg)
|
||||
{
|
||||
return mnl_socket_get_fd(nlg->nl);
|
||||
}
|
||||
|
|
@ -1,23 +0,0 @@
|
|||
/*
|
||||
* mnlg.h Generic Netlink helpers for libmnl
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public License
|
||||
* as published by the Free Software Foundation; either version
|
||||
* 2 of the License, or (at your option) any later version.
|
||||
*
|
||||
* Authors: Jiri Pirko <jiri@mellanox.com>
|
||||
*/
|
||||
|
||||
#ifndef _MNLG_H_
|
||||
#define _MNLG_H_
|
||||
|
||||
#include <libmnl/libmnl.h>
|
||||
|
||||
struct mnlu_gen_socket;
|
||||
|
||||
/* Send nlh on nlg's socket; returns the mnl_socket_sendto() result. */
int mnlg_socket_send(struct mnlu_gen_socket *nlg, const struct nlmsghdr *nlh);

/*
 * Join the multicast group called group_name.  Returns 0 on success and a
 * negative value on failure (-1 with errno set to ENOENT if the group is
 * not found).
 */
int mnlg_socket_group_add(struct mnlu_gen_socket *nlg, const char *group_name);

/* Return the file descriptor of nlg's netlink socket. */
int mnlg_socket_get_fd(struct mnlu_gen_socket *nlg);
|
||||
|
||||
#endif /* _MNLG_H_ */
|
||||
|
|
@ -0,0 +1,73 @@
|
|||
PSFILES=ip-cref.ps ip-tunnels.ps api-ip6-flowlabels.ps ss.ps nstat.ps arpd.ps rtstat.ps
|
||||
# tc-cref.ps
|
||||
# api-rtnl.tex api-pmtudisc.tex api-news.tex
|
||||
# iki-netdev.ps iki-neighdst.ps
|
||||
|
||||
|
||||
LATEX=latex
|
||||
DVIPS=dvips
|
||||
SGML2DVI=sgml2latex
|
||||
SGML2HTML=sgml2html -s 0
|
||||
LPR=lpr -Zsduplex
|
||||
SHELL=bash
|
||||
PAGESIZE=a4
|
||||
PAGESPERPAGE=2
|
||||
|
||||
HTMLFILES=$(subst .sgml,.html,$(shell echo *.sgml))
|
||||
DVIFILES=$(subst .ps,.dvi,$(PSFILES))
|
||||
PDFFILES=$(subst .ps,.pdf,$(PSFILES))
|
||||
|
||||
|
||||
all: pstwocol
|
||||
|
||||
pstwocol: $(PSFILES)
|
||||
|
||||
html: $(HTMLFILES)
|
||||
|
||||
dvi: $(DVIFILES)
|
||||
|
||||
pdf: $(PDFFILES)
|
||||
|
||||
print: $(PSFILES)
|
||||
$(LPR) $(PSFILES)
|
||||
|
||||
%.tex: %.sgml
|
||||
$(SGML2DVI) --output=tex $<
|
||||
|
||||
%.dvi: %.sgml
|
||||
$(SGML2DVI) --output=dvi $<
|
||||
|
||||
%.dvi: %.tex
|
||||
@set -e; pass=2; echo "Running LaTeX $<"; \
|
||||
while [ `$(LATEX) $< </dev/null 2>&1 | \
|
||||
grep -c '^\(LaTeX Warning: Label(s) may\|No file \|! Emergency stop\)'` -ge 1 ]; do \
|
||||
if [ $$pass -gt 3 ]; then \
|
||||
echo "Seems, something is wrong. Try by hands." ; exit 1 ; \
|
||||
fi; \
|
||||
echo "Re-running LaTeX $<, $${pass}d pass"; pass=$$[$$pass + 1]; \
|
||||
done
|
||||
|
||||
%.pdf: %.tex
|
||||
@set -e; pass=2; echo "Running pdfLaTeX $<"; \
|
||||
while [ `pdflatex $< </dev/null 2>&1 | \
|
||||
grep -c '^\(LaTeX Warning: Label(s) may\|No file \|! Emergency stop\)'` -ge 1 ]; do \
|
||||
if [ $$pass -gt 3 ]; then \
|
||||
echo "Seems, something is wrong. Try by hands." ; exit 1 ; \
|
||||
fi; \
|
||||
echo "Re-running pdfLaTeX $<, $${pass}d pass"; pass=$$[$$pass + 1]; \
|
||||
done
|
||||
#%.pdf: %.ps
|
||||
# ps2pdf $<
|
||||
|
||||
%.ps: %.dvi
|
||||
$(DVIPS) $< -o $@
|
||||
|
||||
%.html: %.sgml
|
||||
$(SGML2HTML) $<
|
||||
|
||||
install:
|
||||
install -m 0644 $(shell echo *.tex) $(DESTDIR)$(DOCDIR)
|
||||
install -m 0644 $(shell echo *.sgml) $(DESTDIR)$(DOCDIR)
|
||||
|
||||
clean:
|
||||
rm -f *.aux *.log *.toc $(PSFILES) $(DVIFILES) *.html *.pdf
|
||||
|
|
@ -0,0 +1,16 @@
|
|||
Partially finished work.
|
||||
|
||||
1. User Reference manuals.
|
||||
1.1 IP Command reference (ip-cref.tex, published)
|
||||
1.2 TC Command reference (tc-cref.tex)
|
||||
1.3 IP tunnels (ip-tunnels.tex, published)
|
||||
|
||||
2. Linux-2.2 Networking API
|
||||
2.1 RTNETLINK (api-rtnl.tex)
|
||||
2.2 Path MTU Discovery (api-pmtudisc.tex)
|
||||
2.3 IPv6 Flow Labels (api-ip6-flowlabels.tex, published)
|
||||
2.4 Miscellaneous extensions (api-misc.tex)
|
||||
|
||||
3. Linux-2.2 Networking Intra-Kernel Interfaces
|
||||
3.1 NetDev --- Networking Devices and netdev... (iki-netdev.tex)
|
||||
3.2 Neighbour cache and destination cache. (iki-neighdst.tex)
|
||||
|
|
@ -0,0 +1 @@
|
|||
\def\Draft{020116}
|
||||
|
|
@ -6,8 +6,8 @@ What is it?
|
|||
-----------
|
||||
|
||||
An extension to the filtering/classification architecture of Linux Traffic
|
||||
Control.
|
||||
Up to 2.6.8 the only action that could be "attached" to a filter was policing.
|
||||
Control.
|
||||
Up to 2.6.8 the only action that could be "attached" to a filter was policing.
|
||||
i.e you could say something like:
|
||||
|
||||
-----
|
||||
|
|
@ -17,11 +17,11 @@ tc filter add dev lo parent ffff: protocol ip prio 10 u32 match ip src \
|
|||
|
||||
which implies "if a packet is seen on the ingress of the lo device with
|
||||
a source IP address of 127.0.0.1/32 we give it a classification id of 1:1 and
|
||||
we execute a policing action which rate limits its bandwidth utilization
|
||||
we execute a policing action which rate limits its bandwidth utilization
|
||||
to 1.5Mbps".
|
||||
|
||||
The new extensions allow for more than just policing actions to be added.
|
||||
They are also fully backward compatible. If you have a kernel that doesn't
|
||||
They are also fully backward compatible. If you have a kernel that doesnt
|
||||
understand them, then the effect is null i.e if you have a newer tc
|
||||
but older kernel, the actions are not installed. Likewise if you
|
||||
have a newer kernel but older tc, obviously the tc will use current
|
||||
|
|
@ -29,9 +29,9 @@ syntax which will work fine. Of course to get the required effect you need
|
|||
both newer tc and kernel. If you are reading this you have the
|
||||
right tc ;->
|
||||
|
||||
A side effect is that we can now get stateless firewalling to work with tc.
|
||||
A side effect is that we can now get stateless firewalling to work with tc.
|
||||
Essentially this is now an alternative to iptables.
|
||||
I won't go into details of my dislike for iptables at times, but
|
||||
I wont go into details of my dislike for iptables at times, but
|
||||
scalability is one of the main issues; however, if you need stateful
|
||||
classification - use netfilter (for now).
|
||||
|
||||
|
|
@ -61,7 +61,7 @@ tc filter add dev lo parent 1:0 protocol ip prio 10 u32 \
|
|||
match ip src 127.0.0.1/32 flowid 1:1 \
|
||||
action police mtu 4000 rate 1500kbit burst 90k
|
||||
|
||||
" generic Actions" (gact) at the moment are:
|
||||
" generic Actions" (gact) at the moment are:
|
||||
{ drop, pass, reclassify, continue}
|
||||
(If you have others not listed here, give me a reason and we will add them)
|
||||
+drop says to drop the packet
|
||||
|
|
@ -77,7 +77,7 @@ iptable target. I have only tested with mangler targets up to now.
|
|||
In terms of hooks:
|
||||
*ingress is mapped to pre-routing hook
|
||||
*egress is mapped to post-routing hook
|
||||
I don't see much value in the other hooks, if you see it and email me good
|
||||
I dont see much value in the other hooks, if you see it and email me good
|
||||
reasons, the addition is trivial.
|
||||
|
||||
Example syntax for iptables targets usage becomes:
|
||||
|
|
@ -93,43 +93,43 @@ decimal 12, then use flowid 1:c.
|
|||
|
||||
3) A feature i call pipe
|
||||
The motivation is derived from Unix pipe mechanism but applied to packets.
|
||||
Essentially take a matching packet and pass it through
|
||||
Essentially take a matching packet and pass it through
|
||||
action1 | action2 | action3 etc.
|
||||
You could do something similar to this with the tc policer and the "continue"
|
||||
operator but this rather restricts it to just the policer and requires
|
||||
multiple rules (and lookups, hence quiet inefficient);
|
||||
operator but this rather restricts it to just the policer and requires
|
||||
multiple rules (and lookups, hence quiet inefficient);
|
||||
|
||||
as an example -- and please note that this is just an example _not_ The
|
||||
as an example -- and please note that this is just an example _not_ The
|
||||
Word Youve Been Waiting For (yes i have had problems giving examples
|
||||
which ended becoming dogma in documents and people modifying them a little
|
||||
to look clever);
|
||||
to look clever);
|
||||
|
||||
i selected the metering rates to be small so that i can show better how
|
||||
i selected the metering rates to be small so that i can show better how
|
||||
things work.
|
||||
|
||||
The script below does the following:
|
||||
- an incoming packet from 10.0.0.21 is first given a firewall mark of 1.
|
||||
|
||||
The script below does the following:
|
||||
- an incoming packet from 10.0.0.21 is first given a firewall mark of 1.
|
||||
- It is then metered to make sure it does not exceed its allocated rate of
|
||||
1Kbps. If it doesnt exceed rate, this is where we terminate action execution.
|
||||
|
||||
- It is then metered to make sure it does not exceed its allocated rate of
|
||||
1Kbps. If it doesn't exceed rate, this is where we terminate action execution.
|
||||
|
||||
- If it does exceed its rate, its "color" changes to a mark of 2 and it is
|
||||
- If it does exceed its rate, its "color" changes to a mark of 2 and it is
|
||||
then passed through a second meter.
|
||||
|
||||
-The second meter is shared across all flows on that device [i am surpised
|
||||
that this seems to be not a well know feature of the policer; Bert was telling
|
||||
-The second meter is shared across all flows on that device [i am suprised
|
||||
that this seems to be not a well know feature of the policer; Bert was telling
|
||||
me that someone was writing a qdisc just to do sharing across multiple devices;
|
||||
it must be the summer heat again; weve had someone doing that every year around
|
||||
summer -- the key to sharing is to use a operator "index" in your policer
|
||||
rules (example "index 20"). All your rules have to use the same index to
|
||||
summer -- the key to sharing is to use a operator "index" in your policer
|
||||
rules (example "index 20"). All your rules have to use the same index to
|
||||
share.]
|
||||
|
||||
|
||||
-If the second meter is exceeded the color of the flow changes further to 3.
|
||||
|
||||
-We then pass the packet to another meter which is shared across all devices
|
||||
in the system. If this meter is exceeded we drop the packet.
|
||||
|
||||
Note the mark can be used further up the system to do things like policy
|
||||
Note the mark can be used further up the system to do things like policy
|
||||
or more interesting things on the egress.
|
||||
|
||||
------------------ cut here -------------------------------
|
||||
|
|
@ -145,7 +145,7 @@ u32 match ip src 10.0.0.21/32 flowid 1:15 \
|
|||
action ipt -j mark --set-mark 1 index 2 \
|
||||
#
|
||||
# then pass it through a policer which allows 1kbps; if the flow
|
||||
# doesn't exceed that rate, this is where we stop, if it exceeds we
|
||||
# doesnt exceed that rate, this is where we stop, if it exceeds we
|
||||
# pipe the packet to the next action
|
||||
action police rate 1kbit burst 9k pipe \
|
||||
#
|
||||
|
|
@ -161,31 +161,31 @@ action ipt -j mark --set-mark 3 \
|
|||
# and then attempt to borrow from a meter used by all devices in the
|
||||
# system. Should this be exceeded, drop the packet on the floor.
|
||||
action police index 20 mtu 5000 rate 1kbit burst 90k drop
|
||||
---------------------------------
|
||||
---------------------------------
|
||||
|
||||
Now lets see the actions installed with
|
||||
Now lets see the actions installed with
|
||||
"tc filter show parent ffff: dev eth0"
|
||||
|
||||
-------- output -----------
|
||||
jroot# tc filter show parent ffff: dev eth0
|
||||
filter protocol ip pref 1 u32
|
||||
filter protocol ip pref 1 u32 fh 800: ht divisor 1
|
||||
filter protocol ip pref 1 u32 fh 800::800 order 2048 key ht 800 bkt 0 flowid 1:15
|
||||
filter protocol ip pref 1 u32
|
||||
filter protocol ip pref 1 u32 fh 800: ht divisor 1
|
||||
filter protocol ip pref 1 u32 fh 800::800 order 2048 key ht 800 bkt 0 flowid 1:15
|
||||
|
||||
action order 1: tablename: mangle hook: NF_IP_PRE_ROUTING
|
||||
action order 1: tablename: mangle hook: NF_IP_PRE_ROUTING
|
||||
target MARK set 0x1 index 2
|
||||
|
||||
action order 2: police 1 action pipe rate 1Kbit burst 9Kb mtu 2Kb
|
||||
action order 2: police 1 action pipe rate 1Kbit burst 9Kb mtu 2Kb
|
||||
|
||||
action order 3: tablename: mangle hook: NF_IP_PRE_ROUTING
|
||||
action order 3: tablename: mangle hook: NF_IP_PRE_ROUTING
|
||||
target MARK set 0x2 index 1
|
||||
|
||||
action order 4: police 30 action pipe rate 1Kbit burst 10Kb mtu 5000b
|
||||
action order 4: police 30 action pipe rate 1Kbit burst 10Kb mtu 5000b
|
||||
|
||||
action order 5: tablename: mangle hook: NF_IP_PRE_ROUTING
|
||||
action order 5: tablename: mangle hook: NF_IP_PRE_ROUTING
|
||||
target MARK set 0x3 index 3
|
||||
|
||||
action order 6: police 20 action drop rate 1Kbit burst 90Kb mtu 5000b
|
||||
action order 6: police 20 action drop rate 1Kbit burst 90Kb mtu 5000b
|
||||
|
||||
match 0a000015/ffffffff at 12
|
||||
-------------------------------
|
||||
|
|
@ -209,31 +209,31 @@ Now lets take a look at the stats with "tc -s filter show parent ffff: dev eth0"
|
|||
|
||||
--------------
|
||||
jroot# tc -s filter show parent ffff: dev eth0
|
||||
filter protocol ip pref 1 u32
|
||||
filter protocol ip pref 1 u32 fh 800: ht divisor 1
|
||||
filter protocol ip pref 1 u32
|
||||
filter protocol ip pref 1 u32 fh 800: ht divisor 1
|
||||
filter protocol ip pref 1 u32 fh 800::800 order 2048 key ht 800 bkt 0 flowid 1:1
|
||||
5
|
||||
5
|
||||
|
||||
action order 1: tablename: mangle hook: NF_IP_PRE_ROUTING
|
||||
action order 1: tablename: mangle hook: NF_IP_PRE_ROUTING
|
||||
target MARK set 0x1 index 2
|
||||
Sent 188832 bytes 2248 pkts (dropped 0, overlimits 0)
|
||||
Sent 188832 bytes 2248 pkts (dropped 0, overlimits 0)
|
||||
|
||||
action order 2: police 1 action pipe rate 1Kbit burst 9Kb mtu 2Kb
|
||||
Sent 188832 bytes 2248 pkts (dropped 0, overlimits 2122)
|
||||
action order 2: police 1 action pipe rate 1Kbit burst 9Kb mtu 2Kb
|
||||
Sent 188832 bytes 2248 pkts (dropped 0, overlimits 2122)
|
||||
|
||||
action order 3: tablename: mangle hook: NF_IP_PRE_ROUTING
|
||||
action order 3: tablename: mangle hook: NF_IP_PRE_ROUTING
|
||||
target MARK set 0x2 index 1
|
||||
Sent 178248 bytes 2122 pkts (dropped 0, overlimits 0)
|
||||
Sent 178248 bytes 2122 pkts (dropped 0, overlimits 0)
|
||||
|
||||
action order 4: police 30 action pipe rate 1Kbit burst 10Kb mtu 5000b
|
||||
Sent 178248 bytes 2122 pkts (dropped 0, overlimits 1945)
|
||||
action order 4: police 30 action pipe rate 1Kbit burst 10Kb mtu 5000b
|
||||
Sent 178248 bytes 2122 pkts (dropped 0, overlimits 1945)
|
||||
|
||||
action order 5: tablename: mangle hook: NF_IP_PRE_ROUTING
|
||||
action order 5: tablename: mangle hook: NF_IP_PRE_ROUTING
|
||||
target MARK set 0x3 index 3
|
||||
Sent 163380 bytes 1945 pkts (dropped 0, overlimits 0)
|
||||
Sent 163380 bytes 1945 pkts (dropped 0, overlimits 0)
|
||||
|
||||
action order 6: police 20 action drop rate 1Kbit burst 90Kb mtu 5000b
|
||||
Sent 163380 bytes 1945 pkts (dropped 0, overlimits 437)
|
||||
action order 6: police 20 action drop rate 1Kbit burst 90Kb mtu 5000b
|
||||
Sent 163380 bytes 1945 pkts (dropped 0, overlimits 437)
|
||||
|
||||
match 0a000015/ffffffff at 12
|
||||
-------------------------------
|
||||
|
|
@ -241,7 +241,7 @@ filter protocol ip pref 1 u32 fh 800::800 order 2048 key ht 800 bkt 0 flowid 1:1
|
|||
Neat, eh?
|
||||
|
||||
|
||||
Want to write an action module?
|
||||
Wanna write an action module?
|
||||
------------------------------
|
||||
Its easy. Either look at the code or send me email. I will document at
|
||||
some point; will also accept documentation.
|
||||
|
|
@ -254,3 +254,4 @@ At the moment the focus has been on getting the architecture in place.
|
|||
Expect new things in the spurious time i have to work on this
|
||||
(particularly around end of year when i typically get time off
|
||||
from work).
|
||||
|
||||
|
|
|
|||
|
|
@ -1,16 +1,16 @@
|
|||
|
||||
gact <ACTION> [RAND] [INDEX]
|
||||
|
||||
Where:
|
||||
ACTION := reclassify | drop | continue | pass | ok
|
||||
Where:
|
||||
ACTION := reclassify | drop | continue | pass | ok
|
||||
RAND := random <RANDTYPE> <ACTION> <VAL>
|
||||
RANDTYPE := netrand | determ
|
||||
VAL : = value not exceeding 10000
|
||||
INDEX := index value used
|
||||
|
||||
|
||||
ACTION semantics
|
||||
- pass and ok are equivalent to accept
|
||||
- continue allows one to restart classification lookup
|
||||
- continue allows to restart classification lookup
|
||||
- drop drops packets
|
||||
- reclassify implies continue classification where we left off
|
||||
|
||||
|
|
@ -42,14 +42,14 @@ filter u32 fh 800::800 order 2048 key ht 800 bkt 0 flowid 1:16 (rule hit 32 suc
|
|||
random type none pass val 0
|
||||
index 1 ref 1 bind 1 installed 59 sec used 35 sec
|
||||
Sent 1680 bytes 20 pkts (dropped 20, overlimits 0 )
|
||||
|
||||
|
||||
----
|
||||
|
||||
# example 2
|
||||
#allow 1 out 10 randomly using the netrand generator
|
||||
tc filter add dev eth0 parent ffff: protocol ip prio 6 u32 match ip src \
|
||||
10.0.0.9/32 flowid 1:16 action drop random netrand ok 10
|
||||
|
||||
|
||||
ping -c 20 10.0.0.9
|
||||
|
||||
----
|
||||
|
|
@ -59,14 +59,14 @@ filter protocol ip pref 6 u32 filter protocol ip pref 6 u32 fh 800: ht divisor 1
|
|||
random type netrand pass val 10
|
||||
index 5 ref 1 bind 1 installed 49 sec used 25 sec
|
||||
Sent 1680 bytes 20 pkts (dropped 16, overlimits 0 )
|
||||
|
||||
|
||||
--------
|
||||
#alternative: deterministically accept every second packet
|
||||
tc filter add dev eth0 parent ffff: protocol ip prio 6 u32 match ip src \
|
||||
10.0.0.9/32 flowid 1:16 action drop random determ ok 2
|
||||
|
||||
|
||||
ping -c 20 10.0.0.9
|
||||
|
||||
|
||||
tc -s filter show parent ffff: dev eth0
|
||||
-----
|
||||
filter protocol ip pref 6 u32 filter protocol ip pref 6 u32 fh 800: ht divisor 1filter protocol ip pref 6 u32 fh 800::800 order 2048 key ht 800 bkt 0 flowid 1:16 (rule hit 20 success 20)
|
||||
|
|
@ -76,3 +76,4 @@ filter protocol ip pref 6 u32 filter protocol ip pref 6 u32 fh 800: ht divisor 1
|
|||
index 4 ref 1 bind 1 installed 118 sec used 82 sec
|
||||
Sent 1680 bytes 20 pkts (dropped 10, overlimits 0 )
|
||||
-----
|
||||
|
||||
|
|
|
|||
|
|
@ -6,47 +6,47 @@ with a _lot_ less code.
|
|||
Known IMQ/IFB USES
|
||||
------------------
|
||||
|
||||
As far as i know the reasons listed below is why people use IMQ.
|
||||
As far as i know the reasons listed below is why people use IMQ.
|
||||
It would be nice to know of anything else that i missed.
|
||||
|
||||
1) qdiscs/policies that are per device as opposed to system wide.
|
||||
IFB allows for sharing.
|
||||
|
||||
2) Allows for queueing incoming traffic for shaping instead of
|
||||
dropping. I am not aware of any study that shows policing is
|
||||
dropping. I am not aware of any study that shows policing is
|
||||
worse than shaping in achieving the end goal of rate control.
|
||||
I would be interested if anyone is experimenting.
|
||||
|
||||
3) Very interesting use: if you are serving p2p you may want to give
|
||||
preference to your own locally originated traffic (when responses come back)
|
||||
3) Very interesting use: if you are serving p2p you may wanna give
|
||||
preference to your own localy originated traffic (when responses come back)
|
||||
vs someone using your system to do bittorrent. So QoSing based on state
|
||||
comes in as the solution. What people did to achieve this was stick
|
||||
comes in as the solution. What people did to achive this was stick
|
||||
the IMQ somewhere prelocal hook.
|
||||
I think this is a pretty neat feature to have in Linux in general.
|
||||
(i.e not just for IMQ).
|
||||
But i won't go back to putting netfilter hooks in the device to satisfy
|
||||
this. I also don't think its worth it hacking ifb some more to be
|
||||
But i wont go back to putting netfilter hooks in the device to satisfy
|
||||
this. I also dont think its worth it hacking ifb some more to be
|
||||
aware of say L3 info and play ip rule tricks to achieve this.
|
||||
--> Instead the plan is to have a conntrack related action. This action will
|
||||
selectively either query/create conntrack state on incoming packets.
|
||||
Packets could then be redirected to ifb based on what happens -> eg
|
||||
on incoming packets; if we find they are of known state we could send to
|
||||
a different queue than one which didn't have existing state. This
|
||||
--> Instead the plan is to have a contrack related action. This action will
|
||||
selectively either query/create contrack state on incoming packets.
|
||||
Packets could then be redirected to ifb based on what happens -> eg
|
||||
on incoming packets; if we find they are of known state we could send to
|
||||
a different queue than one which didnt have existing state. This
|
||||
all however is dependent on whatever rules the admin enters.
|
||||
|
||||
At the moment this 3rd function does not exist yet. I have decided that
|
||||
instead of sitting on the patch for another year, to release it and then
|
||||
if there is pressure i will add this feature.
|
||||
instead of sitting on the patch for another year, to release it and then
|
||||
if theres pressure i will add this feature.
|
||||
|
||||
An example, to provide functionality that most people use IMQ for below:
|
||||
|
||||
--------
|
||||
export TC="/sbin/tc"
|
||||
|
||||
$TC qdisc add dev ifb0 root handle 1: prio
|
||||
$TC qdisc add dev ifb0 root handle 1: prio
|
||||
$TC qdisc add dev ifb0 parent 1:1 handle 10: sfq
|
||||
$TC qdisc add dev ifb0 parent 1:2 handle 20: tbf rate 20kbit buffer 1600 limit 3000
|
||||
$TC qdisc add dev ifb0 parent 1:3 handle 30: sfq
|
||||
$TC qdisc add dev ifb0 parent 1:3 handle 30: sfq
|
||||
$TC filter add dev ifb0 protocol ip pref 1 parent 1: handle 1 fw classid 1:1
|
||||
$TC filter add dev ifb0 protocol ip pref 2 parent 1: handle 2 fw classid 1:2
|
||||
|
||||
|
|
@ -54,7 +54,7 @@ ifconfig ifb0 up
|
|||
|
||||
$TC qdisc add dev eth0 ingress
|
||||
|
||||
# redirect all IP packets arriving in eth0 to ifb0
|
||||
# redirect all IP packets arriving in eth0 to ifb0
|
||||
# use mark 1 --> puts them onto class 1:1
|
||||
$TC filter add dev eth0 parent ffff: protocol ip prio 10 u32 \
|
||||
match u32 0 0 flowid 1:1 \
|
||||
|
|
@ -77,44 +77,44 @@ PING 10.22 (10.0.0.22): 56 data bytes
|
|||
--- 10.22 ping statistics ---
|
||||
3 packets transmitted, 3 packets received, 0% packet loss
|
||||
round-trip min/avg/max = 0.6/1.3/2.8 ms
|
||||
[root@jzny action-tests]#
|
||||
[root@jzny action-tests]#
|
||||
-----
|
||||
Now look at some stats:
|
||||
|
||||
---
|
||||
[root@jmandrake]:~# $TC -s filter show parent ffff: dev eth0
|
||||
filter protocol ip pref 10 u32
|
||||
filter protocol ip pref 10 u32 fh 800: ht divisor 1
|
||||
filter protocol ip pref 10 u32 fh 800::800 order 2048 key ht 800 bkt 0 flowid 1:1
|
||||
filter protocol ip pref 10 u32
|
||||
filter protocol ip pref 10 u32 fh 800: ht divisor 1
|
||||
filter protocol ip pref 10 u32 fh 800::800 order 2048 key ht 800 bkt 0 flowid 1:1
|
||||
match 00000000/00000000 at 0
|
||||
action order 1: tablename: mangle hook: NF_IP_PRE_ROUTING
|
||||
target MARK set 0x1
|
||||
index 1 ref 1 bind 1 installed 4195sec used 27sec
|
||||
Sent 252 bytes 3 pkts (dropped 0, overlimits 0)
|
||||
action order 1: tablename: mangle hook: NF_IP_PRE_ROUTING
|
||||
target MARK set 0x1
|
||||
index 1 ref 1 bind 1 installed 4195sec used 27sec
|
||||
Sent 252 bytes 3 pkts (dropped 0, overlimits 0)
|
||||
|
||||
action order 2: mirred (Egress Redirect to device ifb0) stolen
|
||||
index 1 ref 1 bind 1 installed 165 sec used 27 sec
|
||||
Sent 252 bytes 3 pkts (dropped 0, overlimits 0)
|
||||
Sent 252 bytes 3 pkts (dropped 0, overlimits 0)
|
||||
|
||||
[root@jmandrake]:~# $TC -s qdisc
|
||||
qdisc sfq 30: dev ifb0 limit 128p quantum 1514b
|
||||
Sent 0 bytes 0 pkts (dropped 0, overlimits 0)
|
||||
qdisc tbf 20: dev ifb0 rate 20Kbit burst 1575b lat 2147.5s
|
||||
Sent 210 bytes 3 pkts (dropped 0, overlimits 0)
|
||||
qdisc sfq 10: dev ifb0 limit 128p quantum 1514b
|
||||
Sent 294 bytes 3 pkts (dropped 0, overlimits 0)
|
||||
qdisc sfq 30: dev ifb0 limit 128p quantum 1514b
|
||||
Sent 0 bytes 0 pkts (dropped 0, overlimits 0)
|
||||
qdisc tbf 20: dev ifb0 rate 20Kbit burst 1575b lat 2147.5s
|
||||
Sent 210 bytes 3 pkts (dropped 0, overlimits 0)
|
||||
qdisc sfq 10: dev ifb0 limit 128p quantum 1514b
|
||||
Sent 294 bytes 3 pkts (dropped 0, overlimits 0)
|
||||
qdisc prio 1: dev ifb0 bands 3 priomap 1 2 2 2 1 2 0 0 1 1 1 1 1 1 1 1
|
||||
Sent 504 bytes 6 pkts (dropped 0, overlimits 0)
|
||||
qdisc ingress ffff: dev eth0 ----------------
|
||||
Sent 308 bytes 5 pkts (dropped 0, overlimits 0)
|
||||
Sent 504 bytes 6 pkts (dropped 0, overlimits 0)
|
||||
qdisc ingress ffff: dev eth0 ----------------
|
||||
Sent 308 bytes 5 pkts (dropped 0, overlimits 0)
|
||||
|
||||
[root@jmandrake]:~# ifconfig ifb0
|
||||
ifb0 Link encap:Ethernet HWaddr 00:00:00:00:00:00
|
||||
ifb0 Link encap:Ethernet HWaddr 00:00:00:00:00:00
|
||||
inet6 addr: fe80::200:ff:fe00:0/64 Scope:Link
|
||||
UP BROADCAST RUNNING NOARP MTU:1500 Metric:1
|
||||
RX packets:6 errors:0 dropped:3 overruns:0 frame:0
|
||||
TX packets:3 errors:0 dropped:0 overruns:0 carrier:0
|
||||
collisions:0 txqueuelen:32
|
||||
collisions:0 txqueuelen:32
|
||||
RX bytes:504 (504.0 b) TX bytes:252 (252.0 b)
|
||||
-----
|
||||
|
||||
|
|
|
|||
|
|
@ -7,10 +7,10 @@ flow to be mirrored. High end switches typically can select based
|
|||
on more than just a port (eg a 5 tuple classifier). They may also be
|
||||
capable of redirecting.
|
||||
|
||||
Usage:
|
||||
Usage:
|
||||
|
||||
mirred <DIRECTION> <ACTION> [index INDEX] <dev DEVICENAME>
|
||||
where:
|
||||
mirred <DIRECTION> <ACTION> [index INDEX] <dev DEVICENAME>
|
||||
where:
|
||||
DIRECTION := <ingress | egress>
|
||||
ACTION := <mirror | redirect>
|
||||
INDEX is the specific policy instance id
|
||||
|
|
@ -18,7 +18,7 @@ DEVICENAME is the devicename
|
|||
|
||||
Direction:
|
||||
- Ingress is not supported at the moment. It will be in the
|
||||
future as well as mirror/redirecting to a socket.
|
||||
future as well as mirror/redirecting to a socket.
|
||||
|
||||
Action:
|
||||
- Mirror takes a copy of the packet and sends it to specified
|
||||
|
|
@ -26,17 +26,17 @@ dev ("port" in ethernet switch/bridging terminology)
|
|||
- redirect
|
||||
steals the packet and redirects to specified destination dev.
|
||||
|
||||
What NOT to do if you don't want your machine to crash:
|
||||
What NOT to do if you dont want your machine to crash:
|
||||
------------------------------------------------------
|
||||
|
||||
Do not create loops!
|
||||
Do not create loops!
|
||||
Loops are not hard to create in the egress qdiscs.
|
||||
|
||||
Here are simple rules to follow if you don't want to get
|
||||
Here are simple rules to follow if you dont want to get
|
||||
hurt:
|
||||
A) Do not have the same packet go to same netdevice twice
|
||||
in a single graph of policies. Your machine will just hang!
|
||||
This is design intent _not a bug_ to teach you some lessons.
|
||||
This is design intent _not a bug_ to teach you some lessons.
|
||||
|
||||
In the future if there are easy ways to do this in the kernel
|
||||
without affecting other packets not interested in this feature
|
||||
|
|
@ -51,7 +51,7 @@ B) Do not redirect from one IFB device to another.
|
|||
Remember that IFB is a very specialized case of packet redirecting
|
||||
device. Instead of redirecting it puts packets at the exact spot
|
||||
on the stack it found them from.
|
||||
Redirecting from ifbX->ifbY will actually not crash your machine but your
|
||||
Redirecting from ifbX->ifbY will actually not crash your machine but your
|
||||
packets will all be dropped (this is much simpler to detect
|
||||
and resolve and is only affecting users of ifb as opposed to the
|
||||
whole stack).
|
||||
|
|
@ -64,7 +64,7 @@ Some examples:
|
|||
|
||||
1) Mirror all packets arriving on eth0 to be sent out on eth1.
|
||||
You may have a sniffer or some accounting box hooked up on eth1.
|
||||
|
||||
|
||||
---
|
||||
tc qdisc add dev eth0 ingress
|
||||
tc filter add dev eth0 parent ffff: protocol ip prio 10 u32 \
|
||||
|
|
@ -100,7 +100,7 @@ stack (i.e ping would work).
|
|||
3) Even more funky example:
|
||||
|
||||
#
|
||||
#allow 1 out 10 packets on ingress of lo to randomly make it to the
|
||||
#allow 1 out 10 packets on ingress of lo to randomly make it to the
|
||||
# host A (Randomness uses the netrand generator)
|
||||
#
|
||||
---
|
||||
|
|
@ -111,9 +111,9 @@ action mirred egress mirror dev eth0
|
|||
---
|
||||
|
||||
4)
|
||||
# for packets from 10.0.0.9 going out on eth0 (could be local
|
||||
# IP or something # we are forwarding) -
|
||||
# if exceeding a 100Kbps rate, then redirect to eth1
|
||||
# for packets from 10.0.0.9 going out on eth0 (could be local
|
||||
# IP or something # we are forwarding) -
|
||||
# if exceeding a 100Kbps rate, then redirect to eth1
|
||||
#
|
||||
|
||||
---
|
||||
|
|
@ -129,7 +129,7 @@ so you could tcpdump them (dummy by defaults drops all packets it sees).
|
|||
This is a very useful debug feature.
|
||||
|
||||
Let's say you are policing packets from alias 192.168.200.200/32
|
||||
you don't want those to exceed 100kbps going out.
|
||||
you dont want those to exceed 100kbps going out.
|
||||
|
||||
---
|
||||
tc qdisc add dev eth0 handle 1:0 root prio
|
||||
|
|
@ -158,7 +158,7 @@ Essentially a good debugging/logging interface (sort of like
|
|||
BSD's specialized log device does without needing one).
|
||||
|
||||
If you replace mirror with redirect, those packets will be
|
||||
blackholed and will never make it out.
|
||||
blackholed and will never make it out.
|
||||
|
||||
cheers,
|
||||
jamal
|
||||
|
|
|
|||
|
|
@ -0,0 +1,429 @@
|
|||
\documentstyle[12pt,twoside]{article}
|
||||
\def\TITLE{IPv6 Flow Labels}
|
||||
\input preamble
|
||||
\begin{center}
|
||||
\Large\bf IPv6 Flow Labels in Linux-2.2.
|
||||
\end{center}
|
||||
|
||||
|
||||
\begin{center}
|
||||
{ \large Alexey~N.~Kuznetsov } \\
|
||||
\em Institute for Nuclear Research, Moscow \\
|
||||
\verb|kuznet@ms2.inr.ac.ru| \\
|
||||
\rm April 11, 1999
|
||||
\end{center}
|
||||
|
||||
\vspace{5mm}
|
||||
|
||||
\tableofcontents
|
||||
|
||||
\section{Introduction.}
|
||||
|
||||
Every IPv6 packet carries 28 bits of flow information. RFC2460 splits
|
||||
these bits to two fields: 8 bits of traffic class (or DS field, if you
|
||||
prefer this term) and 20 bits of flow label. Currently there exists
|
||||
no well-defined API to manage IPv6 flow information. In this document
|
||||
I describe an attempt to design the API for Linux-2.2 IPv6 stack.
|
||||
|
||||
\vskip 1mm
|
||||
|
||||
The API must solve the following tasks:
|
||||
|
||||
\begin{enumerate}
|
||||
|
||||
\item To allow user to set traffic class bits.
|
||||
|
||||
\item To allow user to read traffic class bits of received packets.
|
||||
This feature is not so useful as the first one, however it will be
|
||||
necessary f.e.\ to implement ECN [RFC2481] for datagram oriented services
|
||||
or to implement receiver side of SRP or another end-to-end protocol
|
||||
using traffic class bits.
|
||||
|
||||
\item To assign flow labels to packets sent by user.
|
||||
|
||||
\item To get flow labels of received packets. I do not know
|
||||
any applications of this feature, but it is possible that receiver will
|
||||
want to use flow labels to distinguish sub-flows.
|
||||
|
||||
\item To allocate flow labels in the way, compliant to RFC2460. Namely:
|
||||
|
||||
\begin{itemize}
|
||||
\item
|
||||
Flow labels must be uniformly distributed (pseudo-)random numbers,
|
||||
so that any subset of 20 bits can be used as hash key.
|
||||
|
||||
\item
|
||||
Flows with coinciding source address and flow label must have identical
|
||||
destination address and not-fragmentable extensions headers (i.e.\
|
||||
hop by hop options and all the headers up to and including routing header,
|
||||
if it is present.)
|
||||
|
||||
\begin{NB}
|
||||
There is a hole in specs: some hop-by-hop options can be
|
||||
defined only on per-packet base (f.e.\ jumbo payload option).
|
||||
Essentially, it means that such options cannot be present in packets
|
||||
with flow labels.
|
||||
\end{NB}
|
||||
\begin{NB}
|
||||
NB notes here and below reflect only my personal opinion,
|
||||
they should be read with smile or should not be read at all :-).
|
||||
\end{NB}
|
||||
|
||||
|
||||
\item
|
||||
Flow labels have finite lifetime and source is not allowed to reuse
|
||||
flow label for another flow until the maximal lifetime has expired,
|
||||
so that intermediate nodes will be able to invalidate flow state before
|
||||
the label is taken over by another flow.
|
||||
Flow state, including lifetime, is propagated along datagram path
|
||||
by some application specific methods
|
||||
(f.e.\ in RSVP PATH messages or in some hop-by-hop option).
|
||||
|
||||
|
||||
\end{itemize}
|
||||
|
||||
\end{enumerate}
|
||||
|
||||
\section{Sending/receiving flow information.}
|
||||
|
||||
\paragraph{Discussion.}
|
||||
\addcontentsline{toc}{subsection}{Discussion}
|
||||
It was proposed (Where? I do not remember any explicit statement)
|
||||
to solve the first four tasks using
|
||||
\verb|sin6_flowinfo| field added to \verb|struct| \verb|sockaddr_in6|
|
||||
(see RFC2553).
|
||||
|
||||
\begin{NB}
|
||||
This method is difficult to consider as reasonable, because it
|
||||
puts additional overhead on all the services, even though only a
|
||||
very small subset of them (none, to be more exact) really use it.
|
||||
It contradicts both to IETF spirit and the letter. Before RFC2553
|
||||
one justification existed, IPv6 address alignment left 4 byte
|
||||
hole in \verb|sockaddr_in6| in any case. Now it has no justification.
|
||||
\end{NB}
|
||||
|
||||
We have two problems with this method. The first one is common for all OSes:
|
||||
if \verb|recvmsg()| initializes \verb|sin6_flowinfo| to flow info
|
||||
of received packet, we lose one very important property of BSD socket API,
|
||||
namely, we are not allowed to use received address for reply directly
|
||||
and have to mangle it, even if we are not interested in flowinfo subtleties.
|
||||
|
||||
\begin{NB}
|
||||
RFC2553 adds new requirement: to clear \verb|sin6_flowinfo|.
|
||||
Certainly, it is not solution but rather attempt to force applications
|
||||
to do unnecessary work. Well, as usual, one mistake in design
|
||||
is followed by attempts to patch the hole and more mistakes...
|
||||
\end{NB}
|
||||
|
||||
Another problem is Linux specific. Historically Linux IPv6 did not
|
||||
initialize \verb|sin6_flowinfo| at all, so that, if kernel does not
|
||||
support flow labels, this field is not zero, but a random number.
|
||||
Some applications also did not take care about it.
|
||||
|
||||
\begin{NB}
|
||||
Following RFC2553 such applications can be considered as broken,
|
||||
but I still think that they are right: clearing all the address
|
||||
before filling known fields is robust but stupid solution.
|
||||
Useless wasting CPU cycles and
|
||||
memory bandwidth is not a good idea. Such patches are acceptable
|
||||
as temporary hacks, but not as standard of the future.
|
||||
\end{NB}
|
||||
|
||||
|
||||
\paragraph{Implementation.}
|
||||
\addcontentsline{toc}{subsection}{Implementation}
|
||||
By default Linux IPv6 does not read \verb|sin6_flowinfo| field
|
||||
assuming that common applications are not obliged to initialize it
|
||||
and are permitted to consider it as pure alignment padding.
|
||||
In order to tell kernel that application
|
||||
is aware of this field, it is necessary to set socket option
|
||||
\verb|IPV6_FLOWINFO_SEND|.
|
||||
|
||||
\begin{verbatim}
|
||||
int on = 1;
|
||||
setsockopt(sock, SOL_IPV6, IPV6_FLOWINFO_SEND,
|
||||
(void*)&on, sizeof(on));
|
||||
\end{verbatim}
|
||||
|
||||
Linux kernel never fills \verb|sin6_flowinfo| field, when passing
|
||||
message to user space, though the kernels which support flow labels
|
||||
initialize it to zero. If user wants to get received flowinfo, he
|
||||
will set option \verb|IPV6_FLOWINFO| and after this he will receive
|
||||
flowinfo as ancillary data object of type \verb|IPV6_FLOWINFO|
|
||||
(cf.\ RFC2292).
|
||||
|
||||
\begin{verbatim}
|
||||
int on = 1;
|
||||
setsockopt(sock, SOL_IPV6, IPV6_FLOWINFO, (void*)&on, sizeof(on));
|
||||
\end{verbatim}
|
||||
|
||||
Flowinfo received and latched by a connected TCP socket also may be fetched
|
||||
with \verb|getsockopt()| \verb|IPV6_PKTOPTIONS| together with
|
||||
another optional information.
|
||||
|
||||
Besides that, in the spirit of RFC2292 the option \verb|IPV6_FLOWINFO|
|
||||
may be used as alternative way to send flowinfo with \verb|sendmsg()| or
|
||||
to latch it with \verb|IPV6_PKTOPTIONS|.
|
||||
|
||||
\paragraph{Note about IPv6 options and destination address.}
|
||||
\addcontentsline{toc}{subsection}{IPv6 options and destination address}
|
||||
If \verb|sin6_flowinfo| contains a non-zero flow label,
|
||||
destination address in \verb|sin6_addr| and non-fragmentable
|
||||
extension headers are ignored. Instead, kernel uses the values
|
||||
cached at flow setup (see below). However, for connected sockets
|
||||
kernel prefers the values set at connection time.
|
||||
|
||||
\paragraph{Example.}
|
||||
\addcontentsline{toc}{subsection}{Example}
|
||||
After setting socket option \verb|IPV6_FLOWINFO|
|
||||
flowlabel and DS field are received as ancillary data object
|
||||
of type \verb|IPV6_FLOWINFO| and level \verb|SOL_IPV6|.
|
||||
In the cases when it is convenient to use \verb|recvfrom(2)|,
|
||||
it is possible to replace library variant with your own one,
|
||||
sort of:
|
||||
|
||||
\begin{verbatim}
|
||||
#include <sys/socket.h>
|
||||
#include <netinet/in6.h>
|
||||
|
||||
size_t recvfrom(int fd, char *buf, size_t len, int flags,
|
||||
struct sockaddr *addr, int *addrlen)
|
||||
{
|
||||
size_t cc;
|
||||
char cbuf[128];
|
||||
struct cmsghdr *c;
|
||||
struct iovec iov = { buf, len };
|
||||
struct msghdr msg = { addr, *addrlen,
|
||||
&iov, 1,
|
||||
cbuf, sizeof(cbuf),
|
||||
0 };
|
||||
|
||||
cc = recvmsg(fd, &msg, flags);
|
||||
if (cc < 0)
|
||||
return cc;
|
||||
((struct sockaddr_in6*)addr)->sin6_flowinfo = 0;
|
||||
*addrlen = msg.msg_namelen;
|
||||
for (c=CMSG_FIRSTHDR(&msg); c; c = CMSG_NEXTHDR(&msg, c)) {
|
||||
if (c->cmsg_level != SOL_IPV6 ||
|
||||
c->cmsg_type != IPV6_FLOWINFO)
|
||||
continue;
|
||||
((struct sockaddr_in6*)addr)->sin6_flowinfo = *(__u32*)CMSG_DATA(c);
|
||||
}
|
||||
return cc;
|
||||
}
|
||||
\end{verbatim}
|
||||
|
||||
|
||||
|
||||
\section{Flow label management.}
|
||||
|
||||
\paragraph{Discussion.}
|
||||
\addcontentsline{toc}{subsection}{Discussion}
|
||||
Requirements of RFC2460 are pretty tough. Particularly, lifetimes
|
||||
longer than boot time require to store allocated labels at stable
|
||||
storage, so that the full implementation necessarily includes user space flow
|
||||
label manager. There are at least three different approaches:
|
||||
|
||||
\begin{enumerate}
|
||||
\item {\bf ``Cooperative''. } We could leave flow label allocation wholly
|
||||
to user space. When user needs label he requests manager directly. The approach
|
||||
is valid, but like any ``cooperative'' approach it suffers from security problems.
|
||||
|
||||
\begin{NB}
|
||||
One idea is to disallow not privileged user to allocate flow
|
||||
labels, but instead to pass the socket to manager via \verb|SCM_RIGHTS|
|
||||
control message, so that it will allocate label and assign it to socket
|
||||
itself. Hmm... the idea is interesting.
|
||||
\end{NB}
|
||||
|
||||
\item {\bf ``Indirect''.} Kernel redirects requests to user level daemon
|
||||
and does not install label until the daemon acknowledged the request.
|
||||
The approach is the most promising, it is especially pleasant to recognize
|
||||
parallel with IPsec API [RFC2367,Craig]. Actually, it may share API with
|
||||
IPsec.
|
||||
|
||||
\item {\bf ``Stupid''.} To allocate labels in kernel space. It is the simplest
|
||||
method, but it suffers from two serious flaws: the first,
|
||||
we cannot lease labels with lifetimes longer than boot time, the second,
|
||||
it is sensitive to DoS attacks. The kernel has to remember all the obsolete
|
||||
labels until their expiration and a malicious user may quickly eat all the
|
||||
flow label space.
|
||||
|
||||
\end{enumerate}
|
||||
|
||||
Certainly, I choose the most ``stupid'' method. It is the cheapest one
|
||||
for implementor (i.e.\ me), and taking into account that flow labels
|
||||
still have no serious applications it is not useful to work on more
|
||||
advanced API, especially, taking into account that eventually we
|
||||
will get it for no fee together with IPsec.
|
||||
|
||||
|
||||
\paragraph{Implementation.}
|
||||
\addcontentsline{toc}{subsection}{Implementation}
|
||||
Socket option \verb|IPV6_FLOWLABEL_MGR| allows one to
|
||||
request flow label manager to allocate new flow label, to reuse
|
||||
already allocated one or to delete old flow label.
|
||||
Its argument is \verb|struct| \verb|in6_flowlabel_req|:
|
||||
|
||||
\begin{verbatim}
|
||||
struct in6_flowlabel_req
|
||||
{
|
||||
struct in6_addr flr_dst;
|
||||
__u32 flr_label;
|
||||
__u8 flr_action;
|
||||
__u8 flr_share;
|
||||
__u16 flr_flags;
|
||||
__u16 flr_expires;
|
||||
__u16 flr_linger;
|
||||
__u32 __flr_reserved;
|
||||
/* Options in format of IPV6_PKTOPTIONS */
|
||||
};
|
||||
\end{verbatim}
|
||||
|
||||
\begin{itemize}
|
||||
|
||||
\item \verb|dst| is IPv6 destination address associated with the label.
|
||||
|
||||
\item \verb|label| is flow label value in network byte order. If it is zero,
|
||||
kernel will allocate new pseudo-random number. Otherwise, kernel will try
|
||||
to lease flow label ordered by user. In this case, it is user task to provide
|
||||
necessary flow label randomness.
|
||||
|
||||
\item \verb|action| is requested operation. Currently, only three operations
|
||||
are defined:
|
||||
|
||||
\begin{verbatim}
|
||||
#define IPV6_FL_A_GET 0 /* Get flow label */
|
||||
#define IPV6_FL_A_PUT 1 /* Release flow label */
|
||||
#define IPV6_FL_A_RENEW 2 /* Update expire time */
|
||||
\end{verbatim}
|
||||
|
||||
\item \verb|flags| are optional modifiers. Currently
|
||||
only \verb|IPV6_FL_A_GET| has modifiers:
|
||||
|
||||
\begin{verbatim}
|
||||
#define IPV6_FL_F_CREATE 1 /* Allowed to create new label */
|
||||
#define IPV6_FL_F_EXCL 2 /* Do not create new label */
|
||||
\end{verbatim}
|
||||
|
||||
|
||||
\item \verb|share| defines who is allowed to reuse the same flow label.
|
||||
|
||||
\begin{verbatim}
|
||||
#define IPV6_FL_S_NONE 0 /* Not defined */
|
||||
#define IPV6_FL_S_EXCL 1 /* Label is private */
|
||||
#define IPV6_FL_S_PROCESS 2 /* May be reused by this process */
|
||||
#define IPV6_FL_S_USER 3 /* May be reused by this user */
|
||||
#define IPV6_FL_S_ANY 255 /* Anyone may reuse it */
|
||||
\end{verbatim}
|
||||
|
||||
\item \verb|linger| is time in seconds. After the last user releases flow
|
||||
label, it will not be reused with different destination and options at least
|
||||
during this time. If \verb|share| is not \verb|IPV6_FL_S_EXCL| the label
|
||||
still can be shared by other sockets. Current implementation does not allow
|
||||
unprivileged user to set linger longer than 60 sec.
|
||||
|
||||
\item \verb|expires| is time in seconds. Flow label will be kept at least
|
||||
for this time, but it will not be destroyed before user released it explicitly
|
||||
or closed all the sockets using it. Current implementation does not allow
|
||||
unprivileged user to set timeout longer than 60 sec. Privileged applications
|
||||
MAY set longer lifetimes, but in this case they MUST save allocated
|
||||
labels at stable storage and restore them back after reboot before the first
|
||||
application allocates new flow.
|
||||
|
||||
\end{itemize}
|
||||
|
||||
This structure is followed by optional extension headers associated
|
||||
with this flow label in format of \verb|IPV6_PKTOPTIONS|. Only
|
||||
\verb|IPV6_HOPOPTS|, \verb|IPV6_RTHDR| and, if \verb|IPV6_RTHDR| is present,
|
||||
\verb|IPV6_DSTOPTS| are allowed.
|
||||
|
||||
\paragraph{Example.}
|
||||
\addcontentsline{toc}{subsection}{Example}
|
||||
The function \verb|get_flow_label| allocates
|
||||
private flow label.
|
||||
|
||||
\begin{verbatim}
|
||||
int get_flow_label(int fd, struct sockaddr_in6 *dst, __u32 fl)
|
||||
{
|
||||
int on = 1;
|
||||
struct in6_flowlabel_req freq;
|
||||
|
||||
memset(&freq, 0, sizeof(freq));
|
||||
freq.flr_label = htonl(fl);
|
||||
freq.flr_action = IPV6_FL_A_GET;
|
||||
freq.flr_flags = IPV6_FL_F_CREATE | IPV6_FL_F_EXCL;
|
||||
freq.flr_share = IPV6_FL_S_EXCL;
|
||||
memcpy(&freq.flr_dst, &dst->sin6_addr, 16);
|
||||
if (setsockopt(fd, SOL_IPV6, IPV6_FLOWLABEL_MGR,
|
||||
&freq, sizeof(freq)) == -1) {
|
||||
perror ("can't lease flowlabel");
|
||||
return -1;
|
||||
}
|
||||
dst->sin6_flowinfo |= freq.flr_label;
|
||||
|
||||
if (setsockopt(fd, SOL_IPV6, IPV6_FLOWINFO_SEND,
|
||||
&on, sizeof(on)) == -1) {
|
||||
perror ("can't send flowinfo");
|
||||
|
||||
freq.flr_action = IPV6_FL_A_PUT;
|
||||
setsockopt(fd, SOL_IPV6, IPV6_FLOWLABEL_MGR,
|
||||
&freq, sizeof(freq));
|
||||
return -1;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
\end{verbatim}
|
||||
|
||||
A bit more complicated example using routing header can be found
|
||||
in \verb|ping6| utility (\verb|iputils| package). Linux rsvpd backend
|
||||
contains an example of using operation \verb|IPV6_FL_A_RENEW|.
|
||||
|
||||
\paragraph{Listing flow labels.}
|
||||
\addcontentsline{toc}{subsection}{Listing flow labels}
|
||||
List of currently allocated
|
||||
flow labels may be read from \verb|/proc/net/ip6_flowlabel|.
|
||||
|
||||
\begin{verbatim}
|
||||
Label S Owner Users Linger Expires Dst Opt
|
||||
A1BE5 1 0 0 6 3 3ffe2400000000010a0020fffe71fb30 0
|
||||
\end{verbatim}
|
||||
|
||||
\begin{itemize}
|
||||
\item \verb|Label| is hexadecimal flow label value.
|
||||
\item \verb|S| is sharing style.
|
||||
\item \verb|Owner| is ID of creator, it is zero, pid or uid, depending on
|
||||
sharing style.
|
||||
\item \verb|Users| is number of applications using the label now.
|
||||
\item \verb|Linger| is \verb|linger| of this label in seconds.
|
||||
\item \verb|Expires| is time until expiration of the label in seconds. It may
|
||||
be negative, if the label is in use.
|
||||
\item \verb|Dst| is IPv6 destination address.
|
||||
\item \verb|Opt| is length of options, associated with the label. Option
|
||||
data are not accessible.
|
||||
\end{itemize}
|
||||
|
||||
|
||||
\paragraph{Flow labels and RSVP.}
|
||||
\addcontentsline{toc}{subsection}{Flow labels and RSVP}
|
||||
RSVP daemon supports IPv6 flow labels
|
||||
without any modifications to standard ISI RAPI. Sender must allocate
|
||||
flow label, fill corresponding sender template and submit it to local rsvp
|
||||
daemon. rsvpd will check the label and start to announce it in PATH
|
||||
messages. Rsvpd on sender node will renew the flow label, so that it will not
|
||||
be reused before path state expires and all the intermediate
|
||||
routers and receiver purge flow state.
|
||||
|
||||
\verb|rtap| utility is modified to parse flow labels. F.e.\ if user allocated
|
||||
flow label \verb|0xA1234|, he may write:
|
||||
|
||||
\begin{verbatim}
|
||||
RTAP> sender 3ffe:2400::1/FL0xA1234 <Tspec>
|
||||
\end{verbatim}
|
||||
|
||||
Receiver makes reservation with command:
|
||||
\begin{verbatim}
|
||||
RTAP> reserve ff 3ffe:2400::1/FL0xA1234 <Flowspec>
|
||||
\end{verbatim}
|
||||
|
||||
\end{document}
|
||||
|
|
@ -0,0 +1,130 @@
|
|||
<!doctype linuxdoc system>
|
||||
|
||||
<article>
|
||||
|
||||
<title>ARPD Daemon
|
||||
<author>Alexey Kuznetsov, <tt/kuznet@ms2.inr.ac.ru/
|
||||
<date>some_negative_number, 20 Sep 2001
|
||||
<abstract>
|
||||
<tt/arpd/ is daemon collecting gratuitous ARP information, saving
|
||||
it on local disk and feeding it to kernel on demand to avoid
|
||||
redundant broadcasting due to limited size of kernel ARP cache.
|
||||
</abstract>
|
||||
|
||||
|
||||
<p><bf/Description/
|
||||
|
||||
<p>The format of the command is:
|
||||
|
||||
<tscreen><verb>
|
||||
arpd OPTIONS [ INTERFACE [ INTERFACE ... ] ]
|
||||
</verb></tscreen>
|
||||
|
||||
<p> <tt/OPTIONS/ are:
|
||||
|
||||
<itemize>
|
||||
|
||||
<item><tt/-l/ - dump <tt/arpd/ database to stdout and exit. Output consists
|
||||
of three columns: interface index, IP address and MAC address.
|
||||
Negative entries for dead hosts are also shown, in this case MAC address
|
||||
is replaced by word <tt/FAILED/ followed by colon and time when the fact
|
||||
that host is dead was proven the last time.
|
||||
|
||||
<item><tt/-f FILE/ - read and load <tt/arpd/ database from <tt/FILE/
|
||||
in text format similar to that dumped by option <tt/-l/. Exit after load,
|
||||
probably listing resulting database, if option <tt/-l/ is also given.
|
||||
If <tt/FILE/ is <tt/-/, <tt/stdin/ is read to get ARP table.
|
||||
|
||||
<item><tt/-b DATABASE/ - location of database file. Default location is
|
||||
<tt>/var/lib/arpd/arpd.db</tt>.
|
||||
|
||||
<item><tt/-a NUMBER/ - <tt/arpd/ not only passively listens ARP on wire, but
|
||||
also sends broadcast queries itself. <tt/NUMBER/ is number of such queries
|
||||
to make before destination is considered as dead. When <tt/arpd/ is started
|
||||
as kernel helper (i.e. with <tt/app_solicit/ enabled in <tt/sysctl/
|
||||
or even with option <tt/-k/) without this option and still did not learn enough
|
||||
information, you can observe 1 second gaps in service. Not fatal, but
|
||||
not good.
|
||||
|
||||
<item><tt/-k/ - suppress sending broadcast queries by kernel. It makes
|
||||
sense together with option <tt/-a/.
|
||||
|
||||
<item><tt/-n TIME/ - timeout of negative cache. When resolution fails <tt/arpd/
|
||||
suppresses further attempts to resolve for this period. It makes sense
|
||||
only together with option <tt/-k/. This timeout should not be too much
|
||||
longer than boot time of a typical host not supporting gratuitous ARP.
|
||||
Default value is 60 seconds.
|
||||
|
||||
<item><tt/-R RATE/ - maximal steady rate of broadcasts sent by <tt/arpd/
|
||||
in packets per second. Default value is 1.
|
||||
|
||||
<item><tt/-B NUMBER/ - number of broadcasts sent by <tt/arpd/ back to back.
|
||||
Default value is 3. Together with option <tt/-R/ this option allows
|
||||
to police broadcasting not to exceed <tt/B+R*T/ over any interval
|
||||
of time <tt/T/.
|
||||
|
||||
</itemize>
|
||||
|
||||
<p><tt/INTERFACE/ is name of networking interface to watch.
|
||||
If no interfaces given, <tt/arpd/ monitors all the interfaces.
|
||||
In this case <tt/arpd/ does not adjust <tt/sysctl/ parameters,
|
||||
it is supposed user does this himself after <tt/arpd/ is started.
|
||||
|
||||
|
||||
<p> Signals
|
||||
|
||||
<p> <tt/arpd/ exits gracefully syncing database and restoring adjusted
|
||||
<tt/sysctl/ parameters, when receives <tt/SIGINT/ or <tt/SIGTERM/.
|
||||
<tt/SIGHUP/ syncs database to disk. <tt/SIGUSR1/ sends some statistics
|
||||
to <tt/syslog/. Effect of other signals is undefined, they may corrupt
|
||||
database and leave <tt/sysctl/ parameters in an unpredictable state.
|
||||
|
||||
<p> Note
|
||||
|
||||
<p> In order for <tt/arpd/ to be able to serve as ARP resolver, kernel must be
|
||||
compiled with the option <tt/CONFIG_ARPD/ and, in the case when interface list
|
||||
is not given on command line, variable <tt/app_solicit/
|
||||
on interfaces of interest should be set in <tt>/proc/sys/net/ipv4/neigh/*</tt>.
|
||||
If this is not made <tt/arpd/ still collects gratuitous ARP information
|
||||
in its database.
|
||||
|
||||
<p> Examples
|
||||
|
||||
<enum>
|
||||
<item> Start <tt/arpd/ to collect gratuitous ARP, but not messing
|
||||
with kernel functionality:
|
||||
|
||||
<tscreen><verb>
|
||||
arpd -b /var/tmp/arpd.db
|
||||
</verb></tscreen>
|
||||
|
||||
<item> Look at result after some time:
|
||||
|
||||
<tscreen><verb>
|
||||
killall arpd
|
||||
arpd -l -b /var/tmp/arpd.db
|
||||
</verb></tscreen>
|
||||
|
||||
<item> To enable kernel helper, leaving leading role to kernel:
|
||||
|
||||
<tscreen><verb>
|
||||
arpd -b /var/tmp/arpd.db -a 1 eth0 eth1
|
||||
</verb></tscreen>
|
||||
|
||||
<item> Completely replace kernel resolution on interfaces <tt/eth0/
|
||||
and <tt/eth1/. In this case kernel still does unicast probing to
|
||||
validate entries, but all the broadcast activity is suppressed
|
||||
and made under authority of <tt/arpd/:
|
||||
|
||||
<tscreen><verb>
|
||||
arpd -b /var/tmp/arpd.db -a 3 -k eth0 eth1
|
||||
</verb></tscreen>
|
||||
|
||||
This is mode which <tt/arpd/ is supposed to work normally.
|
||||
It is not default just to prevent accidental enabling of too aggressive
|
||||
mode.
|
||||
|
||||
</enum>
|
||||
|
||||
</article>
|
||||
|
||||
|
|
@ -0,0 +1,16 @@
|
|||
#! /bin/bash
|
||||
# $1 = Temporary file . "string"
|
||||
# $2 = File to process . "string"
|
||||
# $3 = Page size . ie: a4 , letter ... "string"
|
||||
# $4 = Number of pages to fit on a single sheet . "numeric"
|
||||
|
||||
if type psnup >&/dev/null; then
|
||||
echo "psnup -$4 -p$3 $1 $2"
|
||||
psnup -$4 -p$3 $1 $2
|
||||
elif type psmulti >&/dev/null; then
|
||||
echo "psmulti $1 > $2"
|
||||
psmulti $1 > $2
|
||||
else
|
||||
echo "cp $1 $2"
|
||||
cp $1 $2
|
||||
fi
|
||||
File diff suppressed because it is too large
Load Diff
|
|
@ -0,0 +1,469 @@
|
|||
\documentstyle[12pt,twoside]{article}
|
||||
\def\TITLE{Tunnels over IP}
|
||||
\input preamble
|
||||
\begin{center}
|
||||
\Large\bf Tunnels over IP in Linux-2.2
|
||||
\end{center}
|
||||
|
||||
|
||||
\begin{center}
|
||||
{ \large Alexey~N.~Kuznetsov } \\
|
||||
\em Institute for Nuclear Research, Moscow \\
|
||||
\verb|kuznet@ms2.inr.ac.ru| \\
|
||||
\rm March 17, 1999
|
||||
\end{center}
|
||||
|
||||
\vspace{5mm}
|
||||
|
||||
\tableofcontents
|
||||
|
||||
|
||||
\section{Instead of introduction: micro-FAQ.}
|
||||
|
||||
\begin{itemize}
|
||||
|
||||
\item
|
||||
Q: In linux-2.0.36 I used:
|
||||
\begin{verbatim}
|
||||
ifconfig tunl1 10.0.0.1 pointopoint 193.233.7.65
|
||||
\end{verbatim}
|
||||
to create tunnel. It does not work in 2.2.0!
|
||||
|
||||
A: You are right, it does not work. The command written above is split to two commands.
|
||||
\begin{verbatim}
|
||||
ip tunnel add MY-TUNNEL mode ipip remote 193.233.7.65
|
||||
\end{verbatim}
|
||||
will create tunnel device with name \verb|MY-TUNNEL|. Now you may configure
|
||||
it with:
|
||||
\begin{verbatim}
|
||||
ifconfig MY-TUNNEL 10.0.0.1
|
||||
\end{verbatim}
|
||||
Certainly, if you prefer name \verb|tunl1| to \verb|MY-TUNNEL|,
|
||||
you still may use it.
|
||||
|
||||
\item
|
||||
Q: In linux-2.0.36 I used:
|
||||
\begin{verbatim}
|
||||
ifconfig tunl0 10.0.0.1
|
||||
route add -net 10.0.0.0 gw 193.233.7.65 dev tunl0
|
||||
\end{verbatim}
|
||||
to tunnel net 10.0.0.0 via router 193.233.7.65. It does not
|
||||
work in 2.2.0! Moreover, \verb|route| prints a funny error sort of
|
||||
``network unreachable'' and after this I found a strange direct route
|
||||
to 10.0.0.0 via \verb|tunl0| in routing table.
|
||||
|
||||
A: Yes, in 2.2 the rule that {\em normal} gateway must reside on directly
|
||||
connected network has not any exceptions. You may tell kernel, that
|
||||
this particular route is {\em abnormal}:
|
||||
\begin{verbatim}
|
||||
ifconfig tunl0 10.0.0.1 netmask 255.255.255.255
|
||||
ip route add 10.0.0.0/8 via 193.233.7.65 dev tunl0 onlink
|
||||
\end{verbatim}
|
||||
Note keyword \verb|onlink|, it is the magic key that orders kernel
|
||||
not to check for consistency of gateway address.
|
||||
Probably, after this explanation you have already guessed another method
|
||||
to cheat kernel:
|
||||
\begin{verbatim}
|
||||
ifconfig tunl0 10.0.0.1 netmask 255.255.255.255
|
||||
route add -host 193.233.7.65 dev tunl0
|
||||
route add -net 10.0.0.0 netmask 255.0.0.0 gw 193.233.7.65
|
||||
route del -host 193.233.7.65 dev tunl0
|
||||
\end{verbatim}
|
||||
Well, if you like such tricks, nobody may prohibit you to use them.
|
||||
Only do not forget
|
||||
that between \verb|route add| and \verb|route del| host 193.233.7.65 is
|
||||
unreachable.
|
||||
|
||||
\item
|
||||
Q: In 2.0.36 I used to load \verb|tunnel| device module and \verb|ipip| module.
|
||||
I cannot find any \verb|tunnel| in 2.2!
|
||||
|
||||
A: Linux-2.2 has single module \verb|ipip| for both directions of tunneling
|
||||
and for all IPIP tunnel devices.
|
||||
|
||||
\item
|
||||
Q: \verb|traceroute| does not work over tunnel! Well, stop... It works,
|
||||
only skips some number of hops.
|
||||
|
||||
A: Yes. By default tunnel driver copies \verb|ttl| value from
|
||||
inner packet to outer one. It means that path traversed by tunneled
|
||||
packets to another endpoint is not hidden. If you dislike this, or if you
|
||||
are going to use some routing protocol expecting that packets
|
||||
with ttl 1 will reach peering host (f.e.\ RIP, OSPF or EBGP)
|
||||
and you are not afraid of
|
||||
tunnel loops, you may append option \verb|ttl 64|, when creating tunnel
|
||||
with \verb|ip tunnel add|.
|
||||
|
||||
\item
|
||||
Q: ... Well, list of things, which 2.0 was able to do finishes.
|
||||
|
||||
\end{itemize}
|
||||
|
||||
\paragraph{Summary of differences between 2.2 and 2.0.}
|
||||
|
||||
\begin{itemize}
|
||||
|
||||
\item {\bf In 2.0} you could compile tunnel device into kernel
|
||||
and got set of 4 devices \verb|tunl0| ... \verb|tunl3| or,
|
||||
alternatively, compile it as module and load new module
|
||||
for each new tunnel. Also, module \verb|ipip| was necessary
|
||||
to receive tunneled packets.
|
||||
|
||||
{\bf 2.2} has {\em one\/} module \verb|ipip|. Loading it you get base
|
||||
tunnel device \verb|tunl0| and other tunnels may be created with command
|
||||
\verb|ip tunnel add|. These new devices may have arbitrary names.
|
||||
|
||||
|
||||
\item {\bf In 2.0} you set remote tunnel endpoint address with
|
||||
the command \verb|ifconfig| ... \verb|pointopoint A|.
|
||||
|
||||
{\bf In 2.2} this command has the same semantics on all
|
||||
the interfaces, namely it sets not tunnel endpoint,
|
||||
but address of peering host, which is directly reachable
|
||||
via this tunnel,
|
||||
rather than via Internet. Actual tunnel endpoint address \verb|A|
|
||||
should be set with \verb|ip tunnel add ... remote A|.
|
||||
|
||||
\item {\bf In 2.0} you create tunnel routes with the command:
|
||||
\begin{verbatim}
|
||||
route add -net 10.0.0.0 gw A dev tunl0
|
||||
\end{verbatim}
|
||||
|
||||
{\bf 2.2} interprets this command equally for all device
|
||||
kinds and gateway is required to be directly reachable via this tunnel,
|
||||
rather than via Internet. You still may use \verb|ip route add ... onlink|
|
||||
to override this behaviour.
|
||||
|
||||
\end{itemize}
|
||||
|
||||
|
||||
\section{Tunnel setup: basics}
|
||||
|
||||
Standard Linux-2.2 kernel supports three flavors of tunnels,
|
||||
listed in the following table:
|
||||
\vspace{2mm}
|
||||
|
||||
\begin{tabular}{lll}
|
||||
\vrule depth 0.8ex width 0pt\relax
|
||||
Mode & Description & Base device \\
|
||||
ipip & IP over IP & tunl0 \\
|
||||
sit & IPv6 over IP & sit0 \\
|
||||
gre & ANY over GRE over IP & gre0
|
||||
\end{tabular}
|
||||
|
||||
\vspace{2mm}
|
||||
|
||||
\noindent All the kinds of tunnels are created with one command:
|
||||
\begin{verbatim}
|
||||
ip tunnel add <NAME> mode <MODE> [ local <S> ] [ remote <D> ]
|
||||
\end{verbatim}
|
||||
|
||||
This command creates new tunnel device with name \verb|<NAME>|.
|
||||
The \verb|<NAME>| is an arbitrary string. Particularly,
|
||||
it may be even \verb|eth0|. The rest of parameters set
|
||||
different tunnel characteristics.
|
||||
|
||||
\begin{itemize}
|
||||
|
||||
\item
|
||||
\verb|mode <MODE>| sets tunnel mode. Three modes are available now
|
||||
\verb|ipip|, \verb|sit| and \verb|gre|.
|
||||
|
||||
\item
|
||||
\verb|remote <D>| sets remote endpoint of the tunnel to IP
|
||||
address \verb|<D>|.
|
||||
\item
|
||||
\verb|local <S>| sets fixed local address for tunneled
|
||||
packets. It must be an address on another interface of this host.
|
||||
|
||||
\end{itemize}
|
||||
|
||||
\let\thefootnote\oldthefootnote
|
||||
|
||||
Both \verb|remote| and \verb|local| may be omitted. In this case we
|
||||
say that they are zero or wildcard. Two tunnels of one mode cannot
|
||||
have the same \verb|remote| and \verb|local|. Particularly it means
|
||||
that base device or fallback tunnel cannot be replicated.\footnote{
|
||||
This restriction is relaxed for keyed GRE tunnels.}
|
||||
|
||||
Tunnels are divided to two classes: {\bf pointopoint} tunnels, which
|
||||
have some not wildcard \verb|remote| address and deliver all the packets
|
||||
to this destination, and {\bf NBMA} (i.e.\ Non-Broadcast Multi-Access) tunnels,
|
||||
which have no \verb|remote|. Particularly, base devices (f.e.\ \verb|tunl0|)
|
||||
are NBMA, because they have neither \verb|remote| nor
|
||||
\verb|local| addresses.
|
||||
|
||||
|
||||
After tunnel device is created you should configure it as you did
|
||||
it with other devices. Certainly, the configuration of tunnels has
|
||||
some features related to the fact that they work over existing Internet
|
||||
routing infrastructure and simultaneously create new virtual links,
|
||||
which changes this infrastructure. The danger that not enough careful
|
||||
tunnel setup will result in formation of tunnel loops,
|
||||
collapse of routing or flooding network with exponentially
|
||||
growing number of tunneled fragments is very real.
|
||||
|
||||
|
||||
Protocol setup on pointopoint tunnels does not differ from configuration
|
||||
of other devices. You should set a protocol address with \verb|ifconfig|
|
||||
and add routes with \verb|route| utility.
|
||||
|
||||
NBMA tunnels are different. To route something via NBMA tunnel
|
||||
you have to explain to driver, where it should deliver packets to.
|
||||
The only way to make it is to create special routes with gateway
|
||||
address pointing to desired endpoint. F.e.\
|
||||
\begin{verbatim}
|
||||
ip route add 10.0.0.0/24 via <A> dev tunl0 onlink
|
||||
\end{verbatim}
|
||||
It is important to use option \verb|onlink|, otherwise
|
||||
kernel will refuse request to create route via gateway not directly
|
||||
reachable over device \verb|tunl0|. With IPv6 the situation is much simpler:
|
||||
when you start device \verb|sit0|, it automatically configures itself
|
||||
with all IPv4 addresses mapped to IPv6 space, so that all IPv4
|
||||
Internet is {\em really reachable} via \verb|sit0|! Excellent, the command
|
||||
\begin{verbatim}
|
||||
ip route add 3FFE::/16 via ::193.233.7.65 dev sit0
|
||||
\end{verbatim}
|
||||
will route \verb|3FFE::/16| via \verb|sit0|, sending all the packets
|
||||
destined to this prefix to 193.233.7.65.
|
||||
|
||||
\section{Tunnel setup: options}
|
||||
|
||||
Command \verb|ip tunnel add| has several additional options.
|
||||
\begin{itemize}
|
||||
|
||||
\item \verb|ttl N| --- set fixed TTL \verb|N| on tunneled packets.
|
||||
\verb|N| is number in the range 1--255. 0 is special value,
|
||||
meaning that packets inherit TTL value.
|
||||
Default value is: \verb|inherit|.
|
||||
|
||||
\item \verb|tos T| --- set fixed tos \verb|T| on tunneled packets.
|
||||
Default value is: \verb|inherit|.
|
||||
|
||||
\item \verb|dev DEV| --- bind tunnel to device \verb|DEV|, so that
|
||||
tunneled packets will be routed only via this device and will
|
||||
not be able to escape to another device, when route to endpoint changes.
|
||||
|
||||
\item \verb|nopmtudisc| --- disable Path MTU Discovery on this tunnel.
|
||||
It is enabled by default. Note that fixed ttl is incompatible
|
||||
with this option: tunnels with fixed ttl always make pmtu discovery.
|
||||
|
||||
\end{itemize}
|
||||
|
||||
\verb|ipip| and \verb|sit| tunnels have no more options. \verb|gre|
|
||||
tunnels are more complicated:
|
||||
|
||||
\begin{itemize}
|
||||
|
||||
\item \verb|key K| --- use keyed GRE with key \verb|K|. \verb|K| is
|
||||
either number or IP address-like dotted quad.
|
||||
|
||||
\item \verb|csum| --- checksum tunneled packets.
|
||||
|
||||
\item \verb|seq| --- serialize packets.
|
||||
\begin{NB}
|
||||
I think this option does not
|
||||
work. At least, I did not test it, did not debug it and
|
||||
even do not understand, how it is supposed to work and for what
|
||||
purpose Cisco planned to use it.
|
||||
\end{NB}
|
||||
|
||||
\end{itemize}
|
||||
|
||||
|
||||
Actually, these GRE options can be set separately for input and
|
||||
output directions by prefixing corresponding keywords with letter
|
||||
\verb|i| or \verb|o|. F.e.\ \verb|icsum| orders to accept only
|
||||
packets with correct checksum and \verb|ocsum| means, that
|
||||
our host will calculate and send checksum.
|
||||
|
||||
Command \verb|ip tunnel add| is not the only operation,
|
||||
which can be made with tunnels. Certainly, you may get short help page
|
||||
with:
|
||||
\begin{verbatim}
|
||||
ip tunnel help
|
||||
\end{verbatim}
|
||||
|
||||
Besides that, you may view list of installed tunnels with the help of command:
|
||||
\begin{verbatim}
|
||||
ip tunnel ls
|
||||
\end{verbatim}
|
||||
Also you may look at statistics:
|
||||
\begin{verbatim}
|
||||
ip -s tunnel ls Cisco
|
||||
\end{verbatim}
|
||||
where \verb|Cisco| is name of tunnel device. Command
|
||||
\begin{verbatim}
|
||||
ip tunnel del Cisco
|
||||
\end{verbatim}
|
||||
destroys tunnel \verb|Cisco|. And, finally,
|
||||
\begin{verbatim}
|
||||
ip tunnel change Cisco mode sit local ME remote HE ttl 32
|
||||
\end{verbatim}
|
||||
changes its parameters.
|
||||
|
||||
\section{Differences 2.2 and 2.0 tunnels revisited.}
|
||||
|
||||
Now we can discuss more subtle differences between tunneling in 2.0
|
||||
and 2.2.
|
||||
|
||||
\begin{itemize}
|
||||
|
||||
\item In 2.0 all tunneled packets were received promiscuously
|
||||
as soon as you loaded module \verb|ipip|. 2.2 tries to select the best
|
||||
tunnel device and packet looks as received on this. F.e.\ if host
|
||||
received \verb|ipip| packet from host \verb|D| destined to our
|
||||
local address \verb|S|, kernel searches for matching tunnels
|
||||
in order:
|
||||
|
||||
\begin{tabular}{ll}
|
||||
1 & \verb|remote| is \verb|D| and \verb|local| is \verb|S| \\
|
||||
2 & \verb|remote| is \verb|D| and \verb|local| is wildcard \\
|
||||
3 & \verb|remote| is wildcard and \verb|local| is \verb|S| \\
|
||||
4 & \verb|tunl0|
|
||||
\end{tabular}
|
||||
|
||||
If tunnel exists, but it is not in \verb|UP| state, the tunnel is ignored.
|
||||
Note, that if \verb|tunl0| is \verb|UP| it receives all the IPIP packets,
|
||||
not acknowledged by more specific tunnels.
|
||||
Be careful, it means that without carefully installed firewall rules
|
||||
anyone on the Internet may inject to your network any packets with
|
||||
source addresses indistinguishable from local ones. It is not so bad idea
|
||||
to design tunnels in the way enforcing maximal route symmetry
|
||||
and to enable reversed path filter (\verb|rp_filter| sysctl option) on
|
||||
tunnel devices.
|
||||
|
||||
\item In 2.2 you can monitor and debug tunnels with \verb|tcpdump|.
|
||||
F.e.\ \verb|tcpdump| \verb|-i Cisco| \verb|-nvv| will dump packets,
|
||||
which kernel output, via tunnel \verb|Cisco| and the packets received on it
|
||||
from kernel viewpoint.
|
||||
|
||||
\end{itemize}
|
||||
|
||||
|
||||
\section{Linux and Cisco IOS tunnels.}
|
||||
|
||||
Among other tunnels Cisco IOS supports IPIP and GRE.
|
||||
Essentially, Cisco setup is subset of options, available for Linux.
|
||||
Let us consider the simplest example:
|
||||
|
||||
\begin{verbatim}
|
||||
interface Tunnel0
|
||||
tunnel mode gre ip
|
||||
tunnel source 10.10.14.1
|
||||
tunnel destination 10.10.13.2
|
||||
\end{verbatim}
|
||||
|
||||
|
||||
This command set translates to:
|
||||
|
||||
\begin{verbatim}
|
||||
ip tunnel add Tunnel0 \
|
||||
mode gre \
|
||||
local 10.10.14.1 \
|
||||
remote 10.10.13.2
|
||||
\end{verbatim}
|
||||
|
||||
Any questions? No questions.
|
||||
|
||||
\section{Interaction IPIP tunnels and DVMRP.}
|
||||
|
||||
DVMRP exploits IPIP tunnels to route multicasts via Internet.
|
||||
\verb|mrouted| creates
|
||||
IPIP tunnels listed in its configuration file automatically.
|
||||
From kernel and user viewpoints there are no differences between
|
||||
tunnels, created in this way, and tunnels created by \verb|ip tunnel|.
|
||||
I.e.\ if \verb|mrouted| created some tunnel, it may be used to
|
||||
route unicast packets, provided appropriate routes are added.
|
||||
And vice versa, if administrator has already created a tunnel,
|
||||
it will be reused by \verb|mrouted|, if it requests DVMRP
|
||||
tunnel with the same local and remote addresses.
|
||||
|
||||
Do not wonder, if your manually configured tunnel is
|
||||
destroyed, when mrouted exits.
|
||||
|
||||
|
||||
\section{Broadcast GRE ``tunnels''.}
|
||||
|
||||
It is possible to set \verb|remote| for GRE tunnel to a multicast
|
||||
address. Such tunnel becomes {\bf broadcast} tunnel (though word
|
||||
tunnel is not quite appropriate in this case, it is rather virtual network).
|
||||
\begin{verbatim}
|
||||
ip tunnel add Universe local 193.233.7.65 \
|
||||
remote 224.66.66.66 ttl 16
|
||||
ip addr add 10.0.0.1/16 dev Universe
|
||||
ip link set Universe up
|
||||
\end{verbatim}
|
||||
This tunnel is true broadcast network and broadcast packets are
|
||||
sent to multicast group 224.66.66.66. By default such tunnel starts
|
||||
to resolve both IP and IPv6 addresses via ARP/NDISC, so that
|
||||
if multicast routing is supported in surrounding network, all GRE nodes
|
||||
will find one another automatically and will form virtual Ethernet-like
|
||||
broadcast network. If multicast routing does not work, it is unpleasant
|
||||
but not fatal flaw. The tunnel becomes NBMA rather than broadcast network.
|
||||
You may disable dynamic ARPing by:
|
||||
\begin{verbatim}
|
||||
echo 0 > /proc/sys/net/ipv4/neigh/Universe/mcast_solicit
|
||||
\end{verbatim}
|
||||
and add required information to ARP tables manually:
|
||||
\begin{verbatim}
|
||||
ip neigh add 10.0.0.2 lladdr 128.6.190.2 dev Universe nud permanent
|
||||
\end{verbatim}
|
||||
In this case packets sent to 10.0.0.2 will be encapsulated in GRE
|
||||
and sent to 128.6.190.2. It is possible to facilitate address resolution
|
||||
using methods typical for other NBMA networks, f.e.\ to start user
|
||||
level \verb|arpd| daemon, which will maintain database of hosts attached
|
||||
to GRE virtual network or ask for information
|
||||
from a dedicated ARP or NHRP server.
|
||||
|
||||
|
||||
Actually, such setup is the most natural for tunneling,
|
||||
it is really flexible, scalable and easily manageable, so that
|
||||
it is strongly recommended to be used with GRE tunnels instead of ugly
|
||||
hack with NBMA mode and \verb|onlink| modifier. Unfortunately,
|
||||
for historical reasons broadcast mode is not supported by IPIP tunnels,
|
||||
but this probably will change in future.
|
||||
|
||||
|
||||
|
||||
\section{Traffic control issues.}
|
||||
|
||||
Tunnels are devices, hence all the power of Linux traffic control
|
||||
applies to them. The simplest (and the most useful in practice)
|
||||
example is limiting tunnel bandwidth. The following command:
|
||||
\begin{verbatim}
|
||||
tc qdisc add dev tunl0 root tbf \
|
||||
rate 128Kbit burst 4K limit 10K
|
||||
\end{verbatim}
|
||||
will limit tunneled traffic to 128Kbit with maximal burst size of 4K
|
||||
and queuing not more than 10K.
|
||||
|
||||
However, you should remember, that tunnels are {\em virtual} devices
|
||||
implemented in software and true queue management is impossible for them
|
||||
just because they have no queues. Instead, it is better to create classes
|
||||
on real physical interfaces and to map tunneled packets to them.
|
||||
In general case of dynamic routing you should create such classes
|
||||
on all outgoing interfaces, or, alternatively,
|
||||
to use option \verb|dev DEV| to bind tunnel to a fixed physical device.
|
||||
In the last case packets will be routed only via specified device
|
||||
and you need to setup corresponding classes only on it.
|
||||
Though you have to pay for this convenience,
|
||||
if routing will change, your tunnel will fail.
|
||||
|
||||
Suppose that CBQ class \verb|1:ABC| has been created on device \verb|eth0|
|
||||
specially for tunnel \verb|Cisco| with endpoints \verb|S| and \verb|D|.
|
||||
Now you can select IPIP packets with addresses \verb|S| and \verb|D|
|
||||
with some classifier and map them to class \verb|1:ABC|. F.e.\
|
||||
it is easy to make with \verb|rsvp| classifier:
|
||||
\begin{verbatim}
|
||||
tc filter add dev eth0 pref 100 proto ip rsvp \
|
||||
session D ipproto ipip filter S \
|
||||
classid 1:ABC
|
||||
\end{verbatim}
|
||||
|
||||
If you want to make more detailed classification of sub-flows
|
||||
transmitted via tunnel, you can build CBQ subtree,
|
||||
rooted at \verb|1:ABC| and attach to subroot set of rules parsing
|
||||
IPIP packets more deeply.
|
||||
|
||||
\end{document}
|
||||
|
|
@ -0,0 +1,110 @@
|
|||
<!doctype linuxdoc system>
|
||||
|
||||
<article>
|
||||
|
||||
<title>NSTAT, IFSTAT and RTACCT Utilities
|
||||
<author>Alexey Kuznetsov, <tt/kuznet@ms2.inr.ac.ru/
|
||||
<date>some_negative_number, 20 Sep 2001
|
||||
<abstract>
|
||||
<tt/nstat/, <tt/ifstat/ and <tt/rtacct/ are simple tools helping
|
||||
to monitor kernel snmp counters and network interface statistics.
|
||||
</abstract>
|
||||
|
||||
<p> These utilities are very similar, so that I describe
|
||||
them simultaneously, using name <tt/Xstat/ in the places which apply
|
||||
to all of them.
|
||||
|
||||
<p>The format of the command is:
|
||||
|
||||
<tscreen><verb>
|
||||
Xstat [ OPTIONS ] [ PATTERN [ PATTERN ... ] ]
|
||||
</verb></tscreen>
|
||||
|
||||
<p>
|
||||
<tt/PATTERN/ is shell style pattern, selecting identifier
|
||||
of SNMP variables or interfaces to show. Variable is displayed
|
||||
if one of patterns matches its name. If no patterns are given,
|
||||
<tt/Xstat/ assumes that user wants to see all the variables.
|
||||
|
||||
<p> <tt/OPTIONS/ is list of single letter options, using common unix
|
||||
conventions.
|
||||
|
||||
<itemize>
|
||||
<item><tt/-h/ - show help page
|
||||
<item><tt/-?/ - the same, of course
|
||||
<item><tt/-v/, <tt/-V/ - print version of <tt/Xstat/ and exit
|
||||
<item><tt/-z/ - dump zero counters too. By default they are not shown.
|
||||
<item><tt/-a/ - dump absolute values of counters. By default <tt/Xstat/
|
||||
calculates increments since the previous use.
|
||||
<item><tt/-s/ - do not update history, so that the next time you will
|
||||
see counters including values accumulated to the moment
|
||||
of this measurement too.
|
||||
<item><tt/-n/ - do not display anything, only update history.
|
||||
<item><tt/-r/ - reset history.
|
||||
<item><tt/-d INTERVAL/ - <tt/Xstat/ is run in daemon mode collecting
|
||||
statistics. <tt/INTERVAL/ is interval between measurements
|
||||
in seconds.
|
||||
<item><tt/-t INTERVAL/ - time interval to average rates. Default value
|
||||
is 60 seconds.
|
||||
<item><tt/-e/ - display extended information about errors (<tt/ifstat/ only).
|
||||
</itemize>
|
||||
|
||||
<p>
|
||||
History is just dump saved in file <tt>/tmp/.Xstat.uUID</tt>
|
||||
or in file given by environment variables <tt/NSTAT_HISTORY/,
|
||||
<tt/IFSTAT_HISTORY/ and <tt/RTACCT_HISTORY/.
|
||||
Each time you use <tt/Xstat/, the values there are updated.
|
||||
If you use patterns, only the values which you _really_ see
|
||||
are updated. If you want to skip an uninteresting period,
|
||||
use option <tt/-n/, or just output to <tt>/dev/null</tt>.
|
||||
|
||||
<p>
|
||||
<tt/Xstat/ understands when history is invalidated by system reboot
|
||||
or source of information switched between different instances
|
||||
of daemonic <tt/Xstat/ and kernel SNMP tables and does not
|
||||
use invalid history.
|
||||
|
||||
<p> Beware, <tt/Xstat/ will not produce sane output,
|
||||
when many processes use it simultaneously. If several processes
|
||||
under single user need this utility they should use environment
|
||||
variables to put their history in safe places
|
||||
or to use it with options <tt/-a -s/.
|
||||
|
||||
<p>
|
||||
Well, that's all. The utility is very simple, but nevertheless
|
||||
very handy.
|
||||
|
||||
<p> <bf/Output of XSTAT/
|
||||
<p> The first line of output is <tt/#/ followed by identifier
|
||||
of source of information, it may be word <tt/kernel/, when <tt/Xstat/
|
||||
gets information from kernel or some dotted decimal number followed
|
||||
by parameters, when it obtains information from running <tt/Xstat/ daemon.
|
||||
|
||||
<p>In the case of <tt/nstat/ the rest of output consists of three columns:
|
||||
SNMP MIB identifier,
|
||||
its value (or increment since previous measurement) and average
|
||||
rate of increase of the counter per second. <tt/ifstat/ outputs
|
||||
interface name followed by pairs of counter and rate of its change.
|
||||
|
||||
<p> <bf/Daemonic Xstat/
|
||||
<p> <tt/Xstat/ may be started as daemon by any user. This makes sense
|
||||
to avoid wrapped counters and to obtain reasonable long counters
|
||||
for large time. Also <tt/Xstat/ daemon calculates average rates.
|
||||
For the first goal sampling interval (option <tt/-d/) may be large enough,
|
||||
f.e. for gigabit rates byte counters overflow not more frequently than
|
||||
each 40 seconds and you may select interval of 20 seconds.
|
||||
On the other hand, when <tt/Xstat/ is used for estimating rates
|
||||
interval should be less than averaging period (option <tt/-t/), otherwise
|
||||
estimation loses in quality.
|
||||
|
||||
Client <tt/Xstat/, before trying to get information from the kernel,
|
||||
contacts daemon started by this user, then it tries system wide
|
||||
daemon, which is supposed to be started by superuser. And only if
|
||||
none of them replied it gets information from kernel.
|
||||
|
||||
<p> <bf/Environment/
|
||||
<p> <tt/NSTAT_HISTORY/ - name of history file for <tt/nstat/.
|
||||
<p> <tt/IFSTAT_HISTORY/ - name of history file for <tt/ifstat/.
|
||||
<p> <tt/RTACCT_HISTORY/ - name of history file for <tt/rtacct/.
|
||||
|
||||
</article>
|
||||
|
|
@ -0,0 +1,26 @@
|
|||
\textwidth 6.0in
|
||||
\textheight 8.5in
|
||||
|
||||
\input SNAPSHOT
|
||||
|
||||
\pagestyle{myheadings}
|
||||
\markboth{\protect\TITLE}{}
|
||||
\markright{{\protect\sc iproute2-ss\Draft}}
|
||||
|
||||
% To print it in compact form: both sides on one sheet (psnup -2)
|
||||
\evensidemargin=\oddsidemargin
|
||||
|
||||
\newenvironment{NB}{\bgroup \vskip 1mm\leftskip 1cm \footnotesize \noindent NB.
|
||||
}{\par\egroup \vskip 1mm}
|
||||
|
||||
\def\threeonly{[2.3.15+ only] }
|
||||
|
||||
\begin{document}
|
||||
|
||||
\makeatletter
|
||||
\renewcommand{\@oddhead}{{\protect\sc iproute2-ss\Draft} \hfill \protect\arabic{page}}
|
||||
\makeatother
|
||||
\let\oldthefootnote\thefootnote
|
||||
\def\thefootnote{}
|
||||
\footnotetext{Copyright \copyright~1999 A.N.Kuznetsov}
|
||||
|
||||
|
|
@ -0,0 +1,52 @@
|
|||
<!doctype linuxdoc system>
|
||||
|
||||
<article>
|
||||
|
||||
<title>RTSTAT Utility
|
||||
<author>Robert Olsson
|
||||
<date>some_negative_number, 20 Dec 2001
|
||||
|
||||
<p>
|
||||
Here is some code for monitoring the route cache. For systems handling high
|
||||
network load, servers, routers, firewalls etc the route cache and its garbage
|
||||
collection is crucial. Linux has a solid implementation.
|
||||
|
||||
<p>
|
||||
The kernel patch (not required since linux-2.4.7) adds statistics counters
|
||||
from route cache process into
|
||||
/proc/net/rt_cache_stat. A companion user mode program presents the statistics
|
||||
in a vmstat or iostat manner. The ratio between cache hits and misses gives
|
||||
the flow length.
|
||||
|
||||
<p>
|
||||
Hopefully it can help understanding performance and DoS and other related
|
||||
issues.
|
||||
|
||||
<p> A URL where newer versions of this utility can be (probably) found
|
||||
is ftp://robur.slu.se/pub/Linux/net-development/rt_cache_stat/
|
||||
|
||||
|
||||
<p><bf/Description/
|
||||
|
||||
<p>The format of the command is:
|
||||
|
||||
<tscreen><verb>
|
||||
rtstat [ OPTIONS ]
|
||||
</verb></tscreen>
|
||||
|
||||
<p> <tt/OPTIONS/ are:
|
||||
|
||||
<itemize>
|
||||
|
||||
<item><tt/-h/, <tt/-help/ - show help page and version of the utility.
|
||||
|
||||
<item><tt/-i INTERVAL/ - interval between snapshots, default value is
|
||||
2 seconds.
|
||||
|
||||
<item><tt/-s NUMBER/ - whether to print header line. 0 inhibits header line,
|
||||
1 prescribes to print it once and 2 (this is default setting) forces header
|
||||
line each 20 lines.
|
||||
|
||||
</itemize>
|
||||
|
||||
</article>
|
||||
|
|
@ -0,0 +1,525 @@
|
|||
<!doctype linuxdoc system>
|
||||
|
||||
<article>
|
||||
|
||||
<title>SS Utility: Quick Intro
|
||||
<author>Alexey Kuznetsov, <tt/kuznet@ms2.inr.ac.ru/
|
||||
<date>some_negative_number, 20 Sep 2001
|
||||
<abstract>
|
||||
<tt/ss/ is yet another utility to investigate sockets.
|
||||
Functionally it is NOT better than <tt/netstat/ combined
|
||||
with some perl/awk scripts and though it is surely faster
|
||||
it is not enough to make it much better. :-)
|
||||
So, stop reading this now and do not waste your time.
|
||||
Well, certainly, it proposes some functionality, which current
|
||||
netstat is still not able to do, but surely will soon.
|
||||
</abstract>
|
||||
|
||||
<sect>Why?
|
||||
|
||||
<p> <tt>/proc</tt> interface is inadequate, unfortunately.
|
||||
When the number of sockets is large enough, <tt/netstat/ or even
|
||||
plain <tt>cat /proc/net/tcp/</tt> cause nothing but pains and curses.
|
||||
In linux-2.4 the disease became worse: even if the number
|
||||
of sockets is small reading <tt>/proc/net/tcp/</tt> is slow enough.
|
||||
|
||||
This utility presents a new approach, which is supposed to scale
|
||||
well. I am not going to describe technical details here and
|
||||
will concentrate on description of the command.
|
||||
The only important thing to say is that it is not so bad idea
|
||||
to load module <tt/tcp_diag/, which can be found in directory
|
||||
<tt/Modules/ of <tt/iproute2/. If you do not make this <tt/ss/
|
||||
will work, but it falls back to <tt>/proc</tt> and becomes slow
|
||||
like <tt/netstat/, well, a bit faster yet (see section "Some numbers").
|
||||
|
||||
<sect>Old news
|
||||
|
||||
<p>
|
||||
In the simplest form <tt/ss/ is equivalent to netstat
|
||||
with some small deviations.
|
||||
|
||||
<itemize>
|
||||
<item><tt/ss -t -a/ dumps all TCP sockets
|
||||
<item><tt/ss -u -a/ dumps all UDP sockets
|
||||
<item><tt/ss -w -a/ dumps all RAW sockets
|
||||
<item><tt/ss -x -a/ dumps all UNIX sockets
|
||||
</itemize>
|
||||
|
||||
<p>
|
||||
Option <tt/-o/ shows TCP timers state.
|
||||
Option <tt/-e/ shows some extended information.
|
||||
Etc. etc. etc. Seems, all the options of netstat related to sockets
|
||||
are supported. Though not AX.25 and other bizarres. :-)
|
||||
If someone wants, he can make support for decnet and ipx.
|
||||
Some rudimentary support for them is already present in iproute2 libutils,
|
||||
and I will be glad to see these new members.
|
||||
|
||||
<p>
|
||||
However, standard functionality is a bit different:
|
||||
|
||||
<p>
|
||||
The first: without option <tt/-a/ sockets in states
|
||||
<tt/TIME-WAIT/ and <tt/SYN-RECV/ are skipped too.
|
||||
It is more reasonable default, I think.
|
||||
|
||||
<p>
|
||||
The second: format of UNIX sockets is different. It coincides
|
||||
with tcp/udp. Though standard kernel still does not allow to
|
||||
see write/read queues and peer address of connected UNIX sockets,
|
||||
the patch doing this exists.
|
||||
|
||||
<p>
|
||||
The third: default is to dump only TCP sockets, rather than all of the types.
|
||||
|
||||
<p>
|
||||
The next: by default it does not resolve numeric host addresses (like <tt/ip/)!
|
||||
Resolving is enabled with option <tt/-r/. Service names, usually stored
|
||||
in local files, are resolved by default. Also, if service database
|
||||
does not contain references to a port, <tt/ss/ queries system
|
||||
<tt/rpcbind/. RPC services are prefixed with <tt/rpc./
|
||||
Resolution of services may be suppressed with option <tt/-n/.
|
||||
|
||||
<p>
|
||||
It does not accept "long" options (I dislike them, sorry).
|
||||
So, address family is given with family identifier following
|
||||
option <tt/-f/ to be aligned with iproute2 conventions.
|
||||
Mostly, it is to allow option parser to parse
|
||||
addresses correctly, but as side effect it really limits dumping
|
||||
to sockets supporting only given family. Option <tt/-A/ followed
|
||||
by list of socket tables to dump is also supported.
|
||||
Logically, id of socket table is different from _address_ family, which is
|
||||
another point of incompatibility. So, id is one of
|
||||
<tt/all/, <tt/tcp/, <tt/udp/,
|
||||
<tt/raw/, <tt/inet/, <tt/unix/, <tt/packet/, <tt/netlink/. See?
|
||||
Well, <tt/inet/ is just abbreviation for <tt/tcp|udp|raw/
|
||||
and it is not difficult to guess that <tt/packet/ allows
|
||||
to look at packet sockets. Actually, there are also some other abbreviations,
|
||||
f.e. <tt/unix_dgram/ selects only datagram UNIX sockets.
|
||||
|
||||
<p>
|
||||
The next: well, I still do not know. :-)
|
||||
|
||||
|
||||
|
||||
|
||||
<sect>Time to talk about new functionality.
|
||||
|
||||
<p>It is builtin filtering of socket lists.
|
||||
|
||||
<sect1> Filtering by state.
|
||||
|
||||
<p>
|
||||
<tt/ss/ allows to filter socket states, using keywords
|
||||
<tt/state/ and <tt/exclude/, followed by some state
|
||||
identifier.
|
||||
|
||||
<p>
|
||||
State identifier are standard TCP state names (not listed,
|
||||
they are useless for you if you already do not know them)
|
||||
or abbreviations:
|
||||
|
||||
<itemize>
|
||||
<item><tt/all/ - for all the states
|
||||
<item><tt/bucket/ - for TCP minisockets (<tt/TIME-WAIT|SYN-RECV/)
|
||||
<item><tt/big/ - all except for minisockets
|
||||
<item><tt/connected/ - not closed and not listening
|
||||
<item><tt/synchronized/ - connected and not <tt/SYN-SENT/
|
||||
</itemize>
|
||||
|
||||
<p>
|
||||
F.e. to dump all tcp sockets except <tt/SYN-RECV/:
|
||||
|
||||
<tscreen><verb>
|
||||
ss exclude SYN-RECV
|
||||
</verb></tscreen>
|
||||
|
||||
<p>
|
||||
If neither <tt/state/ nor <tt/exclude/ directives
|
||||
are present,
|
||||
state filter defaults to <tt/all/ with option <tt/-a/
|
||||
or to <tt/all/,
|
||||
excluding listening, syn-recv, time-wait and closed sockets.
|
||||
|
||||
<sect1> Filtering by addresses and ports.
|
||||
|
||||
<p>
|
||||
Option list may contain address/port filter.
|
||||
It is boolean expression which consists of boolean operation
|
||||
<tt/or/, <tt/and/, <tt/not/ and predicates.
|
||||
Actually, all the flavors of names for boolean operations are eaten:
|
||||
<tt/&/, <tt/&&/, <tt/|/, <tt/||/, <tt/!/, but do not forget
|
||||
about special sense given to these symbols by unix shells and escape
|
||||
them correctly, when used from command line.
|
||||
|
||||
<p>
|
||||
Predicates may be of the following kinds:
|
||||
|
||||
<itemize>
|
||||
<item>A. Address/port match, where address is checked against mask
|
||||
and port is either wildcard or exact. It is one of:
|
||||
|
||||
<tscreen><verb>
|
||||
dst prefix:port
|
||||
src prefix:port
|
||||
src unix:STRING
|
||||
src link:protocol:ifindex
|
||||
src nl:channel:pid
|
||||
</verb></tscreen>
|
||||
|
||||
Both prefix and port may be absent or replaced with <tt/*/,
|
||||
which means wildcard. UNIX socket use more powerful scheme
|
||||
matching to socket names by shell wildcards. Also, prefixes
|
||||
unix: and link: may be omitted, if address family is evident
|
||||
from context (with option <tt/-x/ or with <tt/-f unix/
|
||||
or with <tt/unix/ keyword)
|
||||
|
||||
<p>
|
||||
F.e.
|
||||
|
||||
<tscreen><verb>
|
||||
dst 10.0.0.1
|
||||
dst 10.0.0.1:
|
||||
dst 10.0.0.1/32:
|
||||
dst 10.0.0.1:*
|
||||
</verb></tscreen>
|
||||
are equivalent and mean socket connected to
|
||||
any port on host 10.0.0.1
|
||||
|
||||
<tscreen><verb>
|
||||
dst 10.0.0.0/24:22
|
||||
</verb></tscreen>
|
||||
sockets connected to port 22 on network
|
||||
10.0.0.0...255.
|
||||
|
||||
<p>
|
||||
Note that the port is separated from the address by a colon, which creates
|
||||
troubles with IPv6 addresses. Generally, we interpret the last
|
||||
colon as splitting port. To allow to give IPv6 addresses,
|
||||
trick like used in IPv6 HTTP URLs may be used:
|
||||
|
||||
<tscreen><verb>
|
||||
dst [::1]
|
||||
</verb></tscreen>
|
||||
are sockets connected to ::1 on any port
|
||||
|
||||
<p>
|
||||
Another way is <tt>dst ::1/128</tt>. The slash helps to understand that
|
||||
colon is part of IPv6 address.
|
||||
|
||||
<p>
|
||||
Now we can add another alias for <tt/dst 10.0.0.1/:
|
||||
<tt/dst [10.0.0.1]/. :-)
|
||||
|
||||
<p> Address may be a DNS name. In this case all the addresses are looked
|
||||
up (in all the address families, if it is not limited by option <tt/-f/
|
||||
or special address prefix <tt/inet:/, <tt/inet6:/) and resulting
|
||||
expression is <tt/or/ over all of them.
|
||||
|
||||
<item> B. Port expressions:
|
||||
<tscreen><verb>
|
||||
dport >= :1024
|
||||
dport != :22
|
||||
sport < :32000
|
||||
</verb></tscreen>
|
||||
etc.
|
||||
|
||||
All the relations: <tt/</, <tt/>/, <tt/<=/, <tt/>=/, <tt/=/, <tt/==/,
|
||||
<tt/!=/, <tt/eq/, <tt/ge/, <tt/lt/, <tt/ne/...
|
||||
Use the variant which you like more, but do not forget to escape special
|
||||
characters when typing them in command line. :-)
|
||||
|
||||
Note that port number syntactically coincides with the case A!
|
||||
You may even add an IP address, but it will not participate
|
||||
in comparison, except for <tt/==/ and <tt/!=/, which are equivalent
|
||||
to corresponding predicates of type A. F.e.
|
||||
<p>
|
||||
<tt/dst 10.0.0.1:22/
|
||||
is equivalent to <tt/dport eq 10.0.0.1:22/
|
||||
and
|
||||
<tt/not dst 10.0.0.1:22/ is equivalent to
|
||||
<tt/dport neq 10.0.0.1:22/
|
||||
|
||||
<item>C. Keyword <tt/autobound/. It matches to sockets bound automatically
|
||||
on local system.
|
||||
|
||||
</itemize>
|
||||
|
||||
|
||||
<sect> Examples
|
||||
|
||||
<p>
|
||||
<itemize>
|
||||
<item>1. List all the tcp sockets in state <tt/FIN-WAIT-1/ for our apache
|
||||
to network 193.233.7/24 and look at their timers:
|
||||
|
||||
<tscreen><verb>
|
||||
ss -o state fin-wait-1 \( sport = :http or sport = :https \) \
|
||||
dst 193.233.7/24
|
||||
</verb></tscreen>
|
||||
|
||||
Oops, forgot to say that missing logical operation is
|
||||
equivalent to <tt/and/.
|
||||
|
||||
<item> 2. Well, now look at the rest...
|
||||
|
||||
<tscreen><verb>
|
||||
ss -o excl fin-wait-1
|
||||
ss state fin-wait-1 \( sport neq :http and sport neq :https \) \
|
||||
or not dst 193.233.7/24
|
||||
</verb></tscreen>
|
||||
|
||||
Note that we have to do _two_ calls of ss to do this.
|
||||
State match is always anded to address/port match.
|
||||
The reason for this is purely technical: ss does fast skip of
|
||||
not matching states before parsing addresses and I consider the
|
||||
ability to skip fastly gobs of time-wait and syn-recv sockets
|
||||
as more important than logical generality.
|
||||
|
||||
<item> 3. So, let's look at all our sockets using autobound ports:
|
||||
|
||||
<tscreen><verb>
|
||||
ss -a -A all autobound
|
||||
</verb></tscreen>
|
||||
|
||||
|
||||
<item> 4. And eventually find all the local processes connected
|
||||
to local X servers:
|
||||
|
||||
<tscreen><verb>
|
||||
ss -xp dst "/tmp/.X11-unix/*"
|
||||
</verb></tscreen>
|
||||
|
||||
Pardon, this does not work with current kernel, patching is required.
|
||||
But we still can look at server side:
|
||||
|
||||
<tscreen><verb>
|
||||
ss -x src "/tmp/.X11-unix/*"
|
||||
</verb></tscreen>
|
||||
|
||||
</itemize>
|
||||
|
||||
|
||||
<sect> Returning to ground: real manual
|
||||
|
||||
<p>
|
||||
<sect1> Command arguments
|
||||
|
||||
<p> General format of arguments to <tt/ss/ is:
|
||||
|
||||
<tscreen><verb>
|
||||
ss [ OPTIONS ] [ STATE-FILTER ] [ ADDRESS-FILTER ]
|
||||
</verb></tscreen>
|
||||
|
||||
<sect2><tt/OPTIONS/
|
||||
<p> <tt/OPTIONS/ is list of single letter options, using common unix
|
||||
conventions.
|
||||
|
||||
<itemize>
|
||||
<item><tt/-h/ - show help page
|
||||
<item><tt/-?/ - the same, of course
|
||||
<item><tt/-v/, <tt/-V/ - print version of <tt/ss/ and exit
|
||||
<item><tt/-s/ - print summary statistics. This option does not parse
|
||||
socket lists obtaining summary from various sources. It is useful
|
||||
when amount of sockets is so huge that parsing <tt>/proc/net/tcp</tt>
|
||||
is painful.
|
||||
<item><tt/-D FILE/ - do not display anything, just dump raw information
|
||||
about TCP sockets to <tt/FILE/ after applying filters. If <tt/FILE/ is <tt/-/
|
||||
<tt/stdout/ is used.
|
||||
<item><tt/-F FILE/ - read continuation of filter from <tt/FILE/.
|
||||
Each line of <tt/FILE/ is interpreted like single command line option.
|
||||
If <tt/FILE/ is <tt/-/ <tt/stdin/ is used.
|
||||
<item><tt/-r/ - try to resolve numeric address/ports
|
||||
<item><tt/-n/ - do not try to resolve ports
|
||||
<item><tt/-o/ - show some optional information, f.e. TCP timers
|
||||
<item><tt/-i/ - show some information specific to TCP (RTO, congestion
|
||||
window, slow start threshold etc.)
|
||||
<item><tt/-e/ - show even more optional information
|
||||
<item><tt/-m/ - show extended information on memory used by the socket.
|
||||
It is available only with <tt/tcp_diag/ enabled.
|
||||
<item><tt/-p/ - show list of processes owning the socket
|
||||
<item><tt/-f FAMILY/ - default address family used for parsing addresses.
|
||||
Also this option limits listing to sockets supporting
|
||||
given address family. Currently the following families
|
||||
are supported: <tt/unix/, <tt/inet/, <tt/inet6/, <tt/link/,
|
||||
<tt/netlink/.
|
||||
<item><tt/-4/ - alias for <tt/-f inet/
|
||||
<item><tt/-6/ - alias for <tt/-f inet6/
|
||||
<item><tt/-0/ - alias for <tt/-f link/
|
||||
<item><tt/-A LIST-OF-TABLES/ - list of socket tables to dump, separated
|
||||
by commas. The following identifiers are understood:
|
||||
<tt/all/, <tt/inet/, <tt/tcp/, <tt/udp/, <tt/raw/,
|
||||
<tt/unix/, <tt/packet/, <tt/netlink/, <tt/unix_dgram/,
|
||||
<tt/unix_stream/, <tt/packet_raw/, <tt/packet_dgram/.
|
||||
<item><tt/-x/ - alias for <tt/-A unix/
|
||||
<item><tt/-t/ - alias for <tt/-A tcp/
|
||||
<item><tt/-u/ - alias for <tt/-A udp/
|
||||
<item><tt/-w/ - alias for <tt/-A raw/
|
||||
<item><tt/-a/ - show sockets of all the states. By default sockets
|
||||
in states <tt/LISTEN/, <tt/TIME-WAIT/, <tt/SYN-RECV/
|
||||
and <tt/CLOSE/ are skipped.
|
||||
<item><tt/-l/ - show only sockets in state <tt/LISTEN/
|
||||
</itemize>
|
||||
|
||||
<sect2><tt/STATE-FILTER/
|
||||
|
||||
<p><tt/STATE-FILTER/ allows to construct arbitrary set of
|
||||
states to match. Its syntax is sequence of keywords <tt/state/
|
||||
and <tt/exclude/ followed by identifier of state.
|
||||
Available identifiers are:
|
||||
|
||||
<p>
|
||||
<itemize>
|
||||
<item> All standard TCP states: <tt/established/, <tt/syn-sent/,
|
||||
<tt/syn-recv/, <tt/fin-wait-1/, <tt/fin-wait-2/, <tt/time-wait/,
|
||||
<tt/closed/, <tt/close-wait/, <tt/last-ack/, <tt/listen/ and <tt/closing/.
|
||||
|
||||
<item><tt/all/ - for all the states
|
||||
<item><tt/connected/ - all the states except for <tt/listen/ and <tt/closed/
|
||||
<item><tt/synchronized/ - all the <tt/connected/ states except for
|
||||
<tt/syn-sent/
|
||||
<item><tt/bucket/ - states, which are maintained as minisockets, i.e.
|
||||
<tt/time-wait/ and <tt/syn-recv/.
|
||||
<item><tt/big/ - opposite to <tt/bucket/
|
||||
</itemize>
|
||||
|
||||
<sect2><tt/ADDRESS_FILTER/
|
||||
|
||||
<p><tt/ADDRESS_FILTER/ is boolean expression with operations <tt/and/, <tt/or/
|
||||
and <tt/not/, which can be abbreviated in C style f.e. as <tt/&/,
|
||||
<tt/&&/.
|
||||
|
||||
<p>
|
||||
Predicates check socket addresses, both local and remote.
|
||||
There are the following kinds of predicates:
|
||||
|
||||
<itemize>
|
||||
<item> <tt/dst ADDRESS_PATTERN/ - matches remote address and port
|
||||
<item> <tt/src ADDRESS_PATTERN/ - matches local address and port
|
||||
<item> <tt/dport RELOP PORT/ - compares remote port to a number
|
||||
<item> <tt/sport RELOP PORT/ - compares local port to a number
|
||||
<item> <tt/autobound/ - checks that socket is bound to an ephemeral
|
||||
port
|
||||
</itemize>
|
||||
|
||||
<p><tt/RELOP/ is some of <tt/<=/, <tt/>=/, <tt/==/ etc.
|
||||
To make this more convenient for use in unix shell, alphabetic
|
||||
FORTRAN-like notations <tt/le/, <tt/gt/ etc. are accepted as well.
|
||||
|
||||
<p>The format and semantics of <tt/ADDRESS_PATTERN/ depends on address
|
||||
family.
|
||||
|
||||
<itemize>
|
||||
<item><tt/inet/ - <tt/ADDRESS_PATTERN/ consists of IP prefix, optionally
|
||||
followed by colon and port. If prefix or port part is absent or replaced
|
||||
with <tt/*/, this means wildcard match.
|
||||
<item><tt/inet6/ - The same as <tt/inet/, only prefix refers to an IPv6
|
||||
address. Unlike <tt/inet/ colon becomes ambiguous, so that <tt/ss/ allows
|
||||
to use scheme, like used in URLs, where address is surrounded with
|
||||
<tt/[/ ... <tt/]/.
|
||||
<item><tt/unix/ - <tt/ADDRESS_PATTERN/ is shell-style wildcard.
|
||||
<item><tt/packet/ - format looks like <tt/inet/, only interface index
|
||||
stays instead of port and link layer protocol id instead of address.
|
||||
<item><tt/netlink/ - format looks like <tt/inet/, only socket pid
|
||||
stays instead of port and netlink channel instead of address.
|
||||
</itemize>
|
||||
|
||||
<p><tt/PORT/ is syntactically <tt/ADDRESS_PATTERN/ with wildcard
|
||||
address part. Certainly, it is undefined for UNIX sockets.
|
||||
|
||||
<sect1> Environment variables
|
||||
|
||||
<p>
|
||||
<tt/ss/ allows to change source of information using various
|
||||
environment variables:
|
||||
|
||||
<p>
|
||||
<itemize>
|
||||
<item> <tt/PROC_SLABINFO/ to override <tt>/proc/slabinfo</tt>
|
||||
<item> <tt/PROC_NET_TCP/ to override <tt>/proc/net/tcp</tt>
|
||||
<item> <tt/PROC_NET_UDP/ to override <tt>/proc/net/udp</tt>
|
||||
<item> etc.
|
||||
</itemize>
|
||||
|
||||
<p>
|
||||
Variable <tt/PROC_ROOT/ allows to change root of all the <tt>/proc/</tt>
|
||||
hierarchy.
|
||||
|
||||
<p>
|
||||
Variable <tt/TCPDIAG_FILE/ prescribes to open a file instead of
|
||||
requesting kernel to dump information about TCP sockets.
|
||||
|
||||
|
||||
<p> This option is used mainly to investigate bug reports,
|
||||
when dumps of files usually found in <tt>/proc/</tt> are received
|
||||
by e-mail.
|
||||
|
||||
<sect1> Output format
|
||||
|
||||
<p>Six columns. The first is <tt/Netid/, it denotes socket type and
|
||||
transport protocol, when it is ambiguous: <tt/tcp/, <tt/udp/, <tt/raw/,
|
||||
<tt/u_str/ is abbreviation for <tt/unix_stream/, <tt/u_dgr/ for UNIX
|
||||
datagram sockets, <tt/nl/ for netlink, <tt/p_raw/ and <tt/p_dgr/ for
|
||||
raw and datagram packet sockets. This column is optional, it will
|
||||
be hidden, if filter selects a unique netid.
|
||||
|
||||
<p>
|
||||
The second column is <tt/State/. Socket state is displayed here.
|
||||
The names are standard TCP names, except for <tt/UNCONN/, which
|
||||
cannot happen for TCP, but normal for not connected sockets
|
||||
of other types. Again, this column can be hidden.
|
||||
|
||||
<p>
|
||||
Then two columns (<tt/Recv-Q/ and <tt/Send-Q/) showing amount of data
|
||||
queued for receive and transmit.
|
||||
|
||||
<p>
|
||||
And the last two columns display local address and port of the socket
|
||||
and its peer address, if the socket is connected.
|
||||
|
||||
<p>
|
||||
If options <tt/-o/, <tt/-e/ or <tt/-p/ were given, options are
|
||||
displayed not in fixed positions but separated by spaces pairs:
|
||||
<tt/option:value/. If value is not a single number, it is presented
|
||||
as list of values, enclosed to <tt/(/ ... <tt/)/ and separated with
|
||||
commas. F.e.
|
||||
|
||||
<tscreen><verb>
|
||||
timer:(keepalive,111min,0)
|
||||
</verb></tscreen>
|
||||
is typical format for TCP timer (option <tt/-o/).
|
||||
|
||||
<tscreen><verb>
|
||||
users:((X,113,3))
|
||||
</verb></tscreen>
|
||||
is typical for list of users (option <tt/-p/).
|
||||
|
||||
|
||||
<sect>Some numbers
|
||||
|
||||
<p>
|
||||
Well, let us use <tt/pidentd/ and a tool <tt/ibench/ to measure
|
||||
its performance. It is 30 requests per second here. Nothing to test,
|
||||
it is too slow. OK, let us patch pidentd with patch from directory
|
||||
Patches. After this it handles about 4300 requests per second
|
||||
and becomes handy tool to pollute socket tables with lots of timewait
|
||||
buckets.
|
||||
|
||||
<p>
|
||||
So, each test starts from pollution tables with 30000 sockets
|
||||
and then doing full dump of the table piped to wc and measuring
|
||||
timings with time:
|
||||
|
||||
<p>Results:
|
||||
|
||||
<itemize>
|
||||
<item> <tt/netstat -at/ - 15.6 seconds
|
||||
<item> <tt/ss -atr/, but without <tt/tcp_diag/ - 5.4 seconds
|
||||
<item> <tt/ss -atr/ with <tt/tcp_diag/ - 0.47 seconds
|
||||
</itemize>
|
||||
|
||||
No comments. Though one comment is necessary, most of time
|
||||
without <tt/tcp_diag/ is wasted inside kernel with completely
|
||||
blocked networking. More than 10 seconds, yes. <tt/tcp_diag/
|
||||
does the same work for 100 milliseconds of system time.
|
||||
|
||||
</article>
|
||||
|
|
@ -1,6 +0,0 @@
|
|||
#
|
||||
# subpath mappings from mount point for pinning
|
||||
#
|
||||
#3 tracing
|
||||
#4 foo/bar
|
||||
#5 tc/cls1
|
||||
|
|
@ -5,4 +5,3 @@
|
|||
4 meta
|
||||
7 canid
|
||||
8 ipset
|
||||
9 ipt
|
||||
|
|
|
|||
|
|
@ -12,7 +12,7 @@
|
|||
9 audit
|
||||
10 fiblookup
|
||||
11 connector
|
||||
12 nft
|
||||
12 nft
|
||||
13 ip6fw
|
||||
14 dec-rt
|
||||
15 uevent
|
||||
|
|
@ -20,4 +20,4 @@
|
|||
18 scsi-trans
|
||||
19 ecryptfs
|
||||
20 rdma
|
||||
21 crypto
|
||||
21 crypto
|
||||
|
|
|
|||
|
|
@ -14,12 +14,18 @@
|
|||
13 dnrouted
|
||||
14 xorp
|
||||
15 ntk
|
||||
16 dhcp
|
||||
18 keepalived
|
||||
16 dhcp
|
||||
42 babel
|
||||
99 openr
|
||||
186 bgp
|
||||
187 isis
|
||||
188 ospf
|
||||
189 rip
|
||||
192 eigrp
|
||||
|
||||
#
|
||||
# Used by me for gated
|
||||
#
|
||||
254 gated/aggr
|
||||
253 gated/bgp
|
||||
252 gated/ospf
|
||||
251 gated/ospfase
|
||||
250 gated/rip
|
||||
249 gated/static
|
||||
248 gated/conn
|
||||
247 gated/inet
|
||||
246 gated/default
|
||||
|
|
|
|||
|
|
@ -1,2 +0,0 @@
|
|||
Each file in this directory is an rt_protos configuration file. iproute2
|
||||
commands scan this directory processing all files that end in '.conf'.
|
||||
|
|
@ -1,2 +0,0 @@
|
|||
Each file in this directory is an rt_tables configuration file. iproute2
|
||||
commands scan this directory processing all files that end in '.conf'.
|
||||
|
|
@ -0,0 +1,122 @@
|
|||
# CHANGES
|
||||
# -------
|
||||
# v0.3a2- fixed bug in "if" operator. Thanks kad@dgtu.donetsk.ua.
|
||||
# v0.3a- added TIME parameter. Example:
|
||||
# TIME=00:00-19:00;64Kbit/6Kbit
|
||||
# So, between 00:00 and 19:00 RATE will be 64Kbit.
|
||||
# Just start "cbq.init timecheck" periodically from cron (every 10
|
||||
# minutes for example).
|
||||
# !!! Anyway you MUST start "cbq.init start" for CBQ initialize.
|
||||
# v0.2 - Some cosmetic changes. Now it is more compatible with
|
||||
# old bash version. Thanks to Stanislav V. Voronyi
|
||||
# <stas@cnti.uanet.kharkov.ua>.
|
||||
# v0.1 - First public release
|
||||
#
|
||||
# README
|
||||
# ------
|
||||
#
|
||||
# First of all - this is just a SIMPLE EXAMPLE of CBQ power.
|
||||
# Don't ask me "why" and "how" :)
|
||||
#
|
||||
# This is an example of using CBQ (Class Based Queueing) and policy-based
|
||||
# filter for building smart ethernet shapers. All CBQ parameters are
|
||||
# correct only for ETHERNET (eth0,1,2..) linux interfaces. It works for
|
||||
# ARCNET too (just set bandwidth parameter to 2Mbit). It was tested
|
||||
# on 2.1.125-2.1.129 linux kernels (KSI linux, Nostromo version) and
|
||||
# ip-route utility by A.Kuznetsov (iproute2-ss981101 version).
|
||||
# You can download ip-route from ftp://ftp.inr.ac.ru/ip-routing or
|
||||
# get iproute2*.rpm (compiled with glibc) from ftp.ksi-linux.com.
|
||||
#
|
||||
#
|
||||
# HOW IT WORKS
|
||||
#
|
||||
# Each shaper must be described by config file in $CBQ_PATH
|
||||
# (/etc/sysconfig/cbq/) directory - one config file for each CBQ shaper.
|
||||
#
|
||||
# Some words about config file name:
|
||||
# Each shaper has its personal ID - two byte HEX number. Really ID is
|
||||
# CBQ class.
|
||||
# So, filename looks like:
|
||||
#
|
||||
# cbq-1280.My_first_shaper
|
||||
# ^^^ ^^^ ^^^^^^^^^^^^^
|
||||
# | | |______ Shaper name - any word
|
||||
# | |___________________ ID (0000-FFFF), let ID looks like shaper's rate
|
||||
# |______________________ Filename must begin from "cbq-"
|
||||
#
|
||||
#
|
||||
# Config file describes shaper parameters and source[destination]
|
||||
# address[port].
|
||||
# For example let's prepare /etc/sysconfig/cbq/cbq-1280.My_first_shaper:
|
||||
#
|
||||
# ----------8<---------------------
|
||||
# DEVICE=eth0,10Mbit,1Mbit
|
||||
# RATE=128Kbit
|
||||
# WEIGHT=10Kbit
|
||||
# PRIO=5
|
||||
# RULE=192.168.1.0/24
|
||||
# ----------8<---------------------
|
||||
#
|
||||
# This is minimal configuration, where:
|
||||
# DEVICE: eth0 - device where we do control our traffic
|
||||
# 10Mbit - REAL ethernet card bandwidth
|
||||
# 1Mbit - "weight" of :1 class (parent for all shapers for eth0),
|
||||
# as a rule of thumb weight=bandwidth/10.
|
||||
# 100Mbit adapter's example: DEVICE=eth0,100Mbit,10Mbit
|
||||
# *** If you want to build more than one shaper per device it's
|
||||
# enough to describe bandwidth and weight once - cbq.init
|
||||
# is smart :) You can put only 'DEVICE=eth0' into cbq-*
|
||||
# config file for eth0.
|
||||
#
|
||||
# RATE: Shaper's speed - Kbit,Mbit or bps (bytes per second)
|
||||
#
|
||||
# WEIGHT: "weight" of shaper (CBQ class). Like for DEVICE - approx. RATE/10
|
||||
#
|
||||
# PRIO: shaper's priority from 1 to 8 where 1 is the highest one.
|
||||
# I do always use "5" for all my shapers.
|
||||
#
|
||||
# RULE: [source addr][:source port],[dest addr][:dest port]
|
||||
# Some examples:
|
||||
# RULE=10.1.1.0/24:80 - all traffic for network 10.1.1.0 to port 80
|
||||
# will be shaped.
|
||||
# RULE=10.2.2.5 - shaper works only for IP address 10.2.2.5
|
||||
# RULE=:25,10.2.2.128/25:5000 - all traffic from any address and port 25 to
|
||||
# address 10.2.2.128 - 10.2.2.255 and port 5000
|
||||
# will be shaped.
|
||||
# RULE=10.5.5.5:80, - shaper active only for traffic from port 80 of
|
||||
# address 10.5.5.5
|
||||
# Multiple RULE fields per one config file are allowed. For example:
|
||||
# RULE=10.1.1.2:80
|
||||
# RULE=10.1.1.2:25
|
||||
# RULE=10.1.1.2:110
|
||||
#
|
||||
# *** ATTENTION!!!
|
||||
# All shapers do work only for outgoing traffic!
|
||||
# So, if you want to build bidirectional shaper you must set it up for
|
||||
# both ethernet cards. For example let's build shaper for our linux box like:
|
||||
#
|
||||
# --------- 192.168.1.1
|
||||
# BACKBONE -----eth0-| linux |-eth1------*[our client]
|
||||
# ---------
|
||||
#
|
||||
# Let all traffic from backbone to client will be shaped at 28Kbit and
|
||||
# traffic from client to backbone - at 128Kbit. We need two config files:
|
||||
#
|
||||
# ---8<-----/etc/sysconfig/cbq/cbq-28.client-out----
|
||||
# DEVICE=eth1,10Mbit,1Mbit
|
||||
# RATE=28Kbit
|
||||
# WEIGHT=2Kbit
|
||||
# PRIO=5
|
||||
# RULE=192.168.1.1
|
||||
# ---8<---------------------------------------------
|
||||
#
|
||||
# ---8<-----/etc/sysconfig/cbq/cbq-128.client-in----
|
||||
# DEVICE=eth0,10Mbit,1Mbit
|
||||
# RATE=128Kbit
|
||||
# WEIGHT=10Kbit
|
||||
# PRIO=5
|
||||
# RULE=192.168.1.1,
|
||||
# ---8<---------------------------------------------
|
||||
# ^pay attention to "," - this is source address!
|
||||
#
|
||||
# Enjoy.
|
||||
|
|
@ -0,0 +1,49 @@
|
|||
#! /bin/sh -x
|
||||
#
|
||||
# sample script on using the ingress capabilities
|
||||
# this script shows how one can rate limit incoming SYNs
|
||||
# Useful for TCP-SYN attack protection. You can use
|
||||
# IPchains to have more powerful additions to the SYN (eg
|
||||
# in addition the subnet)
|
||||
#
|
||||
#path to various utilities;
|
||||
#change to reflect yours.
|
||||
#
|
||||
IPROUTE=/root/DS-6-beta/iproute2-990530-dsing
|
||||
TC=$IPROUTE/tc/tc
|
||||
IP=$IPROUTE/ip/ip
|
||||
IPCHAINS=/root/DS-6-beta/ipchains-1.3.9/ipchains
|
||||
INDEV=eth2
|
||||
#
|
||||
# tag all incoming SYN packets through $INDEV as mark value 1
|
||||
############################################################
|
||||
$IPCHAINS -A input -i $INDEV -y -m 1
|
||||
############################################################
|
||||
#
|
||||
# install the ingress qdisc on the ingress interface
|
||||
############################################################
|
||||
$TC qdisc add dev $INDEV handle ffff: ingress
|
||||
############################################################
|
||||
|
||||
#
|
||||
#
|
||||
# SYN packets are 40 bytes (320 bits) so three SYNs equals
|
||||
# 960 bits (approximately 1kbit); so we rate limit below
|
||||
# the incoming SYNs to 3/sec (not very useful really; but
|
||||
#serves to show the point - JHS
|
||||
############################################################
|
||||
$TC filter add dev $INDEV parent ffff: protocol ip prio 50 handle 1 fw \
|
||||
police rate 1kbit burst 40 mtu 9k drop flowid :1
|
||||
############################################################
|
||||
|
||||
|
||||
#
|
||||
echo "---- qdisc parameters Ingress ----------"
|
||||
$TC qdisc ls dev $INDEV
|
||||
echo "---- Class parameters Ingress ----------"
|
||||
$TC class ls dev $INDEV
|
||||
echo "---- filter parameters Ingress ----------"
|
||||
$TC filter ls dev $INDEV parent ffff:
|
||||
|
||||
#deleting the ingress qdisc
|
||||
#$TC qdisc del $INDEV ingress
|
||||
|
|
@ -1,18 +0,0 @@
|
|||
eBPF toy code examples (running in kernel) to familiarize yourself
|
||||
with syntax and features:
|
||||
|
||||
- BTF defined map examples
|
||||
- bpf_graft.c -> Demo on altering runtime behaviour
|
||||
- bpf_shared.c -> Ingress/egress map sharing example
|
||||
- bpf_map_in_map.c -> Using map in map example
|
||||
|
||||
- legacy struct bpf_elf_map defined map examples
|
||||
- legacy/bpf_shared.c -> Ingress/egress map sharing example
|
||||
- legacy/bpf_tailcall.c -> Using tail call chains
|
||||
- legacy/bpf_cyclic.c -> Simple cycle as tail calls
|
||||
- legacy/bpf_graft.c -> Demo on altering runtime behaviour
|
||||
- legacy/bpf_map_in_map.c -> Using map in map example
|
||||
|
||||
Note: Users should use the new BTF way to define the maps, the examples
|
||||
in legacy folder which is using struct bpf_elf_map defined maps is not
|
||||
recommended.
|
||||
|
|
@ -0,0 +1,258 @@
|
|||
/*
|
||||
* eBPF user space agent part
|
||||
*
|
||||
* Simple, _self-contained_ user space agent for the eBPF kernel
|
||||
* bpf_prog.c program, which gets all map fds passed from tc via unix
|
||||
* domain socket in one transaction and can thus keep referencing
|
||||
* them from user space in order to read out (or possibly modify)
|
||||
* map data. Here, just as a minimal example to display counters.
|
||||
*
|
||||
* The agent only uses the bpf(2) syscall API to read or possibly
|
||||
* write to eBPF maps, it doesn't need to be aware of the low-level
|
||||
* bytecode parts and/or ELF parsing bits.
|
||||
*
|
||||
* ! For more details, see header comment in bpf_prog.c !
|
||||
*
|
||||
* gcc bpf_agent.c -o bpf_agent -Wall -O2
|
||||
*
|
||||
* For example, a more complex user space agent could run on each
|
||||
* host, reading and writing into eBPF maps used by tc classifier
|
||||
* and actions. It would thus allow for implementing a distributed
|
||||
* tc architecture, for example, which would push down central
|
||||
* policies into eBPF maps, and thus altering run-time behaviour.
|
||||
*
|
||||
* -- Happy eBPF hacking! ;)
|
||||
*/
|
||||
|
||||
#define _GNU_SOURCE
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <errno.h>
|
||||
#include <unistd.h>
|
||||
#include <stdint.h>
|
||||
#include <assert.h>
|
||||
|
||||
#include <sys/un.h>
|
||||
#include <sys/types.h>
|
||||
#include <sys/stat.h>
|
||||
#include <sys/socket.h>
|
||||
|
||||
/* Just some misc macros as min(), offsetof(), etc. */
|
||||
#include "../../include/utils.h"
|
||||
/* Common code from fd passing. */
|
||||
#include "../../include/bpf_scm.h"
|
||||
/* Common, shared definitions with bpf_prog.c */
|
||||
#include "bpf_shared.h"
|
||||
/* Mini syscall wrapper */
|
||||
#include "bpf_sys.h"
|
||||
|
||||
/* Print the drop counter of every online CPU.
 *
 * fd: descriptor of the drops map; it is queried once per CPU with the
 *     CPU index as key and a long as value.
 */
static void bpf_dump_drops(int fd)
{
	int cpu, max;

	max = sysconf(_SC_NPROCESSORS_ONLN);

	printf(" `- number of drops:");
	for (cpu = 0; cpu < max; cpu++) {
		long drops = 0;
		int ret;

		/* The lookup must not live inside assert(): with -DNDEBUG
		 * the whole expression would be compiled out and drops
		 * would be printed without ever being filled in.
		 */
		ret = bpf_lookup_elem(fd, &cpu, &drops);
		assert(ret == 0);
		(void)ret;

		printf("\tcpu%d: %5ld", cpu, drops);
	}
	printf("\n");
}
|
||||
|
||||
static void bpf_dump_queue(int fd)
|
||||
{
|
||||
/* Just for the same of the example. */
|
||||
int max_queue = 4, i;
|
||||
|
||||
printf(" | nic queues:");
|
||||
for (i = 0; i < max_queue; i++) {
|
||||
struct count_queue cq;
|
||||
int ret;
|
||||
|
||||
memset(&cq, 0, sizeof(cq));
|
||||
ret = bpf_lookup_elem(fd, &i, &cq);
|
||||
assert(ret == 0 || (ret < 0 && errno == ENOENT));
|
||||
|
||||
printf("\tq%d:[pkts: %ld, mis: %ld]",
|
||||
i, cq.total, cq.mismatch);
|
||||
}
|
||||
printf("\n");
|
||||
}
|
||||
|
||||
static void bpf_dump_proto(int fd)
|
||||
{
|
||||
uint8_t protos[] = { IPPROTO_TCP, IPPROTO_UDP, IPPROTO_ICMP };
|
||||
char *names[] = { "tcp", "udp", "icmp" };
|
||||
int i;
|
||||
|
||||
printf(" ` protos:");
|
||||
for (i = 0; i < ARRAY_SIZE(protos); i++) {
|
||||
struct count_tuple ct;
|
||||
int ret;
|
||||
|
||||
memset(&ct, 0, sizeof(ct));
|
||||
ret = bpf_lookup_elem(fd, &protos[i], &ct);
|
||||
assert(ret == 0 || (ret < 0 && errno == ENOENT));
|
||||
|
||||
printf("\t%s:[pkts: %ld, bytes: %ld]",
|
||||
names[i], ct.packets, ct.bytes);
|
||||
}
|
||||
printf("\n");
|
||||
}
|
||||
|
||||
static void bpf_dump_map_data(int *tfd)
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = 0; i < 30; i++) {
|
||||
const int period = 5;
|
||||
|
||||
printf("data, period: %dsec\n", period);
|
||||
|
||||
bpf_dump_drops(tfd[BPF_MAP_ID_DROPS]);
|
||||
bpf_dump_queue(tfd[BPF_MAP_ID_QUEUE]);
|
||||
bpf_dump_proto(tfd[BPF_MAP_ID_PROTO]);
|
||||
|
||||
sleep(period);
|
||||
}
|
||||
}
|
||||
|
||||
static void bpf_info_loop(int *fds, struct bpf_map_aux *aux)
|
||||
{
|
||||
int i, tfd[BPF_MAP_ID_MAX];
|
||||
|
||||
printf("ver: %d\nobj: %s\ndev: %lu\nino: %lu\nmaps: %u\n",
|
||||
aux->uds_ver, aux->obj_name, aux->obj_st.st_dev,
|
||||
aux->obj_st.st_ino, aux->num_ent);
|
||||
|
||||
for (i = 0; i < aux->num_ent; i++) {
|
||||
printf("map%d:\n", i);
|
||||
printf(" `- fd: %u\n", fds[i]);
|
||||
printf(" | serial: %u\n", aux->ent[i].id);
|
||||
printf(" | type: %u\n", aux->ent[i].type);
|
||||
printf(" | max elem: %u\n", aux->ent[i].max_elem);
|
||||
printf(" | size key: %u\n", aux->ent[i].size_key);
|
||||
printf(" ` size val: %u\n", aux->ent[i].size_value);
|
||||
|
||||
tfd[aux->ent[i].id] = fds[i];
|
||||
}
|
||||
|
||||
bpf_dump_map_data(tfd);
|
||||
}
|
||||
|
||||
static void bpf_map_get_from_env(int *tfd)
|
||||
{
|
||||
char key[64], *val;
|
||||
int i;
|
||||
|
||||
for (i = 0; i < BPF_MAP_ID_MAX; i++) {
|
||||
memset(key, 0, sizeof(key));
|
||||
snprintf(key, sizeof(key), "BPF_MAP%d", i);
|
||||
|
||||
val = secure_getenv(key);
|
||||
assert(val != NULL);
|
||||
|
||||
tfd[i] = atoi(val);
|
||||
}
|
||||
}
|
||||
|
||||
static int bpf_map_set_recv(int fd, int *fds, struct bpf_map_aux *aux,
|
||||
unsigned int entries)
|
||||
{
|
||||
struct bpf_map_set_msg msg;
|
||||
int *cmsg_buf, min_fd, i;
|
||||
char *amsg_buf, *mmsg_buf;
|
||||
|
||||
cmsg_buf = bpf_map_set_init(&msg, NULL, 0);
|
||||
amsg_buf = (char *)msg.aux.ent;
|
||||
mmsg_buf = (char *)&msg.aux;
|
||||
|
||||
for (i = 0; i < entries; i += min_fd) {
|
||||
struct cmsghdr *cmsg;
|
||||
int ret;
|
||||
|
||||
min_fd = min(BPF_SCM_MAX_FDS * 1U, entries - i);
|
||||
|
||||
bpf_map_set_init_single(&msg, min_fd);
|
||||
|
||||
ret = recvmsg(fd, &msg.hdr, 0);
|
||||
if (ret <= 0)
|
||||
return ret ? : -1;
|
||||
|
||||
cmsg = CMSG_FIRSTHDR(&msg.hdr);
|
||||
if (!cmsg || cmsg->cmsg_type != SCM_RIGHTS)
|
||||
return -EINVAL;
|
||||
if (msg.hdr.msg_flags & MSG_CTRUNC)
|
||||
return -EIO;
|
||||
|
||||
min_fd = (cmsg->cmsg_len - sizeof(*cmsg)) / sizeof(fd);
|
||||
if (min_fd > entries || min_fd <= 0)
|
||||
return -1;
|
||||
|
||||
memcpy(&fds[i], cmsg_buf, sizeof(fds[0]) * min_fd);
|
||||
memcpy(&aux->ent[i], amsg_buf, sizeof(aux->ent[0]) * min_fd);
|
||||
memcpy(aux, mmsg_buf, offsetof(struct bpf_map_aux, ent));
|
||||
|
||||
if (i + min_fd == aux->num_ent)
|
||||
break;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int main(int argc, char **argv)
|
||||
{
|
||||
int fds[BPF_SCM_MAX_FDS];
|
||||
struct bpf_map_aux aux;
|
||||
struct sockaddr_un addr;
|
||||
int fd, ret, i;
|
||||
|
||||
/* When arguments are being passed, we take it as a path
|
||||
* to a Unix domain socket, otherwise we grab the fds
|
||||
* from the environment to demonstrate both possibilities.
|
||||
*/
|
||||
if (argc == 1) {
|
||||
int tfd[BPF_MAP_ID_MAX];
|
||||
|
||||
bpf_map_get_from_env(tfd);
|
||||
bpf_dump_map_data(tfd);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
fd = socket(AF_UNIX, SOCK_DGRAM, 0);
|
||||
if (fd < 0) {
|
||||
fprintf(stderr, "Cannot open socket: %s\n",
|
||||
strerror(errno));
|
||||
exit(1);
|
||||
}
|
||||
|
||||
memset(&addr, 0, sizeof(addr));
|
||||
addr.sun_family = AF_UNIX;
|
||||
strncpy(addr.sun_path, argv[argc - 1], sizeof(addr.sun_path));
|
||||
|
||||
ret = bind(fd, (struct sockaddr *)&addr, sizeof(addr));
|
||||
if (ret < 0) {
|
||||
fprintf(stderr, "Cannot bind to socket: %s\n",
|
||||
strerror(errno));
|
||||
exit(1);
|
||||
}
|
||||
|
||||
memset(fds, 0, sizeof(fds));
|
||||
memset(&aux, 0, sizeof(aux));
|
||||
|
||||
ret = bpf_map_set_recv(fd, fds, &aux, BPF_SCM_MAX_FDS);
|
||||
if (ret >= 0)
|
||||
bpf_info_loop(fds, &aux);
|
||||
|
||||
for (i = 0; i < aux.num_ent; i++)
|
||||
close(fds[i]);
|
||||
|
||||
close(fd);
|
||||
return 0;
|
||||
}
|
||||
|
|
@ -0,0 +1,58 @@
|
|||
#ifndef __BPF_FUNCS__
|
||||
#define __BPF_FUNCS__
|
||||
|
||||
/* Misc macros. */
|
||||
#ifndef __maybe_unused
|
||||
# define __maybe_unused __attribute__ ((__unused__))
|
||||
#endif
|
||||
|
||||
#ifndef __section
|
||||
# define __section(NAME) __attribute__((section(NAME), used))
|
||||
#endif
|
||||
|
||||
#ifndef offsetof
|
||||
# define offsetof __builtin_offsetof
|
||||
#endif
|
||||
|
||||
#ifndef htons
|
||||
# define htons(x) __constant_htons((x))
|
||||
#endif
|
||||
|
||||
#ifndef likely
|
||||
# define likely(x) __builtin_expect(!!(x), 1)
|
||||
#endif
|
||||
|
||||
#ifndef unlikely
|
||||
# define unlikely(x) __builtin_expect(!!(x), 0)
|
||||
#endif
|
||||
|
||||
/* The verifier will translate them to actual function calls. */
|
||||
/* Map lookup helper: takes a map and a key, returns a pointer that the
 * callers in this example check for NULL before use.
 */
static void *(*bpf_map_lookup_elem)(void *map, void *key) __maybe_unused =
|
||||
(void *) BPF_FUNC_map_lookup_elem;
|
||||
|
||||
/* Map update helper: takes a map, a key, a value and flags. */
static int (*bpf_map_update_elem)(void *map, void *key, void *value,
|
||||
unsigned long long flags) __maybe_unused =
|
||||
(void *) BPF_FUNC_map_update_elem;
|
||||
|
||||
/* Map delete helper: takes a map and a key. */
static int (*bpf_map_delete_elem)(void *map, void *key) __maybe_unused =
|
||||
(void *) BPF_FUNC_map_delete_elem;
|
||||
|
||||
/* Helper without arguments; the example uses its result as CPU index. */
static unsigned int (*get_smp_processor_id)(void) __maybe_unused =
|
||||
(void *) BPF_FUNC_get_smp_processor_id;
|
||||
|
||||
/* Helper without arguments; the example uses its result as a random
 * 32 bit number.
 */
static unsigned int (*get_prandom_u32)(void) __maybe_unused =
|
||||
(void *) BPF_FUNC_get_prandom_u32;
|
||||
|
||||
/* LLVM built-in functions that an eBPF C program may use to emit
|
||||
* BPF_LD_ABS and BPF_LD_IND instructions.
|
||||
*/
|
||||
unsigned long long load_byte(void *skb, unsigned long long off)
|
||||
asm ("llvm.bpf.load.byte");
|
||||
|
||||
unsigned long long load_half(void *skb, unsigned long long off)
|
||||
asm ("llvm.bpf.load.half");
|
||||
|
||||
unsigned long long load_word(void *skb, unsigned long long off)
|
||||
asm ("llvm.bpf.load.word");
|
||||
|
||||
#endif /* __BPF_FUNCS__ */
|
||||
|
|
@ -1,66 +0,0 @@
|
|||
#include "../../include/bpf_api.h"
|
||||
|
||||
/* This example demonstrates how classifier run-time behaviour
|
||||
* can be altered with tail calls. We start out with an empty
|
||||
* jmp_tc array, then add section aaa to the array slot 0, and
|
||||
* later on atomically replace it with section bbb. Note that
|
||||
* as shown in other examples, the tc loader can prepopulate
|
||||
* tail called sections, here we start out with an empty one
|
||||
* on purpose to show it can also be done this way.
|
||||
*
|
||||
* tc filter add dev foo parent ffff: bpf obj graft.o
|
||||
* tc exec bpf dbg
|
||||
* [...]
|
||||
* Socket Thread-20229 [001] ..s. 138993.003923: : fallthrough
|
||||
* <idle>-0 [001] ..s. 138993.202265: : fallthrough
|
||||
* Socket Thread-20229 [001] ..s. 138994.004149: : fallthrough
|
||||
* [...]
|
||||
*
|
||||
* tc exec bpf graft m:globals/jmp_tc key 0 obj graft.o sec aaa
|
||||
* tc exec bpf dbg
|
||||
* [...]
|
||||
* Socket Thread-19818 [002] ..s. 139012.053587: : aaa
|
||||
* <idle>-0 [002] ..s. 139012.172359: : aaa
|
||||
* Socket Thread-19818 [001] ..s. 139012.173556: : aaa
|
||||
* [...]
|
||||
*
|
||||
* tc exec bpf graft m:globals/jmp_tc key 0 obj graft.o sec bbb
|
||||
* tc exec bpf dbg
|
||||
* [...]
|
||||
* Socket Thread-19818 [002] ..s. 139022.102967: : bbb
|
||||
* <idle>-0 [002] ..s. 139022.155640: : bbb
|
||||
* Socket Thread-19818 [001] ..s. 139022.156730: : bbb
|
||||
* [...]
|
||||
*/
|
||||
|
||||
struct {
|
||||
__uint(type, BPF_MAP_TYPE_PROG_ARRAY);
|
||||
__uint(key_size, sizeof(uint32_t));
|
||||
__uint(value_size, sizeof(uint32_t));
|
||||
__uint(max_entries, 1);
|
||||
__uint(pinning, LIBBPF_PIN_BY_NAME);
|
||||
} jmp_tc __section(".maps");
|
||||
|
||||
/* Section "aaa": emits the trace line "aaa" and returns
 * TC_H_MAKE(1, 42).
 */
__section("aaa")
int cls_aaa(struct __sk_buff *skb)
{
	const int verdict = TC_H_MAKE(1, 42);

	printt("aaa\n");

	return verdict;
}
|
||||
|
||||
/* Section "bbb": emits the trace line "bbb" and returns
 * TC_H_MAKE(1, 43).
 */
__section("bbb")
int cls_bbb(struct __sk_buff *skb)
{
	const int verdict = TC_H_MAKE(1, 43);

	printt("bbb\n");

	return verdict;
}
|
||||
|
||||
__section_cls_entry
|
||||
int cls_entry(struct __sk_buff *skb)
|
||||
{
|
||||
tail_call(skb, &jmp_tc, 0);
|
||||
printt("fallthrough\n");
|
||||
return BPF_H_DEFAULT;
|
||||
}
|
||||
|
||||
BPF_LICENSE("GPL");
|
||||
|
|
@ -1,55 +0,0 @@
|
|||
#include "../../include/bpf_api.h"
|
||||
|
||||
struct inner_map {
|
||||
__uint(type, BPF_MAP_TYPE_ARRAY);
|
||||
__uint(key_size, sizeof(uint32_t));
|
||||
__uint(value_size, sizeof(uint32_t));
|
||||
__uint(max_entries, 1);
|
||||
} map_inner __section(".maps");
|
||||
|
||||
struct {
|
||||
__uint(type, BPF_MAP_TYPE_ARRAY_OF_MAPS);
|
||||
__uint(key_size, sizeof(uint32_t));
|
||||
__uint(value_size, sizeof(uint32_t));
|
||||
__uint(max_entries, 1);
|
||||
__uint(pinning, LIBBPF_PIN_BY_NAME);
|
||||
__array(values, struct inner_map);
|
||||
} map_outer __section(".maps") = {
|
||||
.values = {
|
||||
[0] = &map_inner,
|
||||
},
|
||||
};
|
||||
|
||||
/* Section "egress": looks up the inner map through slot 0 of map_outer
 * and atomically adds 1 to its element 0. Always returns BPF_H_DEFAULT.
 */
__section("egress")
int emain(struct __sk_buff *skb)
{
	struct bpf_elf_map *inner;
	int key = 0, *counter;

	inner = map_lookup_elem(&map_outer, &key);
	if (!inner)
		return BPF_H_DEFAULT;

	counter = map_lookup_elem(inner, &key);
	if (counter)
		lock_xadd(counter, 1);

	return BPF_H_DEFAULT;
}
|
||||
|
||||
/* Section "ingress": looks up the inner map through slot 0 of map_outer
 * and traces the value of its element 0. Always returns BPF_H_DEFAULT.
 */
__section("ingress")
int imain(struct __sk_buff *skb)
{
	struct bpf_elf_map *inner;
	int key = 0, *counter;

	inner = map_lookup_elem(&map_outer, &key);
	if (!inner)
		return BPF_H_DEFAULT;

	counter = map_lookup_elem(inner, &key);
	if (counter)
		printt("map val: %d\n", *counter);

	return BPF_H_DEFAULT;
}
|
||||
|
||||
BPF_LICENSE("GPL");
|
||||
|
|
@ -0,0 +1,496 @@
|
|||
/*
|
||||
* eBPF kernel space program part
|
||||
*
|
||||
* Toy eBPF program for demonstration purposes, some parts derived from
|
||||
* kernel tree's samples/bpf/sockex2_kern.c example.
|
||||
*
|
||||
* More background on eBPF, kernel tree: Documentation/networking/filter.txt
|
||||
*
|
||||
* Note, this file is rather large, and most classifier and actions are
|
||||
* likely smaller to accomplish one specific use-case and are tailored
|
||||
* for high performance. For performance reasons, you might also have the
|
||||
* classifier and action already merged inside the classifier.
|
||||
*
|
||||
* In order to show various features it serves as a bigger programming
|
||||
* example, which you should feel free to rip apart and experiment with.
|
||||
*
|
||||
* Compilation, configuration example:
|
||||
*
|
||||
* Note: as long as the BPF backend in LLVM is still experimental,
|
||||
* you need to build LLVM with --enable-experimental-targets=BPF
|
||||
* Also, make sure your 4.1+ kernel is compiled with CONFIG_BPF_SYSCALL=y,
|
||||
* and you have libelf.h and gelf.h headers and can link tc against -lelf.
|
||||
*
|
||||
* In case you need to sync kernel headers, go to your kernel source tree:
|
||||
* # make headers_install INSTALL_HDR_PATH=/usr/
|
||||
*
|
||||
* $ export PATH=/home/<...>/llvm/Debug+Asserts/bin/:$PATH
|
||||
* $ clang -O2 -emit-llvm -c bpf_prog.c -o - | llc -march=bpf -filetype=obj -o bpf.o
|
||||
* $ objdump -h bpf.o
|
||||
* [...]
|
||||
* 3 classifier 000007f8 0000000000000000 0000000000000000 00000040 2**3
|
||||
* CONTENTS, ALLOC, LOAD, RELOC, READONLY, CODE
|
||||
* 4 action-mark 00000088 0000000000000000 0000000000000000 00000838 2**3
|
||||
* CONTENTS, ALLOC, LOAD, RELOC, READONLY, CODE
|
||||
* 5 action-rand 00000098 0000000000000000 0000000000000000 000008c0 2**3
|
||||
* CONTENTS, ALLOC, LOAD, RELOC, READONLY, CODE
|
||||
* 6 maps 00000030 0000000000000000 0000000000000000 00000958 2**2
|
||||
* CONTENTS, ALLOC, LOAD, DATA
|
||||
* 7 license 00000004 0000000000000000 0000000000000000 00000988 2**0
|
||||
* CONTENTS, ALLOC, LOAD, DATA
|
||||
* [...]
|
||||
* # echo 1 > /proc/sys/net/core/bpf_jit_enable
|
||||
* $ gcc bpf_agent.c -o bpf_agent -Wall -O2
|
||||
* # ./bpf_agent /tmp/bpf-uds (e.g. on a different terminal)
|
||||
* # tc filter add dev em1 parent 1: bpf obj bpf.o exp /tmp/bpf-uds flowid 1:1 \
|
||||
* action bpf obj bpf.o sec action-mark \
|
||||
* action bpf obj bpf.o sec action-rand ok
|
||||
* # tc filter show dev em1
|
||||
* filter parent 1: protocol all pref 49152 bpf
|
||||
* filter parent 1: protocol all pref 49152 bpf handle 0x1 flowid 1:1 bpf.o:[classifier]
|
||||
* action order 1: bpf bpf.o:[action-mark] default-action pipe
|
||||
* index 52 ref 1 bind 1
|
||||
*
|
||||
* action order 2: bpf bpf.o:[action-rand] default-action pipe
|
||||
* index 53 ref 1 bind 1
|
||||
*
|
||||
* action order 3: gact action pass
|
||||
* random type none pass val 0
|
||||
* index 38 ref 1 bind 1
|
||||
*
|
||||
* The same program can also be installed on ingress side (as opposed to above
|
||||
* egress configuration), e.g.:
|
||||
*
|
||||
* # tc qdisc add dev em1 handle ffff: ingress
|
||||
* # tc filter add dev em1 parent ffff: bpf obj ...
|
||||
*
|
||||
* Notes on BPF agent:
|
||||
*
|
||||
* In the above example, the bpf_agent creates the unix domain socket
|
||||
* natively. "tc exec" can also spawn a shell and hold the sockets there:
|
||||
*
|
||||
* # tc exec bpf imp /tmp/bpf-uds
|
||||
* # tc filter add dev em1 parent 1: bpf obj bpf.o exp /tmp/bpf-uds flowid 1:1 \
|
||||
* action bpf obj bpf.o sec action-mark \
|
||||
* action bpf obj bpf.o sec action-rand ok
|
||||
* sh-4.2# (shell spawned from tc exec)
|
||||
* sh-4.2# bpf_agent
|
||||
* [...]
|
||||
*
|
||||
* This will read out fds over environment and produce the same data dump
|
||||
* as below. This has the advantage that the spawned shell owns the fds
|
||||
* and thus if the agent is restarted, it can reattach to the same fds, also
|
||||
* various programs can easily read/modify the data simultaneously from user
|
||||
* space side.
|
||||
*
|
||||
* If the shell is unnecessary, the agent can also just be spawned directly
|
||||
* via tc exec:
|
||||
*
|
||||
* # tc exec bpf imp /tmp/bpf-uds run bpf_agent
|
||||
* # tc filter add dev em1 parent 1: bpf obj bpf.o exp /tmp/bpf-uds flowid 1:1 \
|
||||
* action bpf obj bpf.o sec action-mark \
|
||||
* action bpf obj bpf.o sec action-rand ok
|
||||
*
|
||||
* BPF agent example output:
|
||||
*
|
||||
* ver: 1
|
||||
* obj: bpf.o
|
||||
* dev: 64770
|
||||
* ino: 6045133
|
||||
* maps: 3
|
||||
* map0:
|
||||
* `- fd: 4
|
||||
* | serial: 1
|
||||
* | type: 1
|
||||
* | max elem: 256
|
||||
* | size key: 1
|
||||
* ` size val: 16
|
||||
* map1:
|
||||
* `- fd: 5
|
||||
* | serial: 2
|
||||
* | type: 1
|
||||
* | max elem: 1024
|
||||
* | size key: 4
|
||||
* ` size val: 16
|
||||
* map2:
|
||||
* `- fd: 6
|
||||
* | serial: 3
|
||||
* | type: 2
|
||||
* | max elem: 64
|
||||
* | size key: 4
|
||||
* ` size val: 8
|
||||
* data, period: 5sec
|
||||
* `- number of drops: cpu0: 0 cpu1: 0 cpu2: 0 cpu3: 0
|
||||
* | nic queues: q0:[pkts: 0, mis: 0] q1:[pkts: 0, mis: 0] q2:[pkts: 0, mis: 0] q3:[pkts: 0, mis: 0]
|
||||
* ` protos: tcp:[pkts: 0, bytes: 0] udp:[pkts: 0, bytes: 0] icmp:[pkts: 0, bytes: 0]
|
||||
* data, period: 5sec
|
||||
* `- number of drops: cpu0: 5 cpu1: 0 cpu2: 0 cpu3: 1
|
||||
* | nic queues: q0:[pkts: 0, mis: 0] q1:[pkts: 0, mis: 0] q2:[pkts: 24, mis: 14] q3:[pkts: 0, mis: 0]
|
||||
* ` protos: tcp:[pkts: 13, bytes: 1989] udp:[pkts: 10, bytes: 710] icmp:[pkts: 0, bytes: 0]
|
||||
* data, period: 5sec
|
||||
* `- number of drops: cpu0: 5 cpu1: 0 cpu2: 3 cpu3: 3
|
||||
* | nic queues: q0:[pkts: 0, mis: 0] q1:[pkts: 0, mis: 0] q2:[pkts: 39, mis: 21] q3:[pkts: 0, mis: 0]
|
||||
* ` protos: tcp:[pkts: 20, bytes: 3549] udp:[pkts: 18, bytes: 1278] icmp:[pkts: 0, bytes: 0]
|
||||
* [...]
|
||||
*
|
||||
* This now means, the below classifier and action pipeline has been loaded
|
||||
* as eBPF bytecode into the kernel, the kernel has verified that the
|
||||
* execution of the bytecode is "safe", and it has JITed the programs
|
||||
* afterwards, so that upon invocation they're running on native speed. tc
|
||||
* has transferred all map file descriptors to the bpf_agent via IPC and
|
||||
* even after tc exits, the agent can read out or modify all map data.
|
||||
*
|
||||
* Note that the export to the uds is done only once in the classifier and
|
||||
* not in the action. It's enough to export the (here) shared descriptors
|
||||
* once.
|
||||
*
|
||||
* If you need to disassemble the generated JIT image (echo with 2), the
|
||||
* kernel tree has under tools/net/ a small helper, you can invoke e.g.
|
||||
* `bpf_jit_disasm -o`.
|
||||
*
|
||||
* Please find in the code below further comments.
|
||||
*
|
||||
* -- Happy eBPF hacking! ;)
|
||||
*/
|
||||
#include <stdint.h>
|
||||
#include <stdbool.h>
|
||||
#include <sys/types.h>
|
||||
#include <sys/socket.h>
|
||||
#include <asm/types.h>
|
||||
#include <linux/in.h>
|
||||
#include <linux/if.h>
|
||||
#include <linux/if_ether.h>
|
||||
#include <linux/ip.h>
|
||||
#include <linux/ipv6.h>
|
||||
#include <linux/if_tunnel.h>
|
||||
#include <linux/filter.h>
|
||||
#include <linux/bpf.h>
|
||||
|
||||
/* Common, shared definitions with bpf_agent.c. */
|
||||
#include "bpf_shared.h"
|
||||
/* Selection of BPF helper functions for our example. */
|
||||
#include "bpf_funcs.h"
|
||||
|
||||
/* Could be defined here as well, or included from the header. */
|
||||
#define TC_ACT_UNSPEC (-1)
|
||||
#define TC_ACT_OK 0
|
||||
#define TC_ACT_RECLASSIFY 1
|
||||
#define TC_ACT_SHOT 2
|
||||
#define TC_ACT_PIPE 3
|
||||
#define TC_ACT_STOLEN 4
|
||||
#define TC_ACT_QUEUED 5
|
||||
#define TC_ACT_REPEAT 6
|
||||
|
||||
/* Other, misc stuff. */
|
||||
#define IP_MF 0x2000
|
||||
#define IP_OFFSET 0x1FFF
|
||||
|
||||
/* eBPF map definitions, all placed in section "maps". */
|
||||
/* Per protocol counters: hash keyed by the one byte IP protocol number,
 * value is a struct count_tuple.
 */
struct bpf_elf_map __section("maps") map_proto = {
	.type		= BPF_MAP_TYPE_HASH,
	.id		= BPF_MAP_ID_PROTO,
	.size_key	= sizeof(uint8_t),
	.size_value	= sizeof(struct count_tuple),
	.max_elem	= 256,
};

/* Per queue counters: hash keyed by the 32 bit queue mapping of the
 * skb, value is a struct count_queue.
 */
struct bpf_elf_map __section("maps") map_queue = {
	.type		= BPF_MAP_TYPE_HASH,
	.id		= BPF_MAP_ID_QUEUE,
	.size_key	= sizeof(uint32_t),
	.size_value	= sizeof(struct count_queue),
	.max_elem	= 1024,
};

/* Drop counters: array indexed by CPU id, one long per slot. */
struct bpf_elf_map __section("maps") map_drops = {
	.type		= BPF_MAP_TYPE_ARRAY,
	.id		= BPF_MAP_ID_DROPS,
	.size_key	= sizeof(uint32_t),
	.size_value	= sizeof(long),
	.max_elem	= 64,
};
|
||||
|
||||
/* Helper functions and definitions for the flow dissector used by the
|
||||
* example classifier. This resembles the kernel's flow dissector to
|
||||
* some extent and is just used as an example to show what's possible
|
||||
* with eBPF.
|
||||
*/
|
||||
struct sockaddr;
|
||||
|
||||
/* VLAN tag layout as used by the flow dissector: two 16 bit big endian
 * fields. Only the offset of the encapsulated protocol and the total
 * size are used by the code in this file.
 */
struct vlan_hdr {
	__be16	h_vlan_TCI;
	__be16	h_vlan_encapsulated_proto;	/* protocol after the tag */
};
|
||||
|
||||
/* Result of the flow dissector. */
struct flow_keys {
	__u32	src;		/* IPv4 source, or XOR of the IPv6 source words */
	__u32	dst;		/* same for the destination address */
	union {
		__u32	ports;	/* word loaded at th_off */
		__u16	port16[2];
	};
	__s32	th_off;		/* offset the ports word was loaded from */
	__u8	ip_proto;	/* protocol found by the dissector */
};
|
||||
|
||||
/* Return the number of bytes to skip after the network header before
 * the ports word is loaded: 4 for AH, 0 for every other protocol
 * (including TCP, UDP, DCCP, ESP, SCTP and UDP-Lite).
 */
static inline int flow_ports_offset(__u8 ip_proto)
{
	if (ip_proto == IPPROTO_AH)
		return 4;

	return 0;
}
|
||||
|
||||
static inline bool flow_is_frag(struct __sk_buff *skb, int nh_off)
|
||||
{
|
||||
return !!(load_half(skb, nh_off + offsetof(struct iphdr, frag_off)) &
|
||||
(IP_MF | IP_OFFSET));
|
||||
}
|
||||
|
||||
static inline int flow_parse_ipv4(struct __sk_buff *skb, int nh_off,
|
||||
__u8 *ip_proto, struct flow_keys *flow)
|
||||
{
|
||||
__u8 ip_ver_len;
|
||||
|
||||
if (unlikely(flow_is_frag(skb, nh_off)))
|
||||
*ip_proto = 0;
|
||||
else
|
||||
*ip_proto = load_byte(skb, nh_off + offsetof(struct iphdr,
|
||||
protocol));
|
||||
if (*ip_proto != IPPROTO_GRE) {
|
||||
flow->src = load_word(skb, nh_off + offsetof(struct iphdr, saddr));
|
||||
flow->dst = load_word(skb, nh_off + offsetof(struct iphdr, daddr));
|
||||
}
|
||||
|
||||
ip_ver_len = load_byte(skb, nh_off + 0 /* offsetof(struct iphdr, ihl) */);
|
||||
if (likely(ip_ver_len == 0x45))
|
||||
nh_off += 20;
|
||||
else
|
||||
nh_off += (ip_ver_len & 0xF) << 2;
|
||||
|
||||
return nh_off;
|
||||
}
|
||||
|
||||
/* Fold the four consecutive 32 bit words starting at off into a single
 * word by XOR.
 */
static inline __u32 flow_addr_hash_ipv6(struct __sk_buff *skb, int off)
{
	__u32 hash;

	hash  = load_word(skb, off);
	hash ^= load_word(skb, off + sizeof(hash));
	hash ^= load_word(skb, off + sizeof(hash) * 2);
	hash ^= load_word(skb, off + sizeof(hash) * 3);

	return hash;
}
|
||||
|
||||
static inline int flow_parse_ipv6(struct __sk_buff *skb, int nh_off,
|
||||
__u8 *ip_proto, struct flow_keys *flow)
|
||||
{
|
||||
*ip_proto = load_byte(skb, nh_off + offsetof(struct ipv6hdr, nexthdr));
|
||||
|
||||
flow->src = flow_addr_hash_ipv6(skb, nh_off + offsetof(struct ipv6hdr, saddr));
|
||||
flow->dst = flow_addr_hash_ipv6(skb, nh_off + offsetof(struct ipv6hdr, daddr));
|
||||
|
||||
return nh_off + sizeof(struct ipv6hdr);
|
||||
}
|
||||
|
||||
static inline bool flow_dissector(struct __sk_buff *skb,
|
||||
struct flow_keys *flow)
|
||||
{
|
||||
int poff, nh_off = BPF_LL_OFF + ETH_HLEN;
|
||||
__be16 proto = skb->protocol;
|
||||
__u8 ip_proto;
|
||||
|
||||
/* TODO: check for skb->vlan_tci, skb->vlan_proto first */
|
||||
if (proto == htons(ETH_P_8021AD)) {
|
||||
proto = load_half(skb, nh_off +
|
||||
offsetof(struct vlan_hdr, h_vlan_encapsulated_proto));
|
||||
nh_off += sizeof(struct vlan_hdr);
|
||||
}
|
||||
if (proto == htons(ETH_P_8021Q)) {
|
||||
proto = load_half(skb, nh_off +
|
||||
offsetof(struct vlan_hdr, h_vlan_encapsulated_proto));
|
||||
nh_off += sizeof(struct vlan_hdr);
|
||||
}
|
||||
|
||||
if (likely(proto == htons(ETH_P_IP)))
|
||||
nh_off = flow_parse_ipv4(skb, nh_off, &ip_proto, flow);
|
||||
else if (proto == htons(ETH_P_IPV6))
|
||||
nh_off = flow_parse_ipv6(skb, nh_off, &ip_proto, flow);
|
||||
else
|
||||
return false;
|
||||
|
||||
switch (ip_proto) {
|
||||
case IPPROTO_GRE: {
|
||||
struct gre_hdr {
|
||||
__be16 flags;
|
||||
__be16 proto;
|
||||
};
|
||||
|
||||
__u16 gre_flags = load_half(skb, nh_off +
|
||||
offsetof(struct gre_hdr, flags));
|
||||
__u16 gre_proto = load_half(skb, nh_off +
|
||||
offsetof(struct gre_hdr, proto));
|
||||
|
||||
if (gre_flags & (GRE_VERSION | GRE_ROUTING))
|
||||
break;
|
||||
|
||||
nh_off += 4;
|
||||
if (gre_flags & GRE_CSUM)
|
||||
nh_off += 4;
|
||||
if (gre_flags & GRE_KEY)
|
||||
nh_off += 4;
|
||||
if (gre_flags & GRE_SEQ)
|
||||
nh_off += 4;
|
||||
|
||||
if (gre_proto == ETH_P_8021Q) {
|
||||
gre_proto = load_half(skb, nh_off +
|
||||
offsetof(struct vlan_hdr,
|
||||
h_vlan_encapsulated_proto));
|
||||
nh_off += sizeof(struct vlan_hdr);
|
||||
}
|
||||
if (gre_proto == ETH_P_IP)
|
||||
nh_off = flow_parse_ipv4(skb, nh_off, &ip_proto, flow);
|
||||
else if (gre_proto == ETH_P_IPV6)
|
||||
nh_off = flow_parse_ipv6(skb, nh_off, &ip_proto, flow);
|
||||
else
|
||||
return false;
|
||||
break;
|
||||
}
|
||||
case IPPROTO_IPIP:
|
||||
nh_off = flow_parse_ipv4(skb, nh_off, &ip_proto, flow);
|
||||
break;
|
||||
case IPPROTO_IPV6:
|
||||
nh_off = flow_parse_ipv6(skb, nh_off, &ip_proto, flow);
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
nh_off += flow_ports_offset(ip_proto);
|
||||
|
||||
flow->ports = load_word(skb, nh_off);
|
||||
flow->th_off = nh_off;
|
||||
flow->ip_proto = ip_proto;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
static inline void cls_update_proto_map(const struct __sk_buff *skb,
|
||||
const struct flow_keys *flow)
|
||||
{
|
||||
uint8_t proto = flow->ip_proto;
|
||||
struct count_tuple *ct, _ct;
|
||||
|
||||
ct = bpf_map_lookup_elem(&map_proto, &proto);
|
||||
if (likely(ct)) {
|
||||
__sync_fetch_and_add(&ct->packets, 1);
|
||||
__sync_fetch_and_add(&ct->bytes, skb->len);
|
||||
return;
|
||||
}
|
||||
|
||||
/* No hit yet, we need to create a new entry. */
|
||||
_ct.packets = 1;
|
||||
_ct.bytes = skb->len;
|
||||
|
||||
bpf_map_update_elem(&map_proto, &proto, &_ct, BPF_ANY);
|
||||
}
|
||||
|
||||
static inline void cls_update_queue_map(const struct __sk_buff *skb)
|
||||
{
|
||||
uint32_t queue = skb->queue_mapping;
|
||||
struct count_queue *cq, _cq;
|
||||
bool mismatch;
|
||||
|
||||
mismatch = skb->queue_mapping != get_smp_processor_id();
|
||||
|
||||
cq = bpf_map_lookup_elem(&map_queue, &queue);
|
||||
if (likely(cq)) {
|
||||
__sync_fetch_and_add(&cq->total, 1);
|
||||
if (mismatch)
|
||||
__sync_fetch_and_add(&cq->mismatch, 1);
|
||||
return;
|
||||
}
|
||||
|
||||
/* No hit yet, we need to create a new entry. */
|
||||
_cq.total = 1;
|
||||
_cq.mismatch = mismatch ? 1 : 0;
|
||||
|
||||
bpf_map_update_elem(&map_queue, &queue, &_cq, BPF_ANY);
|
||||
}
|
||||
|
||||
/* eBPF program definitions, placed in various sections, which can
|
||||
* have custom section names. If custom names are in use, it's
|
||||
* required to point tc to the correct section, e.g.
|
||||
*
|
||||
* tc filter add [...] bpf obj cls.o sec cls-tos [...]
|
||||
*
|
||||
* in case the program resides in __section("cls-tos").
|
||||
*
|
||||
* Default section for cls_bpf is: "classifier", for act_bpf is:
|
||||
* "action". Naturally, if for example multiple actions are present
|
||||
* in the same file, they need to have distinct section names.
|
||||
*
|
||||
* It is however not required to have multiple programs sharing
|
||||
* a file.
|
||||
*/
|
||||
/* Classifier: dissects the packet, updates the protocol and queue
 * statistics and returns the IP protocol number found; returns 0 if
 * the packet could not be dissected.
 */
__section("classifier") int cls_main(struct __sk_buff *skb)
{
	struct flow_keys flow;

	if (flow_dissector(skb, &flow)) {
		cls_update_proto_map(skb, &flow);
		cls_update_queue_map(skb);

		return flow.ip_proto;
	}

	return 0; /* No match in cls_bpf. */
}
|
||||
|
||||
static inline void act_update_drop_map(void)
|
||||
{
|
||||
uint32_t *count, cpu = get_smp_processor_id();
|
||||
|
||||
count = bpf_map_lookup_elem(&map_drops, &cpu);
|
||||
if (count)
|
||||
/* Only this cpu is accessing this element. */
|
||||
(*count)++;
|
||||
}
|
||||
|
||||
/* Action "action-mark": packets whose mark is 0xcafe are counted as
 * drops and shot; everything else gets TC_ACT_UNSPEC.
 */
__section("action-mark") int act_mark_main(struct __sk_buff *skb)
{
	/* You could also mangle skb data here with the helper function
	 * BPF_FUNC_skb_store_bytes, etc. Or, alternatively you could
	 * do that already in the classifier itself as a merged combination
	 * of classifier'n'action model.
	 */
	if (skb->mark != 0xcafe) {
		/* Default configured tc opcode. */
		return TC_ACT_UNSPEC;
	}

	act_update_drop_map();
	return TC_ACT_SHOT;
}
|
||||
|
||||
/* Action "action-rand": counts and shoots a packet whenever the two
 * low bits of a random number are both zero; otherwise returns
 * TC_ACT_UNSPEC.
 */
__section("action-rand") int act_rand_main(struct __sk_buff *skb)
{
	/* Sorry, we're near event horizon ... */
	if (get_prandom_u32() & 3)
		return TC_ACT_UNSPEC;

	act_update_drop_map();
	return TC_ACT_SHOT;
}
|
||||
|
||||
/* Last but not least, the file contains a license. Some future helper
|
||||
* functions may only be available with a GPL license.
|
||||
*/
|
||||
char __license[] __section("license") = "GPL";
|
||||
|
|
@ -1,53 +0,0 @@
|
|||
#include "../../include/bpf_api.h"
|
||||
|
||||
/* Minimal, stand-alone toy map pinning example:
|
||||
*
|
||||
* clang -target bpf -O2 [...] -o bpf_shared.o -c bpf_shared.c
|
||||
* tc filter add dev foo parent 1: bpf obj bpf_shared.o sec egress
|
||||
* tc filter add dev foo parent ffff: bpf obj bpf_shared.o sec ingress
|
||||
*
|
||||
* Both classifier will share the very same map instance in this example,
|
||||
* so map content can be accessed from ingress *and* egress side!
|
||||
*
|
||||
* This example has a pinning of PIN_OBJECT_NS, so it's private and
|
||||
* thus shared among various program sections within the object.
|
||||
*
|
||||
* A setting of PIN_GLOBAL_NS would place it into a global namespace,
|
||||
* so that it can be shared among different object files. A setting
|
||||
* of PIN_NONE (= 0) means no sharing, so each tc invocation a new map
|
||||
* instance is being created.
|
||||
*/
|
||||
|
||||
/* Single-entry array map shared by the "egress" and "ingress" programs
 * of this file. Key and value are both sizeof(uint32_t) wide.
 *
 * NOTE(review): the file header comment describes the pinning in terms
 * of PIN_OBJECT_NS / PIN_GLOBAL_NS / PIN_NONE, while this definition
 * uses LIBBPF_PIN_BY_NAME - confirm which description is current.
 */
struct {
|
||||
__uint(type, BPF_MAP_TYPE_ARRAY);
|
||||
__uint(key_size, sizeof(uint32_t));
|
||||
__uint(value_size, sizeof(uint32_t));
|
||||
__uint(max_entries, 1);
|
||||
__uint(pinning, LIBBPF_PIN_BY_NAME); /* or LIBBPF_PIN_NONE */
|
||||
} map_sh __section(".maps");
|
||||
|
||||
/* Program for the "egress" section: looks up slot 0 of map_sh and, if
 * the lookup succeeds, adds 1 to the stored value through lock_xadd()
 * (from bpf_api.h, not shown here; presumably an atomic add).
 * Always returns BPF_H_DEFAULT.
 */
__section("egress")
|
||||
int emain(struct __sk_buff *skb)
|
||||
{
|
||||
int key = 0, *val;
|
||||
|
||||
val = map_lookup_elem(&map_sh, &key);
|
||||
/* The lookup result is only dereferenced after this NULL check. */
if (val)
|
||||
lock_xadd(val, 1);
|
||||
|
||||
return BPF_H_DEFAULT;
|
||||
}
|
||||
|
||||
/* Program for the "ingress" section: looks up slot 0 of map_sh and, if
 * the lookup succeeds, prints the stored value with printt() (from
 * bpf_api.h, not shown here). The map is only read, never modified.
 * Always returns BPF_H_DEFAULT.
 */
__section("ingress")
|
||||
int imain(struct __sk_buff *skb)
|
||||
{
|
||||
int key = 0, *val;
|
||||
|
||||
val = map_lookup_elem(&map_sh, &key);
|
||||
if (val)
|
||||
printt("map val: %d\n", *val);
|
||||
|
||||
return BPF_H_DEFAULT;
|
||||
}
|
||||
|
||||
BPF_LICENSE("GPL");
|
||||
|
|
@ -0,0 +1,26 @@
|
|||
#ifndef __BPF_SHARED__
|
||||
#define __BPF_SHARED__
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
#include "../../include/bpf_elf.h"
|
||||
|
||||
/* Identifiers for the maps used with this header. The enumerators are
 * numbered 0, 1, 2 in declaration order, so __BPF_MAP_ID_MAX (and its
 * alias BPF_MAP_ID_MAX) equals the number of real IDs, i.e. 3.
 */
enum {
|
||||
BPF_MAP_ID_PROTO,
|
||||
BPF_MAP_ID_QUEUE,
|
||||
BPF_MAP_ID_DROPS,
|
||||
__BPF_MAP_ID_MAX,
|
||||
#define BPF_MAP_ID_MAX __BPF_MAP_ID_MAX
|
||||
};
|
||||
|
||||
/* Pair of counters, one named for packets and one for bytes. Both are
 * declared long so that they can be updated with
 * __sync_fetch_and_add(), as the field comment states.
 */
struct count_tuple {
|
||||
long packets; /* type long for __sync_fetch_and_add() */
|
||||
long bytes;
|
||||
};
|
||||
|
||||
/* Pair of long counters named total and mismatch.
 * NOTE(review): what exactly is counted is decided by the users of this
 * header, which are not part of this excerpt - confirm there.
 */
struct count_queue {
|
||||
long total;
|
||||
long mismatch;
|
||||
};
|
||||
|
||||
#endif /* __BPF_SHARED__ */
|
||||
|
|
@ -0,0 +1,23 @@
|
|||
#ifndef __BPF_SYS__
|
||||
#define __BPF_SYS__
|
||||
|
||||
#include <sys/syscall.h>
|
||||
#include <linux/bpf.h>
|
||||
|
||||
/* Convert a pointer into the __u64 representation used for the pointer
 * fields of union bpf_attr (see bpf_lookup_elem()).
 *
 * The pointer is first converted to unsigned long and only then
 * widened to __u64, so the pointer-to-integer cast itself is done on
 * an integer type that is not wider than needed.
 */
static inline __u64 bpf_ptr_to_u64(const void *ptr)
|
||||
{
|
||||
return (__u64) (unsigned long) ptr;
|
||||
}
|
||||
|
||||
/* Issue the bpf() system call with command BPF_MAP_LOOKUP_ELEM.
 *
 * fd    - file descriptor stored in attr.map_fd
 * key   - pointer passed as attr.key
 * value - pointer passed as attr.value (presumably the buffer the
 *         kernel fills with the element - see bpf(2))
 *
 * Returns the result of syscall() unchanged.
 *
 * NOTE(review): only map_fd, key and value are initialised explicitly;
 * whether every remaining byte of the union is zeroed is left to the
 * compiler. Consider clearing the whole union first - TODO confirm
 * against the kernel's checks on unused attribute bytes.
 */
static inline int bpf_lookup_elem(int fd, void *key, void *value)
|
||||
{
|
||||
union bpf_attr attr = {
|
||||
.map_fd = fd,
|
||||
.key = bpf_ptr_to_u64(key),
|
||||
.value = bpf_ptr_to_u64(value),
|
||||
};
|
||||
|
||||
return syscall(__NR_bpf, BPF_MAP_LOOKUP_ELEM, &attr, sizeof(attr));
|
||||
}
|
||||
|
||||
#endif /* __BPF_SYS__ */
|
||||
|
|
@ -1,35 +0,0 @@
|
|||
#include "../../../include/bpf_api.h"
|
||||
|
||||
/* Cyclic dependency example to test the kernel's runtime upper
|
||||
* bound on loops. Also demonstrates how to use direct-actions,
|
||||
* loaded as: tc filter add [...] bpf da obj [...]
|
||||
*/
|
||||
#define JMP_MAP_ID 0xabccba
|
||||
|
||||
/* One-slot program array with id JMP_MAP_ID and PIN_OBJECT_NS pinning.
 * Both programs of this file tail-call through slot 0 of this map;
 * cls_loop() is annotated with __section_tail(JMP_MAP_ID, 0).
 */
struct bpf_elf_map __section_maps jmp_tc = {
|
||||
.type = BPF_MAP_TYPE_PROG_ARRAY,
|
||||
.id = JMP_MAP_ID,
|
||||
.size_key = sizeof(uint32_t),
|
||||
.size_value = sizeof(uint32_t),
|
||||
.pinning = PIN_OBJECT_NS,
|
||||
.max_elem = 1,
|
||||
};
|
||||
|
||||
/* Tail-called program annotated for slot 0 of the map with id
 * JMP_MAP_ID. It prints skb->cb[0], increments it, and then tail-calls
 * slot 0 of jmp_tc again (presumably itself, which is the "cyclic
 * dependency" named in the file header - confirm against the loader).
 *
 * The statements after tail_call() are only reached when the tail call
 * does not take place; in that case the class id is set to
 * TC_H_MAKE(1, 42) and TC_ACT_OK is returned.
 */
__section_tail(JMP_MAP_ID, 0)
|
||||
int cls_loop(struct __sk_buff *skb)
|
||||
{
|
||||
printt("cb: %u\n", skb->cb[0]++);
|
||||
tail_call(skb, &jmp_tc, 0);
|
||||
|
||||
skb->tc_classid = TC_H_MAKE(1, 42);
|
||||
return TC_ACT_OK;
|
||||
}
|
||||
|
||||
/* Classifier entry point of the cyclic example: tail-calls slot 0 of
 * jmp_tc. TC_ACT_SHOT is returned only when the tail call falls
 * through.
 */
__section_cls_entry
|
||||
int cls_entry(struct __sk_buff *skb)
|
||||
{
|
||||
tail_call(skb, &jmp_tc, 0);
|
||||
return TC_ACT_SHOT;
|
||||
}
|
||||
|
||||
BPF_LICENSE("GPL");
|
||||
|
|
@ -1,66 +0,0 @@
|
|||
#include "../../../include/bpf_api.h"
|
||||
|
||||
/* This example demonstrates how classifier run-time behaviour
|
||||
* can be altered with tail calls. We start out with an empty
|
||||
* jmp_tc array, then add section aaa to the array slot 0, and
|
||||
* later on atomically replace it with section bbb. Note that
|
||||
* as shown in other examples, the tc loader can prepopulate
|
||||
* tail called sections, here we start out with an empty one
|
||||
* on purpose to show it can also be done this way.
|
||||
*
|
||||
* tc filter add dev foo parent ffff: bpf obj graft.o
|
||||
* tc exec bpf dbg
|
||||
* [...]
|
||||
* Socket Thread-20229 [001] ..s. 138993.003923: : fallthrough
|
||||
* <idle>-0 [001] ..s. 138993.202265: : fallthrough
|
||||
* Socket Thread-20229 [001] ..s. 138994.004149: : fallthrough
|
||||
* [...]
|
||||
*
|
||||
* tc exec bpf graft m:globals/jmp_tc key 0 obj graft.o sec aaa
|
||||
* tc exec bpf dbg
|
||||
* [...]
|
||||
* Socket Thread-19818 [002] ..s. 139012.053587: : aaa
|
||||
* <idle>-0 [002] ..s. 139012.172359: : aaa
|
||||
* Socket Thread-19818 [001] ..s. 139012.173556: : aaa
|
||||
* [...]
|
||||
*
|
||||
* tc exec bpf graft m:globals/jmp_tc key 0 obj graft.o sec bbb
|
||||
* tc exec bpf dbg
|
||||
* [...]
|
||||
* Socket Thread-19818 [002] ..s. 139022.102967: : bbb
|
||||
* <idle>-0 [002] ..s. 139022.155640: : bbb
|
||||
* Socket Thread-19818 [001] ..s. 139022.156730: : bbb
|
||||
* [...]
|
||||
*/
|
||||
|
||||
/* One-slot program array with PIN_GLOBAL_NS pinning. No program in
 * this file is annotated for it; per the file header comment it starts
 * out empty and is filled later with "tc exec bpf graft".
 */
struct bpf_elf_map __section_maps jmp_tc = {
|
||||
.type = BPF_MAP_TYPE_PROG_ARRAY,
|
||||
.size_key = sizeof(uint32_t),
|
||||
.size_value = sizeof(uint32_t),
|
||||
.pinning = PIN_GLOBAL_NS,
|
||||
.max_elem = 1,
|
||||
};
|
||||
|
||||
/* Program in section "aaa": prints "aaa" and returns the class id
 * TC_H_MAKE(1, 42). Meant to be grafted into jmp_tc (see file header).
 */
__section("aaa")
|
||||
int cls_aaa(struct __sk_buff *skb)
|
||||
{
|
||||
printt("aaa\n");
|
||||
return TC_H_MAKE(1, 42);
|
||||
}
|
||||
|
||||
/* Program in section "bbb": prints "bbb" and returns the class id
 * TC_H_MAKE(1, 43). Meant to replace "aaa" in jmp_tc (see file header).
 */
__section("bbb")
|
||||
int cls_bbb(struct __sk_buff *skb)
|
||||
{
|
||||
printt("bbb\n");
|
||||
return TC_H_MAKE(1, 43);
|
||||
}
|
||||
|
||||
/* Classifier entry point of the graft example: tail-calls slot 0 of
 * jmp_tc. When the tail call falls through (for instance while the
 * slot is still empty, as described in the file header) it prints
 * "fallthrough" and returns BPF_H_DEFAULT.
 */
__section_cls_entry
|
||||
int cls_entry(struct __sk_buff *skb)
|
||||
{
|
||||
tail_call(skb, &jmp_tc, 0);
|
||||
printt("fallthrough\n");
|
||||
return BPF_H_DEFAULT;
|
||||
}
|
||||
|
||||
BPF_LICENSE("GPL");
|
||||
|
|
@ -1,56 +0,0 @@
|
|||
#include "../../../include/bpf_api.h"
|
||||
|
||||
#define MAP_INNER_ID 42
|
||||
|
||||
/* Single-entry array map that serves as the inner map of map_outer;
 * the two are tied together through MAP_INNER_ID (.id here, .inner_id
 * in map_outer).
 * NOTE(review): .inner_idx = 0 presumably names the slot of the outer
 * map this map is placed in - confirm against the loader.
 */
struct bpf_elf_map __section_maps map_inner = {
|
||||
.type = BPF_MAP_TYPE_ARRAY,
|
||||
.size_key = sizeof(uint32_t),
|
||||
.size_value = sizeof(uint32_t),
|
||||
.id = MAP_INNER_ID,
|
||||
.inner_idx = 0,
|
||||
.pinning = PIN_GLOBAL_NS,
|
||||
.max_elem = 1,
|
||||
};
|
||||
|
||||
/* Single-entry array-of-maps whose inner map type is identified by
 * MAP_INNER_ID, i.e. the id of map_inner. Pinned with PIN_GLOBAL_NS.
 */
struct bpf_elf_map __section_maps map_outer = {
|
||||
.type = BPF_MAP_TYPE_ARRAY_OF_MAPS,
|
||||
.size_key = sizeof(uint32_t),
|
||||
.size_value = sizeof(uint32_t),
|
||||
.inner_id = MAP_INNER_ID,
|
||||
.pinning = PIN_GLOBAL_NS,
|
||||
.max_elem = 1,
|
||||
};
|
||||
|
||||
/* Program for the "egress" section: fetches the inner map stored in
 * slot 0 of map_outer, then looks up slot 0 of that inner map and adds
 * 1 to the value through lock_xadd() (from bpf_api.h, not shown here).
 * Each lookup result is checked before use. Always returns
 * BPF_H_DEFAULT.
 */
__section("egress")
|
||||
int emain(struct __sk_buff *skb)
|
||||
{
|
||||
/* Note: this local shadows the file-scope map definition of the same name. */
struct bpf_elf_map *map_inner;
|
||||
int key = 0, *val;
|
||||
|
||||
map_inner = map_lookup_elem(&map_outer, &key);
|
||||
if (map_inner) {
|
||||
val = map_lookup_elem(map_inner, &key);
|
||||
if (val)
|
||||
lock_xadd(val, 1);
|
||||
}
|
||||
|
||||
return BPF_H_DEFAULT;
|
||||
}
|
||||
|
||||
/* Program for the "ingress" section: fetches the inner map stored in
 * slot 0 of map_outer, then looks up slot 0 of that inner map and
 * prints the value with printt(). Nothing is modified. Always returns
 * BPF_H_DEFAULT.
 */
__section("ingress")
|
||||
int imain(struct __sk_buff *skb)
|
||||
{
|
||||
/* Note: this local shadows the file-scope map definition of the same name. */
struct bpf_elf_map *map_inner;
|
||||
int key = 0, *val;
|
||||
|
||||
map_inner = map_lookup_elem(&map_outer, &key);
|
||||
if (map_inner) {
|
||||
val = map_lookup_elem(map_inner, &key);
|
||||
if (val)
|
||||
printt("map val: %d\n", *val);
|
||||
}
|
||||
|
||||
return BPF_H_DEFAULT;
|
||||
}
|
||||
|
||||
BPF_LICENSE("GPL");
|
||||
|
|
@ -1,53 +0,0 @@
|
|||
#include "../../../include/bpf_api.h"
|
||||
|
||||
/* Minimal, stand-alone toy map pinning example:
|
||||
*
|
||||
* clang -target bpf -O2 [...] -o bpf_shared.o -c bpf_shared.c
|
||||
* tc filter add dev foo parent 1: bpf obj bpf_shared.o sec egress
|
||||
* tc filter add dev foo parent ffff: bpf obj bpf_shared.o sec ingress
|
||||
*
|
||||
* Both classifiers will share the very same map instance in this example,
|
||||
* so map content can be accessed from ingress *and* egress side!
|
||||
*
|
||||
* This example has a pinning of PIN_OBJECT_NS, so it's private and
|
||||
* thus shared among various program sections within the object.
|
||||
*
|
||||
* A setting of PIN_GLOBAL_NS would place it into a global namespace,
|
||||
* so that it can be shared among different object files. A setting
|
||||
* of PIN_NONE (= 0) means no sharing, so each tc invocation a new map
|
||||
* instance is being created.
|
||||
*/
|
||||
|
||||
/* Single-entry array map used by both the "egress" and the "ingress"
 * program of this file. The pinning mode selects how widely the map is
 * shared; the alternatives are described in the file header comment.
 */
struct bpf_elf_map __section_maps map_sh = {
|
||||
.type = BPF_MAP_TYPE_ARRAY,
|
||||
.size_key = sizeof(uint32_t),
|
||||
.size_value = sizeof(uint32_t),
|
||||
.pinning = PIN_OBJECT_NS, /* or PIN_GLOBAL_NS, or PIN_NONE */
|
||||
.max_elem = 1,
|
||||
};
|
||||
|
||||
/* Program for the "egress" section: looks up slot 0 of map_sh and, if
 * the lookup succeeds, adds 1 to the stored value through lock_xadd()
 * (from bpf_api.h, not shown here; presumably an atomic add).
 * Always returns BPF_H_DEFAULT.
 */
__section("egress")
|
||||
int emain(struct __sk_buff *skb)
|
||||
{
|
||||
int key = 0, *val;
|
||||
|
||||
val = map_lookup_elem(&map_sh, &key);
|
||||
if (val)
|
||||
lock_xadd(val, 1);
|
||||
|
||||
return BPF_H_DEFAULT;
|
||||
}
|
||||
|
||||
/* Program for the "ingress" section: looks up slot 0 of map_sh and, if
 * the lookup succeeds, prints the stored value with printt(). The map
 * is only read here. Always returns BPF_H_DEFAULT.
 */
__section("ingress")
|
||||
int imain(struct __sk_buff *skb)
|
||||
{
|
||||
int key = 0, *val;
|
||||
|
||||
val = map_lookup_elem(&map_sh, &key);
|
||||
if (val)
|
||||
printt("map val: %d\n", *val);
|
||||
|
||||
return BPF_H_DEFAULT;
|
||||
}
|
||||
|
||||
BPF_LICENSE("GPL");
|
||||
|
|
@ -1,117 +0,0 @@
|
|||
/* SPDX-License-Identifier: GPL-2.0 */
|
||||
#include "../../../include/bpf_api.h"
|
||||
|
||||
#define ENTRY_INIT 3
|
||||
#define ENTRY_0 0
|
||||
#define ENTRY_1 1
|
||||
#define MAX_JMP_SIZE 2
|
||||
|
||||
#define FOO 42
|
||||
#define BAR 43
|
||||
|
||||
/* This example doesn't really do anything useful, but its purpose is to
|
||||
* demonstrate eBPF tail calls on a very simple example.
|
||||
*
|
||||
* cls_entry() is our classifier entry point, from there we jump based on
|
||||
* skb->hash into cls_case1() or cls_case2(). They are both part of the
|
||||
* program array jmp_tc. Indicated via __section_tail(), the tc loader
|
||||
* populates the program arrays with the loaded file descriptors already.
|
||||
*
|
||||
* To demonstrate nested jumps, cls_case2() jumps within the same jmp_tc
|
||||
* array to cls_case1(). And whenever we arrive at cls_case1(), we jump
|
||||
* into cls_exit(), part of the jump array jmp_ex.
|
||||
*
|
||||
* Also, to show it's possible, all programs share map_sh and dump the value
|
||||
* that the entry point incremented. The sections that are loaded into a
|
||||
* program array can be atomically replaced during run-time, e.g. to change
|
||||
* classifier behaviour.
|
||||
*/
|
||||
|
||||
/* Program array with id FOO and MAX_JMP_SIZE slots. cls_case1() and
 * cls_case2() are annotated for slots ENTRY_0 and ENTRY_1 of this map.
 */
struct bpf_elf_map __section_maps jmp_tc = {
|
||||
.type = BPF_MAP_TYPE_PROG_ARRAY,
|
||||
.id = FOO,
|
||||
.size_key = sizeof(uint32_t),
|
||||
.size_value = sizeof(uint32_t),
|
||||
.pinning = PIN_OBJECT_NS,
|
||||
.max_elem = MAX_JMP_SIZE,
|
||||
};
|
||||
|
||||
/* One-slot program array with id BAR. cls_exit() is annotated for its
 * slot ENTRY_0; cls_case1() tail-calls into it.
 */
struct bpf_elf_map __section_maps jmp_ex = {
|
||||
.type = BPF_MAP_TYPE_PROG_ARRAY,
|
||||
.id = BAR,
|
||||
.size_key = sizeof(uint32_t),
|
||||
.size_value = sizeof(uint32_t),
|
||||
.pinning = PIN_OBJECT_NS,
|
||||
.max_elem = 1,
|
||||
};
|
||||
|
||||
/* Single-entry array map read by every program in this file; only
 * cls_entry() modifies it (it increments slot 0).
 */
struct bpf_elf_map __section_maps map_sh = {
|
||||
.type = BPF_MAP_TYPE_ARRAY,
|
||||
.size_key = sizeof(uint32_t),
|
||||
.size_value = sizeof(uint32_t),
|
||||
.pinning = PIN_OBJECT_NS,
|
||||
.max_elem = 1,
|
||||
};
|
||||
|
||||
/* Tail-called program annotated for slot ENTRY_0 of the map with id
 * FOO (jmp_tc). It prints the value in slot 0 of map_sh together with
 * skb->cb[0], which holds the marker left by the previous program,
 * then stores its own marker (ENTRY_0) in skb->cb[0] and tail-calls
 * slot ENTRY_0 of jmp_ex. BPF_H_DEFAULT is returned only when that
 * tail call falls through.
 */
__section_tail(FOO, ENTRY_0)
|
||||
int cls_case1(struct __sk_buff *skb)
|
||||
{
|
||||
int key = 0, *val;
|
||||
|
||||
val = map_lookup_elem(&map_sh, &key);
|
||||
if (val)
|
||||
printt("case1: map-val: %d from:%u\n", *val, skb->cb[0]);
|
||||
|
||||
skb->cb[0] = ENTRY_0;
|
||||
tail_call(skb, &jmp_ex, ENTRY_0);
|
||||
|
||||
return BPF_H_DEFAULT;
|
||||
}
|
||||
|
||||
/* Tail-called program annotated for slot ENTRY_1 of the map with id
 * FOO (jmp_tc). It prints the value in slot 0 of map_sh together with
 * skb->cb[0], stores its own marker (ENTRY_1) in skb->cb[0] and then
 * tail-calls slot ENTRY_0 of the same jmp_tc array, which is the slot
 * cls_case1() is annotated for. BPF_H_DEFAULT is returned only when
 * that tail call falls through.
 */
__section_tail(FOO, ENTRY_1)
|
||||
int cls_case2(struct __sk_buff *skb)
|
||||
{
|
||||
int key = 0, *val;
|
||||
|
||||
val = map_lookup_elem(&map_sh, &key);
|
||||
if (val)
|
||||
printt("case2: map-val: %d from:%u\n", *val, skb->cb[0]);
|
||||
|
||||
skb->cb[0] = ENTRY_1;
|
||||
tail_call(skb, &jmp_tc, ENTRY_0);
|
||||
|
||||
return BPF_H_DEFAULT;
|
||||
}
|
||||
|
||||
/* Tail-called program annotated for slot ENTRY_0 of the map with id
 * BAR (jmp_ex). It prints the value in slot 0 of map_sh together with
 * skb->cb[0] and returns BPF_H_DEFAULT without any further tail call.
 */
__section_tail(BAR, ENTRY_0)
|
||||
int cls_exit(struct __sk_buff *skb)
|
||||
{
|
||||
int key = 0, *val;
|
||||
|
||||
val = map_lookup_elem(&map_sh, &key);
|
||||
if (val)
|
||||
printt("exit: map-val: %d from:%u\n", *val, skb->cb[0]);
|
||||
|
||||
/* Termination point. */
|
||||
return BPF_H_DEFAULT;
|
||||
}
|
||||
|
||||
/* Classifier entry point of the tail-call example.
 *
 * If slot 0 of map_sh can be looked up, the value is incremented by 1
 * through lock_xadd(), skb->cb[0] is set to ENTRY_INIT and a tail call
 * into jmp_tc is attempted. When the lookup fails or the tail call
 * falls through, "fallthrough" is printed and BPF_H_DEFAULT returned.
 */
__section_cls_entry
|
||||
int cls_entry(struct __sk_buff *skb)
|
||||
{
|
||||
int key = 0, *val;
|
||||
|
||||
/* For transferring state, we can use skb->cb[0] ... skb->cb[4]. */
|
||||
val = map_lookup_elem(&map_sh, &key);
|
||||
if (val) {
|
||||
lock_xadd(val, 1);
|
||||
|
||||
skb->cb[0] = ENTRY_INIT;
|
||||
/* With MAX_JMP_SIZE == 2 the mask keeps only bit 0 of the hash, so the index is 0 or 1. */
tail_call(skb, &jmp_tc, skb->hash & (MAX_JMP_SIZE - 1));
|
||||
}
|
||||
|
||||
printt("fallthrough\n");
|
||||
return BPF_H_DEFAULT;
|
||||
}
|
||||
|
||||
BPF_LICENSE("GPL");
|
||||
|
|
@ -0,0 +1,983 @@
|
|||
#!/bin/bash
|
||||
#
|
||||
# cbq.init v0.7.3
|
||||
# Copyright (C) 1999 Pavel Golubev <pg@ksi-linux.com>
|
||||
# Copyright (C) 2001-2004 Lubomir Bulej <pallas@kadan.cz>
|
||||
#
|
||||
# chkconfig: 2345 11 89
|
||||
# description: sets up CBQ-based traffic control
|
||||
#
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License as published by
|
||||
# the Free Software Foundation; either version 2 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# This program is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU General Public License
|
||||
# along with this program; if not, see <http://www.gnu.org/licenses/>.
|
||||
#
|
||||
# To get the latest version, check on Freshmeat for actual location:
|
||||
#
|
||||
# http://freshmeat.net/projects/cbq.init
|
||||
#
|
||||
#
|
||||
# VERSION HISTORY
|
||||
# ---------------
|
||||
# v0.7.3- Deepak Singhal <singhal at users.sourceforge.net>
|
||||
# - fix timecheck to not ignore regular TIME rules after
|
||||
# encountering a TIME rule that spans over midnight
|
||||
# - Nathan Shafer <nicodemus at users.sourceforge.net>
|
||||
# - allow symlinks to class files
|
||||
# - Seth J. Blank <antifreeze at users.sourceforge.net>
|
||||
# - replace hardcoded ip/tc location with variables
|
||||
# - Mark Davis <mark.davis at gmx.de>
|
||||
# - allow setting of PRIO_{MARK,RULE,REALM} in class file
|
||||
# - Fernando Sanch <toptnc at users.sourceforge.net>
|
||||
# - allow underscores in interface names
|
||||
# v0.7.2- Paulo Sedrez
|
||||
# - fix time2abs to allow hours with leading zero in TIME rules
|
||||
# - Svetlin Simeonov <zvero at yahoo.com>
|
||||
# - fix cbq_device_list to allow VLAN interfaces
|
||||
# - Mark Davis <mark.davis at gmx.de>
|
||||
# - ignore *~ backup files when looking for classes
|
||||
# - Mike Boyer <boyer at administrative.com>
|
||||
# - fix to allow arguments to be passed to "restart" command
|
||||
# v0.7.1- Lubomir Bulej <pallas at kadan.cz>
|
||||
# - default value for PERTURB
|
||||
# - fixed small bug in RULE parser to correctly parse rules with
|
||||
# identical source and destination fields
|
||||
# - faster initial scanning of DEVICE fields
|
||||
# v0.7 - Lubomir Bulej <pallas at kadan.cz>
|
||||
# - lots of various cleanups and reorganizations; the parsing is now
|
||||
# some 40% faster, but the class ID must be in range 0x0002-0xffff
|
||||
# (again). Because of the number of internal changes and the above
|
||||
# class ID restriction, I bumped the version to 0.7 to indicate
|
||||
# something might have got broken :)
|
||||
# - changed PRIO_{U32,FW,ROUTE} to PRIO_{RULE,MARK,REALM}
|
||||
# for consistency with filter keywords
|
||||
# - exposed "compile" command
|
||||
# - Catalin Petrescu <taz at dntis.ro>
|
||||
# - support for port masks in RULE (u32) filter
|
||||
# - Jordan Vrtanoski <obeliks at mt.net.mk>
|
||||
# - support for week days in TIME rules
|
||||
# v0.6.4- Lubomir Bulej <pallas at kadan.cz>
|
||||
# - added PRIO_* variables to allow easy control of filter priorities
|
||||
# - added caching to speed up CBQ start, the cache is invalidated
|
||||
# whenever any of the configuration files changes
|
||||
# - updated the readme section + some cosmetic fixes
|
||||
# v0.6.3- Lubomir Bulej <pallas at kadan.cz>
|
||||
# - removed setup of (unnecessary) class 1:1 - all classes
|
||||
# now use qdisc's default class 1:0 as their parent
|
||||
# - minor fix in the timecheck branch - classes
|
||||
# without leaf qdisc were not updated
|
||||
# - minor fix to avoid timecheck failure when run
|
||||
# at time with minutes equal to 08 or 09
|
||||
# - respect CBQ_PATH setting in environment
|
||||
# - made PRIO=5 default, rendering it optional in configs
|
||||
# - added support for route filter, see notes about REALM keyword
|
||||
# - added support for fw filter, see notes about MARK keyword
|
||||
# - added filter display to "list" and "stats" commands
|
||||
# - readme section update + various cosmetic fixes
|
||||
# v0.6.2- Catalin Petrescu <taz at dntis.ro>
|
||||
# - added tunnels interface handling
|
||||
# v0.6.1- Pavel Golubev <pg at ksi-linux.com>
|
||||
# - added sch_prio module loading
|
||||
# (thanks johan at iglo.virtual.or.id for reminding)
|
||||
# - resolved errors resulting from stricter syntax checking in bash2
|
||||
# - Lubomir Bulej <pallas at kadan.cz>
|
||||
# - various cosmetic fixes
|
||||
# v0.6 - Lubomir Bulej <pallas at kadan.cz>
|
||||
# - attempt to limit number of spawned processes by utilizing
|
||||
# more of sed power (use sed instead of grep+cut)
|
||||
# - simplified TIME parser, using bash builtins
|
||||
# - added initial support for SFQ as leaf qdisc
|
||||
# - reworked the documentation part a little
|
||||
# - incorporated pending patches and ideas submitted by
|
||||
# following people for versions 0.3 into version 0.6
|
||||
# - Miguel Freitas <miguel at cetuc.puc-rio.br>
|
||||
# - in case of overlapping TIME parameters, the last match is taken
|
||||
# - Juanjo Ciarlante <jjo at mendoza.gov.ar>
|
||||
# - chkconfig tags, list + stats startup parameters
|
||||
# - optional tc & ip command logging (into /var/run/cbq-*)
|
||||
# - Rafal Maszkowski <rzm at icm.edu.pl>
|
||||
# - PEAK parameter for setting TBF's burst peak rate
|
||||
# - fix for many config files (use find instead of ls)
|
||||
# v0.5.1- Lubomir Bulej <pallas at kadan.cz>
|
||||
# - fixed little but serious bug in RULE parser
|
||||
# v0.5 - Lubomir Bulej <pallas at kadan.cz>
|
||||
# - added options PARENT, LEAF, ISOLATED and BOUNDED. This allows
|
||||
# (with some attention to config file ordering) for creating
|
||||
# hierarchical structures of shapers with classes able (or unable)
|
||||
# to borrow bandwidth from their parents.
|
||||
# - class ID check allows hexadecimal numbers
|
||||
# - rewritten & simplified RULE parser
|
||||
# - cosmetic changes to improve readability
|
||||
# - reorganization to avoid duplicate code (timecheck etc.)
|
||||
# - timecheck doesn't check classes without TIME fields anymore
|
||||
# v0.4 - Lubomir Bulej <pallas at kadan.cz>
|
||||
# - small bugfix in RULE parsing code
|
||||
# - simplified configuration parsing code
|
||||
# - several small cosmetic changes
|
||||
# - TIME parameter can be now specified more than once allowing you to
|
||||
# differentiate RATE throughout the whole day. Time overlapping is
|
||||
# not checked, first match is taken. Midnight wrap (eg. 20:00-6:00)
|
||||
# is allowed and taken care of.
|
||||
# v0.3a4- fixed small bug in IF operator. Thanks to
|
||||
# Rafal Maszkowski <rzm at icm.edu.pl>
|
||||
# v0.3a3- fixed grep bug when using more than 10 eth devices. Thanks to David
|
||||
# Trcka <trcka at poda.cz>.
|
||||
# v0.3a2- fixed bug in "if" operator. Thanks kad at dgtu.donetsk.ua.
|
||||
# v0.3a - added TIME parameter. Example: TIME=00:00-19:00;64Kbit/6Kbit
|
||||
# So, between 00:00 and 19:00 the RATE will be 64Kbit.
|
||||
# Just start "cbq.init timecheck" periodically from cron
|
||||
# (every 10 minutes for example). DON'T FORGET though, to run
|
||||
# "cbq.init start" for CBQ to initialize.
|
||||
# v0.2 - Some cosmetic changes. Now it is more compatible with old bash
|
||||
# version. Thanks to Stanislav V. Voronyi <stas at cnti.uanet.kharkov.ua>.
|
||||
# v0.1 - First public release
|
||||
#
|
||||
#
|
||||
# README
|
||||
# ------
|
||||
#
|
||||
# First of all - this is just a SIMPLE EXAMPLE of CBQ power.
|
||||
# Don't ask me "why" and "how" :)
|
||||
#
|
||||
# This script is meant to simplify setup and management of relatively simple
|
||||
# CBQ-based traffic control on Linux. Access to advanced networking features
|
||||
# of Linux kernel is provided by "ip" and "tc" utilities from A. Kuznetsov's
|
||||
# iproute2 package, available at ftp://ftp.inr.ac.ru/ip-routing. Because the
|
||||
# utilities serve primarily to translate user wishes to RTNETLINK commands,
|
||||
# their interface is rather spartan, intolerant and requires quite a lot of
|
||||
# typing. And typing is what this script attempts to reduce :)
|
||||
#
|
||||
# The advanced networking stuff in Linux is pretty flexible and this script
|
||||
# aims to bring some of its features to the not-so-hard-core Linux users. Of
|
||||
# course, there is a tradeoff between simplicity and flexibility and you may
|
||||
# realize that the flexibility suffered too much for your needs -- time to
|
||||
# face "ip" and "tc" interface.
|
||||
#
|
||||
# To speed up the "start" command, simple caching was introduced in version
|
||||
# 0.6.4. The caching works so that the sequence of "tc" commands for given
|
||||
# configuration is stored in a file (/var/cache/cbq.init by default) which
|
||||
# is used next time the "start" command is run to avoid repeated parsing of
|
||||
# configuration files. This cache is invalidated whenever any of the CBQ
|
||||
# configuration files changes. If you want to run "cbq.init start" without
|
||||
# caching, run it as "cbq.init start nocache". If you want to force cache
|
||||
# invalidation, run it as "cbq.init start invalidate". Caching is disabled
|
||||
# if you have logging enabled (ie. CBQ_DEBUG is not empty).
|
||||
#
|
||||
# If you only want cbq.init to translate your configuration to "tc" commands,
|
||||
# use "compile" command which will output "tc" commands required to build
|
||||
# your configuration. Bear in mind that "compile" does not check if the "tc"
|
||||
# commands were successful - this is done (in certain places) only when the
|
||||
# "start nocache" command is used, which is also useful when creating the
|
||||
# configuration to check whether it is completely valid.
|
||||
#
|
||||
# All CBQ parameters are valid for Ethernet interfaces only. The script was
|
||||
# tested on various Linux kernel versions from series 2.1 to 2.4 and several
|
||||
# distributions with KSI Linux (Nostromo version) as the premier one.
|
||||
#
|
||||
#
|
||||
# HOW DOES IT WORK?
|
||||
# -----------------
|
||||
#
|
||||
# Every traffic class must be described by a file in the $CBQ_PATH directory
|
||||
# (/etc/sysconfig/cbq by default) - one file per class.
|
||||
#
|
||||
# The config file names must obey mandatory format: cbq-<clsid>.<name> where
|
||||
# <clsid> is two-byte hexadecimal number in range <0002-FFFF> (which in fact
|
||||
# is a CBQ class ID) and <name> is the name of the class -- anything to help
|
||||
# you distinguish the configuration files. For small amount of classes it is
|
||||
# often possible (and convenient) to let <clsid> resemble bandwidth of the
|
||||
# class.
|
||||
#
|
||||
# Example of valid config name:
|
||||
# cbq-1280.My_first_shaper
|
||||
#
|
||||
#
|
||||
# The configuration file may contain the following parameters:
|
||||
#
|
||||
### Device parameters
|
||||
#
|
||||
# DEVICE=<ifname>,<bandwidth>[,<weight>] mandatory
|
||||
# DEVICE=eth0,10Mbit,1Mbit
|
||||
#
|
||||
# <ifname> is the name of the interface you want to control
|
||||
# traffic on, e.g. eth0
|
||||
# <bandwidth> is the physical bandwidth of the device, e.g. for
|
||||
# ethernet 10Mbit or 100Mbit, for arcnet 2Mbit
|
||||
# <weight> is tuning parameter that should be proportional to
|
||||
# <bandwidth>. As a rule of thumb: <weight> = <bandwidth> / 10
|
||||
#
|
||||
# When you have more classes on one interface, it is enough to specify
|
||||
# <bandwidth> [and <weight>] only once, therefore in other files you only
|
||||
# need to set DEVICE=<ifname>.
|
||||
#
|
||||
### Class parameters
|
||||
#
|
||||
# RATE=<speed> mandatory
|
||||
# RATE=5Mbit
|
||||
#
|
||||
# Bandwidth allocated to the class. Traffic going through the class is
|
||||
# shaped to conform to specified rate. You can use Kbit, Mbit or bps,
|
||||
# Kbps and Mbps as suffixes. If you don't specify any unit, bits/sec
|
||||
# are used. Also note that "bps" means "bytes per second", not bits.
|
||||
#
|
||||
# WEIGHT=<speed> mandatory
|
||||
# WEIGHT=500Kbit
|
||||
#
|
||||
# Tuning parameter that should be proportional to RATE. As a rule
|
||||
# of thumb, use WEIGHT ~= RATE / 10.
|
||||
#
|
||||
# PRIO=<1-8> optional, default 5
|
||||
# PRIO=5
|
||||
#
|
||||
# Priority of class traffic. The higher the number, the lesser
|
||||
# the priority. Priority of 5 is just fine.
|
||||
#
|
||||
# PARENT=<clsid> optional, default not set
|
||||
# PARENT=1280
|
||||
#
|
||||
# Specifies ID of the parent class to which you want this class be
|
||||
# attached. You might want to use LEAF=none for the parent class as
|
||||
# mentioned below. By using this parameter and carefully ordering the
|
||||
# configuration files, it is possible to create simple hierarchical
|
||||
# structures of CBQ classes. The ordering is important so that parent
|
||||
# classes are constructed prior to their children.
|
||||
#
|
||||
# LEAF=none|tbf|sfq optional, default "tbf"
|
||||
#
|
||||
# Tells the script to attach specified leaf queueing discipline to CBQ
|
||||
# class. By default, TBF is used. Note that attaching TBF to CBQ class
|
||||
# shapes the traffic to conform to TBF parameters and prevents the class
|
||||
# from borrowing bandwidth from its parent even if you have BOUNDED set
|
||||
# to "no". To allow the class to borrow bandwidth (provided it is not
|
||||
# bounded), you must set LEAF to "none" or "sfq".
|
||||
#
|
||||
# If you want to ensure (approximately) fair sharing of bandwidth among
|
||||
# several hosts in the same class, you might want to specify LEAF=sfq to
|
||||
# attach SFQ as leaf queueing discipline to that class.
|
||||
#
|
||||
# BOUNDED=yes|no optional, default "yes"
|
||||
#
|
||||
# If set to "yes", the class is not allowed to borrow bandwidth from
|
||||
# its parent class in overlimit situation. If set to "no", the class
|
||||
# will be allowed to borrow bandwidth from its parent.
|
||||
#
|
||||
# Note: Don't forget to set LEAF to "none" or "sfq", otherwise the class will
|
||||
# have TBF attached to itself and will not be able to borrow unused
|
||||
# bandwidth from its parent.
|
||||
#
|
||||
# ISOLATED=yes|no optional, default "no"
|
||||
#
|
||||
# If set to "yes", the class will not lend unused bandwidth to
|
||||
# its children.
|
||||
#
|
||||
### TBF qdisc parameters
|
||||
#
|
||||
# BUFFER=<bytes>[/<bytes>] optional, default "10Kb/8"
|
||||
#
|
||||
# This parameter controls the depth of the token bucket. In other
|
||||
# words it represents the maximal burst size the class can send.
|
||||
# The optional part of parameter is used to determine the length
|
||||
# of intervals in packet sizes, for which the transmission times
|
||||
# are kept.
|
||||
#
|
||||
# LIMIT=<bytes> optional, default "15Kb"
|
||||
#
|
||||
# This parameter determines the maximal length of backlog. If
|
||||
# the queue contains more data than specified by LIMIT, the
|
||||
# newly arriving packets are dropped. The length of backlog
|
||||
# determines queue latency in case of congestion.
|
||||
#
|
||||
# PEAK=<speed> optional, default not set
|
||||
#
|
||||
# Maximal peak rate for short-term burst traffic. This allows you
|
||||
# to control the absolute peak rate the class can send at, because
|
||||
# single TBF that allows 256Kbit/s would of course allow rate of
|
||||
# 512Kbit for half a second or 1Mbit for a quarter of second.
|
||||
#
|
||||
# MTU=<bytes> optional, default "1500"
|
||||
#
|
||||
# Maximum number of bytes that can be sent at once over the
|
||||
# physical medium. This parameter is required when you specify
|
||||
# PEAK parameter. It defaults to MTU of ethernet - for other
|
||||
# media types you might want to change it.
|
||||
#
|
||||
# Note: Setting TBF as leaf qdisc will effectively prevent the class from
|
||||
# borrowing bandwidth from the ancestor class, because even if the
|
||||
# class allows more traffic to pass through, it is then shaped to
|
||||
# conform to TBF.
|
||||
#
|
||||
### SFQ qdisc parameters
|
||||
#
|
||||
# The SFQ queueing discipline is a cheap way for sharing class bandwidth
|
||||
# among several hosts. As it is stochastic, the fairness is approximate but
|
||||
# it will do the job in most cases. If you want real fairness, you should
|
||||
# probably use WRR (weighted round robin) or WFQ queueing disciplines. Note
|
||||
# that SFQ does not do any traffic shaping - the shaping is done by the CBQ
|
||||
# class the SFQ is attached to.
|
||||
#
|
||||
# QUANTUM=<bytes> optional, default not set
|
||||
#
|
||||
# This parameter should not be set lower than link MTU, for ethernet
|
||||
# it is 1500b, or (with MAC header) 1514b which is the value used
|
||||
# in Alexey Kuznetsov's examples.
|
||||
#
|
||||
# PERTURB=<seconds> optional, default "10"
|
||||
#
|
||||
# Period of hash function perturbation. If unset, hash reconfiguration
|
||||
# will never take place which is what you probably don't want. The
|
||||
# default value of 10 seconds is probably a good one.
|
||||
#
|
||||
### Filter parameters
|
||||
#
|
||||
# RULE=[[saddr[/prefix]][:port[/mask]],][daddr[/prefix]][:port[/mask]]
|
||||
#
|
||||
# These parameters make up "u32" filter rules that select traffic for
|
||||
# each of the classes. You can use multiple RULE fields per config.
|
||||
#
|
||||
# The optional port mask should only be used by advanced users who
|
||||
# understand how the u32 filter works.
|
||||
#
|
||||
# Some examples:
|
||||
#
|
||||
# RULE=10.1.1.0/24:80
|
||||
# selects traffic going to port 80 in network 10.1.1.0
|
||||
#
|
||||
# RULE=10.2.2.5
|
||||
# selects traffic going to any port on single host 10.2.2.5
|
||||
#
|
||||
# RULE=10.2.2.5:20/0xfffe
|
||||
# selects traffic going to ports 20 and 21 on host 10.2.2.5
|
||||
#
|
||||
# RULE=:25,10.2.2.128/26:5000
|
||||
# selects traffic going from anywhere on port 25 to
|
||||
# port 5000 in network 10.2.2.128
|
||||
#
|
||||
# RULE=10.5.5.5:80,
|
||||
# selects traffic going from port 80 of single host 10.5.5.5
|
||||
#
|
||||
#
|
||||
#
|
||||
# REALM=[srealm,][drealm]
|
||||
#
|
||||
# These parameters make up "route" filter rules that classify traffic
|
||||
# according to packet source/destination realms. For information about
|
||||
# realms, see Alexey Kuznetsov's IP Command Reference. This script
|
||||
# does not define any realms, it just builds "tc filter" commands
|
||||
# for you if you need to classify traffic this way.
|
||||
#
|
||||
# Realm is either a decimal number or a string referencing entry in
|
||||
# /etc/iproute2/rt_realms (usually).
|
||||
#
|
||||
# Some examples:
|
||||
#
|
||||
# REALM=russia,internet
|
||||
# selects traffic going from realm "russia" to realm "internet"
|
||||
#
|
||||
# REALM=freenet,
|
||||
# selects traffic going from realm "freenet"
|
||||
#
|
||||
# REALM=10
|
||||
# selects traffic going to realm 10
|
||||
#
|
||||
#
|
||||
#
|
||||
# MARK=<mark>
|
||||
#
|
||||
# These parameters make up "fw" filter rules that select traffic for
|
||||
# each of the classes according to firewall "mark". Mark is a decimal
|
||||
# number packets are tagged with if firewall rules say so. You can
|
||||
# use multiple MARK fields per config.
|
||||
#
|
||||
#
|
||||
# Note: Rules for different filter types can be combined. Attention must be
|
||||
# paid to the priority of filter rules, which can be set below using
|
||||
# PRIO_{RULE,MARK,REALM} variables.
|
||||
#
|
||||
### Time ranging parameters
|
||||
#
|
||||
# TIME=[<dow>,<dow>, ...,<dow>/]<from>-<till>;<rate>/<weight>[/<peak>]
|
||||
# TIME=0,1,2,5/18:00-06:00;256Kbit/25Kbit
|
||||
# TIME=60123/18:00-06:00;256Kbit/25Kbit
|
||||
# TIME=18:00-06:00;256Kbit/25Kbit
|
||||
#
|
||||
# This parameter allows you to differentiate the class bandwidth
|
||||
# throughout the day. You can specify multiple TIME parameters, if
|
||||
# the times overlap, last match is taken. The fields <rate>, <weight>
|
||||
# and <peak> correspond to parameters RATE, WEIGHT and PEAK (which
|
||||
# is optional and applies to TBF leaf qdisc only).
|
||||
#
|
||||
# You can also specify days of week when the TIME rule applies. <dow>
|
||||
# is numeric, 0 corresponds to sunday, 1 corresponds to monday, etc.
|
||||
#
|
||||
###
|
||||
#
|
||||
# Sample configuration file: cbq-1280.My_first_shaper
|
||||
#
|
||||
# --------------------------------------------------------------------------
|
||||
# DEVICE=eth0,10Mbit,1Mbit
|
||||
# RATE=128Kbit
|
||||
# WEIGHT=10Kbit
|
||||
# PRIO=5
|
||||
# RULE=192.168.1.0/24
|
||||
# --------------------------------------------------------------------------
|
||||
#
|
||||
# The configuration says that we will control traffic on 10Mbit ethernet
|
||||
# device eth0 and the traffic going to network 192.168.1.0 will be
|
||||
# processed with priority 5 and shaped to rate of 128Kbit.
|
||||
#
|
||||
# Note that you can control outgoing traffic only. If you want to control
|
||||
# traffic in both directions, you must set up CBQ for both interfaces.
|
||||
#
|
||||
# Consider the following example:
|
||||
#
|
||||
# +---------+ 192.168.1.1
|
||||
# BACKBONE -----eth0-| linux |-eth1------*-[client]
|
||||
# +---------+
|
||||
#
|
||||
# Imagine you want to shape traffic from backbone to the client to 28Kbit
|
||||
# and traffic in the opposite direction to 128Kbit. You need to setup CBQ
|
||||
# on both eth0 and eth1 interfaces, thus you need two config files:
|
||||
#
|
||||
# cbq-028.backbone-client
|
||||
# --------------------------------------------------------------------------
|
||||
# DEVICE=eth1,10Mbit,1Mbit
|
||||
# RATE=28Kbit
|
||||
# WEIGHT=2Kbit
|
||||
# PRIO=5
|
||||
# RULE=192.168.1.1
|
||||
# --------------------------------------------------------------------------
|
||||
#
|
||||
# cbq-128.client-backbone
|
||||
# --------------------------------------------------------------------------
|
||||
# DEVICE=eth0,10Mbit,1Mbit
|
||||
# RATE=128Kbit
|
||||
# WEIGHT=10Kbit
|
||||
# PRIO=5
|
||||
# RULE=192.168.1.1,
|
||||
# --------------------------------------------------------------------------
|
||||
#
|
||||
# Pay attention to comma "," in the RULE field - it denotes source address!
|
||||
#
|
||||
# Enjoy.
|
||||
#
|
||||
#############################################################################
|
||||
|
||||
export LC_ALL=C
|
||||
|
||||
### Command locations
|
||||
TC=/sbin/tc
|
||||
IP=/sbin/ip
|
||||
MP=/sbin/modprobe
|
||||
|
||||
### Default filter priorities (must be different)
|
||||
PRIO_RULE_DEFAULT=${PRIO_RULE:-100}
|
||||
PRIO_MARK_DEFAULT=${PRIO_MARK:-200}
|
||||
PRIO_REALM_DEFAULT=${PRIO_REALM:-300}
|
||||
|
||||
### Default CBQ_PATH & CBQ_CACHE settings
|
||||
CBQ_PATH=${CBQ_PATH:-/etc/sysconfig/cbq}
|
||||
CBQ_CACHE=${CBQ_CACHE:-/var/cache/cbq.init}
|
||||
|
||||
### Uncomment to enable logfile for debugging
|
||||
#CBQ_DEBUG="/var/run/cbq-$1"
|
||||
|
||||
### Modules to probe for. Uncomment the last CBQ_PROBE
|
||||
### line if you have QoS support compiled into kernel
|
||||
CBQ_PROBE="sch_cbq sch_tbf sch_sfq sch_prio"
|
||||
CBQ_PROBE="$CBQ_PROBE cls_fw cls_u32 cls_route"
|
||||
#CBQ_PROBE=""
|
||||
|
||||
### Keywords required for qdisc & class configuration
|
||||
CBQ_WORDS="DEVICE|RATE|WEIGHT|PRIO|PARENT|LEAF|BOUNDED|ISOLATED"
|
||||
CBQ_WORDS="$CBQ_WORDS|PRIO_MARK|PRIO_RULE|PRIO_REALM|BUFFER"
|
||||
CBQ_WORDS="$CBQ_WORDS|LIMIT|PEAK|MTU|QUANTUM|PERTURB"
|
||||
|
||||
### Source AVPKT if it exists
|
||||
[ -r /etc/sysconfig/cbq/avpkt ] && . /etc/sysconfig/cbq/avpkt
|
||||
AVPKT=${AVPKT:-3000}
|
||||
|
||||
|
||||
#############################################################################
|
||||
############################# SUPPORT FUNCTIONS #############################
|
||||
#############################################################################
|
||||
|
||||
### Print the name of every network device reported by "ip link show"
cbq_device_list () {
	# Only the numbered header lines ("2: eth0: <...>") carry a device name;
	# the sed expression keeps just the name from each of them.
	ip link show |
		sed -n "/^[0-9]/ { s/^[0-9]\+: \([a-z0-9._]\+\)[:@].*/\1/; p; }"
} # cbq_device_list
|
||||
|
||||
|
||||
### Delete the root qdisc of device $1 (error output is discarded)
cbq_device_off () {
	tc qdisc del dev $1 root 2>/dev/null
} # cbq_device_off
|
||||
|
||||
|
||||
### Tear down the root qdisc on every device returned by cbq_device_list
cbq_off () {
	for dev in $(cbq_device_list); do
		cbq_device_off $dev
	done
} # cbq_off
|
||||
|
||||
|
||||
### Print the arguments prefixed with "**CBQ: " (escape sequences are honoured)
cbq_message () {
	echo -e "**CBQ: $@"
} # cbq_message
|
||||
|
||||
### Report the message via cbq_message, then terminate the script with status 1
cbq_failure () {
	cbq_message "$@"
	exit 1
} # cbq_failure
|
||||
|
||||
### Report the message, remove CBQ from all devices, then exit with status 1
cbq_fail_off () {
	cbq_message "$@"
	cbq_off
	exit 1
} # cbq_fail_off
|
||||
|
||||
|
||||
### Convert "HH:MM" given in $1 to the number of minutes since midnight
cbq_time2abs () {
	local hours=${1%%:*}
	local minutes=${1##*:}

	# Strip one leading zero so that "08" and "09" are not read as octal.
	hours=${hours##0}
	minutes=${minutes##0}

	echo $((hours*60 + minutes))
} # cbq_time2abs
|
||||
|
||||
|
||||
### Show qdiscs, classes and filters of every device; $1 is passed to tc
### as an extra option (e.g. "-s" for statistics)
cbq_show () {
	for dev in $(cbq_device_list); do
		# An empty section ends the report for this device.
		if [ $(tc qdisc show dev $dev | wc -l) -eq 0 ]; then
			continue
		fi
		echo -e "### $dev: queueing disciplines\n"
		tc $1 qdisc show dev $dev
		echo

		if [ $(tc class show dev $dev | wc -l) -eq 0 ]; then
			continue
		fi
		echo -e "### $dev: traffic classes\n"
		tc $1 class show dev $dev
		echo

		if [ $(tc filter show dev $dev | wc -l) -eq 0 ]; then
			continue
		fi
		echo -e "### $dev: filtering rules\n"
		tc $1 filter show dev $dev
		echo
	done
} # cbq_show
|
||||
|
||||
|
||||
### Validate the configuration directory $1 and set the globals
### CLASSLIST (class file names), DEVFIELDS (unique DEVICE values) and
### DEVICES (device names). Exits through cbq_failure on any problem.
cbq_init () {
	### Class files: regular files or symlinks named cbq-*, no "~" backups
	CLASSLIST=$(find $1 -maxdepth 1 \( -type f -or -type l \) -name 'cbq-*' \
		-not -name '*~' -printf "%f\n" | sort)
	if [ -z "$CLASSLIST" ]; then
		cbq_failure "no configuration files found in $1!"
	fi

	### Collect the value of every DEVICE= line from those files.
	### NOTE: the continuation lines below belong to the quoted sed script
	### and are left unindented so the script text stays unchanged.
	DEVFIELDS=$(find $1 -maxdepth 1 \( -type f -or -type l \) -name 'cbq-*' \
		-not -name '*~' | xargs sed -n 's/#.*//; \
s/[[:space:]]//g; /^DEVICE=[^,]*,[^,]*\(,[^,]*\)\?/ \
{ s/.*=//; p; }' | sort -u)
	if [ -z "$DEVFIELDS" ]; then
		cbq_failure "no DEVICE field found in $1/cbq-*!"
	fi

	### Each device must map to exactly one distinct DEVICE field
	DEVICES=$(echo "$DEVFIELDS" | sed 's/,.*//' | sort -u)
	[ $(echo "$DEVICES" | wc -l) -ne $(echo "$DEVFIELDS" | wc -l) ] &&
		cbq_failure "different DEVICE fields for single device!\n$DEVFIELDS"
} # cbq_init
|
||||
|
||||
|
||||
### Load the class configuration file $1/$2 into global variables
### (CLASS, CFILE, RATE, WEIGHT, DEVICE, BANDWIDTH, ...). Calls cbq_fail_off
### when the class ID, RATE/WEIGHT or DEVICE is unusable.
cbq_load_class () {
	### Class ID is the leading hex part of the file name after "cbq-"
	CLASS=$(echo $2 | sed 's/^cbq-0*//; s/^\([0-9a-fA-F]\+\).*/\1/')

	### KEY=value lines of the file with comments and whitespace removed
	CFILE=$(sed -n 's/#.*//; s/[[:space:]]//g; /^[[:alnum:]_]\+=[[:alnum:].,:;/*@-_]\+$/ p' $1/$2)

	### The ID must parse as hex and lie within 0x0002..0xFFFF
	IDVAL=$(/usr/bin/printf "%d" 0x$CLASS 2> /dev/null)
	if [ $? -ne 0 -o $IDVAL -lt 2 -o $IDVAL -gt 65535 ]; then
		cbq_fail_off "class ID of $2 must be in range <0002-FFFF>!"
	fi

	### Defaults, overridden below by whatever the class file sets
	RATE=""
	WEIGHT=""
	PARENT=""
	PRIO=5
	LEAF=tbf
	BOUNDED=yes
	ISOLATED=no
	BUFFER=10Kb/8
	LIMIT=15Kb
	MTU=1500
	PEAK=""
	PERTURB=10
	QUANTUM=""

	PRIO_RULE=$PRIO_RULE_DEFAULT
	PRIO_MARK=$PRIO_MARK_DEFAULT
	PRIO_REALM=$PRIO_REALM_DEFAULT

	### Only assignments to the keywords in CBQ_WORDS are evaluated
	eval $(echo "$CFILE" | grep -E "^($CBQ_WORDS)=")

	### RATE and WEIGHT are mandatory
	if [ -z "$RATE" -o -z "$WEIGHT" ]; then
		cbq_fail_off "missing RATE or WEIGHT in $2!"
	fi

	### Keep only the device name from "name,bandwidth[,weight]"
	DEVICE=${DEVICE%%,*}
	if [ -z "$DEVICE" ]; then
		cbq_fail_off "missing DEVICE field in $2!"
	fi

	### Device bandwidth is the second field of the matching DEVFIELDS line
	BANDWIDTH=$(echo "$DEVFIELDS" |
		sed -n "/^$DEVICE,/ { s/[^,]*,\([^,]*\).*/\1/; p; q; }")

	### Turn optional values into ready-to-use "tc" option strings
	PEAK=${PEAK:+peakrate $PEAK}
	PERTURB=${PERTURB:+perturb $PERTURB}
	QUANTUM=${QUANTUM:+quantum $QUANTUM}

	if [ "$BOUNDED" = "no" ]; then
		BOUNDED=""
	else
		BOUNDED="bounded"
	fi

	if [ "$ISOLATED" = "yes" ]; then
		ISOLATED="isolated"
	else
		ISOLATED=""
	fi
} # cbq_load_class
|
||||
|
||||
|
||||
#############################################################################
|
||||
#################################### INIT ###################################
|
||||
#############################################################################
|
||||
|
||||
### Both the tc and ip binaries must be executable at the configured paths
if ! [ -x $TC -a -x $IP ]; then
	cbq_failure "ip-route2 utilities not installed or executable!"
fi


### ip/tc wrappers: start with the plain pass-through versions and
### override them below for compile and debug modes.
ip () {
	$IP "$@"
} # ip

tc () {
	$TC "$@"
} # tc

if [ "$1" = "compile" ]; then
	### no module probing
	CBQ_PROBE=""

	### echo-only version of "tc" command
	tc () {
		echo "$TC $@"
	} # tc

elif [ -n "$CBQ_DEBUG" ]; then
	### Start a fresh log file headed by the current date
	echo -e "# $(date)" > $CBQ_DEBUG

	### Logging version of "ip" command
	ip () {
		echo -e "\n# ip $@" >> $CBQ_DEBUG
		$IP "$@" 2>&1 | tee -a $CBQ_DEBUG
	} # ip

	### Logging version of "tc" command
	tc () {
		echo -e "\n# tc $@" >> $CBQ_DEBUG
		$TC "$@" 2>&1 | tee -a $CBQ_DEBUG
	} # tc
fi # ip/tc wrappers
|
||||
|
||||
|
||||
case "$1" in

#############################################################################
############################### START/COMPILE ###############################
#############################################################################

start|compile)

	### Load the QoS modules (the list is emptied in compile mode)
	for module in $CBQ_PROBE; do
		$MP $module || cbq_failure "failed to load module $module"
	done

	### The cache is bypassed in compile, nocache and debug-logging modes
	if [ "$1" != "compile" -a "$2" != "nocache" -a -z "$CBQ_DEBUG" ]; then
		VALID=1

		### A missing cache or an explicit "invalidate" forces a rebuild
		if [ "$2" = "invalidate" -o ! -f $CBQ_CACHE ]; then
			VALID=0
		fi

		### ... and so does anything in $CBQ_PATH newer than the cache
		if [ $VALID -eq 1 ]; then
			if [ $(find $CBQ_PATH -maxdepth 1 -newer $CBQ_CACHE | \
				wc -l) -gt 0 ]; then
				VALID=0
			fi
		fi

		### Regenerate the cache by running ourselves in compile mode
		if [ $VALID -ne 1 ]; then
			$0 compile > $CBQ_CACHE ||
				cbq_fail_off "failed to compile CBQ configuration!"
		fi

		### Hand over to the cached command list
		exec /bin/sh $CBQ_CACHE 2> /dev/null
	fi

	### Load DEVICES, DEVFIELDS and CLASSLIST
	cbq_init $CBQ_PATH


	### Root qdisc for every configured device
	for dev in $DEVICES; do
		### "bandwidth[,weight]" part of this device's DEVICE field
		DEVTEMP=$(echo "$DEVFIELDS" | sed -n "/^$dev,/ { s/$dev,//; p; q; }")
		DEVBWDT=${DEVTEMP%%,*}
		DEVWGHT=${DEVTEMP##*,}
		# Without a comma both expansions yield the same text: no weight.
		if [ "$DEVBWDT" = "$DEVWGHT" ]; then
			DEVWGHT=""
		fi

		### Device bandwidth is required
		if [ -z "$DEVBWDT" ]; then
			cbq_message "could not determine bandwidth for device $dev!"
			cbq_failure "please set up the DEVICE fields properly!"
		fi

		### The device has to exist
		ip link show $dev &> /dev/null ||
			cbq_fail_off "device $dev not found!"

		### Drop whatever root qdisc is currently attached
		cbq_device_off $dev

		### Attach the CBQ root qdisc
		tc qdisc add dev $dev root handle 1 cbq \
			bandwidth $DEVBWDT avpkt $AVPKT cell 8

		### Apply the root class weight when one was given
		if [ -n "$DEVWGHT" ]; then
			tc class change dev $dev root cbq weight $DEVWGHT allot 1514
		fi

		if [ "$1" = "compile" ]; then
			echo
		fi
	done # dev


	### Traffic classes
	for classfile in $CLASSLIST; do
		cbq_load_class $CBQ_PATH $classfile

		### The class itself
		tc class add dev $DEVICE parent 1:$PARENT classid 1:$CLASS cbq \
			bandwidth $BANDWIDTH rate $RATE weight $WEIGHT prio $PRIO \
			allot 1514 cell 8 maxburst 20 avpkt $AVPKT $BOUNDED $ISOLATED ||
			cbq_fail_off "failed to add class $CLASS with parent $PARENT on $DEVICE!"

		### Optional leaf qdisc (tbf or sfq)
		if [ "$LEAF" = "tbf" ]; then
			tc qdisc add dev $DEVICE parent 1:$CLASS handle $CLASS tbf \
				rate $RATE buffer $BUFFER limit $LIMIT mtu $MTU $PEAK
		elif [ "$LEAF" = "sfq" ]; then
			tc qdisc add dev $DEVICE parent 1:$CLASS handle $CLASS sfq \
				$PERTURB $QUANTUM
		fi


		### One fw filter on the root class per MARK field
		for mark in $(echo "$CFILE" | sed -n '/^MARK/ { s/.*=//; p; }'); do
			tc filter add dev $DEVICE parent 1:0 protocol ip \
				prio $PRIO_MARK handle $mark fw classid 1:$CLASS
		done ### mark

		### One route filter on the root class per REALM field
		for realm in $(echo "$CFILE" | sed -n '/^REALM/ { s/.*=//; p; }'); do
			### "src,dst" -> source and destination realms
			SREALM=${realm%%,*}
			DREALM=${realm##*,}
			if [ "$SREALM" = "$DREALM" ]; then
				SREALM=""
			fi

			### A leading asterisk means "any": drop it
			SREALM=${SREALM#\*}
			DREALM=${DREALM#\*}

			tc filter add dev $DEVICE parent 1:0 protocol ip \
				prio $PRIO_REALM route ${SREALM:+from $SREALM} \
				${DREALM:+to $DREALM} classid 1:$CLASS
		done ### realm

		### One u32 filter per RULE field
		for rule in $(echo "$CFILE" | sed -n '/^RULE/ { s/.*=//; p; }'); do
			### "src,dst": no comma means destination only
			SRC=${rule%%,*}
			DST=${rule##*,}
			if [ "$SRC" = "$rule" ]; then
				SRC=""
			fi

			### Destination: address[:port[/mask]]
			DADDR=${DST%%:*}
			DTEMP=${DST##*:}
			if [ "$DADDR" = "$DST" ]; then
				DTEMP=""
			fi

			DPORT=${DTEMP%%/*}
			DMASK=${DTEMP##*/}
			if [ "$DPORT" = "$DTEMP" ]; then
				DMASK="0xffff"
			fi

			### Source (if specified): address[:port[/mask]]
			SADDR=""
			SPORT=""
			if [ -n "$SRC" ]; then
				SADDR=${SRC%%:*}
				STEMP=${SRC##*:}
				if [ "$SADDR" = "$SRC" ]; then
					STEMP=""
				fi

				SPORT=${STEMP%%/*}
				SMASK=${STEMP##*/}
				if [ "$SPORT" = "$STEMP" ]; then
					SMASK="0xffff"
				fi
			fi

			### A leading asterisk means "any address": drop it
			SADDR=${SADDR#\*}
			DADDR=${DADDR#\*}

			### Build the u32 match clauses
			u32_s="${SPORT:+match ip sport $SPORT $SMASK}"
			u32_s="${SADDR:+match ip src $SADDR} $u32_s"
			u32_d="${DPORT:+match ip dport $DPORT $DMASK}"
			u32_d="${DADDR:+match ip dst $DADDR} $u32_d"

			### Uncomment the following if you want to see parsed rules
			#echo "$rule: $u32_s $u32_d"

			tc filter add dev $DEVICE parent 1:0 protocol ip \
				prio $PRIO_RULE u32 $u32_s $u32_d classid 1:$CLASS
		done ### rule

		if [ "$1" = "compile" ]; then
			echo
		fi
	done ### classfile
	;;


#############################################################################
################################# TIME CHECK ################################
#############################################################################

timecheck)

	### Current weekday number and time of day
	TIME_TMP=$(date +%w/%k:%M)
	TIME_DOW=${TIME_TMP%%/*}
	TIME_NOW=${TIME_TMP##*/}

	### Load DEVICES, DEVFIELDS and CLASSLIST
	cbq_init $CBQ_PATH

	### Examine every class that carries TIME rules
	for classfile in $CLASSLIST; do
		TIMESET=$(sed -n 's/#.*//; s/[[:space:]]//g; /^TIME/ { s/.*=//; p; }' \
			$CBQ_PATH/$classfile)
		if [ -z "$TIMESET" ]; then
			continue
		fi

		MATCH=0
		CHANGE=0
		for timerule in $TIMESET; do
			# Recomputed per rule: the midnight fixup below may shift it.
			TIME_ABS=$(cbq_time2abs $TIME_NOW)

			### Rule layout: [weekdays/]begin-end;rate/weight[/peak]
			TIMESPEC=${timerule%%;*}
			PARAMS=${timerule##*;}
			WEEKDAYS=${TIMESPEC%%/*}
			INTERVAL=${TIMESPEC##*/}
			BEG_TIME=${INTERVAL%%-*}
			END_TIME=${INTERVAL##*-}

			### Skip the rule when weekdays are given and today is not listed
			if [ "$WEEKDAYS" != "$INTERVAL" -a \
				-n "${WEEKDAYS##*$TIME_DOW*}" ]; then
				continue
			fi

			### Interval boundaries in minutes since midnight
			BEG_ABS=$(cbq_time2abs $BEG_TIME)
			END_ABS=$(cbq_time2abs $END_TIME)

			### Interval wraps past midnight: shift times into the next day
			if [ $BEG_ABS -gt $END_ABS ]; then
				if [ $TIME_ABS -le $END_ABS ]; then
					TIME_ABS=$((TIME_ABS + 24*60))
				fi

				END_ABS=$((END_ABS + 24*60))
			fi

			### Inside the interval: remember the rule's parameters
			if [ $TIME_ABS -ge $BEG_ABS -a $TIME_ABS -lt $END_ABS ]; then
				TMP_RATE=${PARAMS%%/*}
				PARAMS=${PARAMS#*/}
				TMP_WGHT=${PARAMS%%/*}
				TMP_PEAK=${PARAMS##*/}

				if [ "$TMP_PEAK" = "$TMP_WGHT" ]; then
					TMP_PEAK=""
				fi
				TMP_PEAK=${TMP_PEAK:+peakrate $TMP_PEAK}

				MATCH=1
			fi
		done ### timerule


		cbq_load_class $CBQ_PATH $classfile

		### Rate currently reported by tc for this class
		RATE_NOW=$(tc class show dev $DEVICE | sed -n \
			"/cbq 1:$CLASS / { s/.*rate //; s/ .*//; p; q; }")
		if [ -z "$RATE_NOW" ]; then
			continue
		fi

		if [ $MATCH -ne 0 ]; then
			### A TIME rule applies: switch to it if the rate differs
			if [ "$RATE_NOW" != "$TMP_RATE" ]; then
				NEW_RATE="$TMP_RATE"
				NEW_WGHT="$TMP_WGHT"
				NEW_PEAK="$TMP_PEAK"
				CHANGE=1
			fi
		elif [ "$RATE_NOW" != "$RATE" ]; then
			### No rule applies: fall back to the class defaults
			NEW_WGHT="$WEIGHT"
			NEW_RATE="$RATE"
			NEW_PEAK="$PEAK"
			CHANGE=1
		fi

		### Nothing to change for this class
		if [ $CHANGE -eq 0 ]; then
			continue
		fi

		### Replace CBQ class
		tc class replace dev $DEVICE classid 1:$CLASS cbq \
			bandwidth $BANDWIDTH rate $NEW_RATE weight $NEW_WGHT prio $PRIO \
			allot 1514 cell 8 maxburst 20 avpkt $AVPKT $BOUNDED $ISOLATED

		### Replace leaf qdisc (tbf only)
		if [ "$LEAF" = "tbf" ]; then
			tc qdisc replace dev $DEVICE handle $CLASS tbf \
				rate $NEW_RATE buffer $BUFFER limit $LIMIT mtu $MTU $NEW_PEAK
		fi

		cbq_message "$TIME_NOW: class $CLASS on $DEVICE changed rate ($RATE_NOW -> $NEW_RATE)"
	done ### class file
	;;


#############################################################################
################################## THE REST #################################
#############################################################################

stop)
	cbq_off
	;;

list)
	cbq_show
	;;

stats)
	cbq_show -s
	;;

restart)
	### Remaining arguments are forwarded to "start"
	shift
	$0 stop
	$0 start "$@"
	;;

*)
	echo "Usage: $(basename $0) {start|compile|stop|restart|timecheck|list|stats}"
esac
|
||||
|
|
@ -0,0 +1,76 @@
|
|||
#! /bin/sh

TC=/home/root/tc
IP=/home/root/ip
DEVICE=eth1
BANDWIDTH="bandwidth 10Mbit"

# Root CBQ qdisc on ${DEVICE}, handle 1:.
#   ${BANDWIDTH} - the real bandwidth of ${DEVICE} (10Mbit)
#   avpkt        - average packet size
#   mpu          - minimal packet size

${TC} qdisc add dev ${DEVICE} root handle 1: cbq \
	${BANDWIDTH} avpkt 1000 mpu 64

# Root class, classid 1:1. This step is optional.
#   bandwidth - same value as on the CBQ qdisc itself
#   rate      - all of the bandwidth
#   allot     - MTU + MAC header
#   maxburst  - measures the allowed class burstiness
#               (please read the S. Floyd and VJ papers)
#   est 1sec 8sec - the kernel evaluates the average rate of this class
#               with period 1sec and time constant 8sec; the rate can be
#               viewed with "tc -s class ls dev $DEVICE"

${TC} class add dev ${DEVICE} parent 1:0 classid :1 est 1sec 8sec cbq \
	${BANDWIDTH} rate 10Mbit allot 1514 maxburst 50 avpkt 1000

# Bulk class.
#   weight - set proportional to "rate"; not required, weight=1 works too
#   defmap/split - best-effort traffic that is not classified by other
#            means falls into this class

${TC} class add dev ${DEVICE} parent 1:1 classid :2 est 1sec 8sec cbq \
	${BANDWIDTH} rate 4Mbit allot 1514 weight 500Kbit \
	prio 6 maxburst 50 avpkt 1000 split 1:0 defmap ff3d

# OPTIONAL: attach an "sfq" qdisc to the class. quantum is the MTU,
# perturb is the period of hash function perturbation in seconds.
#
${TC} qdisc add dev ${DEVICE} parent 1:2 sfq quantum 1514b perturb 15

# Interactive-burst class

${TC} class add dev ${DEVICE} parent 1:1 classid :3 est 2sec 16sec cbq \
	${BANDWIDTH} rate 1Mbit allot 1514 weight 100Kbit \
	prio 2 maxburst 100 avpkt 1000 split 1:0 defmap c0

${TC} qdisc add dev ${DEVICE} parent 1:3 sfq quantum 1514b perturb 15

# Background class

${TC} class add dev ${DEVICE} parent 1:1 classid :4 est 1sec 8sec cbq \
	${BANDWIDTH} rate 100Kbit allot 1514 weight 10Mbit \
	prio 7 maxburst 10 avpkt 1000 split 1:0 defmap 2

${TC} qdisc add dev ${DEVICE} parent 1:4 sfq quantum 1514b perturb 15

# Realtime class for RSVP

${TC} class add dev ${DEVICE} parent 1:1 classid 1:7FFE cbq \
	rate 5Mbit ${BANDWIDTH} allot 1514b avpkt 1000 \
	maxburst 20

# Reclassified realtime traffic
#
# Here split is 1:7FFE instead of 1:0: only real-time packets that
# violated policing filters or exceeded reshaping buffers fall into
# this class.

${TC} class add dev ${DEVICE} parent 1:7FFE classid 1:7FFF est 4sec 32sec cbq \
	rate 1Mbit ${BANDWIDTH} allot 1514b avpkt 1000 weight 10Kbit \
	prio 6 maxburst 10 split 1:7FFE defmap ffff
|
||||
|
||||
|
|
@ -0,0 +1,446 @@
|
|||
#!/bin/bash
|
||||
#
|
||||
# dhclient-script for Linux.
|
||||
#
|
||||
# This program is free software; you can redistribute it and/or
|
||||
# modify it under the terms of the GNU General Public License
|
||||
# as published by the Free Software Foundation; either version
|
||||
# 2 of the License, or (at your option) any later version.
|
||||
#
|
||||
# Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
|
||||
#
|
||||
# Probably, I did not understand, what this funny feature as "alias"
|
||||
# means exactly. For now I suppose, that it is a static address, which
|
||||
# we should install and preserve.
|
||||
#
|
||||
|
||||
# From here on, stdout and stderr are appended to the log file.
exec >> /var/log/DHS.log 2>&1

# Record how we were invoked plus every old_/new_/check_ variable.
echo dhc-script $* reason=$reason
set | grep "^\(old_\|new_\|check_\)"

# Write the arguments to the log, prefixed with "LOG".
LOG () {
	echo LOG $*
}
|
||||
|
||||
# Convert a single contiguous mask octet to its bit length.
# arg: $1 = mask octet (0, 128, 192, ..., 254, 255)
# returns: the length (1..8) as the exit status; 0 for a zero octet and
#          for any octet that is not a contiguous mask.
#
Mask8ToLen() {
	local l=0

	# A contiguous mask with l trailing zero bits equals 256 - 2^l.
	while [ $l -le 7 ]; do
		if [ $(( (1 << l) + $1 )) -eq 256 ]; then
			return $(( 8 - l ))
		fi
		l=$(( l + 1 ))
	done
	return 0
}

# Convert an inet dotted quad mask to its prefix length.
# arg: $1 = dotquad mask
# returns: the prefix length (0..32) as the exit status, or 255 when the
#          mask has an unsupported layout.
#
# Masks shorter than /8 (128.0.0.0 .. 254.0.0.0) are handled too; they
# used to fall through to the error return.
#
MaskToLen() {
	local masklen=0
	local mask8=$1
	local len8

	case $1 in
	0.0.0.0)
		return 0
		;;
	255.*.0.0)
		masklen=8
		mask8=${mask8#255.}
		mask8=${mask8%.0.0}
		;;
	255.255.*.0)
		masklen=16
		mask8=${mask8#255.255.}
		mask8=${mask8%.0}
		;;
	255.255.255.*)
		masklen=24
		mask8=${mask8#255.255.255.}
		;;
	*.0.0.0)
		# Prefix shorter than /8: only the first octet carries mask bits.
		masklen=0
		mask8=${mask8%.0.0.0}
		;;
	*)
		return 255
		;;
	esac

	Mask8ToLen $mask8
	len8=$?

	# 0.0.0.0 was handled above, so a zero-length first octet here means
	# that octet is not a valid contiguous mask.
	if [ $masklen -eq 0 -a $len8 -eq 0 ]; then
		return 255
	fi
	return $(( len8 + masklen ))
}
|
||||
|
||||
# Print the classful ("natural") netmask of an address.
# arg: $1 = dotquad address
# The two special addresses 255.255.255.255 and 0.0.0.0 are echoed back
# unchanged.
#
ABCMask () {
	local first_octet=${1%%.*}

	case "$1" in
	255.255.255.255|0.0.0.0)
		echo $1
		return
		;;
	esac

	if [ $first_octet -ge 224 ]; then
		echo 240.0.0.0
	elif [ $first_octet -ge 192 ]; then
		echo 255.255.255.0
	elif [ $first_octet -ge 128 ]; then
		echo 255.255.0.0
	else
		echo 255.0.0.0
	fi
}
|
||||
|
||||
# Return the length of the classful ("natural") netmask of an address.
# arg: $1 = dotquad address
# returns: the mask length as the exit status (32 for 255.255.255.255,
#          0 for 0.0.0.0, otherwise 4, 24, 16 or 8 by first octet).
#
ABCMaskLen () {
	local first_octet=${1%%.*}

	case "$1" in
	255.255.255.255)
		return 32
		;;
	0.0.0.0)
		return 0
		;;
	esac

	if [ $first_octet -ge 224 ]; then
		return 4
	elif [ $first_octet -ge 192 ]; then
		return 24
	elif [ $first_octet -ge 128 ]; then
		return 16
	fi
	return 8
}
|
||||
|
||||
# Delete IP address by bringing the (aliased) interface down.
# args: $1 = interface
#       $2 = address   (only logged)
#       $3 = mask      (only logged)
#       $4 = broadcast (only logged)
#       $5 = label; when set, the alias "$1:$5" is brought down
#
DelINETAddr () {
	local addrid=$1

	LOG DelINETAddr $*

	if [ "$5" ]; then
		addrid=$addrid:$5
	fi
	LOG ifconfig $addrid down
	ifconfig $addrid down
}
|
||||
|
||||
# Add IP address to an interface (or to its alias) and bring it up.
# args: $1 = interface
#       $2 = address
#       $3 = mask (optional)
#       $4 = broadcast (optional)
#       $5 = label (optional); when set, the alias "$1:$5" is configured
#
AddINETAddr () {
	local ifname=$1
	# Each option string stays empty when its argument was not supplied.
	local netmask_opt=${3:+netmask $3}
	local bcast_opt=${4:+broadcast $4}

	LOG AddINETAddr $*

	if [ "$5" ]; then
		ifname=$ifname:$5
	fi

	LOG ifconfig $ifname $2 $netmask_opt $bcast_opt up
	ifconfig $ifname $2 $netmask_opt $bcast_opt up
}
|
||||
|
||||
# Add a default route through every listed router.
# args: $1 = routers list (whitespace separated); nothing is done when empty
#
AddDefaultRoutes() {
	local gw

	[ "$1" ] || return 0

	LOG AddDefaultRoutes $*
	for gw in $1; do
		LOG route add default gw $gw
		route add default gw $gw
	done
}
|
||||
|
||||
# Remove the default route through every listed router.
# args: $1 = routers list (whitespace separated); nothing is done when empty
#
DelDefaultRoutes() {
	local gw

	[ "$1" ] || return 0

	LOG DelDefaultRoutes $*
	for gw in $1; do
		LOG route del default gw $gw
		route del default gw $gw
	done
}
|
||||
|
||||
# Ping a host once (ping is run with "-c 1 -w 2").
# args: $1 = dotquad address of the host
# returns: 0 if ping succeeded, 1 otherwise.
#
PingNode() {
	LOG PingNode $*
	ping -q -c 1 -w 2 $1 || return 1
	return 0
}
|
||||
|
||||
# Ping every router in the list and add a default route through each one
# that answers.
# args: $1 = routers list
# returns: 0 if at least one router is alive, 1 otherwise.
#
CheckRouterList() {
	local gw
	local status=1

	LOG CheckRouterList $*

	for gw in $1; do
		if ! PingNode $gw ; then
			continue
		fi
		status=0
		route add default gw $gw
	done
	return $status
}
|
||||
|
||||
# Delete/create static routes.
# args: $1 = operation (del/add)
#       $2 = routes list in format "dst1 nexthop1 dst2 ..."
#
# Each destination gets its classful netmask from ABCMask. A trailing
# destination without a next hop is ignored.
#
# BEWARE: this feature of DHCP is obsolete, because does not
# support subnetting.
#
X-StaticRouteList() {
	local op=$1
	local lst="$2"

	LOG X-StaticRouteList $*

	if [ "$lst" ]; then
		# "--" keeps a whitespace-only list from turning this into a bare
		# "set" (which prints every variable and leaves the function's own
		# arguments in $1/$2), and stops a leading "-" being read as an
		# option.
		set -- $lst
		while [ $# -gt 1 ]; do
			route $op -net $1 netmask `ABCMask "$1"` gw $2
			shift; shift;
		done
	fi
}
|
||||
|
||||
# Install static routes (logs the call, then delegates to
# X-StaticRouteList with the "add" operation).
# arg: $1 = routes list in format "dst1 nexthop1 dst2 ..."
#
AddStaticRouteList() {
	LOG AddStaticRouteList $*
	X-StaticRouteList add "$1"
}
|
||||
|
||||
# Remove static routes (logs the call, then delegates to
# X-StaticRouteList with the "del" operation).
# arg: $1 = routes list in format "dst1 nexthop1 dst2 ..."
#
DelStaticRouteList() {
	LOG DelStaticRouteList $*
	X-StaticRouteList del "$1"
}
|
||||
|
||||
# Broadcast unsolicited ARP to update neighbours' caches.
# Runs "arping -A" immediately and "arping -U" two seconds later, both in
# the background. Does nothing when /sbin/arping is absent.
# args: $1 = interface
#       $2 = address
#
UnsolicitedARP() {
	[ -f /sbin/arping ] || return 0

	/sbin/arping -A -c 1 -I "$1" "$2" &
	(sleep 2 ; /sbin/arping -U -c 1 -I "$1" "$2" ) &
}
|
||||
|
||||
# Duplicate address detection via "arping -D".
# args: $1 = interface
#       $2 = test address
# returns: arping's exit status, or 0 when /sbin/arping is absent
#          (0 means DAD succeeded).
DAD() {
	[ -f /sbin/arping ] || return 0

	/sbin/arping -c 2 -w 3 -D -I "$1" "$2"
}
|
||||
|
||||
|
||||
# Setup resolver.
# args: NO
#       domain and nameserver list are passed in global variables
#       (new_domain_name, new_domain_name_servers and their old_ twins).
#
# A candidate file is always written to /etc/resolv.conf.dhcp; it replaces
# /etc/resolv.conf only when that is safe (see below).
#
# NOTE: we try to be careful and not to break user supplied resolv.conf.
#       The script mangles it, only if it has dhcp magic signature.
#
UpdateDNS() {
	local nameserver
	local idstring="#### Generated by DHCPCD"

	LOG UpdateDNS $*

	# Nothing was supplied by the server: leave the resolver alone.
	if [ "$new_domain_name" = "" -a "$new_domain_name_servers" = "" ]; then
		return 0;
	fi

	echo $idstring > /etc/resolv.conf.dhcp
	if [ "$new_domain_name" ]; then
		echo search $new_domain_name >> /etc/resolv.conf.dhcp
	fi
	echo options ndots:1 >> /etc/resolv.conf.dhcp

	if [ "$new_domain_name_servers" ]; then
		for nameserver in $new_domain_name_servers; do
			echo nameserver $nameserver >> /etc/resolv.conf.dhcp
		done
	else
		echo nameserver 127.0.0.1 >> /etc/resolv.conf.dhcp
	fi

	if [ -f /etc/resolv.conf ]; then
		# A file without our signature belongs to the user: keep it.
		if [ "`head -1 /etc/resolv.conf`" != "$idstring" ]; then
			return 0
		fi
		# Settings are unchanged, so the installed file is already right.
		# The line continuation after "-a" is required: without it the
		# test was cut in two and its second half was run as a command.
		if [ "$old_domain_name" = "$new_domain_name" -a \
		     "$new_domain_name_servers" = "$old_domain_name_servers" ]; then
			return 0
		fi
	fi
	mv /etc/resolv.conf.dhcp /etc/resolv.conf
}
|
||||
|
||||
# Dispatch on the reason the DHCP client invoked this script.
case $reason in
NBI)
	exit 1
	;;

MEDIUM|ARPSEND)
	# Nothing to do for these reasons.
	exit 0
	;;

PREINIT)
	ifconfig $interface:dhcp down
	ifconfig $interface:dhcp1 down
	if [ -d /proc/sys/net/ipv4/conf/$interface ]; then
		ifconfig $interface:dhcp 10.10.10.10 netmask 255.255.255.255
		ifconfig $interface:dhcp down
		if [ -d /proc/sys/net/ipv4/conf/$interface ]; then
			LOG The interface $interface already configured.
		fi
	fi
	ifconfig $interface:dhcp up
	exit 0
	;;

ARPCHECK)
	# Exit status tells the client whether the address is free.
	DAD "$interface" "$check_ip_address" && exit 0
	exit 1
	;;

BOUND|RENEW|REBIND|REBOOT)
	# Drop the alias if it differs from the previous lease address.
	if [ "$old_ip_address" -a "$alias_ip_address" -a \
	     "$alias_ip_address" != "$old_ip_address" ]; then
		DelINETAddr "$interface" "$alias_ip_address" "$alias_subnet_mask" "$alias_broadcast_address" dhcp1
	fi

	# The address changed: remove the old address and its routes.
	if [ "$old_ip_address" -a "$old_ip_address" != "$new_ip_address" ]; then
		DelINETAddr "$interface" "$old_ip_address" "$old_subnet_mask" "$old_broadcast_address" dhcp
		DelDefaultRoutes "$old_routers"
		DelStaticRouteList "$old_static_routes"
	fi

	# New or changed address, or a fresh BOUND/REBOOT: install everything.
	if [ "$old_ip_address" = "" -o "$old_ip_address" != "$new_ip_address" -o \
	     "$reason" = "BOUND" -o "$reason" = "REBOOT" ]; then
		AddINETAddr "$interface" "$new_ip_address" "$new_subnet_mask" "$new_broadcast_address" dhcp
		AddStaticRouteList "$new_static_routes"
		AddDefaultRoutes "$new_routers"
		UnsolicitedARP "$interface" "$new_ip_address"
	fi

	# (Re)install the alias unless it equals the leased address.
	if [ "$new_ip_address" != "$alias_ip_address" -a "$alias_ip_address" ]; then
		AddINETAddr "$interface" "$alias_ip_address" "$alias_subnet_mask" "$alias_broadcast_address" dhcp1
	fi

	UpdateDNS
	exit 0
	;;

EXPIRE|FAIL)
	if [ "$alias_ip_address" ]; then
		DelINETAddr "$interface" "$alias_ip_address" "$alias_subnet_mask" "$alias_broadcast_address" dhcp1
	fi

	if [ "$old_ip_address" ]; then
		DelINETAddr "$interface" "$old_ip_address" "$old_subnet_mask" "$old_broadcast_address" dhcp
		DelDefaultRoutes "$old_routers"
		DelStaticRouteList "$old_static_routes"
	fi

	# The alias is a static address: put it back after the cleanup.
	if [ "$alias_ip_address" ]; then
		AddINETAddr "$interface" "$alias_ip_address" "$alias_subnet_mask" "$alias_broadcast_address" dhcp1
	fi
	exit 0
	;;

TIMEOUT)
	if [ "$alias_ip_address" ]; then
		DelINETAddr "$interface" "$alias_ip_address" "$alias_subnet_mask" "$alias_broadcast_address" dhcp1
	fi

	# Seems, <null address> means, that no more old leases found.
	# Or does it mean bug in dhcpcd? 8) Fail for now.
	if [ "$new_ip_address" = "<null address>" ]; then
		if [ "$old_ip_address" ]; then
			DelINETAddr "$interface" "$old_ip_address" "$old_subnet_mask" "$old_broadcast_address" dhcp
		fi
		if [ "$alias_ip_address" ]; then
			AddINETAddr "$interface" "$alias_ip_address" "$alias_subnet_mask" "$alias_broadcast_address" dhcp1
		fi
		exit 1
	fi

	# Try the offered address: it must pass DAD and at least one router
	# must answer, otherwise everything is rolled back below.
	if DAD "$interface" "$new_ip_address" ; then
		AddINETAddr "$interface" "$new_ip_address" "$new_subnet_mask" "$new_broadcast_address" dhcp
		UnsolicitedARP "$interface" "$new_ip_address"

		if [ "$alias_ip_address" -a "$alias_ip_address" != "$new_ip_address" ]; then
			AddINETAddr "$interface" "$alias_ip_address" "$alias_subnet_mask" "$alias_broadcast_address" dhcp1
			UnsolicitedARP "$interface" "$alias_ip_address"
		fi

		if CheckRouterList "$new_routers" ; then
			AddStaticRouteList "$new_static_routes"
			UpdateDNS
			exit 0
		fi
	fi

	# Rollback path.
	DelINETAddr "$interface" "$new_ip_address" "$new_subnet_mask" "$new_broadcast_address" dhcp
	DelDefaultRoutes "$old_routers"
	DelStaticRouteList "$old_static_routes"
	if [ "$alias_ip_address" ]; then
		AddINETAddr "$interface" "$alias_ip_address" "$alias_subnet_mask" "$alias_broadcast_address" dhcp1
	fi
	exit 1
	;;
esac

exit 0
|
||||
|
|
@ -0,0 +1,68 @@
|
|||
#! /bin/sh -x
|
||||
#
|
||||
# sample script on using the ingress capabilities
|
||||
# This script just tags packets on the ingress interface using ipchains
|
||||
# the result is used for fast classification and re-marking
|
||||
# on the egress interface
|
||||
#
|
||||
#path to various utilities;
|
||||
#change to reflect yours.
|
||||
#
|
||||
IPROUTE=/root/DS-6-beta/iproute2-990530-dsing
|
||||
TC=$IPROUTE/tc/tc
|
||||
IP=$IPROUTE/ip/ip
|
||||
IPCHAINS=/root/DS-6-beta/ipchains-1.3.9/ipchains
|
||||
INDEV=eth2
|
||||
EGDEV="dev eth1"
|
||||
#
|
||||
# tag all incoming packets from host 10.2.0.24 to value 1
|
||||
# tag all incoming packets from host 10.2.0.3 to value 2
|
||||
# tag the rest of incoming packets from subnet 10.2.0.0/24 to value 3
|
||||
#These values are used in the egress
|
||||
#
|
||||
############################################################
|
||||
$IPCHAINS -A input -s 10.2.0.0/24 -m 3
|
||||
$IPCHAINS -A input -i $INDEV -s 10.2.0.24 -m 1
|
||||
$IPCHAINS -A input -i $INDEV -s 10.2.0.3 -m 2
|
||||
|
||||
######################## Egress side ########################
|
||||
|
||||
|
||||
# attach a dsmarker
|
||||
#
|
||||
$TC qdisc add $EGDEV handle 1:0 root dsmark indices 64 set_tc_index
|
||||
#
|
||||
# values of the DSCP to change depending on the class
|
||||
#
|
||||
#becomes EF
|
||||
$TC class change $EGDEV classid 1:1 dsmark mask 0x3 \
|
||||
value 0xb8
|
||||
#becomes AF11
|
||||
$TC class change $EGDEV classid 1:2 dsmark mask 0x3 \
|
||||
value 0x28
|
||||
#becomes AF21
|
||||
$TC class change $EGDEV classid 1:3 dsmark mask 0x3 \
|
||||
value 0x48
|
||||
#
|
||||
#
|
||||
# The class mapping
|
||||
#
|
||||
$TC filter add $EGDEV parent 1:0 protocol ip prio 4 handle 1 fw classid 1:1
|
||||
$TC filter add $EGDEV parent 1:0 protocol ip prio 4 handle 2 fw classid 1:2
|
||||
$TC filter add $EGDEV parent 1:0 protocol ip prio 4 handle 3 fw classid 1:3
|
||||
#
|
||||
|
||||
#
|
||||
echo "---- qdisc parameters Ingress ----------"
|
||||
$TC qdisc ls dev $INDEV
|
||||
echo "---- Class parameters Ingress ----------"
|
||||
$TC class ls dev $INDEV
|
||||
echo "---- filter parameters Ingress ----------"
|
||||
$TC filter ls dev $INDEV parent 1:0
|
||||
|
||||
echo "---- qdisc parameters Egress ----------"
|
||||
$TC qdisc ls $EGDEV
|
||||
echo "---- Class parameters Egress ----------"
|
||||
$TC class ls $EGDEV
|
||||
echo "---- filter parameters Egress ----------"
|
||||
$TC filter ls $EGDEV parent 1:0
|
||||
|
|
@ -0,0 +1,87 @@
|
|||
#! /bin/sh -x
|
||||
#
|
||||
# sample script on using the ingress capabilities
|
||||
# This script tags the fwmark on the ingress interface using IPchains
|
||||
# the result is used first for policing on the Ingress interface then
|
||||
# for fast classification and re-marking
|
||||
# on the egress interface
|
||||
#
|
||||
#path to various utilities;
|
||||
#change to reflect yours.
|
||||
#
|
||||
IPROUTE=/root/DS-6-beta/iproute2-990530-dsing
|
||||
TC=$IPROUTE/tc/tc
|
||||
IP=$IPROUTE/ip/ip
|
||||
IPCHAINS=/root/DS-6-beta/ipchains-1.3.9/ipchains
|
||||
INDEV=eth2
|
||||
EGDEV="dev eth1"
|
||||
#
|
||||
# tag all incoming packets from host 10.2.0.24 to value 1
|
||||
# tag all incoming packets from host 10.2.0.3 to value 2
|
||||
# tag the rest of incoming packets from subnet 10.2.0.0/24 to value 3
|
||||
#These values are used in the egress
|
||||
############################################################
|
||||
$IPCHAINS -A input -s 10.2.0.0/24 -m 3
|
||||
$IPCHAINS -A input -i $INDEV -s 10.2.0.24 -m 1
|
||||
$IPCHAINS -A input -i $INDEV -s 10.2.0.3 -m 2
|
||||
############################################################
|
||||
#
|
||||
# install the ingress qdisc on the ingress interface
|
||||
############################################################
|
||||
$TC qdisc add dev $INDEV handle ffff: ingress
|
||||
############################################################
|
||||
|
||||
#
|
||||
# attach a fw classifier to the ingress which polices anything marked
|
||||
# by ipchains to tag value 3 (The rest of the subnet packets -- not
|
||||
# tag 1 or 2) to not go beyond 1.5Mbps
|
||||
# Allow up to at least 60 packets to burst (assuming maximum packet
|
||||
# size of 1.5 KB) in the long run and up to about 6 packets in the
|
||||
# short run
|
||||
|
||||
############################################################
|
||||
$TC filter add dev $INDEV parent ffff: protocol ip prio 50 handle 3 fw \
|
||||
police rate 1500kbit burst 90k mtu 9k drop flowid :1
|
||||
############################################################
|
||||
|
||||
######################## Egress side ########################
|
||||
|
||||
|
||||
# attach a dsmarker
|
||||
#
|
||||
$TC qdisc add $EGDEV handle 1:0 root dsmark indices 64
|
||||
#
|
||||
# values of the DSCP to change depending on the class
|
||||
#
|
||||
$TC class change $EGDEV classid 1:1 dsmark mask 0x3 \
|
||||
value 0xb8
|
||||
$TC class change $EGDEV classid 1:2 dsmark mask 0x3 \
|
||||
value 0x28
|
||||
$TC class change $EGDEV classid 1:3 dsmark mask 0x3 \
|
||||
value 0x48
|
||||
#
|
||||
#
|
||||
# The class mapping
|
||||
#
|
||||
$TC filter add $EGDEV parent 1:0 protocol ip prio 4 handle 1 fw classid 1:1
|
||||
$TC filter add $EGDEV parent 1:0 protocol ip prio 4 handle 2 fw classid 1:2
|
||||
$TC filter add $EGDEV parent 1:0 protocol ip prio 4 handle 3 fw classid 1:3
|
||||
#
|
||||
|
||||
#
|
||||
echo "---- qdisc parameters Ingress ----------"
|
||||
$TC qdisc ls dev $INDEV
|
||||
echo "---- Class parameters Ingress ----------"
|
||||
$TC class ls dev $INDEV
|
||||
echo "---- filter parameters Ingress ----------"
|
||||
$TC filter ls dev $INDEV parent ffff:
|
||||
|
||||
echo "---- qdisc parameters Egress ----------"
|
||||
$TC qdisc ls $EGDEV
|
||||
echo "---- Class parameters Egress ----------"
|
||||
$TC class ls $EGDEV
|
||||
echo "---- filter parameters Egress ----------"
|
||||
$TC filter ls $EGDEV parent 1:0
|
||||
#
|
||||
#deleting the ingress qdisc
|
||||
#$TC qdisc del dev $INDEV ingress
|
||||
|
|
@ -0,0 +1,170 @@
|
|||
#! /bin/sh -x
|
||||
#
|
||||
# sample script on using the ingress capabilities using u32 classifier
|
||||
# This script tags tcindex based on metering on the ingress
|
||||
# interface the result is used for fast classification and re-marking
|
||||
# on the egress interface
|
||||
# This is an example of a color aware mode marker with PIR configured
|
||||
# based on draft-wahjak-mcm-00.txt (section 3.1)
|
||||
#
|
||||
# The colors are defined using the Diffserv Fields
|
||||
#path to various utilities;
|
||||
#change to reflect yours.
|
||||
#
|
||||
IPROUTE=/usr/src/iproute2-current
|
||||
TC=$IPROUTE/tc/tc
|
||||
IP=$IPROUTE/ip/ip
|
||||
INDEV=eth0
|
||||
EGDEV="dev eth1"
|
||||
CIR1=1500kbit
|
||||
CIR2=1000kbit
|
||||
|
||||
#The CBS is about 60 MTU sized packets
|
||||
CBS1=90k
|
||||
CBS2=90k
|
||||
|
||||
############################################################
|
||||
#
|
||||
# install the ingress qdisc on the ingress interface
|
||||
$TC qdisc add dev $INDEV handle ffff: ingress
|
||||
############################################################
|
||||
#
|
||||
# Create u32 filters
|
||||
$TC filter add dev $INDEV parent ffff: protocol ip prio 4 handle 1: u32 \
|
||||
divisor 1
|
||||
############################################################
|
||||
|
||||
# The meters: Note that we have shared meters in this case as identified
|
||||
# by the index parameter
|
||||
meter1=" police index 1 rate $CIR1 burst $CBS1 "
|
||||
meter2=" police index 2 rate $CIR2 burst $CBS1 "
|
||||
meter3=" police index 3 rate $CIR2 burst $CBS2 "
|
||||
meter4=" police index 4 rate $CIR1 burst $CBS2 "
|
||||
meter5=" police index 5 rate $CIR1 burst $CBS2 "
|
||||
|
||||
# All packets are marked with a tcindex value which is used on the egress
|
||||
# tcindex 1 maps to AF41, 2->AF42, 3->AF43, 4->BE
|
||||
|
||||
# *********************** AF41 ***************************
|
||||
#AF41 (DSCP 0x22) is passed on with a tcindex value 1
|
||||
#if it doesnt exceed its CIR/CBS
|
||||
#policer 1 is used.
|
||||
#
|
||||
$TC filter add dev $INDEV parent ffff: protocol ip prio 4 u32 \
|
||||
match ip tos 0x88 0xfc \
|
||||
$meter1 \
|
||||
continue flowid :1
|
||||
#
|
||||
# if it exceeds the above but not the extra rate/burst below, it gets a
|
||||
# tcindex value of 2
|
||||
# policer 2 is used
|
||||
#
|
||||
$TC filter add dev $INDEV parent ffff: protocol ip prio 5 u32 \
|
||||
match ip tos 0x88 0xfc \
|
||||
$meter2 \
|
||||
continue flowid :2
|
||||
#
|
||||
# if it exceeds the above but not the rule below, it gets a tcindex value
|
||||
# of 3 (policer 3)
|
||||
#
|
||||
$TC filter add dev $INDEV parent ffff: protocol ip prio 6 u32 \
|
||||
match ip tos 0x88 0xfc \
|
||||
$meter3 \
|
||||
drop flowid :3
|
||||
#
|
||||
|
||||
# *********************** AF42 ***************************
|
||||
#AF42 (DSCP 0x24) is passed on with a tcindex value 2
|
||||
#if it doesnt exceed its CIR/CBS
|
||||
#policer 2 is used. Note that this is shared with the AF41
|
||||
#
|
||||
#
|
||||
$TC filter add dev $INDEV parent ffff: protocol ip prio 5 u32 \
|
||||
match ip tos 0x90 0xfc \
|
||||
$meter2 \
|
||||
continue flowid :2
|
||||
#
|
||||
# if it exceeds the above but not the rule below, it gets a tcindex value
|
||||
# of 3 (policer 3)
|
||||
#
|
||||
$TC filter add dev $INDEV parent ffff: protocol ip prio 6 u32 \
|
||||
match ip tos 0x90 0xfc \
|
||||
$meter3 \
|
||||
drop flowid :3
|
||||
#
|
||||
# *********************** AF43 ***************************
|
||||
#
|
||||
#AF43 (DSCP 0x26) is passed on with a tcindex value 3
|
||||
#if it doesnt exceed its CIR/CBS
|
||||
#policer 3 is used. Note that this is shared with the AF41 and AF42
|
||||
#
|
||||
$TC filter add dev $INDEV parent ffff: protocol ip prio 6 u32 \
|
||||
match ip tos 0x98 0xfc \
|
||||
$meter3 \
|
||||
drop flowid :3
|
||||
#
|
||||
# *********************** BE ***************************
|
||||
#
|
||||
# Anything else (not from the AF4*) gets discarded if it
|
||||
# exceeds 1Mbps and by default goes to BE if it doesnt
|
||||
# Note that the BE class is also used by the AF4* in the worst
|
||||
# case
|
||||
#
|
||||
$TC filter add dev $INDEV parent ffff: protocol ip prio 7 u32 \
|
||||
match ip src 0/0\
|
||||
$meter4 \
|
||||
drop flowid :4
|
||||
|
||||
######################## Egress side ########################
|
||||
|
||||
# attach a dsmarker
|
||||
#
|
||||
$TC qdisc add $EGDEV handle 1:0 root dsmark indices 64
|
||||
#
|
||||
# values of the DSCP to change depending on the class
|
||||
#note that the ECN bits are masked out
|
||||
#
|
||||
#AF41 (0x88 is 0x22 shifted to the left by two bits)
|
||||
#
|
||||
$TC class change $EGDEV classid 1:1 dsmark mask 0x3 \
|
||||
value 0x88
|
||||
#AF42
|
||||
$TC class change $EGDEV classid 1:2 dsmark mask 0x3 \
|
||||
value 0x90
|
||||
#AF43
|
||||
$TC class change $EGDEV classid 1:3 dsmark mask 0x3 \
|
||||
value 0x98
|
||||
#BE (class 1:4 -- the class targeted by the "handle 4 tcindex" filter; using 1:3 here would overwrite the AF43 value)
|
||||
$TC class change $EGDEV classid 1:4 dsmark mask 0x3 \
|
||||
value 0x0
|
||||
#
|
||||
#
|
||||
# The class mapping
|
||||
#
|
||||
$TC filter add $EGDEV parent 1:0 protocol ip prio 1 \
|
||||
handle 1 tcindex classid 1:1
|
||||
$TC filter add $EGDEV parent 1:0 protocol ip prio 1 \
|
||||
handle 2 tcindex classid 1:2
|
||||
$TC filter add $EGDEV parent 1:0 protocol ip prio 1 \
|
||||
handle 3 tcindex classid 1:3
|
||||
$TC filter add $EGDEV parent 1:0 protocol ip prio 1 \
|
||||
handle 4 tcindex classid 1:4
|
||||
#
|
||||
|
||||
#
|
||||
echo "---- qdisc parameters Ingress ----------"
|
||||
$TC qdisc ls dev $INDEV
|
||||
echo "---- Class parameters Ingress ----------"
|
||||
$TC class ls dev $INDEV
|
||||
echo "---- filter parameters Ingress ----------"
|
||||
$TC filter ls dev $INDEV parent ffff:
|
||||
|
||||
echo "---- qdisc parameters Egress ----------"
|
||||
$TC qdisc ls $EGDEV
|
||||
echo "---- Class parameters Egress ----------"
|
||||
$TC class ls $EGDEV
|
||||
echo "---- filter parameters Egress ----------"
|
||||
$TC filter ls $EGDEV parent 1:0
|
||||
#
|
||||
#deleting the ingress qdisc
|
||||
#$TC qdisc del dev $INDEV ingress
|
||||
|
|
@ -0,0 +1,132 @@
|
|||
#! /bin/sh -x
|
||||
#
|
||||
# sample script on using the ingress capabilities
|
||||
# This script fwmark tags(IPchains) based on metering on the ingress
|
||||
# interface the result is used for fast classification and re-marking
|
||||
# on the egress interface
|
||||
# This is an example of a color blind mode marker with no PIR configured
|
||||
# based on draft-wahjak-mcm-00.txt (section 3.1)
|
||||
#
|
||||
#path to various utilities;
|
||||
#change to reflect yours.
|
||||
#
|
||||
IPROUTE=/root/DS-6-beta/iproute2-990530-dsing
|
||||
TC=$IPROUTE/tc/tc
|
||||
IP=$IPROUTE/ip/ip
|
||||
IPCHAINS=/root/DS-6-beta/ipchains-1.3.9/ipchains
|
||||
INDEV=eth2
|
||||
EGDEV="dev eth1"
|
||||
CIR1=1500kbit
|
||||
CIR2=1000kbit
|
||||
|
||||
#The CBS is about 60 MTU sized packets
|
||||
CBS1=90k
|
||||
CBS2=90k
|
||||
|
||||
meter1="police rate $CIR1 burst $CBS1 "
|
||||
meter2="police rate $CIR1 burst $CBS2 "
|
||||
meter3="police rate $CIR2 burst $CBS1 "
|
||||
meter4="police rate $CIR2 burst $CBS2 "
|
||||
meter5="police rate $CIR2 burst $CBS2 "
|
||||
#
|
||||
# tag the rest of incoming packets from subnet 10.2.0.0/24 to fw value 1
|
||||
# tag all incoming packets from any other subnet to fw tag 2
|
||||
############################################################
|
||||
$IPCHAINS -A input -i $INDEV -s 0/0 -m 2
|
||||
$IPCHAINS -A input -i $INDEV -s 10.2.0.0/24 -m 1
|
||||
#
|
||||
############################################################
|
||||
# install the ingress qdisc on the ingress interface
|
||||
$TC qdisc add dev $INDEV handle ffff: ingress
|
||||
#
|
||||
############################################################
|
||||
|
||||
# All packets are marked with a tcindex value which is used on the egress
|
||||
# tcindex 1 maps to AF41, 2->AF42, 3->AF43, 4->BE
|
||||
#
|
||||
############################################################
|
||||
#
|
||||
# anything with fw tag of 1 is passed on with a tcindex value 1
|
||||
#if it doesnt exceed its allocated rate (CIR/CBS)
|
||||
#
|
||||
$TC filter add dev $INDEV parent ffff: protocol ip prio 4 handle 1 fw \
|
||||
$meter1 \
|
||||
continue flowid 4:1
|
||||
#
|
||||
# if it exceeds the above but not the extra rate/burst below, it gets a
|
||||
#tcindex value of 2
|
||||
#
|
||||
$TC filter add dev $INDEV parent ffff: protocol ip prio 5 handle 1 fw \
|
||||
$meter2 \
|
||||
continue flowid 4:2
|
||||
#
|
||||
# if it exceeds the above but not the rule below, it gets a tcindex value
|
||||
# of 3
|
||||
#
|
||||
$TC filter add dev $INDEV parent ffff: protocol ip prio 6 handle 1 fw \
|
||||
$meter3 \
|
||||
drop flowid 4:3
|
||||
#
|
||||
# Anything else (not from the subnet 10.2.0.0/24) gets discarded if it
|
||||
# exceeds 1Mbps and by default goes to BE if it doesnt
|
||||
#
|
||||
$TC filter add dev $INDEV parent ffff: protocol ip prio 6 handle 2 fw \
|
||||
$meter5 \
|
||||
drop flowid 4:4
|
||||
|
||||
|
||||
######################## Egress side ########################
|
||||
|
||||
|
||||
# attach a dsmarker
|
||||
#
|
||||
$TC qdisc add $EGDEV handle 1:0 root dsmark indices 64
|
||||
#
|
||||
# values of the DSCP to change depending on the class
|
||||
#note that the ECN bits are masked out
|
||||
#
|
||||
#AF41 (0x88 is 0x22 shifted to the left by two bits)
|
||||
#
|
||||
$TC class change $EGDEV classid 1:1 dsmark mask 0x3 \
|
||||
value 0x88
|
||||
#AF42
|
||||
$TC class change $EGDEV classid 1:2 dsmark mask 0x3 \
|
||||
value 0x90
|
||||
#AF43
|
||||
$TC class change $EGDEV classid 1:3 dsmark mask 0x3 \
|
||||
value 0x98
|
||||
#BE
|
||||
$TC class change $EGDEV classid 1:4 dsmark mask 0x3 \
|
||||
value 0x0
|
||||
#
|
||||
#
|
||||
# The class mapping (using tcindex; could easily have
|
||||
# replaced it with the fw classifier instead)
|
||||
#
|
||||
$TC filter add $EGDEV parent 1:0 protocol ip prio 1 \
|
||||
handle 1 tcindex classid 1:1
|
||||
$TC filter add $EGDEV parent 1:0 protocol ip prio 1 \
|
||||
handle 2 tcindex classid 1:2
|
||||
$TC filter add $EGDEV parent 1:0 protocol ip prio 1 \
|
||||
handle 3 tcindex classid 1:3
|
||||
$TC filter add $EGDEV parent 1:0 protocol ip prio 1 \
|
||||
handle 4 tcindex classid 1:4
|
||||
#
|
||||
|
||||
#
|
||||
echo "---- qdisc parameters Ingress ----------"
|
||||
$TC qdisc ls dev $INDEV
|
||||
echo "---- Class parameters Ingress ----------"
|
||||
$TC class ls dev $INDEV
|
||||
echo "---- filter parameters Ingress ----------"
|
||||
$TC filter ls dev $INDEV parent ffff:
|
||||
|
||||
echo "---- qdisc parameters Egress ----------"
|
||||
$TC qdisc ls $EGDEV
|
||||
echo "---- Class parameters Egress ----------"
|
||||
$TC class ls $EGDEV
|
||||
echo "---- filter parameters Egress ----------"
|
||||
$TC filter ls $EGDEV parent 1:0
|
||||
#
|
||||
#deleting the ingress qdisc
|
||||
#$TC qdisc del dev $INDEV ingress
|
||||
|
|
@ -0,0 +1,198 @@
|
|||
#! /bin/sh -x
|
||||
#
|
||||
# sample script on using the ingress capabilities using u32 classifier
|
||||
# This script tags tcindex based on metering on the ingress
|
||||
# interface the result is used for fast classification and re-marking
|
||||
# on the egress interface
|
||||
# This is an example of a color aware mode marker with PIR configured
|
||||
# based on draft-wahjak-mcm-00.txt (section 3.2)
|
||||
#
|
||||
# The colors are defined using the Diffserv Fields
|
||||
#path to various utilities;
|
||||
#change to reflect yours.
|
||||
#
|
||||
IPROUTE=/root/DS-6-beta/iproute2-990530-dsing
|
||||
TC=$IPROUTE/tc/tc
|
||||
IP=$IPROUTE/ip/ip
|
||||
IPCHAINS=/root/DS-6-beta/ipchains-1.3.9/ipchains
|
||||
INDEV=eth2
|
||||
EGDEV="dev eth1"
|
||||
CIR1=1000kbit
|
||||
CIR2=500kbit
|
||||
# the PIR is what is in excess of the CIR
|
||||
PIR1=1000kbit
|
||||
PIR2=500kbit
|
||||
|
||||
#The CBS is about 60 MTU sized packets
|
||||
CBS1=90k
|
||||
CBS2=90k
|
||||
#the EBS is about 20 max sized packets
|
||||
EBS1=30k
|
||||
EBS2=30k
|
||||
|
||||
# The meters: Note that we have shared meters in this case as identified
|
||||
# by the index parameter
|
||||
meter1=" police index 1 rate $CIR1 burst $CBS1 "
|
||||
meter1a=" police index 2 rate $PIR1 burst $EBS1 "
|
||||
meter2=" police index 3 rate $CIR2 burst $CBS1 "
|
||||
meter2a=" police index 4 rate $PIR2 burst $EBS1 "
|
||||
meter3=" police index 5 rate $CIR2 burst $CBS2 "
|
||||
meter3a=" police index 6 rate $PIR2 burst $EBS2 "
|
||||
meter4=" police index 7 rate $CIR1 burst $CBS2 "
|
||||
|
||||
############################################################
|
||||
#
|
||||
# install the ingress qdisc on the ingress interface
|
||||
$TC qdisc add dev $INDEV handle ffff: ingress
|
||||
############################################################
|
||||
#
|
||||
# All packets are marked with a tcindex value which is used on the egress
|
||||
# tcindex 1 maps to AF41, 2->AF42, 3->AF43, 4->BE
|
||||
#
|
||||
# *********************** AF41 ***************************
|
||||
#AF41 (DSCP 0x22) is passed on with a tcindex value 1
|
||||
#if it doesnt exceed its CIR/CBS + PIR/EBS
|
||||
#policer 1 is used.
|
||||
#
|
||||
$TC filter add dev $INDEV parent ffff: protocol ip prio 1 u32 \
|
||||
match ip tos 0x88 0xfc \
|
||||
$meter1 \
|
||||
continue flowid :1
|
||||
$TC filter add dev $INDEV parent ffff: protocol ip prio 2 u32 \
|
||||
match ip tos 0x88 0xfc \
|
||||
$meter1a \
|
||||
continue flowid :1
|
||||
#
|
||||
# if it exceeds the above but not the extra rate/burst below, it gets a
|
||||
# tcindex value of 2
|
||||
# policer 2 is used
|
||||
#
|
||||
$TC filter add dev $INDEV parent ffff: protocol ip prio 3 u32 \
|
||||
match ip tos 0x88 0xfc \
|
||||
$meter2 \
|
||||
continue flowid :2
|
||||
$TC filter add dev $INDEV parent ffff: protocol ip prio 4 u32 \
|
||||
match ip tos 0x88 0xfc \
|
||||
$meter2a \
|
||||
continue flowid :2
|
||||
#
|
||||
# if it exceeds the above but not the rule below, it gets a tcindex value
|
||||
# of 3 (policer 3)
|
||||
#
|
||||
$TC filter add dev $INDEV parent ffff: protocol ip prio 5 u32 \
|
||||
match ip tos 0x88 0xfc \
|
||||
$meter3 \
|
||||
continue flowid :3
|
||||
$TC filter add dev $INDEV parent ffff: protocol ip prio 6 u32 \
|
||||
match ip tos 0x88 0xfc \
|
||||
$meter3a \
|
||||
drop flowid :3
|
||||
#
|
||||
# *********************** AF42 ***************************
|
||||
#AF42 (DSCP 0x24) is passed on with a tcindex value 2
|
||||
#if it doesnt exceed its CIR/CBS + PIR/EBS
|
||||
#policer 2 is used. Note that this is shared with the AF41
|
||||
#
|
||||
#
|
||||
$TC filter add dev $INDEV parent ffff: protocol ip prio 8 u32 \
|
||||
match ip tos 0x90 0xfc \
|
||||
$meter2 \
|
||||
continue flowid :2
|
||||
$TC filter add dev $INDEV parent ffff: protocol ip prio 9 u32 \
|
||||
match ip tos 0x90 0xfc \
|
||||
$meter2a \
|
||||
continue flowid :2
|
||||
#
|
||||
# if it exceeds the above but not the rule below, it gets a tcindex value
|
||||
# of 3 (policer 3)
|
||||
#
|
||||
$TC filter add dev $INDEV parent ffff: protocol ip prio 10 u32 \
|
||||
match ip tos 0x90 0xfc \
|
||||
$meter3 \
|
||||
continue flowid :3
|
||||
$TC filter add dev $INDEV parent ffff: protocol ip prio 11 u32 \
|
||||
match ip tos 0x90 0xfc \
|
||||
$meter3a \
|
||||
drop flowid :3
|
||||
|
||||
#
|
||||
# *********************** AF43 ***************************
|
||||
#
|
||||
#AF43 (DSCP 0x26) is passed on with a tcindex value 3
|
||||
#if it doesnt exceed its CIR/CBS + PIR/EBS
|
||||
#policer 3 is used. Note that this is shared with the AF41 and AF42
|
||||
#
|
||||
$TC filter add dev $INDEV parent ffff: protocol ip prio 13 u32 \
|
||||
match ip tos 0x98 0xfc \
|
||||
$meter3 \
|
||||
continue flowid :3
|
||||
$TC filter add dev $INDEV parent ffff: protocol ip prio 14 u32 \
|
||||
match ip tos 0x98 0xfc \
|
||||
$meter3a \
|
||||
drop flowid :3
|
||||
#
|
||||
## *********************** BE ***************************
|
||||
##
|
||||
## Anything else (not from the AF4*) gets discarded if it
|
||||
## exceeds 1Mbps and by default goes to BE if it doesnt
|
||||
## Note that the BE class is also used by the AF4* in the worst
|
||||
## case
|
||||
##
|
||||
$TC filter add dev $INDEV parent ffff: protocol ip prio 16 u32 \
|
||||
match ip src 0/0\
|
||||
$meter4 \
|
||||
drop flowid :4
|
||||
|
||||
######################## Egress side ########################
|
||||
|
||||
# attach a dsmarker
|
||||
#
|
||||
$TC qdisc add $EGDEV handle 1:0 root dsmark indices 64
|
||||
#
|
||||
# values of the DSCP to change depending on the class
|
||||
#note that the ECN bits are masked out
|
||||
#
|
||||
#AF41 (0x88 is 0x22 shifted to the left by two bits)
|
||||
#
|
||||
$TC class change $EGDEV classid 1:1 dsmark mask 0x3 \
|
||||
value 0x88
|
||||
#AF42
|
||||
$TC class change $EGDEV classid 1:2 dsmark mask 0x3 \
|
||||
value 0x90
|
||||
#AF43
|
||||
$TC class change $EGDEV classid 1:3 dsmark mask 0x3 \
|
||||
value 0x98
|
||||
#BE (class 1:4 -- the class targeted by the "handle 4 tcindex" filter; using 1:3 here would overwrite the AF43 value)
|
||||
$TC class change $EGDEV classid 1:4 dsmark mask 0x3 \
|
||||
value 0x0
|
||||
#
|
||||
#
|
||||
# The class mapping
|
||||
#
|
||||
$TC filter add $EGDEV parent 1:0 protocol ip prio 1 \
|
||||
handle 1 tcindex classid 1:1
|
||||
$TC filter add $EGDEV parent 1:0 protocol ip prio 1 \
|
||||
handle 2 tcindex classid 1:2
|
||||
$TC filter add $EGDEV parent 1:0 protocol ip prio 1 \
|
||||
handle 3 tcindex classid 1:3
|
||||
$TC filter add $EGDEV parent 1:0 protocol ip prio 1 \
|
||||
handle 4 tcindex classid 1:4
|
||||
#
|
||||
|
||||
#
|
||||
echo "---- qdisc parameters Ingress ----------"
|
||||
$TC qdisc ls dev $INDEV
|
||||
echo "---- Class parameters Ingress ----------"
|
||||
$TC class ls dev $INDEV
|
||||
echo "---- filter parameters Ingress ----------"
|
||||
$TC filter ls dev $INDEV parent ffff:
|
||||
|
||||
echo "---- qdisc parameters Egress ----------"
|
||||
$TC qdisc ls $EGDEV
|
||||
echo "---- Class parameters Egress ----------"
|
||||
$TC class ls $EGDEV
|
||||
echo "---- filter parameters Egress ----------"
|
||||
$TC filter ls $EGDEV parent 1:0
|
||||
#
|
||||
#deleting the ingress qdisc
|
||||
#$TC qdisc del dev $INDEV ingress
|
||||
|
|
@ -0,0 +1,144 @@
|
|||
#! /bin/sh -x
|
||||
#
|
||||
# sample script on using the ingress capabilities
|
||||
# This script fwmark tags(IPchains) based on metering on the ingress
|
||||
# interface the result is used for fast classification and re-marking
|
||||
# on the egress interface
|
||||
# This is an example of a color blind mode marker with no PIR configured
|
||||
# based on draft-wahjak-mcm-00.txt (section 3.1)
|
||||
#
|
||||
#path to various utilities;
|
||||
#change to reflect yours.
|
||||
#
|
||||
IPROUTE=/root/DS-6-beta/iproute2-990530-dsing
|
||||
TC=$IPROUTE/tc/tc
|
||||
IP=$IPROUTE/ip/ip
|
||||
IPCHAINS=/root/DS-6-beta/ipchains-1.3.9/ipchains
|
||||
INDEV=eth2
|
||||
EGDEV="dev eth1"
|
||||
CIR1=1500kbit
|
||||
CIR2=500kbit
|
||||
|
||||
#The CBS is about 60 MTU sized packets
|
||||
CBS1=90k
|
||||
CBS2=90k
|
||||
|
||||
meter1="police rate $CIR1 burst $CBS1 "
|
||||
meter1a="police rate $CIR2 burst $CBS1 "
|
||||
meter2="police rate $CIR1 burst $CBS2 "
|
||||
meter2a="police rate $CIR2 burst $CBS2 "
|
||||
meter3="police rate $CIR2 burst $CBS1 "
|
||||
meter3a="police rate $CIR2 burst $CBS1 "
|
||||
meter4="police rate $CIR2 burst $CBS2 "
|
||||
meter5="police rate $CIR1 burst $CBS2 "
|
||||
#
|
||||
# tag the rest of incoming packets from subnet 10.2.0.0/24 to fw value 1
|
||||
# tag all incoming packets from any other subnet to fw tag 2
|
||||
############################################################
|
||||
$IPCHAINS -A input -i $INDEV -s 0/0 -m 2
|
||||
$IPCHAINS -A input -i $INDEV -s 10.2.0.0/24 -m 1
|
||||
#
|
||||
############################################################
|
||||
# install the ingress qdisc on the ingress interface
|
||||
$TC qdisc add dev $INDEV handle ffff: ingress
|
||||
#
|
||||
############################################################
|
||||
|
||||
# All packets are marked with a tcindex value which is used on the egress
|
||||
# tcindex 1 maps to AF41, 2->AF42, 3->AF43, 4->BE
|
||||
#
|
||||
############################################################
|
||||
#
|
||||
# anything with fw tag of 1 is passed on with a tcindex value 1
|
||||
#if it doesnt exceed its allocated rate (CIR/CBS)
|
||||
#
|
||||
$TC filter add dev $INDEV parent ffff: protocol ip prio 1 handle 1 fw \
|
||||
$meter1 \
|
||||
continue flowid 4:1
|
||||
$TC filter add dev $INDEV parent ffff: protocol ip prio 2 handle 1 fw \
|
||||
$meter1a \
|
||||
continue flowid 4:1
|
||||
#
|
||||
# if it exceeds the above but not the extra rate/burst below, it gets a
|
||||
#tcindex value of 2
|
||||
#
|
||||
$TC filter add dev $INDEV parent ffff: protocol ip prio 3 handle 1 fw \
|
||||
$meter2 \
|
||||
continue flowid 4:2
|
||||
$TC filter add dev $INDEV parent ffff: protocol ip prio 4 handle 1 fw \
|
||||
$meter2a \
|
||||
continue flowid 4:2
|
||||
#
|
||||
# if it exceeds the above but not the rule below, it gets a tcindex value
|
||||
# of 3
|
||||
#
|
||||
$TC filter add dev $INDEV parent ffff: protocol ip prio 5 handle 1 fw \
|
||||
$meter3 \
|
||||
continue flowid 4:3
|
||||
$TC filter add dev $INDEV parent ffff: protocol ip prio 6 handle 1 fw \
|
||||
$meter3a \
|
||||
drop flowid 4:3
|
||||
#
|
||||
# Anything else (not from the subnet 10.2.0.0/24) gets discarded if it
|
||||
# exceeds 1Mbps and by default goes to BE if it doesnt
|
||||
#
|
||||
$TC filter add dev $INDEV parent ffff: protocol ip prio 7 handle 2 fw \
|
||||
$meter5 \
|
||||
drop flowid 4:4
|
||||
|
||||
|
||||
######################## Egress side ########################
|
||||
|
||||
|
||||
# attach a dsmarker
|
||||
#
|
||||
$TC qdisc add $EGDEV handle 1:0 root dsmark indices 64
|
||||
#
|
||||
# values of the DSCP to change depending on the class
|
||||
#note that the ECN bits are masked out
|
||||
#
|
||||
#AF41 (0x88 is 0x22 shifted to the left by two bits)
|
||||
#
|
||||
$TC class change $EGDEV classid 1:1 dsmark mask 0x3 \
|
||||
value 0x88
|
||||
#AF42
|
||||
$TC class change $EGDEV classid 1:2 dsmark mask 0x3 \
|
||||
value 0x90
|
||||
#AF43
|
||||
$TC class change $EGDEV classid 1:3 dsmark mask 0x3 \
|
||||
value 0x98
|
||||
#BE
|
||||
$TC class change $EGDEV classid 1:4 dsmark mask 0x3 \
|
||||
value 0x0
|
||||
#
|
||||
#
|
||||
# The class mapping (using tcindex; could easily have
|
||||
# replaced it with the fw classifier instead)
|
||||
#
|
||||
$TC filter add $EGDEV parent 1:0 protocol ip prio 1 \
|
||||
handle 1 tcindex classid 1:1
|
||||
$TC filter add $EGDEV parent 1:0 protocol ip prio 1 \
|
||||
handle 2 tcindex classid 1:2
|
||||
$TC filter add $EGDEV parent 1:0 protocol ip prio 1 \
|
||||
handle 3 tcindex classid 1:3
|
||||
$TC filter add $EGDEV parent 1:0 protocol ip prio 1 \
|
||||
handle 4 tcindex classid 1:4
|
||||
#
|
||||
|
||||
#
|
||||
echo "---- qdisc parameters Ingress ----------"
|
||||
$TC qdisc ls dev $INDEV
|
||||
echo "---- Class parameters Ingress ----------"
|
||||
$TC class ls dev $INDEV
|
||||
echo "---- filter parameters Ingress ----------"
|
||||
$TC filter ls dev $INDEV parent ffff:
|
||||
|
||||
echo "---- qdisc parameters Egress ----------"
|
||||
$TC qdisc ls $EGDEV
|
||||
echo "---- Class parameters Egress ----------"
|
||||
$TC class ls $EGDEV
|
||||
echo "---- filter parameters Egress ----------"
|
||||
$TC filter ls $EGDEV parent 1:0
|
||||
#
|
||||
#deleting the ingress qdisc
|
||||
#$TC qdisc del dev $INDEV ingress
|
||||
|
|
@ -0,0 +1,145 @@
|
|||
#! /bin/sh
|
||||
#
|
||||
# sample script on using the ingress capabilities using u32 classifier
|
||||
# This script tags tcindex based on metering on the ingress
|
||||
# interface the result is used for fast classification and re-marking
|
||||
# on the egress interface
|
||||
# This is an example of a color blind mode marker with PIR configured
|
||||
# based on draft-wahjak-mcm-00.txt (section 3.2)
|
||||
#
|
||||
#path to various utilities;
|
||||
#change to reflect yours.
|
||||
#
|
||||
IPROUTE=/root/DS-6-beta/iproute2-990530-dsing
|
||||
TC=$IPROUTE/tc/tc
|
||||
IP=$IPROUTE/ip/ip
|
||||
INDEV=eth2
|
||||
EGDEV="dev eth1"
|
||||
CIR1=1000kbit
|
||||
CIR2=1000kbit
|
||||
# The PIR is the excess (in addition to the CIR i.e if always
|
||||
# going to the PIR --> average rate is CIR+PIR)
|
||||
PIR1=1000kbit
|
||||
PIR2=500kbit
|
||||
|
||||
#The CBS is about 60 MTU sized packets
|
||||
CBS1=90k
|
||||
CBS2=90k
|
||||
#the EBS is about 10 max sized packets
|
||||
EBS1=15k
|
||||
EBS2=15k
|
||||
# The meters
|
||||
meter1=" police rate $CIR1 burst $CBS1 "
|
||||
meter1a=" police rate $PIR1 burst $EBS1 "
|
||||
meter2=" police rate $CIR2 burst $CBS1 "
|
||||
meter2a="police rate $PIR2 burst $EBS1 "   # excess (PIR) meters are bounded by the EBS, like meter1a/meter3a
|
||||
meter3=" police rate $CIR2 burst $CBS2 "
|
||||
meter3a=" police rate $PIR2 burst $EBS2 "
|
||||
meter4=" police rate $CIR1 burst $CBS2 "
|
||||
meter5=" police rate $CIR1 burst $CBS2 "
|
||||
|
||||
|
||||
# install the ingress qdisc on the ingress interface
|
||||
############################################################
|
||||
$TC qdisc add dev $INDEV handle ffff: ingress
|
||||
############################################################
|
||||
#
|
||||
############################################################
|
||||
|
||||
# All packets are marked with a tcindex value which is used on the egress
|
||||
# NOTE: tcindex 1 maps to AF41, 2->AF42, 3->AF43, 4->BE
|
||||
#
|
||||
#anything from subnet 10.2.0.0/24 is passed on with a tcindex value 1
|
||||
#if it doesnt exceed its CIR/CBS + PIR/EBS
|
||||
#
|
||||
$TC filter add dev $INDEV parent ffff: protocol ip prio 1 u32 \
|
||||
match ip src 10.2.0.0/24 $meter1 \
|
||||
continue flowid :1
|
||||
$TC filter add dev $INDEV parent ffff: protocol ip prio 2 u32 \
|
||||
match ip src 10.2.0.0/24 $meter1a \
|
||||
continue flowid :1
|
||||
|
||||
#
|
||||
# if it exceeds the above but not the extra rate/burst below, it gets a
|
||||
#tcindex value of 2
|
||||
#
|
||||
$TC filter add dev $INDEV parent ffff: protocol ip prio 3 u32 \
|
||||
match ip src 10.2.0.0/24 $meter2 \
|
||||
continue flowid :2
|
||||
$TC filter add dev $INDEV parent ffff: protocol ip prio 4 u32 \
|
||||
match ip src 10.2.0.0/24 $meter2a \
|
||||
continue flowid :2
|
||||
#
|
||||
# if it exceeds the above but not the rule below, it gets a tcindex value
|
||||
# of 3
|
||||
#
|
||||
$TC filter add dev $INDEV parent ffff: protocol ip prio 5 u32 \
|
||||
match ip src 10.2.0.0/24 $meter3 \
|
||||
continue flowid :3
|
||||
$TC filter add dev $INDEV parent ffff: protocol ip prio 6 u32 \
|
||||
match ip src 10.2.0.0/24 $meter3a \
|
||||
drop flowid :3
|
||||
#
|
||||
#
|
||||
# Anything else (not from the subnet 10.2.0.0/24) gets discarded if it
|
||||
# exceeds 1Mbps and by default goes to BE if it doesn't
|
||||
#
|
||||
$TC filter add dev $INDEV parent ffff: protocol ip prio 7 u32 \
|
||||
match ip src 0/0 $meter5 \
|
||||
drop flowid :4
|
||||
|
||||
|
||||
######################## Egress side ########################
|
||||
|
||||
|
||||
# attach a dsmarker
|
||||
#
|
||||
$TC qdisc add $EGDEV handle 1:0 root dsmark indices 64
|
||||
#
|
||||
# values of the DSCP to change depending on the class
|
||||
#note that the ECN bits are masked out
|
||||
#
|
||||
#AF41 (0x88 is 0x22 shifted to the left by two bits)
|
||||
#
|
||||
$TC class change $EGDEV classid 1:1 dsmark mask 0x3 \
|
||||
value 0x88
|
||||
#AF42
|
||||
$TC class change $EGDEV classid 1:2 dsmark mask 0x3 \
|
||||
value 0x90
|
||||
#AF43
|
||||
$TC class change $EGDEV classid 1:3 dsmark mask 0x3 \
|
||||
value 0x98
|
||||
#BE: tcindex 4 is mapped to classid 1:4 by the egress tcindex filter, so the
#best-effort DSCP must be programmed on 1:4 (1:3 is AF43 and keeps 0x98)
$TC class change $EGDEV classid 1:4 dsmark mask 0x3 \
       value 0x0
|
||||
#
|
||||
#
|
||||
# The class mapping
|
||||
#
|
||||
$TC filter add $EGDEV parent 1:0 protocol ip prio 1 \
|
||||
handle 1 tcindex classid 1:1
|
||||
$TC filter add $EGDEV parent 1:0 protocol ip prio 1 \
|
||||
handle 2 tcindex classid 1:2
|
||||
$TC filter add $EGDEV parent 1:0 protocol ip prio 1 \
|
||||
handle 3 tcindex classid 1:3
|
||||
$TC filter add $EGDEV parent 1:0 protocol ip prio 1 \
|
||||
handle 4 tcindex classid 1:4
|
||||
#
|
||||
|
||||
#
|
||||
echo "---- qdisc parameters Ingress ----------"
|
||||
$TC qdisc ls dev $INDEV
|
||||
echo "---- Class parameters Ingress ----------"
|
||||
$TC class ls dev $INDEV
|
||||
echo "---- filter parameters Ingress ----------"
|
||||
$TC filter ls dev $INDEV parent ffff:
|
||||
|
||||
echo "---- qdisc parameters Egress ----------"
|
||||
$TC qdisc ls $EGDEV
|
||||
echo "---- Class parameters Egress ----------"
|
||||
$TC class ls $EGDEV
|
||||
echo "---- filter parameters Egress ----------"
|
||||
$TC filter ls $EGDEV parent 1:0
|
||||
#
|
||||
#deleting the ingress qdisc
|
||||
#$TC qdisc del $INDEV ingress
|
||||
|
|
@ -0,0 +1,98 @@
|
|||
|
||||
Note all these are mere examples which can be customized to your needs
|
||||
|
||||
AFCBQ
|
||||
-----
|
||||
AF PHB built using CBQ, DSMARK,GRED (default in GRIO mode) ,RED for BE
|
||||
and the tcindex classifier with some algorithmic mapping
|
||||
|
||||
EFCBQ
|
||||
-----
|
||||
EF PHB built using CBQ (for rate control and prioritization),
|
||||
DSMARK( to remark DSCPs), tcindex classifier and RED for the BE
|
||||
traffic.
|
||||
|
||||
EFPRIO
|
||||
------
|
||||
EF PHB using the PRIO scheduler, Token Bucket to rate control EF,
|
||||
tcindex classifier, DSMARK to remark, and RED for the BE traffic
|
||||
|
||||
EDGE scripts
|
||||
==============
|
||||
|
||||
CB-3(1|2)-(u32/chains)
|
||||
======================
|
||||
|
||||
|
||||
The major differences are that the classifier is u32 on -u32 extension
|
||||
and IPchains on the chains extension. CB stands for color Blind
|
||||
and 31 is for the mode where only a CIR and CBS are defined whereas
|
||||
32 stands for a mode where a CIR/CBS + PIR/EBS are defined.
|
||||
|
||||
Color Blind (CB)
|
||||
================
|
||||
We look at one special subnet that we are interested in for simplicity
|
||||
reasons to demonstrate the capability. We send the packets from that
|
||||
subnet to AF4*, BE or end up dropping depending on the metering results.
|
||||
|
||||
|
||||
The algorithm overview is as follows:
|
||||
|
||||
*classify:
|
||||
|
||||
**case: subnet X
|
||||
----------------
|
||||
if !exceed meter1 tag as AF41
|
||||
else
|
||||
if !exceed meter2 tag as AF42
|
||||
else
|
||||
if !exceed meter 3 tag as AF43
|
||||
else
|
||||
drop
|
||||
|
||||
default case: Any other subnet
|
||||
-------------------------------
|
||||
if !exceed meter 5 tag as BE
|
||||
else
|
||||
drop
|
||||
|
||||
|
||||
On the Egress side, change the DSCPs of the packets to reflect AF4* and BE
|
||||
based on the tags from the ingress.
|
||||
|
||||
-------------------------------------------------------------
|
||||
|
||||
Color Aware
|
||||
===========
|
||||
|
||||
Define some meters with policing and give them IDs, e.g.
|
||||
|
||||
meter1=police index 1 rate $CIR1 burst $CBS1
|
||||
meter2=police index 2 rate $CIR2 burst $CBS2 etc
|
||||
|
||||
General overview:
|
||||
classify based on the DSCPs and use the policer ids to decide tagging
|
||||
|
||||
|
||||
*classify on ingress:
|
||||
|
||||
switch (dscp) {
|
||||
case AF41: /* tos&0xfc == 0x88 */
|
||||
if (!exceed meter1) break;
|
||||
case AF42: /* tos&0xfc == 0x90 */
|
||||
if (!exceed meter2) {
|
||||
tag as AF42;
|
||||
break;
|
||||
}
|
||||
case AF43: /* tos&0xfc == 0x98 */
|
||||
if (!exceed meter3) {
|
||||
tag as AF43;
|
||||
break;
|
||||
} else
|
||||
drop;
|
||||
default:
|
||||
if (!exceed meter4) tag as BE;
|
||||
else drop;
|
||||
}
|
||||
|
||||
On the Egress side mark the proper AF tags
|
||||
|
|
@ -0,0 +1,105 @@
|
|||
#!/usr/bin/perl
|
||||
#
|
||||
#
|
||||
# AF using CBQ for a single interface eth0
|
||||
# 4 AF classes using GRED and one BE using RED
|
||||
# Things you might want to change:
|
||||
# - the device bandwidth (set at 10Mbits)
|
||||
# - the bandwidth allocated for each AF class and the BE class
|
||||
# - the drop probability associated with each AF virtual queue
|
||||
#
|
||||
# AF DSCP values used (based on AF draft 04)
|
||||
# -----------------------------------------
|
||||
# AF DSCP values
|
||||
# AF1 1. 0x0a 2. 0x0c 3. 0x0e
|
||||
# AF2 1. 0x12 2. 0x14 3. 0x16
|
||||
# AF3 1. 0x1a 2. 0x1c 3. 0x1e
|
||||
# AF4 1. 0x22 2. 0x24 3. 0x26
|
||||
|
||||
#
|
||||
#
|
||||
# A simple DSCP-class relationship formula used to generate
|
||||
# values in the for loop of this script; $drop stands for the
|
||||
# DP
|
||||
# $dscp = ($class*8+$drop*2)
|
||||
#
|
||||
# if you use GRIO buffer sharing, then GRED priority is set as follows:
|
||||
# $gprio=$drop+1;
|
||||
#
|
||||
|
||||
$TC = "/usr/src/iproute2-current/tc/tc";
|
||||
$DEV = "dev lo";
|
||||
$DEV = "dev eth1";
|
||||
$DEV = "dev eth0";
|
||||
# the BE-class number
|
||||
$beclass = "5";
|
||||
|
||||
#GRIO buffer sharing on or off?
|
||||
$GRIO = "";
|
||||
$GRIO = "grio";
|
||||
# The bandwidth of your device
|
||||
$linerate="10Mbit";
|
||||
# The BE and AF rates
|
||||
%rate_table=();
|
||||
$berate="1500Kbit";
|
||||
$rate_table{"AF1rate"}="1500Kbit";
|
||||
$rate_table{"AF2rate"}="1500Kbit";
|
||||
$rate_table{"AF3rate"}="1500Kbit";
|
||||
$rate_table{"AF4rate"}="1500Kbit";
|
||||
#
|
||||
#
|
||||
#
|
||||
print "\n# --- General setup ---\n";
|
||||
print "$TC qdisc add $DEV handle 1:0 root dsmark indices 64 set_tc_index\n";
|
||||
print "$TC filter add $DEV parent 1:0 protocol ip prio 1 tcindex mask 0xfc " .
|
||||
"shift 2 pass_on\n";
|
||||
#"shift 2\n";
|
||||
print "$TC qdisc add $DEV parent 1:0 handle 2:0 cbq bandwidth $linerate ".
|
||||
"cell 8 avpkt 1000 mpu 64\n";
|
||||
print "$TC filter add $DEV parent 2:0 protocol ip prio 1 tcindex ".
|
||||
"mask 0xf0 shift 4 pass_on\n";
|
||||
for $class (1..4) {
|
||||
print "\n# --- AF Class $class specific setup---\n";
|
||||
$AFrate=sprintf("AF%drate",$class);
|
||||
print "$TC class add $DEV parent 2:0 classid 2:$class cbq ".
|
||||
"bandwidth $linerate rate $rate_table{$AFrate} avpkt 1000 prio ".
|
||||
(6-$class)." bounded allot 1514 weight 1 maxburst 21\n";
|
||||
print "$TC filter add $DEV parent 2:0 protocol ip prio 1 handle $class ".
|
||||
"tcindex classid 2:$class\n";
|
||||
print "$TC qdisc add $DEV parent 2:$class gred setup DPs 3 default 2 ".
|
||||
"$GRIO\n";
|
||||
#
|
||||
# per DP setup
|
||||
#
|
||||
for $drop (1..3) {
|
||||
print "\n# --- AF Class $class DP $drop---\n";
|
||||
$dscp = $class*8+$drop*2;
|
||||
$tcindex = sprintf("1%x%x",$class,$drop);
|
||||
print "$TC filter add $DEV parent 1:0 protocol ip prio 1 ".
|
||||
"handle $dscp tcindex classid 1:$tcindex\n";
|
||||
$prob = $drop*0.02;
|
||||
if ($GRIO) {
|
||||
$gprio = $drop+1;
|
||||
print "$TC qdisc change $DEV parent 2:$class gred limit 60KB min 15KB ".
|
||||
"max 45KB burst 20 avpkt 1000 bandwidth $linerate DP $drop ".
|
||||
"probability $prob ".
|
||||
"prio $gprio\n";
|
||||
} else {
|
||||
print "$TC qdisc change $DEV parent 2:$class gred limit 60KB min 15KB ".
|
||||
"max 45KB burst 20 avpkt 1000 bandwidth $linerate DP $drop ".
|
||||
"probability $prob \n";
|
||||
}
|
||||
}
|
||||
}
|
||||
#
|
||||
#
|
||||
# Emit the tc commands for the best-effort (BE) queue: a prio-2 tcindex
# filter on the dsmark qdisc (1:0), a bounded CBQ class for BE under 2:0,
# the tcindex filter that steers handle 0 into that class, and a RED qdisc
# attached to it.
print "\n#------BE Queue setup------\n";
print "$TC filter add $DEV parent 1:0 protocol ip prio 2 ".
          "handle 0 tcindex mask 0 classid 1:1\n";
# The class id is taken from $beclass everywhere, so changing the BE class
# number at the top of the script keeps class, filter and qdisc in sync.
print "$TC class add $DEV parent 2:0 classid 2:$beclass cbq ".
      "bandwidth $linerate rate $berate avpkt 1000 prio 6 " .
      "bounded allot 1514 weight 1 maxburst 21 \n";
print "$TC filter add $DEV parent 2:0 protocol ip prio 1 handle 0 tcindex ".
      "classid 2:$beclass\n";
print "$TC qdisc add $DEV parent 2:$beclass red limit 60KB min 15KB max 45KB ".
      "burst 20 avpkt 1000 bandwidth $linerate probability 0.4\n";
|
||||
|
|
@ -0,0 +1,25 @@
|
|||
#!/usr/bin/perl
|
||||
$TC = "/root/DS-6-beta/iproute2-990530-dsing/tc/tc";
|
||||
$DEV = "dev eth1";
|
||||
$efrate="1.5Mbit";
|
||||
$MTU="1.5kB";
|
||||
print "$TC qdisc add $DEV handle 1:0 root dsmark indices 64 set_tc_index\n";
|
||||
print "$TC filter add $DEV parent 1:0 protocol ip prio 1 tcindex ".
|
||||
"mask 0xfc shift 2\n";
|
||||
print "$TC qdisc add $DEV parent 1:0 handle 2:0 prio\n";
|
||||
#
|
||||
# EF class: Maximum about one MTU sized packet allowed on the queue
|
||||
#
|
||||
print "$TC qdisc add $DEV parent 2:1 tbf rate $efrate burst $MTU limit 1.6kB\n";
|
||||
print "$TC filter add $DEV parent 2:0 protocol ip prio 1 ".
|
||||
"handle 0x2e tcindex classid 2:1 pass_on\n";
|
||||
#
|
||||
# BE class
|
||||
#
|
||||
print "#BE class(2:2) \n";
|
||||
print "$TC qdisc add $DEV parent 2:2 red limit 60KB ".
|
||||
"min 15KB max 45KB burst 20 avpkt 1000 bandwidth 10Mbit ".
|
||||
"probability 0.4\n";
|
||||
#
|
||||
print "$TC filter add $DEV parent 2:0 protocol ip prio 2 ".
|
||||
"handle 0 tcindex mask 0 classid 2:2 pass_on\n";
|
||||
|
|
@ -0,0 +1,31 @@
|
|||
#!/usr/bin/perl
|
||||
#
|
||||
$TC = "/root/DS-6-beta/iproute2-990530-dsing/tc/tc";
|
||||
$DEV = "dev eth1";
|
||||
print "$TC qdisc add $DEV handle 1:0 root dsmark indices 64 set_tc_index\n";
|
||||
print "$TC filter add $DEV parent 1:0 protocol ip prio 1 tcindex ".
|
||||
"mask 0xfc shift 2\n";
|
||||
print "$TC qdisc add $DEV parent 1:0 handle 2:0 cbq bandwidth ".
|
||||
"10Mbit cell 8 avpkt 1000 mpu 64\n";
|
||||
#
|
||||
# EF class
|
||||
#
|
||||
print "$TC class add $DEV parent 2:0 classid 2:1 cbq bandwidth ".
|
||||
"10Mbit rate 1500Kbit avpkt 1000 prio 1 bounded isolated ".
|
||||
"allot 1514 weight 1 maxburst 10 \n";
|
||||
# packet fifo for EF?
|
||||
print "$TC qdisc add $DEV parent 2:1 pfifo limit 5\n";
|
||||
print "$TC filter add $DEV parent 2:0 protocol ip prio 1 ".
|
||||
"handle 0x2e tcindex classid 2:1 pass_on\n";
|
||||
#
|
||||
# BE class
|
||||
#
|
||||
print "#BE class(2:2) \n";
|
||||
print "$TC class add $DEV parent 2:0 classid 2:2 cbq bandwidth ".
|
||||
"10Mbit rate 5Mbit avpkt 1000 prio 7 allot 1514 weight 1 ".
|
||||
"maxburst 21 borrow split 2:0 defmap 0xffff \n";
|
||||
print "$TC qdisc add $DEV parent 2:2 red limit 60KB ".
|
||||
"min 15KB max 45KB burst 20 avpkt 1000 bandwidth 10Mbit ".
|
||||
"probability 0.4\n";
|
||||
print "$TC filter add $DEV parent 2:0 protocol ip prio 2 ".
|
||||
"handle 0 tcindex mask 0 classid 2:2 pass_on\n";
|
||||
|
|
@ -0,0 +1,125 @@
|
|||
|
||||
These were the tests done to validate the Diffserv scripts.
|
||||
This document will be updated continuously. If you do more
|
||||
thorough validation testing please post the details to the
|
||||
diffserv mailing list.
|
||||
Nevertheless, these tests should serve for basic validation.
|
||||
|
||||
AFCBQ, EFCBQ, EFPRIO
|
||||
----------------------
|
||||
|
||||
generate all possible DSCPs and observe that they
|
||||
get sent to the proper classes. In the case of AF also
|
||||
to the correct Virtual Queues.
|
||||
|
||||
Edge1
|
||||
-----
|
||||
generate TOS values 0x0,0x10,0xbb each with IP addresses
|
||||
10.2.0.24 (mark 1), 10.2.0.3 (mark2) and 10.2.0.30 (mark 3)
|
||||
and observe that they get marked as expected.
|
||||
|
||||
Edge2
|
||||
-----
|
||||
|
||||
-Repeat the tests in Edge1
|
||||
-ftp with data direction from 10.2.0.2
|
||||
*observe that the metering/policing works correctly (and the marking
|
||||
as well). In this case the mark used will be 3
|
||||
|
||||
Edge31-cb-chains
|
||||
----------------
|
||||
|
||||
-ftp with data direction from 10.2.0.2
|
||||
|
||||
*observe that the metering/policing works correctly (and the marking
|
||||
as well). In this case the mark used will be 1.
|
||||
|
||||
Metering: The data throughput should not exceed 2*CIR1 + 2*CIR2
|
||||
which is roughly: 5mbps
|
||||
|
||||
Marking: there should be a variation of marked packets:
|
||||
AF41(TOS=0x88) AF42(0x90) AF43(0x98) and BE (0x0)
|
||||
|
||||
More tests required to see the interaction of several sources (other
|
||||
than subnet 10.2.0.0/24).
|
||||
|
||||
Edge31-ca-u32
|
||||
--------------
|
||||
|
||||
Generate data using modified tcpblast from 10.2.0.2 (behind eth2) to the
|
||||
discard port of 10.1.0.2 (behind eth1)
|
||||
|
||||
1) generate with src tos = 0x88
|
||||
Metering: Allocated throughput should not exceed 2*CIR1 + 2*CIR2
|
||||
approximately 5mbps
|
||||
Marking: Should vary between 0x88,0x90,0x98 and 0x0
|
||||
|
||||
2) generate with src tos = 0x90
|
||||
Metering: Allocated throughput should not exceed CIR1 + 2*CIR2
|
||||
approximately 3.5mbps
|
||||
Marking: Should vary between 0x90,0x98 and 0x0
|
||||
|
||||
3) generate with src tos = 0x98
|
||||
Metering: Allocated throughput should not exceed CIR1 + CIR2
|
||||
approximately 2.5mbps
|
||||
Marking: Should vary between 0x98 and 0x0
|
||||
|
||||
4) generate with src tos any other than the above
|
||||
Metering: Allocated throughput should not exceed CIR1
|
||||
approximately 1.5mbps
|
||||
Marking: Should be consistent at 0x0
|
||||
|
||||
TODO: Testing on how each color shares when all 4 types of packets
|
||||
are going through the edge device
|
||||
|
||||
Edge32-cb-u32, Edge32-cb-chains
|
||||
-------------------------------
|
||||
|
||||
-ftp with data direction from 10.2.0.2
|
||||
|
||||
*observe that the metering/policing works correctly (and the marking
|
||||
as well).
|
||||
|
||||
Metering:
|
||||
The data throughput should not exceed 2*CIR1 + 2*CIR2
|
||||
+ 2*PIR2 + PIR1 for u32 which is roughly: 6mbps
|
||||
The data throughput should not exceed 2*CIR1 + 5*CIR2
|
||||
for chains which is roughly: 6mbps
|
||||
|
||||
Marking: there should be a variation of marked packets:
|
||||
AF41(TOS=0x88) AF42(0x90) AF43(0x98) and BE (0x0)
|
||||
|
||||
TODO:
|
||||
-More tests required to see the interaction of several sources (other
|
||||
than subnet 10.2.0.0/24).
|
||||
-More tests needed to capture stats on how many times the CIR was exceeded
|
||||
but the data was not remarked etc.
|
||||
|
||||
Edge32-ca-u32
|
||||
--------------
|
||||
|
||||
Generate data using modified tcpblast from 10.2.0.2 (behind eth2) to the
|
||||
discard port of 10.1.0.2 (behind eth1)
|
||||
|
||||
1) generate with src tos = 0x88
|
||||
Metering: Allocated throughput should not exceed 2*CIR1 + 2*CIR2
|
||||
+PIR1 -- approximately 4mbps
|
||||
Marking: Should vary between 0x88,0x90,0x98 and 0x0
|
||||
|
||||
2) generate with src tos = 0x90
|
||||
Metering: Allocated throughput should not exceed CIR1 + 2*CIR2
|
||||
+ 2* PIR2 approximately 3mbps
|
||||
Marking: Should vary between 0x90,0x98 and 0x0
|
||||
|
||||
3) generate with src tos = 0x98
|
||||
Metering: Allocated throughput should not exceed PIR1+ CIR1 + CIR2
|
||||
approximately 2.5mbps
|
||||
Marking: Should vary between 0x98 and 0x0
|
||||
|
||||
4) generate with src tos any other than the above
|
||||
Metering: Allocated throughput should not exceed CIR1
|
||||
approximately 1mbps
|
||||
Marking: Should be consistent at 0x0
|
||||
|
||||
TODO: Testing on how each color shares when all 4 types of packets
|
||||
are going through the edge device
|
||||
|
|
@ -0,0 +1,134 @@
|
|||
#!/bin/sh
|
||||
|
||||
#
|
||||
# Setup address label from /etc/gai.conf
|
||||
#
|
||||
# Written by YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>, 2010.
|
||||
#
|
||||
|
||||
IP=ip
|
||||
DEFAULT_GAICONF=/etc/gai.conf
|
||||
verbose=
|
||||
debug=
|
||||
|
||||
# run CMD [ARG...]
# Echo the command line when $verbose is non-empty, then execute it unless
# $debug is non-empty.  Defined with the portable "name ()" form because the
# script runs under #!/bin/sh, where the "function" keyword is not guaranteed.
run ()
{
	if [ -n "$verbose" ]; then
		echo "$@"
	fi
	if [ -z "$debug" ]; then
		"$@"
	fi
}
|
||||
|
||||
# do_load_config FILE
# Read FILE line by line as "<command> <prefix> <label>" and install every
# label entry with "ip -6 addrlabel add".  The existing table is flushed
# once, just before the first entry is added, so a file without any label
# entry leaves the current table untouched.
#
# Both "label" and "#label" are accepted as the command word: the built-in
# default table written by this script uses "label", while the original
# loader only recognised "#label".
# NOTE(review): a file that carries the same prefix in both forms will make
# the second "addrlabel add" fail - confirm this is acceptable.
do_load_config ()
{
	file=$1; shift
	flush=1
	while read command prefix label; do
		case "$command" in
		label|'#label')
			if [ ${flush} = 1 ]; then
				run ${IP} -6 addrlabel flush
				flush=0
			fi
			run ${IP} -6 addrlabel add prefix $prefix label $label
			;;
		esac
	done < "$file"
}
|
||||
|
||||
# do_list_config
# Print the output of "ip -6 addrlabel list" as one "label <2nd field>
# <4th field>" line per input line.  Uses the portable "name ()" definition
# form since the script runs under #!/bin/sh.
do_list_config ()
{
	${IP} -6 addrlabel list | while read p pfx l lbl; do
		echo label ${pfx} ${lbl}
	done
}
|
||||
|
||||
# help
# Print the usage synopsis and terminate the script with exit status 1.
# Uses the portable "name ()" definition form since the script runs under
# #!/bin/sh.
help ()
{
	echo "Usage: $0 [-v] {--list | --config [ ${DEFAULT_GAICONF} ] | --default}"
	exit 1
}
|
||||
|
||||
TEMP=`getopt -o c::dlv -l config::,default,list,verbose -n gaiconf -- "$@"`
|
||||
|
||||
if [ $? != 0 ]; then
|
||||
echo "Terminating..." >&2
|
||||
exit 1
|
||||
fi
|
||||
|
||||
TEMPFILE=`mktemp`
|
||||
|
||||
eval set -- "$TEMP"
|
||||
|
||||
# Walk the option list produced by getopt.  Exactly one of --config,
# --default or --list may be chosen; a second one (detected through a
# non-empty $cmd) prints the usage text and exits via help.
while true ; do
	case "$1" in
		-c|--config)
			if [ x"$cmd" != x"" ]; then
				help
			fi
			# The option's argument is optional; an empty $2 means the
			# user gave none and the default file is used.
			case "$2" in
				"")	gai_conf="${DEFAULT_GAICONF}"
					;;
				*)	gai_conf="$2"
					;;
			esac
			shift 2
			cmd=config
			;;
		-d|--default)
			if [ x"$cmd" != x"" ]; then
				help
			fi
			gai_conf=${TEMPFILE}
			cmd=config
			# Consume the option; without this shift the next pass
			# sees it again with $cmd already set and exits via help.
			shift
			;;
		-l|--list)
			if [ x"$cmd" != x"" ]; then
				help
			fi
			cmd=list
			shift
			;;
		-v|--verbose)
			# --verbose is declared in the getopt long-option list,
			# so it has to be accepted here as well.
			verbose=1
			shift
			;;
		--)
			shift
			break
			;;
		*)
			echo "Internal error!" >&2
			exit 1
			;;
	esac
done
|
||||
|
||||
case "$cmd" in
|
||||
config)
|
||||
if [ x"$gai_conf" = x"${TEMPFILE}" ]; then
|
||||
sed -e 's/^[[:space:]]*//' <<END_OF_DEFAULT >${TEMPFILE}
|
||||
label ::1/128 0
|
||||
label ::/0 1
|
||||
label 2002::/16 2
|
||||
label ::/96 3
|
||||
label ::ffff:0:0/96 4
|
||||
label fec0::/10 5
|
||||
label fc00::/7 6
|
||||
label 2001:0::/32 7
|
||||
END_OF_DEFAULT
|
||||
fi
|
||||
do_load_config "$gai_conf"
|
||||
;;
|
||||
list)
|
||||
do_list_config
|
||||
;;
|
||||
*)
|
||||
help
|
||||
;;
|
||||
esac
|
||||
|
||||
rm -f "${TEMPFILE}"
|
||||
|
||||
exit 0
|
||||
|
||||
|
|
@ -1,7 +1,6 @@
|
|||
# SPDX-License-Identifier: GPL-2.0
|
||||
GENLOBJ=genl.o
|
||||
|
||||
include ../config.mk
|
||||
include ../Config
|
||||
SHARED_LIBS ?= y
|
||||
|
||||
CFLAGS += -fno-strict-aliasing
|
||||
|
|
@ -21,7 +20,6 @@ endif
|
|||
all: genl
|
||||
|
||||
genl: $(GENLOBJ) $(LIBNETLINK) $(LIBUTIL) $(GENLLIB)
|
||||
$(QUIET_LINK)$(CC) $^ $(LDFLAGS) $(LDLIBS) -o $@
|
||||
|
||||
install: all
|
||||
install -m 0755 genl $(DESTDIR)$(SBINDIR)
|
||||
|
|
|
|||
169
genl/ctrl.c
169
genl/ctrl.c
|
|
@ -13,6 +13,7 @@
|
|||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <unistd.h>
|
||||
#include <syslog.h>
|
||||
#include <fcntl.h>
|
||||
#include <sys/socket.h>
|
||||
#include <netinet/in.h>
|
||||
|
|
@ -28,18 +29,89 @@
|
|||
static int usage(void)
|
||||
{
|
||||
fprintf(stderr,"Usage: ctrl <CMD>\n" \
|
||||
"CMD := get <PARMS> | list | monitor | policy <PARMS>\n" \
|
||||
"CMD := get <PARMS> | list | monitor\n" \
|
||||
"PARMS := name <name> | id <id>\n" \
|
||||
"Examples:\n" \
|
||||
"\tctrl ls\n" \
|
||||
"\tctrl monitor\n" \
|
||||
"\tctrl get name foobar\n" \
|
||||
"\tctrl get id 0xF\n"
|
||||
"\tctrl policy name foobar\n"
|
||||
"\tctrl policy id 0xF\n");
|
||||
"\tctrl get id 0xF\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
int genl_ctrl_resolve_family(const char *family)
|
||||
{
|
||||
struct rtnl_handle rth;
|
||||
struct nlmsghdr *nlh;
|
||||
struct genlmsghdr *ghdr;
|
||||
int ret = 0;
|
||||
struct {
|
||||
struct nlmsghdr n;
|
||||
char buf[4096];
|
||||
} req;
|
||||
|
||||
memset(&req, 0, sizeof(req));
|
||||
|
||||
nlh = &req.n;
|
||||
nlh->nlmsg_len = NLMSG_LENGTH(GENL_HDRLEN);
|
||||
nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
|
||||
nlh->nlmsg_type = GENL_ID_CTRL;
|
||||
|
||||
ghdr = NLMSG_DATA(&req.n);
|
||||
ghdr->cmd = CTRL_CMD_GETFAMILY;
|
||||
|
||||
if (rtnl_open_byproto(&rth, 0, NETLINK_GENERIC) < 0) {
|
||||
fprintf(stderr, "Cannot open generic netlink socket\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
addattr_l(nlh, 128, CTRL_ATTR_FAMILY_NAME, family, strlen(family) + 1);
|
||||
|
||||
if (rtnl_talk(&rth, nlh, nlh, sizeof(req)) < 0) {
|
||||
fprintf(stderr, "Error talking to the kernel\n");
|
||||
goto errout;
|
||||
}
|
||||
|
||||
{
|
||||
struct rtattr *tb[CTRL_ATTR_MAX + 1];
|
||||
struct genlmsghdr *ghdr = NLMSG_DATA(nlh);
|
||||
int len = nlh->nlmsg_len;
|
||||
struct rtattr *attrs;
|
||||
|
||||
if (nlh->nlmsg_type != GENL_ID_CTRL) {
|
||||
fprintf(stderr, "Not a controller message, nlmsg_len=%d "
|
||||
"nlmsg_type=0x%x\n", nlh->nlmsg_len, nlh->nlmsg_type);
|
||||
goto errout;
|
||||
}
|
||||
|
||||
if (ghdr->cmd != CTRL_CMD_NEWFAMILY) {
|
||||
fprintf(stderr, "Unknown controller command %d\n", ghdr->cmd);
|
||||
goto errout;
|
||||
}
|
||||
|
||||
len -= NLMSG_LENGTH(GENL_HDRLEN);
|
||||
|
||||
if (len < 0) {
|
||||
fprintf(stderr, "wrong controller message len %d\n", len);
|
||||
return -1;
|
||||
}
|
||||
|
||||
attrs = (struct rtattr *) ((char *) ghdr + GENL_HDRLEN);
|
||||
parse_rtattr(tb, CTRL_ATTR_MAX, attrs, len);
|
||||
|
||||
if (tb[CTRL_ATTR_FAMILY_ID] == NULL) {
|
||||
fprintf(stderr, "Missing family id TLV\n");
|
||||
goto errout;
|
||||
}
|
||||
|
||||
ret = rta_getattr_u16(tb[CTRL_ATTR_FAMILY_ID]);
|
||||
}
|
||||
|
||||
errout:
|
||||
rtnl_close(&rth);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static void print_ctrl_cmd_flags(FILE *fp, __u32 fl)
|
||||
{
|
||||
fprintf(fp, "\n\t\tCapabilities (0x%x):\n ", fl);
|
||||
|
|
@ -60,7 +132,7 @@ static void print_ctrl_cmd_flags(FILE *fp, __u32 fl)
|
|||
|
||||
fprintf(fp, "\n");
|
||||
}
|
||||
|
||||
|
||||
static int print_ctrl_cmds(FILE *fp, struct rtattr *arg, __u32 ctrl_ver)
|
||||
{
|
||||
struct rtattr *tb[CTRL_ATTR_OP_MAX + 1];
|
||||
|
|
@ -105,8 +177,8 @@ static int print_ctrl_grp(FILE *fp, struct rtattr *arg, __u32 ctrl_ver)
|
|||
/*
|
||||
* The controller sends one nlmsg per family
|
||||
*/
|
||||
static int print_ctrl(struct rtnl_ctrl_data *ctrl,
|
||||
struct nlmsghdr *n, void *arg)
|
||||
static int print_ctrl(const struct sockaddr_nl *who, struct nlmsghdr *n,
|
||||
void *arg)
|
||||
{
|
||||
struct rtattr *tb[CTRL_ATTR_MAX + 1];
|
||||
struct genlmsghdr *ghdr = NLMSG_DATA(n);
|
||||
|
|
@ -125,8 +197,7 @@ static int print_ctrl(struct rtnl_ctrl_data *ctrl,
|
|||
ghdr->cmd != CTRL_CMD_DELFAMILY &&
|
||||
ghdr->cmd != CTRL_CMD_NEWFAMILY &&
|
||||
ghdr->cmd != CTRL_CMD_NEWMCAST_GRP &&
|
||||
ghdr->cmd != CTRL_CMD_DELMCAST_GRP &&
|
||||
ghdr->cmd != CTRL_CMD_GETPOLICY) {
|
||||
ghdr->cmd != CTRL_CMD_DELMCAST_GRP) {
|
||||
fprintf(stderr, "Unknown controller command %d\n", ghdr->cmd);
|
||||
return 0;
|
||||
}
|
||||
|
|
@ -139,7 +210,7 @@ static int print_ctrl(struct rtnl_ctrl_data *ctrl,
|
|||
}
|
||||
|
||||
attrs = (struct rtattr *) ((char *) ghdr + GENL_HDRLEN);
|
||||
parse_rtattr_flags(tb, CTRL_ATTR_MAX, attrs, len, NLA_F_NESTED);
|
||||
parse_rtattr(tb, CTRL_ATTR_MAX, attrs, len);
|
||||
|
||||
if (tb[CTRL_ATTR_FAMILY_NAME]) {
|
||||
char *name = RTA_DATA(tb[CTRL_ATTR_FAMILY_NAME]);
|
||||
|
|
@ -162,36 +233,6 @@ static int print_ctrl(struct rtnl_ctrl_data *ctrl,
|
|||
__u32 *ma = RTA_DATA(tb[CTRL_ATTR_MAXATTR]);
|
||||
fprintf(fp, " max attribs: %d ",*ma);
|
||||
}
|
||||
if (tb[CTRL_ATTR_OP_POLICY]) {
|
||||
const struct rtattr *pos;
|
||||
|
||||
rtattr_for_each_nested(pos, tb[CTRL_ATTR_OP_POLICY]) {
|
||||
struct rtattr *ptb[CTRL_ATTR_POLICY_DUMP_MAX + 1];
|
||||
struct rtattr *pattrs = RTA_DATA(pos);
|
||||
int plen = RTA_PAYLOAD(pos);
|
||||
|
||||
parse_rtattr_flags(ptb, CTRL_ATTR_POLICY_DUMP_MAX,
|
||||
pattrs, plen, NLA_F_NESTED);
|
||||
|
||||
fprintf(fp, " op %d policies:",
|
||||
pos->rta_type & ~NLA_F_NESTED);
|
||||
|
||||
if (ptb[CTRL_ATTR_POLICY_DO]) {
|
||||
__u32 *v = RTA_DATA(ptb[CTRL_ATTR_POLICY_DO]);
|
||||
|
||||
fprintf(fp, " do=%d", *v);
|
||||
}
|
||||
|
||||
if (ptb[CTRL_ATTR_POLICY_DUMP]) {
|
||||
__u32 *v = RTA_DATA(ptb[CTRL_ATTR_POLICY_DUMP]);
|
||||
|
||||
fprintf(fp, " dump=%d", *v);
|
||||
}
|
||||
}
|
||||
}
|
||||
if (tb[CTRL_ATTR_POLICY])
|
||||
nl_print_policy(tb[CTRL_ATTR_POLICY], fp);
|
||||
|
||||
/* end of family definitions .. */
|
||||
fprintf(fp,"\n");
|
||||
if (tb[CTRL_ATTR_OPS]) {
|
||||
|
|
@ -240,37 +281,34 @@ static int print_ctrl(struct rtnl_ctrl_data *ctrl,
|
|||
return 0;
|
||||
}
|
||||
|
||||
static int print_ctrl2(struct nlmsghdr *n, void *arg)
|
||||
{
|
||||
return print_ctrl(NULL, n, arg);
|
||||
}
|
||||
|
||||
static int ctrl_list(int cmd, int argc, char **argv)
|
||||
{
|
||||
struct rtnl_handle rth;
|
||||
struct nlmsghdr *nlh;
|
||||
struct genlmsghdr *ghdr;
|
||||
int ret = -1;
|
||||
char d[GENL_NAMSIZ];
|
||||
struct {
|
||||
struct nlmsghdr n;
|
||||
struct genlmsghdr g;
|
||||
char buf[4096];
|
||||
} req = {
|
||||
.n.nlmsg_len = NLMSG_LENGTH(GENL_HDRLEN),
|
||||
.n.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK,
|
||||
.n.nlmsg_type = GENL_ID_CTRL,
|
||||
.g.cmd = CTRL_CMD_GETFAMILY,
|
||||
};
|
||||
struct nlmsghdr *nlh = &req.n;
|
||||
struct nlmsghdr *answer = NULL;
|
||||
} req;
|
||||
|
||||
memset(&req, 0, sizeof(req));
|
||||
|
||||
nlh = &req.n;
|
||||
nlh->nlmsg_len = NLMSG_LENGTH(GENL_HDRLEN);
|
||||
nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
|
||||
nlh->nlmsg_type = GENL_ID_CTRL;
|
||||
|
||||
ghdr = NLMSG_DATA(&req.n);
|
||||
ghdr->cmd = CTRL_CMD_GETFAMILY;
|
||||
|
||||
if (rtnl_open_byproto(&rth, 0, NETLINK_GENERIC) < 0) {
|
||||
fprintf(stderr, "Cannot open generic netlink socket\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
if (cmd == CTRL_CMD_GETFAMILY || cmd == CTRL_CMD_GETPOLICY) {
|
||||
req.g.cmd = cmd;
|
||||
|
||||
if (cmd == CTRL_CMD_GETFAMILY) {
|
||||
if (argc != 2) {
|
||||
fprintf(stderr, "Wrong number of params\n");
|
||||
return -1;
|
||||
|
|
@ -278,7 +316,7 @@ static int ctrl_list(int cmd, int argc, char **argv)
|
|||
|
||||
if (matches(*argv, "name") == 0) {
|
||||
NEXT_ARG();
|
||||
strlcpy(d, *argv, sizeof(d));
|
||||
strncpy(d, *argv, sizeof (d) - 1);
|
||||
addattr_l(nlh, 128, CTRL_ATTR_FAMILY_NAME,
|
||||
d, strlen(d) + 1);
|
||||
} else if (matches(*argv, "id") == 0) {
|
||||
|
|
@ -295,22 +333,20 @@ static int ctrl_list(int cmd, int argc, char **argv)
|
|||
fprintf(stderr, "Wrong params\n");
|
||||
goto ctrl_done;
|
||||
}
|
||||
}
|
||||
|
||||
if (cmd == CTRL_CMD_GETFAMILY) {
|
||||
if (rtnl_talk(&rth, nlh, &answer) < 0) {
|
||||
if (rtnl_talk(&rth, nlh, nlh, sizeof(req)) < 0) {
|
||||
fprintf(stderr, "Error talking to the kernel\n");
|
||||
goto ctrl_done;
|
||||
}
|
||||
|
||||
if (print_ctrl2(answer, (void *) stdout) < 0) {
|
||||
if (print_ctrl(NULL, nlh, (void *) stdout) < 0) {
|
||||
fprintf(stderr, "Dump terminated\n");
|
||||
goto ctrl_done;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
if (cmd == CTRL_CMD_UNSPEC || cmd == CTRL_CMD_GETPOLICY) {
|
||||
if (cmd == CTRL_CMD_UNSPEC) {
|
||||
nlh->nlmsg_flags = NLM_F_ROOT|NLM_F_MATCH|NLM_F_REQUEST;
|
||||
nlh->nlmsg_seq = rth.dump = ++rth.seq;
|
||||
|
||||
|
|
@ -319,13 +355,12 @@ static int ctrl_list(int cmd, int argc, char **argv)
|
|||
goto ctrl_done;
|
||||
}
|
||||
|
||||
rtnl_dump_filter(&rth, print_ctrl2, stdout);
|
||||
rtnl_dump_filter(&rth, print_ctrl, stdout);
|
||||
|
||||
}
|
||||
|
||||
ret = 0;
|
||||
ctrl_done:
|
||||
free(answer);
|
||||
rtnl_close(&rth);
|
||||
return ret;
|
||||
}
|
||||
|
|
@ -361,8 +396,6 @@ static int parse_ctrl(struct genl_util *a, int argc, char **argv)
|
|||
matches(*argv, "show") == 0 ||
|
||||
matches(*argv, "lst") == 0)
|
||||
return ctrl_list(CTRL_CMD_UNSPEC, argc-1, argv+1);
|
||||
if (matches(*argv, "policy") == 0)
|
||||
return ctrl_list(CTRL_CMD_GETPOLICY, argc-1, argv+1);
|
||||
if (matches(*argv, "help") == 0)
|
||||
return usage();
|
||||
|
||||
|
|
@ -375,5 +408,5 @@ static int parse_ctrl(struct genl_util *a, int argc, char **argv)
|
|||
struct genl_util ctrl_genl_util = {
|
||||
.name = "ctrl",
|
||||
.parse_genlopt = parse_ctrl,
|
||||
.print_genlopt = print_ctrl2,
|
||||
.print_genlopt = print_ctrl,
|
||||
};
|
||||
|
|
|
|||
44
genl/genl.c
44
genl/genl.c
|
|
@ -13,6 +13,7 @@
|
|||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <unistd.h>
|
||||
#include <syslog.h>
|
||||
#include <fcntl.h>
|
||||
#include <dlfcn.h>
|
||||
#include <sys/socket.h>
|
||||
|
|
@ -22,19 +23,21 @@
|
|||
#include <errno.h>
|
||||
#include <linux/netlink.h>
|
||||
#include <linux/rtnetlink.h> /* until we put our own header */
|
||||
#include "version.h"
|
||||
#include "SNAPSHOT.h"
|
||||
#include "utils.h"
|
||||
#include "genl_utils.h"
|
||||
|
||||
int show_stats;
|
||||
int show_details;
|
||||
int show_raw;
|
||||
int show_stats = 0;
|
||||
int show_details = 0;
|
||||
int show_raw = 0;
|
||||
int resolve_hosts = 0;
|
||||
|
||||
static void *BODY;
|
||||
static struct genl_util *genl_list;
|
||||
static struct genl_util * genl_list;
|
||||
|
||||
|
||||
static int print_nofopt(struct nlmsghdr *n, void *arg)
|
||||
static int print_nofopt(const struct sockaddr_nl *who, struct nlmsghdr *n,
|
||||
void *arg)
|
||||
{
|
||||
fprintf((FILE *) arg, "unknown genl type ..\n");
|
||||
return 0;
|
||||
|
|
@ -43,16 +46,15 @@ static int print_nofopt(struct nlmsghdr *n, void *arg)
|
|||
static int parse_nofopt(struct genl_util *f, int argc, char **argv)
|
||||
{
|
||||
if (argc) {
|
||||
fprintf(stderr,
|
||||
"Unknown genl \"%s\", hence option \"%s\" is unparsable\n",
|
||||
f->name, *argv);
|
||||
fprintf(stderr, "Unknown genl \"%s\", hence option \"%s\" "
|
||||
"is unparsable\n", f->name, *argv);
|
||||
return -1;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static struct genl_util *get_genl_kind(const char *str)
|
||||
static struct genl_util *get_genl_kind(char *str)
|
||||
{
|
||||
void *dlh;
|
||||
char buf[256];
|
||||
|
|
@ -84,8 +86,9 @@ reg:
|
|||
return f;
|
||||
|
||||
noexist:
|
||||
f = calloc(1, sizeof(*f));
|
||||
f = malloc(sizeof(*f));
|
||||
if (f) {
|
||||
memset(f, 0, sizeof(*f));
|
||||
strncpy(f->name, str, 15);
|
||||
f->parse_genlopt = parse_nofopt;
|
||||
f->print_genlopt = print_nofopt;
|
||||
|
|
@ -98,10 +101,9 @@ static void usage(void) __attribute__((noreturn));
|
|||
|
||||
static void usage(void)
|
||||
{
|
||||
fprintf(stderr,
|
||||
"Usage: genl [ OPTIONS ] OBJECT [help] }\n"
|
||||
"where OBJECT := { ctrl etc }\n"
|
||||
" OPTIONS := { -s[tatistics] | -d[etails] | -r[aw] | -V[ersion] | -h[elp] }\n");
|
||||
fprintf(stderr, "Usage: genl [ OPTIONS ] OBJECT | help }\n"
|
||||
"where OBJECT := { ctrl etc }\n"
|
||||
" OPTIONS := { -s[tatistics] | -d[etails] | -r[aw] }\n");
|
||||
exit(-1);
|
||||
}
|
||||
|
||||
|
|
@ -118,26 +120,24 @@ int main(int argc, char **argv)
|
|||
} else if (matches(argv[1], "-raw") == 0) {
|
||||
++show_raw;
|
||||
} else if (matches(argv[1], "-Version") == 0) {
|
||||
printf("genl utility, iproute2-%s\n", version);
|
||||
printf("genl utility, iproute2-ss%s\n", SNAPSHOT);
|
||||
exit(0);
|
||||
} else if (matches(argv[1], "-help") == 0) {
|
||||
usage();
|
||||
} else {
|
||||
fprintf(stderr,
|
||||
"Option \"%s\" is unknown, try \"genl -help\".\n",
|
||||
argv[1]);
|
||||
fprintf(stderr, "Option \"%s\" is unknown, try "
|
||||
"\"genl -help\".\n", argv[1]);
|
||||
exit(-1);
|
||||
}
|
||||
argc--; argv++;
|
||||
}
|
||||
|
||||
if (argc > 1) {
|
||||
struct genl_util *a;
|
||||
int ret;
|
||||
|
||||
struct genl_util *a = NULL;
|
||||
a = get_genl_kind(argv[1]);
|
||||
if (!a) {
|
||||
fprintf(stderr, "bad genl %s\n", argv[1]);
|
||||
fprintf(stderr,"bad genl %s\n", argv[1]);
|
||||
exit(-1);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -1,15 +1,17 @@
|
|||
/* SPDX-License-Identifier: GPL-2.0 */
|
||||
#ifndef _TC_UTIL_H_
|
||||
#define _TC_UTIL_H_ 1
|
||||
|
||||
#include <linux/genetlink.h>
|
||||
#include "utils.h"
|
||||
#include "linux/genetlink.h"
|
||||
|
||||
struct genl_util {
|
||||
struct genl_util
|
||||
{
|
||||
struct genl_util *next;
|
||||
char name[16];
|
||||
int (*parse_genlopt)(struct genl_util *fu, int argc, char **argv);
|
||||
int (*print_genlopt)(struct nlmsghdr *n, void *arg);
|
||||
int (*print_genlopt)(const struct sockaddr_nl *who, struct nlmsghdr *n, void *arg);
|
||||
};
|
||||
|
||||
extern int genl_ctrl_resolve_family(const char *family);
|
||||
|
||||
#endif
|
||||
|
|
|
|||
|
|
@ -1,4 +1,3 @@
|
|||
/* SPDX-License-Identifier: GPL-2.0 */
|
||||
/*
|
||||
* This file creates a dummy version of dynamic loading
|
||||
* for environments where dynamic linking
|
||||
|
|
|
|||
|
|
@ -0,0 +1 @@
|
|||
static const char SNAPSHOT[] = "150706";
|
||||
|
|
@ -1,275 +0,0 @@
|
|||
/* SPDX-License-Identifier: GPL-2.0 */
|
||||
#ifndef __BPF_API__
|
||||
#define __BPF_API__
|
||||
|
||||
/* Note:
|
||||
*
|
||||
* This file can be included into eBPF kernel programs. It contains
|
||||
* a couple of useful helper functions, map/section ABI (bpf_elf.h),
|
||||
* misc macros and some eBPF specific LLVM built-ins.
|
||||
*/
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
#include <linux/pkt_cls.h>
|
||||
#include <linux/bpf.h>
|
||||
#include <linux/filter.h>
|
||||
|
||||
#include <asm/byteorder.h>
|
||||
|
||||
#include "bpf_elf.h"
|
||||
|
||||
/** libbpf pin type. */
|
||||
enum libbpf_pin_type {
|
||||
LIBBPF_PIN_NONE,
|
||||
/* PIN_BY_NAME: pin maps by name (in /sys/fs/bpf by default) */
|
||||
LIBBPF_PIN_BY_NAME,
|
||||
};
|
||||
|
||||
/** Type helper macros. */
|
||||
|
||||
#define __uint(name, val) int (*name)[val]
|
||||
#define __type(name, val) typeof(val) *name
|
||||
#define __array(name, val) typeof(val) *name[]
|
||||
|
||||
/** Misc macros. */
|
||||
|
||||
#ifndef __stringify
|
||||
# define __stringify(X) #X
|
||||
#endif
|
||||
|
||||
#ifndef __maybe_unused
|
||||
# define __maybe_unused __attribute__((__unused__))
|
||||
#endif
|
||||
|
||||
#ifndef offsetof
|
||||
# define offsetof(TYPE, MEMBER) __builtin_offsetof(TYPE, MEMBER)
|
||||
#endif
|
||||
|
||||
#ifndef likely
|
||||
# define likely(X) __builtin_expect(!!(X), 1)
|
||||
#endif
|
||||
|
||||
#ifndef unlikely
|
||||
# define unlikely(X) __builtin_expect(!!(X), 0)
|
||||
#endif
|
||||
|
||||
#ifndef htons
|
||||
# define htons(X) __constant_htons((X))
|
||||
#endif
|
||||
|
||||
#ifndef ntohs
|
||||
# define ntohs(X) __constant_ntohs((X))
|
||||
#endif
|
||||
|
||||
#ifndef htonl
|
||||
# define htonl(X) __constant_htonl((X))
|
||||
#endif
|
||||
|
||||
#ifndef ntohl
|
||||
# define ntohl(X) __constant_ntohl((X))
|
||||
#endif
|
||||
|
||||
#ifndef __inline__
|
||||
# define __inline__ __attribute__((always_inline))
|
||||
#endif
|
||||
|
||||
/** Section helper macros. */
|
||||
|
||||
#ifndef __section
|
||||
# define __section(NAME) \
|
||||
__attribute__((section(NAME), used))
|
||||
#endif
|
||||
|
||||
#ifndef __section_tail
|
||||
# define __section_tail(ID, KEY) \
|
||||
__section(__stringify(ID) "/" __stringify(KEY))
|
||||
#endif
|
||||
|
||||
#ifndef __section_xdp_entry
|
||||
# define __section_xdp_entry \
|
||||
__section(ELF_SECTION_PROG)
|
||||
#endif
|
||||
|
||||
#ifndef __section_cls_entry
|
||||
# define __section_cls_entry \
|
||||
__section(ELF_SECTION_CLASSIFIER)
|
||||
#endif
|
||||
|
||||
#ifndef __section_act_entry
|
||||
# define __section_act_entry \
|
||||
__section(ELF_SECTION_ACTION)
|
||||
#endif
|
||||
|
||||
#ifndef __section_lwt_entry
|
||||
# define __section_lwt_entry \
|
||||
__section(ELF_SECTION_PROG)
|
||||
#endif
|
||||
|
||||
#ifndef __section_license
|
||||
# define __section_license \
|
||||
__section(ELF_SECTION_LICENSE)
|
||||
#endif
|
||||
|
||||
#ifndef __section_maps
|
||||
# define __section_maps \
|
||||
__section(ELF_SECTION_MAPS)
|
||||
#endif
|
||||
|
||||
/** Declaration helper macros. */
|
||||
|
||||
#ifndef BPF_LICENSE
|
||||
# define BPF_LICENSE(NAME) \
|
||||
char ____license[] __section_license = NAME
|
||||
#endif
|
||||
|
||||
/** Classifier helper */
|
||||
|
||||
#ifndef BPF_H_DEFAULT
|
||||
# define BPF_H_DEFAULT -1
|
||||
#endif
|
||||
|
||||
/** BPF helper functions for tc. Individual flags are in linux/bpf.h */
|
||||
|
||||
#ifndef __BPF_FUNC
|
||||
# define __BPF_FUNC(NAME, ...) \
|
||||
(* NAME)(__VA_ARGS__) __maybe_unused
|
||||
#endif
|
||||
|
||||
#ifndef BPF_FUNC
|
||||
# define BPF_FUNC(NAME, ...) \
|
||||
__BPF_FUNC(NAME, __VA_ARGS__) = (void *) BPF_FUNC_##NAME
|
||||
#endif
|
||||
|
||||
/* Map access/manipulation */
|
||||
static void *BPF_FUNC(map_lookup_elem, void *map, const void *key);
|
||||
static int BPF_FUNC(map_update_elem, void *map, const void *key,
|
||||
const void *value, uint32_t flags);
|
||||
static int BPF_FUNC(map_delete_elem, void *map, const void *key);
|
||||
|
||||
/* Time access */
|
||||
static uint64_t BPF_FUNC(ktime_get_ns);
|
||||
|
||||
/* Debugging */
|
||||
|
||||
/* FIXME: __attribute__ ((format(printf, 1, 3))) not possible unless
|
||||
* llvm bug https://llvm.org/bugs/show_bug.cgi?id=26243 gets resolved.
|
||||
* It would require ____fmt to be made const, which generates a reloc
|
||||
* entry (non-map).
|
||||
*/
|
||||
static void BPF_FUNC(trace_printk, const char *fmt, int fmt_size, ...);
|
||||
|
||||
#ifndef printt
|
||||
# define printt(fmt, ...) \
|
||||
({ \
|
||||
char ____fmt[] = fmt; \
|
||||
trace_printk(____fmt, sizeof(____fmt), ##__VA_ARGS__); \
|
||||
})
|
||||
#endif
|
||||
|
||||
/* Random numbers */
|
||||
static uint32_t BPF_FUNC(get_prandom_u32);
|
||||
|
||||
/* Tail calls */
|
||||
static void BPF_FUNC(tail_call, struct __sk_buff *skb, void *map,
|
||||
uint32_t index);
|
||||
|
||||
/* System helpers */
|
||||
static uint32_t BPF_FUNC(get_smp_processor_id);
|
||||
static uint32_t BPF_FUNC(get_numa_node_id);
|
||||
|
||||
/* Packet misc meta data */
|
||||
static uint32_t BPF_FUNC(get_cgroup_classid, struct __sk_buff *skb);
|
||||
static int BPF_FUNC(skb_under_cgroup, void *map, uint32_t index);
|
||||
|
||||
static uint32_t BPF_FUNC(get_route_realm, struct __sk_buff *skb);
|
||||
static uint32_t BPF_FUNC(get_hash_recalc, struct __sk_buff *skb);
|
||||
static uint32_t BPF_FUNC(set_hash_invalid, struct __sk_buff *skb);
|
||||
|
||||
/* Packet redirection */
|
||||
static int BPF_FUNC(redirect, int ifindex, uint32_t flags);
|
||||
static int BPF_FUNC(clone_redirect, struct __sk_buff *skb, int ifindex,
|
||||
uint32_t flags);
|
||||
|
||||
/* Packet manipulation */
|
||||
static int BPF_FUNC(skb_load_bytes, struct __sk_buff *skb, uint32_t off,
|
||||
void *to, uint32_t len);
|
||||
static int BPF_FUNC(skb_store_bytes, struct __sk_buff *skb, uint32_t off,
|
||||
const void *from, uint32_t len, uint32_t flags);
|
||||
|
||||
static int BPF_FUNC(l3_csum_replace, struct __sk_buff *skb, uint32_t off,
|
||||
uint32_t from, uint32_t to, uint32_t flags);
|
||||
static int BPF_FUNC(l4_csum_replace, struct __sk_buff *skb, uint32_t off,
|
||||
uint32_t from, uint32_t to, uint32_t flags);
|
||||
static int BPF_FUNC(csum_diff, const void *from, uint32_t from_size,
|
||||
const void *to, uint32_t to_size, uint32_t seed);
|
||||
static int BPF_FUNC(csum_update, struct __sk_buff *skb, uint32_t wsum);
|
||||
|
||||
static int BPF_FUNC(skb_change_type, struct __sk_buff *skb, uint32_t type);
|
||||
static int BPF_FUNC(skb_change_proto, struct __sk_buff *skb, uint32_t proto,
|
||||
uint32_t flags);
|
||||
static int BPF_FUNC(skb_change_tail, struct __sk_buff *skb, uint32_t nlen,
|
||||
uint32_t flags);
|
||||
|
||||
static int BPF_FUNC(skb_pull_data, struct __sk_buff *skb, uint32_t len);
|
||||
|
||||
/* Event notification */
|
||||
static int __BPF_FUNC(skb_event_output, struct __sk_buff *skb, void *map,
|
||||
uint64_t index, const void *data, uint32_t size) =
|
||||
(void *) BPF_FUNC_perf_event_output;
|
||||
|
||||
/* Packet vlan encap/decap */
|
||||
static int BPF_FUNC(skb_vlan_push, struct __sk_buff *skb, uint16_t proto,
|
||||
uint16_t vlan_tci);
|
||||
static int BPF_FUNC(skb_vlan_pop, struct __sk_buff *skb);
|
||||
|
||||
/* Packet tunnel encap/decap */
|
||||
static int BPF_FUNC(skb_get_tunnel_key, struct __sk_buff *skb,
|
||||
struct bpf_tunnel_key *to, uint32_t size, uint32_t flags);
|
||||
static int BPF_FUNC(skb_set_tunnel_key, struct __sk_buff *skb,
|
||||
const struct bpf_tunnel_key *from, uint32_t size,
|
||||
uint32_t flags);
|
||||
|
||||
static int BPF_FUNC(skb_get_tunnel_opt, struct __sk_buff *skb,
|
||||
void *to, uint32_t size);
|
||||
static int BPF_FUNC(skb_set_tunnel_opt, struct __sk_buff *skb,
|
||||
const void *from, uint32_t size);
|
||||
|
||||
/** LLVM built-ins, mem*() routines work for constant size */
|
||||
|
||||
#ifndef lock_xadd
|
||||
# define lock_xadd(ptr, val) ((void) __sync_fetch_and_add(ptr, val))
|
||||
#endif
|
||||
|
||||
#ifndef memset
|
||||
# define memset(s, c, n) __builtin_memset((s), (c), (n))
|
||||
#endif
|
||||
|
||||
#ifndef memcpy
|
||||
# define memcpy(d, s, n) __builtin_memcpy((d), (s), (n))
|
||||
#endif
|
||||
|
||||
#ifndef memmove
|
||||
# define memmove(d, s, n) __builtin_memmove((d), (s), (n))
|
||||
#endif
|
||||
|
||||
/* FIXME: __builtin_memcmp() is not yet fully useable unless llvm bug
|
||||
* https://llvm.org/bugs/show_bug.cgi?id=26218 gets resolved. Also
|
||||
* this one would generate a reloc entry (non-map), otherwise.
|
||||
*/
|
||||
#if 0
|
||||
#ifndef memcmp
|
||||
# define memcmp(a, b, n) __builtin_memcmp((a), (b), (n))
|
||||
#endif
|
||||
#endif
|
||||
|
||||
unsigned long long load_byte(void *skb, unsigned long long off)
|
||||
asm ("llvm.bpf.load.byte");
|
||||
|
||||
unsigned long long load_half(void *skb, unsigned long long off)
|
||||
asm ("llvm.bpf.load.half");
|
||||
|
||||
unsigned long long load_word(void *skb, unsigned long long off)
|
||||
asm ("llvm.bpf.load.word");
|
||||
|
||||
#endif /* __BPF_API__ */
|
||||
|
|
@ -1,4 +1,3 @@
|
|||
/* SPDX-License-Identifier: GPL-2.0 */
|
||||
#ifndef __BPF_ELF__
|
||||
#define __BPF_ELF__
|
||||
|
||||
|
|
@ -16,38 +15,19 @@
|
|||
/* ELF section names, etc */
|
||||
#define ELF_SECTION_LICENSE "license"
|
||||
#define ELF_SECTION_MAPS "maps"
|
||||
#define ELF_SECTION_PROG "prog"
|
||||
#define ELF_SECTION_CLASSIFIER "classifier"
|
||||
#define ELF_SECTION_ACTION "action"
|
||||
|
||||
#define ELF_MAX_MAPS 64
|
||||
#define ELF_MAX_LICENSE_LEN 128
|
||||
|
||||
/* Object pinning settings */
|
||||
#define PIN_NONE 0
|
||||
#define PIN_OBJECT_NS 1
|
||||
#define PIN_GLOBAL_NS 2
|
||||
|
||||
/* ELF map definition */
|
||||
struct bpf_elf_map {
|
||||
__u32 type;
|
||||
__u32 size_key;
|
||||
__u32 size_value;
|
||||
__u32 max_elem;
|
||||
__u32 flags;
|
||||
__u32 id;
|
||||
__u32 pinning;
|
||||
__u32 inner_id;
|
||||
__u32 inner_idx;
|
||||
};
|
||||
|
||||
#define BPF_ANNOTATE_KV_PAIR(name, type_key, type_val) \
|
||||
struct ____btf_map_##name { \
|
||||
type_key key; \
|
||||
type_val value; \
|
||||
}; \
|
||||
struct ____btf_map_##name \
|
||||
__attribute__ ((section(".maps." #name), used)) \
|
||||
____btf_map_##name = { }
|
||||
|
||||
#endif /* __BPF_ELF__ */
|
||||
|
|
|
|||
|
|
@ -1,10 +1,8 @@
|
|||
/* SPDX-License-Identifier: GPL-2.0 */
|
||||
#ifndef __BPF_SCM__
|
||||
#define __BPF_SCM__
|
||||
|
||||
#include <sys/types.h>
|
||||
#include <sys/socket.h>
|
||||
#include <sys/un.h>
|
||||
|
||||
#include "utils.h"
|
||||
#include "bpf_elf.h"
|
||||
|
|
|
|||
|
|
@ -1,327 +0,0 @@
|
|||
/*
|
||||
* bpf_util.h BPF common code
|
||||
*
|
||||
* This program is free software; you can distribute it and/or
|
||||
* modify it under the terms of the GNU General Public License
|
||||
* as published by the Free Software Foundation; either version
|
||||
* 2 of the License, or (at your option) any later version.
|
||||
*
|
||||
* Authors: Daniel Borkmann <daniel@iogearbox.net>
|
||||
* Jiri Pirko <jiri@resnulli.us>
|
||||
*/
|
||||
|
||||
#ifndef __BPF_UTIL__
|
||||
#define __BPF_UTIL__
|
||||
|
||||
#include <linux/bpf.h>
|
||||
#include <linux/btf.h>
|
||||
#include <linux/filter.h>
|
||||
#include <linux/magic.h>
|
||||
#include <linux/elf-em.h>
|
||||
#include <linux/if_alg.h>
|
||||
|
||||
#include "utils.h"
|
||||
#include "bpf_scm.h"
|
||||
|
||||
#define BPF_ENV_UDS "TC_BPF_UDS"
|
||||
#define BPF_ENV_MNT "TC_BPF_MNT"
|
||||
|
||||
#ifndef BPF_MAX_LOG
|
||||
# define BPF_MAX_LOG 4096
|
||||
#endif
|
||||
|
||||
#define BPF_DIR_GLOBALS "globals"
|
||||
|
||||
#ifndef BPF_FS_MAGIC
|
||||
# define BPF_FS_MAGIC 0xcafe4a11
|
||||
#endif
|
||||
|
||||
#define BPF_DIR_MNT "/sys/fs/bpf"
|
||||
|
||||
#ifndef TRACEFS_MAGIC
|
||||
# define TRACEFS_MAGIC 0x74726163
|
||||
#endif
|
||||
|
||||
#define TRACE_DIR_MNT "/sys/kernel/tracing"
|
||||
|
||||
#ifndef AF_ALG
|
||||
# define AF_ALG 38
|
||||
#endif
|
||||
|
||||
#ifndef EM_BPF
|
||||
# define EM_BPF 247
|
||||
#endif
|
||||
|
||||
struct bpf_cfg_ops {
|
||||
void (*cbpf_cb)(void *nl, const struct sock_filter *ops, int ops_len);
|
||||
void (*ebpf_cb)(void *nl, int fd, const char *annotation);
|
||||
};
|
||||
|
||||
enum bpf_mode {
|
||||
CBPF_BYTECODE,
|
||||
CBPF_FILE,
|
||||
EBPF_OBJECT,
|
||||
EBPF_PINNED,
|
||||
BPF_MODE_MAX,
|
||||
};
|
||||
|
||||
struct bpf_cfg_in {
|
||||
const char *object;
|
||||
const char *section;
|
||||
const char *uds;
|
||||
enum bpf_prog_type type;
|
||||
enum bpf_mode mode;
|
||||
__u32 ifindex;
|
||||
bool verbose;
|
||||
int argc;
|
||||
char **argv;
|
||||
struct sock_filter opcodes[BPF_MAXINSNS];
|
||||
union {
|
||||
int n_opcodes;
|
||||
int prog_fd;
|
||||
};
|
||||
};
|
||||
|
||||
/* ALU ops on registers, bpf_add|sub|...: dst_reg += src_reg */
|
||||
|
||||
#define BPF_ALU64_REG(OP, DST, SRC) \
|
||||
((struct bpf_insn) { \
|
||||
.code = BPF_ALU64 | BPF_OP(OP) | BPF_X, \
|
||||
.dst_reg = DST, \
|
||||
.src_reg = SRC, \
|
||||
.off = 0, \
|
||||
.imm = 0 })
|
||||
|
||||
#define BPF_ALU32_REG(OP, DST, SRC) \
|
||||
((struct bpf_insn) { \
|
||||
.code = BPF_ALU | BPF_OP(OP) | BPF_X, \
|
||||
.dst_reg = DST, \
|
||||
.src_reg = SRC, \
|
||||
.off = 0, \
|
||||
.imm = 0 })
|
||||
|
||||
/* ALU ops on immediates, bpf_add|sub|...: dst_reg += imm32 */
|
||||
|
||||
#define BPF_ALU64_IMM(OP, DST, IMM) \
|
||||
((struct bpf_insn) { \
|
||||
.code = BPF_ALU64 | BPF_OP(OP) | BPF_K, \
|
||||
.dst_reg = DST, \
|
||||
.src_reg = 0, \
|
||||
.off = 0, \
|
||||
.imm = IMM })
|
||||
|
||||
#define BPF_ALU32_IMM(OP, DST, IMM) \
|
||||
((struct bpf_insn) { \
|
||||
.code = BPF_ALU | BPF_OP(OP) | BPF_K, \
|
||||
.dst_reg = DST, \
|
||||
.src_reg = 0, \
|
||||
.off = 0, \
|
||||
.imm = IMM })
|
||||
|
||||
/* Short form of mov, dst_reg = src_reg */
|
||||
|
||||
#define BPF_MOV64_REG(DST, SRC) \
|
||||
((struct bpf_insn) { \
|
||||
.code = BPF_ALU64 | BPF_MOV | BPF_X, \
|
||||
.dst_reg = DST, \
|
||||
.src_reg = SRC, \
|
||||
.off = 0, \
|
||||
.imm = 0 })
|
||||
|
||||
#define BPF_MOV32_REG(DST, SRC) \
|
||||
((struct bpf_insn) { \
|
||||
.code = BPF_ALU | BPF_MOV | BPF_X, \
|
||||
.dst_reg = DST, \
|
||||
.src_reg = SRC, \
|
||||
.off = 0, \
|
||||
.imm = 0 })
|
||||
|
||||
/* Short form of mov, dst_reg = imm32 */
|
||||
|
||||
#define BPF_MOV64_IMM(DST, IMM) \
|
||||
((struct bpf_insn) { \
|
||||
.code = BPF_ALU64 | BPF_MOV | BPF_K, \
|
||||
.dst_reg = DST, \
|
||||
.src_reg = 0, \
|
||||
.off = 0, \
|
||||
.imm = IMM })
|
||||
|
||||
#define BPF_MOV32_IMM(DST, IMM) \
|
||||
((struct bpf_insn) { \
|
||||
.code = BPF_ALU | BPF_MOV | BPF_K, \
|
||||
.dst_reg = DST, \
|
||||
.src_reg = 0, \
|
||||
.off = 0, \
|
||||
.imm = IMM })
|
||||
|
||||
/* BPF_LD_IMM64 macro encodes single 'load 64-bit immediate' insn */
|
||||
#define BPF_LD_IMM64(DST, IMM) \
|
||||
BPF_LD_IMM64_RAW(DST, 0, IMM)
|
||||
|
||||
#define BPF_LD_IMM64_RAW(DST, SRC, IMM) \
|
||||
((struct bpf_insn) { \
|
||||
.code = BPF_LD | BPF_DW | BPF_IMM, \
|
||||
.dst_reg = DST, \
|
||||
.src_reg = SRC, \
|
||||
.off = 0, \
|
||||
.imm = (__u32) (IMM) }), \
|
||||
((struct bpf_insn) { \
|
||||
.code = 0, /* zero is reserved opcode */ \
|
||||
.dst_reg = 0, \
|
||||
.src_reg = 0, \
|
||||
.off = 0, \
|
||||
.imm = ((__u64) (IMM)) >> 32 })
|
||||
|
||||
#ifndef BPF_PSEUDO_MAP_FD
|
||||
# define BPF_PSEUDO_MAP_FD 1
|
||||
#endif
|
||||
|
||||
/* pseudo BPF_LD_IMM64 insn used to refer to process-local map_fd */
|
||||
#define BPF_LD_MAP_FD(DST, MAP_FD) \
|
||||
BPF_LD_IMM64_RAW(DST, BPF_PSEUDO_MAP_FD, MAP_FD)
|
||||
|
||||
|
||||
/* Direct packet access, R0 = *(uint *) (skb->data + imm32) */
|
||||
|
||||
#define BPF_LD_ABS(SIZE, IMM) \
|
||||
((struct bpf_insn) { \
|
||||
.code = BPF_LD | BPF_SIZE(SIZE) | BPF_ABS, \
|
||||
.dst_reg = 0, \
|
||||
.src_reg = 0, \
|
||||
.off = 0, \
|
||||
.imm = IMM })
|
||||
|
||||
/* Memory load, dst_reg = *(uint *) (src_reg + off16) */
|
||||
|
||||
#define BPF_LDX_MEM(SIZE, DST, SRC, OFF) \
|
||||
((struct bpf_insn) { \
|
||||
.code = BPF_LDX | BPF_SIZE(SIZE) | BPF_MEM, \
|
||||
.dst_reg = DST, \
|
||||
.src_reg = SRC, \
|
||||
.off = OFF, \
|
||||
.imm = 0 })
|
||||
|
||||
/* Memory store, *(uint *) (dst_reg + off16) = src_reg */
|
||||
|
||||
#define BPF_STX_MEM(SIZE, DST, SRC, OFF) \
|
||||
((struct bpf_insn) { \
|
||||
.code = BPF_STX | BPF_SIZE(SIZE) | BPF_MEM, \
|
||||
.dst_reg = DST, \
|
||||
.src_reg = SRC, \
|
||||
.off = OFF, \
|
||||
.imm = 0 })
|
||||
|
||||
/* Memory store, *(uint *) (dst_reg + off16) = imm32 */
|
||||
|
||||
#define BPF_ST_MEM(SIZE, DST, OFF, IMM) \
|
||||
((struct bpf_insn) { \
|
||||
.code = BPF_ST | BPF_SIZE(SIZE) | BPF_MEM, \
|
||||
.dst_reg = DST, \
|
||||
.src_reg = 0, \
|
||||
.off = OFF, \
|
||||
.imm = IMM })
|
||||
|
||||
/* Conditional jumps against registers, if (dst_reg 'op' src_reg) goto pc + off16 */
|
||||
|
||||
#define BPF_JMP_REG(OP, DST, SRC, OFF) \
|
||||
((struct bpf_insn) { \
|
||||
.code = BPF_JMP | BPF_OP(OP) | BPF_X, \
|
||||
.dst_reg = DST, \
|
||||
.src_reg = SRC, \
|
||||
.off = OFF, \
|
||||
.imm = 0 })
|
||||
|
||||
/* Conditional jumps against immediates, if (dst_reg 'op' imm32) goto pc + off16 */
|
||||
|
||||
#define BPF_JMP_IMM(OP, DST, IMM, OFF) \
|
||||
((struct bpf_insn) { \
|
||||
.code = BPF_JMP | BPF_OP(OP) | BPF_K, \
|
||||
.dst_reg = DST, \
|
||||
.src_reg = 0, \
|
||||
.off = OFF, \
|
||||
.imm = IMM })
|
||||
|
||||
/* Raw code statement block */
|
||||
|
||||
#define BPF_RAW_INSN(CODE, DST, SRC, OFF, IMM) \
|
||||
((struct bpf_insn) { \
|
||||
.code = CODE, \
|
||||
.dst_reg = DST, \
|
||||
.src_reg = SRC, \
|
||||
.off = OFF, \
|
||||
.imm = IMM })
|
||||
|
||||
/* Program exit */
|
||||
|
||||
#define BPF_EXIT_INSN() \
|
||||
((struct bpf_insn) { \
|
||||
.code = BPF_JMP | BPF_EXIT, \
|
||||
.dst_reg = 0, \
|
||||
.src_reg = 0, \
|
||||
.off = 0, \
|
||||
.imm = 0 })
|
||||
|
||||
int bpf_parse_common(struct bpf_cfg_in *cfg, const struct bpf_cfg_ops *ops);
|
||||
int bpf_load_common(struct bpf_cfg_in *cfg, const struct bpf_cfg_ops *ops,
|
||||
void *nl);
|
||||
int bpf_parse_and_load_common(struct bpf_cfg_in *cfg,
|
||||
const struct bpf_cfg_ops *ops, void *nl);
|
||||
|
||||
const char *bpf_prog_to_default_section(enum bpf_prog_type type);
|
||||
|
||||
int bpf_graft_map(const char *map_path, uint32_t *key, int argc, char **argv);
|
||||
int bpf_trace_pipe(void);
|
||||
|
||||
void bpf_print_ops(struct rtattr *bpf_ops, __u16 len);
|
||||
|
||||
int bpf_prog_load_dev(enum bpf_prog_type type, const struct bpf_insn *insns,
|
||||
size_t size_insns, const char *license, __u32 ifindex,
|
||||
char *log, size_t size_log);
|
||||
int bpf_program_load(enum bpf_prog_type type, const struct bpf_insn *insns,
|
||||
size_t size_insns, const char *license, char *log,
|
||||
size_t size_log);
|
||||
|
||||
int bpf_prog_attach_fd(int prog_fd, int target_fd, enum bpf_attach_type type);
|
||||
int bpf_prog_detach_fd(int target_fd, enum bpf_attach_type type);
|
||||
int bpf_program_attach(int prog_fd, int target_fd, enum bpf_attach_type type);
|
||||
|
||||
int bpf_dump_prog_info(FILE *f, uint32_t id);
|
||||
|
||||
#ifdef HAVE_ELF
|
||||
int bpf_send_map_fds(const char *path, const char *obj);
|
||||
int bpf_recv_map_fds(const char *path, int *fds, struct bpf_map_aux *aux,
|
||||
unsigned int entries);
|
||||
#ifdef HAVE_LIBBPF
|
||||
int iproute2_bpf_elf_ctx_init(struct bpf_cfg_in *cfg);
|
||||
int iproute2_bpf_fetch_ancillary(void);
|
||||
int iproute2_get_root_path(char *root_path, size_t len);
|
||||
bool iproute2_is_pin_map(const char *libbpf_map_name, char *pathname);
|
||||
bool iproute2_is_map_in_map(const char *libbpf_map_name, struct bpf_elf_map *imap,
|
||||
struct bpf_elf_map *omap, char *omap_name);
|
||||
int iproute2_find_map_name_by_id(unsigned int map_id, char *name);
|
||||
int iproute2_load_libbpf(struct bpf_cfg_in *cfg);
|
||||
#endif /* HAVE_LIBBPF */
|
||||
#else
|
||||
static inline int bpf_send_map_fds(const char *path, const char *obj)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
static inline int bpf_recv_map_fds(const char *path, int *fds,
|
||||
struct bpf_map_aux *aux,
|
||||
unsigned int entries)
|
||||
{
|
||||
return -1;
|
||||
}
|
||||
#ifdef HAVE_LIBBPF
|
||||
static inline int iproute2_load_libbpf(struct bpf_cfg_in *cfg)
|
||||
{
|
||||
fprintf(stderr, "No ELF library support compiled in.\n");
|
||||
return -1;
|
||||
}
|
||||
#endif /* HAVE_LIBBPF */
|
||||
#endif /* HAVE_ELF */
|
||||
|
||||
const char *get_libbpf_version(void);
|
||||
|
||||
#endif /* __BPF_UTIL__ */
|
||||
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue