Compare commits
4 Commits
main
...
flag-names
| Author | SHA1 | Date |
|---|---|---|
|
|
297d7588d7 | |
|
|
d09c20e5f8 | |
|
|
5d83550131 | |
|
|
12d2bc1d71 |
130
.clang-format
130
.clang-format
|
|
@ -1,130 +0,0 @@
|
||||||
# SPDX-License-Identifier: GPL-2.0
|
|
||||||
#
|
|
||||||
# clang-format configuration file. Intended for clang-format >= 4.
|
|
||||||
#
|
|
||||||
# For more information, see:
|
|
||||||
#
|
|
||||||
# Documentation/process/clang-format.rst
|
|
||||||
# https://clang.llvm.org/docs/ClangFormat.html
|
|
||||||
# https://clang.llvm.org/docs/ClangFormatStyleOptions.html
|
|
||||||
#
|
|
||||||
---
|
|
||||||
AccessModifierOffset: -4
|
|
||||||
AlignAfterOpenBracket: Align
|
|
||||||
AlignConsecutiveAssignments: false
|
|
||||||
AlignConsecutiveDeclarations: false
|
|
||||||
#AlignEscapedNewlines: Left # Unknown to clang-format-4.0
|
|
||||||
AlignOperands: true
|
|
||||||
AlignTrailingComments: false
|
|
||||||
AllowAllParametersOfDeclarationOnNextLine: false
|
|
||||||
AllowShortBlocksOnASingleLine: false
|
|
||||||
AllowShortCaseLabelsOnASingleLine: false
|
|
||||||
AllowShortFunctionsOnASingleLine: None
|
|
||||||
AllowShortIfStatementsOnASingleLine: false
|
|
||||||
AllowShortLoopsOnASingleLine: false
|
|
||||||
AlwaysBreakAfterDefinitionReturnType: None
|
|
||||||
AlwaysBreakAfterReturnType: None
|
|
||||||
AlwaysBreakBeforeMultilineStrings: false
|
|
||||||
AlwaysBreakTemplateDeclarations: false
|
|
||||||
BinPackArguments: true
|
|
||||||
BinPackParameters: true
|
|
||||||
BraceWrapping:
|
|
||||||
AfterClass: false
|
|
||||||
AfterControlStatement: false
|
|
||||||
AfterEnum: false
|
|
||||||
AfterFunction: true
|
|
||||||
AfterNamespace: true
|
|
||||||
AfterObjCDeclaration: false
|
|
||||||
AfterStruct: false
|
|
||||||
AfterUnion: false
|
|
||||||
#AfterExternBlock: false # Unknown to clang-format-5.0
|
|
||||||
BeforeCatch: false
|
|
||||||
BeforeElse: false
|
|
||||||
IndentBraces: false
|
|
||||||
#SplitEmptyFunction: true # Unknown to clang-format-4.0
|
|
||||||
#SplitEmptyRecord: true # Unknown to clang-format-4.0
|
|
||||||
#SplitEmptyNamespace: true # Unknown to clang-format-4.0
|
|
||||||
BreakBeforeBinaryOperators: None
|
|
||||||
BreakBeforeBraces: Custom
|
|
||||||
#BreakBeforeInheritanceComma: false # Unknown to clang-format-4.0
|
|
||||||
BreakBeforeTernaryOperators: false
|
|
||||||
BreakConstructorInitializersBeforeComma: false
|
|
||||||
#BreakConstructorInitializers: BeforeComma # Unknown to clang-format-4.0
|
|
||||||
BreakAfterJavaFieldAnnotations: false
|
|
||||||
BreakStringLiterals: false
|
|
||||||
ColumnLimit: 80
|
|
||||||
CommentPragmas: '^ IWYU pragma:'
|
|
||||||
#CompactNamespaces: false # Unknown to clang-format-4.0
|
|
||||||
ConstructorInitializerAllOnOneLineOrOnePerLine: false
|
|
||||||
ConstructorInitializerIndentWidth: 8
|
|
||||||
ContinuationIndentWidth: 8
|
|
||||||
Cpp11BracedListStyle: false
|
|
||||||
DerivePointerAlignment: false
|
|
||||||
DisableFormat: false
|
|
||||||
ExperimentalAutoDetectBinPacking: false
|
|
||||||
#FixNamespaceComments: false # Unknown to clang-format-4.0
|
|
||||||
|
|
||||||
# Taken from:
|
|
||||||
# git grep -h '^#define [^[:space:]]*for_each[^[:space:]]*(' include/ \
|
|
||||||
# | sed "s,^#define \([^[:space:]]*for_each[^[:space:]]*\)(.*$, - '\1'," \
|
|
||||||
# | sort | uniq
|
|
||||||
ForEachMacros:
|
|
||||||
- 'list_for_each_entry'
|
|
||||||
- 'list_for_each_entry_safe'
|
|
||||||
- 'mnl_attr_for_each_nested'
|
|
||||||
- 'hlist_for_each'
|
|
||||||
- 'hlist_for_each_safe'
|
|
||||||
- 'hlist_for_each_entry'
|
|
||||||
|
|
||||||
#IncludeBlocks: Preserve # Unknown to clang-format-5.0
|
|
||||||
IncludeCategories:
|
|
||||||
- Regex: '.*'
|
|
||||||
Priority: 1
|
|
||||||
IncludeIsMainRegex: '(Test)?$'
|
|
||||||
IndentCaseLabels: false
|
|
||||||
#IndentPPDirectives: None # Unknown to clang-format-5.0
|
|
||||||
IndentWidth: 8
|
|
||||||
IndentWrappedFunctionNames: false
|
|
||||||
JavaScriptQuotes: Leave
|
|
||||||
JavaScriptWrapImports: true
|
|
||||||
KeepEmptyLinesAtTheStartOfBlocks: false
|
|
||||||
MacroBlockBegin: ''
|
|
||||||
MacroBlockEnd: ''
|
|
||||||
MaxEmptyLinesToKeep: 1
|
|
||||||
NamespaceIndentation: Inner
|
|
||||||
#ObjCBinPackProtocolList: Auto # Unknown to clang-format-5.0
|
|
||||||
ObjCBlockIndentWidth: 8
|
|
||||||
ObjCSpaceAfterProperty: true
|
|
||||||
ObjCSpaceBeforeProtocolList: true
|
|
||||||
|
|
||||||
# Taken from git's rules
|
|
||||||
#PenaltyBreakAssignment: 10 # Unknown to clang-format-4.0
|
|
||||||
PenaltyBreakBeforeFirstCallParameter: 30
|
|
||||||
PenaltyBreakComment: 10
|
|
||||||
PenaltyBreakFirstLessLess: 0
|
|
||||||
PenaltyBreakString: 10
|
|
||||||
PenaltyExcessCharacter: 100
|
|
||||||
PenaltyReturnTypeOnItsOwnLine: 60
|
|
||||||
|
|
||||||
PointerAlignment: Right
|
|
||||||
ReflowComments: false
|
|
||||||
SortIncludes: false
|
|
||||||
#SortUsingDeclarations: false # Unknown to clang-format-4.0
|
|
||||||
SpaceAfterCStyleCast: false
|
|
||||||
SpaceAfterTemplateKeyword: true
|
|
||||||
SpaceBeforeAssignmentOperators: true
|
|
||||||
#SpaceBeforeCtorInitializerColon: true # Unknown to clang-format-5.0
|
|
||||||
#SpaceBeforeInheritanceColon: true # Unknown to clang-format-5.0
|
|
||||||
SpaceBeforeParens: ControlStatements
|
|
||||||
#SpaceBeforeRangeBasedForLoopColon: true # Unknown to clang-format-5.0
|
|
||||||
SpaceInEmptyParentheses: false
|
|
||||||
SpacesBeforeTrailingComments: 1
|
|
||||||
SpacesInAngles: false
|
|
||||||
SpacesInContainerLiterals: false
|
|
||||||
SpacesInCStyleCastParentheses: false
|
|
||||||
SpacesInParentheses: false
|
|
||||||
SpacesInSquareBrackets: false
|
|
||||||
Standard: Cpp03
|
|
||||||
TabWidth: 8
|
|
||||||
UseTab: Always
|
|
||||||
...
|
|
||||||
|
|
@ -1,7 +1,6 @@
|
||||||
# locally generated
|
|
||||||
Config
|
|
||||||
static-syms.h
|
static-syms.h
|
||||||
config.*
|
config.*
|
||||||
|
Config
|
||||||
*.o
|
*.o
|
||||||
*.a
|
*.a
|
||||||
*.so
|
*.so
|
||||||
|
|
@ -39,3 +38,13 @@ testsuite/results
|
||||||
testsuite/iproute2/iproute2-this
|
testsuite/iproute2/iproute2-this
|
||||||
testsuite/tools/generate_nlmsg
|
testsuite/tools/generate_nlmsg
|
||||||
testsuite/tests/ip/link/dev_wo_vf_rate.nl
|
testsuite/tests/ip/link/dev_wo_vf_rate.nl
|
||||||
|
|
||||||
|
# doc files generated at runtime
|
||||||
|
doc/*.aux
|
||||||
|
doc/*.log
|
||||||
|
doc/*.toc
|
||||||
|
doc/*.ps
|
||||||
|
doc/*.dvi
|
||||||
|
doc/*.html
|
||||||
|
doc/*.pdf
|
||||||
|
doc/*.out
|
||||||
|
|
|
||||||
22
.mailmap
22
.mailmap
|
|
@ -1,22 +0,0 @@
|
||||||
#
|
|
||||||
# This list is used by git-shortlog to fix a few botched name translations
|
|
||||||
# in the git archive, either because the author's full name was messed up
|
|
||||||
# and/or not always written the same way, making contributions from the
|
|
||||||
# same person appearing not to be so or badly displayed.
|
|
||||||
#
|
|
||||||
# Format
|
|
||||||
# Full name <goodaddress> <badaddress>
|
|
||||||
Steve Wise <larrystevenwise@gmail.com> <swise@opengridcomputing.com>
|
|
||||||
Steve Wise <larrystevenwise@gmail.com> <swise@chelsio.com>
|
|
||||||
|
|
||||||
Stephen Hemminger <stephen@networkplumber.org> <sthemmin@microsoft.com>
|
|
||||||
Stephen Hemminger <stephen@networkplumber.org> <shemming@brocade.com>
|
|
||||||
Stephen Hemminger <stephen@networkplumber.org> <stephen.hemminger@vyatta.com>
|
|
||||||
Stephen Hemminger <stephen@networkplumber.org> <shemminger@vyatta.com>
|
|
||||||
Stephen Hemminger <stephen@networkplumber.org> <shemminger>
|
|
||||||
Stephen Hemminger <stephen@networkplumber.org> <shemminger@linux-foundation.org>
|
|
||||||
Stephen Hemminger <stephen@networkplumber.org> <shemminger@osdl.org>
|
|
||||||
Stephen Hemminger <stephen@networkplumber.org> <osdl.org!shemminger>
|
|
||||||
Stephen Hemminger <stephen@networkplumber.org> <osdl.net!shemminger>
|
|
||||||
|
|
||||||
David Ahern <dsahern@gmail.com> <dsa@cumulusnetworks.com>
|
|
||||||
104
Makefile
104
Makefile
|
|
@ -1,13 +1,6 @@
|
||||||
# SPDX-License-Identifier: GPL-2.0
|
# Include "Config" if already generated
|
||||||
# Top level Makefile for iproute2
|
ifneq ($(wildcard Config),)
|
||||||
|
include Config
|
||||||
-include config.mk
|
|
||||||
|
|
||||||
ifeq ("$(origin V)", "command line")
|
|
||||||
VERBOSE = $(V)
|
|
||||||
endif
|
|
||||||
ifndef VERBOSE
|
|
||||||
VERBOSE = 0
|
|
||||||
endif
|
endif
|
||||||
|
|
||||||
ifeq ($(VERBOSE),0)
|
ifeq ($(VERBOSE),0)
|
||||||
|
|
@ -15,10 +8,9 @@ MAKEFLAGS += --no-print-directory
|
||||||
endif
|
endif
|
||||||
|
|
||||||
PREFIX?=/usr
|
PREFIX?=/usr
|
||||||
|
LIBDIR?=$(PREFIX)/lib
|
||||||
SBINDIR?=/sbin
|
SBINDIR?=/sbin
|
||||||
CONFDIR?=/etc/iproute2
|
CONFDIR?=/etc/iproute2
|
||||||
NETNS_RUN_DIR?=/var/run/netns
|
|
||||||
NETNS_ETC_DIR?=/etc/netns
|
|
||||||
DATADIR?=$(PREFIX)/share
|
DATADIR?=$(PREFIX)/share
|
||||||
HDRDIR?=$(PREFIX)/include/iproute2
|
HDRDIR?=$(PREFIX)/include/iproute2
|
||||||
DOCDIR?=$(DATADIR)/doc/iproute2
|
DOCDIR?=$(DATADIR)/doc/iproute2
|
||||||
|
|
@ -37,101 +29,79 @@ ifneq ($(SHARED_LIBS),y)
|
||||||
DEFINES+= -DNO_SHARED_LIBS
|
DEFINES+= -DNO_SHARED_LIBS
|
||||||
endif
|
endif
|
||||||
|
|
||||||
DEFINES+=-DCONFDIR=\"$(CONFDIR)\" \
|
DEFINES+=-DCONFDIR=\"$(CONFDIR)\"
|
||||||
-DNETNS_RUN_DIR=\"$(NETNS_RUN_DIR)\" \
|
|
||||||
-DNETNS_ETC_DIR=\"$(NETNS_ETC_DIR)\"
|
|
||||||
|
|
||||||
#options for AX.25
|
#options for decnet
|
||||||
ADDLIB+=ax25_ntop.o
|
ADDLIB+=dnet_ntop.o dnet_pton.o
|
||||||
|
|
||||||
#options for ROSE
|
#options for ipx
|
||||||
ADDLIB+=rose_ntop.o
|
ADDLIB+=ipx_ntop.o ipx_pton.o
|
||||||
|
|
||||||
#options for mpls
|
#options for mpls
|
||||||
ADDLIB+=mpls_ntop.o mpls_pton.o
|
ADDLIB+=mpls_ntop.o mpls_pton.o
|
||||||
|
|
||||||
#options for NETROM
|
|
||||||
ADDLIB+=netrom_ntop.o
|
|
||||||
|
|
||||||
CC := gcc
|
CC := gcc
|
||||||
HOSTCC ?= $(CC)
|
HOSTCC ?= $(CC)
|
||||||
DEFINES += -D_GNU_SOURCE
|
DEFINES += -D_GNU_SOURCE
|
||||||
# Turn on transparent support for LFS
|
# Turn on transparent support for LFS
|
||||||
DEFINES += -D_FILE_OFFSET_BITS=64 -D_LARGEFILE_SOURCE -D_LARGEFILE64_SOURCE
|
DEFINES += -D_FILE_OFFSET_BITS=64 -D_LARGEFILE_SOURCE -D_LARGEFILE64_SOURCE
|
||||||
CCOPTS = -O2 -pipe
|
CCOPTS = -O2
|
||||||
WFLAGS := -Wall -Wstrict-prototypes -Wmissing-prototypes
|
WFLAGS := -Wall -Wstrict-prototypes -Wmissing-prototypes
|
||||||
WFLAGS += -Wmissing-declarations -Wold-style-definition -Wformat=2
|
WFLAGS += -Wmissing-declarations -Wold-style-definition -Wformat=2
|
||||||
|
|
||||||
CFLAGS := $(WFLAGS) $(CCOPTS) -I../include -I../include/uapi $(DEFINES) $(CFLAGS)
|
CFLAGS := $(WFLAGS) $(CCOPTS) -I../include $(DEFINES) $(CFLAGS)
|
||||||
YACCFLAGS = -d -t -v
|
YACCFLAGS = -d -t -v
|
||||||
|
|
||||||
SUBDIRS=lib ip tc bridge misc netem genl tipc devlink rdma dcb man vdpa
|
SUBDIRS=lib ip tc bridge misc netem genl tipc devlink man
|
||||||
|
|
||||||
LIBNETLINK=../lib/libutil.a ../lib/libnetlink.a
|
LIBNETLINK=../lib/libnetlink.a ../lib/libutil.a
|
||||||
LDLIBS += $(LIBNETLINK)
|
LDLIBS += $(LIBNETLINK)
|
||||||
|
|
||||||
all: config.mk
|
ifeq ($(HAVE_ELF),y)
|
||||||
|
CFLAGS += -DHAVE_ELF
|
||||||
|
LDLIBS += -lelf
|
||||||
|
endif
|
||||||
|
|
||||||
|
all: Config
|
||||||
@set -e; \
|
@set -e; \
|
||||||
for i in $(SUBDIRS); \
|
for i in $(SUBDIRS); \
|
||||||
do echo; echo $$i; $(MAKE) -C $$i; done
|
do echo; echo $$i; $(MAKE) $(MFLAGS) -C $$i; done
|
||||||
|
|
||||||
.PHONY: clean clobber distclean check cscope version
|
Config:
|
||||||
|
sh configure $(KERNEL_INCLUDE)
|
||||||
help:
|
|
||||||
@echo "Make Targets:"
|
|
||||||
@echo " all - build binaries"
|
|
||||||
@echo " clean - remove products of build"
|
|
||||||
@echo " distclean - remove configuration and build"
|
|
||||||
@echo " install - install binaries on local machine"
|
|
||||||
@echo " check - run tests"
|
|
||||||
@echo " cscope - build cscope database"
|
|
||||||
@echo " version - update version"
|
|
||||||
@echo ""
|
|
||||||
@echo "Make Arguments:"
|
|
||||||
@echo " V=[0|1] - set build verbosity level"
|
|
||||||
|
|
||||||
config.mk:
|
|
||||||
@if [ ! -f config.mk -o configure -nt config.mk ]; then \
|
|
||||||
sh configure $(KERNEL_INCLUDE); \
|
|
||||||
fi
|
|
||||||
|
|
||||||
install: all
|
install: all
|
||||||
install -m 0755 -d $(DESTDIR)$(SBINDIR)
|
install -m 0755 -d $(DESTDIR)$(SBINDIR)
|
||||||
install -m 0755 -d $(DESTDIR)$(CONFDIR)
|
install -m 0755 -d $(DESTDIR)$(CONFDIR)
|
||||||
install -m 0755 -d $(DESTDIR)$(ARPDDIR)
|
install -m 0755 -d $(DESTDIR)$(ARPDDIR)
|
||||||
install -m 0755 -d $(DESTDIR)$(HDRDIR)
|
install -m 0755 -d $(DESTDIR)$(HDRDIR)
|
||||||
@for i in $(SUBDIRS); do $(MAKE) -C $$i install; done
|
install -m 0755 -d $(DESTDIR)$(DOCDIR)/examples
|
||||||
|
install -m 0755 -d $(DESTDIR)$(DOCDIR)/examples/diffserv
|
||||||
|
install -m 0644 README.iproute2+tc $(shell find examples -maxdepth 1 -type f) \
|
||||||
|
$(DESTDIR)$(DOCDIR)/examples
|
||||||
|
install -m 0644 $(shell find examples/diffserv -maxdepth 1 -type f) \
|
||||||
|
$(DESTDIR)$(DOCDIR)/examples/diffserv
|
||||||
|
@for i in $(SUBDIRS) doc; do $(MAKE) -C $$i install; done
|
||||||
install -m 0644 $(shell find etc/iproute2 -maxdepth 1 -type f) $(DESTDIR)$(CONFDIR)
|
install -m 0644 $(shell find etc/iproute2 -maxdepth 1 -type f) $(DESTDIR)$(CONFDIR)
|
||||||
install -m 0755 -d $(DESTDIR)$(BASH_COMPDIR)
|
install -m 0755 -d $(DESTDIR)$(BASH_COMPDIR)
|
||||||
install -m 0644 bash-completion/tc $(DESTDIR)$(BASH_COMPDIR)
|
install -m 0644 bash-completion/tc $(DESTDIR)$(BASH_COMPDIR)
|
||||||
install -m 0644 bash-completion/devlink $(DESTDIR)$(BASH_COMPDIR)
|
|
||||||
install -m 0644 include/bpf_elf.h $(DESTDIR)$(HDRDIR)
|
install -m 0644 include/bpf_elf.h $(DESTDIR)$(HDRDIR)
|
||||||
|
|
||||||
version:
|
snapshot:
|
||||||
echo "static const char version[] = \""`git describe --tags --long`"\";" \
|
echo "static const char SNAPSHOT[] = \""`date +%y%m%d`"\";" \
|
||||||
> include/version.h
|
> include/SNAPSHOT.h
|
||||||
|
|
||||||
clean:
|
clean:
|
||||||
@for i in $(SUBDIRS) testsuite; \
|
@for i in $(SUBDIRS) doc; \
|
||||||
do $(MAKE) -C $$i clean; done
|
do $(MAKE) $(MFLAGS) -C $$i clean; done
|
||||||
|
|
||||||
clobber:
|
clobber:
|
||||||
touch config.mk
|
touch Config
|
||||||
$(MAKE) clean
|
$(MAKE) $(MFLAGS) clean
|
||||||
rm -f config.mk cscope.*
|
rm -f Config cscope.*
|
||||||
|
|
||||||
distclean: clobber
|
distclean: clobber
|
||||||
|
|
||||||
check: all
|
|
||||||
$(MAKE) -C testsuite
|
|
||||||
$(MAKE) -C testsuite alltests
|
|
||||||
@if command -v man >/dev/null 2>&1; then \
|
|
||||||
echo "Checking manpages for syntax errors..."; \
|
|
||||||
$(MAKE) -C man check; \
|
|
||||||
else \
|
|
||||||
echo "man not installed, skipping checks for syntax errors."; \
|
|
||||||
fi
|
|
||||||
|
|
||||||
cscope:
|
cscope:
|
||||||
cscope -b -q -R -Iinclude -sip -slib -smisc -snetem -stc
|
cscope -b -q -R -Iinclude -sip -slib -smisc -snetem -stc
|
||||||
|
|
||||||
|
|
|
||||||
33
README
33
README
|
|
@ -1,39 +1,40 @@
|
||||||
This is a set of utilities for Linux networking.
|
This is a set of utilities for Linux networking.
|
||||||
|
|
||||||
Information:
|
Information:
|
||||||
https://wiki.linuxfoundation.org/networking/iproute2
|
http://www.linuxfoundation.org/collaborate/workgroups/networking/iproute2
|
||||||
|
|
||||||
Download:
|
Download:
|
||||||
http://www.kernel.org/pub/linux/utils/net/iproute2/
|
http://www.kernel.org/pub/linux/utils/net/iproute2/
|
||||||
|
|
||||||
Stable version repository:
|
Repository:
|
||||||
git://git.kernel.org/pub/scm/network/iproute2/iproute2.git
|
git://git.kernel.org/pub/scm/linux/kernel/git/shemminger/iproute2.git
|
||||||
|
|
||||||
Development repository:
|
|
||||||
git://git.kernel.org/pub/scm/network/iproute2/iproute2-next.git
|
|
||||||
|
|
||||||
How to compile this.
|
How to compile this.
|
||||||
--------------------
|
--------------------
|
||||||
1. libdbm
|
1. libdbm
|
||||||
|
|
||||||
arpd needs to have the berkeleydb development libraries. For Debian
|
arpd needs to have the db4 development libraries. For Debian
|
||||||
users this is the package with a name like libdbX.X-dev.
|
users this is the package with a name like libdb4.x-dev.
|
||||||
DBM_INCLUDE points to the directory with db_185.h which
|
DBM_INCLUDE points to the directory with db_185.h which
|
||||||
is the include file used by arpd to get to the old format Berkeley
|
is the include file used by arpd to get to the old format Berkeley
|
||||||
database routines. Often this is in the db-devel package.
|
database routines. Often this is in the db-devel package.
|
||||||
|
|
||||||
2. make
|
2. make
|
||||||
|
|
||||||
The makefile will automatically build a config.mk file which
|
The makefile will automatically build a Config file which
|
||||||
contains definitions of libraries that may or may not be available
|
contains whether or not ATM is available, etc.
|
||||||
on the system such as: ATM, ELF, MNL, and SELINUX.
|
|
||||||
|
|
||||||
3. include/uapi
|
3. To make documentation, cd to the doc/ directory, then
|
||||||
|
look at start of Makefile and set correct values for
|
||||||
|
PAGESIZE=a4 , ie: a4 , letter ... (string)
|
||||||
|
PAGESPERPAGE=2 , ie: 1 , 2 ... (numeric)
|
||||||
|
and make there. It assumes that latex, dvips and psnup
|
||||||
|
are in your path.
|
||||||
|
|
||||||
This package includes matching sanitized kernel headers because
|
4. This package includes matching sanitized kernel headers because
|
||||||
the build environment may not have up to date versions. See Makefile
|
the build environment may not have up to date versions. See Makefile
|
||||||
if you have special requirements and need to point at different
|
if you have special requirements and need to point at different
|
||||||
kernel include files.
|
kernel include files.
|
||||||
|
|
||||||
Stephen Hemminger
|
Stephen Hemminger
|
||||||
stephen@networkplumber.org
|
stephen@networkplumber.org
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,33 @@
|
||||||
|
|
||||||
|
Here are a few quick points about DECnet support...
|
||||||
|
|
||||||
|
o iproute2 is the tool of choice for configuring the DECnet support for
|
||||||
|
Linux. For many features, it is the only tool which can be used to
|
||||||
|
configure them.
|
||||||
|
|
||||||
|
o No name resolution is available as yet, all addresses must be
|
||||||
|
entered numerically.
|
||||||
|
|
||||||
|
o Remember to set the hardware address of the interface using:
|
||||||
|
|
||||||
|
ip link set ethX address xx:xx:xx:xx:xx:xx
|
||||||
|
(where xx:xx:xx:xx:xx:xx is the MAC address for your DECnet node
|
||||||
|
address)
|
||||||
|
|
||||||
|
if your Ethernet card won't listen to more than one unicast
|
||||||
|
mac address at once. If the Linux DECnet stack doesn't talk to
|
||||||
|
any other DECnet nodes, then check this with tcpdump and if it's
|
||||||
|
a problem, change the mac address (but do this _before_ starting
|
||||||
|
any other network protocol on the interface)
|
||||||
|
|
||||||
|
o Whilst you can use ip addr add to add more than one DECnet address to an
|
||||||
|
interface, don't expect addresses which are not the same as the
|
||||||
|
kernel's node address to work properly with 2.4 kernels. This should
|
||||||
|
be fine with 2.6 kernels as the routing code has been extensively
|
||||||
|
modified and improved.
|
||||||
|
|
||||||
|
o The DECnet support is currently self contained. It does not depend on
|
||||||
|
the libdnet library.
|
||||||
|
|
||||||
|
Steve Whitehouse <steve@chygwyn.com>
|
||||||
|
|
||||||
17
README.devel
17
README.devel
|
|
@ -4,15 +4,12 @@ development. Most new features require a kernel and a utility component.
|
||||||
Please submit both to the Linux networking mailing list
|
Please submit both to the Linux networking mailing list
|
||||||
<netdev@vger.kernel.org>
|
<netdev@vger.kernel.org>
|
||||||
|
|
||||||
The current source for the stable version is in the git repository:
|
The current source is in the git repository:
|
||||||
git://git.kernel.org/pub/scm/network/iproute2/iproute2.git
|
git://git.kernel.org/pub/scm/linux/kernel/git/shemminger/iproute2.git
|
||||||
|
|
||||||
The development git repository is available at the following address:
|
The master branch contains the source corresponding to the current
|
||||||
git://git.kernel.org/pub/scm/network/iproute2/iproute2-next.git
|
code in the mainline Linux kernel (ie follows Linus). The net-next
|
||||||
|
branch is a temporary branch that tracks the code intended for the
|
||||||
|
next release; it corresponds with networking development branch in
|
||||||
|
the kernel.
|
||||||
|
|
||||||
The stable repository contains the source corresponding to the
|
|
||||||
current code in the Linux networking tree (net), which in turn is
|
|
||||||
aligned on the mainline Linux kernel (ie follows Linus).
|
|
||||||
The iproute2-next repository tracks the code intended for the next
|
|
||||||
release; it corresponds with networking development tree (net-next)
|
|
||||||
in the kernel.
|
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,95 @@
|
||||||
|
I. About the distribution tables
|
||||||
|
|
||||||
|
The table used for "synthesizing" the distribution is essentially a scaled,
|
||||||
|
translated, inverse to the cumulative distribution function.
|
||||||
|
|
||||||
|
Here's how to think about it: Let F() be the cumulative distribution
|
||||||
|
function for a probability distribution X. We'll assume we've scaled
|
||||||
|
things so that X has mean 0 and standard deviation 1, though that's not
|
||||||
|
so important here. Then:
|
||||||
|
|
||||||
|
F(x) = P(X <= x) = \int_{-inf}^x f
|
||||||
|
|
||||||
|
where f is the probability density function.
|
||||||
|
|
||||||
|
F is monotonically increasing, so has an inverse function G, with domain
|
||||||
|
0 to 1. Here, G(t) = the x such that P(X <= x) = t. (In general, G may
|
||||||
|
have singularities if X has point masses, i.e., points x such that
|
||||||
|
P(X = x) > 0.)
|
||||||
|
|
||||||
|
Now we create a tabular representation of G as follows: Choose some table
|
||||||
|
size N, and for the ith entry, put in G(i/N). Let's call this table T.
|
||||||
|
|
||||||
|
The claim now is, I can create a (discrete) random variable Y whose
|
||||||
|
distribution has the same approximate "shape" as X, simply by letting
|
||||||
|
Y = T(U), where U is a discrete uniform random variable with range 1 to N.
|
||||||
|
To see this, it's enough to show that Y's cumulative distribution function,
|
||||||
|
(let's call it H), is a discrete approximation to F. But
|
||||||
|
|
||||||
|
H(x) = P(Y <= x)
|
||||||
|
= (# of entries in T <= x) / N -- as Y chosen uniformly from T
|
||||||
|
= i/N, where i is the largest integer such that G(i/N) <= x
|
||||||
|
= i/N, where i is the largest integer such that i/N <= F(x)
|
||||||
|
-- since G and F are inverse functions (and F is
|
||||||
|
increasing)
|
||||||
|
= floor(N*F(x))/N
|
||||||
|
|
||||||
|
as desired.
|
||||||
|
|
||||||
|
II. How to create distribution tables (in theory)
|
||||||
|
|
||||||
|
How can we create this table in practice? In some cases, F may have a
|
||||||
|
simple expression which allows evaluating its inverse directly. The
|
||||||
|
Pareto distribution is one example of this. In other cases, and
|
||||||
|
especially for matching an experimentally observed distribution, it's
|
||||||
|
easiest simply to create a table for F and "invert" it. Here, we give
|
||||||
|
a concrete example, namely how the new "experimental" distribution was
|
||||||
|
created.
|
||||||
|
|
||||||
|
1. Collect enough data points to characterize the distribution. Here, I
|
||||||
|
collected 25,000 "ping" roundtrip times to a "distant" point (time.nist.gov).
|
||||||
|
That's far more data than is really necessary, but it was fairly painless to
|
||||||
|
collect it, so...
|
||||||
|
|
||||||
|
2. Normalize the data so that it has mean 0 and standard deviation 1.
|
||||||
|
|
||||||
|
3. Determine the cumulative distribution. The code I wrote creates a table
|
||||||
|
covering the range -10 to +10, with granularity .00005. Obviously, this
|
||||||
|
is absurdly over-precise, but since it's a one-time only computation, I
|
||||||
|
figured it hardly mattered.
|
||||||
|
|
||||||
|
4. Invert the table: for each table entry F(x) = y, make the y*TABLESIZE
|
||||||
|
(here, 4096) entry be x*TABLEFACTOR (here, 8192). This creates a table
|
||||||
|
for the ("normalized") inverse of size TABLESIZE, covering its domain 0
|
||||||
|
to 1 with granularity 1/TABLESIZE. Note that even with the granularity
|
||||||
|
used in creating the table for F, it's possible not all the entries in
|
||||||
|
the table for G will be filled in. So, make a pass through the
|
||||||
|
inverse's table, filling in any missing entries by linear interpolation.
|
||||||
|
|
||||||
|
III. How to create distribution tables (in practice)
|
||||||
|
|
||||||
|
If you want to do all this yourself, I've provided several tools to help:
|
||||||
|
|
||||||
|
1. maketable does the steps 2-4 above, and then generates the appropriate
|
||||||
|
header file. So if you have your own time distribution, you can generate
|
||||||
|
the header simply by:
|
||||||
|
|
||||||
|
maketable < time.values > header.h
|
||||||
|
|
||||||
|
2. As explained in the other README file, the somewhat sleazy way I have
|
||||||
|
of generating correlated values needs correction. You can generate your
|
||||||
|
own correction tables by compiling makesigtable and makemutable with
|
||||||
|
your header file. Check the Makefile to see how this is done.
|
||||||
|
|
||||||
|
3. Warning: maketable, makesigtable and especially makemutable do
|
||||||
|
enormous amounts of floating point arithmetic. Don't try running
|
||||||
|
these on an old 486. (NIST Net itself will run fine on such a
|
||||||
|
system, since in operation, it just needs to do a few simple integral
|
||||||
|
calculations. But getting there takes some work.)
|
||||||
|
|
||||||
|
4. The tables produced are all normalized for mean 0 and standard
|
||||||
|
deviation 1. How do you know what values to use for real? Here, I've
|
||||||
|
provided a simple "stats" utility. Give it a series of floating point
|
||||||
|
values, and it will return their mean (mu), standard deviation (sigma),
|
||||||
|
and correlation coefficient (rho). You can then plug these values
|
||||||
|
directly into NIST Net.
|
||||||
|
|
@ -0,0 +1,123 @@
|
||||||
|
iproute2+tc*
|
||||||
|
|
||||||
|
It's the first release of the Linux traffic control engine.
|
||||||
|
|
||||||
|
|
||||||
|
NOTES.
|
||||||
|
* csz scheduler is inoperative at the moment, and probably
|
||||||
|
never will be repaired but replaced with h-pfq scheduler.
|
||||||
|
* To use "fw" classifier you will need ipfwchains patch.
|
||||||
|
* No manual available. Ask me, if you have problems (only try to guess
|
||||||
|
answer yourself at first 8)).
|
||||||
|
|
||||||
|
|
||||||
|
Micro-manual how to start it the first time
|
||||||
|
-------------------------------------------
|
||||||
|
|
||||||
|
A. Attach CBQ to eth1:
|
||||||
|
|
||||||
|
tc qdisc add dev eth1 root handle 1: cbq bandwidth 10Mbit allot 1514 cell 8 \
|
||||||
|
avpkt 1000 mpu 64
|
||||||
|
|
||||||
|
B. Add root class:
|
||||||
|
|
||||||
|
tc class add dev eth1 parent 1:0 classid 1:1 cbq bandwidth 10Mbit rate 10Mbit \
|
||||||
|
allot 1514 cell 8 weight 1Mbit prio 8 maxburst 20 avpkt 1000
|
||||||
|
|
||||||
|
C. Add default interactive class:
|
||||||
|
|
||||||
|
tc class add dev eth1 parent 1:1 classid 1:2 cbq bandwidth 10Mbit rate 1Mbit \
|
||||||
|
allot 1514 cell 8 weight 100Kbit prio 3 maxburst 20 avpkt 1000 split 1:0 \
|
||||||
|
defmap c0
|
||||||
|
|
||||||
|
D. Add default class:
|
||||||
|
|
||||||
|
tc class add dev eth1 parent 1:1 classid 1:3 cbq bandwidth 10Mbit rate 8Mbit \
|
||||||
|
allot 1514 cell 8 weight 800Kbit prio 7 maxburst 20 avpkt 1000 split 1:0 \
|
||||||
|
defmap 3f
|
||||||
|
|
||||||
|
etc. etc. etc. Well, it is enough to start 8) The rest can be guessed 8)
|
||||||
|
Look also at a more elaborate example, ready to start rsvpd,
|
||||||
|
in rsvp/cbqinit.eth1.
|
||||||
|
|
||||||
|
|
||||||
|
Terminology and advice about setting CBQ parameters may be found in Sally Floyd's
|
||||||
|
papers.
|
||||||
|
|
||||||
|
|
||||||
|
Pairs X:Y are class handles, X:0 are qdisc handles.
|
||||||
|
weight should be proportional to rate for leaf classes
|
||||||
|
(I chose it to be ten times smaller, but that is not necessary)
|
||||||
|
|
||||||
|
defmap is bitmap of logical priorities served by this class.
|
||||||
|
|
||||||
|
E. Other qdiscs are simpler. For example, let's attach TBF to class 1:2
|
||||||
|
|
||||||
|
tc qdisc add dev eth1 parent 1:2 tbf rate 64Kbit buffer 5Kb/8 limit 10Kb
|
||||||
|
|
||||||
|
F. Look at all that we created:
|
||||||
|
|
||||||
|
tc qdisc ls dev eth1
|
||||||
|
tc class ls dev eth1
|
||||||
|
|
||||||
|
G. Install "route" classifier on root of cbq and map destination from realm
|
||||||
|
1 to class 1:2
|
||||||
|
|
||||||
|
tc filter add dev eth1 parent 1:0 protocol ip prio 100 route to 1 classid 1:2
|
||||||
|
|
||||||
|
H. Assign routes to 10.11.12.0/24 to realm 1
|
||||||
|
|
||||||
|
ip route add 10.11.12.0/24 dev eth1 via whatever realm 1
|
||||||
|
|
||||||
|
etc. The same thing can be made with rules.
|
||||||
|
I still did not test ipchains, but they should work too.
|
||||||
|
|
||||||
|
|
||||||
|
Setup and code example of BPF classifier and action can be found under
|
||||||
|
examples/bpf/, which should explain everything for getting started.
|
||||||
|
|
||||||
|
|
||||||
|
Setup of rsvp and u32 classifiers is more hairy.
|
||||||
|
If you read RSVP specs, you will understand how rsvp classifier
|
||||||
|
works easily. What about u32... Here's an example:
|
||||||
|
|
||||||
|
|
||||||
|
#! /bin/sh
|
||||||
|
|
||||||
|
TC=/home/root/tc
|
||||||
|
|
||||||
|
# Setup classifier root on eth1 root (it is cbq)
|
||||||
|
$TC filter add dev eth1 parent 1:0 prio 5 protocol ip u32
|
||||||
|
|
||||||
|
# Create hash table of 256 slots with ID 1:
|
||||||
|
$TC filter add dev eth1 parent 1:0 prio 5 handle 1: u32 divisor 256
|
||||||
|
|
||||||
|
# Add to 6th slot of hash table rule to select tcp/telnet to 193.233.7.75
|
||||||
|
# direct it to class 1:4 and prescribe to fall to best effort,
|
||||||
|
# if traffic violates TBF (32kbit,5K)
|
||||||
|
$TC filter add dev eth1 parent 1:0 prio 5 u32 ht 1:6: \
|
||||||
|
match ip dst 193.233.7.75 \
|
||||||
|
match tcp dst 0x17 0xffff \
|
||||||
|
flowid 1:4 \
|
||||||
|
police rate 32kbit buffer 5kb/8 mpu 64 mtu 1514 index 1
|
||||||
|
|
||||||
|
# Add to 1st slot of hash table rule to select icmp to 193.233.7.75
|
||||||
|
# direct it to class 1:4 and prescribe to fall to best effort,
|
||||||
|
# if traffic violates TBF (10kbit,5K)
|
||||||
|
$TC filter add dev eth1 parent 1:0 prio 5 u32 ht 1:: \
|
||||||
|
sample ip protocol 1 0xff \
|
||||||
|
match ip dst 193.233.7.75 \
|
||||||
|
flowid 1:4 \
|
||||||
|
police rate 10kbit buffer 5kb/8 mpu 64 mtu 1514 index 2
|
||||||
|
|
||||||
|
# Lookup hash table, if it is not fragmented frame
|
||||||
|
# Use protocol as hash key
|
||||||
|
$TC filter add dev eth1 parent 1:0 prio 5 handle ::1 u32 ht 800:: \
|
||||||
|
match ip nofrag \
|
||||||
|
offset mask 0x0F00 shift 6 \
|
||||||
|
hashkey mask 0x00ff0000 at 8 \
|
||||||
|
link 1:
|
||||||
|
|
||||||
|
|
||||||
|
Alexey Kuznetsov
|
||||||
|
kuznet@ms2.inr.ac.ru
|
||||||
|
|
@ -0,0 +1,81 @@
|
||||||
|
lnstat - linux networking statistics
|
||||||
|
(C) 2004 Harald Welte <laforge@gnumonks.org>
|
||||||
|
======================================================================
|
||||||
|
|
||||||
|
This tool is a generalized and more feature-complete replacement for the old
|
||||||
|
'rtstat' program.
|
||||||
|
|
||||||
|
In addition to routing cache statistics, it supports any kind of statistics
|
||||||
|
the linux kernel exports via a file in /proc/net/stat. In a stock 2.6.9
|
||||||
|
kernel, this is
|
||||||
|
per-protocol neighbour cache statistics
|
||||||
|
(ipv4, ipv6, atm, decnet)
|
||||||
|
routing cache statistics
|
||||||
|
(ipv4)
|
||||||
|
connection tracking statistics
|
||||||
|
(ipv4)
|
||||||
|
|
||||||
|
Please note that lnstat will adapt to any additional statistics that might be
|
||||||
|
added to the kernel at some later point.
|
||||||
|
|
||||||
|
I personally always like examples more than any reference documentation, so I
|
||||||
|
list the following examples. If somebody wants to do a manpage, feel free
|
||||||
|
to send me a patch :)
|
||||||
|
|
||||||
|
EXAMPLES:
|
||||||
|
|
||||||
|
In order to get a list of supported statistics files, you can run
|
||||||
|
|
||||||
|
lnstat -d
|
||||||
|
|
||||||
|
It will display something like
|
||||||
|
|
||||||
|
/proc/net/stat/arp_cache:
|
||||||
|
1: entries
|
||||||
|
2: allocs
|
||||||
|
3: destroys
|
||||||
|
[...]
|
||||||
|
/proc/net/stat/rt_cache:
|
||||||
|
1: entries
|
||||||
|
2: in_hit
|
||||||
|
3: in_slow_tot
|
||||||
|
|
||||||
|
You can now select the files/keys you are interested in with something like
|
||||||
|
|
||||||
|
lnstat -k arp_cache:entries,rt_cache:in_hit,arp_cache:destroys
|
||||||
|
|
||||||
|
arp_cach|rt_cache|arp_cach|
|
||||||
|
entries| in_hit|destroys|
|
||||||
|
6| 6| 0|
|
||||||
|
6| 0| 0|
|
||||||
|
6| 2| 0|
|
||||||
|
|
||||||
|
|
||||||
|
You can specify the interval (e.g. 10 seconds) by:
|
||||||
|
|
||||||
|
lnstat -i 10
|
||||||
|
|
||||||
|
You can specify to only use one particular statistics file:
|
||||||
|
|
||||||
|
lnstat -f ip_conntrack
|
||||||
|
|
||||||
|
You can specify individual field widths
|
||||||
|
|
||||||
|
lnstat -k arp_cache:entries,rt_cache:entries -w 20,8
|
||||||
|
|
||||||
|
You can specify not to print a header at all
|
||||||
|
|
||||||
|
lnstat -s 0
|
||||||
|
|
||||||
|
You can specify to print a header only at start of the program
|
||||||
|
|
||||||
|
lnstat -s 1
|
||||||
|
|
||||||
|
You can specify to print a header at start and every 20 lines:
|
||||||
|
|
||||||
|
lnstat -s 20
|
||||||
|
|
||||||
|
You can specify the number of samples you want to take (e.g. 5):
|
||||||
|
|
||||||
|
lnstat -c 5
|
||||||
|
|
||||||
File diff suppressed because it is too large
Load Diff
|
|
@ -3,8 +3,8 @@
|
||||||
# Copyright 2016 Quentin Monnet <quentin.monnet@6wind.com>
|
# Copyright 2016 Quentin Monnet <quentin.monnet@6wind.com>
|
||||||
|
|
||||||
QDISC_KIND=' choke codel bfifo pfifo pfifo_head_drop fq fq_codel gred hhf \
|
QDISC_KIND=' choke codel bfifo pfifo pfifo_head_drop fq fq_codel gred hhf \
|
||||||
mqprio multiq netem pfifo_fast pie fq_pie red rr sfb sfq tbf atm \
|
mqprio multiq netem pfifo_fast pie red rr sfb sfq tbf atm cbq drr \
|
||||||
cbq drr dsmark hfsc htb prio qfq '
|
dsmark hfsc htb prio qfq '
|
||||||
FILTER_KIND=' basic bpf cgroup flow flower fw route rsvp tcindex u32 matchall '
|
FILTER_KIND=' basic bpf cgroup flow flower fw route rsvp tcindex u32 matchall '
|
||||||
ACTION_KIND=' gact mirred bpf sample '
|
ACTION_KIND=' gact mirred bpf sample '
|
||||||
|
|
||||||
|
|
@ -302,7 +302,7 @@ _tc_qdisc_options()
|
||||||
;;
|
;;
|
||||||
gred)
|
gred)
|
||||||
_tc_once_attr 'setup vqs default grio vq prio limit min max avpkt \
|
_tc_once_attr 'setup vqs default grio vq prio limit min max avpkt \
|
||||||
burst probability bandwidth ecn harddrop'
|
burst probability bandwidth'
|
||||||
return 0
|
return 0
|
||||||
;;
|
;;
|
||||||
hhf)
|
hhf)
|
||||||
|
|
@ -323,15 +323,6 @@ _tc_qdisc_options()
|
||||||
_tc_once_attr 'limit target tupdate alpha beta'
|
_tc_once_attr 'limit target tupdate alpha beta'
|
||||||
_tc_one_of_list 'bytemode nobytemode'
|
_tc_one_of_list 'bytemode nobytemode'
|
||||||
_tc_one_of_list 'ecn noecn'
|
_tc_one_of_list 'ecn noecn'
|
||||||
_tc_one_of_list 'dq_rate_estimator no_dq_rate_estimator'
|
|
||||||
return 0
|
|
||||||
;;
|
|
||||||
fq_pie)
|
|
||||||
_tc_once_attr 'limit flows target tupdate \
|
|
||||||
alpha beta quantum memory_limit ecn_prob'
|
|
||||||
_tc_one_of_list 'ecn noecn'
|
|
||||||
_tc_one_of_list 'bytemode nobytemode'
|
|
||||||
_tc_one_of_list 'dq_rate_estimator no_dq_rate_estimator'
|
|
||||||
return 0
|
return 0
|
||||||
;;
|
;;
|
||||||
red)
|
red)
|
||||||
|
|
@ -459,7 +450,7 @@ _tc_filter_options()
|
||||||
return 0
|
return 0
|
||||||
;;
|
;;
|
||||||
matchall)
|
matchall)
|
||||||
_tc_once_attr 'action classid skip_sw skip_hw'
|
_tc_once_attr 'action skip_sw skip_hw'
|
||||||
return 0
|
return 0
|
||||||
;;
|
;;
|
||||||
flower)
|
flower)
|
||||||
|
|
|
||||||
|
|
@ -1,7 +1,10 @@
|
||||||
# SPDX-License-Identifier: GPL-2.0
|
|
||||||
BROBJ = bridge.o fdb.o monitor.o link.o mdb.o vlan.o
|
BROBJ = bridge.o fdb.o monitor.o link.o mdb.o vlan.o
|
||||||
|
|
||||||
include ../config.mk
|
include ../Config
|
||||||
|
|
||||||
|
ifeq ($(IP_CONFIG_SETNS),y)
|
||||||
|
CFLAGS += -DHAVE_SETNS
|
||||||
|
endif
|
||||||
|
|
||||||
all: bridge
|
all: bridge
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -1,31 +1,27 @@
|
||||||
/* SPDX-License-Identifier: GPL-2.0 */
|
|
||||||
|
|
||||||
#define MDB_RTA(r) \
|
#define MDB_RTA(r) \
|
||||||
((struct rtattr *)(((char *)(r)) + RTA_ALIGN(sizeof(struct br_mdb_entry))))
|
((struct rtattr *)(((char *)(r)) + RTA_ALIGN(sizeof(struct br_mdb_entry))))
|
||||||
|
|
||||||
#define MDB_RTR_RTA(r) \
|
#define MDB_RTR_RTA(r) \
|
||||||
((struct rtattr *)(((char *)(r)) + RTA_ALIGN(sizeof(__u32))))
|
((struct rtattr *)(((char *)(r)) + RTA_ALIGN(sizeof(__u32))))
|
||||||
|
|
||||||
void print_vlan_info(struct rtattr *tb, int ifindex);
|
extern int print_linkinfo(const struct sockaddr_nl *who,
|
||||||
int print_linkinfo(struct nlmsghdr *n, void *arg);
|
struct nlmsghdr *n,
|
||||||
int print_mdb_mon(struct nlmsghdr *n, void *arg);
|
void *arg);
|
||||||
int print_fdb(struct nlmsghdr *n, void *arg);
|
extern int print_fdb(const struct sockaddr_nl *who,
|
||||||
void print_stp_state(__u8 state);
|
struct nlmsghdr *n, void *arg);
|
||||||
int parse_stp_state(const char *arg);
|
extern int print_mdb(const struct sockaddr_nl *who,
|
||||||
int print_vlan_rtm(struct nlmsghdr *n, void *arg, bool monitor,
|
struct nlmsghdr *n, void *arg);
|
||||||
bool global_only);
|
|
||||||
void br_print_router_port_stats(struct rtattr *pattr);
|
|
||||||
|
|
||||||
int do_fdb(int argc, char **argv);
|
extern int do_fdb(int argc, char **argv);
|
||||||
int do_mdb(int argc, char **argv);
|
extern int do_mdb(int argc, char **argv);
|
||||||
int do_monitor(int argc, char **argv);
|
extern int do_monitor(int argc, char **argv);
|
||||||
int do_vlan(int argc, char **argv);
|
extern int do_vlan(int argc, char **argv);
|
||||||
int do_link(int argc, char **argv);
|
extern int do_link(int argc, char **argv);
|
||||||
|
|
||||||
extern int preferred_family;
|
extern int preferred_family;
|
||||||
extern int show_stats;
|
extern int show_stats;
|
||||||
extern int show_details;
|
extern int show_details;
|
||||||
extern int timestamp;
|
extern int timestamp;
|
||||||
extern int compress_vlans;
|
extern int compress_vlans;
|
||||||
extern int json;
|
extern int json_output;
|
||||||
extern struct rtnl_handle rth;
|
extern struct rtnl_handle rth;
|
||||||
|
|
|
||||||
|
|
@ -1,4 +1,3 @@
|
||||||
/* SPDX-License-Identifier: GPL-2.0 */
|
|
||||||
/*
|
/*
|
||||||
* Get/set/delete bridge with netlink
|
* Get/set/delete bridge with netlink
|
||||||
*
|
*
|
||||||
|
|
@ -12,23 +11,23 @@
|
||||||
#include <string.h>
|
#include <string.h>
|
||||||
#include <errno.h>
|
#include <errno.h>
|
||||||
|
|
||||||
#include "version.h"
|
#include "SNAPSHOT.h"
|
||||||
#include "utils.h"
|
#include "utils.h"
|
||||||
#include "br_common.h"
|
#include "br_common.h"
|
||||||
#include "namespace.h"
|
#include "namespace.h"
|
||||||
#include "color.h"
|
|
||||||
|
|
||||||
struct rtnl_handle rth = { .fd = -1 };
|
struct rtnl_handle rth = { .fd = -1 };
|
||||||
int preferred_family = AF_UNSPEC;
|
int preferred_family = AF_UNSPEC;
|
||||||
|
int resolve_hosts;
|
||||||
int oneline;
|
int oneline;
|
||||||
int show_stats;
|
int show_stats;
|
||||||
int show_details;
|
int show_details;
|
||||||
static int color;
|
|
||||||
int compress_vlans;
|
int compress_vlans;
|
||||||
int json;
|
int json_output;
|
||||||
int timestamp;
|
int timestamp;
|
||||||
static const char *batch_file;
|
char *batch_file;
|
||||||
int force;
|
int force;
|
||||||
|
const char *_SL_;
|
||||||
|
|
||||||
static void usage(void) __attribute__((noreturn));
|
static void usage(void) __attribute__((noreturn));
|
||||||
|
|
||||||
|
|
@ -37,10 +36,10 @@ static void usage(void)
|
||||||
fprintf(stderr,
|
fprintf(stderr,
|
||||||
"Usage: bridge [ OPTIONS ] OBJECT { COMMAND | help }\n"
|
"Usage: bridge [ OPTIONS ] OBJECT { COMMAND | help }\n"
|
||||||
" bridge [ -force ] -batch filename\n"
|
" bridge [ -force ] -batch filename\n"
|
||||||
"where OBJECT := { link | fdb | mdb | vlan | monitor }\n"
|
"where OBJECT := { link | fdb | mdb | vlan | monitor }\n"
|
||||||
" OPTIONS := { -V[ersion] | -s[tatistics] | -d[etails] |\n"
|
" OPTIONS := { -V[ersion] | -s[tatistics] | -d[etails] |\n"
|
||||||
" -o[neline] | -t[imestamp] | -n[etns] name |\n"
|
" -o[neline] | -t[imestamp] | -n[etns] name |\n"
|
||||||
" -c[ompressvlans] -color -p[retty] -j[son] }\n");
|
" -c[ompressvlans] -j{son} }\n");
|
||||||
exit(-1);
|
exit(-1);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -77,23 +76,45 @@ static int do_cmd(const char *argv0, int argc, char **argv)
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
static int br_batch_cmd(int argc, char *argv[], void *data)
|
|
||||||
{
|
|
||||||
return do_cmd(argv[0], argc, argv);
|
|
||||||
}
|
|
||||||
|
|
||||||
static int batch(const char *name)
|
static int batch(const char *name)
|
||||||
{
|
{
|
||||||
int ret;
|
char *line = NULL;
|
||||||
|
size_t len = 0;
|
||||||
|
int ret = EXIT_SUCCESS;
|
||||||
|
|
||||||
|
if (name && strcmp(name, "-") != 0) {
|
||||||
|
if (freopen(name, "r", stdin) == NULL) {
|
||||||
|
fprintf(stderr,
|
||||||
|
"Cannot open file \"%s\" for reading: %s\n",
|
||||||
|
name, strerror(errno));
|
||||||
|
return EXIT_FAILURE;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
if (rtnl_open(&rth, 0) < 0) {
|
if (rtnl_open(&rth, 0) < 0) {
|
||||||
fprintf(stderr, "Cannot open rtnetlink\n");
|
fprintf(stderr, "Cannot open rtnetlink\n");
|
||||||
return EXIT_FAILURE;
|
return EXIT_FAILURE;
|
||||||
}
|
}
|
||||||
|
|
||||||
rtnl_set_strict_dump(&rth);
|
cmdlineno = 0;
|
||||||
|
while (getcmdline(&line, &len, stdin) != -1) {
|
||||||
|
char *largv[100];
|
||||||
|
int largc;
|
||||||
|
|
||||||
ret = do_batch(name, force, br_batch_cmd, NULL);
|
largc = makeargs(line, largv, 100);
|
||||||
|
if (largc == 0)
|
||||||
|
continue; /* blank line */
|
||||||
|
|
||||||
|
if (do_cmd(largv[0], largc, largv)) {
|
||||||
|
fprintf(stderr, "Command failed %s:%d\n",
|
||||||
|
name, cmdlineno);
|
||||||
|
ret = EXIT_FAILURE;
|
||||||
|
if (!force)
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (line)
|
||||||
|
free(line);
|
||||||
|
|
||||||
rtnl_close(&rth);
|
rtnl_close(&rth);
|
||||||
return ret;
|
return ret;
|
||||||
|
|
@ -117,7 +138,7 @@ main(int argc, char **argv)
|
||||||
if (matches(opt, "-help") == 0) {
|
if (matches(opt, "-help") == 0) {
|
||||||
usage();
|
usage();
|
||||||
} else if (matches(opt, "-Version") == 0) {
|
} else if (matches(opt, "-Version") == 0) {
|
||||||
printf("bridge utility, %s\n", version);
|
printf("bridge utility, 0.0\n");
|
||||||
exit(0);
|
exit(0);
|
||||||
} else if (matches(opt, "-stats") == 0 ||
|
} else if (matches(opt, "-stats") == 0 ||
|
||||||
matches(opt, "-statistics") == 0) {
|
matches(opt, "-statistics") == 0) {
|
||||||
|
|
@ -149,15 +170,12 @@ main(int argc, char **argv)
|
||||||
NEXT_ARG();
|
NEXT_ARG();
|
||||||
if (netns_switch(argv[1]))
|
if (netns_switch(argv[1]))
|
||||||
exit(-1);
|
exit(-1);
|
||||||
} else if (matches_color(opt, &color)) {
|
|
||||||
} else if (matches(opt, "-compressvlans") == 0) {
|
} else if (matches(opt, "-compressvlans") == 0) {
|
||||||
++compress_vlans;
|
++compress_vlans;
|
||||||
} else if (matches(opt, "-force") == 0) {
|
} else if (matches(opt, "-force") == 0) {
|
||||||
++force;
|
++force;
|
||||||
} else if (matches(opt, "-json") == 0) {
|
} else if (matches(opt, "-json") == 0) {
|
||||||
++json;
|
++json_output;
|
||||||
} else if (matches(opt, "-pretty") == 0) {
|
|
||||||
++pretty;
|
|
||||||
} else if (matches(opt, "-batch") == 0) {
|
} else if (matches(opt, "-batch") == 0) {
|
||||||
argc--;
|
argc--;
|
||||||
argv++;
|
argv++;
|
||||||
|
|
@ -175,16 +193,12 @@ main(int argc, char **argv)
|
||||||
|
|
||||||
_SL_ = oneline ? "\\" : "\n";
|
_SL_ = oneline ? "\\" : "\n";
|
||||||
|
|
||||||
check_enable_color(color, json);
|
|
||||||
|
|
||||||
if (batch_file)
|
if (batch_file)
|
||||||
return batch(batch_file);
|
return batch(batch_file);
|
||||||
|
|
||||||
if (rtnl_open(&rth, 0) < 0)
|
if (rtnl_open(&rth, 0) < 0)
|
||||||
exit(1);
|
exit(1);
|
||||||
|
|
||||||
rtnl_set_strict_dump(&rth);
|
|
||||||
|
|
||||||
if (argc > 1)
|
if (argc > 1)
|
||||||
return do_cmd(argv[1], argc-1, argv+1);
|
return do_cmd(argv[1], argc-1, argv+1);
|
||||||
|
|
||||||
|
|
|
||||||
529
bridge/fdb.c
529
bridge/fdb.c
|
|
@ -1,4 +1,3 @@
|
||||||
/* SPDX-License-Identifier: GPL-2.0 */
|
|
||||||
/*
|
/*
|
||||||
* Get/set/delete fdb table with netlink
|
* Get/set/delete fdb table with netlink
|
||||||
*
|
*
|
||||||
|
|
@ -22,29 +21,25 @@
|
||||||
#include <linux/neighbour.h>
|
#include <linux/neighbour.h>
|
||||||
#include <string.h>
|
#include <string.h>
|
||||||
#include <limits.h>
|
#include <limits.h>
|
||||||
|
#include <json_writer.h>
|
||||||
#include <stdbool.h>
|
#include <stdbool.h>
|
||||||
|
|
||||||
#include "json_print.h"
|
|
||||||
#include "libnetlink.h"
|
#include "libnetlink.h"
|
||||||
#include "br_common.h"
|
#include "br_common.h"
|
||||||
#include "rt_names.h"
|
#include "rt_names.h"
|
||||||
#include "utils.h"
|
#include "utils.h"
|
||||||
|
|
||||||
static unsigned int filter_index, filter_dynamic, filter_master,
|
static unsigned int filter_index, filter_vlan, filter_state;
|
||||||
filter_state, filter_vlan;
|
|
||||||
|
json_writer_t *jw_global;
|
||||||
|
|
||||||
static void usage(void)
|
static void usage(void)
|
||||||
{
|
{
|
||||||
fprintf(stderr,
|
fprintf(stderr, "Usage: bridge fdb { add | append | del | replace } ADDR dev DEV\n"
|
||||||
"Usage: bridge fdb { add | append | del | replace } ADDR dev DEV\n"
|
" [ self ] [ master ] [ use ] [ router ]\n"
|
||||||
" [ self ] [ master ] [ use ] [ router ] [ extern_learn ]\n"
|
" [ local | static | dynamic ] [ dst IPADDR ] [ vlan VID ]\n"
|
||||||
" [ sticky ] [ local | static | dynamic ] [ vlan VID ]\n"
|
" [ port PORT] [ vni VNI ] [ via DEV ]\n");
|
||||||
" { [ dst IPADDR ] [ port PORT] [ vni VNI ] | [ nhid NHID ] }\n"
|
fprintf(stderr, " bridge fdb [ show [ br BRDEV ] [ brport DEV ] [ vlan VID ] [ state STATE ] ]\n");
|
||||||
" [ via DEV ] [ src_vni VNI ]\n"
|
|
||||||
" bridge fdb [ show [ br BRDEV ] [ brport DEV ] [ vlan VID ]\n"
|
|
||||||
" [ state STATE ] [ dynamic ] ]\n"
|
|
||||||
" bridge fdb get [ to ] LLADDR [ br BRDEV ] { brport | dev } DEV\n"
|
|
||||||
" [ vlan VID ] [ vni VNI ] [ self ] [ master ] [ dynamic ]\n");
|
|
||||||
exit(-1);
|
exit(-1);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -64,10 +59,7 @@ static const char *state_n2a(unsigned int s)
|
||||||
if (s & NUD_REACHABLE)
|
if (s & NUD_REACHABLE)
|
||||||
return "";
|
return "";
|
||||||
|
|
||||||
if (is_json_context())
|
sprintf(buf, "state=%#x", s);
|
||||||
sprintf(buf, "%#x", s);
|
|
||||||
else
|
|
||||||
sprintf(buf, "state=%#x", s);
|
|
||||||
return buf;
|
return buf;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -89,58 +81,24 @@ static int state_a2n(unsigned int *s, const char *arg)
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
static void fdb_print_flags(FILE *fp, unsigned int flags)
|
static void start_json_fdb_flags_array(bool *fdb_flags)
|
||||||
{
|
{
|
||||||
open_json_array(PRINT_JSON,
|
if (*fdb_flags)
|
||||||
is_json_context() ? "flags" : "");
|
return;
|
||||||
|
jsonw_name(jw_global, "flags");
|
||||||
if (flags & NTF_SELF)
|
jsonw_start_array(jw_global);
|
||||||
print_string(PRINT_ANY, NULL, "%s ", "self");
|
*fdb_flags = true;
|
||||||
|
|
||||||
if (flags & NTF_ROUTER)
|
|
||||||
print_string(PRINT_ANY, NULL, "%s ", "router");
|
|
||||||
|
|
||||||
if (flags & NTF_EXT_LEARNED)
|
|
||||||
print_string(PRINT_ANY, NULL, "%s ", "extern_learn");
|
|
||||||
|
|
||||||
if (flags & NTF_OFFLOADED)
|
|
||||||
print_string(PRINT_ANY, NULL, "%s ", "offload");
|
|
||||||
|
|
||||||
if (flags & NTF_MASTER)
|
|
||||||
print_string(PRINT_ANY, NULL, "%s ", "master");
|
|
||||||
|
|
||||||
if (flags & NTF_STICKY)
|
|
||||||
print_string(PRINT_ANY, NULL, "%s ", "sticky");
|
|
||||||
|
|
||||||
close_json_array(PRINT_JSON, NULL);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static void fdb_print_stats(FILE *fp, const struct nda_cacheinfo *ci)
|
int print_fdb(const struct sockaddr_nl *who, struct nlmsghdr *n, void *arg)
|
||||||
{
|
|
||||||
static int hz;
|
|
||||||
|
|
||||||
if (!hz)
|
|
||||||
hz = get_user_hz();
|
|
||||||
|
|
||||||
if (is_json_context()) {
|
|
||||||
print_uint(PRINT_JSON, "used", NULL,
|
|
||||||
ci->ndm_used / hz);
|
|
||||||
print_uint(PRINT_JSON, "updated", NULL,
|
|
||||||
ci->ndm_updated / hz);
|
|
||||||
} else {
|
|
||||||
fprintf(fp, "used %d/%d ", ci->ndm_used / hz,
|
|
||||||
ci->ndm_updated / hz);
|
|
||||||
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
int print_fdb(struct nlmsghdr *n, void *arg)
|
|
||||||
{
|
{
|
||||||
FILE *fp = arg;
|
FILE *fp = arg;
|
||||||
struct ndmsg *r = NLMSG_DATA(n);
|
struct ndmsg *r = NLMSG_DATA(n);
|
||||||
int len = n->nlmsg_len;
|
int len = n->nlmsg_len;
|
||||||
struct rtattr *tb[NDA_MAX+1];
|
struct rtattr *tb[NDA_MAX+1];
|
||||||
__u16 vid = 0;
|
__u16 vid = 0;
|
||||||
|
bool fdb_flags = false;
|
||||||
|
const char *state_s;
|
||||||
|
|
||||||
if (n->nlmsg_type != RTM_NEWNEIGH && n->nlmsg_type != RTM_DELNEIGH) {
|
if (n->nlmsg_type != RTM_NEWNEIGH && n->nlmsg_type != RTM_DELNEIGH) {
|
||||||
fprintf(stderr, "Not RTM_NEWNEIGH: %08x %08x %08x\n",
|
fprintf(stderr, "Not RTM_NEWNEIGH: %08x %08x %08x\n",
|
||||||
|
|
@ -172,143 +130,181 @@ int print_fdb(struct nlmsghdr *n, void *arg)
|
||||||
if (filter_vlan && filter_vlan != vid)
|
if (filter_vlan && filter_vlan != vid)
|
||||||
return 0;
|
return 0;
|
||||||
|
|
||||||
if (filter_dynamic && (r->ndm_state & NUD_PERMANENT))
|
if (jw_global) {
|
||||||
return 0;
|
jsonw_pretty(jw_global, 1);
|
||||||
|
jsonw_start_object(jw_global);
|
||||||
|
}
|
||||||
|
|
||||||
open_json_object(NULL);
|
if (n->nlmsg_type == RTM_DELNEIGH) {
|
||||||
if (n->nlmsg_type == RTM_DELNEIGH)
|
if (jw_global)
|
||||||
print_bool(PRINT_ANY, "deleted", "Deleted ", true);
|
jsonw_string_field(jw_global, "opCode", "deleted");
|
||||||
|
else
|
||||||
|
fprintf(fp, "Deleted ");
|
||||||
|
}
|
||||||
|
|
||||||
if (tb[NDA_LLADDR]) {
|
if (tb[NDA_LLADDR]) {
|
||||||
const char *lladdr;
|
|
||||||
SPRINT_BUF(b1);
|
SPRINT_BUF(b1);
|
||||||
|
ll_addr_n2a(RTA_DATA(tb[NDA_LLADDR]),
|
||||||
lladdr = ll_addr_n2a(RTA_DATA(tb[NDA_LLADDR]),
|
RTA_PAYLOAD(tb[NDA_LLADDR]),
|
||||||
RTA_PAYLOAD(tb[NDA_LLADDR]),
|
ll_index_to_type(r->ndm_ifindex),
|
||||||
ll_index_to_type(r->ndm_ifindex),
|
b1, sizeof(b1));
|
||||||
b1, sizeof(b1));
|
if (jw_global)
|
||||||
|
jsonw_string_field(jw_global, "mac", b1);
|
||||||
print_color_string(PRINT_ANY, COLOR_MAC,
|
else
|
||||||
"mac", "%s ", lladdr);
|
fprintf(fp, "%s ", b1);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!filter_index && r->ndm_ifindex) {
|
if (!filter_index && r->ndm_ifindex) {
|
||||||
print_string(PRINT_FP, NULL, "dev ", NULL);
|
if (jw_global)
|
||||||
|
jsonw_string_field(jw_global, "dev",
|
||||||
print_color_string(PRINT_ANY, COLOR_IFNAME,
|
ll_index_to_name(r->ndm_ifindex));
|
||||||
"ifname", "%s ",
|
else
|
||||||
ll_index_to_name(r->ndm_ifindex));
|
fprintf(fp, "dev %s ",
|
||||||
|
ll_index_to_name(r->ndm_ifindex));
|
||||||
}
|
}
|
||||||
|
|
||||||
if (tb[NDA_DST]) {
|
if (tb[NDA_DST]) {
|
||||||
int family = AF_INET;
|
int family = AF_INET;
|
||||||
const char *dst;
|
const char *abuf_s;
|
||||||
|
|
||||||
if (RTA_PAYLOAD(tb[NDA_DST]) == sizeof(struct in6_addr))
|
if (RTA_PAYLOAD(tb[NDA_DST]) == sizeof(struct in6_addr))
|
||||||
family = AF_INET6;
|
family = AF_INET6;
|
||||||
|
|
||||||
dst = format_host(family,
|
abuf_s = format_host(family,
|
||||||
RTA_PAYLOAD(tb[NDA_DST]),
|
RTA_PAYLOAD(tb[NDA_DST]),
|
||||||
RTA_DATA(tb[NDA_DST]));
|
RTA_DATA(tb[NDA_DST]));
|
||||||
|
if (jw_global)
|
||||||
print_string(PRINT_FP, NULL, "dst ", NULL);
|
jsonw_string_field(jw_global, "dst", abuf_s);
|
||||||
|
else
|
||||||
print_color_string(PRINT_ANY,
|
fprintf(fp, "dst %s ", abuf_s);
|
||||||
ifa_family_color(family),
|
|
||||||
"dst", "%s ", dst);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if (vid)
|
if (vid) {
|
||||||
print_uint(PRINT_ANY,
|
if (jw_global)
|
||||||
"vlan", "vlan %hu ", vid);
|
jsonw_uint_field(jw_global, "vlan", vid);
|
||||||
|
else
|
||||||
|
fprintf(fp, "vlan %hu ", vid);
|
||||||
|
}
|
||||||
|
|
||||||
if (tb[NDA_PORT])
|
if (tb[NDA_PORT]) {
|
||||||
print_uint(PRINT_ANY,
|
if (jw_global)
|
||||||
"port", "port %u ",
|
jsonw_uint_field(jw_global, "port",
|
||||||
rta_getattr_be16(tb[NDA_PORT]));
|
rta_getattr_be16(tb[NDA_PORT]));
|
||||||
|
else
|
||||||
|
fprintf(fp, "port %d ",
|
||||||
|
rta_getattr_be16(tb[NDA_PORT]));
|
||||||
|
}
|
||||||
|
|
||||||
if (tb[NDA_VNI])
|
if (tb[NDA_VNI]) {
|
||||||
print_uint(PRINT_ANY,
|
if (jw_global)
|
||||||
"vni", "vni %u ",
|
jsonw_uint_field(jw_global, "vni",
|
||||||
rta_getattr_u32(tb[NDA_VNI]));
|
rta_getattr_u32(tb[NDA_VNI]));
|
||||||
|
else
|
||||||
if (tb[NDA_SRC_VNI])
|
fprintf(fp, "vni %d ",
|
||||||
print_uint(PRINT_ANY,
|
rta_getattr_u32(tb[NDA_VNI]));
|
||||||
"src_vni", "src_vni %u ",
|
}
|
||||||
rta_getattr_u32(tb[NDA_SRC_VNI]));
|
|
||||||
|
|
||||||
if (tb[NDA_IFINDEX]) {
|
if (tb[NDA_IFINDEX]) {
|
||||||
unsigned int ifindex = rta_getattr_u32(tb[NDA_IFINDEX]);
|
unsigned int ifindex = rta_getattr_u32(tb[NDA_IFINDEX]);
|
||||||
|
|
||||||
if (tb[NDA_LINK_NETNSID])
|
if (ifindex) {
|
||||||
print_uint(PRINT_ANY,
|
char ifname[IF_NAMESIZE];
|
||||||
"viaIfIndex", "via ifindex %u ",
|
|
||||||
ifindex);
|
if (!tb[NDA_LINK_NETNSID] &&
|
||||||
|
if_indextoname(ifindex, ifname)) {
|
||||||
|
if (jw_global)
|
||||||
|
jsonw_string_field(jw_global, "viaIf",
|
||||||
|
ifname);
|
||||||
|
else
|
||||||
|
fprintf(fp, "via %s ", ifname);
|
||||||
|
} else {
|
||||||
|
if (jw_global)
|
||||||
|
jsonw_uint_field(jw_global, "viaIfIndex",
|
||||||
|
ifindex);
|
||||||
|
else
|
||||||
|
fprintf(fp, "via ifindex %u ", ifindex);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (tb[NDA_LINK_NETNSID]) {
|
||||||
|
if (jw_global)
|
||||||
|
jsonw_uint_field(jw_global, "linkNetNsId",
|
||||||
|
rta_getattr_u32(tb[NDA_LINK_NETNSID]));
|
||||||
else
|
else
|
||||||
print_string(PRINT_ANY,
|
fprintf(fp, "link-netnsid %d ",
|
||||||
"viaIf", "via %s ",
|
rta_getattr_u32(tb[NDA_LINK_NETNSID]));
|
||||||
ll_index_to_name(ifindex));
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if (tb[NDA_NH_ID])
|
if (show_stats && tb[NDA_CACHEINFO]) {
|
||||||
print_uint(PRINT_ANY, "nhid", "nhid %u ",
|
struct nda_cacheinfo *ci = RTA_DATA(tb[NDA_CACHEINFO]);
|
||||||
rta_getattr_u32(tb[NDA_NH_ID]));
|
int hz = get_user_hz();
|
||||||
|
|
||||||
if (tb[NDA_LINK_NETNSID])
|
if (jw_global) {
|
||||||
print_uint(PRINT_ANY,
|
jsonw_uint_field(jw_global, "used",
|
||||||
"linkNetNsId", "link-netnsid %d ",
|
ci->ndm_used/hz);
|
||||||
rta_getattr_u32(tb[NDA_LINK_NETNSID]));
|
jsonw_uint_field(jw_global, "updated",
|
||||||
|
ci->ndm_updated/hz);
|
||||||
if (show_stats && tb[NDA_CACHEINFO])
|
} else {
|
||||||
fdb_print_stats(fp, RTA_DATA(tb[NDA_CACHEINFO]));
|
fprintf(fp, "used %d/%d ", ci->ndm_used/hz,
|
||||||
|
ci->ndm_updated/hz);
|
||||||
fdb_print_flags(fp, r->ndm_flags);
|
}
|
||||||
|
|
||||||
|
|
||||||
if (tb[NDA_MASTER])
|
|
||||||
print_string(PRINT_ANY, "master", "master %s ",
|
|
||||||
ll_index_to_name(rta_getattr_u32(tb[NDA_MASTER])));
|
|
||||||
|
|
||||||
print_string(PRINT_ANY, "state", "%s\n",
|
|
||||||
state_n2a(r->ndm_state));
|
|
||||||
close_json_object();
|
|
||||||
fflush(fp);
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
static int fdb_linkdump_filter(struct nlmsghdr *nlh, int reqlen)
|
|
||||||
{
|
|
||||||
int err;
|
|
||||||
|
|
||||||
if (filter_index) {
|
|
||||||
struct ifinfomsg *ifm = NLMSG_DATA(nlh);
|
|
||||||
|
|
||||||
ifm->ifi_index = filter_index;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if (filter_master) {
|
if (jw_global) {
|
||||||
err = addattr32(nlh, reqlen, IFLA_MASTER, filter_master);
|
if (r->ndm_flags & NTF_SELF) {
|
||||||
if (err)
|
start_json_fdb_flags_array(&fdb_flags);
|
||||||
return err;
|
jsonw_string(jw_global, "self");
|
||||||
|
}
|
||||||
|
if (r->ndm_flags & NTF_ROUTER) {
|
||||||
|
start_json_fdb_flags_array(&fdb_flags);
|
||||||
|
jsonw_string(jw_global, "router");
|
||||||
|
}
|
||||||
|
if (r->ndm_flags & NTF_EXT_LEARNED) {
|
||||||
|
start_json_fdb_flags_array(&fdb_flags);
|
||||||
|
jsonw_string(jw_global, "extern_learn");
|
||||||
|
}
|
||||||
|
if (r->ndm_flags & NTF_OFFLOADED) {
|
||||||
|
start_json_fdb_flags_array(&fdb_flags);
|
||||||
|
jsonw_string(jw_global, "offload");
|
||||||
|
}
|
||||||
|
if (r->ndm_flags & NTF_MASTER)
|
||||||
|
jsonw_string(jw_global, "master");
|
||||||
|
if (fdb_flags)
|
||||||
|
jsonw_end_array(jw_global);
|
||||||
|
|
||||||
|
if (tb[NDA_MASTER])
|
||||||
|
jsonw_string_field(jw_global,
|
||||||
|
"master",
|
||||||
|
ll_index_to_name(rta_getattr_u32(tb[NDA_MASTER])));
|
||||||
|
|
||||||
|
} else {
|
||||||
|
if (r->ndm_flags & NTF_SELF)
|
||||||
|
fprintf(fp, "self ");
|
||||||
|
if (r->ndm_flags & NTF_ROUTER)
|
||||||
|
fprintf(fp, "router ");
|
||||||
|
if (r->ndm_flags & NTF_EXT_LEARNED)
|
||||||
|
fprintf(fp, "extern_learn ");
|
||||||
|
if (r->ndm_flags & NTF_OFFLOADED)
|
||||||
|
fprintf(fp, "offload ");
|
||||||
|
if (tb[NDA_MASTER]) {
|
||||||
|
fprintf(fp, "master %s ",
|
||||||
|
ll_index_to_name(rta_getattr_u32(tb[NDA_MASTER])));
|
||||||
|
} else if (r->ndm_flags & NTF_MASTER) {
|
||||||
|
fprintf(fp, "master ");
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return 0;
|
state_s = state_n2a(r->ndm_state);
|
||||||
}
|
if (jw_global) {
|
||||||
|
if (state_s[0])
|
||||||
|
jsonw_string_field(jw_global, "state", state_s);
|
||||||
|
|
||||||
static int fdb_dump_filter(struct nlmsghdr *nlh, int reqlen)
|
jsonw_end_object(jw_global);
|
||||||
{
|
} else {
|
||||||
int err;
|
fprintf(fp, "%s\n", state_s);
|
||||||
|
|
||||||
if (filter_index) {
|
fflush(fp);
|
||||||
struct ndmsg *ndm = NLMSG_DATA(nlh);
|
|
||||||
|
|
||||||
ndm->ndm_ifindex = filter_index;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (filter_master) {
|
|
||||||
err = addattr32(nlh, reqlen, NDA_MASTER, filter_master);
|
|
||||||
if (err)
|
|
||||||
return err;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
|
|
@ -316,9 +312,18 @@ static int fdb_dump_filter(struct nlmsghdr *nlh, int reqlen)
|
||||||
|
|
||||||
static int fdb_show(int argc, char **argv)
|
static int fdb_show(int argc, char **argv)
|
||||||
{
|
{
|
||||||
|
struct {
|
||||||
|
struct nlmsghdr n;
|
||||||
|
struct ifinfomsg ifm;
|
||||||
|
char buf[256];
|
||||||
|
} req = {
|
||||||
|
.n.nlmsg_len = NLMSG_LENGTH(sizeof(struct ifinfomsg)),
|
||||||
|
.ifm.ifi_family = PF_BRIDGE,
|
||||||
|
};
|
||||||
|
|
||||||
char *filter_dev = NULL;
|
char *filter_dev = NULL;
|
||||||
char *br = NULL;
|
char *br = NULL;
|
||||||
int rc;
|
int msg_size = sizeof(struct ifinfomsg);
|
||||||
|
|
||||||
while (argc > 0) {
|
while (argc > 0) {
|
||||||
if ((strcmp(*argv, "brport") == 0) || strcmp(*argv, "dev") == 0) {
|
if ((strcmp(*argv, "brport") == 0) || strcmp(*argv, "dev") == 0) {
|
||||||
|
|
@ -339,8 +344,6 @@ static int fdb_show(int argc, char **argv)
|
||||||
if (state_a2n(&state, *argv))
|
if (state_a2n(&state, *argv))
|
||||||
invarg("invalid state", *argv);
|
invarg("invalid state", *argv);
|
||||||
filter_state |= state;
|
filter_state |= state;
|
||||||
} else if (strcmp(*argv, "dynamic") == 0) {
|
|
||||||
filter_dynamic = 1;
|
|
||||||
} else {
|
} else {
|
||||||
if (matches(*argv, "help") == 0)
|
if (matches(*argv, "help") == 0)
|
||||||
usage();
|
usage();
|
||||||
|
|
@ -355,32 +358,42 @@ static int fdb_show(int argc, char **argv)
|
||||||
fprintf(stderr, "Cannot find bridge device \"%s\"\n", br);
|
fprintf(stderr, "Cannot find bridge device \"%s\"\n", br);
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
filter_master = br_ifindex;
|
addattr32(&req.n, sizeof(req), IFLA_MASTER, br_ifindex);
|
||||||
|
msg_size += RTA_LENGTH(4);
|
||||||
}
|
}
|
||||||
|
|
||||||
/*we'll keep around filter_dev for older kernels */
|
/*we'll keep around filter_dev for older kernels */
|
||||||
if (filter_dev) {
|
if (filter_dev) {
|
||||||
filter_index = ll_name_to_index(filter_dev);
|
filter_index = if_nametoindex(filter_dev);
|
||||||
if (!filter_index)
|
if (filter_index == 0) {
|
||||||
return nodev(filter_dev);
|
fprintf(stderr, "Cannot find device \"%s\"\n",
|
||||||
|
filter_dev);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
req.ifm.ifi_index = filter_index;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (rth.flags & RTNL_HANDLE_F_STRICT_CHK)
|
if (rtnl_dump_request(&rth, RTM_GETNEIGH, &req.ifm, msg_size) < 0) {
|
||||||
rc = rtnl_neighdump_req(&rth, PF_BRIDGE, fdb_dump_filter);
|
|
||||||
else
|
|
||||||
rc = rtnl_fdb_linkdump_req_filter_fn(&rth, fdb_linkdump_filter);
|
|
||||||
if (rc < 0) {
|
|
||||||
perror("Cannot send dump request");
|
perror("Cannot send dump request");
|
||||||
exit(1);
|
exit(1);
|
||||||
}
|
}
|
||||||
|
|
||||||
new_json_obj(json);
|
if (json_output) {
|
||||||
|
jw_global = jsonw_new(stdout);
|
||||||
|
if (!jw_global) {
|
||||||
|
fprintf(stderr, "Error allocation json object\n");
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
jsonw_start_array(jw_global);
|
||||||
|
}
|
||||||
if (rtnl_dump_filter(&rth, print_fdb, stdout) < 0) {
|
if (rtnl_dump_filter(&rth, print_fdb, stdout) < 0) {
|
||||||
fprintf(stderr, "Dump terminated\n");
|
fprintf(stderr, "Dump terminated\n");
|
||||||
exit(1);
|
exit(1);
|
||||||
}
|
}
|
||||||
delete_json_obj();
|
if (jw_global) {
|
||||||
fflush(stdout);
|
jsonw_end_array(jw_global);
|
||||||
|
jsonw_destroy(&jw_global);
|
||||||
|
}
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
@ -405,11 +418,9 @@ static int fdb_modify(int cmd, int flags, int argc, char **argv)
|
||||||
inet_prefix dst;
|
inet_prefix dst;
|
||||||
unsigned long port = 0;
|
unsigned long port = 0;
|
||||||
unsigned long vni = ~0;
|
unsigned long vni = ~0;
|
||||||
unsigned long src_vni = ~0;
|
|
||||||
unsigned int via = 0;
|
unsigned int via = 0;
|
||||||
char *endptr;
|
char *endptr;
|
||||||
short vid = -1;
|
short vid = -1;
|
||||||
__u32 nhid = 0;
|
|
||||||
|
|
||||||
while (argc > 0) {
|
while (argc > 0) {
|
||||||
if (strcmp(*argv, "dev") == 0) {
|
if (strcmp(*argv, "dev") == 0) {
|
||||||
|
|
@ -421,10 +432,6 @@ static int fdb_modify(int cmd, int flags, int argc, char **argv)
|
||||||
duparg2("dst", *argv);
|
duparg2("dst", *argv);
|
||||||
get_addr(&dst, *argv, preferred_family);
|
get_addr(&dst, *argv, preferred_family);
|
||||||
dst_ok = 1;
|
dst_ok = 1;
|
||||||
} else if (strcmp(*argv, "nhid") == 0) {
|
|
||||||
NEXT_ARG();
|
|
||||||
if (get_u32(&nhid, *argv, 0))
|
|
||||||
invarg("\"id\" value is invalid\n", *argv);
|
|
||||||
} else if (strcmp(*argv, "port") == 0) {
|
} else if (strcmp(*argv, "port") == 0) {
|
||||||
|
|
||||||
NEXT_ARG();
|
NEXT_ARG();
|
||||||
|
|
@ -444,17 +451,11 @@ static int fdb_modify(int cmd, int flags, int argc, char **argv)
|
||||||
if ((endptr && *endptr) ||
|
if ((endptr && *endptr) ||
|
||||||
(vni >> 24) || vni == ULONG_MAX)
|
(vni >> 24) || vni == ULONG_MAX)
|
||||||
invarg("invalid VNI\n", *argv);
|
invarg("invalid VNI\n", *argv);
|
||||||
} else if (strcmp(*argv, "src_vni") == 0) {
|
|
||||||
NEXT_ARG();
|
|
||||||
src_vni = strtoul(*argv, &endptr, 0);
|
|
||||||
if ((endptr && *endptr) ||
|
|
||||||
(src_vni >> 24) || src_vni == ULONG_MAX)
|
|
||||||
invarg("invalid src VNI\n", *argv);
|
|
||||||
} else if (strcmp(*argv, "via") == 0) {
|
} else if (strcmp(*argv, "via") == 0) {
|
||||||
NEXT_ARG();
|
NEXT_ARG();
|
||||||
via = ll_name_to_index(*argv);
|
via = if_nametoindex(*argv);
|
||||||
if (!via)
|
if (via == 0)
|
||||||
exit(nodev(*argv));
|
invarg("invalid device\n", *argv);
|
||||||
} else if (strcmp(*argv, "self") == 0) {
|
} else if (strcmp(*argv, "self") == 0) {
|
||||||
req.ndm.ndm_flags |= NTF_SELF;
|
req.ndm.ndm_flags |= NTF_SELF;
|
||||||
} else if (matches(*argv, "master") == 0) {
|
} else if (matches(*argv, "master") == 0) {
|
||||||
|
|
@ -477,14 +478,10 @@ static int fdb_modify(int cmd, int flags, int argc, char **argv)
|
||||||
vid = atoi(*argv);
|
vid = atoi(*argv);
|
||||||
} else if (matches(*argv, "use") == 0) {
|
} else if (matches(*argv, "use") == 0) {
|
||||||
req.ndm.ndm_flags |= NTF_USE;
|
req.ndm.ndm_flags |= NTF_USE;
|
||||||
} else if (matches(*argv, "extern_learn") == 0) {
|
|
||||||
req.ndm.ndm_flags |= NTF_EXT_LEARNED;
|
|
||||||
} else if (matches(*argv, "sticky") == 0) {
|
|
||||||
req.ndm.ndm_flags |= NTF_STICKY;
|
|
||||||
} else {
|
} else {
|
||||||
if (strcmp(*argv, "to") == 0)
|
if (strcmp(*argv, "to") == 0) {
|
||||||
NEXT_ARG();
|
NEXT_ARG();
|
||||||
|
}
|
||||||
if (matches(*argv, "help") == 0)
|
if (matches(*argv, "help") == 0)
|
||||||
usage();
|
usage();
|
||||||
if (addr)
|
if (addr)
|
||||||
|
|
@ -499,11 +496,6 @@ static int fdb_modify(int cmd, int flags, int argc, char **argv)
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (nhid && (dst_ok || port || vni != ~0)) {
|
|
||||||
fprintf(stderr, "dst, port, vni are mutually exclusive with nhid\n");
|
|
||||||
return -1;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Assume self */
|
/* Assume self */
|
||||||
if (!(req.ndm.ndm_flags&(NTF_SELF|NTF_MASTER)))
|
if (!(req.ndm.ndm_flags&(NTF_SELF|NTF_MASTER)))
|
||||||
req.ndm.ndm_flags |= NTF_SELF;
|
req.ndm.ndm_flags |= NTF_SELF;
|
||||||
|
|
@ -525,8 +517,6 @@ static int fdb_modify(int cmd, int flags, int argc, char **argv)
|
||||||
|
|
||||||
if (vid >= 0)
|
if (vid >= 0)
|
||||||
addattr16(&req.n, sizeof(req), NDA_VLAN, vid);
|
addattr16(&req.n, sizeof(req), NDA_VLAN, vid);
|
||||||
if (nhid > 0)
|
|
||||||
addattr32(&req.n, sizeof(req), NDA_NH_ID, nhid);
|
|
||||||
|
|
||||||
if (port) {
|
if (port) {
|
||||||
unsigned short dport;
|
unsigned short dport;
|
||||||
|
|
@ -536,132 +526,17 @@ static int fdb_modify(int cmd, int flags, int argc, char **argv)
|
||||||
}
|
}
|
||||||
if (vni != ~0)
|
if (vni != ~0)
|
||||||
addattr32(&req.n, sizeof(req), NDA_VNI, vni);
|
addattr32(&req.n, sizeof(req), NDA_VNI, vni);
|
||||||
if (src_vni != ~0)
|
|
||||||
addattr32(&req.n, sizeof(req), NDA_SRC_VNI, src_vni);
|
|
||||||
if (via)
|
if (via)
|
||||||
addattr32(&req.n, sizeof(req), NDA_IFINDEX, via);
|
addattr32(&req.n, sizeof(req), NDA_IFINDEX, via);
|
||||||
|
|
||||||
req.ndm.ndm_ifindex = ll_name_to_index(d);
|
req.ndm.ndm_ifindex = ll_name_to_index(d);
|
||||||
if (!req.ndm.ndm_ifindex)
|
if (req.ndm.ndm_ifindex == 0) {
|
||||||
return nodev(d);
|
fprintf(stderr, "Cannot find device \"%s\"\n", d);
|
||||||
|
|
||||||
if (rtnl_talk(&rth, &req.n, NULL) < 0)
|
|
||||||
return -1;
|
|
||||||
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
static int fdb_get(int argc, char **argv)
|
|
||||||
{
|
|
||||||
struct {
|
|
||||||
struct nlmsghdr n;
|
|
||||||
struct ndmsg ndm;
|
|
||||||
char buf[1024];
|
|
||||||
} req = {
|
|
||||||
.n.nlmsg_len = NLMSG_LENGTH(sizeof(struct ndmsg)),
|
|
||||||
.n.nlmsg_flags = NLM_F_REQUEST,
|
|
||||||
.n.nlmsg_type = RTM_GETNEIGH,
|
|
||||||
.ndm.ndm_family = AF_BRIDGE,
|
|
||||||
};
|
|
||||||
char *d = NULL, *br = NULL;
|
|
||||||
struct nlmsghdr *answer;
|
|
||||||
unsigned long vni = ~0;
|
|
||||||
char abuf[ETH_ALEN];
|
|
||||||
int br_ifindex = 0;
|
|
||||||
char *addr = NULL;
|
|
||||||
short vlan = -1;
|
|
||||||
char *endptr;
|
|
||||||
|
|
||||||
while (argc > 0) {
|
|
||||||
if ((strcmp(*argv, "brport") == 0) || strcmp(*argv, "dev") == 0) {
|
|
||||||
NEXT_ARG();
|
|
||||||
d = *argv;
|
|
||||||
} else if (strcmp(*argv, "br") == 0) {
|
|
||||||
NEXT_ARG();
|
|
||||||
br = *argv;
|
|
||||||
} else if (strcmp(*argv, "dev") == 0) {
|
|
||||||
NEXT_ARG();
|
|
||||||
d = *argv;
|
|
||||||
} else if (strcmp(*argv, "vni") == 0) {
|
|
||||||
NEXT_ARG();
|
|
||||||
vni = strtoul(*argv, &endptr, 0);
|
|
||||||
if ((endptr && *endptr) ||
|
|
||||||
(vni >> 24) || vni == ULONG_MAX)
|
|
||||||
invarg("invalid VNI\n", *argv);
|
|
||||||
} else if (strcmp(*argv, "self") == 0) {
|
|
||||||
req.ndm.ndm_flags |= NTF_SELF;
|
|
||||||
} else if (matches(*argv, "master") == 0) {
|
|
||||||
req.ndm.ndm_flags |= NTF_MASTER;
|
|
||||||
} else if (matches(*argv, "vlan") == 0) {
|
|
||||||
if (vlan >= 0)
|
|
||||||
duparg2("vlan", *argv);
|
|
||||||
NEXT_ARG();
|
|
||||||
vlan = atoi(*argv);
|
|
||||||
} else if (matches(*argv, "dynamic") == 0) {
|
|
||||||
filter_dynamic = 1;
|
|
||||||
} else {
|
|
||||||
if (strcmp(*argv, "to") == 0)
|
|
||||||
NEXT_ARG();
|
|
||||||
|
|
||||||
if (matches(*argv, "help") == 0)
|
|
||||||
usage();
|
|
||||||
if (addr)
|
|
||||||
duparg2("to", *argv);
|
|
||||||
addr = *argv;
|
|
||||||
}
|
|
||||||
argc--; argv++;
|
|
||||||
}
|
|
||||||
|
|
||||||
if ((d == NULL && br == NULL) || addr == NULL) {
|
|
||||||
fprintf(stderr, "Device or master and address are required arguments.\n");
|
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (sscanf(addr, "%hhx:%hhx:%hhx:%hhx:%hhx:%hhx",
|
if (rtnl_talk(&rth, &req.n, NULL, 0) < 0)
|
||||||
abuf, abuf+1, abuf+2,
|
|
||||||
abuf+3, abuf+4, abuf+5) != 6) {
|
|
||||||
fprintf(stderr, "Invalid mac address %s\n", addr);
|
|
||||||
return -1;
|
return -1;
|
||||||
}
|
|
||||||
|
|
||||||
addattr_l(&req.n, sizeof(req), NDA_LLADDR, abuf, ETH_ALEN);
|
|
||||||
|
|
||||||
if (vlan >= 0)
|
|
||||||
addattr16(&req.n, sizeof(req), NDA_VLAN, vlan);
|
|
||||||
|
|
||||||
if (vni != ~0)
|
|
||||||
addattr32(&req.n, sizeof(req), NDA_VNI, vni);
|
|
||||||
|
|
||||||
if (d) {
|
|
||||||
req.ndm.ndm_ifindex = ll_name_to_index(d);
|
|
||||||
if (!req.ndm.ndm_ifindex) {
|
|
||||||
fprintf(stderr, "Cannot find device \"%s\"\n", d);
|
|
||||||
return -1;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (br) {
|
|
||||||
br_ifindex = ll_name_to_index(br);
|
|
||||||
if (!br_ifindex) {
|
|
||||||
fprintf(stderr, "Cannot find bridge device \"%s\"\n", br);
|
|
||||||
return -1;
|
|
||||||
}
|
|
||||||
addattr32(&req.n, sizeof(req), NDA_MASTER, br_ifindex);
|
|
||||||
}
|
|
||||||
|
|
||||||
if (rtnl_talk(&rth, &req.n, &answer) < 0)
|
|
||||||
return -2;
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Initialize a json_writer and open an array object
|
|
||||||
* if -json was specified.
|
|
||||||
*/
|
|
||||||
new_json_obj(json);
|
|
||||||
if (print_fdb(answer, stdout) < 0) {
|
|
||||||
fprintf(stderr, "An error :-)\n");
|
|
||||||
return -1;
|
|
||||||
}
|
|
||||||
delete_json_obj();
|
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
@ -679,8 +554,6 @@ int do_fdb(int argc, char **argv)
|
||||||
return fdb_modify(RTM_NEWNEIGH, NLM_F_CREATE|NLM_F_REPLACE, argc-1, argv+1);
|
return fdb_modify(RTM_NEWNEIGH, NLM_F_CREATE|NLM_F_REPLACE, argc-1, argv+1);
|
||||||
if (matches(*argv, "delete") == 0)
|
if (matches(*argv, "delete") == 0)
|
||||||
return fdb_modify(RTM_DELNEIGH, 0, argc-1, argv+1);
|
return fdb_modify(RTM_DELNEIGH, 0, argc-1, argv+1);
|
||||||
if (matches(*argv, "get") == 0)
|
|
||||||
return fdb_get(argc-1, argv+1);
|
|
||||||
if (matches(*argv, "show") == 0 ||
|
if (matches(*argv, "show") == 0 ||
|
||||||
matches(*argv, "lst") == 0 ||
|
matches(*argv, "lst") == 0 ||
|
||||||
matches(*argv, "list") == 0)
|
matches(*argv, "list") == 0)
|
||||||
|
|
|
||||||
474
bridge/link.c
474
bridge/link.c
|
|
@ -1,4 +1,3 @@
|
||||||
/* SPDX-License-Identifier: GPL-2.0 */
|
|
||||||
|
|
||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
#include <stdlib.h>
|
#include <stdlib.h>
|
||||||
|
|
@ -12,14 +11,13 @@
|
||||||
#include <string.h>
|
#include <string.h>
|
||||||
#include <stdbool.h>
|
#include <stdbool.h>
|
||||||
|
|
||||||
#include "json_print.h"
|
|
||||||
#include "libnetlink.h"
|
#include "libnetlink.h"
|
||||||
#include "utils.h"
|
#include "utils.h"
|
||||||
#include "br_common.h"
|
#include "br_common.h"
|
||||||
|
|
||||||
static unsigned int filter_index;
|
static unsigned int filter_index;
|
||||||
|
|
||||||
static const char *stp_states[] = {
|
static const char *port_states[] = {
|
||||||
[BR_STATE_DISABLED] = "disabled",
|
[BR_STATE_DISABLED] = "disabled",
|
||||||
[BR_STATE_LISTENING] = "listening",
|
[BR_STATE_LISTENING] = "listening",
|
||||||
[BR_STATE_LEARNING] = "learning",
|
[BR_STATE_LEARNING] = "learning",
|
||||||
|
|
@ -27,21 +25,17 @@ static const char *stp_states[] = {
|
||||||
[BR_STATE_BLOCKING] = "blocking",
|
[BR_STATE_BLOCKING] = "blocking",
|
||||||
};
|
};
|
||||||
|
|
||||||
static const char *hw_mode[] = {
|
extern char *if_indextoname(unsigned int __ifindex, char *__ifname);
|
||||||
"VEB", "VEPA"
|
|
||||||
};
|
|
||||||
|
|
||||||
static void print_link_flags(FILE *fp, unsigned int flags, unsigned int mdown)
|
static void print_link_flags(FILE *fp, unsigned int flags)
|
||||||
{
|
{
|
||||||
open_json_array(PRINT_ANY, is_json_context() ? "flags" : "<");
|
fprintf(fp, "<");
|
||||||
if (flags & IFF_UP && !(flags & IFF_RUNNING))
|
if (flags & IFF_UP && !(flags & IFF_RUNNING))
|
||||||
print_string(PRINT_ANY, NULL,
|
fprintf(fp, "NO-CARRIER%s", flags ? "," : "");
|
||||||
flags ? "%s," : "%s", "NO-CARRIER");
|
|
||||||
flags &= ~IFF_RUNNING;
|
flags &= ~IFF_RUNNING;
|
||||||
|
#define _PF(f) if (flags&IFF_##f) { \
|
||||||
#define _PF(f) if (flags&IFF_##f) { \
|
flags &= ~IFF_##f ; \
|
||||||
flags &= ~IFF_##f ; \
|
fprintf(fp, #f "%s", flags ? "," : ""); }
|
||||||
print_string(PRINT_ANY, NULL, flags ? "%s," : "%s", #f); }
|
|
||||||
_PF(LOOPBACK);
|
_PF(LOOPBACK);
|
||||||
_PF(BROADCAST);
|
_PF(BROADCAST);
|
||||||
_PF(POINTOPOINT);
|
_PF(POINTOPOINT);
|
||||||
|
|
@ -62,152 +56,54 @@ static void print_link_flags(FILE *fp, unsigned int flags, unsigned int mdown)
|
||||||
_PF(ECHO);
|
_PF(ECHO);
|
||||||
#undef _PF
|
#undef _PF
|
||||||
if (flags)
|
if (flags)
|
||||||
print_hex(PRINT_ANY, NULL, "%x", flags);
|
fprintf(fp, "%x", flags);
|
||||||
if (mdown)
|
fprintf(fp, "> ");
|
||||||
print_string(PRINT_ANY, NULL, ",%s", "M-DOWN");
|
|
||||||
close_json_array(PRINT_ANY, "> ");
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void print_stp_state(__u8 state)
|
static const char *oper_states[] = {
|
||||||
|
"UNKNOWN", "NOTPRESENT", "DOWN", "LOWERLAYERDOWN",
|
||||||
|
"TESTING", "DORMANT", "UP"
|
||||||
|
};
|
||||||
|
|
||||||
|
static const char *hw_mode[] = {"VEB", "VEPA"};
|
||||||
|
|
||||||
|
static void print_operstate(FILE *f, __u8 state)
|
||||||
|
{
|
||||||
|
if (state >= ARRAY_SIZE(oper_states))
|
||||||
|
fprintf(f, "state %#x ", state);
|
||||||
|
else
|
||||||
|
fprintf(f, "state %s ", oper_states[state]);
|
||||||
|
}
|
||||||
|
|
||||||
|
static void print_portstate(FILE *f, __u8 state)
|
||||||
{
|
{
|
||||||
if (state <= BR_STATE_BLOCKING)
|
if (state <= BR_STATE_BLOCKING)
|
||||||
print_string(PRINT_ANY, "state",
|
fprintf(f, "state %s ", port_states[state]);
|
||||||
"state %s ", stp_states[state]);
|
|
||||||
else
|
else
|
||||||
print_uint(PRINT_ANY, "state",
|
fprintf(f, "state (%d) ", state);
|
||||||
"state (%d) ", state);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
int parse_stp_state(const char *arg)
|
static void print_onoff(FILE *f, char *flag, __u8 val)
|
||||||
{
|
{
|
||||||
size_t nstates = ARRAY_SIZE(stp_states);
|
fprintf(f, "%s %s ", flag, val ? "on" : "off");
|
||||||
int state;
|
|
||||||
|
|
||||||
for (state = 0; state < nstates; state++)
|
|
||||||
if (strcmp(stp_states[state], arg) == 0)
|
|
||||||
break;
|
|
||||||
|
|
||||||
if (state == nstates)
|
|
||||||
state = -1;
|
|
||||||
|
|
||||||
return state;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static void print_hwmode(__u16 mode)
|
static void print_hwmode(FILE *f, __u16 mode)
|
||||||
{
|
{
|
||||||
if (mode >= ARRAY_SIZE(hw_mode))
|
if (mode >= ARRAY_SIZE(hw_mode))
|
||||||
print_0xhex(PRINT_ANY, "hwmode",
|
fprintf(f, "hwmode %#hx ", mode);
|
||||||
"hwmode %#llx ", mode);
|
|
||||||
else
|
else
|
||||||
print_string(PRINT_ANY, "hwmode",
|
fprintf(f, "hwmode %s ", hw_mode[mode]);
|
||||||
"hwmode %s ", hw_mode[mode]);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static void print_protinfo(FILE *fp, struct rtattr *attr)
|
int print_linkinfo(const struct sockaddr_nl *who,
|
||||||
{
|
struct nlmsghdr *n, void *arg)
|
||||||
if (attr->rta_type & NLA_F_NESTED) {
|
|
||||||
struct rtattr *prtb[IFLA_BRPORT_MAX + 1];
|
|
||||||
|
|
||||||
parse_rtattr_nested(prtb, IFLA_BRPORT_MAX, attr);
|
|
||||||
|
|
||||||
if (prtb[IFLA_BRPORT_STATE])
|
|
||||||
print_stp_state(rta_getattr_u8(prtb[IFLA_BRPORT_STATE]));
|
|
||||||
|
|
||||||
if (prtb[IFLA_BRPORT_PRIORITY])
|
|
||||||
print_uint(PRINT_ANY, "priority",
|
|
||||||
"priority %u ",
|
|
||||||
rta_getattr_u16(prtb[IFLA_BRPORT_PRIORITY]));
|
|
||||||
|
|
||||||
if (prtb[IFLA_BRPORT_COST])
|
|
||||||
print_uint(PRINT_ANY, "cost",
|
|
||||||
"cost %u ",
|
|
||||||
rta_getattr_u32(prtb[IFLA_BRPORT_COST]));
|
|
||||||
|
|
||||||
if (!show_details)
|
|
||||||
return;
|
|
||||||
|
|
||||||
if (!is_json_context())
|
|
||||||
fprintf(fp, "%s ", _SL_);
|
|
||||||
|
|
||||||
if (prtb[IFLA_BRPORT_MODE])
|
|
||||||
print_on_off(PRINT_ANY, "hairpin", "hairpin %s ",
|
|
||||||
rta_getattr_u8(prtb[IFLA_BRPORT_MODE]));
|
|
||||||
if (prtb[IFLA_BRPORT_GUARD])
|
|
||||||
print_on_off(PRINT_ANY, "guard", "guard %s ",
|
|
||||||
rta_getattr_u8(prtb[IFLA_BRPORT_GUARD]));
|
|
||||||
if (prtb[IFLA_BRPORT_PROTECT])
|
|
||||||
print_on_off(PRINT_ANY, "root_block", "root_block %s ",
|
|
||||||
rta_getattr_u8(prtb[IFLA_BRPORT_PROTECT]));
|
|
||||||
if (prtb[IFLA_BRPORT_FAST_LEAVE])
|
|
||||||
print_on_off(PRINT_ANY, "fastleave", "fastleave %s ",
|
|
||||||
rta_getattr_u8(prtb[IFLA_BRPORT_FAST_LEAVE]));
|
|
||||||
if (prtb[IFLA_BRPORT_LEARNING])
|
|
||||||
print_on_off(PRINT_ANY, "learning", "learning %s ",
|
|
||||||
rta_getattr_u8(prtb[IFLA_BRPORT_LEARNING]));
|
|
||||||
if (prtb[IFLA_BRPORT_LEARNING_SYNC])
|
|
||||||
print_on_off(PRINT_ANY, "learning_sync", "learning_sync %s ",
|
|
||||||
rta_getattr_u8(prtb[IFLA_BRPORT_LEARNING_SYNC]));
|
|
||||||
if (prtb[IFLA_BRPORT_UNICAST_FLOOD])
|
|
||||||
print_on_off(PRINT_ANY, "flood", "flood %s ",
|
|
||||||
rta_getattr_u8(prtb[IFLA_BRPORT_UNICAST_FLOOD]));
|
|
||||||
if (prtb[IFLA_BRPORT_MCAST_FLOOD])
|
|
||||||
print_on_off(PRINT_ANY, "mcast_flood", "mcast_flood %s ",
|
|
||||||
rta_getattr_u8(prtb[IFLA_BRPORT_MCAST_FLOOD]));
|
|
||||||
if (prtb[IFLA_BRPORT_MCAST_TO_UCAST])
|
|
||||||
print_on_off(PRINT_ANY, "mcast_to_unicast", "mcast_to_unicast %s ",
|
|
||||||
rta_getattr_u8(prtb[IFLA_BRPORT_MCAST_TO_UCAST]));
|
|
||||||
if (prtb[IFLA_BRPORT_NEIGH_SUPPRESS])
|
|
||||||
print_on_off(PRINT_ANY, "neigh_suppress", "neigh_suppress %s ",
|
|
||||||
rta_getattr_u8(prtb[IFLA_BRPORT_NEIGH_SUPPRESS]));
|
|
||||||
if (prtb[IFLA_BRPORT_VLAN_TUNNEL])
|
|
||||||
print_on_off(PRINT_ANY, "vlan_tunnel", "vlan_tunnel %s ",
|
|
||||||
rta_getattr_u8(prtb[IFLA_BRPORT_VLAN_TUNNEL]));
|
|
||||||
|
|
||||||
if (prtb[IFLA_BRPORT_BACKUP_PORT]) {
|
|
||||||
int ifidx;
|
|
||||||
|
|
||||||
ifidx = rta_getattr_u32(prtb[IFLA_BRPORT_BACKUP_PORT]);
|
|
||||||
print_string(PRINT_ANY,
|
|
||||||
"backup_port", "backup_port %s ",
|
|
||||||
ll_index_to_name(ifidx));
|
|
||||||
}
|
|
||||||
|
|
||||||
if (prtb[IFLA_BRPORT_ISOLATED])
|
|
||||||
print_on_off(PRINT_ANY, "isolated", "isolated %s ",
|
|
||||||
rta_getattr_u8(prtb[IFLA_BRPORT_ISOLATED]));
|
|
||||||
} else
|
|
||||||
print_stp_state(rta_getattr_u8(attr));
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
/*
|
|
||||||
* This is reported by HW devices that have some bridging
|
|
||||||
* capabilities.
|
|
||||||
*/
|
|
||||||
static void print_af_spec(struct rtattr *attr, int ifindex)
|
|
||||||
{
|
|
||||||
struct rtattr *aftb[IFLA_BRIDGE_MAX+1];
|
|
||||||
|
|
||||||
parse_rtattr_nested(aftb, IFLA_BRIDGE_MAX, attr);
|
|
||||||
|
|
||||||
if (aftb[IFLA_BRIDGE_MODE])
|
|
||||||
print_hwmode(rta_getattr_u16(aftb[IFLA_BRIDGE_MODE]));
|
|
||||||
|
|
||||||
if (!show_details)
|
|
||||||
return;
|
|
||||||
|
|
||||||
if (aftb[IFLA_BRIDGE_VLAN_INFO])
|
|
||||||
print_vlan_info(aftb[IFLA_BRIDGE_VLAN_INFO], ifindex);
|
|
||||||
}
|
|
||||||
|
|
||||||
int print_linkinfo(struct nlmsghdr *n, void *arg)
|
|
||||||
{
|
{
|
||||||
FILE *fp = arg;
|
FILE *fp = arg;
|
||||||
|
int len = n->nlmsg_len;
|
||||||
struct ifinfomsg *ifi = NLMSG_DATA(n);
|
struct ifinfomsg *ifi = NLMSG_DATA(n);
|
||||||
struct rtattr *tb[IFLA_MAX+1];
|
struct rtattr *tb[IFLA_MAX+1];
|
||||||
unsigned int m_flag = 0;
|
char b1[IFNAMSIZ];
|
||||||
int len = n->nlmsg_len;
|
|
||||||
const char *name;
|
|
||||||
|
|
||||||
len -= NLMSG_LENGTH(sizeof(*ifi));
|
len -= NLMSG_LENGTH(sizeof(*ifi));
|
||||||
if (len < 0) {
|
if (len < 0) {
|
||||||
|
|
@ -223,65 +119,140 @@ int print_linkinfo(struct nlmsghdr *n, void *arg)
|
||||||
|
|
||||||
parse_rtattr_flags(tb, IFLA_MAX, IFLA_RTA(ifi), len, NLA_F_NESTED);
|
parse_rtattr_flags(tb, IFLA_MAX, IFLA_RTA(ifi), len, NLA_F_NESTED);
|
||||||
|
|
||||||
name = get_ifname_rta(ifi->ifi_index, tb[IFLA_IFNAME]);
|
if (tb[IFLA_IFNAME] == NULL) {
|
||||||
if (!name)
|
fprintf(stderr, "BUG: nil ifname\n");
|
||||||
return -1;
|
return -1;
|
||||||
|
|
||||||
open_json_object(NULL);
|
|
||||||
if (n->nlmsg_type == RTM_DELLINK)
|
|
||||||
print_bool(PRINT_ANY, "deleted", "Deleted ", true);
|
|
||||||
|
|
||||||
print_int(PRINT_ANY, "ifindex", "%d: ", ifi->ifi_index);
|
|
||||||
m_flag = print_name_and_link("%s: ", name, tb);
|
|
||||||
print_link_flags(fp, ifi->ifi_flags, m_flag);
|
|
||||||
|
|
||||||
if (tb[IFLA_MTU])
|
|
||||||
print_int(PRINT_ANY,
|
|
||||||
"mtu", "mtu %u ",
|
|
||||||
rta_getattr_u32(tb[IFLA_MTU]));
|
|
||||||
|
|
||||||
if (tb[IFLA_MASTER]) {
|
|
||||||
int master = rta_getattr_u32(tb[IFLA_MASTER]);
|
|
||||||
|
|
||||||
print_string(PRINT_ANY, "master", "master %s ",
|
|
||||||
ll_index_to_name(master));
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if (tb[IFLA_PROTINFO])
|
if (n->nlmsg_type == RTM_DELLINK)
|
||||||
print_protinfo(fp, tb[IFLA_PROTINFO]);
|
fprintf(fp, "Deleted ");
|
||||||
|
|
||||||
if (tb[IFLA_AF_SPEC])
|
fprintf(fp, "%d: %s ", ifi->ifi_index,
|
||||||
print_af_spec(tb[IFLA_AF_SPEC], ifi->ifi_index);
|
tb[IFLA_IFNAME] ? rta_getattr_str(tb[IFLA_IFNAME]) : "<nil>");
|
||||||
|
|
||||||
print_string(PRINT_FP, NULL, "%s", "\n");
|
if (tb[IFLA_OPERSTATE])
|
||||||
close_json_object();
|
print_operstate(fp, rta_getattr_u8(tb[IFLA_OPERSTATE]));
|
||||||
|
|
||||||
|
if (tb[IFLA_LINK]) {
|
||||||
|
SPRINT_BUF(b1);
|
||||||
|
int iflink = rta_getattr_u32(tb[IFLA_LINK]);
|
||||||
|
|
||||||
|
if (iflink == 0)
|
||||||
|
fprintf(fp, "@NONE: ");
|
||||||
|
else
|
||||||
|
fprintf(fp, "@%s: ",
|
||||||
|
if_indextoname(iflink, b1));
|
||||||
|
} else
|
||||||
|
fprintf(fp, ": ");
|
||||||
|
|
||||||
|
print_link_flags(fp, ifi->ifi_flags);
|
||||||
|
|
||||||
|
if (tb[IFLA_MTU])
|
||||||
|
fprintf(fp, "mtu %u ", rta_getattr_u32(tb[IFLA_MTU]));
|
||||||
|
|
||||||
|
if (tb[IFLA_MASTER])
|
||||||
|
fprintf(fp, "master %s ",
|
||||||
|
if_indextoname(rta_getattr_u32(tb[IFLA_MASTER]), b1));
|
||||||
|
|
||||||
|
if (tb[IFLA_PROTINFO]) {
|
||||||
|
if (tb[IFLA_PROTINFO]->rta_type & NLA_F_NESTED) {
|
||||||
|
struct rtattr *prtb[IFLA_BRPORT_MAX+1];
|
||||||
|
|
||||||
|
parse_rtattr_nested(prtb, IFLA_BRPORT_MAX,
|
||||||
|
tb[IFLA_PROTINFO]);
|
||||||
|
|
||||||
|
if (prtb[IFLA_BRPORT_STATE])
|
||||||
|
print_portstate(fp,
|
||||||
|
rta_getattr_u8(prtb[IFLA_BRPORT_STATE]));
|
||||||
|
if (prtb[IFLA_BRPORT_PRIORITY])
|
||||||
|
fprintf(fp, "priority %hu ",
|
||||||
|
rta_getattr_u16(prtb[IFLA_BRPORT_PRIORITY]));
|
||||||
|
if (prtb[IFLA_BRPORT_COST])
|
||||||
|
fprintf(fp, "cost %u ",
|
||||||
|
rta_getattr_u32(prtb[IFLA_BRPORT_COST]));
|
||||||
|
|
||||||
|
if (show_details) {
|
||||||
|
fprintf(fp, "%s ", _SL_);
|
||||||
|
|
||||||
|
if (prtb[IFLA_BRPORT_MODE])
|
||||||
|
print_onoff(fp, "hairpin",
|
||||||
|
rta_getattr_u8(prtb[IFLA_BRPORT_MODE]));
|
||||||
|
if (prtb[IFLA_BRPORT_GUARD])
|
||||||
|
print_onoff(fp, "guard",
|
||||||
|
rta_getattr_u8(prtb[IFLA_BRPORT_GUARD]));
|
||||||
|
if (prtb[IFLA_BRPORT_PROTECT])
|
||||||
|
print_onoff(fp, "root_block",
|
||||||
|
rta_getattr_u8(prtb[IFLA_BRPORT_PROTECT]));
|
||||||
|
if (prtb[IFLA_BRPORT_FAST_LEAVE])
|
||||||
|
print_onoff(fp, "fastleave",
|
||||||
|
rta_getattr_u8(prtb[IFLA_BRPORT_FAST_LEAVE]));
|
||||||
|
if (prtb[IFLA_BRPORT_LEARNING])
|
||||||
|
print_onoff(fp, "learning",
|
||||||
|
rta_getattr_u8(prtb[IFLA_BRPORT_LEARNING]));
|
||||||
|
if (prtb[IFLA_BRPORT_LEARNING_SYNC])
|
||||||
|
print_onoff(fp, "learning_sync",
|
||||||
|
rta_getattr_u8(prtb[IFLA_BRPORT_LEARNING_SYNC]));
|
||||||
|
if (prtb[IFLA_BRPORT_UNICAST_FLOOD])
|
||||||
|
print_onoff(fp, "flood",
|
||||||
|
rta_getattr_u8(prtb[IFLA_BRPORT_UNICAST_FLOOD]));
|
||||||
|
if (prtb[IFLA_BRPORT_MCAST_FLOOD])
|
||||||
|
print_onoff(fp, "mcast_flood",
|
||||||
|
rta_getattr_u8(prtb[IFLA_BRPORT_MCAST_FLOOD]));
|
||||||
|
}
|
||||||
|
} else
|
||||||
|
print_portstate(fp, rta_getattr_u8(tb[IFLA_PROTINFO]));
|
||||||
|
}
|
||||||
|
|
||||||
|
if (tb[IFLA_AF_SPEC]) {
|
||||||
|
/* This is reported by HW devices that have some bridging
|
||||||
|
* capabilities.
|
||||||
|
*/
|
||||||
|
struct rtattr *aftb[IFLA_BRIDGE_MAX+1];
|
||||||
|
|
||||||
|
parse_rtattr_nested(aftb, IFLA_BRIDGE_MAX, tb[IFLA_AF_SPEC]);
|
||||||
|
|
||||||
|
if (aftb[IFLA_BRIDGE_MODE])
|
||||||
|
print_hwmode(fp, rta_getattr_u16(aftb[IFLA_BRIDGE_MODE]));
|
||||||
|
}
|
||||||
|
|
||||||
|
fprintf(fp, "\n");
|
||||||
fflush(fp);
|
fflush(fp);
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
static void usage(void)
|
static void usage(void)
|
||||||
{
|
{
|
||||||
fprintf(stderr,
|
fprintf(stderr, "Usage: bridge link set dev DEV [ cost COST ] [ priority PRIO ] [ state STATE ]\n");
|
||||||
"Usage: bridge link set dev DEV [ cost COST ] [ priority PRIO ] [ state STATE ]\n"
|
fprintf(stderr, " [ guard {on | off} ]\n");
|
||||||
" [ guard {on | off} ]\n"
|
fprintf(stderr, " [ hairpin {on | off} ]\n");
|
||||||
" [ hairpin {on | off} ]\n"
|
fprintf(stderr, " [ fastleave {on | off} ]\n");
|
||||||
" [ fastleave {on | off} ]\n"
|
fprintf(stderr, " [ root_block {on | off} ]\n");
|
||||||
" [ root_block {on | off} ]\n"
|
fprintf(stderr, " [ learning {on | off} ]\n");
|
||||||
" [ learning {on | off} ]\n"
|
fprintf(stderr, " [ learning_sync {on | off} ]\n");
|
||||||
" [ learning_sync {on | off} ]\n"
|
fprintf(stderr, " [ flood {on | off} ]\n");
|
||||||
" [ flood {on | off} ]\n"
|
fprintf(stderr, " [ mcast_flood {on | off} ]\n");
|
||||||
" [ mcast_flood {on | off} ]\n"
|
fprintf(stderr, " [ hwmode {vepa | veb} ]\n");
|
||||||
" [ mcast_to_unicast {on | off} ]\n"
|
fprintf(stderr, " [ self ] [ master ]\n");
|
||||||
" [ neigh_suppress {on | off} ]\n"
|
fprintf(stderr, " bridge link show [dev DEV]\n");
|
||||||
" [ vlan_tunnel {on | off} ]\n"
|
|
||||||
" [ isolated {on | off} ]\n"
|
|
||||||
" [ hwmode {vepa | veb} ]\n"
|
|
||||||
" [ backup_port DEVICE ] [ nobackup_port ]\n"
|
|
||||||
" [ self ] [ master ]\n"
|
|
||||||
" bridge link show [dev DEV]\n");
|
|
||||||
exit(-1);
|
exit(-1);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static bool on_off(char *arg, __s8 *attr, char *val)
|
||||||
|
{
|
||||||
|
if (strcmp(val, "on") == 0)
|
||||||
|
*attr = 1;
|
||||||
|
else if (strcmp(val, "off") == 0)
|
||||||
|
*attr = 0;
|
||||||
|
else {
|
||||||
|
fprintf(stderr,
|
||||||
|
"Error: argument of \"%s\" must be \"on\" or \"off\"\n",
|
||||||
|
arg);
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
static int brlink_modify(int argc, char **argv)
|
static int brlink_modify(int argc, char **argv)
|
||||||
{
|
{
|
||||||
struct {
|
struct {
|
||||||
|
|
@ -295,15 +266,10 @@ static int brlink_modify(int argc, char **argv)
|
||||||
.ifm.ifi_family = PF_BRIDGE,
|
.ifm.ifi_family = PF_BRIDGE,
|
||||||
};
|
};
|
||||||
char *d = NULL;
|
char *d = NULL;
|
||||||
int backup_port_idx = -1;
|
|
||||||
__s8 neigh_suppress = -1;
|
|
||||||
__s8 learning = -1;
|
__s8 learning = -1;
|
||||||
__s8 learning_sync = -1;
|
__s8 learning_sync = -1;
|
||||||
__s8 flood = -1;
|
__s8 flood = -1;
|
||||||
__s8 vlan_tunnel = -1;
|
|
||||||
__s8 mcast_flood = -1;
|
__s8 mcast_flood = -1;
|
||||||
__s8 mcast_to_unicast = -1;
|
|
||||||
__s8 isolated = -1;
|
|
||||||
__s8 hairpin = -1;
|
__s8 hairpin = -1;
|
||||||
__s8 bpdu_guard = -1;
|
__s8 bpdu_guard = -1;
|
||||||
__s8 fast_leave = -1;
|
__s8 fast_leave = -1;
|
||||||
|
|
@ -314,7 +280,6 @@ static int brlink_modify(int argc, char **argv)
|
||||||
__s16 mode = -1;
|
__s16 mode = -1;
|
||||||
__u16 flags = 0;
|
__u16 flags = 0;
|
||||||
struct rtattr *nest;
|
struct rtattr *nest;
|
||||||
int ret;
|
|
||||||
|
|
||||||
while (argc > 0) {
|
while (argc > 0) {
|
||||||
if (strcmp(*argv, "dev") == 0) {
|
if (strcmp(*argv, "dev") == 0) {
|
||||||
|
|
@ -322,49 +287,36 @@ static int brlink_modify(int argc, char **argv)
|
||||||
d = *argv;
|
d = *argv;
|
||||||
} else if (strcmp(*argv, "guard") == 0) {
|
} else if (strcmp(*argv, "guard") == 0) {
|
||||||
NEXT_ARG();
|
NEXT_ARG();
|
||||||
bpdu_guard = parse_on_off("guard", *argv, &ret);
|
if (!on_off("guard", &bpdu_guard, *argv))
|
||||||
if (ret)
|
return -1;
|
||||||
return ret;
|
|
||||||
} else if (strcmp(*argv, "hairpin") == 0) {
|
} else if (strcmp(*argv, "hairpin") == 0) {
|
||||||
NEXT_ARG();
|
NEXT_ARG();
|
||||||
hairpin = parse_on_off("hairpin", *argv, &ret);
|
if (!on_off("hairping", &hairpin, *argv))
|
||||||
if (ret)
|
return -1;
|
||||||
return ret;
|
|
||||||
} else if (strcmp(*argv, "fastleave") == 0) {
|
} else if (strcmp(*argv, "fastleave") == 0) {
|
||||||
NEXT_ARG();
|
NEXT_ARG();
|
||||||
fast_leave = parse_on_off("fastleave", *argv, &ret);
|
if (!on_off("fastleave", &fast_leave, *argv))
|
||||||
if (ret)
|
return -1;
|
||||||
return ret;
|
|
||||||
} else if (strcmp(*argv, "root_block") == 0) {
|
} else if (strcmp(*argv, "root_block") == 0) {
|
||||||
NEXT_ARG();
|
NEXT_ARG();
|
||||||
root_block = parse_on_off("root_block", *argv, &ret);
|
if (!on_off("root_block", &root_block, *argv))
|
||||||
if (ret)
|
return -1;
|
||||||
return ret;
|
|
||||||
} else if (strcmp(*argv, "learning") == 0) {
|
} else if (strcmp(*argv, "learning") == 0) {
|
||||||
NEXT_ARG();
|
NEXT_ARG();
|
||||||
learning = parse_on_off("learning", *argv, &ret);
|
if (!on_off("learning", &learning, *argv))
|
||||||
if (ret)
|
return -1;
|
||||||
return ret;
|
|
||||||
} else if (strcmp(*argv, "learning_sync") == 0) {
|
} else if (strcmp(*argv, "learning_sync") == 0) {
|
||||||
NEXT_ARG();
|
NEXT_ARG();
|
||||||
learning_sync = parse_on_off("learning_sync", *argv, &ret);
|
if (!on_off("learning_sync", &learning_sync, *argv))
|
||||||
if (ret)
|
return -1;
|
||||||
return ret;
|
|
||||||
} else if (strcmp(*argv, "flood") == 0) {
|
} else if (strcmp(*argv, "flood") == 0) {
|
||||||
NEXT_ARG();
|
NEXT_ARG();
|
||||||
flood = parse_on_off("flood", *argv, &ret);
|
if (!on_off("flood", &flood, *argv))
|
||||||
if (ret)
|
return -1;
|
||||||
return ret;
|
|
||||||
} else if (strcmp(*argv, "mcast_flood") == 0) {
|
} else if (strcmp(*argv, "mcast_flood") == 0) {
|
||||||
NEXT_ARG();
|
NEXT_ARG();
|
||||||
mcast_flood = parse_on_off("mcast_flood", *argv, &ret);
|
if (!on_off("mcast_flood", &mcast_flood, *argv))
|
||||||
if (ret)
|
return -1;
|
||||||
return ret;
|
|
||||||
} else if (strcmp(*argv, "mcast_to_unicast") == 0) {
|
|
||||||
NEXT_ARG();
|
|
||||||
mcast_to_unicast = parse_on_off("mcast_to_unicast", *argv, &ret);
|
|
||||||
if (ret)
|
|
||||||
return ret;
|
|
||||||
} else if (strcmp(*argv, "cost") == 0) {
|
} else if (strcmp(*argv, "cost") == 0) {
|
||||||
NEXT_ARG();
|
NEXT_ARG();
|
||||||
cost = atoi(*argv);
|
cost = atoi(*argv);
|
||||||
|
|
@ -374,11 +326,14 @@ static int brlink_modify(int argc, char **argv)
|
||||||
} else if (strcmp(*argv, "state") == 0) {
|
} else if (strcmp(*argv, "state") == 0) {
|
||||||
NEXT_ARG();
|
NEXT_ARG();
|
||||||
char *endptr;
|
char *endptr;
|
||||||
|
size_t nstates = ARRAY_SIZE(port_states);
|
||||||
|
|
||||||
state = strtol(*argv, &endptr, 10);
|
state = strtol(*argv, &endptr, 10);
|
||||||
if (!(**argv != '\0' && *endptr == '\0')) {
|
if (!(**argv != '\0' && *endptr == '\0')) {
|
||||||
state = parse_stp_state(*argv);
|
for (state = 0; state < nstates; state++)
|
||||||
if (state == -1) {
|
if (strcmp(port_states[state], *argv) == 0)
|
||||||
|
break;
|
||||||
|
if (state == nstates) {
|
||||||
fprintf(stderr,
|
fprintf(stderr,
|
||||||
"Error: invalid STP port state\n");
|
"Error: invalid STP port state\n");
|
||||||
return -1;
|
return -1;
|
||||||
|
|
@ -400,31 +355,6 @@ static int brlink_modify(int argc, char **argv)
|
||||||
flags |= BRIDGE_FLAGS_SELF;
|
flags |= BRIDGE_FLAGS_SELF;
|
||||||
} else if (strcmp(*argv, "master") == 0) {
|
} else if (strcmp(*argv, "master") == 0) {
|
||||||
flags |= BRIDGE_FLAGS_MASTER;
|
flags |= BRIDGE_FLAGS_MASTER;
|
||||||
} else if (strcmp(*argv, "neigh_suppress") == 0) {
|
|
||||||
NEXT_ARG();
|
|
||||||
neigh_suppress = parse_on_off("neigh_suppress", *argv, &ret);
|
|
||||||
if (ret)
|
|
||||||
return ret;
|
|
||||||
} else if (strcmp(*argv, "vlan_tunnel") == 0) {
|
|
||||||
NEXT_ARG();
|
|
||||||
vlan_tunnel = parse_on_off("vlan_tunnel", *argv, &ret);
|
|
||||||
if (ret)
|
|
||||||
return ret;
|
|
||||||
} else if (strcmp(*argv, "isolated") == 0) {
|
|
||||||
NEXT_ARG();
|
|
||||||
isolated = parse_on_off("isolated", *argv, &ret);
|
|
||||||
if (ret)
|
|
||||||
return ret;
|
|
||||||
} else if (strcmp(*argv, "backup_port") == 0) {
|
|
||||||
NEXT_ARG();
|
|
||||||
backup_port_idx = ll_name_to_index(*argv);
|
|
||||||
if (!backup_port_idx) {
|
|
||||||
fprintf(stderr, "Error: device %s does not exist\n",
|
|
||||||
*argv);
|
|
||||||
return -1;
|
|
||||||
}
|
|
||||||
} else if (strcmp(*argv, "nobackup_port") == 0) {
|
|
||||||
backup_port_idx = 0;
|
|
||||||
} else {
|
} else {
|
||||||
usage();
|
usage();
|
||||||
}
|
}
|
||||||
|
|
@ -462,9 +392,6 @@ static int brlink_modify(int argc, char **argv)
|
||||||
if (mcast_flood >= 0)
|
if (mcast_flood >= 0)
|
||||||
addattr8(&req.n, sizeof(req), IFLA_BRPORT_MCAST_FLOOD,
|
addattr8(&req.n, sizeof(req), IFLA_BRPORT_MCAST_FLOOD,
|
||||||
mcast_flood);
|
mcast_flood);
|
||||||
if (mcast_to_unicast >= 0)
|
|
||||||
addattr8(&req.n, sizeof(req), IFLA_BRPORT_MCAST_TO_UCAST,
|
|
||||||
mcast_to_unicast);
|
|
||||||
if (learning >= 0)
|
if (learning >= 0)
|
||||||
addattr8(&req.n, sizeof(req), IFLA_BRPORT_LEARNING, learning);
|
addattr8(&req.n, sizeof(req), IFLA_BRPORT_LEARNING, learning);
|
||||||
if (learning_sync >= 0)
|
if (learning_sync >= 0)
|
||||||
|
|
@ -480,19 +407,6 @@ static int brlink_modify(int argc, char **argv)
|
||||||
if (state >= 0)
|
if (state >= 0)
|
||||||
addattr8(&req.n, sizeof(req), IFLA_BRPORT_STATE, state);
|
addattr8(&req.n, sizeof(req), IFLA_BRPORT_STATE, state);
|
||||||
|
|
||||||
if (neigh_suppress != -1)
|
|
||||||
addattr8(&req.n, sizeof(req), IFLA_BRPORT_NEIGH_SUPPRESS,
|
|
||||||
neigh_suppress);
|
|
||||||
if (vlan_tunnel != -1)
|
|
||||||
addattr8(&req.n, sizeof(req), IFLA_BRPORT_VLAN_TUNNEL,
|
|
||||||
vlan_tunnel);
|
|
||||||
if (isolated != -1)
|
|
||||||
addattr8(&req.n, sizeof(req), IFLA_BRPORT_ISOLATED, isolated);
|
|
||||||
|
|
||||||
if (backup_port_idx != -1)
|
|
||||||
addattr32(&req.n, sizeof(req), IFLA_BRPORT_BACKUP_PORT,
|
|
||||||
backup_port_idx);
|
|
||||||
|
|
||||||
addattr_nest_end(&req.n, nest);
|
addattr_nest_end(&req.n, nest);
|
||||||
|
|
||||||
/* IFLA_AF_SPEC nested attribute. Contains IFLA_BRIDGE_FLAGS that
|
/* IFLA_AF_SPEC nested attribute. Contains IFLA_BRIDGE_FLAGS that
|
||||||
|
|
@ -512,7 +426,7 @@ static int brlink_modify(int argc, char **argv)
|
||||||
addattr_nest_end(&req.n, nest);
|
addattr_nest_end(&req.n, nest);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (rtnl_talk(&rth, &req.n, NULL) < 0)
|
if (rtnl_talk(&rth, &req.n, NULL, 0) < 0)
|
||||||
return -1;
|
return -1;
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
|
|
@ -533,34 +447,22 @@ static int brlink_show(int argc, char **argv)
|
||||||
}
|
}
|
||||||
|
|
||||||
if (filter_dev) {
|
if (filter_dev) {
|
||||||
filter_index = ll_name_to_index(filter_dev);
|
if ((filter_index = ll_name_to_index(filter_dev)) == 0) {
|
||||||
if (!filter_index)
|
fprintf(stderr, "Cannot find device \"%s\"\n",
|
||||||
return nodev(filter_dev);
|
filter_dev);
|
||||||
}
|
return -1;
|
||||||
|
|
||||||
if (show_details) {
|
|
||||||
if (rtnl_linkdump_req_filter(&rth, PF_BRIDGE,
|
|
||||||
(compress_vlans ?
|
|
||||||
RTEXT_FILTER_BRVLAN_COMPRESSED :
|
|
||||||
RTEXT_FILTER_BRVLAN)) < 0) {
|
|
||||||
perror("Cannon send dump request");
|
|
||||||
exit(1);
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
if (rtnl_linkdump_req(&rth, PF_BRIDGE) < 0) {
|
|
||||||
perror("Cannon send dump request");
|
|
||||||
exit(1);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
new_json_obj(json);
|
if (rtnl_wilddump_request(&rth, PF_BRIDGE, RTM_GETLINK) < 0) {
|
||||||
|
perror("Cannon send dump request");
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
|
||||||
if (rtnl_dump_filter(&rth, print_linkinfo, stdout) < 0) {
|
if (rtnl_dump_filter(&rth, print_linkinfo, stdout) < 0) {
|
||||||
fprintf(stderr, "Dump terminated\n");
|
fprintf(stderr, "Dump terminated\n");
|
||||||
exit(1);
|
exit(1);
|
||||||
}
|
}
|
||||||
|
|
||||||
delete_json_obj();
|
|
||||||
fflush(stdout);
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
469
bridge/mdb.c
469
bridge/mdb.c
|
|
@ -1,4 +1,3 @@
|
||||||
/* SPDX-License-Identifier: GPL-2.0 */
|
|
||||||
/*
|
/*
|
||||||
* Get mdb table with netlink
|
* Get mdb table with netlink
|
||||||
*/
|
*/
|
||||||
|
|
@ -16,10 +15,9 @@
|
||||||
#include <arpa/inet.h>
|
#include <arpa/inet.h>
|
||||||
|
|
||||||
#include "libnetlink.h"
|
#include "libnetlink.h"
|
||||||
#include "utils.h"
|
|
||||||
#include "br_common.h"
|
#include "br_common.h"
|
||||||
#include "rt_names.h"
|
#include "rt_names.h"
|
||||||
#include "json_print.h"
|
#include "utils.h"
|
||||||
|
|
||||||
#ifndef MDBA_RTA
|
#ifndef MDBA_RTA
|
||||||
#define MDBA_RTA(r) \
|
#define MDBA_RTA(r) \
|
||||||
|
|
@ -30,9 +28,8 @@ static unsigned int filter_index, filter_vlan;
|
||||||
|
|
||||||
static void usage(void)
|
static void usage(void)
|
||||||
{
|
{
|
||||||
fprintf(stderr,
|
fprintf(stderr, "Usage: bridge mdb { add | del } dev DEV port PORT grp GROUP [permanent | temp] [vid VID]\n");
|
||||||
"Usage: bridge mdb { add | del } dev DEV port PORT grp GROUP [src SOURCE] [permanent | temp] [vid VID]\n"
|
fprintf(stderr, " bridge mdb {show} [ dev DEV ] [ vid VID ]\n");
|
||||||
" bridge mdb {show} [ dev DEV ] [ vid VID ]\n");
|
|
||||||
exit(-1);
|
exit(-1);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -41,213 +38,81 @@ static bool is_temp_mcast_rtr(__u8 type)
|
||||||
return type == MDB_RTR_TYPE_TEMP_QUERY || type == MDB_RTR_TYPE_TEMP;
|
return type == MDB_RTR_TYPE_TEMP_QUERY || type == MDB_RTR_TYPE_TEMP;
|
||||||
}
|
}
|
||||||
|
|
||||||
static const char *format_timer(__u32 ticks, int align)
|
static void __print_router_port_stats(FILE *f, struct rtattr *pattr)
|
||||||
{
|
|
||||||
struct timeval tv;
|
|
||||||
static char tbuf[32];
|
|
||||||
|
|
||||||
__jiffies_to_tv(&tv, ticks);
|
|
||||||
if (align)
|
|
||||||
snprintf(tbuf, sizeof(tbuf), "%4lu.%.2lu",
|
|
||||||
(unsigned long)tv.tv_sec,
|
|
||||||
(unsigned long)tv.tv_usec / 10000);
|
|
||||||
else
|
|
||||||
snprintf(tbuf, sizeof(tbuf), "%lu.%.2lu",
|
|
||||||
(unsigned long)tv.tv_sec,
|
|
||||||
(unsigned long)tv.tv_usec / 10000);
|
|
||||||
|
|
||||||
return tbuf;
|
|
||||||
}
|
|
||||||
|
|
||||||
void br_print_router_port_stats(struct rtattr *pattr)
|
|
||||||
{
|
{
|
||||||
struct rtattr *tb[MDBA_ROUTER_PATTR_MAX + 1];
|
struct rtattr *tb[MDBA_ROUTER_PATTR_MAX + 1];
|
||||||
|
struct timeval tv;
|
||||||
|
__u8 type;
|
||||||
|
|
||||||
parse_rtattr(tb, MDBA_ROUTER_PATTR_MAX, MDB_RTR_RTA(RTA_DATA(pattr)),
|
parse_rtattr(tb, MDBA_ROUTER_PATTR_MAX, MDB_RTR_RTA(RTA_DATA(pattr)),
|
||||||
RTA_PAYLOAD(pattr) - RTA_ALIGN(sizeof(uint32_t)));
|
RTA_PAYLOAD(pattr) - RTA_ALIGN(sizeof(uint32_t)));
|
||||||
|
|
||||||
if (tb[MDBA_ROUTER_PATTR_TIMER]) {
|
if (tb[MDBA_ROUTER_PATTR_TIMER]) {
|
||||||
__u32 timer = rta_getattr_u32(tb[MDBA_ROUTER_PATTR_TIMER]);
|
__jiffies_to_tv(&tv,
|
||||||
|
rta_getattr_u32(tb[MDBA_ROUTER_PATTR_TIMER]));
|
||||||
print_string(PRINT_ANY, "timer", " %s",
|
fprintf(f, " %4i.%.2i",
|
||||||
format_timer(timer, 1));
|
(int)tv.tv_sec, (int)tv.tv_usec/10000);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (tb[MDBA_ROUTER_PATTR_TYPE]) {
|
if (tb[MDBA_ROUTER_PATTR_TYPE]) {
|
||||||
__u8 type = rta_getattr_u8(tb[MDBA_ROUTER_PATTR_TYPE]);
|
type = rta_getattr_u8(tb[MDBA_ROUTER_PATTR_TYPE]);
|
||||||
|
fprintf(f, " %s",
|
||||||
print_string(PRINT_ANY, "type", " %s",
|
is_temp_mcast_rtr(type) ? "temp" : "permanent");
|
||||||
is_temp_mcast_rtr(type) ? "temp" : "permanent");
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static void br_print_router_ports(FILE *f, struct rtattr *attr,
|
static void br_print_router_ports(FILE *f, struct rtattr *attr, __u32 brifidx)
|
||||||
const char *brifname)
|
|
||||||
{
|
{
|
||||||
int rem = RTA_PAYLOAD(attr);
|
uint32_t *port_ifindex;
|
||||||
struct rtattr *i;
|
struct rtattr *i;
|
||||||
|
int rem;
|
||||||
if (is_json_context())
|
|
||||||
open_json_array(PRINT_JSON, brifname);
|
|
||||||
else if (!show_stats)
|
|
||||||
fprintf(f, "router ports on %s: ", brifname);
|
|
||||||
|
|
||||||
for (i = RTA_DATA(attr); RTA_OK(i, rem); i = RTA_NEXT(i, rem)) {
|
|
||||||
uint32_t *port_ifindex = RTA_DATA(i);
|
|
||||||
const char *port_ifname = ll_index_to_name(*port_ifindex);
|
|
||||||
|
|
||||||
if (is_json_context()) {
|
|
||||||
open_json_object(NULL);
|
|
||||||
print_string(PRINT_JSON, "port", NULL, port_ifname);
|
|
||||||
|
|
||||||
if (show_stats)
|
|
||||||
br_print_router_port_stats(i);
|
|
||||||
close_json_object();
|
|
||||||
} else if (show_stats) {
|
|
||||||
fprintf(f, "router ports on %s: %s",
|
|
||||||
brifname, port_ifname);
|
|
||||||
|
|
||||||
br_print_router_port_stats(i);
|
|
||||||
fprintf(f, "\n");
|
|
||||||
} else {
|
|
||||||
fprintf(f, "%s ", port_ifname);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (!show_stats)
|
if (!show_stats)
|
||||||
print_nl();
|
fprintf(f, "router ports on %s: ", ll_index_to_name(brifidx));
|
||||||
|
|
||||||
close_json_array(PRINT_JSON, NULL);
|
rem = RTA_PAYLOAD(attr);
|
||||||
|
for (i = RTA_DATA(attr); RTA_OK(i, rem); i = RTA_NEXT(i, rem)) {
|
||||||
|
port_ifindex = RTA_DATA(i);
|
||||||
|
if (show_stats) {
|
||||||
|
fprintf(f, "router ports on %s: %s",
|
||||||
|
ll_index_to_name(brifidx),
|
||||||
|
ll_index_to_name(*port_ifindex));
|
||||||
|
__print_router_port_stats(f, i);
|
||||||
|
fprintf(f, "\n");
|
||||||
|
} else {
|
||||||
|
fprintf(f, "%s ", ll_index_to_name(*port_ifindex));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (!show_stats)
|
||||||
|
fprintf(f, "\n");
|
||||||
}
|
}
|
||||||
|
|
||||||
static void print_src_entry(struct rtattr *src_attr, int af, const char *sep)
|
static void print_mdb_entry(FILE *f, int ifindex, struct br_mdb_entry *e,
|
||||||
{
|
|
||||||
struct rtattr *stb[MDBA_MDB_SRCATTR_MAX + 1];
|
|
||||||
SPRINT_BUF(abuf);
|
|
||||||
const char *addr;
|
|
||||||
__u32 timer_val;
|
|
||||||
|
|
||||||
parse_rtattr_nested(stb, MDBA_MDB_SRCATTR_MAX, src_attr);
|
|
||||||
if (!stb[MDBA_MDB_SRCATTR_ADDRESS] || !stb[MDBA_MDB_SRCATTR_TIMER])
|
|
||||||
return;
|
|
||||||
|
|
||||||
addr = inet_ntop(af, RTA_DATA(stb[MDBA_MDB_SRCATTR_ADDRESS]), abuf,
|
|
||||||
sizeof(abuf));
|
|
||||||
if (!addr)
|
|
||||||
return;
|
|
||||||
timer_val = rta_getattr_u32(stb[MDBA_MDB_SRCATTR_TIMER]);
|
|
||||||
|
|
||||||
open_json_object(NULL);
|
|
||||||
print_string(PRINT_FP, NULL, "%s", sep);
|
|
||||||
print_color_string(PRINT_ANY, ifa_family_color(af),
|
|
||||||
"address", "%s", addr);
|
|
||||||
print_string(PRINT_ANY, "timer", "/%s", format_timer(timer_val, 0));
|
|
||||||
close_json_object();
|
|
||||||
}
|
|
||||||
|
|
||||||
static void print_mdb_entry(FILE *f, int ifindex, const struct br_mdb_entry *e,
|
|
||||||
struct nlmsghdr *n, struct rtattr **tb)
|
struct nlmsghdr *n, struct rtattr **tb)
|
||||||
{
|
{
|
||||||
const void *grp, *src;
|
|
||||||
const char *addr;
|
|
||||||
SPRINT_BUF(abuf);
|
SPRINT_BUF(abuf);
|
||||||
const char *dev;
|
const void *src;
|
||||||
int af;
|
int af;
|
||||||
|
|
||||||
if (filter_vlan && e->vid != filter_vlan)
|
if (filter_vlan && e->vid != filter_vlan)
|
||||||
return;
|
return;
|
||||||
|
af = e->addr.proto == htons(ETH_P_IP) ? AF_INET : AF_INET6;
|
||||||
if (!e->addr.proto) {
|
src = af == AF_INET ? (const void *)&e->addr.u.ip4 :
|
||||||
af = AF_PACKET;
|
(const void *)&e->addr.u.ip6;
|
||||||
grp = &e->addr.u.mac_addr;
|
if (n->nlmsg_type == RTM_DELMDB)
|
||||||
} else if (e->addr.proto == htons(ETH_P_IP)) {
|
fprintf(f, "Deleted ");
|
||||||
af = AF_INET;
|
fprintf(f, "dev %s port %s grp %s %s %s", ll_index_to_name(ifindex),
|
||||||
grp = &e->addr.u.ip4;
|
ll_index_to_name(e->ifindex),
|
||||||
} else {
|
inet_ntop(af, src, abuf, sizeof(abuf)),
|
||||||
af = AF_INET6;
|
(e->state & MDB_PERMANENT) ? "permanent" : "temp",
|
||||||
grp = &e->addr.u.ip6;
|
(e->flags & MDB_FLAGS_OFFLOAD) ? "offload" : "");
|
||||||
}
|
|
||||||
dev = ll_index_to_name(ifindex);
|
|
||||||
|
|
||||||
open_json_object(NULL);
|
|
||||||
|
|
||||||
print_int(PRINT_JSON, "index", NULL, ifindex);
|
|
||||||
print_color_string(PRINT_ANY, COLOR_IFNAME, "dev", "dev %s", dev);
|
|
||||||
print_string(PRINT_ANY, "port", " port %s",
|
|
||||||
ll_index_to_name(e->ifindex));
|
|
||||||
|
|
||||||
/* The ETH_ALEN argument is ignored for all cases but AF_PACKET */
|
|
||||||
addr = rt_addr_n2a_r(af, ETH_ALEN, grp, abuf, sizeof(abuf));
|
|
||||||
if (!addr)
|
|
||||||
return;
|
|
||||||
|
|
||||||
print_color_string(PRINT_ANY, ifa_family_color(af),
|
|
||||||
"grp", " grp %s", addr);
|
|
||||||
|
|
||||||
if (tb && tb[MDBA_MDB_EATTR_SOURCE]) {
|
|
||||||
src = (const void *)RTA_DATA(tb[MDBA_MDB_EATTR_SOURCE]);
|
|
||||||
print_color_string(PRINT_ANY, ifa_family_color(af),
|
|
||||||
"src", " src %s",
|
|
||||||
inet_ntop(af, src, abuf, sizeof(abuf)));
|
|
||||||
}
|
|
||||||
print_string(PRINT_ANY, "state", " %s",
|
|
||||||
(e->state & MDB_PERMANENT) ? "permanent" : "temp");
|
|
||||||
if (show_details && tb) {
|
|
||||||
if (tb[MDBA_MDB_EATTR_GROUP_MODE]) {
|
|
||||||
__u8 mode = rta_getattr_u8(tb[MDBA_MDB_EATTR_GROUP_MODE]);
|
|
||||||
|
|
||||||
print_string(PRINT_ANY, "filter_mode", " filter_mode %s",
|
|
||||||
mode == MCAST_INCLUDE ? "include" :
|
|
||||||
"exclude");
|
|
||||||
}
|
|
||||||
if (tb[MDBA_MDB_EATTR_SRC_LIST]) {
|
|
||||||
struct rtattr *i, *attr = tb[MDBA_MDB_EATTR_SRC_LIST];
|
|
||||||
const char *sep = " ";
|
|
||||||
int rem;
|
|
||||||
|
|
||||||
open_json_array(PRINT_ANY, is_json_context() ?
|
|
||||||
"source_list" :
|
|
||||||
" source_list");
|
|
||||||
rem = RTA_PAYLOAD(attr);
|
|
||||||
for (i = RTA_DATA(attr); RTA_OK(i, rem);
|
|
||||||
i = RTA_NEXT(i, rem)) {
|
|
||||||
print_src_entry(i, af, sep);
|
|
||||||
sep = ",";
|
|
||||||
}
|
|
||||||
close_json_array(PRINT_JSON, NULL);
|
|
||||||
}
|
|
||||||
if (tb[MDBA_MDB_EATTR_RTPROT]) {
|
|
||||||
__u8 rtprot = rta_getattr_u8(tb[MDBA_MDB_EATTR_RTPROT]);
|
|
||||||
SPRINT_BUF(rtb);
|
|
||||||
|
|
||||||
print_string(PRINT_ANY, "protocol", " proto %s ",
|
|
||||||
rtnl_rtprot_n2a(rtprot, rtb, sizeof(rtb)));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
open_json_array(PRINT_JSON, "flags");
|
|
||||||
if (e->flags & MDB_FLAGS_OFFLOAD)
|
|
||||||
print_string(PRINT_ANY, NULL, " %s", "offload");
|
|
||||||
if (e->flags & MDB_FLAGS_FAST_LEAVE)
|
|
||||||
print_string(PRINT_ANY, NULL, " %s", "fast_leave");
|
|
||||||
if (e->flags & MDB_FLAGS_STAR_EXCL)
|
|
||||||
print_string(PRINT_ANY, NULL, " %s", "added_by_star_ex");
|
|
||||||
if (e->flags & MDB_FLAGS_BLOCKED)
|
|
||||||
print_string(PRINT_ANY, NULL, " %s", "blocked");
|
|
||||||
close_json_array(PRINT_JSON, NULL);
|
|
||||||
|
|
||||||
if (e->vid)
|
if (e->vid)
|
||||||
print_uint(PRINT_ANY, "vid", " vid %u", e->vid);
|
fprintf(f, " vid %hu", e->vid);
|
||||||
|
|
||||||
if (show_stats && tb && tb[MDBA_MDB_EATTR_TIMER]) {
|
if (show_stats && tb && tb[MDBA_MDB_EATTR_TIMER]) {
|
||||||
__u32 timer = rta_getattr_u32(tb[MDBA_MDB_EATTR_TIMER]);
|
struct timeval tv;
|
||||||
|
|
||||||
print_string(PRINT_ANY, "timer", " %s",
|
__jiffies_to_tv(&tv, rta_getattr_u32(tb[MDBA_MDB_EATTR_TIMER]));
|
||||||
format_timer(timer, 1));
|
fprintf(f, "%4i.%.2i", (int)tv.tv_sec, (int)tv.tv_usec/10000);
|
||||||
}
|
}
|
||||||
|
fprintf(f, "\n");
|
||||||
print_nl();
|
|
||||||
close_json_object();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static void br_print_mdb_entry(FILE *f, int ifindex, struct rtattr *attr,
|
static void br_print_mdb_entry(FILE *f, int ifindex, struct rtattr *attr,
|
||||||
|
|
@ -261,61 +126,21 @@ static void br_print_mdb_entry(FILE *f, int ifindex, struct rtattr *attr,
|
||||||
rem = RTA_PAYLOAD(attr);
|
rem = RTA_PAYLOAD(attr);
|
||||||
for (i = RTA_DATA(attr); RTA_OK(i, rem); i = RTA_NEXT(i, rem)) {
|
for (i = RTA_DATA(attr); RTA_OK(i, rem); i = RTA_NEXT(i, rem)) {
|
||||||
e = RTA_DATA(i);
|
e = RTA_DATA(i);
|
||||||
parse_rtattr_flags(etb, MDBA_MDB_EATTR_MAX, MDB_RTA(RTA_DATA(i)),
|
parse_rtattr(etb, MDBA_MDB_EATTR_MAX, MDB_RTA(RTA_DATA(i)),
|
||||||
RTA_PAYLOAD(i) - RTA_ALIGN(sizeof(*e)),
|
RTA_PAYLOAD(i) - RTA_ALIGN(sizeof(*e)));
|
||||||
NLA_F_NESTED);
|
|
||||||
print_mdb_entry(f, ifindex, e, n, etb);
|
print_mdb_entry(f, ifindex, e, n, etb);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static void print_mdb_entries(FILE *fp, struct nlmsghdr *n,
|
int print_mdb(const struct sockaddr_nl *who, struct nlmsghdr *n, void *arg)
|
||||||
int ifindex, struct rtattr *mdb)
|
|
||||||
{
|
|
||||||
int rem = RTA_PAYLOAD(mdb);
|
|
||||||
struct rtattr *i;
|
|
||||||
|
|
||||||
for (i = RTA_DATA(mdb); RTA_OK(i, rem); i = RTA_NEXT(i, rem))
|
|
||||||
br_print_mdb_entry(fp, ifindex, i, n);
|
|
||||||
}
|
|
||||||
|
|
||||||
static void print_router_entries(FILE *fp, struct nlmsghdr *n,
|
|
||||||
int ifindex, struct rtattr *router)
|
|
||||||
{
|
|
||||||
const char *brifname = ll_index_to_name(ifindex);
|
|
||||||
|
|
||||||
if (n->nlmsg_type == RTM_GETMDB) {
|
|
||||||
if (show_details)
|
|
||||||
br_print_router_ports(fp, router, brifname);
|
|
||||||
} else {
|
|
||||||
struct rtattr *i = RTA_DATA(router);
|
|
||||||
uint32_t *port_ifindex = RTA_DATA(i);
|
|
||||||
const char *port_name = ll_index_to_name(*port_ifindex);
|
|
||||||
|
|
||||||
if (is_json_context()) {
|
|
||||||
open_json_array(PRINT_JSON, brifname);
|
|
||||||
open_json_object(NULL);
|
|
||||||
|
|
||||||
print_string(PRINT_JSON, "port", NULL,
|
|
||||||
port_name);
|
|
||||||
close_json_object();
|
|
||||||
close_json_array(PRINT_JSON, NULL);
|
|
||||||
} else {
|
|
||||||
fprintf(fp, "router port dev %s master %s\n",
|
|
||||||
port_name, brifname);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
static int __parse_mdb_nlmsg(struct nlmsghdr *n, struct rtattr **tb)
|
|
||||||
{
|
{
|
||||||
|
FILE *fp = arg;
|
||||||
struct br_port_msg *r = NLMSG_DATA(n);
|
struct br_port_msg *r = NLMSG_DATA(n);
|
||||||
int len = n->nlmsg_len;
|
int len = n->nlmsg_len;
|
||||||
|
struct rtattr *tb[MDBA_MAX+1], *i;
|
||||||
|
|
||||||
if (n->nlmsg_type != RTM_GETMDB &&
|
if (n->nlmsg_type != RTM_GETMDB && n->nlmsg_type != RTM_NEWMDB && n->nlmsg_type != RTM_DELMDB) {
|
||||||
n->nlmsg_type != RTM_NEWMDB &&
|
fprintf(stderr, "Not RTM_GETMDB, RTM_NEWMDB or RTM_DELMDB: %08x %08x %08x\n",
|
||||||
n->nlmsg_type != RTM_DELMDB) {
|
|
||||||
fprintf(stderr,
|
|
||||||
"Not RTM_GETMDB, RTM_NEWMDB or RTM_DELMDB: %08x %08x %08x\n",
|
|
||||||
n->nlmsg_len, n->nlmsg_type, n->nlmsg_flags);
|
n->nlmsg_len, n->nlmsg_type, n->nlmsg_flags);
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
|
|
@ -332,62 +157,32 @@ static int __parse_mdb_nlmsg(struct nlmsghdr *n, struct rtattr **tb)
|
||||||
|
|
||||||
parse_rtattr(tb, MDBA_MAX, MDBA_RTA(r), n->nlmsg_len - NLMSG_LENGTH(sizeof(*r)));
|
parse_rtattr(tb, MDBA_MAX, MDBA_RTA(r), n->nlmsg_len - NLMSG_LENGTH(sizeof(*r)));
|
||||||
|
|
||||||
return 1;
|
if (tb[MDBA_MDB]) {
|
||||||
}
|
int rem = RTA_PAYLOAD(tb[MDBA_MDB]);
|
||||||
|
|
||||||
static int print_mdbs(struct nlmsghdr *n, void *arg)
|
for (i = RTA_DATA(tb[MDBA_MDB]); RTA_OK(i, rem); i = RTA_NEXT(i, rem))
|
||||||
{
|
br_print_mdb_entry(fp, r->ifindex, i, n);
|
||||||
struct br_port_msg *r = NLMSG_DATA(n);
|
}
|
||||||
struct rtattr *tb[MDBA_MAX+1];
|
|
||||||
FILE *fp = arg;
|
|
||||||
int ret;
|
|
||||||
|
|
||||||
ret = __parse_mdb_nlmsg(n, tb);
|
if (tb[MDBA_ROUTER]) {
|
||||||
if (ret != 1)
|
if (n->nlmsg_type == RTM_GETMDB) {
|
||||||
return ret;
|
if (show_details)
|
||||||
|
br_print_router_ports(fp, tb[MDBA_ROUTER],
|
||||||
|
r->ifindex);
|
||||||
|
} else {
|
||||||
|
uint32_t *port_ifindex;
|
||||||
|
|
||||||
if (tb[MDBA_MDB])
|
i = RTA_DATA(tb[MDBA_ROUTER]);
|
||||||
print_mdb_entries(fp, n, r->ifindex, tb[MDBA_MDB]);
|
port_ifindex = RTA_DATA(i);
|
||||||
|
if (n->nlmsg_type == RTM_DELMDB)
|
||||||
|
fprintf(fp, "Deleted ");
|
||||||
|
fprintf(fp, "router port dev %s master %s\n",
|
||||||
|
ll_index_to_name(*port_ifindex),
|
||||||
|
ll_index_to_name(r->ifindex));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
return 0;
|
fflush(fp);
|
||||||
}
|
|
||||||
|
|
||||||
static int print_rtrs(struct nlmsghdr *n, void *arg)
|
|
||||||
{
|
|
||||||
struct br_port_msg *r = NLMSG_DATA(n);
|
|
||||||
struct rtattr *tb[MDBA_MAX+1];
|
|
||||||
FILE *fp = arg;
|
|
||||||
int ret;
|
|
||||||
|
|
||||||
ret = __parse_mdb_nlmsg(n, tb);
|
|
||||||
if (ret != 1)
|
|
||||||
return ret;
|
|
||||||
|
|
||||||
if (tb[MDBA_ROUTER])
|
|
||||||
print_router_entries(fp, n, r->ifindex, tb[MDBA_ROUTER]);
|
|
||||||
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
int print_mdb_mon(struct nlmsghdr *n, void *arg)
|
|
||||||
{
|
|
||||||
struct br_port_msg *r = NLMSG_DATA(n);
|
|
||||||
struct rtattr *tb[MDBA_MAX+1];
|
|
||||||
FILE *fp = arg;
|
|
||||||
int ret;
|
|
||||||
|
|
||||||
ret = __parse_mdb_nlmsg(n, tb);
|
|
||||||
if (ret != 1)
|
|
||||||
return ret;
|
|
||||||
|
|
||||||
if (n->nlmsg_type == RTM_DELMDB)
|
|
||||||
print_bool(PRINT_ANY, "deleted", "Deleted ", true);
|
|
||||||
|
|
||||||
if (tb[MDBA_MDB])
|
|
||||||
print_mdb_entries(fp, n, r->ifindex, tb[MDBA_MDB]);
|
|
||||||
|
|
||||||
if (tb[MDBA_ROUTER])
|
|
||||||
print_router_entries(fp, n, r->ifindex, tb[MDBA_ROUTER]);
|
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
@ -412,66 +207,27 @@ static int mdb_show(int argc, char **argv)
|
||||||
}
|
}
|
||||||
|
|
||||||
if (filter_dev) {
|
if (filter_dev) {
|
||||||
filter_index = ll_name_to_index(filter_dev);
|
filter_index = if_nametoindex(filter_dev);
|
||||||
if (!filter_index)
|
if (filter_index == 0) {
|
||||||
return nodev(filter_dev);
|
fprintf(stderr, "Cannot find device \"%s\"\n",
|
||||||
|
filter_dev);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
new_json_obj(json);
|
if (rtnl_wilddump_request(&rth, PF_BRIDGE, RTM_GETMDB) < 0) {
|
||||||
open_json_object(NULL);
|
|
||||||
|
|
||||||
/* get mdb entries */
|
|
||||||
if (rtnl_mdbdump_req(&rth, PF_BRIDGE) < 0) {
|
|
||||||
perror("Cannot send dump request");
|
perror("Cannot send dump request");
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
open_json_array(PRINT_JSON, "mdb");
|
if (rtnl_dump_filter(&rth, print_mdb, stdout) < 0) {
|
||||||
if (rtnl_dump_filter(&rth, print_mdbs, stdout) < 0) {
|
|
||||||
fprintf(stderr, "Dump terminated\n");
|
fprintf(stderr, "Dump terminated\n");
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
close_json_array(PRINT_JSON, NULL);
|
|
||||||
|
|
||||||
/* get router ports */
|
|
||||||
if (rtnl_mdbdump_req(&rth, PF_BRIDGE) < 0) {
|
|
||||||
perror("Cannot send dump request");
|
|
||||||
return -1;
|
|
||||||
}
|
|
||||||
|
|
||||||
open_json_object("router");
|
|
||||||
if (rtnl_dump_filter(&rth, print_rtrs, stdout) < 0) {
|
|
||||||
fprintf(stderr, "Dump terminated\n");
|
|
||||||
return -1;
|
|
||||||
}
|
|
||||||
close_json_object();
|
|
||||||
|
|
||||||
close_json_object();
|
|
||||||
delete_json_obj();
|
|
||||||
fflush(stdout);
|
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
static int mdb_parse_grp(const char *grp, struct br_mdb_entry *e)
|
|
||||||
{
|
|
||||||
if (inet_pton(AF_INET, grp, &e->addr.u.ip4)) {
|
|
||||||
e->addr.proto = htons(ETH_P_IP);
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
if (inet_pton(AF_INET6, grp, &e->addr.u.ip6)) {
|
|
||||||
e->addr.proto = htons(ETH_P_IPV6);
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
if (ll_addr_a2n((char *)e->addr.u.mac_addr, sizeof(e->addr.u.mac_addr),
|
|
||||||
grp) == ETH_ALEN) {
|
|
||||||
e->addr.proto = 0;
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
return -1;
|
|
||||||
}
|
|
||||||
|
|
||||||
static int mdb_modify(int cmd, int flags, int argc, char **argv)
|
static int mdb_modify(int cmd, int flags, int argc, char **argv)
|
||||||
{
|
{
|
||||||
struct {
|
struct {
|
||||||
|
|
@ -484,8 +240,8 @@ static int mdb_modify(int cmd, int flags, int argc, char **argv)
|
||||||
.n.nlmsg_type = cmd,
|
.n.nlmsg_type = cmd,
|
||||||
.bpm.family = PF_BRIDGE,
|
.bpm.family = PF_BRIDGE,
|
||||||
};
|
};
|
||||||
char *d = NULL, *p = NULL, *grp = NULL, *src = NULL;
|
|
||||||
struct br_mdb_entry entry = {};
|
struct br_mdb_entry entry = {};
|
||||||
|
char *d = NULL, *p = NULL, *grp = NULL;
|
||||||
short vid = 0;
|
short vid = 0;
|
||||||
|
|
||||||
while (argc > 0) {
|
while (argc > 0) {
|
||||||
|
|
@ -506,9 +262,6 @@ static int mdb_modify(int cmd, int flags, int argc, char **argv)
|
||||||
} else if (strcmp(*argv, "vid") == 0) {
|
} else if (strcmp(*argv, "vid") == 0) {
|
||||||
NEXT_ARG();
|
NEXT_ARG();
|
||||||
vid = atoi(*argv);
|
vid = atoi(*argv);
|
||||||
} else if (strcmp(*argv, "src") == 0) {
|
|
||||||
NEXT_ARG();
|
|
||||||
src = *argv;
|
|
||||||
} else {
|
} else {
|
||||||
if (matches(*argv, "help") == 0)
|
if (matches(*argv, "help") == 0)
|
||||||
usage();
|
usage();
|
||||||
|
|
@ -522,40 +275,30 @@ static int mdb_modify(int cmd, int flags, int argc, char **argv)
|
||||||
}
|
}
|
||||||
|
|
||||||
req.bpm.ifindex = ll_name_to_index(d);
|
req.bpm.ifindex = ll_name_to_index(d);
|
||||||
if (!req.bpm.ifindex)
|
if (req.bpm.ifindex == 0) {
|
||||||
return nodev(d);
|
fprintf(stderr, "Cannot find device \"%s\"\n", d);
|
||||||
|
|
||||||
entry.ifindex = ll_name_to_index(p);
|
|
||||||
if (!entry.ifindex)
|
|
||||||
return nodev(p);
|
|
||||||
|
|
||||||
if (mdb_parse_grp(grp, &entry)) {
|
|
||||||
fprintf(stderr, "Invalid address \"%s\"\n", grp);
|
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
entry.vid = vid;
|
entry.ifindex = ll_name_to_index(p);
|
||||||
addattr_l(&req.n, sizeof(req), MDBA_SET_ENTRY, &entry, sizeof(entry));
|
if (entry.ifindex == 0) {
|
||||||
if (src) {
|
fprintf(stderr, "Cannot find device \"%s\"\n", p);
|
||||||
struct rtattr *nest = addattr_nest(&req.n, sizeof(req),
|
return -1;
|
||||||
MDBA_SET_ENTRY_ATTRS);
|
|
||||||
struct in6_addr src_ip6;
|
|
||||||
__be32 src_ip4;
|
|
||||||
|
|
||||||
nest->rta_type |= NLA_F_NESTED;
|
|
||||||
if (!inet_pton(AF_INET, src, &src_ip4)) {
|
|
||||||
if (!inet_pton(AF_INET6, src, &src_ip6)) {
|
|
||||||
fprintf(stderr, "Invalid source address \"%s\"\n", src);
|
|
||||||
return -1;
|
|
||||||
}
|
|
||||||
addattr_l(&req.n, sizeof(req), MDBE_ATTR_SOURCE, &src_ip6, sizeof(src_ip6));
|
|
||||||
} else {
|
|
||||||
addattr32(&req.n, sizeof(req), MDBE_ATTR_SOURCE, src_ip4);
|
|
||||||
}
|
|
||||||
addattr_nest_end(&req.n, nest);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if (rtnl_talk(&rth, &req.n, NULL) < 0)
|
if (!inet_pton(AF_INET, grp, &entry.addr.u.ip4)) {
|
||||||
|
if (!inet_pton(AF_INET6, grp, &entry.addr.u.ip6)) {
|
||||||
|
fprintf(stderr, "Invalid address \"%s\"\n", grp);
|
||||||
|
return -1;
|
||||||
|
} else
|
||||||
|
entry.addr.proto = htons(ETH_P_IPV6);
|
||||||
|
} else
|
||||||
|
entry.addr.proto = htons(ETH_P_IP);
|
||||||
|
|
||||||
|
entry.vid = vid;
|
||||||
|
addattr_l(&req.n, sizeof(req), MDBA_SET_ENTRY, &entry, sizeof(entry));
|
||||||
|
|
||||||
|
if (rtnl_talk(&rth, &req.n, NULL, 0) < 0)
|
||||||
return -1;
|
return -1;
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
|
|
|
||||||
|
|
@ -27,15 +27,16 @@
|
||||||
|
|
||||||
|
|
||||||
static void usage(void) __attribute__((noreturn));
|
static void usage(void) __attribute__((noreturn));
|
||||||
static int prefix_banner;
|
int prefix_banner;
|
||||||
|
|
||||||
static void usage(void)
|
static void usage(void)
|
||||||
{
|
{
|
||||||
fprintf(stderr, "Usage: bridge monitor [file | link | fdb | mdb | vlan | all]\n");
|
fprintf(stderr, "Usage: bridge monitor [file | link | fdb | mdb | all]\n");
|
||||||
exit(-1);
|
exit(-1);
|
||||||
}
|
}
|
||||||
|
|
||||||
static int accept_msg(struct rtnl_ctrl_data *ctrl,
|
static int accept_msg(const struct sockaddr_nl *who,
|
||||||
|
struct rtnl_ctrl_data *ctrl,
|
||||||
struct nlmsghdr *n, void *arg)
|
struct nlmsghdr *n, void *arg)
|
||||||
{
|
{
|
||||||
FILE *fp = arg;
|
FILE *fp = arg;
|
||||||
|
|
@ -49,30 +50,24 @@ static int accept_msg(struct rtnl_ctrl_data *ctrl,
|
||||||
if (prefix_banner)
|
if (prefix_banner)
|
||||||
fprintf(fp, "[LINK]");
|
fprintf(fp, "[LINK]");
|
||||||
|
|
||||||
return print_linkinfo(n, arg);
|
return print_linkinfo(who, n, arg);
|
||||||
|
|
||||||
case RTM_NEWNEIGH:
|
case RTM_NEWNEIGH:
|
||||||
case RTM_DELNEIGH:
|
case RTM_DELNEIGH:
|
||||||
if (prefix_banner)
|
if (prefix_banner)
|
||||||
fprintf(fp, "[NEIGH]");
|
fprintf(fp, "[NEIGH]");
|
||||||
return print_fdb(n, arg);
|
return print_fdb(who, n, arg);
|
||||||
|
|
||||||
case RTM_NEWMDB:
|
case RTM_NEWMDB:
|
||||||
case RTM_DELMDB:
|
case RTM_DELMDB:
|
||||||
if (prefix_banner)
|
if (prefix_banner)
|
||||||
fprintf(fp, "[MDB]");
|
fprintf(fp, "[MDB]");
|
||||||
return print_mdb_mon(n, arg);
|
return print_mdb(who, n, arg);
|
||||||
|
|
||||||
case NLMSG_TSTAMP:
|
case NLMSG_TSTAMP:
|
||||||
print_nlmsg_timestamp(fp, n);
|
print_nlmsg_timestamp(fp, n);
|
||||||
return 0;
|
return 0;
|
||||||
|
|
||||||
case RTM_NEWVLAN:
|
|
||||||
case RTM_DELVLAN:
|
|
||||||
if (prefix_banner)
|
|
||||||
fprintf(fp, "[VLAN]");
|
|
||||||
return print_vlan_rtm(n, arg, true, false);
|
|
||||||
|
|
||||||
default:
|
default:
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
@ -85,7 +80,6 @@ int do_monitor(int argc, char **argv)
|
||||||
int llink = 0;
|
int llink = 0;
|
||||||
int lneigh = 0;
|
int lneigh = 0;
|
||||||
int lmdb = 0;
|
int lmdb = 0;
|
||||||
int lvlan = 0;
|
|
||||||
|
|
||||||
rtnl_close(&rth);
|
rtnl_close(&rth);
|
||||||
|
|
||||||
|
|
@ -102,12 +96,8 @@ int do_monitor(int argc, char **argv)
|
||||||
} else if (matches(*argv, "mdb") == 0) {
|
} else if (matches(*argv, "mdb") == 0) {
|
||||||
lmdb = 1;
|
lmdb = 1;
|
||||||
groups = 0;
|
groups = 0;
|
||||||
} else if (matches(*argv, "vlan") == 0) {
|
|
||||||
lvlan = 1;
|
|
||||||
groups = 0;
|
|
||||||
} else if (strcmp(*argv, "all") == 0) {
|
} else if (strcmp(*argv, "all") == 0) {
|
||||||
groups = ~RTMGRP_TC;
|
groups = ~RTMGRP_TC;
|
||||||
lvlan = 1;
|
|
||||||
prefix_banner = 1;
|
prefix_banner = 1;
|
||||||
} else if (matches(*argv, "help") == 0) {
|
} else if (matches(*argv, "help") == 0) {
|
||||||
usage();
|
usage();
|
||||||
|
|
@ -145,12 +135,6 @@ int do_monitor(int argc, char **argv)
|
||||||
|
|
||||||
if (rtnl_open(&rth, groups) < 0)
|
if (rtnl_open(&rth, groups) < 0)
|
||||||
exit(1);
|
exit(1);
|
||||||
|
|
||||||
if (lvlan && rtnl_add_nl_group(&rth, RTNLGRP_BRVLAN) < 0) {
|
|
||||||
fprintf(stderr, "Failed to add bridge vlan group to list\n");
|
|
||||||
exit(1);
|
|
||||||
}
|
|
||||||
|
|
||||||
ll_init_map(&rth);
|
ll_init_map(&rth);
|
||||||
|
|
||||||
if (rtnl_listen(&rth, accept_msg, stdout) < 0)
|
if (rtnl_listen(&rth, accept_msg, stdout) < 0)
|
||||||
|
|
|
||||||
1263
bridge/vlan.c
1263
bridge/vlan.c
File diff suppressed because it is too large
Load Diff
|
|
@ -1,28 +1,38 @@
|
||||||
#!/bin/sh
|
#! /bin/bash
|
||||||
# SPDX-License-Identifier: GPL-2.0
|
|
||||||
# This is not an autoconf generated configure
|
# This is not an autoconf generated configure
|
||||||
|
#
|
||||||
INCLUDE="$PWD/include"
|
INCLUDE=${1:-"$PWD/include"}
|
||||||
PREFIX="/usr"
|
|
||||||
LIBDIR="\${prefix}/lib"
|
|
||||||
|
|
||||||
# Output file which is input to Makefile
|
|
||||||
CONFIG=config.mk
|
|
||||||
|
|
||||||
# Make a temp directory in build tree.
|
# Make a temp directory in build tree.
|
||||||
TMPDIR=$(mktemp -d config.XXXXXX)
|
TMPDIR=$(mktemp -d config.XXXXXX)
|
||||||
trap 'status=$?; rm -rf $TMPDIR; exit $status' EXIT HUP INT QUIT TERM
|
trap 'status=$?; rm -rf $TMPDIR; exit $status' EXIT HUP INT QUIT TERM
|
||||||
|
|
||||||
|
check_prog()
|
||||||
|
{
|
||||||
|
echo -n "$2"
|
||||||
|
command -v $1 >/dev/null 2>&1 && (echo "$3:=y" >> Config; echo "yes") || (echo "no"; return 1)
|
||||||
|
}
|
||||||
|
|
||||||
|
check_docs()
|
||||||
|
{
|
||||||
|
if check_prog latex " latex: " HAVE_LATEX; then
|
||||||
|
check_prog pdflatex " pdflatex: " HAVE_PDFLATEX || echo " WARNING: no PDF docs can be built from LaTeX files"
|
||||||
|
check_prog sgml2latex " sgml2latex: " HAVE_SGML2LATEX || echo " WARNING: no LaTeX files can be build from SGML files"
|
||||||
|
else
|
||||||
|
echo " WARNING: no docs can be built from LaTeX files"
|
||||||
|
fi
|
||||||
|
|
||||||
|
check_prog sgml2html " sgml2html: " HAVE_SGML2HTML || echo " WARNING: no HTML docs can be built from SGML"
|
||||||
|
}
|
||||||
|
|
||||||
check_toolchain()
|
check_toolchain()
|
||||||
{
|
{
|
||||||
: ${PKG_CONFIG:=pkg-config}
|
: ${PKG_CONFIG:=pkg-config}
|
||||||
: ${AR=ar}
|
: ${AR=ar}
|
||||||
: ${CC=gcc}
|
: ${CC=gcc}
|
||||||
: ${YACC=bison}
|
echo "PKG_CONFIG:=${PKG_CONFIG}" >>Config
|
||||||
echo "PKG_CONFIG:=${PKG_CONFIG}" >>$CONFIG
|
echo "AR:=${AR}" >>Config
|
||||||
echo "AR:=${AR}" >>$CONFIG
|
echo "CC:=${CC}" >>Config
|
||||||
echo "CC:=${CC}" >>$CONFIG
|
|
||||||
echo "YACC:=${YACC}" >>$CONFIG
|
|
||||||
}
|
}
|
||||||
|
|
||||||
check_atm()
|
check_atm()
|
||||||
|
|
@ -36,8 +46,10 @@ int main(int argc, char **argv) {
|
||||||
}
|
}
|
||||||
EOF
|
EOF
|
||||||
|
|
||||||
if $CC -I$INCLUDE -o $TMPDIR/atmtest $TMPDIR/atmtest.c -latm >/dev/null 2>&1; then
|
$CC -I$INCLUDE -o $TMPDIR/atmtest $TMPDIR/atmtest.c -latm >/dev/null 2>&1
|
||||||
echo "TC_CONFIG_ATM:=y" >>$CONFIG
|
if [ $? -eq 0 ]
|
||||||
|
then
|
||||||
|
echo "TC_CONFIG_ATM:=y" >>Config
|
||||||
echo yes
|
echo yes
|
||||||
else
|
else
|
||||||
echo no
|
echo no
|
||||||
|
|
@ -47,8 +59,9 @@ EOF
|
||||||
|
|
||||||
check_xtables()
|
check_xtables()
|
||||||
{
|
{
|
||||||
if ! ${PKG_CONFIG} xtables --exists; then
|
if ! ${PKG_CONFIG} xtables --exists
|
||||||
echo "TC_CONFIG_NO_XT:=y" >>$CONFIG
|
then
|
||||||
|
echo "TC_CONFIG_NO_XT:=y" >>Config
|
||||||
fi
|
fi
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -75,8 +88,9 @@ int main(int argc, char **argv)
|
||||||
EOF
|
EOF
|
||||||
|
|
||||||
if $CC -I$INCLUDE $IPTC -o $TMPDIR/ipttest $TMPDIR/ipttest.c $IPTL \
|
if $CC -I$INCLUDE $IPTC -o $TMPDIR/ipttest $TMPDIR/ipttest.c $IPTL \
|
||||||
$(${PKG_CONFIG} xtables --cflags --libs) -ldl >/dev/null 2>&1; then
|
$(${PKG_CONFIG} xtables --cflags --libs) -ldl >/dev/null 2>&1
|
||||||
echo "TC_CONFIG_XT:=y" >>$CONFIG
|
then
|
||||||
|
echo "TC_CONFIG_XT:=y" >>Config
|
||||||
echo "using xtables"
|
echo "using xtables"
|
||||||
fi
|
fi
|
||||||
rm -f $TMPDIR/ipttest.c $TMPDIR/ipttest
|
rm -f $TMPDIR/ipttest.c $TMPDIR/ipttest
|
||||||
|
|
@ -84,10 +98,13 @@ EOF
|
||||||
|
|
||||||
check_xt_old()
|
check_xt_old()
|
||||||
{
|
{
|
||||||
# bail if previous XT checks has already succeeded.
|
# bail if previous XT checks has already succeded.
|
||||||
grep -q TC_CONFIG_XT $CONFIG && return
|
if grep -q TC_CONFIG_XT Config
|
||||||
|
then
|
||||||
|
return
|
||||||
|
fi
|
||||||
|
|
||||||
#check if we don't need our internal header ..
|
#check if we dont need our internal header ..
|
||||||
cat >$TMPDIR/ipttest.c <<EOF
|
cat >$TMPDIR/ipttest.c <<EOF
|
||||||
#include <xtables.h>
|
#include <xtables.h>
|
||||||
char *lib_dir;
|
char *lib_dir;
|
||||||
|
|
@ -109,8 +126,10 @@ int main(int argc, char **argv) {
|
||||||
|
|
||||||
EOF
|
EOF
|
||||||
|
|
||||||
if $CC -I$INCLUDE $IPTC -o $TMPDIR/ipttest $TMPDIR/ipttest.c $IPTL -ldl >/dev/null 2>&1; then
|
$CC -I$INCLUDE $IPTC -o $TMPDIR/ipttest $TMPDIR/ipttest.c $IPTL -ldl >/dev/null 2>&1
|
||||||
echo "TC_CONFIG_XT_OLD:=y" >>$CONFIG
|
if [ $? -eq 0 ]
|
||||||
|
then
|
||||||
|
echo "TC_CONFIG_XT_OLD:=y" >>Config
|
||||||
echo "using old xtables (no need for xt-internal.h)"
|
echo "using old xtables (no need for xt-internal.h)"
|
||||||
fi
|
fi
|
||||||
rm -f $TMPDIR/ipttest.c $TMPDIR/ipttest
|
rm -f $TMPDIR/ipttest.c $TMPDIR/ipttest
|
||||||
|
|
@ -118,8 +137,11 @@ EOF
|
||||||
|
|
||||||
check_xt_old_internal_h()
|
check_xt_old_internal_h()
|
||||||
{
|
{
|
||||||
# bail if previous XT checks has already succeeded.
|
# bail if previous XT checks has already succeded.
|
||||||
grep -q TC_CONFIG_XT $CONFIG && return
|
if grep -q TC_CONFIG_XT Config
|
||||||
|
then
|
||||||
|
return
|
||||||
|
fi
|
||||||
|
|
||||||
#check if we need our own internal.h
|
#check if we need our own internal.h
|
||||||
cat >$TMPDIR/ipttest.c <<EOF
|
cat >$TMPDIR/ipttest.c <<EOF
|
||||||
|
|
@ -143,25 +165,20 @@ int main(int argc, char **argv) {
|
||||||
}
|
}
|
||||||
|
|
||||||
EOF
|
EOF
|
||||||
if $CC -I$INCLUDE $IPTC -o $TMPDIR/ipttest $TMPDIR/ipttest.c $IPTL -ldl >/dev/null 2>&1; then
|
$CC -I$INCLUDE $IPTC -o $TMPDIR/ipttest $TMPDIR/ipttest.c $IPTL -ldl >/dev/null 2>&1
|
||||||
|
|
||||||
|
if [ $? -eq 0 ]
|
||||||
|
then
|
||||||
echo "using old xtables with xt-internal.h"
|
echo "using old xtables with xt-internal.h"
|
||||||
echo "TC_CONFIG_XT_OLD_H:=y" >>$CONFIG
|
echo "TC_CONFIG_XT_OLD_H:=y" >>Config
|
||||||
fi
|
fi
|
||||||
rm -f $TMPDIR/ipttest.c $TMPDIR/ipttest
|
rm -f $TMPDIR/ipttest.c $TMPDIR/ipttest
|
||||||
}
|
}
|
||||||
|
|
||||||
check_lib_dir()
|
|
||||||
{
|
|
||||||
LIBDIR=$(echo $LIBDIR | sed "s|\${prefix}|$PREFIX|")
|
|
||||||
|
|
||||||
echo -n "lib directory: "
|
|
||||||
echo "$LIBDIR"
|
|
||||||
echo "LIBDIR:=$LIBDIR" >> $CONFIG
|
|
||||||
}
|
|
||||||
|
|
||||||
check_ipt()
|
check_ipt()
|
||||||
{
|
{
|
||||||
if ! grep TC_CONFIG_XT $CONFIG > /dev/null; then
|
if ! grep TC_CONFIG_XT Config > /dev/null
|
||||||
|
then
|
||||||
echo "using iptables"
|
echo "using iptables"
|
||||||
fi
|
fi
|
||||||
}
|
}
|
||||||
|
|
@ -171,16 +188,16 @@ check_ipt_lib_dir()
|
||||||
IPT_LIB_DIR=$(${PKG_CONFIG} --variable=xtlibdir xtables)
|
IPT_LIB_DIR=$(${PKG_CONFIG} --variable=xtlibdir xtables)
|
||||||
if [ -n "$IPT_LIB_DIR" ]; then
|
if [ -n "$IPT_LIB_DIR" ]; then
|
||||||
echo $IPT_LIB_DIR
|
echo $IPT_LIB_DIR
|
||||||
echo "IPT_LIB_DIR:=$IPT_LIB_DIR" >> $CONFIG
|
echo "IPT_LIB_DIR:=$IPT_LIB_DIR" >> Config
|
||||||
return
|
return
|
||||||
fi
|
fi
|
||||||
|
|
||||||
for dir in /lib /usr/lib /usr/local/lib; do
|
for dir in /lib /usr/lib /usr/local/lib
|
||||||
for file in "xtables" "iptables"; do
|
do
|
||||||
file="$dir/$file/lib*t_*so"
|
for file in $dir/{xtables,iptables}/lib*t_*so ; do
|
||||||
if [ -f $file ]; then
|
if [ -f $file ]; then
|
||||||
echo ${file%/*}
|
echo ${file%/*}
|
||||||
echo "IPT_LIB_DIR:=${file%/*}" >> $CONFIG
|
echo "IPT_LIB_DIR:=${file%/*}" >> Config
|
||||||
return
|
return
|
||||||
fi
|
fi
|
||||||
done
|
done
|
||||||
|
|
@ -198,41 +215,17 @@ int main(int argc, char **argv)
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
EOF
|
EOF
|
||||||
if $CC -I$INCLUDE -o $TMPDIR/setnstest $TMPDIR/setnstest.c >/dev/null 2>&1; then
|
$CC -I$INCLUDE -o $TMPDIR/setnstest $TMPDIR/setnstest.c >/dev/null 2>&1
|
||||||
echo "IP_CONFIG_SETNS:=y" >>$CONFIG
|
if [ $? -eq 0 ]
|
||||||
|
then
|
||||||
|
echo "IP_CONFIG_SETNS:=y" >>Config
|
||||||
echo "yes"
|
echo "yes"
|
||||||
echo "CFLAGS += -DHAVE_SETNS" >>$CONFIG
|
|
||||||
else
|
else
|
||||||
echo "no"
|
echo "no"
|
||||||
fi
|
fi
|
||||||
rm -f $TMPDIR/setnstest.c $TMPDIR/setnstest
|
rm -f $TMPDIR/setnstest.c $TMPDIR/setnstest
|
||||||
}
|
}
|
||||||
|
|
||||||
check_name_to_handle_at()
|
|
||||||
{
|
|
||||||
cat >$TMPDIR/name_to_handle_at_test.c <<EOF
|
|
||||||
#define _GNU_SOURCE
|
|
||||||
#include <sys/types.h>
|
|
||||||
#include <sys/stat.h>
|
|
||||||
#include <fcntl.h>
|
|
||||||
int main(int argc, char **argv)
|
|
||||||
{
|
|
||||||
struct file_handle *fhp;
|
|
||||||
int mount_id, flags, dirfd;
|
|
||||||
char *pathname;
|
|
||||||
name_to_handle_at(dirfd, pathname, fhp, &mount_id, flags);
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
EOF
|
|
||||||
if $CC -I$INCLUDE -o $TMPDIR/name_to_handle_at_test $TMPDIR/name_to_handle_at_test.c >/dev/null 2>&1; then
|
|
||||||
echo "yes"
|
|
||||||
echo "CFLAGS += -DHAVE_HANDLE_AT" >>$CONFIG
|
|
||||||
else
|
|
||||||
echo "no"
|
|
||||||
fi
|
|
||||||
rm -f $TMPDIR/name_to_handle_at_test.c $TMPDIR/name_to_handle_at_test
|
|
||||||
}
|
|
||||||
|
|
||||||
check_ipset()
|
check_ipset()
|
||||||
{
|
{
|
||||||
cat >$TMPDIR/ipsettest.c <<EOF
|
cat >$TMPDIR/ipsettest.c <<EOF
|
||||||
|
|
@ -244,7 +237,7 @@ typedef unsigned short ip_set_id_t;
|
||||||
#include <linux/netfilter/xt_set.h>
|
#include <linux/netfilter/xt_set.h>
|
||||||
|
|
||||||
struct xt_set_info info;
|
struct xt_set_info info;
|
||||||
#if IPSET_PROTOCOL == 6 || IPSET_PROTOCOL == 7
|
#if IPSET_PROTOCOL == 6
|
||||||
int main(void)
|
int main(void)
|
||||||
{
|
{
|
||||||
return IPSET_MAXNAMELEN;
|
return IPSET_MAXNAMELEN;
|
||||||
|
|
@ -254,8 +247,9 @@ int main(void)
|
||||||
#endif
|
#endif
|
||||||
EOF
|
EOF
|
||||||
|
|
||||||
if $CC -I$INCLUDE -o $TMPDIR/ipsettest $TMPDIR/ipsettest.c >/dev/null 2>&1; then
|
if $CC -I$INCLUDE -o $TMPDIR/ipsettest $TMPDIR/ipsettest.c >/dev/null 2>&1
|
||||||
echo "TC_CONFIG_IPSET:=y" >>$CONFIG
|
then
|
||||||
|
echo "TC_CONFIG_IPSET:=y" >>Config
|
||||||
echo "yes"
|
echo "yes"
|
||||||
else
|
else
|
||||||
echo "no"
|
echo "no"
|
||||||
|
|
@ -265,131 +259,34 @@ EOF
|
||||||
|
|
||||||
check_elf()
|
check_elf()
|
||||||
{
|
{
|
||||||
if ${PKG_CONFIG} libelf --exists; then
|
cat >$TMPDIR/elftest.c <<EOF
|
||||||
echo "HAVE_ELF:=y" >>$CONFIG
|
#include <libelf.h>
|
||||||
echo "yes"
|
#include <gelf.h>
|
||||||
|
int main(void)
|
||||||
|
{
|
||||||
|
Elf_Scn *scn __attribute__((__unused__));
|
||||||
|
GElf_Shdr shdr __attribute__((__unused__));;
|
||||||
|
return elf_version(EV_CURRENT);
|
||||||
|
}
|
||||||
|
EOF
|
||||||
|
|
||||||
echo 'CFLAGS += -DHAVE_ELF' `${PKG_CONFIG} libelf --cflags` >> $CONFIG
|
if $CC -I$INCLUDE -o $TMPDIR/elftest $TMPDIR/elftest.c -lelf >/dev/null 2>&1
|
||||||
echo 'LDLIBS += ' `${PKG_CONFIG} libelf --libs` >>$CONFIG
|
then
|
||||||
|
echo "HAVE_ELF:=y" >>Config
|
||||||
|
echo "yes"
|
||||||
else
|
else
|
||||||
echo "no"
|
echo "no"
|
||||||
fi
|
fi
|
||||||
}
|
rm -f $TMPDIR/elftest.c $TMPDIR/elftest
|
||||||
|
|
||||||
have_libbpf_basic()
|
|
||||||
{
|
|
||||||
cat >$TMPDIR/libbpf_test.c <<EOF
|
|
||||||
#include <bpf/libbpf.h>
|
|
||||||
int main(int argc, char **argv) {
|
|
||||||
bpf_program__set_autoload(NULL, false);
|
|
||||||
bpf_map__ifindex(NULL);
|
|
||||||
bpf_map__set_pin_path(NULL, NULL);
|
|
||||||
bpf_object__open_file(NULL, NULL);
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
EOF
|
|
||||||
|
|
||||||
$CC -o $TMPDIR/libbpf_test $TMPDIR/libbpf_test.c $LIBBPF_CFLAGS $LIBBPF_LDLIBS >/dev/null 2>&1
|
|
||||||
local ret=$?
|
|
||||||
|
|
||||||
rm -f $TMPDIR/libbpf_test.c $TMPDIR/libbpf_test
|
|
||||||
return $ret
|
|
||||||
}
|
|
||||||
|
|
||||||
have_libbpf_sec_name()
|
|
||||||
{
|
|
||||||
cat >$TMPDIR/libbpf_sec_test.c <<EOF
|
|
||||||
#include <bpf/libbpf.h>
|
|
||||||
int main(int argc, char **argv) {
|
|
||||||
void *ptr;
|
|
||||||
bpf_program__section_name(NULL);
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
EOF
|
|
||||||
|
|
||||||
$CC -o $TMPDIR/libbpf_sec_test $TMPDIR/libbpf_sec_test.c $LIBBPF_CFLAGS $LIBBPF_LDLIBS >/dev/null 2>&1
|
|
||||||
local ret=$?
|
|
||||||
|
|
||||||
rm -f $TMPDIR/libbpf_sec_test.c $TMPDIR/libbpf_sec_test
|
|
||||||
return $ret
|
|
||||||
}
|
|
||||||
|
|
||||||
check_force_libbpf_on()
|
|
||||||
{
|
|
||||||
# if set LIBBPF_FORCE=on but no libbpf support, just exist the config
|
|
||||||
# process to make sure we don't build without libbpf.
|
|
||||||
if [ "$LIBBPF_FORCE" = on ]; then
|
|
||||||
echo " LIBBPF_FORCE=on set, but couldn't find a usable libbpf"
|
|
||||||
exit 1
|
|
||||||
fi
|
|
||||||
}
|
|
||||||
|
|
||||||
check_libbpf()
|
|
||||||
{
|
|
||||||
# if set LIBBPF_FORCE=off, disable libbpf entirely
|
|
||||||
if [ "$LIBBPF_FORCE" = off ]; then
|
|
||||||
echo "no"
|
|
||||||
return
|
|
||||||
fi
|
|
||||||
|
|
||||||
if ! ${PKG_CONFIG} libbpf --exists && [ -z "$LIBBPF_DIR" ] ; then
|
|
||||||
echo "no"
|
|
||||||
check_force_libbpf_on
|
|
||||||
return
|
|
||||||
fi
|
|
||||||
|
|
||||||
if [ $(uname -m) = x86_64 ]; then
|
|
||||||
local LIBBPF_LIBDIR="${LIBBPF_DIR}/usr/lib64"
|
|
||||||
else
|
|
||||||
local LIBBPF_LIBDIR="${LIBBPF_DIR}/usr/lib"
|
|
||||||
fi
|
|
||||||
|
|
||||||
if [ -n "$LIBBPF_DIR" ]; then
|
|
||||||
LIBBPF_CFLAGS="-I${LIBBPF_DIR}/usr/include"
|
|
||||||
LIBBPF_LDLIBS="${LIBBPF_LIBDIR}/libbpf.a -lz -lelf"
|
|
||||||
LIBBPF_VERSION=$(PKG_CONFIG_LIBDIR=${LIBBPF_LIBDIR}/pkgconfig ${PKG_CONFIG} libbpf --modversion)
|
|
||||||
else
|
|
||||||
LIBBPF_CFLAGS=$(${PKG_CONFIG} libbpf --cflags)
|
|
||||||
LIBBPF_LDLIBS=$(${PKG_CONFIG} libbpf --libs)
|
|
||||||
LIBBPF_VERSION=$(${PKG_CONFIG} libbpf --modversion)
|
|
||||||
fi
|
|
||||||
|
|
||||||
if ! have_libbpf_basic; then
|
|
||||||
echo "no"
|
|
||||||
echo " libbpf version $LIBBPF_VERSION is too low, please update it to at least 0.1.0"
|
|
||||||
check_force_libbpf_on
|
|
||||||
return
|
|
||||||
else
|
|
||||||
echo "HAVE_LIBBPF:=y" >> $CONFIG
|
|
||||||
echo 'CFLAGS += -DHAVE_LIBBPF ' $LIBBPF_CFLAGS >> $CONFIG
|
|
||||||
echo "CFLAGS += -DLIBBPF_VERSION=\\\"$LIBBPF_VERSION\\\"" >> $CONFIG
|
|
||||||
echo 'LDLIBS += ' $LIBBPF_LDLIBS >> $CONFIG
|
|
||||||
|
|
||||||
if [ -z "$LIBBPF_DIR" ]; then
|
|
||||||
echo "CFLAGS += -DLIBBPF_DYNAMIC" >> $CONFIG
|
|
||||||
fi
|
|
||||||
fi
|
|
||||||
|
|
||||||
# bpf_program__title() is deprecated since libbpf 0.2.0, use
|
|
||||||
# bpf_program__section_name() instead if we support
|
|
||||||
if have_libbpf_sec_name; then
|
|
||||||
echo "HAVE_LIBBPF_SECTION_NAME:=y" >> $CONFIG
|
|
||||||
echo 'CFLAGS += -DHAVE_LIBBPF_SECTION_NAME ' >> $CONFIG
|
|
||||||
fi
|
|
||||||
|
|
||||||
echo "yes"
|
|
||||||
echo " libbpf version $LIBBPF_VERSION"
|
|
||||||
}
|
}
|
||||||
|
|
||||||
check_selinux()
|
check_selinux()
|
||||||
# SELinux is a compile time option in the ss utility
|
# SELinux is a compile time option in the ss utility
|
||||||
{
|
{
|
||||||
if ${PKG_CONFIG} libselinux --exists; then
|
if ${PKG_CONFIG} libselinux --exists
|
||||||
echo "HAVE_SELINUX:=y" >>$CONFIG
|
then
|
||||||
|
echo "HAVE_SELINUX:=y" >>Config
|
||||||
echo "yes"
|
echo "yes"
|
||||||
|
|
||||||
echo 'LDLIBS +=' `${PKG_CONFIG} --libs libselinux` >>$CONFIG
|
|
||||||
echo 'CFLAGS += -DHAVE_SELINUX' `${PKG_CONFIG} --cflags libselinux` >>$CONFIG
|
|
||||||
else
|
else
|
||||||
echo "no"
|
echo "no"
|
||||||
fi
|
fi
|
||||||
|
|
@ -397,12 +294,10 @@ check_selinux()
|
||||||
|
|
||||||
check_mnl()
|
check_mnl()
|
||||||
{
|
{
|
||||||
if ${PKG_CONFIG} libmnl --exists; then
|
if ${PKG_CONFIG} libmnl --exists
|
||||||
echo "HAVE_MNL:=y" >>$CONFIG
|
then
|
||||||
|
echo "HAVE_MNL:=y" >>Config
|
||||||
echo "yes"
|
echo "yes"
|
||||||
|
|
||||||
echo 'CFLAGS += -DHAVE_LIBMNL' `${PKG_CONFIG} libmnl --cflags` >>$CONFIG
|
|
||||||
echo 'LDLIBS +=' `${PKG_CONFIG} libmnl --libs` >> $CONFIG
|
|
||||||
else
|
else
|
||||||
echo "no"
|
echo "no"
|
||||||
fi
|
fi
|
||||||
|
|
@ -419,8 +314,10 @@ int main(int argc, char **argv) {
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
EOF
|
EOF
|
||||||
if $CC -I$INCLUDE -o $TMPDIR/dbtest $TMPDIR/dbtest.c -ldb >/dev/null 2>&1; then
|
$CC -I$INCLUDE -o $TMPDIR/dbtest $TMPDIR/dbtest.c -ldb >/dev/null 2>&1
|
||||||
echo "HAVE_BERKELEY_DB:=y" >>$CONFIG
|
if [ $? -eq 0 ]
|
||||||
|
then
|
||||||
|
echo "HAVE_BERKELEY_DB:=y" >>Config
|
||||||
echo "yes"
|
echo "yes"
|
||||||
else
|
else
|
||||||
echo "no"
|
echo "no"
|
||||||
|
|
@ -428,44 +325,6 @@ EOF
|
||||||
rm -f $TMPDIR/dbtest.c $TMPDIR/dbtest
|
rm -f $TMPDIR/dbtest.c $TMPDIR/dbtest
|
||||||
}
|
}
|
||||||
|
|
||||||
check_strlcpy()
|
|
||||||
{
|
|
||||||
cat >$TMPDIR/strtest.c <<EOF
|
|
||||||
#include <string.h>
|
|
||||||
int main(int argc, char **argv) {
|
|
||||||
char dst[10];
|
|
||||||
strlcpy(dst, "test", sizeof(dst));
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
EOF
|
|
||||||
if $CC -I$INCLUDE -o $TMPDIR/strtest $TMPDIR/strtest.c >/dev/null 2>&1; then
|
|
||||||
echo "no"
|
|
||||||
else
|
|
||||||
if ${PKG_CONFIG} libbsd --exists; then
|
|
||||||
echo 'CFLAGS += -DHAVE_LIBBSD' `${PKG_CONFIG} libbsd --cflags` >>$CONFIG
|
|
||||||
echo 'LDLIBS +=' `${PKG_CONFIG} libbsd --libs` >> $CONFIG
|
|
||||||
echo "no"
|
|
||||||
else
|
|
||||||
echo 'CFLAGS += -DNEED_STRLCPY' >>$CONFIG
|
|
||||||
echo "yes"
|
|
||||||
fi
|
|
||||||
fi
|
|
||||||
rm -f $TMPDIR/strtest.c $TMPDIR/strtest
|
|
||||||
}
|
|
||||||
|
|
||||||
check_cap()
|
|
||||||
{
|
|
||||||
if ${PKG_CONFIG} libcap --exists; then
|
|
||||||
echo "HAVE_CAP:=y" >>$CONFIG
|
|
||||||
echo "yes"
|
|
||||||
|
|
||||||
echo 'CFLAGS += -DHAVE_LIBCAP' `${PKG_CONFIG} libcap --cflags` >>$CONFIG
|
|
||||||
echo 'LDLIBS +=' `${PKG_CONFIG} libcap --libs` >> $CONFIG
|
|
||||||
else
|
|
||||||
echo "no"
|
|
||||||
fi
|
|
||||||
}
|
|
||||||
|
|
||||||
quiet_config()
|
quiet_config()
|
||||||
{
|
{
|
||||||
cat <<EOF
|
cat <<EOF
|
||||||
|
|
@ -492,78 +351,8 @@ endif
|
||||||
EOF
|
EOF
|
||||||
}
|
}
|
||||||
|
|
||||||
usage()
|
echo "# Generated config based on" $INCLUDE >Config
|
||||||
{
|
quiet_config >> Config
|
||||||
cat <<EOF
|
|
||||||
Usage: $0 [OPTIONS]
|
|
||||||
--include_dir <dir> Path to iproute2 include dir
|
|
||||||
--libdir <dir> Path to iproute2 lib dir
|
|
||||||
--libbpf_dir <dir> Path to libbpf DESTDIR
|
|
||||||
--libbpf_force <on|off> Enable/disable libbpf by force. Available options:
|
|
||||||
on: require link against libbpf, quit config if no libbpf support
|
|
||||||
off: disable libbpf probing
|
|
||||||
--prefix <dir> Path prefix of the lib files to install
|
|
||||||
-h | --help Show this usage info
|
|
||||||
EOF
|
|
||||||
exit $1
|
|
||||||
}
|
|
||||||
|
|
||||||
# Compat with the old INCLUDE path setting method.
|
|
||||||
if [ $# -eq 1 ] && [ "$(echo $1 | cut -c 1)" != '-' ]; then
|
|
||||||
INCLUDE="$1"
|
|
||||||
else
|
|
||||||
while [ "$#" -gt 0 ]; do
|
|
||||||
case "$1" in
|
|
||||||
--include_dir)
|
|
||||||
shift
|
|
||||||
INCLUDE="$1" ;;
|
|
||||||
--include_dir=*)
|
|
||||||
INCLUDE="${1#*=}" ;;
|
|
||||||
--libdir)
|
|
||||||
shift
|
|
||||||
LIBDIR="$1" ;;
|
|
||||||
--libdir=*)
|
|
||||||
LIBDIR="${1#*=}" ;;
|
|
||||||
--libbpf_dir)
|
|
||||||
shift
|
|
||||||
LIBBPF_DIR="$1" ;;
|
|
||||||
--libbpf_dir=*)
|
|
||||||
LIBBPF_DIR="${1#*=}" ;;
|
|
||||||
--libbpf_force)
|
|
||||||
shift
|
|
||||||
LIBBPF_FORCE="$1" ;;
|
|
||||||
--libbpf_force=*)
|
|
||||||
LIBBPF_FORCE="${1#*=}" ;;
|
|
||||||
--prefix)
|
|
||||||
shift
|
|
||||||
PREFIX="$1" ;;
|
|
||||||
--prefix=*)
|
|
||||||
PREFIX="${1#*=}" ;;
|
|
||||||
-h | --help)
|
|
||||||
usage 0 ;;
|
|
||||||
--*)
|
|
||||||
;;
|
|
||||||
*)
|
|
||||||
usage 1 ;;
|
|
||||||
esac
|
|
||||||
[ "$#" -gt 0 ] && shift
|
|
||||||
done
|
|
||||||
fi
|
|
||||||
|
|
||||||
[ -d "$INCLUDE" ] || usage 1
|
|
||||||
if [ "${LIBBPF_DIR-unused}" != "unused" ]; then
|
|
||||||
[ -d "$LIBBPF_DIR" ] || usage 1
|
|
||||||
fi
|
|
||||||
if [ "${LIBBPF_FORCE-unused}" != "unused" ]; then
|
|
||||||
if [ "$LIBBPF_FORCE" != 'on' ] && [ "$LIBBPF_FORCE" != 'off' ]; then
|
|
||||||
usage 1
|
|
||||||
fi
|
|
||||||
fi
|
|
||||||
[ -z "$PREFIX" ] && usage 1
|
|
||||||
[ -z "$LIBDIR" ] && usage 1
|
|
||||||
|
|
||||||
echo "# Generated config based on" $INCLUDE >$CONFIG
|
|
||||||
quiet_config >> $CONFIG
|
|
||||||
|
|
||||||
check_toolchain
|
check_toolchain
|
||||||
|
|
||||||
|
|
@ -573,7 +362,8 @@ echo -n " ATM "
|
||||||
check_atm
|
check_atm
|
||||||
|
|
||||||
check_xtables
|
check_xtables
|
||||||
if ! grep -q TC_CONFIG_NO_XT $CONFIG; then
|
if ! grep -q TC_CONFIG_NO_XT Config
|
||||||
|
then
|
||||||
echo -n " IPT "
|
echo -n " IPT "
|
||||||
check_xt
|
check_xt
|
||||||
check_xt_old
|
check_xt_old
|
||||||
|
|
@ -585,8 +375,8 @@ if ! grep -q TC_CONFIG_NO_XT $CONFIG; then
|
||||||
fi
|
fi
|
||||||
|
|
||||||
echo
|
echo
|
||||||
check_lib_dir
|
if ! grep -q TC_CONFIG_NO_XT Config
|
||||||
if ! grep -q TC_CONFIG_NO_XT $CONFIG; then
|
then
|
||||||
echo -n "iptables modules directory: "
|
echo -n "iptables modules directory: "
|
||||||
check_ipt_lib_dir
|
check_ipt_lib_dir
|
||||||
fi
|
fi
|
||||||
|
|
@ -594,15 +384,9 @@ fi
|
||||||
echo -n "libc has setns: "
|
echo -n "libc has setns: "
|
||||||
check_setns
|
check_setns
|
||||||
|
|
||||||
echo -n "libc has name_to_handle_at: "
|
|
||||||
check_name_to_handle_at
|
|
||||||
|
|
||||||
echo -n "SELinux support: "
|
echo -n "SELinux support: "
|
||||||
check_selinux
|
check_selinux
|
||||||
|
|
||||||
echo -n "libbpf support: "
|
|
||||||
check_libbpf
|
|
||||||
|
|
||||||
echo -n "ELF support: "
|
echo -n "ELF support: "
|
||||||
check_elf
|
check_elf
|
||||||
|
|
||||||
|
|
@ -612,12 +396,11 @@ check_mnl
|
||||||
echo -n "Berkeley DB: "
|
echo -n "Berkeley DB: "
|
||||||
check_berkeley_db
|
check_berkeley_db
|
||||||
|
|
||||||
echo -n "need for strlcpy: "
|
echo
|
||||||
check_strlcpy
|
echo -n "docs:"
|
||||||
|
check_docs
|
||||||
|
echo
|
||||||
|
|
||||||
echo -n "libcap support: "
|
echo >> Config
|
||||||
check_cap
|
echo "%.o: %.c" >> Config
|
||||||
|
echo ' $(QUIET_CC)$(CC) $(CFLAGS) $(EXTRA_CFLAGS) -c -o $@ $<' >> Config
|
||||||
echo >> $CONFIG
|
|
||||||
echo "%.o: %.c" >> $CONFIG
|
|
||||||
echo ' $(QUIET_CC)$(CC) $(CFLAGS) $(EXTRA_CFLAGS) $(CPPFLAGS) -c -o $@ $<' >> $CONFIG
|
|
||||||
|
|
|
||||||
|
|
@ -1 +0,0 @@
|
||||||
dcb
|
|
||||||
31
dcb/Makefile
31
dcb/Makefile
|
|
@ -1,31 +0,0 @@
|
||||||
# SPDX-License-Identifier: GPL-2.0
|
|
||||||
include ../config.mk
|
|
||||||
|
|
||||||
TARGETS :=
|
|
||||||
|
|
||||||
ifeq ($(HAVE_MNL),y)
|
|
||||||
|
|
||||||
DCBOBJ = dcb.o \
|
|
||||||
dcb_app.o \
|
|
||||||
dcb_buffer.o \
|
|
||||||
dcb_dcbx.o \
|
|
||||||
dcb_ets.o \
|
|
||||||
dcb_maxrate.o \
|
|
||||||
dcb_pfc.o
|
|
||||||
TARGETS += dcb
|
|
||||||
LDLIBS += -lm
|
|
||||||
|
|
||||||
endif
|
|
||||||
|
|
||||||
all: $(TARGETS) $(LIBS)
|
|
||||||
|
|
||||||
dcb: $(DCBOBJ) $(LIBNETLINK)
|
|
||||||
$(QUIET_LINK)$(CC) $^ $(LDFLAGS) $(LDLIBS) -o $@
|
|
||||||
|
|
||||||
install: all
|
|
||||||
for i in $(TARGETS); \
|
|
||||||
do install -m 0755 $$i $(DESTDIR)$(SBINDIR); \
|
|
||||||
done
|
|
||||||
|
|
||||||
clean:
|
|
||||||
rm -f $(DCBOBJ) $(TARGETS)
|
|
||||||
611
dcb/dcb.c
611
dcb/dcb.c
|
|
@ -1,611 +0,0 @@
|
||||||
// SPDX-License-Identifier: GPL-2.0+
|
|
||||||
|
|
||||||
#include <inttypes.h>
|
|
||||||
#include <stdio.h>
|
|
||||||
#include <linux/dcbnl.h>
|
|
||||||
#include <libmnl/libmnl.h>
|
|
||||||
#include <getopt.h>
|
|
||||||
|
|
||||||
#include "dcb.h"
|
|
||||||
#include "mnl_utils.h"
|
|
||||||
#include "namespace.h"
|
|
||||||
#include "utils.h"
|
|
||||||
#include "version.h"
|
|
||||||
|
|
||||||
static int dcb_init(struct dcb *dcb)
|
|
||||||
{
|
|
||||||
dcb->buf = malloc(MNL_SOCKET_BUFFER_SIZE);
|
|
||||||
if (dcb->buf == NULL) {
|
|
||||||
perror("Netlink buffer allocation");
|
|
||||||
return -1;
|
|
||||||
}
|
|
||||||
|
|
||||||
dcb->nl = mnlu_socket_open(NETLINK_ROUTE);
|
|
||||||
if (dcb->nl == NULL) {
|
|
||||||
perror("Open netlink socket");
|
|
||||||
goto err_socket_open;
|
|
||||||
}
|
|
||||||
|
|
||||||
new_json_obj_plain(dcb->json_output);
|
|
||||||
return 0;
|
|
||||||
|
|
||||||
err_socket_open:
|
|
||||||
free(dcb->buf);
|
|
||||||
return -1;
|
|
||||||
}
|
|
||||||
|
|
||||||
static void dcb_fini(struct dcb *dcb)
|
|
||||||
{
|
|
||||||
delete_json_obj_plain();
|
|
||||||
mnl_socket_close(dcb->nl);
|
|
||||||
free(dcb->buf);
|
|
||||||
}
|
|
||||||
|
|
||||||
static struct dcb *dcb_alloc(void)
|
|
||||||
{
|
|
||||||
struct dcb *dcb;
|
|
||||||
|
|
||||||
dcb = calloc(1, sizeof(*dcb));
|
|
||||||
if (!dcb)
|
|
||||||
return NULL;
|
|
||||||
return dcb;
|
|
||||||
}
|
|
||||||
|
|
||||||
static void dcb_free(struct dcb *dcb)
|
|
||||||
{
|
|
||||||
free(dcb);
|
|
||||||
}
|
|
||||||
|
|
||||||
struct dcb_get_attribute {
|
|
||||||
struct dcb *dcb;
|
|
||||||
int attr;
|
|
||||||
void *payload;
|
|
||||||
__u16 payload_len;
|
|
||||||
};
|
|
||||||
|
|
||||||
static int dcb_get_attribute_attr_ieee_cb(const struct nlattr *attr, void *data)
|
|
||||||
{
|
|
||||||
struct dcb_get_attribute *ga = data;
|
|
||||||
|
|
||||||
if (mnl_attr_get_type(attr) != ga->attr)
|
|
||||||
return MNL_CB_OK;
|
|
||||||
|
|
||||||
ga->payload = mnl_attr_get_payload(attr);
|
|
||||||
ga->payload_len = mnl_attr_get_payload_len(attr);
|
|
||||||
return MNL_CB_STOP;
|
|
||||||
}
|
|
||||||
|
|
||||||
static int dcb_get_attribute_attr_cb(const struct nlattr *attr, void *data)
|
|
||||||
{
|
|
||||||
if (mnl_attr_get_type(attr) != DCB_ATTR_IEEE)
|
|
||||||
return MNL_CB_OK;
|
|
||||||
|
|
||||||
return mnl_attr_parse_nested(attr, dcb_get_attribute_attr_ieee_cb, data);
|
|
||||||
}
|
|
||||||
|
|
||||||
static int dcb_get_attribute_cb(const struct nlmsghdr *nlh, void *data)
|
|
||||||
{
|
|
||||||
return mnl_attr_parse(nlh, sizeof(struct dcbmsg), dcb_get_attribute_attr_cb, data);
|
|
||||||
}
|
|
||||||
|
|
||||||
static int dcb_get_attribute_bare_cb(const struct nlmsghdr *nlh, void *data)
|
|
||||||
{
|
|
||||||
/* Bare attributes (e.g. DCB_ATTR_DCBX) are not wrapped inside an IEEE
|
|
||||||
* container, so this does not have to go through unpacking in
|
|
||||||
* dcb_get_attribute_attr_cb().
|
|
||||||
*/
|
|
||||||
return mnl_attr_parse(nlh, sizeof(struct dcbmsg),
|
|
||||||
dcb_get_attribute_attr_ieee_cb, data);
|
|
||||||
}
|
|
||||||
|
|
||||||
struct dcb_set_attribute_response {
|
|
||||||
int response_attr;
|
|
||||||
};
|
|
||||||
|
|
||||||
static int dcb_set_attribute_attr_cb(const struct nlattr *attr, void *data)
|
|
||||||
{
|
|
||||||
struct dcb_set_attribute_response *resp = data;
|
|
||||||
uint16_t len;
|
|
||||||
uint8_t err;
|
|
||||||
|
|
||||||
if (mnl_attr_get_type(attr) != resp->response_attr)
|
|
||||||
return MNL_CB_OK;
|
|
||||||
|
|
||||||
len = mnl_attr_get_payload_len(attr);
|
|
||||||
if (len != 1) {
|
|
||||||
fprintf(stderr, "Response attribute expected to have size 1, not %d\n", len);
|
|
||||||
return MNL_CB_ERROR;
|
|
||||||
}
|
|
||||||
|
|
||||||
err = mnl_attr_get_u8(attr);
|
|
||||||
if (err) {
|
|
||||||
fprintf(stderr, "Error when attempting to set attribute: %s\n",
|
|
||||||
strerror(err));
|
|
||||||
return MNL_CB_ERROR;
|
|
||||||
}
|
|
||||||
|
|
||||||
return MNL_CB_STOP;
|
|
||||||
}
|
|
||||||
|
|
||||||
static int dcb_set_attribute_cb(const struct nlmsghdr *nlh, void *data)
|
|
||||||
{
|
|
||||||
return mnl_attr_parse(nlh, sizeof(struct dcbmsg), dcb_set_attribute_attr_cb, data);
|
|
||||||
}
|
|
||||||
|
|
||||||
static int dcb_talk(struct dcb *dcb, struct nlmsghdr *nlh, mnl_cb_t cb, void *data)
|
|
||||||
{
|
|
||||||
int ret;
|
|
||||||
|
|
||||||
ret = mnl_socket_sendto(dcb->nl, nlh, nlh->nlmsg_len);
|
|
||||||
if (ret < 0) {
|
|
||||||
perror("mnl_socket_sendto");
|
|
||||||
return -1;
|
|
||||||
}
|
|
||||||
|
|
||||||
return mnlu_socket_recv_run(dcb->nl, nlh->nlmsg_seq, dcb->buf, MNL_SOCKET_BUFFER_SIZE,
|
|
||||||
cb, data);
|
|
||||||
}
|
|
||||||
|
|
||||||
static struct nlmsghdr *dcb_prepare(struct dcb *dcb, const char *dev,
|
|
||||||
uint32_t nlmsg_type, uint8_t dcb_cmd)
|
|
||||||
{
|
|
||||||
struct dcbmsg dcbm = {
|
|
||||||
.cmd = dcb_cmd,
|
|
||||||
};
|
|
||||||
struct nlmsghdr *nlh;
|
|
||||||
|
|
||||||
nlh = mnlu_msg_prepare(dcb->buf, nlmsg_type, NLM_F_REQUEST, &dcbm, sizeof(dcbm));
|
|
||||||
mnl_attr_put_strz(nlh, DCB_ATTR_IFNAME, dev);
|
|
||||||
return nlh;
|
|
||||||
}
|
|
||||||
|
|
||||||
static int __dcb_get_attribute(struct dcb *dcb, int command,
|
|
||||||
const char *dev, int attr,
|
|
||||||
void **payload_p, __u16 *payload_len_p,
|
|
||||||
int (*get_attribute_cb)(const struct nlmsghdr *nlh,
|
|
||||||
void *data))
|
|
||||||
{
|
|
||||||
struct dcb_get_attribute ga;
|
|
||||||
struct nlmsghdr *nlh;
|
|
||||||
int ret;
|
|
||||||
|
|
||||||
nlh = dcb_prepare(dcb, dev, RTM_GETDCB, command);
|
|
||||||
|
|
||||||
ga = (struct dcb_get_attribute) {
|
|
||||||
.dcb = dcb,
|
|
||||||
.attr = attr,
|
|
||||||
.payload = NULL,
|
|
||||||
};
|
|
||||||
ret = dcb_talk(dcb, nlh, get_attribute_cb, &ga);
|
|
||||||
if (ret) {
|
|
||||||
perror("Attribute read");
|
|
||||||
return ret;
|
|
||||||
}
|
|
||||||
if (ga.payload == NULL) {
|
|
||||||
perror("Attribute not found");
|
|
||||||
return -ENOENT;
|
|
||||||
}
|
|
||||||
|
|
||||||
*payload_p = ga.payload;
|
|
||||||
*payload_len_p = ga.payload_len;
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
int dcb_get_attribute_va(struct dcb *dcb, const char *dev, int attr,
|
|
||||||
void **payload_p, __u16 *payload_len_p)
|
|
||||||
{
|
|
||||||
return __dcb_get_attribute(dcb, DCB_CMD_IEEE_GET, dev, attr,
|
|
||||||
payload_p, payload_len_p,
|
|
||||||
dcb_get_attribute_cb);
|
|
||||||
}
|
|
||||||
|
|
||||||
int dcb_get_attribute_bare(struct dcb *dcb, int cmd, const char *dev, int attr,
|
|
||||||
void **payload_p, __u16 *payload_len_p)
|
|
||||||
{
|
|
||||||
return __dcb_get_attribute(dcb, cmd, dev, attr,
|
|
||||||
payload_p, payload_len_p,
|
|
||||||
dcb_get_attribute_bare_cb);
|
|
||||||
}
|
|
||||||
|
|
||||||
int dcb_get_attribute(struct dcb *dcb, const char *dev, int attr, void *data, size_t data_len)
|
|
||||||
{
|
|
||||||
__u16 payload_len;
|
|
||||||
void *payload;
|
|
||||||
int ret;
|
|
||||||
|
|
||||||
ret = dcb_get_attribute_va(dcb, dev, attr, &payload, &payload_len);
|
|
||||||
if (ret)
|
|
||||||
return ret;
|
|
||||||
|
|
||||||
if (payload_len != data_len) {
|
|
||||||
fprintf(stderr, "Wrong len %d, expected %zd\n", payload_len, data_len);
|
|
||||||
return -EINVAL;
|
|
||||||
}
|
|
||||||
|
|
||||||
memcpy(data, payload, data_len);
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
static int __dcb_set_attribute(struct dcb *dcb, int command, const char *dev,
|
|
||||||
int (*cb)(struct dcb *, struct nlmsghdr *, void *),
|
|
||||||
void *data, int response_attr)
|
|
||||||
{
|
|
||||||
struct dcb_set_attribute_response resp = {
|
|
||||||
.response_attr = response_attr,
|
|
||||||
};
|
|
||||||
struct nlmsghdr *nlh;
|
|
||||||
int ret;
|
|
||||||
|
|
||||||
nlh = dcb_prepare(dcb, dev, RTM_SETDCB, command);
|
|
||||||
|
|
||||||
ret = cb(dcb, nlh, data);
|
|
||||||
if (ret)
|
|
||||||
return ret;
|
|
||||||
|
|
||||||
ret = dcb_talk(dcb, nlh, dcb_set_attribute_cb, &resp);
|
|
||||||
if (ret) {
|
|
||||||
perror("Attribute write");
|
|
||||||
return ret;
|
|
||||||
}
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
struct dcb_set_attribute_ieee_cb {
|
|
||||||
int (*cb)(struct dcb *dcb, struct nlmsghdr *nlh, void *data);
|
|
||||||
void *data;
|
|
||||||
};
|
|
||||||
|
|
||||||
static int dcb_set_attribute_ieee_cb(struct dcb *dcb, struct nlmsghdr *nlh, void *data)
|
|
||||||
{
|
|
||||||
struct dcb_set_attribute_ieee_cb *ieee_data = data;
|
|
||||||
struct nlattr *nest;
|
|
||||||
int ret;
|
|
||||||
|
|
||||||
nest = mnl_attr_nest_start(nlh, DCB_ATTR_IEEE);
|
|
||||||
ret = ieee_data->cb(dcb, nlh, ieee_data->data);
|
|
||||||
if (ret)
|
|
||||||
return ret;
|
|
||||||
mnl_attr_nest_end(nlh, nest);
|
|
||||||
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
int dcb_set_attribute_va(struct dcb *dcb, int command, const char *dev,
|
|
||||||
int (*cb)(struct dcb *dcb, struct nlmsghdr *nlh, void *data),
|
|
||||||
void *data)
|
|
||||||
{
|
|
||||||
struct dcb_set_attribute_ieee_cb ieee_data = {
|
|
||||||
.cb = cb,
|
|
||||||
.data = data,
|
|
||||||
};
|
|
||||||
|
|
||||||
return __dcb_set_attribute(dcb, command, dev,
|
|
||||||
&dcb_set_attribute_ieee_cb, &ieee_data,
|
|
||||||
DCB_ATTR_IEEE);
|
|
||||||
}
|
|
||||||
|
|
||||||
struct dcb_set_attribute {
|
|
||||||
int attr;
|
|
||||||
const void *data;
|
|
||||||
size_t data_len;
|
|
||||||
};
|
|
||||||
|
|
||||||
static int dcb_set_attribute_put(struct dcb *dcb, struct nlmsghdr *nlh, void *data)
|
|
||||||
{
|
|
||||||
struct dcb_set_attribute *dsa = data;
|
|
||||||
|
|
||||||
mnl_attr_put(nlh, dsa->attr, dsa->data_len, dsa->data);
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
int dcb_set_attribute(struct dcb *dcb, const char *dev, int attr, const void *data, size_t data_len)
|
|
||||||
{
|
|
||||||
struct dcb_set_attribute dsa = {
|
|
||||||
.attr = attr,
|
|
||||||
.data = data,
|
|
||||||
.data_len = data_len,
|
|
||||||
};
|
|
||||||
|
|
||||||
return dcb_set_attribute_va(dcb, DCB_CMD_IEEE_SET, dev,
|
|
||||||
&dcb_set_attribute_put, &dsa);
|
|
||||||
}
|
|
||||||
|
|
||||||
int dcb_set_attribute_bare(struct dcb *dcb, int command, const char *dev,
|
|
||||||
int attr, const void *data, size_t data_len,
|
|
||||||
int response_attr)
|
|
||||||
{
|
|
||||||
struct dcb_set_attribute dsa = {
|
|
||||||
.attr = attr,
|
|
||||||
.data = data,
|
|
||||||
.data_len = data_len,
|
|
||||||
};
|
|
||||||
|
|
||||||
return __dcb_set_attribute(dcb, command, dev,
|
|
||||||
&dcb_set_attribute_put, &dsa, response_attr);
|
|
||||||
}
|
|
||||||
|
|
||||||
void dcb_print_array_u8(const __u8 *array, size_t size)
|
|
||||||
{
|
|
||||||
SPRINT_BUF(b);
|
|
||||||
size_t i;
|
|
||||||
|
|
||||||
for (i = 0; i < size; i++) {
|
|
||||||
snprintf(b, sizeof(b), "%zd:%%d ", i);
|
|
||||||
print_uint(PRINT_ANY, NULL, b, array[i]);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
void dcb_print_array_u64(const __u64 *array, size_t size)
|
|
||||||
{
|
|
||||||
SPRINT_BUF(b);
|
|
||||||
size_t i;
|
|
||||||
|
|
||||||
for (i = 0; i < size; i++) {
|
|
||||||
snprintf(b, sizeof(b), "%zd:%%" PRIu64 " ", i);
|
|
||||||
print_u64(PRINT_ANY, NULL, b, array[i]);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
void dcb_print_array_on_off(const __u8 *array, size_t size)
|
|
||||||
{
|
|
||||||
SPRINT_BUF(b);
|
|
||||||
size_t i;
|
|
||||||
|
|
||||||
for (i = 0; i < size; i++) {
|
|
||||||
snprintf(b, sizeof(b), "%zd:%%s ", i);
|
|
||||||
print_on_off(PRINT_ANY, NULL, b, array[i]);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
void dcb_print_array_kw(const __u8 *array, size_t array_size,
|
|
||||||
const char *const kw[], size_t kw_size)
|
|
||||||
{
|
|
||||||
SPRINT_BUF(b);
|
|
||||||
size_t i;
|
|
||||||
|
|
||||||
for (i = 0; i < array_size; i++) {
|
|
||||||
__u8 emt = array[i];
|
|
||||||
|
|
||||||
snprintf(b, sizeof(b), "%zd:%%s ", i);
|
|
||||||
if (emt < kw_size && kw[emt])
|
|
||||||
print_string(PRINT_ANY, NULL, b, kw[emt]);
|
|
||||||
else
|
|
||||||
print_string(PRINT_ANY, NULL, b, "???");
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
void dcb_print_named_array(const char *json_name, const char *fp_name,
|
|
||||||
const __u8 *array, size_t size,
|
|
||||||
void (*print_array)(const __u8 *, size_t))
|
|
||||||
{
|
|
||||||
open_json_array(PRINT_JSON, json_name);
|
|
||||||
print_string(PRINT_FP, NULL, "%s ", fp_name);
|
|
||||||
print_array(array, size);
|
|
||||||
close_json_array(PRINT_JSON, json_name);
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
 * Validate one KEY:VALUE mapping and store it through @set_array.
 *
 * @what_key, @what_value: names used in diagnostics.
 * @key:        index to set; (__u32) -1 means "all keys 0..@max_key".
 * @max_key:    largest valid key (inclusive).
 * @value:      value to store.
 * @max_value:  largest valid value (inclusive).
 * @set_array:  called as set_array(key, value, @set_array_data) for each key
 *              to set.
 *
 * Returns 0 on success, -EINVAL (after printing a diagnostic to stderr) when
 * the key or the value is out of range; @set_array is not called then.
 */
int dcb_parse_mapping(const char *what_key, __u32 key, __u32 max_key,
		      const char *what_value, __u64 value, __u64 max_value,
		      void (*set_array)(__u32 index, __u64 value, void *data),
		      void *set_array_data)
{
	bool is_all = key == (__u32) -1;

	if (!is_all && key > max_key) {
		/* max_key is unsigned: %u, so large bounds don't print negative. */
		fprintf(stderr, "In %s:%s mapping, %s is expected to be 0..%u\n",
			what_key, what_value, what_key, max_key);
		return -EINVAL;
	}

	if (value > max_value) {
		fprintf(stderr, "In %s:%s mapping, %s is expected to be 0..%llu\n",
			what_key, what_value, what_value, max_value);
		return -EINVAL;
	}

	if (is_all) {
		for (key = 0; key <= max_key; key++)
			set_array(key, value, set_array_data);
	} else {
		set_array(key, value, set_array_data);
	}

	return 0;
}
|
|
||||||
|
|
||||||
/*
 * dcb_parse_mapping() setter: treat @data as an array of __u8 and store
 * @value (converted to __u8) at index @key.
 */
void dcb_set_u8(__u32 key, __u64 value, void *data)
{
	__u8 *slot = (__u8 *)data + key;

	*slot = value;
}
|
|
||||||
|
|
||||||
/*
 * dcb_parse_mapping() setter: treat @data as an array of __u32 and store
 * @value (converted to __u32) at index @key.
 */
void dcb_set_u32(__u32 key, __u64 value, void *data)
{
	__u32 *slot = (__u32 *)data + key;

	*slot = value;
}
|
|
||||||
|
|
||||||
/*
 * dcb_parse_mapping() setter: treat @data as an array of __u64 and store
 * @value at index @key.
 */
void dcb_set_u64(__u32 key, __u64 value, void *data)
{
	__u64 *slot = (__u64 *)data + key;

	*slot = value;
}
|
|
||||||
|
|
||||||
/*
 * Parse the leading "dev DEV" of a subcommand and hand the rest to @and_then.
 *
 * With no arguments, or "help", @help is called and 0 is returned. If the
 * first word is not "dev", a diagnostic is printed, @help is called and
 * -EINVAL is returned. Otherwise DEV is validated with check_ifname() and
 * @and_then(dcb, DEV, remaining argc, remaining argv) decides the result.
 */
int dcb_cmd_parse_dev(struct dcb *dcb, int argc, char **argv,
		      int (*and_then)(struct dcb *dcb, const char *dev,
				      int argc, char **argv),
		      void (*help)(void))
{
	const char *dev;

	if (!argc || matches(*argv, "help") == 0) {
		help();
		return 0;
	}

	if (matches(*argv, "dev") != 0) {
		/* Terminate the line so the help text starts on its own line. */
		fprintf(stderr, "Expected `dev DEV', not `%s'\n", *argv);
		help();
		return -EINVAL;
	}

	NEXT_ARG();
	dev = *argv;
	if (check_ifname(dev)) {
		invarg("not a valid ifname", *argv);
		return -EINVAL;
	}

	/* Step past DEV; there may legitimately be nothing after it. */
	NEXT_ARG_FWD();
	return and_then(dcb, dev, argc, argv);
}
|
|
||||||
|
|
||||||
/*
 * Print the top-level usage text to stderr.
 *
 * Only options that main() actually accepts are listed; "-v | --verbose" is
 * not advertised because main() has no such long option and treats -v as an
 * unknown option.
 */
static void dcb_help(void)
{
	fprintf(stderr,
		"Usage: dcb [ OPTIONS ] OBJECT { COMMAND | help }\n"
		" dcb [ -f | --force ] { -b | --batch } filename [ -n | --netns ] netnsname\n"
		"where OBJECT := { app | buffer | dcbx | ets | maxrate | pfc }\n"
		" OPTIONS := [ -V | --Version | -i | --iec | -j | --json\n"
		" | -N | --Numeric | -p | --pretty\n"
		" | -s | --statistics ]\n");
}
|
|
||||||
|
|
||||||
static int dcb_cmd(struct dcb *dcb, int argc, char **argv)
|
|
||||||
{
|
|
||||||
if (!argc || matches(*argv, "help") == 0) {
|
|
||||||
dcb_help();
|
|
||||||
return 0;
|
|
||||||
} else if (matches(*argv, "app") == 0) {
|
|
||||||
return dcb_cmd_app(dcb, argc - 1, argv + 1);
|
|
||||||
} else if (matches(*argv, "buffer") == 0) {
|
|
||||||
return dcb_cmd_buffer(dcb, argc - 1, argv + 1);
|
|
||||||
} else if (matches(*argv, "dcbx") == 0) {
|
|
||||||
return dcb_cmd_dcbx(dcb, argc - 1, argv + 1);
|
|
||||||
} else if (matches(*argv, "ets") == 0) {
|
|
||||||
return dcb_cmd_ets(dcb, argc - 1, argv + 1);
|
|
||||||
} else if (matches(*argv, "maxrate") == 0) {
|
|
||||||
return dcb_cmd_maxrate(dcb, argc - 1, argv + 1);
|
|
||||||
} else if (matches(*argv, "pfc") == 0) {
|
|
||||||
return dcb_cmd_pfc(dcb, argc - 1, argv + 1);
|
|
||||||
}
|
|
||||||
|
|
||||||
fprintf(stderr, "Object \"%s\" is unknown\n", *argv);
|
|
||||||
return -ENOENT;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* do_batch() callback: run one batch line through dcb_cmd(); @data is the struct dcb. */
static int dcb_batch_cmd(int argc, char *argv[], void *data)
{
	return dcb_cmd((struct dcb *)data, argc, argv);
}
|
|
||||||
|
|
||||||
static int dcb_batch(struct dcb *dcb, const char *name, bool force)
|
|
||||||
{
|
|
||||||
return do_batch(name, force, dcb_batch_cmd, dcb);
|
|
||||||
}
|
|
||||||
|
|
||||||
int main(int argc, char **argv)
|
|
||||||
{
|
|
||||||
static const struct option long_options[] = {
|
|
||||||
{ "Version", no_argument, NULL, 'V' },
|
|
||||||
{ "force", no_argument, NULL, 'f' },
|
|
||||||
{ "batch", required_argument, NULL, 'b' },
|
|
||||||
{ "iec", no_argument, NULL, 'i' },
|
|
||||||
{ "json", no_argument, NULL, 'j' },
|
|
||||||
{ "Numeric", no_argument, NULL, 'N' },
|
|
||||||
{ "pretty", no_argument, NULL, 'p' },
|
|
||||||
{ "statistics", no_argument, NULL, 's' },
|
|
||||||
{ "netns", required_argument, NULL, 'n' },
|
|
||||||
{ "help", no_argument, NULL, 'h' },
|
|
||||||
{ NULL, 0, NULL, 0 }
|
|
||||||
};
|
|
||||||
const char *batch_file = NULL;
|
|
||||||
bool force = false;
|
|
||||||
struct dcb *dcb;
|
|
||||||
int opt;
|
|
||||||
int err;
|
|
||||||
int ret;
|
|
||||||
|
|
||||||
dcb = dcb_alloc();
|
|
||||||
if (!dcb) {
|
|
||||||
fprintf(stderr, "Failed to allocate memory for dcb\n");
|
|
||||||
return EXIT_FAILURE;
|
|
||||||
}
|
|
||||||
|
|
||||||
while ((opt = getopt_long(argc, argv, "b:fhijn:psvNV",
|
|
||||||
long_options, NULL)) >= 0) {
|
|
||||||
|
|
||||||
switch (opt) {
|
|
||||||
case 'V':
|
|
||||||
printf("dcb utility, iproute2-%s\n", version);
|
|
||||||
ret = EXIT_SUCCESS;
|
|
||||||
goto dcb_free;
|
|
||||||
case 'f':
|
|
||||||
force = true;
|
|
||||||
break;
|
|
||||||
case 'b':
|
|
||||||
batch_file = optarg;
|
|
||||||
break;
|
|
||||||
case 'j':
|
|
||||||
dcb->json_output = true;
|
|
||||||
break;
|
|
||||||
case 'N':
|
|
||||||
dcb->numeric = true;
|
|
||||||
break;
|
|
||||||
case 'p':
|
|
||||||
pretty = true;
|
|
||||||
break;
|
|
||||||
case 's':
|
|
||||||
dcb->stats = true;
|
|
||||||
break;
|
|
||||||
case 'n':
|
|
||||||
if (netns_switch(optarg)) {
|
|
||||||
ret = EXIT_FAILURE;
|
|
||||||
goto dcb_free;
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
case 'i':
|
|
||||||
dcb->use_iec = true;
|
|
||||||
break;
|
|
||||||
case 'h':
|
|
||||||
dcb_help();
|
|
||||||
ret = EXIT_SUCCESS;
|
|
||||||
goto dcb_free;
|
|
||||||
default:
|
|
||||||
fprintf(stderr, "Unknown option.\n");
|
|
||||||
dcb_help();
|
|
||||||
ret = EXIT_FAILURE;
|
|
||||||
goto dcb_free;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
argc -= optind;
|
|
||||||
argv += optind;
|
|
||||||
|
|
||||||
err = dcb_init(dcb);
|
|
||||||
if (err) {
|
|
||||||
ret = EXIT_FAILURE;
|
|
||||||
goto dcb_free;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (batch_file)
|
|
||||||
err = dcb_batch(dcb, batch_file, force);
|
|
||||||
else
|
|
||||||
err = dcb_cmd(dcb, argc, argv);
|
|
||||||
|
|
||||||
if (err) {
|
|
||||||
ret = EXIT_FAILURE;
|
|
||||||
goto dcb_fini;
|
|
||||||
}
|
|
||||||
|
|
||||||
ret = EXIT_SUCCESS;
|
|
||||||
|
|
||||||
dcb_fini:
|
|
||||||
dcb_fini(dcb);
|
|
||||||
dcb_free:
|
|
||||||
dcb_free(dcb);
|
|
||||||
|
|
||||||
return ret;
|
|
||||||
}
|
|
||||||
81
dcb/dcb.h
81
dcb/dcb.h
|
|
@ -1,81 +0,0 @@
|
||||||
/* SPDX-License-Identifier: GPL-2.0 */
|
|
||||||
#ifndef __DCB_H__
|
|
||||||
#define __DCB_H__ 1
|
|
||||||
|
|
||||||
#include <libmnl/libmnl.h>
|
|
||||||
#include <stdbool.h>
|
|
||||||
#include <stddef.h>
|
|
||||||
|
|
||||||
/* dcb.c */
|
|
||||||
|
|
||||||
/* Per-invocation state of the dcb tool, shared by all subcommands. */
struct dcb {
	char *buf;		/* NOTE(review): presumably the netlink message buffer - confirm in dcb.c */
	struct mnl_socket *nl;	/* libmnl socket handle */
	bool json_output;	/* set by -j / --json */
	bool stats;		/* set by -s / --statistics */
	bool use_iec;		/* set by -i / --iec */
	bool numeric;		/* set by -N / --Numeric */
};
|
|
||||||
|
|
||||||
/* Argument parsing helpers. */
int dcb_parse_mapping(const char *what_key, __u32 key, __u32 max_key,
		      const char *what_value, __u64 value, __u64 max_value,
		      void (*set_array)(__u32 index, __u64 value, void *data),
		      void *set_array_data);
int dcb_cmd_parse_dev(struct dcb *dcb, int argc, char **argv,
		      int (*and_then)(struct dcb *dcb, const char *dev,
				      int argc, char **argv),
		      void (*help)(void));

/* dcb_parse_mapping() setters for u8 / u32 / u64 arrays. */
void dcb_set_u8(__u32 key, __u64 value, void *data);
void dcb_set_u32(__u32 key, __u64 value, void *data);
void dcb_set_u64(__u32 key, __u64 value, void *data);

/* Attribute get/set requests. */
int dcb_get_attribute(struct dcb *dcb, const char *dev, int attr,
		      void *data, size_t data_len);
int dcb_set_attribute(struct dcb *dcb, const char *dev, int attr,
		      const void *data, size_t data_len);
int dcb_get_attribute_va(struct dcb *dcb, const char *dev, int attr,
			 void **payload_p, __u16 *payload_len_p);
int dcb_set_attribute_va(struct dcb *dcb, int command, const char *dev,
			 int (*cb)(struct dcb *dcb, struct nlmsghdr *nlh, void *data),
			 void *data);
int dcb_get_attribute_bare(struct dcb *dcb, int cmd, const char *dev, int attr,
			   void **payload_p, __u16 *payload_len_p);
int dcb_set_attribute_bare(struct dcb *dcb, int command, const char *dev,
			   int attr, const void *data, size_t data_len,
			   int response_attr);

/* Array printers. */
void dcb_print_named_array(const char *json_name, const char *fp_name,
			   const __u8 *array, size_t size,
			   void (*print_array)(const __u8 *, size_t));
void dcb_print_array_u8(const __u8 *array, size_t size);
void dcb_print_array_u64(const __u64 *array, size_t size);
void dcb_print_array_on_off(const __u8 *array, size_t size);
void dcb_print_array_kw(const __u8 *array, size_t array_size,
			const char *const kw[], size_t kw_size);

/* dcb_app.c */

int dcb_cmd_app(struct dcb *dcb, int argc, char **argv);

/* dcb_buffer.c */

int dcb_cmd_buffer(struct dcb *dcb, int argc, char **argv);

/* dcb_dcbx.c */

int dcb_cmd_dcbx(struct dcb *dcb, int argc, char **argv);

/* dcb_ets.c */

int dcb_cmd_ets(struct dcb *dcb, int argc, char **argv);

/* dcb_maxrate.c */

int dcb_cmd_maxrate(struct dcb *dcb, int argc, char **argv);

/* dcb_pfc.c */

int dcb_cmd_pfc(struct dcb *dcb, int argc, char **argv);
|
|
||||||
|
|
||||||
#endif /* __DCB_H__ */
|
|
||||||
795
dcb/dcb_app.c
795
dcb/dcb_app.c
|
|
@ -1,795 +0,0 @@
|
||||||
// SPDX-License-Identifier: GPL-2.0+
|
|
||||||
|
|
||||||
#include <errno.h>
|
|
||||||
#include <inttypes.h>
|
|
||||||
#include <stdio.h>
|
|
||||||
#include <libmnl/libmnl.h>
|
|
||||||
#include <linux/dcbnl.h>
|
|
||||||
|
|
||||||
#include "dcb.h"
|
|
||||||
#include "utils.h"
|
|
||||||
#include "rt_names.h"
|
|
||||||
|
|
||||||
/*
 * Print usage for "dcb app { add | del | replace }" to stderr.
 *
 * The dscp-prio key is named DSCP so that it matches the DSCP entry of the
 * legend printed underneath.
 */
static void dcb_app_help_add(void)
{
	fprintf(stderr,
		"Usage: dcb app { add | del | replace } dev STRING\n"
		" [ default-prio PRIO ]\n"
		" [ ethtype-prio ET:PRIO ]\n"
		" [ stream-port-prio PORT:PRIO ]\n"
		" [ dgram-port-prio PORT:PRIO ]\n"
		" [ port-prio PORT:PRIO ]\n"
		" [ dscp-prio DSCP:PRIO ]\n"
		"\n"
		" where PRIO := { 0 .. 7 }\n"
		" ET := { 0x600 .. 0xffff }\n"
		" PORT := { 1 .. 65535 }\n"
		" DSCP := { 0 .. 63 }\n"
		"\n"
	);
}
|
|
||||||
|
|
||||||
/* Print usage for "dcb app { show | flush }" to stderr. */
static void dcb_app_help_show_flush(void)
{
	static const char usage[] =
		"Usage: dcb app { show | flush } dev STRING\n"
		" [ default-prio ]\n"
		" [ ethtype-prio ]\n"
		" [ stream-port-prio ]\n"
		" [ dgram-port-prio ]\n"
		" [ port-prio ]\n"
		" [ dscp-prio ]\n"
		"\n";

	fputs(usage, stderr);
}
|
|
||||||
|
|
||||||
/* Print the complete "dcb app" help: header, show/flush usage, add/del usage. */
static void dcb_app_help(void)
{
	fputs("Usage: dcb app help\n"
	      "\n",
	      stderr);

	dcb_app_help_show_flush();
	dcb_app_help_add();
}
|
|
||||||
|
|
||||||
/* Growable array of APP entries; released with dcb_app_table_fini(). */
struct dcb_app_table {
	struct dcb_app *apps;	/* heap array, grown by dcb_app_table_push() */
	size_t n_apps;		/* number of valid entries in @apps */
};
|
|
||||||
|
|
||||||
static void dcb_app_table_fini(struct dcb_app_table *tab)
|
|
||||||
{
|
|
||||||
free(tab->apps);
|
|
||||||
}
|
|
||||||
|
|
||||||
static int dcb_app_table_push(struct dcb_app_table *tab, struct dcb_app *app)
|
|
||||||
{
|
|
||||||
struct dcb_app *apps = realloc(tab->apps, (tab->n_apps + 1) * sizeof(*tab->apps));
|
|
||||||
|
|
||||||
if (apps == NULL) {
|
|
||||||
perror("Cannot allocate APP table");
|
|
||||||
return -ENOMEM;
|
|
||||||
}
|
|
||||||
|
|
||||||
tab->apps = apps;
|
|
||||||
tab->apps[tab->n_apps++] = *app;
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
static void dcb_app_table_remove_existing(struct dcb_app_table *a,
|
|
||||||
const struct dcb_app_table *b)
|
|
||||||
{
|
|
||||||
size_t ia, ja;
|
|
||||||
size_t ib;
|
|
||||||
|
|
||||||
for (ia = 0, ja = 0; ia < a->n_apps; ia++) {
|
|
||||||
struct dcb_app *aa = &a->apps[ia];
|
|
||||||
bool found = false;
|
|
||||||
|
|
||||||
for (ib = 0; ib < b->n_apps; ib++) {
|
|
||||||
const struct dcb_app *ab = &b->apps[ib];
|
|
||||||
|
|
||||||
if (aa->selector == ab->selector &&
|
|
||||||
aa->protocol == ab->protocol &&
|
|
||||||
aa->priority == ab->priority) {
|
|
||||||
found = true;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (!found)
|
|
||||||
a->apps[ja++] = *aa;
|
|
||||||
}
|
|
||||||
|
|
||||||
a->n_apps = ja;
|
|
||||||
}
|
|
||||||
|
|
||||||
static void dcb_app_table_remove_replaced(struct dcb_app_table *a,
|
|
||||||
const struct dcb_app_table *b)
|
|
||||||
{
|
|
||||||
size_t ia, ja;
|
|
||||||
size_t ib;
|
|
||||||
|
|
||||||
for (ia = 0, ja = 0; ia < a->n_apps; ia++) {
|
|
||||||
struct dcb_app *aa = &a->apps[ia];
|
|
||||||
bool present = false;
|
|
||||||
bool found = false;
|
|
||||||
|
|
||||||
for (ib = 0; ib < b->n_apps; ib++) {
|
|
||||||
const struct dcb_app *ab = &b->apps[ib];
|
|
||||||
|
|
||||||
if (aa->selector == ab->selector &&
|
|
||||||
aa->protocol == ab->protocol)
|
|
||||||
present = true;
|
|
||||||
else
|
|
||||||
continue;
|
|
||||||
|
|
||||||
if (aa->priority == ab->priority) {
|
|
||||||
found = true;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Entries that remain in A will be removed, so keep in the
|
|
||||||
* table only APP entries whose sel/pid is mentioned in B,
|
|
||||||
* but that do not have the full sel/pid/prio match.
|
|
||||||
*/
|
|
||||||
if (present && !found)
|
|
||||||
a->apps[ja++] = *aa;
|
|
||||||
}
|
|
||||||
|
|
||||||
a->n_apps = ja;
|
|
||||||
}
|
|
||||||
|
|
||||||
static int dcb_app_table_copy(struct dcb_app_table *a,
|
|
||||||
const struct dcb_app_table *b)
|
|
||||||
{
|
|
||||||
size_t i;
|
|
||||||
int ret;
|
|
||||||
|
|
||||||
for (i = 0; i < b->n_apps; i++) {
|
|
||||||
ret = dcb_app_table_push(a, &b->apps[i]);
|
|
||||||
if (ret != 0)
|
|
||||||
return ret;
|
|
||||||
}
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
 * Order two APP entries by protocol, then by priority. The selector is not
 * considered. Returns <0, 0 or >0 in the usual comparator fashion.
 */
static int dcb_app_cmp(const struct dcb_app *a, const struct dcb_app *b)
{
	if (a->protocol != b->protocol)
		return a->protocol < b->protocol ? -1 : 1;

	return a->priority - b->priority;
}
|
|
||||||
|
|
||||||
/* qsort() adapter around dcb_app_cmp(). */
static int dcb_app_cmp_cb(const void *a, const void *b)
{
	const struct dcb_app *lhs = a;
	const struct dcb_app *rhs = b;

	return dcb_app_cmp(lhs, rhs);
}
|
|
||||||
|
|
||||||
static void dcb_app_table_sort(struct dcb_app_table *tab)
|
|
||||||
{
|
|
||||||
qsort(tab->apps, tab->n_apps, sizeof(*tab->apps), dcb_app_cmp_cb);
|
|
||||||
}
|
|
||||||
|
|
||||||
/* State shared between the KEY:PRIO mapping parsers and their callback. */
struct dcb_app_parse_mapping {
	__u8 selector;			/* selector stamped on each new entry */
	struct dcb_app_table *tab;	/* table the parsed entries are pushed to */
	int err;			/* first dcb_app_table_push() failure, else 0 */
};
|
|
||||||
|
|
||||||
static void dcb_app_parse_mapping_cb(__u32 key, __u64 value, void *data)
|
|
||||||
{
|
|
||||||
struct dcb_app_parse_mapping *pm = data;
|
|
||||||
struct dcb_app app = {
|
|
||||||
.selector = pm->selector,
|
|
||||||
.priority = value,
|
|
||||||
.protocol = key,
|
|
||||||
};
|
|
||||||
|
|
||||||
if (pm->err)
|
|
||||||
return;
|
|
||||||
|
|
||||||
pm->err = dcb_app_table_push(pm->tab, &app);
|
|
||||||
}
|
|
||||||
|
|
||||||
static int dcb_app_parse_mapping_ethtype_prio(__u32 key, char *value, void *data)
|
|
||||||
{
|
|
||||||
__u8 prio;
|
|
||||||
|
|
||||||
if (key < 0x600) {
|
|
||||||
fprintf(stderr, "Protocol IDs < 0x600 are reserved for EtherType\n");
|
|
||||||
return -EINVAL;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (get_u8(&prio, value, 0))
|
|
||||||
return -EINVAL;
|
|
||||||
|
|
||||||
return dcb_parse_mapping("ETHTYPE", key, 0xffff,
|
|
||||||
"PRIO", prio, IEEE_8021QAZ_MAX_TCS - 1,
|
|
||||||
dcb_app_parse_mapping_cb, data);
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
 * Parse the key of a "dscp-prio" mapping into *@key.
 *
 * First tries parse_mapping_num_all(); if that fails, falls back to
 * rtnl_dsfield_a2n(), whose dsfield result must have its two low bits clear
 * and is then shifted right by two to get the DSCP value.
 * Returns 0 on success, -1 on failure.
 */
static int dcb_app_parse_dscp(__u32 *key, const char *arg)
{
	if (parse_mapping_num_all(key, arg) == 0)
		return 0;

	if (rtnl_dsfield_a2n(key, arg) != 0)
		return -1;

	if ((*key & 0x03) != 0) {
		fprintf(stderr, "The values `%s' uses non-DSCP bits.\n", arg);
		return -1;
	}

	/* dsfield carries DSCP in its upper six bits; drop the low two. */
	*key = *key >> 2;
	return 0;
}
|
|
||||||
|
|
||||||
static int dcb_app_parse_mapping_dscp_prio(__u32 key, char *value, void *data)
|
|
||||||
{
|
|
||||||
__u8 prio;
|
|
||||||
|
|
||||||
if (get_u8(&prio, value, 0))
|
|
||||||
return -EINVAL;
|
|
||||||
|
|
||||||
return dcb_parse_mapping("DSCP", key, 63,
|
|
||||||
"PRIO", prio, IEEE_8021QAZ_MAX_TCS - 1,
|
|
||||||
dcb_app_parse_mapping_cb, data);
|
|
||||||
}
|
|
||||||
|
|
||||||
static int dcb_app_parse_mapping_port_prio(__u32 key, char *value, void *data)
|
|
||||||
{
|
|
||||||
__u8 prio;
|
|
||||||
|
|
||||||
if (key == 0) {
|
|
||||||
fprintf(stderr, "Port ID of 0 is invalid\n");
|
|
||||||
return -EINVAL;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (get_u8(&prio, value, 0))
|
|
||||||
return -EINVAL;
|
|
||||||
|
|
||||||
return dcb_parse_mapping("PORT", key, 0xffff,
|
|
||||||
"PRIO", prio, IEEE_8021QAZ_MAX_TCS - 1,
|
|
||||||
dcb_app_parse_mapping_cb, data);
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
 * Consume the remaining arguments as a list of default priorities.
 *
 * Each argument is parsed as a u8 and pushed to @tab as an EtherType-selector
 * entry with protocol 0. Parsing stops at the first argument that is not a
 * valid u8 (returns 1) or at the first failing push (returns its error);
 * returns 0 once every argument was consumed. *@argcp / *@argvp are updated
 * to the position where parsing stopped.
 */
static int dcb_app_parse_default_prio(int *argcp, char ***argvp, struct dcb_app_table *tab)
{
	char **argv = *argvp;
	int argc = *argcp;
	int ret = 0;

	for (; argc > 0; argc--, argv++) {
		struct dcb_app app = {
			.selector = IEEE_8021QAZ_APP_SEL_ETHERTYPE,
			.protocol = 0,
		};
		__u8 prio;

		if (get_u8(&prio, *argv, 0)) {
			ret = 1;
			break;
		}
		app.priority = prio;

		ret = dcb_app_table_push(tab, &app);
		if (ret != 0)
			break;
	}

	*argcp = argc;
	*argvp = argv;
	return ret;
}
|
|
||||||
|
|
||||||
/* True for EtherType-selector entries with a non-zero protocol. */
static bool dcb_app_is_ethtype(const struct dcb_app *app)
{
	if (app->selector != IEEE_8021QAZ_APP_SEL_ETHERTYPE)
		return false;

	return app->protocol != 0;
}
|
|
||||||
|
|
||||||
/* True for default-priority entries: EtherType selector with protocol 0. */
static bool dcb_app_is_default(const struct dcb_app *app)
{
	if (app->selector != IEEE_8021QAZ_APP_SEL_ETHERTYPE)
		return false;

	return app->protocol == 0;
}
|
|
||||||
|
|
||||||
/* True for entries using the DSCP selector. */
static bool dcb_app_is_dscp(const struct dcb_app *app)
{
	const __u8 sel = app->selector;

	return sel == IEEE_8021QAZ_APP_SEL_DSCP;
}
|
|
||||||
|
|
||||||
/* True for entries using the STREAM selector. */
static bool dcb_app_is_stream_port(const struct dcb_app *app)
{
	const __u8 sel = app->selector;

	return sel == IEEE_8021QAZ_APP_SEL_STREAM;
}
|
|
||||||
|
|
||||||
/* True for entries using the DGRAM selector. */
static bool dcb_app_is_dgram_port(const struct dcb_app *app)
{
	const __u8 sel = app->selector;

	return sel == IEEE_8021QAZ_APP_SEL_DGRAM;
}
|
|
||||||
|
|
||||||
/* True for entries using the ANY selector (the "port-prio" keyword). */
static bool dcb_app_is_port(const struct dcb_app *app)
{
	const __u8 sel = app->selector;

	return sel == IEEE_8021QAZ_APP_SEL_ANY;
}
|
|
||||||
|
|
||||||
static int dcb_app_print_key_dec(__u16 protocol)
|
|
||||||
{
|
|
||||||
return print_uint(PRINT_ANY, NULL, "%d:", protocol);
|
|
||||||
}
|
|
||||||
|
|
||||||
static int dcb_app_print_key_hex(__u16 protocol)
|
|
||||||
{
|
|
||||||
return print_uint(PRINT_ANY, NULL, "%x:", protocol);
|
|
||||||
}
|
|
||||||
|
|
||||||
static int dcb_app_print_key_dscp(__u16 protocol)
|
|
||||||
{
|
|
||||||
const char *name = rtnl_dsfield_get_name(protocol << 2);
|
|
||||||
|
|
||||||
|
|
||||||
if (!is_json_context() && name != NULL)
|
|
||||||
return print_string(PRINT_FP, NULL, "%s:", name);
|
|
||||||
return print_uint(PRINT_ANY, NULL, "%d:", protocol);
|
|
||||||
}
|
|
||||||
|
|
||||||
static void dcb_app_print_filtered(const struct dcb_app_table *tab,
|
|
||||||
bool (*filter)(const struct dcb_app *),
|
|
||||||
int (*print_key)(__u16 protocol),
|
|
||||||
const char *json_name,
|
|
||||||
const char *fp_name)
|
|
||||||
{
|
|
||||||
bool first = true;
|
|
||||||
size_t i;
|
|
||||||
|
|
||||||
for (i = 0; i < tab->n_apps; i++) {
|
|
||||||
struct dcb_app *app = &tab->apps[i];
|
|
||||||
|
|
||||||
if (!filter(app))
|
|
||||||
continue;
|
|
||||||
if (first) {
|
|
||||||
open_json_array(PRINT_JSON, json_name);
|
|
||||||
print_string(PRINT_FP, NULL, "%s ", fp_name);
|
|
||||||
first = false;
|
|
||||||
}
|
|
||||||
|
|
||||||
open_json_array(PRINT_JSON, NULL);
|
|
||||||
print_key(app->protocol);
|
|
||||||
print_uint(PRINT_ANY, NULL, "%d ", app->priority);
|
|
||||||
close_json_array(PRINT_JSON, NULL);
|
|
||||||
}
|
|
||||||
|
|
||||||
if (!first) {
|
|
||||||
close_json_array(PRINT_JSON, json_name);
|
|
||||||
print_nl();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
static void dcb_app_print_ethtype_prio(const struct dcb_app_table *tab)
|
|
||||||
{
|
|
||||||
dcb_app_print_filtered(tab, dcb_app_is_ethtype, dcb_app_print_key_hex,
|
|
||||||
"ethtype_prio", "ethtype-prio");
|
|
||||||
}
|
|
||||||
|
|
||||||
static void dcb_app_print_dscp_prio(const struct dcb *dcb,
|
|
||||||
const struct dcb_app_table *tab)
|
|
||||||
{
|
|
||||||
dcb_app_print_filtered(tab, dcb_app_is_dscp,
|
|
||||||
dcb->numeric ? dcb_app_print_key_dec
|
|
||||||
: dcb_app_print_key_dscp,
|
|
||||||
"dscp_prio", "dscp-prio");
|
|
||||||
}
|
|
||||||
|
|
||||||
static void dcb_app_print_stream_port_prio(const struct dcb_app_table *tab)
|
|
||||||
{
|
|
||||||
dcb_app_print_filtered(tab, dcb_app_is_stream_port, dcb_app_print_key_dec,
|
|
||||||
"stream_port_prio", "stream-port-prio");
|
|
||||||
}
|
|
||||||
|
|
||||||
static void dcb_app_print_dgram_port_prio(const struct dcb_app_table *tab)
|
|
||||||
{
|
|
||||||
dcb_app_print_filtered(tab, dcb_app_is_dgram_port, dcb_app_print_key_dec,
|
|
||||||
"dgram_port_prio", "dgram-port-prio");
|
|
||||||
}
|
|
||||||
|
|
||||||
static void dcb_app_print_port_prio(const struct dcb_app_table *tab)
|
|
||||||
{
|
|
||||||
dcb_app_print_filtered(tab, dcb_app_is_port, dcb_app_print_key_dec,
|
|
||||||
"port_prio", "port-prio");
|
|
||||||
}
|
|
||||||
|
|
||||||
static void dcb_app_print_default_prio(const struct dcb_app_table *tab)
|
|
||||||
{
|
|
||||||
bool first = true;
|
|
||||||
size_t i;
|
|
||||||
|
|
||||||
for (i = 0; i < tab->n_apps; i++) {
|
|
||||||
if (!dcb_app_is_default(&tab->apps[i]))
|
|
||||||
continue;
|
|
||||||
if (first) {
|
|
||||||
open_json_array(PRINT_JSON, "default_prio");
|
|
||||||
print_string(PRINT_FP, NULL, "default-prio ", NULL);
|
|
||||||
first = false;
|
|
||||||
}
|
|
||||||
print_uint(PRINT_ANY, NULL, "%d ", tab->apps[i].priority);
|
|
||||||
}
|
|
||||||
|
|
||||||
if (!first) {
|
|
||||||
close_json_array(PRINT_JSON, "default_prio");
|
|
||||||
print_nl();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
 * Print every group of @tab. The call order below is the output order:
 * ethtype, default, dscp, stream-port, dgram-port, port.
 */
static void dcb_app_print(const struct dcb *dcb, const struct dcb_app_table *tab)
{
	dcb_app_print_ethtype_prio(tab);
	dcb_app_print_default_prio(tab);

	/* The only group whose key formatting depends on dcb->numeric. */
	dcb_app_print_dscp_prio(dcb, tab);

	dcb_app_print_stream_port_prio(tab);
	dcb_app_print_dgram_port_prio(tab);
	dcb_app_print_port_prio(tab);
}
|
|
||||||
|
|
||||||
static int dcb_app_get_table_attr_cb(const struct nlattr *attr, void *data)
|
|
||||||
{
|
|
||||||
struct dcb_app_table *tab = data;
|
|
||||||
struct dcb_app *app;
|
|
||||||
int ret;
|
|
||||||
|
|
||||||
if (mnl_attr_get_type(attr) != DCB_ATTR_IEEE_APP) {
|
|
||||||
fprintf(stderr, "Unknown attribute in DCB_ATTR_IEEE_APP_TABLE: %d\n",
|
|
||||||
mnl_attr_get_type(attr));
|
|
||||||
return MNL_CB_OK;
|
|
||||||
}
|
|
||||||
if (mnl_attr_get_payload_len(attr) < sizeof(struct dcb_app)) {
|
|
||||||
fprintf(stderr, "DCB_ATTR_IEEE_APP payload expected to have size %zd, not %d\n",
|
|
||||||
sizeof(struct dcb_app), mnl_attr_get_payload_len(attr));
|
|
||||||
return MNL_CB_OK;
|
|
||||||
}
|
|
||||||
|
|
||||||
app = mnl_attr_get_payload(attr);
|
|
||||||
ret = dcb_app_table_push(tab, app);
|
|
||||||
if (ret != 0)
|
|
||||||
return MNL_CB_ERROR;
|
|
||||||
|
|
||||||
return MNL_CB_OK;
|
|
||||||
}
|
|
||||||
|
|
||||||
static int dcb_app_get(struct dcb *dcb, const char *dev, struct dcb_app_table *tab)
|
|
||||||
{
|
|
||||||
uint16_t payload_len;
|
|
||||||
void *payload;
|
|
||||||
int ret;
|
|
||||||
|
|
||||||
ret = dcb_get_attribute_va(dcb, dev, DCB_ATTR_IEEE_APP_TABLE, &payload, &payload_len);
|
|
||||||
if (ret != 0)
|
|
||||||
return ret;
|
|
||||||
|
|
||||||
ret = mnl_attr_parse_payload(payload, payload_len, dcb_app_get_table_attr_cb, tab);
|
|
||||||
if (ret != MNL_CB_OK)
|
|
||||||
return -EINVAL;
|
|
||||||
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Closure for dcb_app_add_del_cb(): which entries to put in the request. */
struct dcb_app_add_del {
	const struct dcb_app_table *tab;		/* candidate entries */
	bool (*filter)(const struct dcb_app *app);	/* NULL selects every entry */
};
|
|
||||||
|
|
||||||
static int dcb_app_add_del_cb(struct dcb *dcb, struct nlmsghdr *nlh, void *data)
|
|
||||||
{
|
|
||||||
struct dcb_app_add_del *add_del = data;
|
|
||||||
struct nlattr *nest;
|
|
||||||
size_t i;
|
|
||||||
|
|
||||||
nest = mnl_attr_nest_start(nlh, DCB_ATTR_IEEE_APP_TABLE);
|
|
||||||
|
|
||||||
for (i = 0; i < add_del->tab->n_apps; i++) {
|
|
||||||
const struct dcb_app *app = &add_del->tab->apps[i];
|
|
||||||
|
|
||||||
if (add_del->filter == NULL || add_del->filter(app))
|
|
||||||
mnl_attr_put(nlh, DCB_ATTR_IEEE_APP, sizeof(*app), app);
|
|
||||||
}
|
|
||||||
|
|
||||||
mnl_attr_nest_end(nlh, nest);
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
static int dcb_app_add_del(struct dcb *dcb, const char *dev, int command,
|
|
||||||
const struct dcb_app_table *tab,
|
|
||||||
bool (*filter)(const struct dcb_app *))
|
|
||||||
{
|
|
||||||
struct dcb_app_add_del add_del = {
|
|
||||||
.tab = tab,
|
|
||||||
.filter = filter,
|
|
||||||
};
|
|
||||||
|
|
||||||
if (tab->n_apps == 0)
|
|
||||||
return 0;
|
|
||||||
|
|
||||||
return dcb_set_attribute_va(dcb, command, dev, dcb_app_add_del_cb, &add_del);
|
|
||||||
}
|
|
||||||
|
|
||||||
static int dcb_cmd_app_parse_add_del(struct dcb *dcb, const char *dev,
|
|
||||||
int argc, char **argv, struct dcb_app_table *tab)
|
|
||||||
{
|
|
||||||
struct dcb_app_parse_mapping pm = {
|
|
||||||
.tab = tab,
|
|
||||||
};
|
|
||||||
int ret;
|
|
||||||
|
|
||||||
if (!argc) {
|
|
||||||
dcb_app_help_add();
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
do {
|
|
||||||
if (matches(*argv, "help") == 0) {
|
|
||||||
dcb_app_help_add();
|
|
||||||
return 0;
|
|
||||||
} else if (matches(*argv, "ethtype-prio") == 0) {
|
|
||||||
NEXT_ARG();
|
|
||||||
pm.selector = IEEE_8021QAZ_APP_SEL_ETHERTYPE;
|
|
||||||
ret = parse_mapping(&argc, &argv, false,
|
|
||||||
&dcb_app_parse_mapping_ethtype_prio,
|
|
||||||
&pm);
|
|
||||||
} else if (matches(*argv, "default-prio") == 0) {
|
|
||||||
NEXT_ARG();
|
|
||||||
ret = dcb_app_parse_default_prio(&argc, &argv, pm.tab);
|
|
||||||
if (ret != 0) {
|
|
||||||
fprintf(stderr, "Invalid default priority %s\n", *argv);
|
|
||||||
return ret;
|
|
||||||
}
|
|
||||||
} else if (matches(*argv, "dscp-prio") == 0) {
|
|
||||||
NEXT_ARG();
|
|
||||||
pm.selector = IEEE_8021QAZ_APP_SEL_DSCP;
|
|
||||||
ret = parse_mapping_gen(&argc, &argv,
|
|
||||||
&dcb_app_parse_dscp,
|
|
||||||
&dcb_app_parse_mapping_dscp_prio,
|
|
||||||
&pm);
|
|
||||||
} else if (matches(*argv, "stream-port-prio") == 0) {
|
|
||||||
NEXT_ARG();
|
|
||||||
pm.selector = IEEE_8021QAZ_APP_SEL_STREAM;
|
|
||||||
ret = parse_mapping(&argc, &argv, false,
|
|
||||||
&dcb_app_parse_mapping_port_prio,
|
|
||||||
&pm);
|
|
||||||
} else if (matches(*argv, "dgram-port-prio") == 0) {
|
|
||||||
NEXT_ARG();
|
|
||||||
pm.selector = IEEE_8021QAZ_APP_SEL_DGRAM;
|
|
||||||
ret = parse_mapping(&argc, &argv, false,
|
|
||||||
&dcb_app_parse_mapping_port_prio,
|
|
||||||
&pm);
|
|
||||||
} else if (matches(*argv, "port-prio") == 0) {
|
|
||||||
NEXT_ARG();
|
|
||||||
pm.selector = IEEE_8021QAZ_APP_SEL_ANY;
|
|
||||||
ret = parse_mapping(&argc, &argv, false,
|
|
||||||
&dcb_app_parse_mapping_port_prio,
|
|
||||||
&pm);
|
|
||||||
} else {
|
|
||||||
fprintf(stderr, "What is \"%s\"?\n", *argv);
|
|
||||||
dcb_app_help_add();
|
|
||||||
return -EINVAL;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (ret != 0) {
|
|
||||||
fprintf(stderr, "Invalid mapping %s\n", *argv);
|
|
||||||
return ret;
|
|
||||||
}
|
|
||||||
if (pm.err)
|
|
||||||
return pm.err;
|
|
||||||
} while (argc > 0);
|
|
||||||
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
static int dcb_cmd_app_add(struct dcb *dcb, const char *dev, int argc, char **argv)
|
|
||||||
{
|
|
||||||
struct dcb_app_table tab = {};
|
|
||||||
int ret;
|
|
||||||
|
|
||||||
ret = dcb_cmd_app_parse_add_del(dcb, dev, argc, argv, &tab);
|
|
||||||
if (ret != 0)
|
|
||||||
return ret;
|
|
||||||
|
|
||||||
ret = dcb_app_add_del(dcb, dev, DCB_CMD_IEEE_SET, &tab, NULL);
|
|
||||||
dcb_app_table_fini(&tab);
|
|
||||||
return ret;
|
|
||||||
}
|
|
||||||
|
|
||||||
static int dcb_cmd_app_del(struct dcb *dcb, const char *dev, int argc, char **argv)
|
|
||||||
{
|
|
||||||
struct dcb_app_table tab = {};
|
|
||||||
int ret;
|
|
||||||
|
|
||||||
ret = dcb_cmd_app_parse_add_del(dcb, dev, argc, argv, &tab);
|
|
||||||
if (ret != 0)
|
|
||||||
return ret;
|
|
||||||
|
|
||||||
ret = dcb_app_add_del(dcb, dev, DCB_CMD_IEEE_DEL, &tab, NULL);
|
|
||||||
dcb_app_table_fini(&tab);
|
|
||||||
return ret;
|
|
||||||
}
|
|
||||||
|
|
||||||
static int dcb_cmd_app_show(struct dcb *dcb, const char *dev, int argc, char **argv)
|
|
||||||
{
|
|
||||||
struct dcb_app_table tab = {};
|
|
||||||
int ret;
|
|
||||||
|
|
||||||
ret = dcb_app_get(dcb, dev, &tab);
|
|
||||||
if (ret != 0)
|
|
||||||
return ret;
|
|
||||||
|
|
||||||
dcb_app_table_sort(&tab);
|
|
||||||
|
|
||||||
open_json_object(NULL);
|
|
||||||
|
|
||||||
if (!argc) {
|
|
||||||
dcb_app_print(dcb, &tab);
|
|
||||||
goto out;
|
|
||||||
}
|
|
||||||
|
|
||||||
do {
|
|
||||||
if (matches(*argv, "help") == 0) {
|
|
||||||
dcb_app_help_show_flush();
|
|
||||||
goto out;
|
|
||||||
} else if (matches(*argv, "ethtype-prio") == 0) {
|
|
||||||
dcb_app_print_ethtype_prio(&tab);
|
|
||||||
} else if (matches(*argv, "dscp-prio") == 0) {
|
|
||||||
dcb_app_print_dscp_prio(dcb, &tab);
|
|
||||||
} else if (matches(*argv, "stream-port-prio") == 0) {
|
|
||||||
dcb_app_print_stream_port_prio(&tab);
|
|
||||||
} else if (matches(*argv, "dgram-port-prio") == 0) {
|
|
||||||
dcb_app_print_dgram_port_prio(&tab);
|
|
||||||
} else if (matches(*argv, "port-prio") == 0) {
|
|
||||||
dcb_app_print_port_prio(&tab);
|
|
||||||
} else {
|
|
||||||
fprintf(stderr, "What is \"%s\"?\n", *argv);
|
|
||||||
dcb_app_help_show_flush();
|
|
||||||
ret = -EINVAL;
|
|
||||||
goto out;
|
|
||||||
}
|
|
||||||
|
|
||||||
NEXT_ARG_FWD();
|
|
||||||
} while (argc > 0);
|
|
||||||
|
|
||||||
out:
|
|
||||||
close_json_object();
|
|
||||||
dcb_app_table_fini(&tab);
|
|
||||||
return ret;
|
|
||||||
}
|
|
||||||
|
|
||||||
static int dcb_cmd_app_flush(struct dcb *dcb, const char *dev, int argc, char **argv)
|
|
||||||
{
|
|
||||||
struct dcb_app_table tab = {};
|
|
||||||
int ret;
|
|
||||||
|
|
||||||
ret = dcb_app_get(dcb, dev, &tab);
|
|
||||||
if (ret != 0)
|
|
||||||
return ret;
|
|
||||||
|
|
||||||
if (!argc) {
|
|
||||||
ret = dcb_app_add_del(dcb, dev, DCB_CMD_IEEE_DEL, &tab, NULL);
|
|
||||||
goto out;
|
|
||||||
}
|
|
||||||
|
|
||||||
do {
|
|
||||||
if (matches(*argv, "help") == 0) {
|
|
||||||
dcb_app_help_show_flush();
|
|
||||||
goto out;
|
|
||||||
} else if (matches(*argv, "ethtype-prio") == 0) {
|
|
||||||
ret = dcb_app_add_del(dcb, dev, DCB_CMD_IEEE_DEL, &tab,
|
|
||||||
&dcb_app_is_ethtype);
|
|
||||||
if (ret != 0)
|
|
||||||
goto out;
|
|
||||||
} else if (matches(*argv, "default-prio") == 0) {
|
|
||||||
ret = dcb_app_add_del(dcb, dev, DCB_CMD_IEEE_DEL, &tab,
|
|
||||||
&dcb_app_is_default);
|
|
||||||
if (ret != 0)
|
|
||||||
goto out;
|
|
||||||
} else if (matches(*argv, "dscp-prio") == 0) {
|
|
||||||
ret = dcb_app_add_del(dcb, dev, DCB_CMD_IEEE_DEL, &tab,
|
|
||||||
&dcb_app_is_dscp);
|
|
||||||
if (ret != 0)
|
|
||||||
goto out;
|
|
||||||
} else {
|
|
||||||
fprintf(stderr, "What is \"%s\"?\n", *argv);
|
|
||||||
dcb_app_help_show_flush();
|
|
||||||
ret = -EINVAL;
|
|
||||||
goto out;
|
|
||||||
}
|
|
||||||
|
|
||||||
NEXT_ARG_FWD();
|
|
||||||
} while (argc > 0);
|
|
||||||
|
|
||||||
out:
|
|
||||||
dcb_app_table_fini(&tab);
|
|
||||||
return ret;
|
|
||||||
}
|
|
||||||
|
|
||||||
static int dcb_cmd_app_replace(struct dcb *dcb, const char *dev, int argc, char **argv)
|
|
||||||
{
|
|
||||||
struct dcb_app_table orig = {};
|
|
||||||
struct dcb_app_table tab = {};
|
|
||||||
struct dcb_app_table new = {};
|
|
||||||
int ret;
|
|
||||||
|
|
||||||
ret = dcb_app_get(dcb, dev, &orig);
|
|
||||||
if (ret != 0)
|
|
||||||
return ret;
|
|
||||||
|
|
||||||
ret = dcb_cmd_app_parse_add_del(dcb, dev, argc, argv, &tab);
|
|
||||||
if (ret != 0)
|
|
||||||
goto out;
|
|
||||||
|
|
||||||
/* Attempts to add an existing entry would be rejected, so drop
|
|
||||||
* these entries from tab.
|
|
||||||
*/
|
|
||||||
ret = dcb_app_table_copy(&new, &tab);
|
|
||||||
if (ret != 0)
|
|
||||||
goto out;
|
|
||||||
dcb_app_table_remove_existing(&new, &orig);
|
|
||||||
|
|
||||||
ret = dcb_app_add_del(dcb, dev, DCB_CMD_IEEE_SET, &new, NULL);
|
|
||||||
if (ret != 0) {
|
|
||||||
fprintf(stderr, "Could not add new APP entries\n");
|
|
||||||
goto out;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Remove the obsolete entries. */
|
|
||||||
dcb_app_table_remove_replaced(&orig, &tab);
|
|
||||||
ret = dcb_app_add_del(dcb, dev, DCB_CMD_IEEE_DEL, &orig, NULL);
|
|
||||||
if (ret != 0) {
|
|
||||||
fprintf(stderr, "Could not remove replaced APP entries\n");
|
|
||||||
goto out;
|
|
||||||
}
|
|
||||||
|
|
||||||
out:
|
|
||||||
dcb_app_table_fini(&new);
|
|
||||||
dcb_app_table_fini(&tab);
|
|
||||||
dcb_app_table_fini(&orig);
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
int dcb_cmd_app(struct dcb *dcb, int argc, char **argv)
|
|
||||||
{
|
|
||||||
if (!argc || matches(*argv, "help") == 0) {
|
|
||||||
dcb_app_help();
|
|
||||||
return 0;
|
|
||||||
} else if (matches(*argv, "show") == 0) {
|
|
||||||
NEXT_ARG_FWD();
|
|
||||||
return dcb_cmd_parse_dev(dcb, argc, argv,
|
|
||||||
dcb_cmd_app_show, dcb_app_help_show_flush);
|
|
||||||
} else if (matches(*argv, "flush") == 0) {
|
|
||||||
NEXT_ARG_FWD();
|
|
||||||
return dcb_cmd_parse_dev(dcb, argc, argv,
|
|
||||||
dcb_cmd_app_flush, dcb_app_help_show_flush);
|
|
||||||
} else if (matches(*argv, "add") == 0) {
|
|
||||||
NEXT_ARG_FWD();
|
|
||||||
return dcb_cmd_parse_dev(dcb, argc, argv,
|
|
||||||
dcb_cmd_app_add, dcb_app_help_add);
|
|
||||||
} else if (matches(*argv, "del") == 0) {
|
|
||||||
NEXT_ARG_FWD();
|
|
||||||
return dcb_cmd_parse_dev(dcb, argc, argv,
|
|
||||||
dcb_cmd_app_del, dcb_app_help_add);
|
|
||||||
} else if (matches(*argv, "replace") == 0) {
|
|
||||||
NEXT_ARG_FWD();
|
|
||||||
return dcb_cmd_parse_dev(dcb, argc, argv,
|
|
||||||
dcb_cmd_app_replace, dcb_app_help_add);
|
|
||||||
} else {
|
|
||||||
fprintf(stderr, "What is \"%s\"?\n", *argv);
|
|
||||||
dcb_app_help();
|
|
||||||
return -EINVAL;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
235
dcb/dcb_buffer.c
235
dcb/dcb_buffer.c
|
|
@ -1,235 +0,0 @@
|
||||||
// SPDX-License-Identifier: GPL-2.0+
|
|
||||||
|
|
||||||
#include <errno.h>
|
|
||||||
#include <inttypes.h>
|
|
||||||
#include <stdio.h>
|
|
||||||
#include <linux/dcbnl.h>
|
|
||||||
|
|
||||||
#include "dcb.h"
|
|
||||||
#include "utils.h"
|
|
||||||
|
|
||||||
/* Print the usage text of "dcb buffer set" to stderr. */
static void dcb_buffer_help_set(void)
{
	static const char usage[] =
		"Usage: dcb buffer set dev STRING\n"
		" [ prio-buffer PRIO-MAP ]\n"
		" [ buffer-size SIZE-MAP ]\n"
		"\n"
		" where PRIO-MAP := [ PRIO-MAP ] PRIO-MAPPING\n"
		" PRIO-MAPPING := { all | PRIO }:BUFFER\n"
		" SIZE-MAP := [ SIZE-MAP ] SIZE-MAPPING\n"
		" SIZE-MAPPING := { all | BUFFER }:INTEGER\n"
		" PRIO := { 0 .. 7 }\n"
		" BUFFER := { 0 .. 7 }\n"
		"\n";

	fputs(usage, stderr);
}
|
|
||||||
|
|
||||||
/* Print the usage text of "dcb buffer show" to stderr. */
static void dcb_buffer_help_show(void)
{
	static const char usage[] =
		"Usage: dcb buffer show dev STRING\n"
		" [ prio-buffer ] [ buffer-size ] [ total-size ]\n"
		"\n";

	fputs(usage, stderr);
}
|
|
||||||
|
|
||||||
/* Print the complete "dcb buffer" usage to stderr: the help line followed by
 * the usage of the show and set sub-commands.
 */
static void dcb_buffer_help(void)
{
	fputs("Usage: dcb buffer help\n"
	      "\n",
	      stderr);

	dcb_buffer_help_show();
	dcb_buffer_help_set();
}
|
|
||||||
|
|
||||||
/* Mapping callback for "prio-buffer": parse value as a u8 buffer number and
 * hand the PRIO -> BUFFER pair to dcb_parse_mapping(), which is given
 * prio2buffer of the struct dcbnl_buffer passed in data as the target.
 *
 * Returns -EINVAL if value is not a valid u8, otherwise the result of
 * dcb_parse_mapping().
 */
static int dcb_buffer_parse_mapping_prio_buffer(__u32 key, char *value, void *data)
{
	struct dcbnl_buffer *cfg = data;
	__u8 buffer_nr;

	if (get_u8(&buffer_nr, value, 0) != 0)
		return -EINVAL;

	return dcb_parse_mapping("PRIO", key, IEEE_8021Q_MAX_PRIORITIES - 1,
				 "BUFFER", buffer_nr, DCBX_MAX_BUFFERS - 1,
				 dcb_set_u8, cfg->prio2buffer);
}
|
|
||||||
|
|
||||||
/* Mapping callback for "buffer-size": parse value with get_size() and hand
 * the BUFFER -> size pair to dcb_parse_mapping(), which is given buffer_size
 * of the struct dcbnl_buffer passed in data as the target.
 *
 * Returns -EINVAL (after printing a diagnostic) if value cannot be parsed,
 * otherwise the result of dcb_parse_mapping().
 */
static int dcb_buffer_parse_mapping_buffer_size(__u32 key, char *value, void *data)
{
	struct dcbnl_buffer *cfg = data;
	unsigned int bytes;

	if (get_size(&bytes, value) != 0) {
		fprintf(stderr, "%d:%s: Illegal value for buffer size\n", key, value);
		return -EINVAL;
	}

	return dcb_parse_mapping("BUFFER", key, DCBX_MAX_BUFFERS - 1,
				 "INTEGER", bytes, -1,
				 dcb_set_u32, cfg->buffer_size);
}
|
|
||||||
|
|
||||||
static void dcb_buffer_print_total_size(const struct dcbnl_buffer *buffer)
|
|
||||||
{
|
|
||||||
print_size(PRINT_ANY, "total_size", "total-size %s ", buffer->total_size);
|
|
||||||
}
|
|
||||||
|
|
||||||
static void dcb_buffer_print_prio_buffer(const struct dcbnl_buffer *buffer)
|
|
||||||
{
|
|
||||||
dcb_print_named_array("prio_buffer", "prio-buffer",
|
|
||||||
buffer->prio2buffer, ARRAY_SIZE(buffer->prio2buffer),
|
|
||||||
dcb_print_array_u8);
|
|
||||||
}
|
|
||||||
|
|
||||||
static void dcb_buffer_print_buffer_size(const struct dcbnl_buffer *buffer)
|
|
||||||
{
|
|
||||||
size_t size = ARRAY_SIZE(buffer->buffer_size);
|
|
||||||
SPRINT_BUF(b);
|
|
||||||
size_t i;
|
|
||||||
|
|
||||||
open_json_array(PRINT_JSON, "buffer_size");
|
|
||||||
print_string(PRINT_FP, NULL, "buffer-size ", NULL);
|
|
||||||
|
|
||||||
for (i = 0; i < size; i++) {
|
|
||||||
snprintf(b, sizeof(b), "%zd:%%s ", i);
|
|
||||||
print_size(PRINT_ANY, NULL, b, buffer->buffer_size[i]);
|
|
||||||
}
|
|
||||||
|
|
||||||
close_json_array(PRINT_JSON, "buffer_size");
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Print all buffer attributes in the order prio-buffer, buffer-size,
 * total-size, with a print_nl() after each section.
 */
static void dcb_buffer_print(const struct dcbnl_buffer *buffer)
{
	/* Priority -> buffer assignment. */
	dcb_buffer_print_prio_buffer(buffer);
	print_nl();

	/* Per-buffer sizes. */
	dcb_buffer_print_buffer_size(buffer);
	print_nl();

	/* Overall size. */
	dcb_buffer_print_total_size(buffer);
	print_nl();
}
|
|
||||||
|
|
||||||
static int dcb_buffer_get(struct dcb *dcb, const char *dev, struct dcbnl_buffer *buffer)
|
|
||||||
{
|
|
||||||
return dcb_get_attribute(dcb, dev, DCB_ATTR_DCB_BUFFER, buffer, sizeof(*buffer));
|
|
||||||
}
|
|
||||||
|
|
||||||
static int dcb_buffer_set(struct dcb *dcb, const char *dev, const struct dcbnl_buffer *buffer)
|
|
||||||
{
|
|
||||||
return dcb_set_attribute(dcb, dev, DCB_ATTR_DCB_BUFFER, buffer, sizeof(*buffer));
|
|
||||||
}
|
|
||||||
|
|
||||||
static int dcb_cmd_buffer_set(struct dcb *dcb, const char *dev, int argc, char **argv)
|
|
||||||
{
|
|
||||||
struct dcbnl_buffer buffer;
|
|
||||||
int ret;
|
|
||||||
|
|
||||||
if (!argc) {
|
|
||||||
dcb_buffer_help_set();
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
ret = dcb_buffer_get(dcb, dev, &buffer);
|
|
||||||
if (ret)
|
|
||||||
return ret;
|
|
||||||
|
|
||||||
do {
|
|
||||||
if (matches(*argv, "help") == 0) {
|
|
||||||
dcb_buffer_help_set();
|
|
||||||
return 0;
|
|
||||||
} else if (matches(*argv, "prio-buffer") == 0) {
|
|
||||||
NEXT_ARG();
|
|
||||||
ret = parse_mapping(&argc, &argv, true,
|
|
||||||
&dcb_buffer_parse_mapping_prio_buffer, &buffer);
|
|
||||||
if (ret) {
|
|
||||||
fprintf(stderr, "Invalid priority mapping %s\n", *argv);
|
|
||||||
return ret;
|
|
||||||
}
|
|
||||||
continue;
|
|
||||||
} else if (matches(*argv, "buffer-size") == 0) {
|
|
||||||
NEXT_ARG();
|
|
||||||
ret = parse_mapping(&argc, &argv, true,
|
|
||||||
&dcb_buffer_parse_mapping_buffer_size, &buffer);
|
|
||||||
if (ret) {
|
|
||||||
fprintf(stderr, "Invalid buffer size mapping %s\n", *argv);
|
|
||||||
return ret;
|
|
||||||
}
|
|
||||||
continue;
|
|
||||||
} else {
|
|
||||||
fprintf(stderr, "What is \"%s\"?\n", *argv);
|
|
||||||
dcb_buffer_help_set();
|
|
||||||
return -EINVAL;
|
|
||||||
}
|
|
||||||
|
|
||||||
NEXT_ARG_FWD();
|
|
||||||
} while (argc > 0);
|
|
||||||
|
|
||||||
return dcb_buffer_set(dcb, dev, &buffer);
|
|
||||||
}
|
|
||||||
|
|
||||||
static int dcb_cmd_buffer_show(struct dcb *dcb, const char *dev, int argc, char **argv)
|
|
||||||
{
|
|
||||||
struct dcbnl_buffer buffer;
|
|
||||||
int ret;
|
|
||||||
|
|
||||||
ret = dcb_buffer_get(dcb, dev, &buffer);
|
|
||||||
if (ret)
|
|
||||||
return ret;
|
|
||||||
|
|
||||||
open_json_object(NULL);
|
|
||||||
|
|
||||||
if (!argc) {
|
|
||||||
dcb_buffer_print(&buffer);
|
|
||||||
goto out;
|
|
||||||
}
|
|
||||||
|
|
||||||
do {
|
|
||||||
if (matches(*argv, "help") == 0) {
|
|
||||||
dcb_buffer_help_show();
|
|
||||||
return 0;
|
|
||||||
} else if (matches(*argv, "prio-buffer") == 0) {
|
|
||||||
dcb_buffer_print_prio_buffer(&buffer);
|
|
||||||
print_nl();
|
|
||||||
} else if (matches(*argv, "buffer-size") == 0) {
|
|
||||||
dcb_buffer_print_buffer_size(&buffer);
|
|
||||||
print_nl();
|
|
||||||
} else if (matches(*argv, "total-size") == 0) {
|
|
||||||
dcb_buffer_print_total_size(&buffer);
|
|
||||||
print_nl();
|
|
||||||
} else {
|
|
||||||
fprintf(stderr, "What is \"%s\"?\n", *argv);
|
|
||||||
dcb_buffer_help_show();
|
|
||||||
return -EINVAL;
|
|
||||||
}
|
|
||||||
|
|
||||||
NEXT_ARG_FWD();
|
|
||||||
} while (argc > 0);
|
|
||||||
|
|
||||||
out:
|
|
||||||
close_json_object();
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
int dcb_cmd_buffer(struct dcb *dcb, int argc, char **argv)
|
|
||||||
{
|
|
||||||
if (!argc || matches(*argv, "help") == 0) {
|
|
||||||
dcb_buffer_help();
|
|
||||||
return 0;
|
|
||||||
} else if (matches(*argv, "show") == 0) {
|
|
||||||
NEXT_ARG_FWD();
|
|
||||||
return dcb_cmd_parse_dev(dcb, argc, argv,
|
|
||||||
dcb_cmd_buffer_show, dcb_buffer_help_show);
|
|
||||||
} else if (matches(*argv, "set") == 0) {
|
|
||||||
NEXT_ARG_FWD();
|
|
||||||
return dcb_cmd_parse_dev(dcb, argc, argv,
|
|
||||||
dcb_cmd_buffer_set, dcb_buffer_help_set);
|
|
||||||
} else {
|
|
||||||
fprintf(stderr, "What is \"%s\"?\n", *argv);
|
|
||||||
dcb_buffer_help();
|
|
||||||
return -EINVAL;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
192
dcb/dcb_dcbx.c
192
dcb/dcb_dcbx.c
|
|
@ -1,192 +0,0 @@
|
||||||
// SPDX-License-Identifier: GPL-2.0+
|
|
||||||
|
|
||||||
#include <errno.h>
|
|
||||||
#include <inttypes.h>
|
|
||||||
#include <stdio.h>
|
|
||||||
#include <linux/dcbnl.h>
|
|
||||||
|
|
||||||
#include "dcb.h"
|
|
||||||
#include "utils.h"
|
|
||||||
|
|
||||||
/* Print the usage text of "dcb dcbx set" to stderr. */
static void dcb_dcbx_help_set(void)
{
	static const char usage[] =
		"Usage: dcb dcbx set dev STRING\n"
		" [ host | lld-managed ]\n"
		" [ cee | ieee ] [ static ]\n"
		"\n";

	fputs(usage, stderr);
}
|
|
||||||
|
|
||||||
/* Print the usage text of "dcb dcbx show" to stderr. */
static void dcb_dcbx_help_show(void)
{
	static const char usage[] =
		"Usage: dcb dcbx show dev STRING\n"
		"\n";

	fputs(usage, stderr);
}
|
|
||||||
|
|
||||||
/* Print the complete "dcb dcbx" usage to stderr: the help line followed by
 * the usage of the show and set sub-commands.
 */
static void dcb_dcbx_help(void)
{
	fputs("Usage: dcb dcbx help\n"
	      "\n",
	      stderr);

	dcb_dcbx_help_show();
	dcb_dcbx_help_set();
}
|
|
||||||
|
|
||||||
struct dcb_dcbx_flag {
|
|
||||||
__u8 value;
|
|
||||||
const char *key_fp;
|
|
||||||
const char *key_json;
|
|
||||||
};
|
|
||||||
|
|
||||||
static struct dcb_dcbx_flag dcb_dcbx_flags[] = {
|
|
||||||
{DCB_CAP_DCBX_HOST, "host"},
|
|
||||||
{DCB_CAP_DCBX_LLD_MANAGED, "lld-managed", "lld_managed"},
|
|
||||||
{DCB_CAP_DCBX_VER_CEE, "cee"},
|
|
||||||
{DCB_CAP_DCBX_VER_IEEE, "ieee"},
|
|
||||||
{DCB_CAP_DCBX_STATIC, "static"},
|
|
||||||
};
|
|
||||||
|
|
||||||
/* Print the name of every bit set in dcbx, lowest bit first. A known flag is
 * emitted as a true boolean in JSON and as its key_fp name in plain text; a
 * bit with no entry in dcb_dcbx_flags is reported on stderr.
 */
static void dcb_dcbx_print(__u8 dcbx)
{
	int bit;

	while ((bit = ffs(dcbx)) != 0) {
		/* ffs() numbers bits from 1; convert to a mask. */
		const int mask = 1 << (bit - 1);
		const struct dcb_dcbx_flag *hit = NULL;
		size_t idx;

		for (idx = 0; idx < ARRAY_SIZE(dcb_dcbx_flags); idx++) {
			if (dcb_dcbx_flags[idx].value == mask) {
				hit = &dcb_dcbx_flags[idx];
				break;
			}
		}

		if (hit != NULL) {
			print_bool(PRINT_JSON,
				   hit->key_json ? hit->key_json : hit->key_fp,
				   NULL, true);
			print_string(PRINT_FP, NULL, "%s ", hit->key_fp);
		} else {
			fprintf(stderr, "Unknown DCBX bit %#x.\n", mask);
		}

		/* Clear the handled bit so the next ffs() finds the next one. */
		dcbx &= ~mask;
	}

	print_nl();
}
|
|
||||||
|
|
||||||
/* Fetch the DCB_ATTR_DCBX attribute of dev with DCB_CMD_IEEE_GET and store
 * its single byte in *dcbx.
 *
 * Returns 0 on success, the dcb_get_attribute_bare() status on failure, or
 * -EINVAL if the payload is not exactly one byte long.
 */
static int dcb_dcbx_get(struct dcb *dcb, const char *dev, __u8 *dcbx)
{
	__u16 len;
	void *data;
	int rc;

	rc = dcb_get_attribute_bare(dcb, DCB_CMD_IEEE_GET, dev, DCB_ATTR_DCBX,
				    &data, &len);
	if (rc != 0)
		return rc;

	if (len == 1) {
		*dcbx = *(__u8 *) data;
		return 0;
	}

	fprintf(stderr, "DCB_ATTR_DCBX payload has size %d, expected 1.\n",
		len);
	return -EINVAL;
}
|
|
||||||
|
|
||||||
/* Send the one-byte dcbx value to dev as DCB_ATTR_DCBX using DCB_CMD_SDCBX.
 * Returns the status of dcb_set_attribute_bare().
 */
static int dcb_dcbx_set(struct dcb *dcb, const char *dev, __u8 dcbx)
{
	return dcb_set_attribute_bare(dcb, DCB_CMD_SDCBX, dev,
				      DCB_ATTR_DCBX, &dcbx, 1,
				      DCB_ATTR_DCBX);
}
|
|
||||||
|
|
||||||
static int dcb_cmd_dcbx_set(struct dcb *dcb, const char *dev, int argc, char **argv)
|
|
||||||
{
|
|
||||||
__u8 dcbx = 0;
|
|
||||||
__u8 i;
|
|
||||||
|
|
||||||
if (!argc) {
|
|
||||||
dcb_dcbx_help_set();
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
do {
|
|
||||||
if (matches(*argv, "help") == 0) {
|
|
||||||
dcb_dcbx_help_set();
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
for (i = 0; i < ARRAY_SIZE(dcb_dcbx_flags); i++) {
|
|
||||||
struct dcb_dcbx_flag *flag = &dcb_dcbx_flags[i];
|
|
||||||
|
|
||||||
if (matches(*argv, flag->key_fp) == 0) {
|
|
||||||
dcbx |= flag->value;
|
|
||||||
NEXT_ARG_FWD();
|
|
||||||
goto next;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
fprintf(stderr, "What is \"%s\"?\n", *argv);
|
|
||||||
dcb_dcbx_help_set();
|
|
||||||
return -EINVAL;
|
|
||||||
|
|
||||||
next:
|
|
||||||
;
|
|
||||||
} while (argc > 0);
|
|
||||||
|
|
||||||
return dcb_dcbx_set(dcb, dev, dcbx);
|
|
||||||
}
|
|
||||||
|
|
||||||
static int dcb_cmd_dcbx_show(struct dcb *dcb, const char *dev, int argc, char **argv)
|
|
||||||
{
|
|
||||||
__u8 dcbx;
|
|
||||||
int ret;
|
|
||||||
|
|
||||||
ret = dcb_dcbx_get(dcb, dev, &dcbx);
|
|
||||||
if (ret != 0)
|
|
||||||
return ret;
|
|
||||||
|
|
||||||
while (argc > 0) {
|
|
||||||
if (matches(*argv, "help") == 0) {
|
|
||||||
dcb_dcbx_help_show();
|
|
||||||
return 0;
|
|
||||||
} else {
|
|
||||||
fprintf(stderr, "What is \"%s\"?\n", *argv);
|
|
||||||
dcb_dcbx_help_show();
|
|
||||||
return -EINVAL;
|
|
||||||
}
|
|
||||||
|
|
||||||
NEXT_ARG_FWD();
|
|
||||||
}
|
|
||||||
|
|
||||||
open_json_object(NULL);
|
|
||||||
dcb_dcbx_print(dcbx);
|
|
||||||
close_json_object();
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
int dcb_cmd_dcbx(struct dcb *dcb, int argc, char **argv)
|
|
||||||
{
|
|
||||||
if (!argc || matches(*argv, "help") == 0) {
|
|
||||||
dcb_dcbx_help();
|
|
||||||
return 0;
|
|
||||||
} else if (matches(*argv, "show") == 0) {
|
|
||||||
NEXT_ARG_FWD();
|
|
||||||
return dcb_cmd_parse_dev(dcb, argc, argv,
|
|
||||||
dcb_cmd_dcbx_show, dcb_dcbx_help_show);
|
|
||||||
} else if (matches(*argv, "set") == 0) {
|
|
||||||
NEXT_ARG_FWD();
|
|
||||||
return dcb_cmd_parse_dev(dcb, argc, argv,
|
|
||||||
dcb_cmd_dcbx_set, dcb_dcbx_help_set);
|
|
||||||
} else {
|
|
||||||
fprintf(stderr, "What is \"%s\"?\n", *argv);
|
|
||||||
dcb_dcbx_help();
|
|
||||||
return -EINVAL;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
435
dcb/dcb_ets.c
435
dcb/dcb_ets.c
|
|
@ -1,435 +0,0 @@
|
||||||
// SPDX-License-Identifier: GPL-2.0+
|
|
||||||
|
|
||||||
#include <errno.h>
|
|
||||||
#include <stdio.h>
|
|
||||||
#include <linux/dcbnl.h>
|
|
||||||
|
|
||||||
#include "dcb.h"
|
|
||||||
#include "utils.h"
|
|
||||||
|
|
||||||
/* Print the usage text of "dcb ets set" to stderr. */
static void dcb_ets_help_set(void)
{
	static const char usage[] =
		"Usage: dcb ets set dev STRING\n"
		" [ willing { on | off } ]\n"
		" [ { tc-tsa | reco-tc-tsa } TSA-MAP ]\n"
		" [ { pg-bw | tc-bw | reco-tc-bw } BW-MAP ]\n"
		" [ { prio-tc | reco-prio-tc } PRIO-MAP ]\n"
		"\n"
		" where TSA-MAP := [ TSA-MAP ] TSA-MAPPING\n"
		" TSA-MAPPING := { all | TC }:{ strict | cbs | ets | vendor }\n"
		" BW-MAP := [ BW-MAP ] BW-MAPPING\n"
		" BW-MAPPING := { all | TC }:INTEGER\n"
		" PRIO-MAP := [ PRIO-MAP ] PRIO-MAPPING\n"
		" PRIO-MAPPING := { all | PRIO }:TC\n"
		" TC := { 0 .. 7 }\n"
		" PRIO := { 0 .. 7 }\n"
		"\n";

	fputs(usage, stderr);
}
|
|
||||||
|
|
||||||
/* Print the usage text of "dcb ets show" to stderr. */
static void dcb_ets_help_show(void)
{
	static const char usage[] =
		"Usage: dcb ets show dev STRING\n"
		" [ willing ] [ ets-cap ] [ cbs ] [ tc-tsa ]\n"
		" [ reco-tc-tsa ] [ pg-bw ] [ tc-bw ] [ reco-tc-bw ]\n"
		" [ prio-tc ] [ reco-prio-tc ]\n"
		"\n";

	fputs(usage, stderr);
}
|
|
||||||
|
|
||||||
/* Print the complete "dcb ets" usage to stderr: the help line followed by the
 * usage of the show and set sub-commands.
 */
static void dcb_ets_help(void)
{
	fputs("Usage: dcb ets help\n"
	      "\n",
	      stderr);

	dcb_ets_help_show();
	dcb_ets_help_set();
}
|
|
||||||
|
|
||||||
static const char *const tsa_names[] = {
|
|
||||||
[IEEE_8021QAZ_TSA_STRICT] = "strict",
|
|
||||||
[IEEE_8021QAZ_TSA_CB_SHAPER] = "cbs",
|
|
||||||
[IEEE_8021QAZ_TSA_ETS] = "ets",
|
|
||||||
[IEEE_8021QAZ_TSA_VENDOR] = "vendor",
|
|
||||||
};
|
|
||||||
|
|
||||||
/* Mapping callback for the TSA maps: translate value into a TSA number via
 * tsa_names and hand the TC -> TSA pair to dcb_parse_mapping() with data as
 * the u8 target.
 *
 * Returns the parse_one_of() error if value is not a known keyword,
 * otherwise the result of dcb_parse_mapping().
 */
static int dcb_ets_parse_mapping_tc_tsa(__u32 key, char *value, void *data)
{
	int err;
	__u8 algo = parse_one_of("TSA", value, tsa_names,
				 ARRAY_SIZE(tsa_names), &err);

	if (err != 0)
		return err;

	return dcb_parse_mapping("TC", key, IEEE_8021QAZ_MAX_TCS - 1,
				 "TSA", algo, -1U,
				 dcb_set_u8, data);
}
|
|
||||||
|
|
||||||
/* Mapping callback for the bandwidth maps: parse value as a u8 and hand the
 * TC -> BW pair (BW limited to 100) to dcb_parse_mapping() with data as the
 * u8 target.
 *
 * Returns -EINVAL if value is not a valid u8, otherwise the result of
 * dcb_parse_mapping().
 */
static int dcb_ets_parse_mapping_tc_bw(__u32 key, char *value, void *data)
{
	__u8 percent;

	if (get_u8(&percent, value, 0) != 0)
		return -EINVAL;

	return dcb_parse_mapping("TC", key, IEEE_8021QAZ_MAX_TCS - 1,
				 "BW", percent, 100,
				 dcb_set_u8, data);
}
|
|
||||||
|
|
||||||
static int dcb_ets_parse_mapping_prio_tc(unsigned int key, char *value, void *data)
|
|
||||||
{
|
|
||||||
__u8 tc;
|
|
||||||
|
|
||||||
if (get_u8(&tc, value, 0))
|
|
||||||
return -EINVAL;
|
|
||||||
|
|
||||||
return dcb_parse_mapping("PRIO", key, IEEE_8021QAZ_MAX_TCS - 1,
|
|
||||||
"TC", tc, IEEE_8021QAZ_MAX_TCS - 1,
|
|
||||||
dcb_set_u8, data);
|
|
||||||
}
|
|
||||||
|
|
||||||
static void dcb_print_array_tsa(const __u8 *array, size_t size)
|
|
||||||
{
|
|
||||||
dcb_print_array_kw(array, size, tsa_names, ARRAY_SIZE(tsa_names));
|
|
||||||
}
|
|
||||||
|
|
||||||
static void dcb_ets_print_willing(const struct ieee_ets *ets)
|
|
||||||
{
|
|
||||||
print_on_off(PRINT_ANY, "willing", "willing %s ", ets->willing);
|
|
||||||
}
|
|
||||||
|
|
||||||
static void dcb_ets_print_ets_cap(const struct ieee_ets *ets)
|
|
||||||
{
|
|
||||||
print_uint(PRINT_ANY, "ets_cap", "ets-cap %d ", ets->ets_cap);
|
|
||||||
}
|
|
||||||
|
|
||||||
static void dcb_ets_print_cbs(const struct ieee_ets *ets)
|
|
||||||
{
|
|
||||||
print_on_off(PRINT_ANY, "cbs", "cbs %s ", ets->cbs);
|
|
||||||
}
|
|
||||||
|
|
||||||
static void dcb_ets_print_tc_bw(const struct ieee_ets *ets)
|
|
||||||
{
|
|
||||||
dcb_print_named_array("tc_bw", "tc-bw",
|
|
||||||
ets->tc_tx_bw, ARRAY_SIZE(ets->tc_tx_bw),
|
|
||||||
dcb_print_array_u8);
|
|
||||||
}
|
|
||||||
|
|
||||||
static void dcb_ets_print_pg_bw(const struct ieee_ets *ets)
|
|
||||||
{
|
|
||||||
dcb_print_named_array("pg_bw", "pg-bw",
|
|
||||||
ets->tc_rx_bw, ARRAY_SIZE(ets->tc_rx_bw),
|
|
||||||
dcb_print_array_u8);
|
|
||||||
}
|
|
||||||
|
|
||||||
static void dcb_ets_print_tc_tsa(const struct ieee_ets *ets)
|
|
||||||
{
|
|
||||||
dcb_print_named_array("tc_tsa", "tc-tsa",
|
|
||||||
ets->tc_tsa, ARRAY_SIZE(ets->tc_tsa),
|
|
||||||
dcb_print_array_tsa);
|
|
||||||
}
|
|
||||||
|
|
||||||
static void dcb_ets_print_prio_tc(const struct ieee_ets *ets)
|
|
||||||
{
|
|
||||||
dcb_print_named_array("prio_tc", "prio-tc",
|
|
||||||
ets->prio_tc, ARRAY_SIZE(ets->prio_tc),
|
|
||||||
dcb_print_array_u8);
|
|
||||||
}
|
|
||||||
|
|
||||||
static void dcb_ets_print_reco_tc_bw(const struct ieee_ets *ets)
|
|
||||||
{
|
|
||||||
dcb_print_named_array("reco_tc_bw", "reco-tc-bw",
|
|
||||||
ets->tc_reco_bw, ARRAY_SIZE(ets->tc_reco_bw),
|
|
||||||
dcb_print_array_u8);
|
|
||||||
}
|
|
||||||
|
|
||||||
static void dcb_ets_print_reco_tc_tsa(const struct ieee_ets *ets)
|
|
||||||
{
|
|
||||||
dcb_print_named_array("reco_tc_tsa", "reco-tc-tsa",
|
|
||||||
ets->tc_reco_tsa, ARRAY_SIZE(ets->tc_reco_tsa),
|
|
||||||
dcb_print_array_tsa);
|
|
||||||
}
|
|
||||||
|
|
||||||
static void dcb_ets_print_reco_prio_tc(const struct ieee_ets *ets)
|
|
||||||
{
|
|
||||||
dcb_print_named_array("reco_prio_tc", "reco-prio-tc",
|
|
||||||
ets->reco_prio_tc, ARRAY_SIZE(ets->reco_prio_tc),
|
|
||||||
dcb_print_array_u8);
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Print every ETS attribute: willing, ets-cap and cbs share the first line,
 * then each array follows with a print_nl() after it.
 */
static void dcb_ets_print(const struct ieee_ets *ets)
{
	/* Scalar settings. */
	dcb_ets_print_willing(ets);
	dcb_ets_print_ets_cap(ets);
	dcb_ets_print_cbs(ets);
	print_nl();

	/* Operational configuration. */
	dcb_ets_print_tc_bw(ets);
	print_nl();
	dcb_ets_print_pg_bw(ets);
	print_nl();
	dcb_ets_print_tc_tsa(ets);
	print_nl();
	dcb_ets_print_prio_tc(ets);
	print_nl();

	/* "reco-" counterparts. */
	dcb_ets_print_reco_tc_bw(ets);
	print_nl();
	dcb_ets_print_reco_tc_tsa(ets);
	print_nl();
	dcb_ets_print_reco_prio_tc(ets);
	print_nl();
}
|
|
||||||
|
|
||||||
static int dcb_ets_get(struct dcb *dcb, const char *dev, struct ieee_ets *ets)
|
|
||||||
{
|
|
||||||
return dcb_get_attribute(dcb, dev, DCB_ATTR_IEEE_ETS, ets, sizeof(*ets));
|
|
||||||
}
|
|
||||||
|
|
||||||
static int dcb_ets_validate_bw(const __u8 bw[], const __u8 tsa[], const char *what)
|
|
||||||
{
|
|
||||||
bool has_ets = false;
|
|
||||||
unsigned int total = 0;
|
|
||||||
unsigned int tc;
|
|
||||||
|
|
||||||
for (tc = 0; tc < IEEE_8021QAZ_MAX_TCS; tc++) {
|
|
||||||
if (tsa[tc] == IEEE_8021QAZ_TSA_ETS) {
|
|
||||||
has_ets = true;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/* TC bandwidth is only intended for ETS, but 802.1Q-2018 only requires
|
|
||||||
* that the sum be 100, and individual entries 0..100. It explicitly
|
|
||||||
* notes that non-ETS TCs can have non-0 TC bandwidth during
|
|
||||||
* reconfiguration.
|
|
||||||
*/
|
|
||||||
for (tc = 0; tc < IEEE_8021QAZ_MAX_TCS; tc++) {
|
|
||||||
if (bw[tc] > 100) {
|
|
||||||
fprintf(stderr, "%d%% for TC %d of %s is not a valid bandwidth percentage, expected 0..100%%\n",
|
|
||||||
bw[tc], tc, what);
|
|
||||||
return -EINVAL;
|
|
||||||
}
|
|
||||||
total += bw[tc];
|
|
||||||
}
|
|
||||||
|
|
||||||
/* This is what 802.1Q-2018 requires. */
|
|
||||||
if (total == 100)
|
|
||||||
return 0;
|
|
||||||
|
|
||||||
/* But this requirement does not make sense for all-strict
|
|
||||||
* configurations. Anything else than 0 does not make sense: either BW
|
|
||||||
* has not been reconfigured for the all-strict allocation yet, at which
|
|
||||||
* point we expect sum of 100. Or it has already been reconfigured, at
|
|
||||||
* which point accept 0.
|
|
||||||
*/
|
|
||||||
if (!has_ets && total == 0)
|
|
||||||
return 0;
|
|
||||||
|
|
||||||
fprintf(stderr, "Bandwidth percentages in %s sum to %d%%, expected %d%%\n",
|
|
||||||
what, total, has_ets ? 100 : 0);
|
|
||||||
return -EINVAL;
|
|
||||||
}
|
|
||||||
|
|
||||||
static int dcb_ets_set(struct dcb *dcb, const char *dev, const struct ieee_ets *ets)
|
|
||||||
{
|
|
||||||
/* Do not validate pg-bw, which is not standard and has unclear
|
|
||||||
* meaning.
|
|
||||||
*/
|
|
||||||
if (dcb_ets_validate_bw(ets->tc_tx_bw, ets->tc_tsa, "tc-bw") ||
|
|
||||||
dcb_ets_validate_bw(ets->tc_reco_bw, ets->tc_reco_tsa, "reco-tc-bw"))
|
|
||||||
return -EINVAL;
|
|
||||||
|
|
||||||
return dcb_set_attribute(dcb, dev, DCB_ATTR_IEEE_ETS, ets, sizeof(*ets));
|
|
||||||
}
|
|
||||||
|
|
||||||
static int dcb_cmd_ets_set(struct dcb *dcb, const char *dev, int argc, char **argv)
|
|
||||||
{
|
|
||||||
struct ieee_ets ets;
|
|
||||||
int ret;
|
|
||||||
|
|
||||||
if (!argc) {
|
|
||||||
dcb_ets_help_set();
|
|
||||||
return 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
ret = dcb_ets_get(dcb, dev, &ets);
|
|
||||||
if (ret)
|
|
||||||
return ret;
|
|
||||||
|
|
||||||
do {
|
|
||||||
if (matches(*argv, "help") == 0) {
|
|
||||||
dcb_ets_help_set();
|
|
||||||
return 0;
|
|
||||||
} else if (matches(*argv, "willing") == 0) {
|
|
||||||
NEXT_ARG();
|
|
||||||
ets.willing = parse_on_off("willing", *argv, &ret);
|
|
||||||
if (ret)
|
|
||||||
return ret;
|
|
||||||
} else if (matches(*argv, "tc-tsa") == 0) {
|
|
||||||
NEXT_ARG();
|
|
||||||
ret = parse_mapping(&argc, &argv, true, &dcb_ets_parse_mapping_tc_tsa,
|
|
||||||
ets.tc_tsa);
|
|
||||||
if (ret) {
|
|
||||||
fprintf(stderr, "Invalid tc-tsa mapping %s\n", *argv);
|
|
||||||
return ret;
|
|
||||||
}
|
|
||||||
continue;
|
|
||||||
} else if (matches(*argv, "reco-tc-tsa") == 0) {
|
|
||||||
NEXT_ARG();
|
|
||||||
ret = parse_mapping(&argc, &argv, true, &dcb_ets_parse_mapping_tc_tsa,
|
|
||||||
ets.tc_reco_tsa);
|
|
||||||
if (ret) {
|
|
||||||
fprintf(stderr, "Invalid reco-tc-tsa mapping %s\n", *argv);
|
|
||||||
return ret;
|
|
||||||
}
|
|
||||||
continue;
|
|
||||||
} else if (matches(*argv, "tc-bw") == 0) {
|
|
||||||
NEXT_ARG();
|
|
||||||
ret = parse_mapping(&argc, &argv, true, &dcb_ets_parse_mapping_tc_bw,
|
|
||||||
ets.tc_tx_bw);
|
|
||||||
if (ret) {
|
|
||||||
fprintf(stderr, "Invalid tc-bw mapping %s\n", *argv);
|
|
||||||
return ret;
|
|
||||||
}
|
|
||||||
continue;
|
|
||||||
} else if (matches(*argv, "pg-bw") == 0) {
|
|
||||||
NEXT_ARG();
|
|
||||||
ret = parse_mapping(&argc, &argv, true, &dcb_ets_parse_mapping_tc_bw,
|
|
||||||
ets.tc_rx_bw);
|
|
||||||
if (ret) {
|
|
||||||
fprintf(stderr, "Invalid pg-bw mapping %s\n", *argv);
|
|
||||||
return ret;
|
|
||||||
}
|
|
||||||
continue;
|
|
||||||
} else if (matches(*argv, "reco-tc-bw") == 0) {
|
|
||||||
NEXT_ARG();
|
|
||||||
ret = parse_mapping(&argc, &argv, true, &dcb_ets_parse_mapping_tc_bw,
|
|
||||||
ets.tc_reco_bw);
|
|
||||||
if (ret) {
|
|
||||||
fprintf(stderr, "Invalid reco-tc-bw mapping %s\n", *argv);
|
|
||||||
return ret;
|
|
||||||
}
|
|
||||||
continue;
|
|
||||||
} else if (matches(*argv, "prio-tc") == 0) {
|
|
||||||
NEXT_ARG();
|
|
||||||
ret = parse_mapping(&argc, &argv, true, &dcb_ets_parse_mapping_prio_tc,
|
|
||||||
ets.prio_tc);
|
|
||||||
if (ret) {
|
|
||||||
fprintf(stderr, "Invalid prio-tc mapping %s\n", *argv);
|
|
||||||
return ret;
|
|
||||||
}
|
|
||||||
continue;
|
|
||||||
} else if (matches(*argv, "reco-prio-tc") == 0) {
|
|
||||||
NEXT_ARG();
|
|
||||||
ret = parse_mapping(&argc, &argv, true, &dcb_ets_parse_mapping_prio_tc,
|
|
||||||
ets.reco_prio_tc);
|
|
||||||
if (ret) {
|
|
||||||
fprintf(stderr, "Invalid reco-prio-tc mapping %s\n", *argv);
|
|
||||||
return ret;
|
|
||||||
}
|
|
||||||
continue;
|
|
||||||
} else {
|
|
||||||
fprintf(stderr, "What is \"%s\"?\n", *argv);
|
|
||||||
dcb_ets_help_set();
|
|
||||||
return -EINVAL;
|
|
||||||
}
|
|
||||||
|
|
||||||
NEXT_ARG_FWD();
|
|
||||||
} while (argc > 0);
|
|
||||||
|
|
||||||
return dcb_ets_set(dcb, dev, &ets);
|
|
||||||
}
|
|
||||||
|
|
||||||
static int dcb_cmd_ets_show(struct dcb *dcb, const char *dev, int argc, char **argv)
|
|
||||||
{
|
|
||||||
struct ieee_ets ets;
|
|
||||||
int ret;
|
|
||||||
|
|
||||||
ret = dcb_ets_get(dcb, dev, &ets);
|
|
||||||
if (ret)
|
|
||||||
return ret;
|
|
||||||
|
|
||||||
open_json_object(NULL);
|
|
||||||
|
|
||||||
if (!argc) {
|
|
||||||
dcb_ets_print(&ets);
|
|
||||||
goto out;
|
|
||||||
}
|
|
||||||
|
|
||||||
do {
|
|
||||||
if (matches(*argv, "help") == 0) {
|
|
||||||
dcb_ets_help_show();
|
|
||||||
return 0;
|
|
||||||
} else if (matches(*argv, "willing") == 0) {
|
|
||||||
dcb_ets_print_willing(&ets);
|
|
||||||
print_nl();
|
|
||||||
} else if (matches(*argv, "ets-cap") == 0) {
|
|
||||||
dcb_ets_print_ets_cap(&ets);
|
|
||||||
print_nl();
|
|
||||||
} else if (matches(*argv, "cbs") == 0) {
|
|
||||||
dcb_ets_print_cbs(&ets);
|
|
||||||
print_nl();
|
|
||||||
} else if (matches(*argv, "tc-tsa") == 0) {
|
|
||||||
dcb_ets_print_tc_tsa(&ets);
|
|
||||||
print_nl();
|
|
||||||
} else if (matches(*argv, "reco-tc-tsa") == 0) {
|
|
||||||
dcb_ets_print_reco_tc_tsa(&ets);
|
|
||||||
print_nl();
|
|
||||||
} else if (matches(*argv, "tc-bw") == 0) {
|
|
||||||
dcb_ets_print_tc_bw(&ets);
|
|
||||||
print_nl();
|
|
||||||
} else if (matches(*argv, "pg-bw") == 0) {
|
|
||||||
dcb_ets_print_pg_bw(&ets);
|
|
||||||
print_nl();
|
|
||||||
} else if (matches(*argv, "reco-tc-bw") == 0) {
|
|
||||||
dcb_ets_print_reco_tc_bw(&ets);
|
|
||||||
print_nl();
|
|
||||||
} else if (matches(*argv, "prio-tc") == 0) {
|
|
||||||
dcb_ets_print_prio_tc(&ets);
|
|
||||||
print_nl();
|
|
||||||
} else if (matches(*argv, "reco-prio-tc") == 0) {
|
|
||||||
dcb_ets_print_reco_prio_tc(&ets);
|
|
||||||
print_nl();
|
|
||||||
} else {
|
|
||||||
fprintf(stderr, "What is \"%s\"?\n", *argv);
|
|
||||||
dcb_ets_help_show();
|
|
||||||
return -EINVAL;
|
|
||||||
}
|
|
||||||
|
|
||||||
NEXT_ARG_FWD();
|
|
||||||
} while (argc > 0);
|
|
||||||
|
|
||||||
out:
|
|
||||||
close_json_object();
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
int dcb_cmd_ets(struct dcb *dcb, int argc, char **argv)
|
|
||||||
{
|
|
||||||
if (!argc || matches(*argv, "help") == 0) {
|
|
||||||
dcb_ets_help();
|
|
||||||
return 0;
|
|
||||||
} else if (matches(*argv, "show") == 0) {
|
|
||||||
NEXT_ARG_FWD();
|
|
||||||
return dcb_cmd_parse_dev(dcb, argc, argv, dcb_cmd_ets_show, dcb_ets_help_show);
|
|
||||||
} else if (matches(*argv, "set") == 0) {
|
|
||||||
NEXT_ARG_FWD();
|
|
||||||
return dcb_cmd_parse_dev(dcb, argc, argv, dcb_cmd_ets_set, dcb_ets_help_set);
|
|
||||||
} else {
|
|
||||||
fprintf(stderr, "What is \"%s\"?\n", *argv);
|
|
||||||
dcb_ets_help();
|
|
||||||
return -EINVAL;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
@ -1,182 +0,0 @@
|
||||||
// SPDX-License-Identifier: GPL-2.0+
|
|
||||||
|
|
||||||
#include <errno.h>
|
|
||||||
#include <inttypes.h>
|
|
||||||
#include <stdio.h>
|
|
||||||
#include <linux/dcbnl.h>
|
|
||||||
|
|
||||||
#include "dcb.h"
|
|
||||||
#include "utils.h"
|
|
||||||
|
|
||||||
/* Print the usage text of "dcb maxrate set" on stderr. */
static void dcb_maxrate_help_set(void)
{
	static const char usage[] =
		"Usage: dcb maxrate set dev STRING\n"
		" [ tc-maxrate RATE-MAP ]\n"
		"\n"
		" where RATE-MAP := [ RATE-MAP ] RATE-MAPPING\n"
		" RATE-MAPPING := { all | TC }:RATE\n"
		" TC := { 0 .. 7 }\n"
		"\n";

	/* The text holds no conversion specifiers, so fputs() emits it verbatim. */
	fputs(usage, stderr);
}
|
|
||||||
|
|
||||||
/* Print the usage text of "dcb maxrate show" on stderr. */
static void dcb_maxrate_help_show(void)
{
	static const char usage[] =
		"Usage: dcb [ -i ] maxrate show dev STRING\n"
		" [ tc-maxrate ]\n"
		"\n";

	/* The text holds no conversion specifiers, so fputs() emits it verbatim. */
	fputs(usage, stderr);
}
|
|
||||||
|
|
||||||
/*
 * Print the full "dcb maxrate" usage on stderr: the "help" line itself,
 * followed by the show and the set usage, in that order.
 */
static void dcb_maxrate_help(void)
{
	static const char usage[] =
		"Usage: dcb maxrate help\n"
		"\n";

	fputs(usage, stderr);
	dcb_maxrate_help_show();
	dcb_maxrate_help_set();
}
|
|
||||||
|
|
||||||
/*
 * parse_mapping() callback for one "TC:RATE" pair of "tc-maxrate".
 *
 * @key is the traffic class, @value the textual rate, @data the opaque
 * pointer given to parse_mapping() (forwarded untouched to
 * dcb_parse_mapping()).
 *
 * Returns -EINVAL when get_rate64() rejects @value, otherwise whatever
 * dcb_parse_mapping() returns.
 */
static int dcb_maxrate_parse_mapping_tc_maxrate(__u32 key, char *value, void *data)
{
	__u64 parsed_rate;
	int bad_rate;

	bad_rate = get_rate64(&parsed_rate, value);
	if (bad_rate)
		return -EINVAL;

	/* Key is bounded by the highest TC index; -1 is passed as the value limit. */
	return dcb_parse_mapping("TC", key, IEEE_8021QAZ_MAX_TCS - 1,
				 "RATE", parsed_rate, -1,
				 dcb_set_u64, data);
}
|
|
||||||
|
|
||||||
static void dcb_maxrate_print_tc_maxrate(struct dcb *dcb, const struct ieee_maxrate *maxrate)
|
|
||||||
{
|
|
||||||
size_t size = ARRAY_SIZE(maxrate->tc_maxrate);
|
|
||||||
SPRINT_BUF(b);
|
|
||||||
size_t i;
|
|
||||||
|
|
||||||
open_json_array(PRINT_JSON, "tc_maxrate");
|
|
||||||
print_string(PRINT_FP, NULL, "tc-maxrate ", NULL);
|
|
||||||
|
|
||||||
for (i = 0; i < size; i++) {
|
|
||||||
snprintf(b, sizeof(b), "%zd:%%s ", i);
|
|
||||||
print_rate(dcb->use_iec, PRINT_ANY, NULL, b, maxrate->tc_maxrate[i]);
|
|
||||||
}
|
|
||||||
|
|
||||||
close_json_array(PRINT_JSON, "tc_maxrate");
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
 * Default "show" output when no field is requested: the tc-maxrate line
 * followed by a line break.
 */
static void dcb_maxrate_print(struct dcb *dcb,
			      const struct ieee_maxrate *maxrate)
{
	dcb_maxrate_print_tc_maxrate(dcb, maxrate);
	print_nl();
}
|
|
||||||
|
|
||||||
static int dcb_maxrate_get(struct dcb *dcb, const char *dev, struct ieee_maxrate *maxrate)
|
|
||||||
{
|
|
||||||
return dcb_get_attribute(dcb, dev, DCB_ATTR_IEEE_MAXRATE, maxrate, sizeof(*maxrate));
|
|
||||||
}
|
|
||||||
|
|
||||||
static int dcb_maxrate_set(struct dcb *dcb, const char *dev, const struct ieee_maxrate *maxrate)
|
|
||||||
{
|
|
||||||
return dcb_set_attribute(dcb, dev, DCB_ATTR_IEEE_MAXRATE, maxrate, sizeof(*maxrate));
|
|
||||||
}
|
|
||||||
|
|
||||||
static int dcb_cmd_maxrate_set(struct dcb *dcb, const char *dev, int argc, char **argv)
|
|
||||||
{
|
|
||||||
struct ieee_maxrate maxrate;
|
|
||||||
int ret;
|
|
||||||
|
|
||||||
if (!argc) {
|
|
||||||
dcb_maxrate_help_set();
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
ret = dcb_maxrate_get(dcb, dev, &maxrate);
|
|
||||||
if (ret)
|
|
||||||
return ret;
|
|
||||||
|
|
||||||
do {
|
|
||||||
if (matches(*argv, "help") == 0) {
|
|
||||||
dcb_maxrate_help_set();
|
|
||||||
return 0;
|
|
||||||
} else if (matches(*argv, "tc-maxrate") == 0) {
|
|
||||||
NEXT_ARG();
|
|
||||||
ret = parse_mapping(&argc, &argv, true,
|
|
||||||
&dcb_maxrate_parse_mapping_tc_maxrate, &maxrate);
|
|
||||||
if (ret) {
|
|
||||||
fprintf(stderr, "Invalid mapping %s\n", *argv);
|
|
||||||
return ret;
|
|
||||||
}
|
|
||||||
continue;
|
|
||||||
} else {
|
|
||||||
fprintf(stderr, "What is \"%s\"?\n", *argv);
|
|
||||||
dcb_maxrate_help_set();
|
|
||||||
return -EINVAL;
|
|
||||||
}
|
|
||||||
|
|
||||||
NEXT_ARG_FWD();
|
|
||||||
} while (argc > 0);
|
|
||||||
|
|
||||||
return dcb_maxrate_set(dcb, dev, &maxrate);
|
|
||||||
}
|
|
||||||
|
|
||||||
static int dcb_cmd_maxrate_show(struct dcb *dcb, const char *dev, int argc, char **argv)
|
|
||||||
{
|
|
||||||
struct ieee_maxrate maxrate;
|
|
||||||
int ret;
|
|
||||||
|
|
||||||
ret = dcb_maxrate_get(dcb, dev, &maxrate);
|
|
||||||
if (ret)
|
|
||||||
return ret;
|
|
||||||
|
|
||||||
open_json_object(NULL);
|
|
||||||
|
|
||||||
if (!argc) {
|
|
||||||
dcb_maxrate_print(dcb, &maxrate);
|
|
||||||
goto out;
|
|
||||||
}
|
|
||||||
|
|
||||||
do {
|
|
||||||
if (matches(*argv, "help") == 0) {
|
|
||||||
dcb_maxrate_help_show();
|
|
||||||
return 0;
|
|
||||||
} else if (matches(*argv, "tc-maxrate") == 0) {
|
|
||||||
dcb_maxrate_print_tc_maxrate(dcb, &maxrate);
|
|
||||||
print_nl();
|
|
||||||
} else {
|
|
||||||
fprintf(stderr, "What is \"%s\"?\n", *argv);
|
|
||||||
dcb_maxrate_help_show();
|
|
||||||
return -EINVAL;
|
|
||||||
}
|
|
||||||
|
|
||||||
NEXT_ARG_FWD();
|
|
||||||
} while (argc > 0);
|
|
||||||
|
|
||||||
out:
|
|
||||||
close_json_object();
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
int dcb_cmd_maxrate(struct dcb *dcb, int argc, char **argv)
|
|
||||||
{
|
|
||||||
if (!argc || matches(*argv, "help") == 0) {
|
|
||||||
dcb_maxrate_help();
|
|
||||||
return 0;
|
|
||||||
} else if (matches(*argv, "show") == 0) {
|
|
||||||
NEXT_ARG_FWD();
|
|
||||||
return dcb_cmd_parse_dev(dcb, argc, argv,
|
|
||||||
dcb_cmd_maxrate_show, dcb_maxrate_help_show);
|
|
||||||
} else if (matches(*argv, "set") == 0) {
|
|
||||||
NEXT_ARG_FWD();
|
|
||||||
return dcb_cmd_parse_dev(dcb, argc, argv,
|
|
||||||
dcb_cmd_maxrate_set, dcb_maxrate_help_set);
|
|
||||||
} else {
|
|
||||||
fprintf(stderr, "What is \"%s\"?\n", *argv);
|
|
||||||
dcb_maxrate_help();
|
|
||||||
return -EINVAL;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
286
dcb/dcb_pfc.c
286
dcb/dcb_pfc.c
|
|
@ -1,286 +0,0 @@
|
||||||
// SPDX-License-Identifier: GPL-2.0+
|
|
||||||
|
|
||||||
#include <errno.h>
|
|
||||||
#include <stdio.h>
|
|
||||||
#include <linux/dcbnl.h>
|
|
||||||
|
|
||||||
#include "dcb.h"
|
|
||||||
#include "utils.h"
|
|
||||||
|
|
||||||
/* Print the usage text of "dcb pfc set" on stderr. */
static void dcb_pfc_help_set(void)
{
	static const char usage[] =
		"Usage: dcb pfc set dev STRING\n"
		" [ prio-pfc PFC-MAP ]\n"
		" [ macsec-bypass { on | off } ]\n"
		" [ delay INTEGER ]\n"
		"\n"
		" where PFC-MAP := [ PFC-MAP ] PFC-MAPPING\n"
		" PFC-MAPPING := { all | TC }:PFC\n"
		" TC := { 0 .. 7 }\n"
		" PFC := { on | off }\n"
		"\n";

	/* The text holds no conversion specifiers, so fputs() emits it verbatim. */
	fputs(usage, stderr);
}
|
|
||||||
|
|
||||||
/* Print the usage text of "dcb pfc show" on stderr. */
static void dcb_pfc_help_show(void)
{
	static const char usage[] =
		"Usage: dcb [ -s ] pfc show dev STRING\n"
		" [ pfc-cap ] [ prio-pfc ] [ macsec-bypass ]\n"
		" [ delay ] [ requests ] [ indications ]\n"
		"\n";

	/* The text holds no conversion specifiers, so fputs() emits it verbatim. */
	fputs(usage, stderr);
}
|
|
||||||
|
|
||||||
/*
 * Print the full "dcb pfc" usage on stderr: the "help" line itself,
 * followed by the show and the set usage, in that order.
 */
static void dcb_pfc_help(void)
{
	static const char usage[] =
		"Usage: dcb pfc help\n"
		"\n";

	fputs(usage, stderr);
	dcb_pfc_help_show();
	dcb_pfc_help_set();
}
|
|
||||||
|
|
||||||
/*
 * Expand the bitmask @pfc_en into @array: element i becomes 1 when bit i of
 * @pfc_en is set and 0 otherwise, for i in 0 .. IEEE_8021QAZ_MAX_TCS - 1.
 */
static void dcb_pfc_to_array(__u8 array[IEEE_8021QAZ_MAX_TCS], __u8 pfc_en)
{
	int bit = 0;

	while (bit < IEEE_8021QAZ_MAX_TCS) {
		array[bit] = (pfc_en >> bit) & 1;
		bit++;
	}
}
|
|
||||||
|
|
||||||
/*
 * Collapse @array back into a bitmask: bit i of the result is set when
 * array[i] is non-zero, for i in 0 .. IEEE_8021QAZ_MAX_TCS - 1. The result
 * is stored in *@pfc_en_p, replacing whatever was there.
 *
 * @array is only read, hence const-qualified; callers holding a plain
 * __u8 array can pass it unchanged.
 */
static void dcb_pfc_from_array(const __u8 array[IEEE_8021QAZ_MAX_TCS], __u8 *pfc_en_p)
{
	__u8 pfc_en = 0;
	int i;

	for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) {
		if (array[i])
			pfc_en |= 1 << i;
	}

	*pfc_en_p = pfc_en;
}
|
|
||||||
|
|
||||||
/*
 * parse_mapping() callback for one "PRIO:{on|off}" pair of "prio-pfc".
 *
 * @data points at the struct ieee_pfc being edited. Its pfc_en bitmask is
 * unpacked into a per-priority array, the entry selected by @key is updated
 * through dcb_parse_mapping(), and the array is packed back into the mask.
 *
 * Returns the error reported by parse_on_off() or dcb_parse_mapping(), in
 * which case pfc->pfc_en is left untouched; 0 on success.
 */
static int dcb_pfc_parse_mapping_prio_pfc(__u32 key, char *value, void *data)
{
	__u8 per_prio[IEEE_8021QAZ_MAX_TCS];
	struct ieee_pfc *pfc = data;
	bool on;
	int err;

	on = parse_on_off("PFC", value, &err);
	if (err)
		return err;

	dcb_pfc_to_array(per_prio, pfc->pfc_en);

	err = dcb_parse_mapping("PRIO", key, IEEE_8021QAZ_MAX_TCS - 1,
				"PFC", on, -1,
				dcb_set_u8, per_prio);
	if (err)
		return err;

	dcb_pfc_from_array(per_prio, &pfc->pfc_en);
	return 0;
}
|
|
||||||
|
|
||||||
static void dcb_pfc_print_pfc_cap(const struct ieee_pfc *pfc)
|
|
||||||
{
|
|
||||||
print_uint(PRINT_ANY, "pfc_cap", "pfc-cap %d ", pfc->pfc_cap);
|
|
||||||
}
|
|
||||||
|
|
||||||
static void dcb_pfc_print_macsec_bypass(const struct ieee_pfc *pfc)
|
|
||||||
{
|
|
||||||
print_on_off(PRINT_ANY, "macsec_bypass", "macsec-bypass %s ", pfc->mbc);
|
|
||||||
}
|
|
||||||
|
|
||||||
static void dcb_pfc_print_delay(const struct ieee_pfc *pfc)
|
|
||||||
{
|
|
||||||
print_uint(PRINT_ANY, "delay", "delay %d ", pfc->delay);
|
|
||||||
}
|
|
||||||
|
|
||||||
static void dcb_pfc_print_prio_pfc(const struct ieee_pfc *pfc)
|
|
||||||
{
|
|
||||||
__u8 pfc_en[IEEE_8021QAZ_MAX_TCS];
|
|
||||||
|
|
||||||
dcb_pfc_to_array(pfc_en, pfc->pfc_en);
|
|
||||||
dcb_print_named_array("prio_pfc", "prio-pfc",
|
|
||||||
pfc_en, ARRAY_SIZE(pfc_en), &dcb_print_array_on_off);
|
|
||||||
}
|
|
||||||
|
|
||||||
static void dcb_pfc_print_requests(const struct ieee_pfc *pfc)
|
|
||||||
{
|
|
||||||
open_json_array(PRINT_JSON, "requests");
|
|
||||||
print_string(PRINT_FP, NULL, "requests ", NULL);
|
|
||||||
dcb_print_array_u64(pfc->requests, ARRAY_SIZE(pfc->requests));
|
|
||||||
close_json_array(PRINT_JSON, "requests");
|
|
||||||
}
|
|
||||||
|
|
||||||
static void dcb_pfc_print_indications(const struct ieee_pfc *pfc)
|
|
||||||
{
|
|
||||||
open_json_array(PRINT_JSON, "indications");
|
|
||||||
print_string(PRINT_FP, NULL, "indications ", NULL);
|
|
||||||
dcb_print_array_u64(pfc->indications, ARRAY_SIZE(pfc->indications));
|
|
||||||
close_json_array(PRINT_JSON, "indications");
|
|
||||||
}
|
|
||||||
|
|
||||||
static void dcb_pfc_print(const struct dcb *dcb, const struct ieee_pfc *pfc)
|
|
||||||
{
|
|
||||||
dcb_pfc_print_pfc_cap(pfc);
|
|
||||||
dcb_pfc_print_macsec_bypass(pfc);
|
|
||||||
dcb_pfc_print_delay(pfc);
|
|
||||||
print_nl();
|
|
||||||
|
|
||||||
dcb_pfc_print_prio_pfc(pfc);
|
|
||||||
print_nl();
|
|
||||||
|
|
||||||
if (dcb->stats) {
|
|
||||||
dcb_pfc_print_requests(pfc);
|
|
||||||
print_nl();
|
|
||||||
|
|
||||||
dcb_pfc_print_indications(pfc);
|
|
||||||
print_nl();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
static int dcb_pfc_get(struct dcb *dcb, const char *dev, struct ieee_pfc *pfc)
|
|
||||||
{
|
|
||||||
return dcb_get_attribute(dcb, dev, DCB_ATTR_IEEE_PFC, pfc, sizeof(*pfc));
|
|
||||||
}
|
|
||||||
|
|
||||||
static int dcb_pfc_set(struct dcb *dcb, const char *dev, const struct ieee_pfc *pfc)
|
|
||||||
{
|
|
||||||
return dcb_set_attribute(dcb, dev, DCB_ATTR_IEEE_PFC, pfc, sizeof(*pfc));
|
|
||||||
}
|
|
||||||
|
|
||||||
static int dcb_cmd_pfc_set(struct dcb *dcb, const char *dev, int argc, char **argv)
|
|
||||||
{
|
|
||||||
struct ieee_pfc pfc;
|
|
||||||
int ret;
|
|
||||||
|
|
||||||
if (!argc) {
|
|
||||||
dcb_pfc_help_set();
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
ret = dcb_pfc_get(dcb, dev, &pfc);
|
|
||||||
if (ret)
|
|
||||||
return ret;
|
|
||||||
|
|
||||||
do {
|
|
||||||
if (matches(*argv, "help") == 0) {
|
|
||||||
dcb_pfc_help_set();
|
|
||||||
return 0;
|
|
||||||
} else if (matches(*argv, "prio-pfc") == 0) {
|
|
||||||
NEXT_ARG();
|
|
||||||
ret = parse_mapping(&argc, &argv, true,
|
|
||||||
&dcb_pfc_parse_mapping_prio_pfc, &pfc);
|
|
||||||
if (ret) {
|
|
||||||
fprintf(stderr, "Invalid pfc mapping %s\n", *argv);
|
|
||||||
return ret;
|
|
||||||
}
|
|
||||||
continue;
|
|
||||||
} else if (matches(*argv, "macsec-bypass") == 0) {
|
|
||||||
NEXT_ARG();
|
|
||||||
pfc.mbc = parse_on_off("macsec-bypass", *argv, &ret);
|
|
||||||
if (ret)
|
|
||||||
return ret;
|
|
||||||
} else if (matches(*argv, "delay") == 0) {
|
|
||||||
NEXT_ARG();
|
|
||||||
/* Do not support the size notations for delay.
|
|
||||||
* Delay is specified in "bit times", not bits, so
|
|
||||||
* it is not applicable. At the same time it would
|
|
||||||
* be confusing that 10Kbit does not mean 10240,
|
|
||||||
* but 1280.
|
|
||||||
*/
|
|
||||||
if (get_u16(&pfc.delay, *argv, 0)) {
|
|
||||||
fprintf(stderr, "Invalid delay `%s', expected an integer 0..65535\n",
|
|
||||||
*argv);
|
|
||||||
return -EINVAL;
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
fprintf(stderr, "What is \"%s\"?\n", *argv);
|
|
||||||
dcb_pfc_help_set();
|
|
||||||
return -EINVAL;
|
|
||||||
}
|
|
||||||
|
|
||||||
NEXT_ARG_FWD();
|
|
||||||
} while (argc > 0);
|
|
||||||
|
|
||||||
return dcb_pfc_set(dcb, dev, &pfc);
|
|
||||||
}
|
|
||||||
|
|
||||||
static int dcb_cmd_pfc_show(struct dcb *dcb, const char *dev, int argc, char **argv)
|
|
||||||
{
|
|
||||||
struct ieee_pfc pfc;
|
|
||||||
int ret;
|
|
||||||
|
|
||||||
ret = dcb_pfc_get(dcb, dev, &pfc);
|
|
||||||
if (ret)
|
|
||||||
return ret;
|
|
||||||
|
|
||||||
open_json_object(NULL);
|
|
||||||
|
|
||||||
if (!argc) {
|
|
||||||
dcb_pfc_print(dcb, &pfc);
|
|
||||||
goto out;
|
|
||||||
}
|
|
||||||
|
|
||||||
do {
|
|
||||||
if (matches(*argv, "help") == 0) {
|
|
||||||
dcb_pfc_help_show();
|
|
||||||
return 0;
|
|
||||||
} else if (matches(*argv, "prio-pfc") == 0) {
|
|
||||||
dcb_pfc_print_prio_pfc(&pfc);
|
|
||||||
print_nl();
|
|
||||||
} else if (matches(*argv, "pfc-cap") == 0) {
|
|
||||||
dcb_pfc_print_pfc_cap(&pfc);
|
|
||||||
print_nl();
|
|
||||||
} else if (matches(*argv, "macsec-bypass") == 0) {
|
|
||||||
dcb_pfc_print_macsec_bypass(&pfc);
|
|
||||||
print_nl();
|
|
||||||
} else if (matches(*argv, "delay") == 0) {
|
|
||||||
dcb_pfc_print_delay(&pfc);
|
|
||||||
print_nl();
|
|
||||||
} else if (matches(*argv, "requests") == 0) {
|
|
||||||
dcb_pfc_print_requests(&pfc);
|
|
||||||
print_nl();
|
|
||||||
} else if (matches(*argv, "indications") == 0) {
|
|
||||||
dcb_pfc_print_indications(&pfc);
|
|
||||||
print_nl();
|
|
||||||
} else {
|
|
||||||
fprintf(stderr, "What is \"%s\"?\n", *argv);
|
|
||||||
dcb_pfc_help_show();
|
|
||||||
return -EINVAL;
|
|
||||||
}
|
|
||||||
|
|
||||||
NEXT_ARG_FWD();
|
|
||||||
} while (argc > 0);
|
|
||||||
|
|
||||||
out:
|
|
||||||
close_json_object();
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
int dcb_cmd_pfc(struct dcb *dcb, int argc, char **argv)
|
|
||||||
{
|
|
||||||
if (!argc || matches(*argv, "help") == 0) {
|
|
||||||
dcb_pfc_help();
|
|
||||||
return 0;
|
|
||||||
} else if (matches(*argv, "show") == 0) {
|
|
||||||
NEXT_ARG_FWD();
|
|
||||||
return dcb_cmd_parse_dev(dcb, argc, argv,
|
|
||||||
dcb_cmd_pfc_show, dcb_pfc_help_show);
|
|
||||||
} else if (matches(*argv, "set") == 0) {
|
|
||||||
NEXT_ARG_FWD();
|
|
||||||
return dcb_cmd_parse_dev(dcb, argc, argv,
|
|
||||||
dcb_cmd_pfc_set, dcb_pfc_help_set);
|
|
||||||
} else {
|
|
||||||
fprintf(stderr, "What is \"%s\"?\n", *argv);
|
|
||||||
dcb_pfc_help();
|
|
||||||
return -EINVAL;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
@ -1,25 +1,21 @@
|
||||||
# SPDX-License-Identifier: GPL-2.0
|
include ../Config
|
||||||
include ../config.mk
|
|
||||||
|
|
||||||
TARGETS :=
|
|
||||||
|
|
||||||
ifeq ($(HAVE_MNL),y)
|
ifeq ($(HAVE_MNL),y)
|
||||||
|
|
||||||
DEVLINKOBJ = devlink.o mnlg.o
|
DEVLINKOBJ = devlink.o mnlg.o
|
||||||
TARGETS += devlink
|
TARGETS=devlink
|
||||||
LDLIBS += -lm
|
|
||||||
|
CFLAGS += $(shell $(PKG_CONFIG) libmnl --cflags)
|
||||||
|
LDLIBS += $(shell $(PKG_CONFIG) libmnl --libs)
|
||||||
|
|
||||||
endif
|
endif
|
||||||
|
|
||||||
all: $(TARGETS) $(LIBS)
|
all: $(TARGETS) $(LIBS)
|
||||||
|
|
||||||
devlink: $(DEVLINKOBJ) $(LIBNETLINK)
|
devlink: $(DEVLINKOBJ)
|
||||||
$(QUIET_LINK)$(CC) $^ $(LDFLAGS) $(LDLIBS) -o $@
|
$(QUIET_LINK)$(CC) $^ $(LDFLAGS) $(LDLIBS) -o $@
|
||||||
|
|
||||||
install: all
|
install: all
|
||||||
for i in $(TARGETS); \
|
install -m 0755 $(TARGETS) $(DESTDIR)$(SBINDIR)
|
||||||
do install -m 0755 $$i $(DESTDIR)$(SBINDIR); \
|
|
||||||
done
|
|
||||||
|
|
||||||
clean:
|
clean:
|
||||||
rm -f $(DEVLINKOBJ) $(TARGETS)
|
rm -f $(DEVLINKOBJ) $(TARGETS)
|
||||||
|
|
|
||||||
6543
devlink/devlink.c
6543
devlink/devlink.c
File diff suppressed because it is too large
Load Diff
145
devlink/mnlg.c
145
devlink/mnlg.c
|
|
@ -14,12 +14,10 @@
|
||||||
#include <string.h>
|
#include <string.h>
|
||||||
#include <errno.h>
|
#include <errno.h>
|
||||||
#include <unistd.h>
|
#include <unistd.h>
|
||||||
|
#include <time.h>
|
||||||
#include <libmnl/libmnl.h>
|
#include <libmnl/libmnl.h>
|
||||||
#include <linux/genetlink.h>
|
#include <linux/genetlink.h>
|
||||||
|
|
||||||
#include "libnetlink.h"
|
|
||||||
#include "mnl_utils.h"
|
|
||||||
#include "utils.h"
|
|
||||||
#include "mnlg.h"
|
#include "mnlg.h"
|
||||||
|
|
||||||
struct mnlg_socket {
|
struct mnlg_socket {
|
||||||
|
|
@ -28,13 +26,56 @@ struct mnlg_socket {
|
||||||
uint32_t id;
|
uint32_t id;
|
||||||
uint8_t version;
|
uint8_t version;
|
||||||
unsigned int seq;
|
unsigned int seq;
|
||||||
|
unsigned int portid;
|
||||||
};
|
};
|
||||||
|
|
||||||
int mnlg_socket_send(struct mnlu_gen_socket *nlg, const struct nlmsghdr *nlh)
|
static struct nlmsghdr *__mnlg_msg_prepare(struct mnlg_socket *nlg, uint8_t cmd,
|
||||||
|
uint16_t flags, uint32_t id,
|
||||||
|
uint8_t version)
|
||||||
|
{
|
||||||
|
struct nlmsghdr *nlh;
|
||||||
|
struct genlmsghdr *genl;
|
||||||
|
|
||||||
|
nlh = mnl_nlmsg_put_header(nlg->buf);
|
||||||
|
nlh->nlmsg_type = id;
|
||||||
|
nlh->nlmsg_flags = flags;
|
||||||
|
nlg->seq = time(NULL);
|
||||||
|
nlh->nlmsg_seq = nlg->seq;
|
||||||
|
|
||||||
|
genl = mnl_nlmsg_put_extra_header(nlh, sizeof(struct genlmsghdr));
|
||||||
|
genl->cmd = cmd;
|
||||||
|
genl->version = version;
|
||||||
|
|
||||||
|
return nlh;
|
||||||
|
}
|
||||||
|
|
||||||
|
struct nlmsghdr *mnlg_msg_prepare(struct mnlg_socket *nlg, uint8_t cmd,
|
||||||
|
uint16_t flags)
|
||||||
|
{
|
||||||
|
return __mnlg_msg_prepare(nlg, cmd, flags, nlg->id, nlg->version);
|
||||||
|
}
|
||||||
|
|
||||||
|
int mnlg_socket_send(struct mnlg_socket *nlg, const struct nlmsghdr *nlh)
|
||||||
{
|
{
|
||||||
return mnl_socket_sendto(nlg->nl, nlh, nlh->nlmsg_len);
|
return mnl_socket_sendto(nlg->nl, nlh, nlh->nlmsg_len);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
 * Receive into nlg->buf and feed each datagram to mnl_cb_run() with
 * @data_cb/@data, checking against nlg->seq and nlg->portid.
 *
 * The loop keeps going while both the receive and the callback run return a
 * positive value. The first value <= 0 from either of them ends the loop and
 * is returned.
 */
int mnlg_socket_recv_run(struct mnlg_socket *nlg, mnl_cb_t data_cb, void *data)
{
	int rc;

	for (;;) {
		rc = mnl_socket_recvfrom(nlg->nl, nlg->buf,
					 MNL_SOCKET_BUFFER_SIZE);
		if (rc <= 0)
			return rc;

		/* rc is the number of bytes just received. */
		rc = mnl_cb_run(nlg->buf, rc, nlg->seq, nlg->portid,
				data_cb, data);
		if (rc <= 0)
			return rc;
	}
}
|
||||||
|
|
||||||
struct group_info {
|
struct group_info {
|
||||||
bool found;
|
bool found;
|
||||||
uint32_t id;
|
uint32_t id;
|
||||||
|
|
@ -114,17 +155,15 @@ static int get_group_id_cb(const struct nlmsghdr *nlh, void *data)
|
||||||
return MNL_CB_OK;
|
return MNL_CB_OK;
|
||||||
}
|
}
|
||||||
|
|
||||||
int mnlg_socket_group_add(struct mnlu_gen_socket *nlg, const char *group_name)
|
int mnlg_socket_group_add(struct mnlg_socket *nlg, const char *group_name)
|
||||||
{
|
{
|
||||||
struct nlmsghdr *nlh;
|
struct nlmsghdr *nlh;
|
||||||
struct group_info group_info;
|
struct group_info group_info;
|
||||||
int err;
|
int err;
|
||||||
|
|
||||||
nlh = _mnlu_gen_socket_cmd_prepare(nlg, CTRL_CMD_GETFAMILY,
|
nlh = __mnlg_msg_prepare(nlg, CTRL_CMD_GETFAMILY,
|
||||||
NLM_F_REQUEST | NLM_F_ACK,
|
NLM_F_REQUEST | NLM_F_ACK, GENL_ID_CTRL, 1);
|
||||||
GENL_ID_CTRL, 1);
|
mnl_attr_put_u32(nlh, CTRL_ATTR_FAMILY_ID, nlg->id);
|
||||||
|
|
||||||
mnl_attr_put_u16(nlh, CTRL_ATTR_FAMILY_ID, nlg->family);
|
|
||||||
|
|
||||||
err = mnlg_socket_send(nlg, nlh);
|
err = mnlg_socket_send(nlg, nlh);
|
||||||
if (err < 0)
|
if (err < 0)
|
||||||
|
|
@ -132,7 +171,7 @@ int mnlg_socket_group_add(struct mnlu_gen_socket *nlg, const char *group_name)
|
||||||
|
|
||||||
group_info.found = false;
|
group_info.found = false;
|
||||||
group_info.name = group_name;
|
group_info.name = group_name;
|
||||||
err = mnlu_gen_socket_recv_run(nlg, get_group_id_cb, &group_info);
|
err = mnlg_socket_recv_run(nlg, get_group_id_cb, &group_info);
|
||||||
if (err < 0)
|
if (err < 0)
|
||||||
return err;
|
return err;
|
||||||
|
|
||||||
|
|
@ -149,7 +188,87 @@ int mnlg_socket_group_add(struct mnlu_gen_socket *nlg, const char *group_name)
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
int mnlg_socket_get_fd(struct mnlu_gen_socket *nlg)
|
static int get_family_id_attr_cb(const struct nlattr *attr, void *data)
|
||||||
{
|
{
|
||||||
return mnl_socket_get_fd(nlg->nl);
|
const struct nlattr **tb = data;
|
||||||
|
int type = mnl_attr_get_type(attr);
|
||||||
|
|
||||||
|
if (mnl_attr_type_valid(attr, CTRL_ATTR_MAX) < 0)
|
||||||
|
return MNL_CB_ERROR;
|
||||||
|
|
||||||
|
if (type == CTRL_ATTR_FAMILY_ID &&
|
||||||
|
mnl_attr_validate(attr, MNL_TYPE_U16) < 0)
|
||||||
|
return MNL_CB_ERROR;
|
||||||
|
tb[type] = attr;
|
||||||
|
return MNL_CB_OK;
|
||||||
|
}
|
||||||
|
|
||||||
|
static int get_family_id_cb(const struct nlmsghdr *nlh, void *data)
|
||||||
|
{
|
||||||
|
uint32_t *p_id = data;
|
||||||
|
struct nlattr *tb[CTRL_ATTR_MAX + 1] = {};
|
||||||
|
struct genlmsghdr *genl = mnl_nlmsg_get_payload(nlh);
|
||||||
|
|
||||||
|
mnl_attr_parse(nlh, sizeof(*genl), get_family_id_attr_cb, tb);
|
||||||
|
if (!tb[CTRL_ATTR_FAMILY_ID])
|
||||||
|
return MNL_CB_ERROR;
|
||||||
|
*p_id = mnl_attr_get_u16(tb[CTRL_ATTR_FAMILY_ID]);
|
||||||
|
return MNL_CB_OK;
|
||||||
|
}
|
||||||
|
|
||||||
|
struct mnlg_socket *mnlg_socket_open(const char *family_name, uint8_t version)
|
||||||
|
{
|
||||||
|
struct mnlg_socket *nlg;
|
||||||
|
struct nlmsghdr *nlh;
|
||||||
|
int err;
|
||||||
|
|
||||||
|
nlg = malloc(sizeof(*nlg));
|
||||||
|
if (!nlg)
|
||||||
|
return NULL;
|
||||||
|
|
||||||
|
nlg->buf = malloc(MNL_SOCKET_BUFFER_SIZE);
|
||||||
|
if (!nlg->buf)
|
||||||
|
goto err_buf_alloc;
|
||||||
|
|
||||||
|
nlg->nl = mnl_socket_open(NETLINK_GENERIC);
|
||||||
|
if (!nlg->nl)
|
||||||
|
goto err_mnl_socket_open;
|
||||||
|
|
||||||
|
err = mnl_socket_bind(nlg->nl, 0, MNL_SOCKET_AUTOPID);
|
||||||
|
if (err < 0)
|
||||||
|
goto err_mnl_socket_bind;
|
||||||
|
|
||||||
|
nlg->portid = mnl_socket_get_portid(nlg->nl);
|
||||||
|
|
||||||
|
nlh = __mnlg_msg_prepare(nlg, CTRL_CMD_GETFAMILY,
|
||||||
|
NLM_F_REQUEST | NLM_F_ACK, GENL_ID_CTRL, 1);
|
||||||
|
mnl_attr_put_strz(nlh, CTRL_ATTR_FAMILY_NAME, family_name);
|
||||||
|
|
||||||
|
err = mnlg_socket_send(nlg, nlh);
|
||||||
|
if (err < 0)
|
||||||
|
goto err_mnlg_socket_send;
|
||||||
|
|
||||||
|
err = mnlg_socket_recv_run(nlg, get_family_id_cb, &nlg->id);
|
||||||
|
if (err < 0)
|
||||||
|
goto err_mnlg_socket_recv_run;
|
||||||
|
|
||||||
|
nlg->version = version;
|
||||||
|
return nlg;
|
||||||
|
|
||||||
|
err_mnlg_socket_recv_run:
|
||||||
|
err_mnlg_socket_send:
|
||||||
|
err_mnl_socket_bind:
|
||||||
|
mnl_socket_close(nlg->nl);
|
||||||
|
err_mnl_socket_open:
|
||||||
|
free(nlg->buf);
|
||||||
|
err_buf_alloc:
|
||||||
|
free(nlg);
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
void mnlg_socket_close(struct mnlg_socket *nlg)
|
||||||
|
{
|
||||||
|
mnl_socket_close(nlg->nl);
|
||||||
|
free(nlg->buf);
|
||||||
|
free(nlg);
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -14,10 +14,14 @@
|
||||||
|
|
||||||
#include <libmnl/libmnl.h>
|
#include <libmnl/libmnl.h>
|
||||||
|
|
||||||
struct mnlu_gen_socket;
|
struct mnlg_socket;
|
||||||
|
|
||||||
int mnlg_socket_send(struct mnlu_gen_socket *nlg, const struct nlmsghdr *nlh);
|
struct nlmsghdr *mnlg_msg_prepare(struct mnlg_socket *nlg, uint8_t cmd,
|
||||||
int mnlg_socket_group_add(struct mnlu_gen_socket *nlg, const char *group_name);
|
uint16_t flags);
|
||||||
int mnlg_socket_get_fd(struct mnlu_gen_socket *nlg);
|
int mnlg_socket_send(struct mnlg_socket *nlg, const struct nlmsghdr *nlh);
|
||||||
|
int mnlg_socket_recv_run(struct mnlg_socket *nlg, mnl_cb_t data_cb, void *data);
|
||||||
|
int mnlg_socket_group_add(struct mnlg_socket *nlg, const char *group_name);
|
||||||
|
struct mnlg_socket *mnlg_socket_open(const char *family_name, uint8_t version);
|
||||||
|
void mnlg_socket_close(struct mnlg_socket *nlg);
|
||||||
|
|
||||||
#endif /* _MNLG_H_ */
|
#endif /* _MNLG_H_ */
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,73 @@
|
||||||
|
PSFILES=ip-cref.ps ip-tunnels.ps api-ip6-flowlabels.ps ss.ps nstat.ps arpd.ps rtstat.ps tc-filters.ps
|
||||||
|
# tc-cref.ps
|
||||||
|
# api-rtnl.tex api-pmtudisc.tex api-news.tex
|
||||||
|
# iki-netdev.ps iki-neighdst.ps
|
||||||
|
|
||||||
|
|
||||||
|
LATEX=latex
|
||||||
|
DVIPS=dvips
|
||||||
|
SGML2DVI=sgml2latex
|
||||||
|
SGML2HTML=sgml2html -s 0
|
||||||
|
LPR=lpr -Zsduplex
|
||||||
|
SHELL=bash
|
||||||
|
PAGESIZE=a4
|
||||||
|
PAGESPERPAGE=2
|
||||||
|
|
||||||
|
HTMLFILES=$(subst .sgml,.html,$(shell echo *.sgml))
|
||||||
|
DVIFILES=$(subst .ps,.dvi,$(PSFILES))
|
||||||
|
PDFFILES=$(subst .ps,.pdf,$(PSFILES))
|
||||||
|
|
||||||
|
|
||||||
|
all: pstwocol
|
||||||
|
|
||||||
|
pstwocol: $(PSFILES)
|
||||||
|
|
||||||
|
html: $(HTMLFILES)
|
||||||
|
|
||||||
|
dvi: $(DVIFILES)
|
||||||
|
|
||||||
|
pdf: $(PDFFILES)
|
||||||
|
|
||||||
|
print: $(PSFILES)
|
||||||
|
$(LPR) $(PSFILES)
|
||||||
|
|
||||||
|
%.tex: %.sgml
|
||||||
|
$(SGML2DVI) --output=tex $<
|
||||||
|
|
||||||
|
%.dvi: %.sgml
|
||||||
|
$(SGML2DVI) --output=dvi $<
|
||||||
|
|
||||||
|
%.dvi: %.tex
|
||||||
|
@set -e; pass=2; echo "Running LaTeX $<"; \
|
||||||
|
while [ `$(LATEX) $< </dev/null 2>&1 | \
|
||||||
|
grep -c '^\(LaTeX Warning: Label(s) may\|No file \|! Emergency stop\)'` -ge 1 ]; do \
|
||||||
|
if [ $$pass -gt 3 ]; then \
|
||||||
|
echo "Seems, something is wrong. Try by hands." ; exit 1 ; \
|
||||||
|
fi; \
|
||||||
|
echo "Re-running LaTeX $<, $${pass}d pass"; pass=$$[$$pass + 1]; \
|
||||||
|
done
|
||||||
|
|
||||||
|
%.pdf: %.tex
|
||||||
|
@set -e; pass=2; echo "Running pdfLaTeX $<"; \
|
||||||
|
while [ `pdflatex $< </dev/null 2>&1 | \
|
||||||
|
grep -c '^\(LaTeX Warning: Label(s) may\|No file \|! Emergency stop\)'` -ge 1 ]; do \
|
||||||
|
if [ $$pass -gt 3 ]; then \
|
||||||
|
echo "Seems, something is wrong. Try by hands." ; exit 1 ; \
|
||||||
|
fi; \
|
||||||
|
echo "Re-running pdfLaTeX $<, $${pass}d pass"; pass=$$[$$pass + 1]; \
|
||||||
|
done
|
||||||
|
#%.pdf: %.ps
|
||||||
|
# ps2pdf $<
|
||||||
|
|
||||||
|
%.ps: %.dvi
|
||||||
|
$(DVIPS) $< -o $@
|
||||||
|
|
||||||
|
%.html: %.sgml
|
||||||
|
$(SGML2HTML) $<
|
||||||
|
|
||||||
|
install:
|
||||||
|
install -m 0644 $(shell echo *.tex) $(DESTDIR)$(DOCDIR)
|
||||||
|
install -m 0644 $(shell echo *.sgml) $(DESTDIR)$(DOCDIR)
|
||||||
|
|
||||||
|
clean:
|
||||||
|
rm -f *.aux *.log *.toc $(PSFILES) $(DVIFILES) *.html *.pdf
|
||||||
|
|
@ -0,0 +1,16 @@
|
||||||
|
Partially finished work.
|
||||||
|
|
||||||
|
1. User Reference manuals.
|
||||||
|
1.1 IP Command reference (ip-cref.tex, published)
|
||||||
|
1.2 TC Command reference (tc-cref.tex)
|
||||||
|
1.3 IP tunnels (ip-tunnels.tex, published)
|
||||||
|
|
||||||
|
2. Linux-2.2 Networking API
|
||||||
|
2.1 RTNETLINK (api-rtnl.tex)
|
||||||
|
2.2 Path MTU Discovery (api-pmtudisc.tex)
|
||||||
|
2.3 IPv6 Flow Labels (api-ip6-flowlabels.tex, published)
|
||||||
|
2.4 Miscellaneous extensions (api-misc.tex)
|
||||||
|
|
||||||
|
3. Linux-2.2 Networking Intra-Kernel Interfaces
|
||||||
|
3.1 NetDev --- Networking Devices and netdev... (iki-netdev.tex)
|
||||||
|
3.2 Neighbour cache and destination cache. (iki-neighdst.tex)
|
||||||
|
|
@ -0,0 +1 @@
|
||||||
|
\def\Draft{020116}
|
||||||
|
|
@ -6,8 +6,8 @@ What is it?
|
||||||
-----------
|
-----------
|
||||||
|
|
||||||
An extension to the filtering/classification architecture of Linux Traffic
|
An extension to the filtering/classification architecture of Linux Traffic
|
||||||
Control.
|
Control.
|
||||||
Up to 2.6.8 the only action that could be "attached" to a filter was policing.
|
Up to 2.6.8 the only action that could be "attached" to a filter was policing.
|
||||||
i.e you could say something like:
|
i.e you could say something like:
|
||||||
|
|
||||||
-----
|
-----
|
||||||
|
|
@ -17,11 +17,11 @@ tc filter add dev lo parent ffff: protocol ip prio 10 u32 match ip src \
|
||||||
|
|
||||||
which implies "if a packet is seen on the ingress of the lo device with
|
which implies "if a packet is seen on the ingress of the lo device with
|
||||||
a source IP address of 127.0.0.1/32 we give it a classification id of 1:1 and
|
a source IP address of 127.0.0.1/32 we give it a classification id of 1:1 and
|
||||||
we execute a policing action which rate limits its bandwidth utilization
|
we execute a policing action which rate limits its bandwidth utilization
|
||||||
to 1.5Mbps".
|
to 1.5Mbps".
|
||||||
|
|
||||||
The new extensions allow for more than just policing actions to be added.
|
The new extensions allow for more than just policing actions to be added.
|
||||||
They are also fully backward compatible. If you have a kernel that doesn't
|
They are also fully backward compatible. If you have a kernel that doesnt
|
||||||
understand them, then the effect is null i.e if you have a newer tc
|
understand them, then the effect is null i.e if you have a newer tc
|
||||||
but older kernel, the actions are not installed. Likewise if you
|
but older kernel, the actions are not installed. Likewise if you
|
||||||
have a newer kernel but older tc, obviously the tc will use current
|
have a newer kernel but older tc, obviously the tc will use current
|
||||||
|
|
@ -29,9 +29,9 @@ syntax which will work fine. Of course to get the required effect you need
|
||||||
both newer tc and kernel. If you are reading this you have the
|
both newer tc and kernel. If you are reading this you have the
|
||||||
right tc ;->
|
right tc ;->
|
||||||
|
|
||||||
A side effect is that we can now get stateless firewalling to work with tc.
|
A side effect is that we can now get stateless firewalling to work with tc.
|
||||||
Essentially this is now an alternative to iptables.
|
Essentially this is now an alternative to iptables.
|
||||||
I won't go into details of my dislike for iptables at times, but
|
I wont go into details of my dislike for iptables at times, but
|
||||||
scalability is one of the main issues; however, if you need stateful
|
scalability is one of the main issues; however, if you need stateful
|
||||||
classification - use netfilter (for now).
|
classification - use netfilter (for now).
|
||||||
|
|
||||||
|
|
@ -61,7 +61,7 @@ tc filter add dev lo parent 1:0 protocol ip prio 10 u32 \
|
||||||
match ip src 127.0.0.1/32 flowid 1:1 \
|
match ip src 127.0.0.1/32 flowid 1:1 \
|
||||||
action police mtu 4000 rate 1500kbit burst 90k
|
action police mtu 4000 rate 1500kbit burst 90k
|
||||||
|
|
||||||
" generic Actions" (gact) at the moment are:
|
" generic Actions" (gact) at the moment are:
|
||||||
{ drop, pass, reclassify, continue}
|
{ drop, pass, reclassify, continue}
|
||||||
(If you have others not listed here, give me a reason and we will add them)
|
(If you have others not listed here, give me a reason and we will add them)
|
||||||
+drop says to drop the packet
|
+drop says to drop the packet
|
||||||
|
|
@ -77,7 +77,7 @@ iptable target. I have only tested with mangler targets up to now.
|
||||||
In terms of hooks:
|
In terms of hooks:
|
||||||
*ingress is mapped to pre-routing hook
|
*ingress is mapped to pre-routing hook
|
||||||
*egress is mapped to post-routing hook
|
*egress is mapped to post-routing hook
|
||||||
I don't see much value in the other hooks, if you see it and email me good
|
I dont see much value in the other hooks, if you see it and email me good
|
||||||
reasons, the addition is trivial.
|
reasons, the addition is trivial.
|
||||||
|
|
||||||
Example syntax for iptables targets usage becomes:
|
Example syntax for iptables targets usage becomes:
|
||||||
|
|
@ -93,43 +93,43 @@ decimal 12, then use flowid 1:c.
|
||||||
|
|
||||||
3) A feature i call pipe
|
3) A feature i call pipe
|
||||||
The motivation is derived from Unix pipe mechanism but applied to packets.
|
The motivation is derived from Unix pipe mechanism but applied to packets.
|
||||||
Essentially take a matching packet and pass it through
|
Essentially take a matching packet and pass it through
|
||||||
action1 | action2 | action3 etc.
|
action1 | action2 | action3 etc.
|
||||||
You could do something similar to this with the tc policer and the "continue"
|
You could do something similar to this with the tc policer and the "continue"
|
||||||
operator but this rather restricts it to just the policer and requires
|
operator but this rather restricts it to just the policer and requires
|
||||||
multiple rules (and lookups, hence quite inefficient);
|
multiple rules (and lookups, hence quite inefficient);
|
||||||
|
|
||||||
as an example -- and please note that this is just an example _not_ The
|
as an example -- and please note that this is just an example _not_ The
|
||||||
Word Youve Been Waiting For (yes i have had problems giving examples
|
Word Youve Been Waiting For (yes i have had problems giving examples
|
||||||
which ended becoming dogma in documents and people modifying them a little
|
which ended becoming dogma in documents and people modifying them a little
|
||||||
to look clever);
|
to look clever);
|
||||||
|
|
||||||
i selected the metering rates to be small so that i can show better how
|
i selected the metering rates to be small so that i can show better how
|
||||||
things work.
|
things work.
|
||||||
|
|
||||||
|
The script below does the following:
|
||||||
|
- an incoming packet from 10.0.0.21 is first given a firewall mark of 1.
|
||||||
|
|
||||||
The script below does the following:
|
- It is then metered to make sure it does not exceed its allocated rate of
|
||||||
- an incoming packet from 10.0.0.21 is first given a firewall mark of 1.
|
1Kbps. If it doesnt exceed rate, this is where we terminate action execution.
|
||||||
|
|
||||||
- It is then metered to make sure it does not exceed its allocated rate of
|
- If it does exceed its rate, its "color" changes to a mark of 2 and it is
|
||||||
1Kbps. If it doesn't exceed rate, this is where we terminate action execution.
|
|
||||||
|
|
||||||
- If it does exceed its rate, its "color" changes to a mark of 2 and it is
|
|
||||||
then passed through a second meter.
|
then passed through a second meter.
|
||||||
|
|
||||||
-The second meter is shared across all flows on that device [i am surpised
|
-The second meter is shared across all flows on that device [i am suprised
|
||||||
that this seems to be not a well known feature of the policer; Bert was telling
|
that this seems to be not a well known feature of the policer; Bert was telling
|
||||||
me that someone was writing a qdisc just to do sharing across multiple devices;
|
me that someone was writing a qdisc just to do sharing across multiple devices;
|
||||||
it must be the summer heat again; weve had someone doing that every year around
|
it must be the summer heat again; weve had someone doing that every year around
|
||||||
summer -- the key to sharing is to use an operator "index" in your policer
|
summer -- the key to sharing is to use an operator "index" in your policer
|
||||||
rules (example "index 20"). All your rules have to use the same index to
|
rules (example "index 20"). All your rules have to use the same index to
|
||||||
share.]
|
share.]
|
||||||
|
|
||||||
-If the second meter is exceeded the color of the flow changes further to 3.
|
-If the second meter is exceeded the color of the flow changes further to 3.
|
||||||
|
|
||||||
-We then pass the packet to another meter which is shared across all devices
|
-We then pass the packet to another meter which is shared across all devices
|
||||||
in the system. If this meter is exceeded we drop the packet.
|
in the system. If this meter is exceeded we drop the packet.
|
||||||
|
|
||||||
Note the mark can be used further up the system to do things like policy
|
Note the mark can be used further up the system to do things like policy
|
||||||
or more interesting things on the egress.
|
or more interesting things on the egress.
|
||||||
|
|
||||||
------------------ cut here -------------------------------
|
------------------ cut here -------------------------------
|
||||||
|
|
@ -145,7 +145,7 @@ u32 match ip src 10.0.0.21/32 flowid 1:15 \
|
||||||
action ipt -j mark --set-mark 1 index 2 \
|
action ipt -j mark --set-mark 1 index 2 \
|
||||||
#
|
#
|
||||||
# then pass it through a policer which allows 1kbps; if the flow
|
# then pass it through a policer which allows 1kbps; if the flow
|
||||||
# doesn't exceed that rate, this is where we stop, if it exceeds we
|
# doesnt exceed that rate, this is where we stop, if it exceeds we
|
||||||
# pipe the packet to the next action
|
# pipe the packet to the next action
|
||||||
action police rate 1kbit burst 9k pipe \
|
action police rate 1kbit burst 9k pipe \
|
||||||
#
|
#
|
||||||
|
|
@ -161,31 +161,31 @@ action ipt -j mark --set-mark 3 \
|
||||||
# and then attempt to borrow from a meter used by all devices in the
|
# and then attempt to borrow from a meter used by all devices in the
|
||||||
# system. Should this be exceeded, drop the packet on the floor.
|
# system. Should this be exceeded, drop the packet on the floor.
|
||||||
action police index 20 mtu 5000 rate 1kbit burst 90k drop
|
action police index 20 mtu 5000 rate 1kbit burst 90k drop
|
||||||
---------------------------------
|
---------------------------------
|
||||||
|
|
||||||
Now lets see the actions installed with
|
Now lets see the actions installed with
|
||||||
"tc filter show parent ffff: dev eth0"
|
"tc filter show parent ffff: dev eth0"
|
||||||
|
|
||||||
-------- output -----------
|
-------- output -----------
|
||||||
jroot# tc filter show parent ffff: dev eth0
|
jroot# tc filter show parent ffff: dev eth0
|
||||||
filter protocol ip pref 1 u32
|
filter protocol ip pref 1 u32
|
||||||
filter protocol ip pref 1 u32 fh 800: ht divisor 1
|
filter protocol ip pref 1 u32 fh 800: ht divisor 1
|
||||||
filter protocol ip pref 1 u32 fh 800::800 order 2048 key ht 800 bkt 0 flowid 1:15
|
filter protocol ip pref 1 u32 fh 800::800 order 2048 key ht 800 bkt 0 flowid 1:15
|
||||||
|
|
||||||
action order 1: tablename: mangle hook: NF_IP_PRE_ROUTING
|
action order 1: tablename: mangle hook: NF_IP_PRE_ROUTING
|
||||||
target MARK set 0x1 index 2
|
target MARK set 0x1 index 2
|
||||||
|
|
||||||
action order 2: police 1 action pipe rate 1Kbit burst 9Kb mtu 2Kb
|
action order 2: police 1 action pipe rate 1Kbit burst 9Kb mtu 2Kb
|
||||||
|
|
||||||
action order 3: tablename: mangle hook: NF_IP_PRE_ROUTING
|
action order 3: tablename: mangle hook: NF_IP_PRE_ROUTING
|
||||||
target MARK set 0x2 index 1
|
target MARK set 0x2 index 1
|
||||||
|
|
||||||
action order 4: police 30 action pipe rate 1Kbit burst 10Kb mtu 5000b
|
action order 4: police 30 action pipe rate 1Kbit burst 10Kb mtu 5000b
|
||||||
|
|
||||||
action order 5: tablename: mangle hook: NF_IP_PRE_ROUTING
|
action order 5: tablename: mangle hook: NF_IP_PRE_ROUTING
|
||||||
target MARK set 0x3 index 3
|
target MARK set 0x3 index 3
|
||||||
|
|
||||||
action order 6: police 20 action drop rate 1Kbit burst 90Kb mtu 5000b
|
action order 6: police 20 action drop rate 1Kbit burst 90Kb mtu 5000b
|
||||||
|
|
||||||
match 0a000015/ffffffff at 12
|
match 0a000015/ffffffff at 12
|
||||||
-------------------------------
|
-------------------------------
|
||||||
|
|
@ -209,31 +209,31 @@ Now lets take a look at the stats with "tc -s filter show parent ffff: dev eth0"
|
||||||
|
|
||||||
--------------
|
--------------
|
||||||
jroot# tc -s filter show parent ffff: dev eth0
|
jroot# tc -s filter show parent ffff: dev eth0
|
||||||
filter protocol ip pref 1 u32
|
filter protocol ip pref 1 u32
|
||||||
filter protocol ip pref 1 u32 fh 800: ht divisor 1
|
filter protocol ip pref 1 u32 fh 800: ht divisor 1
|
||||||
filter protocol ip pref 1 u32 fh 800::800 order 2048 key ht 800 bkt 0 flowid 1:1
|
filter protocol ip pref 1 u32 fh 800::800 order 2048 key ht 800 bkt 0 flowid 1:1
|
||||||
5
|
5
|
||||||
|
|
||||||
action order 1: tablename: mangle hook: NF_IP_PRE_ROUTING
|
action order 1: tablename: mangle hook: NF_IP_PRE_ROUTING
|
||||||
target MARK set 0x1 index 2
|
target MARK set 0x1 index 2
|
||||||
Sent 188832 bytes 2248 pkts (dropped 0, overlimits 0)
|
Sent 188832 bytes 2248 pkts (dropped 0, overlimits 0)
|
||||||
|
|
||||||
action order 2: police 1 action pipe rate 1Kbit burst 9Kb mtu 2Kb
|
action order 2: police 1 action pipe rate 1Kbit burst 9Kb mtu 2Kb
|
||||||
Sent 188832 bytes 2248 pkts (dropped 0, overlimits 2122)
|
Sent 188832 bytes 2248 pkts (dropped 0, overlimits 2122)
|
||||||
|
|
||||||
action order 3: tablename: mangle hook: NF_IP_PRE_ROUTING
|
action order 3: tablename: mangle hook: NF_IP_PRE_ROUTING
|
||||||
target MARK set 0x2 index 1
|
target MARK set 0x2 index 1
|
||||||
Sent 178248 bytes 2122 pkts (dropped 0, overlimits 0)
|
Sent 178248 bytes 2122 pkts (dropped 0, overlimits 0)
|
||||||
|
|
||||||
action order 4: police 30 action pipe rate 1Kbit burst 10Kb mtu 5000b
|
action order 4: police 30 action pipe rate 1Kbit burst 10Kb mtu 5000b
|
||||||
Sent 178248 bytes 2122 pkts (dropped 0, overlimits 1945)
|
Sent 178248 bytes 2122 pkts (dropped 0, overlimits 1945)
|
||||||
|
|
||||||
action order 5: tablename: mangle hook: NF_IP_PRE_ROUTING
|
action order 5: tablename: mangle hook: NF_IP_PRE_ROUTING
|
||||||
target MARK set 0x3 index 3
|
target MARK set 0x3 index 3
|
||||||
Sent 163380 bytes 1945 pkts (dropped 0, overlimits 0)
|
Sent 163380 bytes 1945 pkts (dropped 0, overlimits 0)
|
||||||
|
|
||||||
action order 6: police 20 action drop rate 1Kbit burst 90Kb mtu 5000b
|
action order 6: police 20 action drop rate 1Kbit burst 90Kb mtu 5000b
|
||||||
Sent 163380 bytes 1945 pkts (dropped 0, overlimits 437)
|
Sent 163380 bytes 1945 pkts (dropped 0, overlimits 437)
|
||||||
|
|
||||||
match 0a000015/ffffffff at 12
|
match 0a000015/ffffffff at 12
|
||||||
-------------------------------
|
-------------------------------
|
||||||
|
|
@ -241,7 +241,7 @@ filter protocol ip pref 1 u32 fh 800::800 order 2048 key ht 800 bkt 0 flowid 1:1
|
||||||
Neat, eh?
|
Neat, eh?
|
||||||
|
|
||||||
|
|
||||||
Want to write an action module?
|
Wanna write an action module?
|
||||||
------------------------------
|
------------------------------
|
||||||
Its easy. Either look at the code or send me email. I will document at
|
Its easy. Either look at the code or send me email. I will document at
|
||||||
some point; will also accept documentation.
|
some point; will also accept documentation.
|
||||||
|
|
@ -254,3 +254,4 @@ At the moment the focus has been on getting the architecture in place.
|
||||||
Expect new things in the spurious time i have to work on this
|
Expect new things in the spurious time i have to work on this
|
||||||
(particularly around end of year when i typically get time off
|
(particularly around end of year when i typically get time off
|
||||||
from work).
|
from work).
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -1,16 +1,16 @@
|
||||||
|
|
||||||
gact <ACTION> [RAND] [INDEX]
|
gact <ACTION> [RAND] [INDEX]
|
||||||
|
|
||||||
Where:
|
Where:
|
||||||
ACTION := reclassify | drop | continue | pass | ok
|
ACTION := reclassify | drop | continue | pass | ok
|
||||||
RAND := random <RANDTYPE> <ACTION> <VAL>
|
RAND := random <RANDTYPE> <ACTION> <VAL>
|
||||||
RANDTYPE := netrand | determ
|
RANDTYPE := netrand | determ
|
||||||
VAL := value not exceeding 10000
|
VAL := value not exceeding 10000
|
||||||
INDEX := index value used
|
INDEX := index value used
|
||||||
|
|
||||||
ACTION semantics
|
ACTION semantics
|
||||||
- pass and ok are equivalent to accept
|
- pass and ok are equivalent to accept
|
||||||
- continue allows one to restart classification lookup
|
- continue allows to restart classification lookup
|
||||||
- drop drops packets
|
- drop drops packets
|
||||||
- reclassify implies continue classification where we left off
|
- reclassify implies continue classification where we left off
|
||||||
|
|
||||||
|
|
@ -42,14 +42,14 @@ filter u32 fh 800::800 order 2048 key ht 800 bkt 0 flowid 1:16 (rule hit 32 suc
|
||||||
random type none pass val 0
|
random type none pass val 0
|
||||||
index 1 ref 1 bind 1 installed 59 sec used 35 sec
|
index 1 ref 1 bind 1 installed 59 sec used 35 sec
|
||||||
Sent 1680 bytes 20 pkts (dropped 20, overlimits 0 )
|
Sent 1680 bytes 20 pkts (dropped 20, overlimits 0 )
|
||||||
|
|
||||||
----
|
----
|
||||||
|
|
||||||
# example 2
|
# example 2
|
||||||
#allow 1 out of 10 randomly using the netrand generator
|
#allow 1 out of 10 randomly using the netrand generator
|
||||||
tc filter add dev eth0 parent ffff: protocol ip prio 6 u32 match ip src \
|
tc filter add dev eth0 parent ffff: protocol ip prio 6 u32 match ip src \
|
||||||
10.0.0.9/32 flowid 1:16 action drop random netrand ok 10
|
10.0.0.9/32 flowid 1:16 action drop random netrand ok 10
|
||||||
|
|
||||||
ping -c 20 10.0.0.9
|
ping -c 20 10.0.0.9
|
||||||
|
|
||||||
----
|
----
|
||||||
|
|
@ -59,14 +59,14 @@ filter protocol ip pref 6 u32 filter protocol ip pref 6 u32 fh 800: ht divisor 1
|
||||||
random type netrand pass val 10
|
random type netrand pass val 10
|
||||||
index 5 ref 1 bind 1 installed 49 sec used 25 sec
|
index 5 ref 1 bind 1 installed 49 sec used 25 sec
|
||||||
Sent 1680 bytes 20 pkts (dropped 16, overlimits 0 )
|
Sent 1680 bytes 20 pkts (dropped 16, overlimits 0 )
|
||||||
|
|
||||||
--------
|
--------
|
||||||
#alternative: deterministically accept every second packet
|
#alternative: deterministically accept every second packet
|
||||||
tc filter add dev eth0 parent ffff: protocol ip prio 6 u32 match ip src \
|
tc filter add dev eth0 parent ffff: protocol ip prio 6 u32 match ip src \
|
||||||
10.0.0.9/32 flowid 1:16 action drop random determ ok 2
|
10.0.0.9/32 flowid 1:16 action drop random determ ok 2
|
||||||
|
|
||||||
ping -c 20 10.0.0.9
|
ping -c 20 10.0.0.9
|
||||||
|
|
||||||
tc -s filter show parent ffff: dev eth0
|
tc -s filter show parent ffff: dev eth0
|
||||||
-----
|
-----
|
||||||
filter protocol ip pref 6 u32 filter protocol ip pref 6 u32 fh 800: ht divisor 1filter protocol ip pref 6 u32 fh 800::800 order 2048 key ht 800 bkt 0 flowid 1:16 (rule hit 20 success 20)
|
filter protocol ip pref 6 u32 filter protocol ip pref 6 u32 fh 800: ht divisor 1filter protocol ip pref 6 u32 fh 800::800 order 2048 key ht 800 bkt 0 flowid 1:16 (rule hit 20 success 20)
|
||||||
|
|
@ -76,3 +76,4 @@ filter protocol ip pref 6 u32 filter protocol ip pref 6 u32 fh 800: ht divisor 1
|
||||||
index 4 ref 1 bind 1 installed 118 sec used 82 sec
|
index 4 ref 1 bind 1 installed 118 sec used 82 sec
|
||||||
Sent 1680 bytes 20 pkts (dropped 10, overlimits 0 )
|
Sent 1680 bytes 20 pkts (dropped 10, overlimits 0 )
|
||||||
-----
|
-----
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -6,47 +6,47 @@ with a _lot_ less code.
|
||||||
Known IMQ/IFB USES
|
Known IMQ/IFB USES
|
||||||
------------------
|
------------------
|
||||||
|
|
||||||
As far as i know the reasons listed below are why people use IMQ.
|
As far as i know the reasons listed below are why people use IMQ.
|
||||||
It would be nice to know of anything else that i missed.
|
It would be nice to know of anything else that i missed.
|
||||||
|
|
||||||
1) qdiscs/policies that are per device as opposed to system wide.
|
1) qdiscs/policies that are per device as opposed to system wide.
|
||||||
IFB allows for sharing.
|
IFB allows for sharing.
|
||||||
|
|
||||||
2) Allows for queueing incoming traffic for shaping instead of
|
2) Allows for queueing incoming traffic for shaping instead of
|
||||||
dropping. I am not aware of any study that shows policing is
|
dropping. I am not aware of any study that shows policing is
|
||||||
worse than shaping in achieving the end goal of rate control.
|
worse than shaping in achieving the end goal of rate control.
|
||||||
I would be interested if anyone is experimenting.
|
I would be interested if anyone is experimenting.
|
||||||
|
|
||||||
3) Very interesting use: if you are serving p2p you may want to give
|
3) Very interesting use: if you are serving p2p you may wanna give
|
||||||
preference to your own locally originated traffic (when responses come back)
|
preference to your own localy originated traffic (when responses come back)
|
||||||
vs someone using your system to do bittorrent. So QoSing based on state
|
vs someone using your system to do bittorrent. So QoSing based on state
|
||||||
comes in as the solution. What people did to achieve this was stick
|
comes in as the solution. What people did to achive this was stick
|
||||||
the IMQ somewhere prelocal hook.
|
the IMQ somewhere prelocal hook.
|
||||||
I think this is a pretty neat feature to have in Linux in general.
|
I think this is a pretty neat feature to have in Linux in general.
|
||||||
(i.e not just for IMQ).
|
(i.e not just for IMQ).
|
||||||
But i won't go back to putting netfilter hooks in the device to satisfy
|
But i wont go back to putting netfilter hooks in the device to satisfy
|
||||||
this. I also don't think its worth it hacking ifb some more to be
|
this. I also dont think its worth it hacking ifb some more to be
|
||||||
aware of say L3 info and play ip rule tricks to achieve this.
|
aware of say L3 info and play ip rule tricks to achieve this.
|
||||||
--> Instead the plan is to have a conntrack related action. This action will
|
--> Instead the plan is to have a contrack related action. This action will
|
||||||
selectively either query/create conntrack state on incoming packets.
|
selectively either query/create contrack state on incoming packets.
|
||||||
Packets could then be redirected to ifb based on what happens -> eg
|
Packets could then be redirected to ifb based on what happens -> eg
|
||||||
on incoming packets; if we find they are of known state we could send to
|
on incoming packets; if we find they are of known state we could send to
|
||||||
a different queue than one which didn't have existing state. This
|
a different queue than one which didnt have existing state. This
|
||||||
all however is dependent on whatever rules the admin enters.
|
all however is dependent on whatever rules the admin enters.
|
||||||
|
|
||||||
At the moment this 3rd function does not exist yet. I have decided that
|
At the moment this 3rd function does not exist yet. I have decided that
|
||||||
instead of sitting on the patch for another year, to release it and then
|
instead of sitting on the patch for another year, to release it and then
|
||||||
if there is pressure i will add this feature.
|
if theres pressure i will add this feature.
|
||||||
|
|
||||||
An example, to provide functionality that most people use IMQ for below:
|
An example, to provide functionality that most people use IMQ for below:
|
||||||
|
|
||||||
--------
|
--------
|
||||||
export TC="/sbin/tc"
|
export TC="/sbin/tc"
|
||||||
|
|
||||||
$TC qdisc add dev ifb0 root handle 1: prio
|
$TC qdisc add dev ifb0 root handle 1: prio
|
||||||
$TC qdisc add dev ifb0 parent 1:1 handle 10: sfq
|
$TC qdisc add dev ifb0 parent 1:1 handle 10: sfq
|
||||||
$TC qdisc add dev ifb0 parent 1:2 handle 20: tbf rate 20kbit buffer 1600 limit 3000
|
$TC qdisc add dev ifb0 parent 1:2 handle 20: tbf rate 20kbit buffer 1600 limit 3000
|
||||||
$TC qdisc add dev ifb0 parent 1:3 handle 30: sfq
|
$TC qdisc add dev ifb0 parent 1:3 handle 30: sfq
|
||||||
$TC filter add dev ifb0 protocol ip pref 1 parent 1: handle 1 fw classid 1:1
|
$TC filter add dev ifb0 protocol ip pref 1 parent 1: handle 1 fw classid 1:1
|
||||||
$TC filter add dev ifb0 protocol ip pref 2 parent 1: handle 2 fw classid 1:2
|
$TC filter add dev ifb0 protocol ip pref 2 parent 1: handle 2 fw classid 1:2
|
||||||
|
|
||||||
|
|
@ -54,7 +54,7 @@ ifconfig ifb0 up
|
||||||
|
|
||||||
$TC qdisc add dev eth0 ingress
|
$TC qdisc add dev eth0 ingress
|
||||||
|
|
||||||
# redirect all IP packets arriving in eth0 to ifb0
|
# redirect all IP packets arriving in eth0 to ifb0
|
||||||
# use mark 1 --> puts them onto class 1:1
|
# use mark 1 --> puts them onto class 1:1
|
||||||
$TC filter add dev eth0 parent ffff: protocol ip prio 10 u32 \
|
$TC filter add dev eth0 parent ffff: protocol ip prio 10 u32 \
|
||||||
match u32 0 0 flowid 1:1 \
|
match u32 0 0 flowid 1:1 \
|
||||||
|
|
@ -77,44 +77,44 @@ PING 10.22 (10.0.0.22): 56 data bytes
|
||||||
--- 10.22 ping statistics ---
|
--- 10.22 ping statistics ---
|
||||||
3 packets transmitted, 3 packets received, 0% packet loss
|
3 packets transmitted, 3 packets received, 0% packet loss
|
||||||
round-trip min/avg/max = 0.6/1.3/2.8 ms
|
round-trip min/avg/max = 0.6/1.3/2.8 ms
|
||||||
[root@jzny action-tests]#
|
[root@jzny action-tests]#
|
||||||
-----
|
-----
|
||||||
Now look at some stats:
|
Now look at some stats:
|
||||||
|
|
||||||
---
|
---
|
||||||
[root@jmandrake]:~# $TC -s filter show parent ffff: dev eth0
|
[root@jmandrake]:~# $TC -s filter show parent ffff: dev eth0
|
||||||
filter protocol ip pref 10 u32
|
filter protocol ip pref 10 u32
|
||||||
filter protocol ip pref 10 u32 fh 800: ht divisor 1
|
filter protocol ip pref 10 u32 fh 800: ht divisor 1
|
||||||
filter protocol ip pref 10 u32 fh 800::800 order 2048 key ht 800 bkt 0 flowid 1:1
|
filter protocol ip pref 10 u32 fh 800::800 order 2048 key ht 800 bkt 0 flowid 1:1
|
||||||
match 00000000/00000000 at 0
|
match 00000000/00000000 at 0
|
||||||
action order 1: tablename: mangle hook: NF_IP_PRE_ROUTING
|
action order 1: tablename: mangle hook: NF_IP_PRE_ROUTING
|
||||||
target MARK set 0x1
|
target MARK set 0x1
|
||||||
index 1 ref 1 bind 1 installed 4195sec used 27sec
|
index 1 ref 1 bind 1 installed 4195sec used 27sec
|
||||||
Sent 252 bytes 3 pkts (dropped 0, overlimits 0)
|
Sent 252 bytes 3 pkts (dropped 0, overlimits 0)
|
||||||
|
|
||||||
action order 2: mirred (Egress Redirect to device ifb0) stolen
|
action order 2: mirred (Egress Redirect to device ifb0) stolen
|
||||||
index 1 ref 1 bind 1 installed 165 sec used 27 sec
|
index 1 ref 1 bind 1 installed 165 sec used 27 sec
|
||||||
Sent 252 bytes 3 pkts (dropped 0, overlimits 0)
|
Sent 252 bytes 3 pkts (dropped 0, overlimits 0)
|
||||||
|
|
||||||
[root@jmandrake]:~# $TC -s qdisc
|
[root@jmandrake]:~# $TC -s qdisc
|
||||||
qdisc sfq 30: dev ifb0 limit 128p quantum 1514b
|
qdisc sfq 30: dev ifb0 limit 128p quantum 1514b
|
||||||
Sent 0 bytes 0 pkts (dropped 0, overlimits 0)
|
Sent 0 bytes 0 pkts (dropped 0, overlimits 0)
|
||||||
qdisc tbf 20: dev ifb0 rate 20Kbit burst 1575b lat 2147.5s
|
qdisc tbf 20: dev ifb0 rate 20Kbit burst 1575b lat 2147.5s
|
||||||
Sent 210 bytes 3 pkts (dropped 0, overlimits 0)
|
Sent 210 bytes 3 pkts (dropped 0, overlimits 0)
|
||||||
qdisc sfq 10: dev ifb0 limit 128p quantum 1514b
|
qdisc sfq 10: dev ifb0 limit 128p quantum 1514b
|
||||||
Sent 294 bytes 3 pkts (dropped 0, overlimits 0)
|
Sent 294 bytes 3 pkts (dropped 0, overlimits 0)
|
||||||
qdisc prio 1: dev ifb0 bands 3 priomap 1 2 2 2 1 2 0 0 1 1 1 1 1 1 1 1
|
qdisc prio 1: dev ifb0 bands 3 priomap 1 2 2 2 1 2 0 0 1 1 1 1 1 1 1 1
|
||||||
Sent 504 bytes 6 pkts (dropped 0, overlimits 0)
|
Sent 504 bytes 6 pkts (dropped 0, overlimits 0)
|
||||||
qdisc ingress ffff: dev eth0 ----------------
|
qdisc ingress ffff: dev eth0 ----------------
|
||||||
Sent 308 bytes 5 pkts (dropped 0, overlimits 0)
|
Sent 308 bytes 5 pkts (dropped 0, overlimits 0)
|
||||||
|
|
||||||
[root@jmandrake]:~# ifconfig ifb0
|
[root@jmandrake]:~# ifconfig ifb0
|
||||||
ifb0 Link encap:Ethernet HWaddr 00:00:00:00:00:00
|
ifb0 Link encap:Ethernet HWaddr 00:00:00:00:00:00
|
||||||
inet6 addr: fe80::200:ff:fe00:0/64 Scope:Link
|
inet6 addr: fe80::200:ff:fe00:0/64 Scope:Link
|
||||||
UP BROADCAST RUNNING NOARP MTU:1500 Metric:1
|
UP BROADCAST RUNNING NOARP MTU:1500 Metric:1
|
||||||
RX packets:6 errors:0 dropped:3 overruns:0 frame:0
|
RX packets:6 errors:0 dropped:3 overruns:0 frame:0
|
||||||
TX packets:3 errors:0 dropped:0 overruns:0 carrier:0
|
TX packets:3 errors:0 dropped:0 overruns:0 carrier:0
|
||||||
collisions:0 txqueuelen:32
|
collisions:0 txqueuelen:32
|
||||||
RX bytes:504 (504.0 b) TX bytes:252 (252.0 b)
|
RX bytes:504 (504.0 b) TX bytes:252 (252.0 b)
|
||||||
-----
|
-----
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -7,10 +7,10 @@ flow to be mirrored. High end switches typically can select based
|
||||||
on more than just a port (eg a 5 tuple classifier). They may also be
|
on more than just a port (eg a 5 tuple classifier). They may also be
|
||||||
capable of redirecting.
|
capable of redirecting.
|
||||||
|
|
||||||
Usage:
|
Usage:
|
||||||
|
|
||||||
mirred <DIRECTION> <ACTION> [index INDEX] <dev DEVICENAME>
|
mirred <DIRECTION> <ACTION> [index INDEX] <dev DEVICENAME>
|
||||||
where:
|
where:
|
||||||
DIRECTION := <ingress | egress>
|
DIRECTION := <ingress | egress>
|
||||||
ACTION := <mirror | redirect>
|
ACTION := <mirror | redirect>
|
||||||
INDEX is the specific policy instance id
|
INDEX is the specific policy instance id
|
||||||
|
|
@ -18,7 +18,7 @@ DEVICENAME is the devicename
|
||||||
|
|
||||||
Direction:
|
Direction:
|
||||||
- Ingress is not supported at the moment. It will be in the
|
- Ingress is not supported at the moment. It will be in the
|
||||||
future as well as mirror/redirecting to a socket.
|
future as well as mirror/redirecting to a socket.
|
||||||
|
|
||||||
Action:
|
Action:
|
||||||
- Mirror takes a copy of the packet and sends it to specified
|
- Mirror takes a copy of the packet and sends it to specified
|
||||||
|
|
@ -26,17 +26,17 @@ dev ("port" in ethernet switch/bridging terminology)
|
||||||
- redirect
|
- redirect
|
||||||
steals the packet and redirects to specified destination dev.
|
steals the packet and redirects to specified destination dev.
|
||||||
|
|
||||||
What NOT to do if you don't want your machine to crash:
|
What NOT to do if you dont want your machine to crash:
|
||||||
------------------------------------------------------
|
------------------------------------------------------
|
||||||
|
|
||||||
Do not create loops!
|
Do not create loops!
|
||||||
Loops are not hard to create in the egress qdiscs.
|
Loops are not hard to create in the egress qdiscs.
|
||||||
|
|
||||||
Here are simple rules to follow if you don't want to get
|
Here are simple rules to follow if you dont want to get
|
||||||
hurt:
|
hurt:
|
||||||
A) Do not have the same packet go to same netdevice twice
|
A) Do not have the same packet go to same netdevice twice
|
||||||
in a single graph of policies. Your machine will just hang!
|
in a single graph of policies. Your machine will just hang!
|
||||||
This is design intent _not a bug_ to teach you some lessons.
|
This is design intent _not a bug_ to teach you some lessons.
|
||||||
|
|
||||||
In the future if there are easy ways to do this in the kernel
|
In the future if there are easy ways to do this in the kernel
|
||||||
without affecting other packets not interested in this feature
|
without affecting other packets not interested in this feature
|
||||||
|
|
@ -51,7 +51,7 @@ B) Do not redirect from one IFB device to another.
|
||||||
Remember that IFB is a very specialized case of packet redirecting
|
Remember that IFB is a very specialized case of packet redirecting
|
||||||
device. Instead of redirecting it puts packets at the exact spot
|
device. Instead of redirecting it puts packets at the exact spot
|
||||||
on the stack it found them from.
|
on the stack it found them from.
|
||||||
Redirecting from ifbX->ifbY will actually not crash your machine but your
|
Redirecting from ifbX->ifbY will actually not crash your machine but your
|
||||||
packets will all be dropped (this is much simpler to detect
|
packets will all be dropped (this is much simpler to detect
|
||||||
and resolve and is only affecting users of ifb as opposed to the
|
and resolve and is only affecting users of ifb as opposed to the
|
||||||
whole stack).
|
whole stack).
|
||||||
|
|
@ -64,7 +64,7 @@ Some examples:
|
||||||
|
|
||||||
1) Mirror all packets arriving on eth0 to be sent out on eth1.
|
1) Mirror all packets arriving on eth0 to be sent out on eth1.
|
||||||
You may have a sniffer or some accounting box hooked up on eth1.
|
You may have a sniffer or some accounting box hooked up on eth1.
|
||||||
|
|
||||||
---
|
---
|
||||||
tc qdisc add dev eth0 ingress
|
tc qdisc add dev eth0 ingress
|
||||||
tc filter add dev eth0 parent ffff: protocol ip prio 10 u32 \
|
tc filter add dev eth0 parent ffff: protocol ip prio 10 u32 \
|
||||||
|
|
@ -100,7 +100,7 @@ stack (i.e ping would work).
|
||||||
3) Even more funky example:
|
3) Even more funky example:
|
||||||
|
|
||||||
#
|
#
|
||||||
#allow 1 out of 10 packets on ingress of lo to randomly make it to the
|
#allow 1 out of 10 packets on ingress of lo to randomly make it to the
|
||||||
# host A (Randomness uses the netrand generator)
|
# host A (Randomness uses the netrand generator)
|
||||||
#
|
#
|
||||||
---
|
---
|
||||||
|
|
@ -111,9 +111,9 @@ action mirred egress mirror dev eth0
|
||||||
---
|
---
|
||||||
|
|
||||||
4)
|
4)
|
||||||
# for packets from 10.0.0.9 going out on eth0 (could be local
|
# for packets from 10.0.0.9 going out on eth0 (could be local
|
||||||
# IP or something # we are forwarding) -
|
# IP or something # we are forwarding) -
|
||||||
# if exceeding a 100Kbps rate, then redirect to eth1
|
# if exceeding a 100Kbps rate, then redirect to eth1
|
||||||
#
|
#
|
||||||
|
|
||||||
---
|
---
|
||||||
|
|
@ -129,7 +129,7 @@ so you could tcpdump them (dummy by defaults drops all packets it sees).
|
||||||
This is a very useful debug feature.
|
This is a very useful debug feature.
|
||||||
|
|
||||||
Lets say you are policing packets from alias 192.168.200.200/32
|
Lets say you are policing packets from alias 192.168.200.200/32
|
||||||
you don't want those to exceed 100kbps going out.
|
you dont want those to exceed 100kbps going out.
|
||||||
|
|
||||||
---
|
---
|
||||||
tc qdisc add dev eth0 handle 1:0 root prio
|
tc qdisc add dev eth0 handle 1:0 root prio
|
||||||
|
|
@ -158,7 +158,7 @@ Essentially a good debugging/logging interface (sort of like
|
||||||
BSDs specialized log device does without needing one).
|
BSDs specialized log device does without needing one).
|
||||||
|
|
||||||
If you replace mirror with redirect, those packets will be
|
If you replace mirror with redirect, those packets will be
|
||||||
blackholed and will never make it out.
|
blackholed and will never make it out.
|
||||||
|
|
||||||
cheers,
|
cheers,
|
||||||
jamal
|
jamal
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,429 @@
|
||||||
|
\documentstyle[12pt,twoside]{article}
|
||||||
|
\def\TITLE{IPv6 Flow Labels}
|
||||||
|
\input preamble
|
||||||
|
\begin{center}
|
||||||
|
\Large\bf IPv6 Flow Labels in Linux-2.2.
|
||||||
|
\end{center}
|
||||||
|
|
||||||
|
|
||||||
|
\begin{center}
|
||||||
|
{ \large Alexey~N.~Kuznetsov } \\
|
||||||
|
\em Institute for Nuclear Research, Moscow \\
|
||||||
|
\verb|kuznet@ms2.inr.ac.ru| \\
|
||||||
|
\rm April 11, 1999
|
||||||
|
\end{center}
|
||||||
|
|
||||||
|
\vspace{5mm}
|
||||||
|
|
||||||
|
\tableofcontents
|
||||||
|
|
||||||
|
\section{Introduction.}
|
||||||
|
|
||||||
|
Every IPv6 packet carries 28 bits of flow information. RFC2460 splits
|
||||||
|
these bits to two fields: 8 bits of traffic class (or DS field, if you
|
||||||
|
prefer this term) and 20 bits of flow label. Currently there exists
|
||||||
|
no well-defined API to manage IPv6 flow information. In this document
|
||||||
|
I describe an attempt to design the API for Linux-2.2 IPv6 stack.
|
||||||
|
|
||||||
|
\vskip 1mm
|
||||||
|
|
||||||
|
The API must solve the following tasks:
|
||||||
|
|
||||||
|
\begin{enumerate}
|
||||||
|
|
||||||
|
\item To allow user to set traffic class bits.
|
||||||
|
|
||||||
|
\item To allow user to read traffic class bits of received packets.
|
||||||
|
This feature is not so useful as the first one, however it will be
|
||||||
|
necessary f.e.\ to implement ECN [RFC2481] for datagram oriented services
|
||||||
|
or to implement receiver side of SRP or another end-to-end protocol
|
||||||
|
using traffic class bits.
|
||||||
|
|
||||||
|
\item To assign flow labels to packets sent by user.
|
||||||
|
|
||||||
|
\item To get flow labels of received packets. I do not know
|
||||||
|
any applications of this feature, but it is possible that receiver will
|
||||||
|
want to use flow labels to distinguish sub-flows.
|
||||||
|
|
||||||
|
\item To allocate flow labels in the way, compliant to RFC2460. Namely:
|
||||||
|
|
||||||
|
\begin{itemize}
|
||||||
|
\item
|
||||||
|
Flow labels must be uniformly distributed (pseudo-)random numbers,
|
||||||
|
so that any subset of 20 bits can be used as hash key.
|
||||||
|
|
||||||
|
\item
|
||||||
|
Flows with coinciding source address and flow label must have identical
|
||||||
|
destination address and not-fragmentable extensions headers (i.e.\
|
||||||
|
hop by hop options and all the headers up to and including routing header,
|
||||||
|
if it is present.)
|
||||||
|
|
||||||
|
\begin{NB}
|
||||||
|
There is a hole in specs: some hop-by-hop options can be
|
||||||
|
defined only on per-packet base (f.e.\ jumbo payload option).
|
||||||
|
Essentially, it means that such options cannot be present in packets
|
||||||
|
with flow labels.
|
||||||
|
\end{NB}
|
||||||
|
\begin{NB}
|
||||||
|
NB notes here and below reflect only my personal opinion,
|
||||||
|
they should be read with smile or should not be read at all :-).
|
||||||
|
\end{NB}
|
||||||
|
|
||||||
|
|
||||||
|
\item
|
||||||
|
Flow labels have finite lifetime and source is not allowed to reuse
|
||||||
|
flow label for another flow until the maximal lifetime has expired,
|
||||||
|
so that intermediate nodes will be able to invalidate flow state before
|
||||||
|
the label is taken over by another flow.
|
||||||
|
Flow state, including lifetime, is propagated along datagram path
|
||||||
|
by some application specific methods
|
||||||
|
(f.e.\ in RSVP PATH messages or in some hop-by-hop option).
|
||||||
|
|
||||||
|
|
||||||
|
\end{itemize}
|
||||||
|
|
||||||
|
\end{enumerate}
|
||||||
|
|
||||||
|
\section{Sending/receiving flow information.}
|
||||||
|
|
||||||
|
\paragraph{Discussion.}
|
||||||
|
\addcontentsline{toc}{subsection}{Discussion}
|
||||||
|
It was proposed (Where? I do not remember any explicit statement)
|
||||||
|
to solve the first four tasks using
|
||||||
|
\verb|sin6_flowinfo| field added to \verb|struct| \verb|sockaddr_in6|
|
||||||
|
(see RFC2553).
|
||||||
|
|
||||||
|
\begin{NB}
|
||||||
|
This method is difficult to consider as reasonable, because it
|
||||||
|
puts additional overhead to all the services, despite of only
|
||||||
|
very small subset of them (none, to be more exact) really use it.
|
||||||
|
It contradicts both the IETF spirit and the letter. Before RFC2553
|
||||||
|
one justification existed, IPv6 address alignment left 4 byte
|
||||||
|
hole in \verb|sockaddr_in6| in any case. Now it has no justification.
|
||||||
|
\end{NB}
|
||||||
|
|
||||||
|
We have two problems with this method. The first one is common for all OSes:
|
||||||
|
if \verb|recvmsg()| initializes \verb|sin6_flowinfo| to flow info
|
||||||
|
of received packet, we lose one very important property of BSD socket API,
|
||||||
|
namely, we are not allowed to use received address for reply directly
|
||||||
|
and have to mangle it, even if we are not interested in flowinfo subtleties.
|
||||||
|
|
||||||
|
\begin{NB}
|
||||||
|
RFC2553 adds new requirement: to clear \verb|sin6_flowinfo|.
|
||||||
|
Certainly, it is not solution but rather attempt to force applications
|
||||||
|
to make unnecessary work. Well, as usual, one mistake in design
|
||||||
|
is followed by attempts to patch the hole and more mistakes...
|
||||||
|
\end{NB}
|
||||||
|
|
||||||
|
Another problem is Linux specific. Historically Linux IPv6 did not
|
||||||
|
initialize \verb|sin6_flowinfo| at all, so that, if kernel does not
|
||||||
|
support flow labels, this field is not zero, but a random number.
|
||||||
|
Some applications also did not take care about it.
|
||||||
|
|
||||||
|
\begin{NB}
|
||||||
|
Following RFC2553 such applications can be considered as broken,
|
||||||
|
but I still think that they are right: clearing all the address
|
||||||
|
before filling known fields is robust but stupid solution.
|
||||||
|
Useless wasting CPU cycles and
|
||||||
|
memory bandwidth is not a good idea. Such patches are acceptable
|
||||||
|
as temporary hacks, but not as standard of the future.
|
||||||
|
\end{NB}
|
||||||
|
|
||||||
|
|
||||||
|
\paragraph{Implementation.}
|
||||||
|
\addcontentsline{toc}{subsection}{Implementation}
|
||||||
|
By default Linux IPv6 does not read \verb|sin6_flowinfo| field
|
||||||
|
assuming that common applications are not obliged to initialize it
|
||||||
|
and are permitted to consider it as pure alignment padding.
|
||||||
|
In order to tell kernel that application
|
||||||
|
is aware of this field, it is necessary to set socket option
|
||||||
|
\verb|IPV6_FLOWINFO_SEND|.
|
||||||
|
|
||||||
|
\begin{verbatim}
|
||||||
|
int on = 1;
|
||||||
|
setsockopt(sock, SOL_IPV6, IPV6_FLOWINFO_SEND,
|
||||||
|
(void*)&on, sizeof(on));
|
||||||
|
\end{verbatim}
|
||||||
|
|
||||||
|
Linux kernel never fills \verb|sin6_flowinfo| field, when passing
|
||||||
|
message to user space, though the kernels which support flow labels
|
||||||
|
initialize it to zero. If user wants to get received flowinfo, he
|
||||||
|
will set option \verb|IPV6_FLOWINFO| and after this he will receive
|
||||||
|
flowinfo as ancillary data object of type \verb|IPV6_FLOWINFO|
|
||||||
|
(cf.\ RFC2292).
|
||||||
|
|
||||||
|
\begin{verbatim}
|
||||||
|
int on = 1;
|
||||||
|
setsockopt(sock, SOL_IPV6, IPV6_FLOWINFO, (void*)&on, sizeof(on));
|
||||||
|
\end{verbatim}
|
||||||
|
|
||||||
|
Flowinfo received and latched by a connected TCP socket also may be fetched
|
||||||
|
with \verb|getsockopt()| \verb|IPV6_PKTOPTIONS| together with
|
||||||
|
another optional information.
|
||||||
|
|
||||||
|
Besides that, in the spirit of RFC2292 the option \verb|IPV6_FLOWINFO|
|
||||||
|
may be used as alternative way to send flowinfo with \verb|sendmsg()| or
|
||||||
|
to latch it with \verb|IPV6_PKTOPTIONS|.
|
||||||
|
|
||||||
|
\paragraph{Note about IPv6 options and destination address.}
|
||||||
|
\addcontentsline{toc}{subsection}{IPv6 options and destination address}
|
||||||
|
If \verb|sin6_flowinfo| contains a non-zero flow label,
|
||||||
|
destination address in \verb|sin6_addr| and non-fragmentable
|
||||||
|
extension headers are ignored. Instead, kernel uses the values
|
||||||
|
cached at flow setup (see below). However, for connected sockets
|
||||||
|
kernel prefers the values set at connection time.
|
||||||
|
|
||||||
|
\paragraph{Example.}
|
||||||
|
\addcontentsline{toc}{subsection}{Example}
|
||||||
|
After setting socket option \verb|IPV6_FLOWINFO|
|
||||||
|
flowlabel and DS field are received as ancillary data object
|
||||||
|
of type \verb|IPV6_FLOWINFO| and level \verb|SOL_IPV6|.
|
||||||
|
In the cases when it is convenient to use \verb|recvfrom(2)|,
|
||||||
|
it is possible to replace library variant with your own one,
|
||||||
|
sort of:
|
||||||
|
|
||||||
|
\begin{verbatim}
|
||||||
|
#include <sys/socket.h>
|
||||||
|
#include <netinet/in6.h>
|
||||||
|
|
||||||
|
size_t recvfrom(int fd, char *buf, size_t len, int flags,
|
||||||
|
struct sockaddr *addr, int *addrlen)
|
||||||
|
{
|
||||||
|
size_t cc;
|
||||||
|
char cbuf[128];
|
||||||
|
struct cmsghdr *c;
|
||||||
|
struct iovec iov = { buf, len };
|
||||||
|
struct msghdr msg = { addr, *addrlen,
|
||||||
|
&iov, 1,
|
||||||
|
cbuf, sizeof(cbuf),
|
||||||
|
0 };
|
||||||
|
|
||||||
|
cc = recvmsg(fd, &msg, flags);
|
||||||
|
if (cc < 0)
|
||||||
|
return cc;
|
||||||
|
((struct sockaddr_in6*)addr)->sin6_flowinfo = 0;
|
||||||
|
*addrlen = msg.msg_namelen;
|
||||||
|
for (c=CMSG_FIRSTHDR(&msg); c; c = CMSG_NEXTHDR(&msg, c)) {
|
||||||
|
if (c->cmsg_level != SOL_IPV6 ||
|
||||||
|
c->cmsg_type != IPV6_FLOWINFO)
|
||||||
|
continue;
|
||||||
|
((struct sockaddr_in6*)addr)->sin6_flowinfo = *(__u32*)CMSG_DATA(c);
|
||||||
|
}
|
||||||
|
return cc;
|
||||||
|
}
|
||||||
|
\end{verbatim}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
\section{Flow label management.}
|
||||||
|
|
||||||
|
\paragraph{Discussion.}
|
||||||
|
\addcontentsline{toc}{subsection}{Discussion}
|
||||||
|
Requirements of RFC2460 are pretty tough. Particularly, lifetimes
|
||||||
|
longer than boot time require to store allocated labels at stable
|
||||||
|
storage, so that the full implementation necessarily includes user space flow
|
||||||
|
label manager. There are at least three different approaches:
|
||||||
|
|
||||||
|
\begin{enumerate}
|
||||||
|
\item {\bf ``Cooperative''. } We could leave flow label allocation wholly
|
||||||
|
to user space. When user needs label he requests manager directly. The approach
|
||||||
|
is valid, but as any ``cooperative'' approach it suffers from security problems.
|
||||||
|
|
||||||
|
\begin{NB}
|
||||||
|
One idea is to disallow not privileged user to allocate flow
|
||||||
|
labels, but instead to pass the socket to manager via \verb|SCM_RIGHTS|
|
||||||
|
control message, so that it will allocate label and assign it to socket
|
||||||
|
itself. Hmm... the idea is interesting.
|
||||||
|
\end{NB}
|
||||||
|
|
||||||
|
\item {\bf ``Indirect''.} Kernel redirects requests to user level daemon
|
||||||
|
and does not install label until the daemon acknowledged the request.
|
||||||
|
The approach is the most promising, it is especially pleasant to recognize
|
||||||
|
parallel with IPsec API [RFC2367,Craig]. Actually, it may share API with
|
||||||
|
IPsec.
|
||||||
|
|
||||||
|
\item {\bf ``Stupid''.} To allocate labels in kernel space. It is the simplest
|
||||||
|
method, but it suffers from two serious flaws: the first,
|
||||||
|
we cannot lease labels with lifetimes longer than boot time, the second,
|
||||||
|
it is sensitive to DoS attacks. Kernel has to remember all the obsolete
|
||||||
|
labels until their expiration and malicious user may quickly eat all the
|
||||||
|
flow label space.
|
||||||
|
|
||||||
|
\end{enumerate}
|
||||||
|
|
||||||
|
Certainly, I choose the most ``stupid'' method. It is the cheapest one
|
||||||
|
for implementor (i.e.\ me), and taking into account that flow labels
|
||||||
|
still have no serious applications it is not useful to work on more
|
||||||
|
advanced API, especially, taking into account that eventually we
|
||||||
|
will get it for no fee together with IPsec.
|
||||||
|
|
||||||
|
|
||||||
|
\paragraph{Implementation.}
|
||||||
|
\addcontentsline{toc}{subsection}{Implementation}
|
||||||
|
Socket option \verb|IPV6_FLOWLABEL_MGR| allows to
|
||||||
|
request flow label manager to allocate new flow label, to reuse
|
||||||
|
already allocated one or to delete old flow label.
|
||||||
|
Its argument is \verb|struct| \verb|in6_flowlabel_req|:
|
||||||
|
|
||||||
|
\begin{verbatim}
|
||||||
|
struct in6_flowlabel_req
|
||||||
|
{
|
||||||
|
struct in6_addr flr_dst;
|
||||||
|
__u32 flr_label;
|
||||||
|
__u8 flr_action;
|
||||||
|
__u8 flr_share;
|
||||||
|
__u16 flr_flags;
|
||||||
|
__u16 flr_expires;
|
||||||
|
__u16 flr_linger;
|
||||||
|
__u32 __flr_reserved;
|
||||||
|
/* Options in format of IPV6_PKTOPTIONS */
|
||||||
|
};
|
||||||
|
\end{verbatim}
|
||||||
|
|
||||||
|
\begin{itemize}
|
||||||
|
|
||||||
|
\item \verb|dst| is IPv6 destination address associated with the label.
|
||||||
|
|
||||||
|
\item \verb|label| is flow label value in network byte order. If it is zero,
|
||||||
|
kernel will allocate new pseudo-random number. Otherwise, kernel will try
|
||||||
|
to lease flow label ordered by user. In this case, it is user task to provide
|
||||||
|
necessary flow label randomness.
|
||||||
|
|
||||||
|
\item \verb|action| is requested operation. Currently, only three operations
|
||||||
|
are defined:
|
||||||
|
|
||||||
|
\begin{verbatim}
|
||||||
|
#define IPV6_FL_A_GET 0 /* Get flow label */
|
||||||
|
#define IPV6_FL_A_PUT 1 /* Release flow label */
|
||||||
|
#define IPV6_FL_A_RENEW 2 /* Update expire time */
|
||||||
|
\end{verbatim}
|
||||||
|
|
||||||
|
\item \verb|flags| are optional modifiers. Currently
|
||||||
|
only \verb|IPV6_FL_A_GET| has modifiers:
|
||||||
|
|
||||||
|
\begin{verbatim}
|
||||||
|
#define IPV6_FL_F_CREATE 1 /* Allowed to create new label */
|
||||||
|
#define IPV6_FL_F_EXCL 2 /* Do not create new label */
|
||||||
|
\end{verbatim}
|
||||||
|
|
||||||
|
|
||||||
|
\item \verb|share| defines who is allowed to reuse the same flow label.
|
||||||
|
|
||||||
|
\begin{verbatim}
|
||||||
|
#define IPV6_FL_S_NONE 0 /* Not defined */
|
||||||
|
#define IPV6_FL_S_EXCL 1 /* Label is private */
|
||||||
|
#define IPV6_FL_S_PROCESS 2 /* May be reused by this process */
|
||||||
|
#define IPV6_FL_S_USER 3 /* May be reused by this user */
|
||||||
|
#define IPV6_FL_S_ANY 255 /* Anyone may reuse it */
|
||||||
|
\end{verbatim}
|
||||||
|
|
||||||
|
\item \verb|linger| is time in seconds. After the last user releases flow
|
||||||
|
label, it will not be reused with different destination and options at least
|
||||||
|
during this time. If \verb|share| is not \verb|IPV6_FL_S_EXCL| the label
|
||||||
|
still can be shared by other sockets. Current implementation does not allow
|
||||||
|
unprivileged user to set linger longer than 60 sec.
|
||||||
|
|
||||||
|
\item \verb|expires| is time in seconds. Flow label will be kept at least
|
||||||
|
for this time, but it will not be destroyed before user released it explicitly
|
||||||
|
or closed all the sockets using it. Current implementation does not allow
|
||||||
|
unprivileged user to set timeout longer than 60 sec. Privileged applications
|
||||||
|
MAY set longer lifetimes, but in this case they MUST save allocated
|
||||||
|
labels at stable storage and restore them back after reboot before the first
|
||||||
|
application allocates new flow.
|
||||||
|
|
||||||
|
\end{itemize}
|
||||||
|
|
||||||
|
This structure is followed by optional extension headers associated
|
||||||
|
with this flow label in format of \verb|IPV6_PKTOPTIONS|. Only
|
||||||
|
\verb|IPV6_HOPOPTS|, \verb|IPV6_RTHDR| and, if \verb|IPV6_RTHDR| is present,
|
||||||
|
\verb|IPV6_DSTOPTS| are allowed.
|
||||||
|
|
||||||
|
\paragraph{Example.}
|
||||||
|
\addcontentsline{toc}{subsection}{Example}
|
||||||
|
The function \verb|get_flow_label| allocates
|
||||||
|
private flow label.
|
||||||
|
|
||||||
|
\begin{verbatim}
|
||||||
|
int get_flow_label(int fd, struct sockaddr_in6 *dst, __u32 fl)
|
||||||
|
{
|
||||||
|
int on = 1;
|
||||||
|
struct in6_flowlabel_req freq;
|
||||||
|
|
||||||
|
memset(&freq, 0, sizeof(freq));
|
||||||
|
freq.flr_label = htonl(fl);
|
||||||
|
freq.flr_action = IPV6_FL_A_GET;
|
||||||
|
freq.flr_flags = IPV6_FL_F_CREATE | IPV6_FL_F_EXCL;
|
||||||
|
freq.flr_share = IPV6_FL_S_EXCL;
|
||||||
|
memcpy(&freq.flr_dst, &dst->sin6_addr, 16);
|
||||||
|
if (setsockopt(fd, SOL_IPV6, IPV6_FLOWLABEL_MGR,
|
||||||
|
&freq, sizeof(freq)) == -1) {
|
||||||
|
perror ("can't lease flowlabel");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
dst->sin6_flowinfo |= freq.flr_label;
|
||||||
|
|
||||||
|
if (setsockopt(fd, SOL_IPV6, IPV6_FLOWINFO_SEND,
|
||||||
|
&on, sizeof(on)) == -1) {
|
||||||
|
perror ("can't send flowinfo");
|
||||||
|
|
||||||
|
freq.flr_action = IPV6_FL_A_PUT;
|
||||||
|
setsockopt(fd, SOL_IPV6, IPV6_FLOWLABEL_MGR,
|
||||||
|
&freq, sizeof(freq));
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
\end{verbatim}
|
||||||
|
|
||||||
|
A bit more complicated example using routing header can be found
|
||||||
|
in \verb|ping6| utility (\verb|iputils| package). Linux rsvpd backend
|
||||||
|
contains an example of using operation \verb|IPV6_FL_A_RENEW|.
|
||||||
|
|
||||||
|
\paragraph{Listing flow labels.}
|
||||||
|
\addcontentsline{toc}{subsection}{Listing flow labels}
|
||||||
|
List of currently allocated
|
||||||
|
flow labels may be read from \verb|/proc/net/ip6_flowlabel|.
|
||||||
|
|
||||||
|
\begin{verbatim}
|
||||||
|
Label S Owner Users Linger Expires Dst Opt
|
||||||
|
A1BE5 1 0 0 6 3 3ffe2400000000010a0020fffe71fb30 0
|
||||||
|
\end{verbatim}
|
||||||
|
|
||||||
|
\begin{itemize}
|
||||||
|
\item \verb|Label| is hexadecimal flow label value.
|
||||||
|
\item \verb|S| is sharing style.
|
||||||
|
\item \verb|Owner| is ID of creator, it is zero, pid or uid, depending on
|
||||||
|
sharing style.
|
||||||
|
\item \verb|Users| is number of applications using the label now.
|
||||||
|
\item \verb|Linger| is \verb|linger| of this label in seconds.
|
||||||
|
\item \verb|Expires| is time until expiration of the label in seconds. It may
|
||||||
|
be negative, if the label is in use.
|
||||||
|
\item \verb|Dst| is IPv6 destination address.
|
||||||
|
\item \verb|Opt| is length of options, associated with the label. Option
|
||||||
|
data are not accessible.
|
||||||
|
\end{itemize}
|
||||||
|
|
||||||
|
|
||||||
|
\paragraph{Flow labels and RSVP.}
|
||||||
|
\addcontentsline{toc}{subsection}{Flow labels and RSVP}
|
||||||
|
RSVP daemon supports IPv6 flow labels
|
||||||
|
without any modifications to standard ISI RAPI. Sender must allocate
|
||||||
|
flow label, fill corresponding sender template and submit it to local rsvp
|
||||||
|
daemon. rsvpd will check the label and start to announce it in PATH
|
||||||
|
messages. Rsvpd on sender node will renew the flow label, so that it will not
|
||||||
|
be reused before path state expires and all the intermediate
|
||||||
|
routers and receiver purge flow state.
|
||||||
|
|
||||||
|
\verb|rtap| utility is modified to parse flow labels. F.e.\ if user allocated
|
||||||
|
flow label \verb|0xA1234|, he may write:
|
||||||
|
|
||||||
|
\begin{verbatim}
|
||||||
|
RTAP> sender 3ffe:2400::1/FL0xA1234 <Tspec>
|
||||||
|
\end{verbatim}
|
||||||
|
|
||||||
|
Receiver makes reservation with command:
|
||||||
|
\begin{verbatim}
|
||||||
|
RTAP> reserve ff 3ffe:2400::1/FL0xA1234 <Flowspec>
|
||||||
|
\end{verbatim}
|
||||||
|
|
||||||
|
\end{document}
|
||||||
|
|
@ -0,0 +1,130 @@
|
||||||
|
<!doctype linuxdoc system>
|
||||||
|
|
||||||
|
<article>
|
||||||
|
|
||||||
|
<title>ARPD Daemon
|
||||||
|
<author>Alexey Kuznetsov, <tt/kuznet@ms2.inr.ac.ru/
|
||||||
|
<date>some_negative_number, 20 Sep 2001
|
||||||
|
<abstract>
|
||||||
|
<tt/arpd/ is daemon collecting gratuitous ARP information, saving
|
||||||
|
it on local disk and feeding it to kernel on demand to avoid
|
||||||
|
redundant broadcasting due to limited size of kernel ARP cache.
|
||||||
|
</abstract>
|
||||||
|
|
||||||
|
|
||||||
|
<p><bf/Description/
|
||||||
|
|
||||||
|
<p>The format of the command is:
|
||||||
|
|
||||||
|
<tscreen><verb>
|
||||||
|
arpd OPTIONS [ INTERFACE [ INTERFACE ... ] ]
|
||||||
|
</verb></tscreen>
|
||||||
|
|
||||||
|
<p> <tt/OPTIONS/ are:
|
||||||
|
|
||||||
|
<itemize>
|
||||||
|
|
||||||
|
<item><tt/-l/ - dump <tt/arpd/ database to stdout and exit. Output consists
|
||||||
|
of three columns: interface index, IP address and MAC address.
|
||||||
|
Negative entries for dead hosts are also shown, in this case MAC address
|
||||||
|
is replaced by word <tt/FAILED/ followed by colon and time when the fact
|
||||||
|
that host is dead was proven the last time.
|
||||||
|
|
||||||
|
<item><tt/-f FILE/ - read and load <tt/arpd/ database from <tt/FILE/
|
||||||
|
in text format similar to that dumped by option <tt/-l/. Exit after load,
|
||||||
|
probably listing resulting database, if option <tt/-l/ is also given.
|
||||||
|
If <tt/FILE/ is <tt/-/, <tt/stdin/ is read to get ARP table.
|
||||||
|
|
||||||
|
<item><tt/-b DATABASE/ - location of database file. Default location is
|
||||||
|
<tt>/var/lib/arpd/arpd.db</tt>.
|
||||||
|
|
||||||
|
<item><tt/-a NUMBER/ - <tt/arpd/ not only passively listens ARP on wire, but
|
||||||
|
also sends broadcast queries itself. <tt/NUMBER/ is number of such queries
|
||||||
|
to make before destination is considered as dead. When <tt/arpd/ is started
|
||||||
|
as kernel helper (i.e. with <tt/app_solicit/ enabled in <tt/sysctl/
|
||||||
|
or even with option <tt/-k/) without this option and still did not learn enough
|
||||||
|
information, you can observe 1 second gaps in service. Not fatal, but
|
||||||
|
not good.
|
||||||
|
|
||||||
|
<item><tt/-k/ - suppress sending broadcast queries by kernel. It makes
|
||||||
|
sense together with option <tt/-a/.
|
||||||
|
|
||||||
|
<item><tt/-n TIME/ - timeout of negative cache. When resolution fails <tt/arpd/
|
||||||
|
suppresses further attempts to resolve for this period. It makes sense
|
||||||
|
only together with option <tt/-k/. This timeout should not be too much
|
||||||
|
longer than boot time of a typical host not supporting gratuitous ARP.
|
||||||
|
Default value is 60 seconds.
|
||||||
|
|
||||||
|
<item><tt/-R RATE/ - maximal steady rate of broadcasts sent by <tt/arpd/
|
||||||
|
in packets per second. Default value is 1.
|
||||||
|
|
||||||
|
<item><tt/-B NUMBER/ - number of broadcasts sent by <tt/arpd/ back to back.
|
||||||
|
Default value is 3. Together with option <tt/-R/ this option allows
|
||||||
|
to police broadcasting not to exceed <tt/B+R*T/ over any interval
|
||||||
|
of time <tt/T/.
|
||||||
|
|
||||||
|
</itemize>
|
||||||
|
|
||||||
|
<p><tt/INTERFACE/ is name of networking interface to watch.
|
||||||
|
If no interfaces given, <tt/arpd/ monitors all the interfaces.
|
||||||
|
In this case <tt/arpd/ does not adjust <tt/sysctl/ parameters,
|
||||||
|
it is supposed user does this himself after <tt/arpd/ is started.
|
||||||
|
|
||||||
|
|
||||||
|
<p> Signals
|
||||||
|
|
||||||
|
<p> <tt/arpd/ exits gracefully syncing database and restoring adjusted
|
||||||
|
<tt/sysctl/ parameters, when receives <tt/SIGINT/ or <tt/SIGTERM/.
|
||||||
|
<tt/SIGHUP/ syncs database to disk. <tt/SIGUSR1/ sends some statistics
|
||||||
|
to <tt/syslog/. Effect of other signals is undefined, they may corrupt
|
||||||
|
database and leave <tt/sysctl/ parameters in an unpredictable state.
|
||||||
|
|
||||||
|
<p> Note
|
||||||
|
|
||||||
|
<p> In order for <tt/arpd/ to be able to serve as ARP resolver, kernel must be
|
||||||
|
compiled with the option <tt/CONFIG_ARPD/ and, in the case when interface list
|
||||||
|
is not given on command line, variable <tt/app_solicit/
|
||||||
|
on interfaces of interest should be set in <tt>/proc/sys/net/ipv4/neigh/*</tt>.
|
||||||
|
If this is not made <tt/arpd/ still collects gratuitous ARP information
|
||||||
|
in its database.
|
||||||
|
|
||||||
|
<p> Examples
|
||||||
|
|
||||||
|
<enum>
|
||||||
|
<item> Start <tt/arpd/ to collect gratuitous ARP, but not messing
|
||||||
|
with kernel functionality:
|
||||||
|
|
||||||
|
<tscreen><verb>
|
||||||
|
arpd -b /var/tmp/arpd.db
|
||||||
|
</verb></tscreen>
|
||||||
|
|
||||||
|
<item> Look at result after some time:
|
||||||
|
|
||||||
|
<tscreen><verb>
|
||||||
|
killall arpd
|
||||||
|
arpd -l -b /var/tmp/arpd.db
|
||||||
|
</verb></tscreen>
|
||||||
|
|
||||||
|
<item> To enable kernel helper, leaving leading role to kernel:
|
||||||
|
|
||||||
|
<tscreen><verb>
|
||||||
|
arpd -b /var/tmp/arpd.db -a 1 eth0 eth1
|
||||||
|
</verb></tscreen>
|
||||||
|
|
||||||
|
<item> Completely replace kernel resolution on interfaces <tt/eth0/
|
||||||
|
and <tt/eth1/. In this case kernel still does unicast probing to
|
||||||
|
validate entries, but all the broadcast activity is suppressed
|
||||||
|
and made under authority of <tt/arpd/:
|
||||||
|
|
||||||
|
<tscreen><verb>
|
||||||
|
arpd -b /var/tmp/arpd.db -a 3 -k eth0 eth1
|
||||||
|
</verb></tscreen>
|
||||||
|
|
||||||
|
This is mode which <tt/arpd/ is supposed to work normally.
|
||||||
|
It is not default just to prevent occasional enabling of too aggressive
|
||||||
|
mode occasionally.
|
||||||
|
|
||||||
|
</enum>
|
||||||
|
|
||||||
|
</article>
|
||||||
|
|
||||||
|
|
@ -0,0 +1,16 @@
|
||||||
|
#! /bin/bash
|
||||||
|
# $1 = Temporary file . "string"
|
||||||
|
# $2 = File to process . "string"
|
||||||
|
# $3 = Page size . ie: a4 , letter ... "string"
|
||||||
|
# $4 = Number of pages to fit on a single sheet . "numeric"
|
||||||
|
|
||||||
|
if type psnup >&/dev/null; then
|
||||||
|
echo "psnup -$4 -p$3 $1 $2"
|
||||||
|
psnup -$4 -p$3 $1 $2
|
||||||
|
elif type psmulti >&/dev/null; then
|
||||||
|
echo "psmulti $1 > $2"
|
||||||
|
psmulti $1 > $2
|
||||||
|
else
|
||||||
|
echo "cp $1 $2"
|
||||||
|
cp $1 $2
|
||||||
|
fi
|
||||||
File diff suppressed because it is too large
Load Diff
|
|
@ -0,0 +1,469 @@
|
||||||
|
\documentstyle[12pt,twoside]{article}
|
||||||
|
\def\TITLE{Tunnels over IP}
|
||||||
|
\input preamble
|
||||||
|
\begin{center}
|
||||||
|
\Large\bf Tunnels over IP in Linux-2.2
|
||||||
|
\end{center}
|
||||||
|
|
||||||
|
|
||||||
|
\begin{center}
|
||||||
|
{ \large Alexey~N.~Kuznetsov } \\
|
||||||
|
\em Institute for Nuclear Research, Moscow \\
|
||||||
|
\verb|kuznet@ms2.inr.ac.ru| \\
|
||||||
|
\rm March 17, 1999
|
||||||
|
\end{center}
|
||||||
|
|
||||||
|
\vspace{5mm}
|
||||||
|
|
||||||
|
\tableofcontents
|
||||||
|
|
||||||
|
|
||||||
|
\section{Instead of introduction: micro-FAQ.}
|
||||||
|
|
||||||
|
\begin{itemize}
|
||||||
|
|
||||||
|
\item
|
||||||
|
Q: In linux-2.0.36 I used:
|
||||||
|
\begin{verbatim}
|
||||||
|
ifconfig tunl1 10.0.0.1 pointopoint 193.233.7.65
|
||||||
|
\end{verbatim}
|
||||||
|
to create tunnel. It does not work in 2.2.0!
|
||||||
|
|
||||||
|
A: You are right, it does not work. The command written above is split into two commands.
|
||||||
|
\begin{verbatim}
|
||||||
|
ip tunnel add MY-TUNNEL mode ipip remote 193.233.7.65
|
||||||
|
\end{verbatim}
|
||||||
|
will create tunnel device with name \verb|MY-TUNNEL|. Now you may configure
|
||||||
|
it with:
|
||||||
|
\begin{verbatim}
|
||||||
|
ifconfig MY-TUNNEL 10.0.0.1
|
||||||
|
\end{verbatim}
|
||||||
|
Certainly, if you prefer name \verb|tunl1| to \verb|MY-TUNNEL|,
|
||||||
|
you still may use it.
|
||||||
|
|
||||||
|
\item
|
||||||
|
Q: In linux-2.0.36 I used:
|
||||||
|
\begin{verbatim}
|
||||||
|
ifconfig tunl0 10.0.0.1
|
||||||
|
route add -net 10.0.0.0 gw 193.233.7.65 dev tunl0
|
||||||
|
\end{verbatim}
|
||||||
|
to tunnel net 10.0.0.0 via router 193.233.7.65. It does not
|
||||||
|
work in 2.2.0! Moreover, \verb|route| prints a funny error sort of
|
||||||
|
``network unreachable'' and after this I found a strange direct route
|
||||||
|
to 10.0.0.0 via \verb|tunl0| in routing table.
|
||||||
|
|
||||||
|
A: Yes, in 2.2 the rule that {\em normal} gateway must reside on directly
|
||||||
|
connected network has not any exceptions. You may tell kernel, that
|
||||||
|
this particular route is {\em abnormal}:
|
||||||
|
\begin{verbatim}
|
||||||
|
ifconfig tunl0 10.0.0.1 netmask 255.255.255.255
|
||||||
|
ip route add 10.0.0.0/8 via 193.233.7.65 dev tunl0 onlink
|
||||||
|
\end{verbatim}
|
||||||
|
Note keyword \verb|onlink|, it is the magic key that orders kernel
|
||||||
|
not to check for consistency of gateway address.
|
||||||
|
Probably, after this explanation you have already guessed another method
|
||||||
|
to cheat kernel:
|
||||||
|
\begin{verbatim}
|
||||||
|
ifconfig tunl0 10.0.0.1 netmask 255.255.255.255
|
||||||
|
route add -host 193.233.7.65 dev tunl0
|
||||||
|
route add -net 10.0.0.0 netmask 255.0.0.0 gw 193.233.7.65
|
||||||
|
route del -host 193.233.7.65 dev tunl0
|
||||||
|
\end{verbatim}
|
||||||
|
Well, if you like such tricks, nobody may prohibit you to use them.
|
||||||
|
Only do not forget
|
||||||
|
that between \verb|route add| and \verb|route del| host 193.233.7.65 is
|
||||||
|
unreachable.
|
||||||
|
|
||||||
|
\item
|
||||||
|
Q: In 2.0.36 I used to load \verb|tunnel| device module and \verb|ipip| module.
|
||||||
|
I cannot find any \verb|tunnel| in 2.2!
|
||||||
|
|
||||||
|
A: Linux-2.2 has single module \verb|ipip| for both directions of tunneling
|
||||||
|
and for all IPIP tunnel devices.
|
||||||
|
|
||||||
|
\item
|
||||||
|
Q: \verb|traceroute| does not work over tunnel! Well, stop... It works,
|
||||||
|
only skips some number of hops.
|
||||||
|
|
||||||
|
A: Yes. By default tunnel driver copies \verb|ttl| value from
|
||||||
|
inner packet to outer one. It means that path traversed by tunneled
|
||||||
|
packets to another endpoint is not hidden. If you dislike this, or if you
|
||||||
|
are going to use some routing protocol expecting that packets
|
||||||
|
with ttl 1 will reach peering host (f.e.\ RIP, OSPF or EBGP)
|
||||||
|
and you are not afraid of
|
||||||
|
tunnel loops, you may append option \verb|ttl 64|, when creating tunnel
|
||||||
|
with \verb|ip tunnel add|.
|
||||||
|
|
||||||
|
\item
|
||||||
|
Q: ... Well, list of things, which 2.0 was able to do finishes.
|
||||||
|
|
||||||
|
\end{itemize}
|
||||||
|
|
||||||
|
\paragraph{Summary of differences between 2.2 and 2.0.}
|
||||||
|
|
||||||
|
\begin{itemize}
|
||||||
|
|
||||||
|
\item {\bf In 2.0} you could compile tunnel device into kernel
|
||||||
|
and got set of 4 devices \verb|tunl0| ... \verb|tunl3| or,
|
||||||
|
alternatively, compile it as module and load new module
|
||||||
|
for each new tunnel. Also, module \verb|ipip| was necessary
|
||||||
|
to receive tunneled packets.
|
||||||
|
|
||||||
|
{\bf 2.2} has {\em one\/} module \verb|ipip|. Loading it you get base
|
||||||
|
tunnel device \verb|tunl0| and other tunnels may be created with command
|
||||||
|
\verb|ip tunnel add|. These new devices may have arbitrary names.
|
||||||
|
|
||||||
|
|
||||||
|
\item {\bf In 2.0} you set remote tunnel endpoint address with
|
||||||
|
the command \verb|ifconfig| ... \verb|pointopoint A|.
|
||||||
|
|
||||||
|
{\bf In 2.2} this command has the same semantics on all
|
||||||
|
the interfaces, namely it sets not tunnel endpoint,
|
||||||
|
but address of peering host, which is directly reachable
|
||||||
|
via this tunnel,
|
||||||
|
rather than via Internet. Actual tunnel endpoint address \verb|A|
|
||||||
|
should be set with \verb|ip tunnel add ... remote A|.
|
||||||
|
|
||||||
|
\item {\bf In 2.0} you create tunnel routes with the command:
|
||||||
|
\begin{verbatim}
|
||||||
|
route add -net 10.0.0.0 gw A dev tunl0
|
||||||
|
\end{verbatim}
|
||||||
|
|
||||||
|
{\bf 2.2} interprets this command equally for all device
|
||||||
|
kinds and gateway is required to be directly reachable via this tunnel,
|
||||||
|
rather than via Internet. You still may use \verb|ip route add ... onlink|
|
||||||
|
to override this behaviour.
|
||||||
|
|
||||||
|
\end{itemize}
|
||||||
|
|
||||||
|
|
||||||
|
\section{Tunnel setup: basics}
|
||||||
|
|
||||||
|
Standard Linux-2.2 kernel supports three flavors of tunnels,
|
||||||
|
listed in the following table:
|
||||||
|
\vspace{2mm}
|
||||||
|
|
||||||
|
\begin{tabular}{lll}
|
||||||
|
\vrule depth 0.8ex width 0pt\relax
|
||||||
|
Mode & Description & Base device \\
|
||||||
|
ipip & IP over IP & tunl0 \\
|
||||||
|
sit & IPv6 over IP & sit0 \\
|
||||||
|
gre & ANY over GRE over IP & gre0
|
||||||
|
\end{tabular}
|
||||||
|
|
||||||
|
\vspace{2mm}
|
||||||
|
|
||||||
|
\noindent All the kinds of tunnels are created with one command:
|
||||||
|
\begin{verbatim}
|
||||||
|
ip tunnel add <NAME> mode <MODE> [ local <S> ] [ remote <D> ]
|
||||||
|
\end{verbatim}
|
||||||
|
|
||||||
|
This command creates new tunnel device with name \verb|<NAME>|.
|
||||||
|
The \verb|<NAME>| is an arbitrary string. Particularly,
|
||||||
|
it may be even \verb|eth0|. The rest of parameters set
|
||||||
|
different tunnel characteristics.
|
||||||
|
|
||||||
|
\begin{itemize}
|
||||||
|
|
||||||
|
\item
|
||||||
|
\verb|mode <MODE>| sets tunnel mode. Three modes are available now
|
||||||
|
\verb|ipip|, \verb|sit| and \verb|gre|.
|
||||||
|
|
||||||
|
\item
|
||||||
|
\verb|remote <D>| sets remote endpoint of the tunnel to IP
|
||||||
|
address \verb|<D>|.
|
||||||
|
\item
|
||||||
|
\verb|local <S>| sets fixed local address for tunneled
|
||||||
|
packets. It must be an address on another interface of this host.
|
||||||
|
|
||||||
|
\end{itemize}
|
||||||
|
|
||||||
|
\let\thefootnote\oldthefootnote
|
||||||
|
|
||||||
|
Both \verb|remote| and \verb|local| may be omitted. In this case we
|
||||||
|
say that they are zero or wildcard. Two tunnels of one mode cannot
|
||||||
|
have the same \verb|remote| and \verb|local|. Particularly it means
|
||||||
|
that base device or fallback tunnel cannot be replicated.\footnote{
|
||||||
|
This restriction is relaxed for keyed GRE tunnels.}
|
||||||
|
|
||||||
|
Tunnels are divided into two classes: {\bf pointopoint} tunnels, which
|
||||||
|
have some non-wildcard \verb|remote| address and deliver all the packets
|
||||||
|
to this destination, and {\bf NBMA} (i.e. Non-Broadcast Multi-Access) tunnels,
|
||||||
|
which have no \verb|remote|. Particularly, base devices (f.e.\ \verb|tunl0|)
|
||||||
|
are NBMA, because they have neither \verb|remote| nor
|
||||||
|
\verb|local| addresses.
|
||||||
|
|
||||||
|
|
||||||
|
After tunnel device is created you should configure it as you did
|
||||||
|
it with other devices. Certainly, the configuration of tunnels has
|
||||||
|
some features related to the fact that they work over existing Internet
|
||||||
|
routing infrastructure and simultaneously create new virtual links,
|
||||||
|
which changes this infrastructure. The danger that not enough careful
|
||||||
|
tunnel setup will result in formation of tunnel loops,
|
||||||
|
collapse of routing or flooding network with exponentially
|
||||||
|
growing number of tunneled fragments is very real.
|
||||||
|
|
||||||
|
|
||||||
|
Protocol setup on pointopoint tunnels does not differ from configuration
|
||||||
|
of other devices. You should set a protocol address with \verb|ifconfig|
|
||||||
|
and add routes with \verb|route| utility.
|
||||||
|
|
||||||
|
NBMA tunnels are different. To route something via NBMA tunnel
|
||||||
|
you have to explain to driver, where it should deliver packets to.
|
||||||
|
The only way to make it is to create special routes with gateway
|
||||||
|
address pointing to desired endpoint. F.e.\
|
||||||
|
\begin{verbatim}
|
||||||
|
ip route add 10.0.0.0/24 via <A> dev tunl0 onlink
|
||||||
|
\end{verbatim}
|
||||||
|
It is important to use option \verb|onlink|, otherwise
|
||||||
|
kernel will refuse request to create route via gateway not directly
|
||||||
|
reachable over device \verb|tunl0|. With IPv6 the situation is much simpler:
|
||||||
|
when you start device \verb|sit0|, it automatically configures itself
|
||||||
|
with all IPv4 addresses mapped to IPv6 space, so that all IPv4
|
||||||
|
Internet is {\em really reachable} via \verb|sit0|! Excellent, the command
|
||||||
|
\begin{verbatim}
|
||||||
|
ip route add 3FFE::/16 via ::193.233.7.65 dev sit0
|
||||||
|
\end{verbatim}
|
||||||
|
will route \verb|3FFE::/16| via \verb|sit0|, sending all the packets
|
||||||
|
destined to this prefix to 193.233.7.65.
|
||||||
|
|
||||||
|
\section{Tunnel setup: options}
|
||||||
|
|
||||||
|
Command \verb|ip tunnel add| has several additional options.
|
||||||
|
\begin{itemize}
|
||||||
|
|
||||||
|
\item \verb|ttl N| --- set fixed TTL \verb|N| on tunneled packets.
|
||||||
|
\verb|N| is number in the range 1--255. 0 is special value,
|
||||||
|
meaning that packets inherit TTL value.
|
||||||
|
Default value is: \verb|inherit|.
|
||||||
|
|
||||||
|
\item \verb|tos T| --- set fixed tos \verb|T| on tunneled packets.
|
||||||
|
Default value is: \verb|inherit|.
|
||||||
|
|
||||||
|
\item \verb|dev DEV| --- bind tunnel to device \verb|DEV|, so that
|
||||||
|
tunneled packets will be routed only via this device and will
|
||||||
|
not be able to escape to another device, when route to endpoint changes.
|
||||||
|
|
||||||
|
\item \verb|nopmtudisc| --- disable Path MTU Discovery on this tunnel.
|
||||||
|
It is enabled by default. Note that fixed ttl is incompatible
|
||||||
|
with this option: tunnels with fixed ttl always make pmtu discovery.
|
||||||
|
|
||||||
|
\end{itemize}
|
||||||
|
|
||||||
|
\verb|ipip| and \verb|sit| tunnels have no more options. \verb|gre|
|
||||||
|
tunnels are more complicated:
|
||||||
|
|
||||||
|
\begin{itemize}
|
||||||
|
|
||||||
|
\item \verb|key K| --- use keyed GRE with key \verb|K|. \verb|K| is
|
||||||
|
either number or IP address-like dotted quad.
|
||||||
|
|
||||||
|
\item \verb|csum| --- checksum tunneled packets.
|
||||||
|
|
||||||
|
\item \verb|seq| --- serialize packets.
|
||||||
|
\begin{NB}
|
||||||
|
I think this option does not
|
||||||
|
work. At least, I did not test it, did not debug it and
|
||||||
|
even do not understand, how it is supposed to work and for what
|
||||||
|
purpose Cisco planned to use it.
|
||||||
|
\end{NB}
|
||||||
|
|
||||||
|
\end{itemize}
|
||||||
|
|
||||||
|
|
||||||
|
Actually, these GRE options can be set separately for input and
|
||||||
|
output directions by prefixing corresponding keywords with letter
|
||||||
|
\verb|i| or \verb|o|. F.e.\ \verb|icsum| orders to accept only
|
||||||
|
packets with correct checksum and \verb|ocsum| means, that
|
||||||
|
our host will calculate and send checksum.
|
||||||
|
|
||||||
|
Command \verb|ip tunnel add| is not the only operation,
|
||||||
|
which can be made with tunnels. Certainly, you may get short help page
|
||||||
|
with:
|
||||||
|
\begin{verbatim}
|
||||||
|
ip tunnel help
|
||||||
|
\end{verbatim}
|
||||||
|
|
||||||
|
Besides that, you may view list of installed tunnels with the help of command:
|
||||||
|
\begin{verbatim}
|
||||||
|
ip tunnel ls
|
||||||
|
\end{verbatim}
|
||||||
|
Also you may look at statistics:
|
||||||
|
\begin{verbatim}
|
||||||
|
ip -s tunnel ls Cisco
|
||||||
|
\end{verbatim}
|
||||||
|
where \verb|Cisco| is name of tunnel device. Command
|
||||||
|
\begin{verbatim}
|
||||||
|
ip tunnel del Cisco
|
||||||
|
\end{verbatim}
|
||||||
|
destroys tunnel \verb|Cisco|. And, finally,
|
||||||
|
\begin{verbatim}
|
||||||
|
ip tunnel change Cisco mode sit local ME remote HE ttl 32
|
||||||
|
\end{verbatim}
|
||||||
|
changes its parameters.
|
||||||
|
|
||||||
|
\section{Differences between 2.2 and 2.0 tunnels revisited.}
|
||||||
|
|
||||||
|
Now we can discuss more subtle differences between tunneling in 2.0
|
||||||
|
and 2.2.
|
||||||
|
|
||||||
|
\begin{itemize}
|
||||||
|
|
||||||
|
\item In 2.0 all tunneled packets were received promiscuously
|
||||||
|
as soon as you loaded module \verb|ipip|. 2.2 tries to select the best
|
||||||
|
tunnel device and packet looks as received on this. F.e.\ if host
|
||||||
|
received \verb|ipip| packet from host \verb|D| destined to our
|
||||||
|
local address \verb|S|, kernel searches for matching tunnels
|
||||||
|
in order:
|
||||||
|
|
||||||
|
\begin{tabular}{ll}
|
||||||
|
1 & \verb|remote| is \verb|D| and \verb|local| is \verb|S| \\
|
||||||
|
2 & \verb|remote| is \verb|D| and \verb|local| is wildcard \\
|
||||||
|
3 & \verb|remote| is wildcard and \verb|local| is \verb|S| \\
|
||||||
|
4 & \verb|tunl0|
|
||||||
|
\end{tabular}
|
||||||
|
|
||||||
|
If tunnel exists, but it is not in \verb|UP| state, the tunnel is ignored.
|
||||||
|
Note, that if \verb|tunl0| is \verb|UP| it receives all the IPIP packets,
|
||||||
|
not acknowledged by more specific tunnels.
|
||||||
|
Be careful, it means that without carefully installed firewall rules
|
||||||
|
anyone on the Internet may inject to your network any packets with
|
||||||
|
source addresses indistinguishable from local ones. It is not so bad idea
|
||||||
|
to design tunnels in the way enforcing maximal route symmetry
|
||||||
|
and to enable reversed path filter (\verb|rp_filter| sysctl option) on
|
||||||
|
tunnel devices.
|
||||||
|
|
||||||
|
\item In 2.2 you can monitor and debug tunnels with \verb|tcpdump|.
|
||||||
|
F.e.\ \verb|tcpdump| \verb|-i Cisco| \verb|-nvv| will dump packets,
|
||||||
|
which kernel output, via tunnel \verb|Cisco| and the packets received on it
|
||||||
|
from kernel viewpoint.
|
||||||
|
|
||||||
|
\end{itemize}
|
||||||
|
|
||||||
|
|
||||||
|
\section{Linux and Cisco IOS tunnels.}
|
||||||
|
|
||||||
|
Among other tunnels Cisco IOS supports IPIP and GRE.
|
||||||
|
Essentially, Cisco setup is subset of options, available for Linux.
|
||||||
|
Let us consider the simplest example:
|
||||||
|
|
||||||
|
\begin{verbatim}
|
||||||
|
interface Tunnel0
|
||||||
|
tunnel mode gre ip
|
||||||
|
tunnel source 10.10.14.1
|
||||||
|
tunnel destination 10.10.13.2
|
||||||
|
\end{verbatim}
|
||||||
|
|
||||||
|
|
||||||
|
This command set translates to:
|
||||||
|
|
||||||
|
\begin{verbatim}
|
||||||
|
ip tunnel add Tunnel0 \
|
||||||
|
mode gre \
|
||||||
|
local 10.10.14.1 \
|
||||||
|
remote 10.10.13.2
|
||||||
|
\end{verbatim}
|
||||||
|
|
||||||
|
Any questions? No questions.
|
||||||
|
|
||||||
|
\section{Interaction of IPIP tunnels and DVMRP.}
|
||||||
|
|
||||||
|
DVMRP exploits IPIP tunnels to route multicasts via Internet.
|
||||||
|
\verb|mrouted| creates
|
||||||
|
IPIP tunnels listed in its configuration file automatically.
|
||||||
|
From kernel and user viewpoints there are no differences between
|
||||||
|
tunnels, created in this way, and tunnels created by \verb|ip tunnel|.
|
||||||
|
I.e.\ if \verb|mrouted| created some tunnel, it may be used to
|
||||||
|
route unicast packets, provided appropriate routes are added.
|
||||||
|
And vice versa, if administrator has already created a tunnel,
|
||||||
|
it will be reused by \verb|mrouted|, if it requests DVMRP
|
||||||
|
tunnel with the same local and remote addresses.
|
||||||
|
|
||||||
|
Do not wonder, if your manually configured tunnel is
|
||||||
|
destroyed, when mrouted exits.
|
||||||
|
|
||||||
|
|
||||||
|
\section{Broadcast GRE ``tunnels''.}
|
||||||
|
|
||||||
|
It is possible to set \verb|remote| for GRE tunnel to a multicast
|
||||||
|
address. Such tunnel becomes {\bf broadcast} tunnel (though word
|
||||||
|
tunnel is not quite appropriate in this case, it is rather virtual network).
|
||||||
|
\begin{verbatim}
|
||||||
|
ip tunnel add Universe local 193.233.7.65 \
|
||||||
|
remote 224.66.66.66 ttl 16
|
||||||
|
ip addr add 10.0.0.1/16 dev Universe
|
||||||
|
ip link set Universe up
|
||||||
|
\end{verbatim}
|
||||||
|
This tunnel is true broadcast network and broadcast packets are
|
||||||
|
sent to multicast group 224.66.66.66. By default such tunnel starts
|
||||||
|
to resolve both IP and IPv6 addresses via ARP/NDISC, so that
|
||||||
|
if multicast routing is supported in surrounding network, all GRE nodes
|
||||||
|
will find one another automatically and will form virtual Ethernet-like
|
||||||
|
broadcast network. If multicast routing does not work, it is unpleasant
|
||||||
|
but not fatal flaw. The tunnel becomes NBMA rather than broadcast network.
|
||||||
|
You may disable dynamic ARPing by:
|
||||||
|
\begin{verbatim}
|
||||||
|
echo 0 > /proc/sys/net/ipv4/neigh/Universe/mcast_solicit
|
||||||
|
\end{verbatim}
|
||||||
|
and to add required information to ARP tables manually:
|
||||||
|
\begin{verbatim}
|
||||||
|
ip neigh add 10.0.0.2 lladdr 128.6.190.2 dev Universe nud permanent
|
||||||
|
\end{verbatim}
|
||||||
|
In this case packets sent to 10.0.0.2 will be encapsulated in GRE
|
||||||
|
and sent to 128.6.190.2. It is possible to facilitate address resolution
|
||||||
|
using methods typical for other NBMA networks, f.e.\ to start user
|
||||||
|
level \verb|arpd| daemon, which will maintain database of hosts attached
|
||||||
|
to GRE virtual network or ask for information
|
||||||
|
from dedicated ARP or NHRP server.
|
||||||
|
|
||||||
|
|
||||||
|
Actually, such setup is the most natural for tunneling,
|
||||||
|
it is really flexible, scalable and easily manageable, so that
|
||||||
|
it is strongly recommended to be used with GRE tunnels instead of ugly
|
||||||
|
hack with NBMA mode and \verb|onlink| modifier. Unfortunately,
|
||||||
|
for historical reasons broadcast mode is not supported by IPIP tunnels,
|
||||||
|
but this probably will change in future.
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
\section{Traffic control issues.}
|
||||||
|
|
||||||
|
Tunnels are devices, hence all the power of Linux traffic control
|
||||||
|
applies to them. The simplest (and the most useful in practice)
|
||||||
|
example is limiting tunnel bandwidth. The following command:
|
||||||
|
\begin{verbatim}
|
||||||
|
tc qdisc add dev tunl0 root tbf \
|
||||||
|
rate 128Kbit burst 4K limit 10K
|
||||||
|
\end{verbatim}
|
||||||
|
will limit tunneled traffic to 128Kbit with maximal burst size of 4K
|
||||||
|
and queuing not more than 10K.
|
||||||
|
|
||||||
|
However, you should remember, that tunnels are {\em virtual} devices
|
||||||
|
implemented in software and true queue management is impossible for them
|
||||||
|
just because they have no queues. Instead, it is better to create classes
|
||||||
|
on real physical interfaces and to map tunneled packets to them.
|
||||||
|
In general case of dynamic routing you should create such classes
|
||||||
|
on all outgoing interfaces, or, alternatively,
|
||||||
|
to use option \verb|dev DEV| to bind tunnel to a fixed physical device.
|
||||||
|
In the last case packets will be routed only via specified device
|
||||||
|
and you need to setup corresponding classes only on it.
|
||||||
|
Though you have to pay for this convenience,
|
||||||
|
if routing will change, your tunnel will fail.
|
||||||
|
|
||||||
|
Suppose that CBQ class \verb|1:ABC| has been created on device \verb|eth0|
|
||||||
|
specially for tunnel \verb|Cisco| with endpoints \verb|S| and \verb|D|.
|
||||||
|
Now you can select IPIP packets with addresses \verb|S| and \verb|D|
|
||||||
|
with some classifier and map them to class \verb|1:ABC|. F.e.\
|
||||||
|
it is easy to make with \verb|rsvp| classifier:
|
||||||
|
\begin{verbatim}
|
||||||
|
tc filter add dev eth0 pref 100 proto ip rsvp \
|
||||||
|
session D ipproto ipip filter S \
|
||||||
|
classid 1:ABC
|
||||||
|
\end{verbatim}
|
||||||
|
|
||||||
|
If you want to make more detailed classification of sub-flows
|
||||||
|
transmitted via tunnel, you can build CBQ subtree,
|
||||||
|
rooted at \verb|1:ABC| and attach to subroot set of rules parsing
|
||||||
|
IPIP packets more deeply.
|
||||||
|
|
||||||
|
\end{document}
|
||||||
|
|
@ -0,0 +1,110 @@
|
||||||
|
<!doctype linuxdoc system>
|
||||||
|
|
||||||
|
<article>
|
||||||
|
|
||||||
|
<title>NSTAT, IFSTAT and RTACCT Utilities
|
||||||
|
<author>Alexey Kuznetsov, <tt/kuznet@ms2.inr.ac.ru/
|
||||||
|
<date>some_negative_number, 20 Sep 2001
|
||||||
|
<abstract>
|
||||||
|
<tt/nstat/, <tt/ifstat/ and <tt/rtacct/ are simple tools helping
|
||||||
|
to monitor kernel snmp counters and network interface statistics.
|
||||||
|
</abstract>
|
||||||
|
|
||||||
|
<p> These utilities are very similar, so that I describe
|
||||||
|
them simultaneously, using name <tt/Xstat/ in the places which apply
|
||||||
|
to all of them.
|
||||||
|
|
||||||
|
<p>The format of the command is:
|
||||||
|
|
||||||
|
<tscreen><verb>
|
||||||
|
Xstat [ OPTIONS ] [ PATTERN [ PATTERN ... ] ]
|
||||||
|
</verb></tscreen>
|
||||||
|
|
||||||
|
<p>
|
||||||
|
<tt/PATTERN/ is shell style pattern, selecting identifier
|
||||||
|
of SNMP variables or interfaces to show. Variable is displayed
|
||||||
|
if one of patterns matches its name. If no patterns are given,
|
||||||
|
<tt/Xstat/ assumes that user wants to see all the variables.
|
||||||
|
|
||||||
|
<p> <tt/OPTIONS/ is list of single letter options, using common unix
|
||||||
|
conventions.
|
||||||
|
|
||||||
|
<itemize>
|
||||||
|
<item><tt/-h/ - show help page
|
||||||
|
<item><tt/-?/ - the same, of course
|
||||||
|
<item><tt/-v/, <tt/-V/ - print version of <tt/Xstat/ and exit
|
||||||
|
<item><tt/-z/ - dump zero counters too. By default they are not shown.
|
||||||
|
<item><tt/-a/ - dump absolute values of counters. By default <tt/Xstat/
|
||||||
|
calculates increments since the previous use.
|
||||||
|
<item><tt/-s/ - do not update history, so that the next time you will
|
||||||
|
see counters including values accumulated to the moment
|
||||||
|
of this measurement too.
|
||||||
|
<item><tt/-n/ - do not display anything, only update history.
|
||||||
|
<item><tt/-r/ - reset history.
|
||||||
|
<item><tt/-d INTERVAL/ - <tt/Xstat/ is run in daemon mode collecting
|
||||||
|
statistics. <tt/INTERVAL/ is interval between measurements
|
||||||
|
in seconds.
|
||||||
|
<item><tt/-t INTERVAL/ - time interval to average rates. Default value
|
||||||
|
is 60 seconds.
|
||||||
|
<item><tt/-e/ - display extended information about errors (<tt/ifstat/ only).
|
||||||
|
</itemize>
|
||||||
|
|
||||||
|
<p>
|
||||||
|
History is just dump saved in file <tt>/tmp/.Xstat.uUID</tt>
|
||||||
|
or in file given by environment variables <tt/NSTAT_HISTORY/,
|
||||||
|
<tt/IFSTAT_HISTORY/ and <tt/RTACCT_HISTORY/.
|
||||||
|
Each time you use <tt/Xstat/, the values there are updated.
|
||||||
|
If you use patterns, only the values which you _really_ see
|
||||||
|
are updated. If you want to skip an uninteresting period,
|
||||||
|
use option <tt/-n/, or just output to <tt>/dev/null</tt>.
|
||||||
|
|
||||||
|
<p>
|
||||||
|
<tt/Xstat/ understands when history is invalidated by system reboot
|
||||||
|
or source of information switched between different instances
|
||||||
|
of daemonic <tt/Xstat/ and kernel SNMP tables and does not
|
||||||
|
use invalid history.
|
||||||
|
|
||||||
|
<p> Beware, <tt/Xstat/ will not produce sane output,
|
||||||
|
when many processes use it simultaneously. If several processes
|
||||||
|
under single user need this utility they should use environment
|
||||||
|
variables to put their history in safe places
|
||||||
|
or to use it with options <tt/-a -s/.
|
||||||
|
|
||||||
|
<p>
|
||||||
|
Well, that's all. The utility is very simple, but nevertheless
|
||||||
|
very handy.
|
||||||
|
|
||||||
|
<p> <bf/Output of XSTAT/
|
||||||
|
<p> The first line of output is <tt/#/ followed by identifier
|
||||||
|
of source of information, it may be word <tt/kernel/, when <tt/Xstat/
|
||||||
|
gets information from kernel or some dotted decimal number followed
|
||||||
|
by parameters, when it obtains information from running <tt/Xstat/ daemon.
|
||||||
|
|
||||||
|
<p>In the case of <tt/nstat/ the rest of output consists of three columns:
|
||||||
|
SNMP MIB identifier,
|
||||||
|
its value (or increment since previous measurement) and average
|
||||||
|
rate of increase of the counter per second. <tt/ifstat/ outputs
|
||||||
|
interface name followed by pairs of counter and rate of its change.
|
||||||
|
|
||||||
|
<p> <bf/Daemonic Xstat/
|
||||||
|
<p> <tt/Xstat/ may be started as daemon by any user. This makes sense
|
||||||
|
to avoid wrapped counters and to obtain reasonably long counters
|
||||||
|
for large time. Also <tt/Xstat/ daemon calculates average rates.
|
||||||
|
For the first goal sampling interval (option <tt/-d/) may be large enough,
|
||||||
|
f.e. for gigabit rates byte counters overflow not more frequently than
|
||||||
|
each 40 seconds and you may select interval of 20 seconds.
|
||||||
|
On the other hand, when <tt/Xstat/ is used for estimating rates
|
||||||
|
interval should be less than averaging period (option <tt/-t/), otherwise
|
||||||
|
estimation loses in quality.
|
||||||
|
|
||||||
|
Client <tt/Xstat/, before trying to get information from the kernel,
|
||||||
|
contacts daemon started by this user, then it tries system wide
|
||||||
|
daemon, which is supposed to be started by superuser. And only if
|
||||||
|
none of them replied it gets information from kernel.
|
||||||
|
|
||||||
|
<p> <bf/Environment/
|
||||||
|
<p> <tt/NSTAT_HISTORY/ - name of history file for <tt/nstat/.
|
||||||
|
<p> <tt/IFSTAT_HISTORY/ - name of history file for <tt/ifstat/.
|
||||||
|
<p> <tt/RTACCT_HISTORY/ - name of history file for <tt/rtacct/.
|
||||||
|
|
||||||
|
</article>
|
||||||
|
|
@ -0,0 +1,26 @@
|
||||||
|
\textwidth 6.0in
|
||||||
|
\textheight 8.5in
|
||||||
|
|
||||||
|
\input SNAPSHOT
|
||||||
|
|
||||||
|
\pagestyle{myheadings}
|
||||||
|
\markboth{\protect\TITLE}{}
|
||||||
|
\markright{{\protect\sc iproute2-ss\Draft}}
|
||||||
|
|
||||||
|
% To print it in compact form: both sides on one sheet (psnup -2)
|
||||||
|
\evensidemargin=\oddsidemargin
|
||||||
|
|
||||||
|
\newenvironment{NB}{\bgroup \vskip 1mm\leftskip 1cm \footnotesize \noindent NB.
|
||||||
|
}{\par\egroup \vskip 1mm}
|
||||||
|
|
||||||
|
\def\threeonly{[2.3.15+ only] }
|
||||||
|
|
||||||
|
\begin{document}
|
||||||
|
|
||||||
|
\makeatletter
|
||||||
|
\renewcommand{\@oddhead}{{\protect\sc iproute2-ss\Draft} \hfill \protect\arabic{page}}
|
||||||
|
\makeatother
|
||||||
|
\let\oldthefootnote\thefootnote
|
||||||
|
\def\thefootnote{}
|
||||||
|
\footnotetext{Copyright \copyright~1999 A.N.Kuznetsov}
|
||||||
|
|
||||||
|
|
@ -0,0 +1,52 @@
|
||||||
|
<!doctype linuxdoc system>
|
||||||
|
|
||||||
|
<article>
|
||||||
|
|
||||||
|
<title>RTACCT Utility
|
||||||
|
<author>Robert Olsson
|
||||||
|
<date>some_negative_number, 20 Dec 2001
|
||||||
|
|
||||||
|
<p>
|
||||||
|
Here is some code for monitoring the route cache. For systems handling high
|
||||||
|
network load, servers, routers, firewalls etc the route cache and its garbage
|
||||||
|
collection is crucial. Linux has a solid implementation.
|
||||||
|
|
||||||
|
<p>
|
||||||
|
The kernel patch (not required since linux-2.4.7) adds statistics counters
|
||||||
|
from route cache process into
|
||||||
|
/proc/net/rt_cache_stat. A companion user mode program presents the statistics
|
||||||
|
in a vmstat or iostat manner. The ratio between cache hits and misses gives
|
||||||
|
the flow length.
|
||||||
|
|
||||||
|
<p>
|
||||||
|
Hopefully it can help understanding performance and DoS and other related
|
||||||
|
issues.
|
||||||
|
|
||||||
|
<p> A URL where newer versions of this utility can be (probably) found
|
||||||
|
is ftp://robur.slu.se/pub/Linux/net-development/rt_cache_stat/
|
||||||
|
|
||||||
|
|
||||||
|
<p><bf/Description/
|
||||||
|
|
||||||
|
<p>The format of the command is:
|
||||||
|
|
||||||
|
<tscreen><verb>
|
||||||
|
rtstat [ OPTIONS ]
|
||||||
|
</verb></tscreen>
|
||||||
|
|
||||||
|
<p> <tt/OPTIONS/ are:
|
||||||
|
|
||||||
|
<itemize>
|
||||||
|
|
||||||
|
<item><tt/-h/, <tt/-help/ - show help page and version of the utility.
|
||||||
|
|
||||||
|
<item><tt/-i INTERVAL/ - interval between snapshots, default value is
|
||||||
|
2 seconds.
|
||||||
|
|
||||||
|
<item><tt/-s NUMBER/ - whether to print header line. 0 inhibits header line,
|
||||||
|
1 prescribes to print it once and 2 (this is default setting) forces header
|
||||||
|
line each 20 lines.
|
||||||
|
|
||||||
|
</itemize>
|
||||||
|
|
||||||
|
</article>
|
||||||
|
|
@ -0,0 +1,525 @@
|
||||||
|
<!doctype linuxdoc system>
|
||||||
|
|
||||||
|
<article>
|
||||||
|
|
||||||
|
<title>SS Utility: Quick Intro
|
||||||
|
<author>Alexey Kuznetsov, <tt/kuznet@ms2.inr.ac.ru/
|
||||||
|
<date>some_negative_number, 20 Sep 2001
|
||||||
|
<abstract>
|
||||||
|
<tt/ss/ is one another utility to investigate sockets.
|
||||||
|
Functionally it is NOT better than <tt/netstat/ combined
|
||||||
|
with some perl/awk scripts and though it is surely faster
|
||||||
|
it is not enough to make it much better. :-)
|
||||||
|
So, stop reading this now and do not waste your time.
|
||||||
|
Well, certainly, it proposes some functionality, which current
|
||||||
|
netstat is still not able to do, but surely will soon.
|
||||||
|
</abstract>
|
||||||
|
|
||||||
|
<sect>Why?
|
||||||
|
|
||||||
|
<p> <tt>/proc</tt> interface is inadequate, unfortunately.
|
||||||
|
When amount of sockets is enough large, <tt/netstat/ or even
|
||||||
|
plain <tt>cat /proc/net/tcp/</tt> cause nothing but pains and curses.
|
||||||
|
In linux-2.4 the disease became worse: even if amount
|
||||||
|
of sockets is small reading <tt>/proc/net/tcp/</tt> is slow enough.
|
||||||
|
|
||||||
|
This utility presents a new approach, which is supposed to scale
|
||||||
|
well. I am not going to describe technical details here and
|
||||||
|
will concentrate on description of the command.
|
||||||
|
The only important thing to say is that it is not so bad idea
|
||||||
|
to load module <tt/tcp_diag/, which can be found in directory
|
||||||
|
<tt/Modules/ of <tt/iproute2/. If you do not make this <tt/ss/
|
||||||
|
will work, but it falls back to <tt>/proc</tt> and becomes slow
|
||||||
|
like <tt/netstat/, well, a bit faster yet (see section "Some numbers").
|
||||||
|
|
||||||
|
<sect>Old news
|
||||||
|
|
||||||
|
<p>
|
||||||
|
In the simplest form <tt/ss/ is equivalent to netstat
|
||||||
|
with some small deviations.
|
||||||
|
|
||||||
|
<itemize>
|
||||||
|
<item><tt/ss -t -a/ dumps all TCP sockets
|
||||||
|
<item><tt/ss -u -a/ dumps all UDP sockets
|
||||||
|
<item><tt/ss -w -a/ dumps all RAW sockets
|
||||||
|
<item><tt/ss -x -a/ dumps all UNIX sockets
|
||||||
|
</itemize>
|
||||||
|
|
||||||
|
<p>
|
||||||
|
Option <tt/-o/ shows TCP timers state.
|
||||||
|
Option <tt/-e/ shows some extended information.
|
||||||
|
Etc. etc. etc. Seems, all the options of netstat related to sockets
|
||||||
|
are supported. Though not AX.25 and other bizarres. :-)
|
||||||
|
If someone wants, he can make support for decnet and ipx.
|
||||||
|
Some rudimentary support for them is already present in iproute2 libutils,
|
||||||
|
and I will be glad to see these new members.
|
||||||
|
|
||||||
|
<p>
|
||||||
|
However, standard functionality is a bit different:
|
||||||
|
|
||||||
|
<p>
|
||||||
|
The first: without option <tt/-a/ sockets in states
|
||||||
|
<tt/TIME-WAIT/ and <tt/SYN-RECV/ are skipped too.
|
||||||
|
It is more reasonable default, I think.
|
||||||
|
|
||||||
|
<p>
|
||||||
|
The second: format of UNIX sockets is different. It coincides
|
||||||
|
with tcp/udp. Though standard kernel still does not allow to
|
||||||
|
see write/read queues and peer address of connected UNIX sockets,
|
||||||
|
the patch doing this exists.
|
||||||
|
|
||||||
|
<p>
|
||||||
|
The third: default is to dump only TCP sockets, rather than all of the types.
|
||||||
|
|
||||||
|
<p>
|
||||||
|
The next: by default it does not resolve numeric host addresses (like <tt/ip/)!
|
||||||
|
Resolving is enabled with option <tt/-r/. Service names, usually stored
|
||||||
|
in local files, are resolved by default. Also, if service database
|
||||||
|
does not contain references to a port, <tt/ss/ queries system
|
||||||
|
<tt/rpcbind/. RPC services are prefixed with <tt/rpc./
|
||||||
|
Resolution of services may be suppressed with option <tt/-n/.
|
||||||
|
|
||||||
|
<p>
|
||||||
|
It does not accept "long" options (I dislike them, sorry).
|
||||||
|
So, address family is given with family identifier following
|
||||||
|
option <tt/-f/ to be aligned to iproute2 conventions.
|
||||||
|
Mostly, it is to allow option parser to parse
|
||||||
|
addresses correctly, but as side effect it really limits dumping
|
||||||
|
to sockets supporting only given family. Option <tt/-A/ followed
|
||||||
|
by list of socket tables to dump is also supported.
|
||||||
|
Logically, id of socket table is different from _address_ family, which is
|
||||||
|
another point of incompatibility. So, id is one of
|
||||||
|
<tt/all/, <tt/tcp/, <tt/udp/,
|
||||||
|
<tt/raw/, <tt/inet/, <tt/unix/, <tt/packet/, <tt/netlink/. See?
|
||||||
|
Well, <tt/inet/ is just abbreviation for <tt/tcp|udp|raw/
|
||||||
|
and it is not difficult to guess that <tt/packet/ allows
|
||||||
|
to look at packet sockets. Actually, there are also some other abbreviations,
|
||||||
|
f.e. <tt/unix_dgram/ selects only datagram UNIX sockets.
|
||||||
|
|
||||||
|
<p>
|
||||||
|
The next: well, I still do not know. :-)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
<sect>Time to talk about new functionality.
|
||||||
|
|
||||||
|
<p>It is builtin filtering of socket lists.
|
||||||
|
|
||||||
|
<sect1> Filtering by state.
|
||||||
|
|
||||||
|
<p>
|
||||||
|
<tt/ss/ allows to filter socket states, using keywords
|
||||||
|
<tt/state/ and <tt/exclude/, followed by some state
|
||||||
|
identifier.
|
||||||
|
|
||||||
|
<p>
|
||||||
|
State identifier are standard TCP state names (not listed,
|
||||||
|
they are useless for you if you already do not know them)
|
||||||
|
or abbreviations:
|
||||||
|
|
||||||
|
<itemize>
|
||||||
|
<item><tt/all/ - for all the states
|
||||||
|
<item><tt/bucket/ - for TCP minisockets (<tt/TIME-WAIT|SYN-RECV/)
|
||||||
|
<item><tt/big/ - all except for minisockets
|
||||||
|
<item><tt/connected/ - not closed and not listening
|
||||||
|
<item><tt/synchronized/ - connected and not <tt/SYN-SENT/
|
||||||
|
</itemize>
|
||||||
|
|
||||||
|
<p>
|
||||||
|
F.e. to dump all tcp sockets except <tt/SYN-RECV/:
|
||||||
|
|
||||||
|
<tscreen><verb>
|
||||||
|
ss exclude SYN-RECV
|
||||||
|
</verb></tscreen>
|
||||||
|
|
||||||
|
<p>
|
||||||
|
If neither <tt/state/ nor <tt/exclude/ directives
|
||||||
|
are present,
|
||||||
|
state filter defaults to <tt/all/ with option <tt/-a/
|
||||||
|
or to <tt/all/,
|
||||||
|
excluding listening, syn-recv, time-wait and closed sockets.
|
||||||
|
|
||||||
|
<sect1> Filtering by addresses and ports.
|
||||||
|
|
||||||
|
<p>
|
||||||
|
Option list may contain address/port filter.
|
||||||
|
It is boolean expression which consists of boolean operation
|
||||||
|
<tt/or/, <tt/and/, <tt/not/ and predicates.
|
||||||
|
Actually, all the flavors of names for boolean operations are eaten:
|
||||||
|
<tt/&/, <tt/&&/, <tt/|/, <tt/||/, <tt/!/, but do not forget
|
||||||
|
about special sense given to these symbols by unix shells and escape
|
||||||
|
them correctly, when used from command line.
|
||||||
|
|
||||||
|
<p>
|
||||||
|
Predicates may be of the following kinds:
|
||||||
|
|
||||||
|
<itemize>
|
||||||
|
<item>A. Address/port match, where address is checked against mask
|
||||||
|
and port is either wildcard or exact. It is one of:
|
||||||
|
|
||||||
|
<tscreen><verb>
|
||||||
|
dst prefix:port
|
||||||
|
src prefix:port
|
||||||
|
src unix:STRING
|
||||||
|
src link:protocol:ifindex
|
||||||
|
src nl:channel:pid
|
||||||
|
</verb></tscreen>
|
||||||
|
|
||||||
|
Both prefix and port may be absent or replaced with <tt/*/,
|
||||||
|
which means wildcard. UNIX socket use more powerful scheme
|
||||||
|
matching to socket names by shell wildcards. Also, prefixes
|
||||||
|
unix: and link: may be omitted, if address family is evident
|
||||||
|
from context (with option <tt/-x/ or with <tt/-f unix/
|
||||||
|
or with <tt/unix/ keyword)
|
||||||
|
|
||||||
|
<p>
|
||||||
|
F.e.
|
||||||
|
|
||||||
|
<tscreen><verb>
|
||||||
|
dst 10.0.0.1
|
||||||
|
dst 10.0.0.1:
|
||||||
|
dst 10.0.0.1/32:
|
||||||
|
dst 10.0.0.1:*
|
||||||
|
</verb></tscreen>
|
||||||
|
are equivalent and mean socket connected to
|
||||||
|
any port on host 10.0.0.1
|
||||||
|
|
||||||
|
<tscreen><verb>
|
||||||
|
dst 10.0.0.0/24:22
|
||||||
|
</verb></tscreen>
|
||||||
|
sockets connected to port 22 on network
|
||||||
|
10.0.0.0...255.
|
||||||
|
|
||||||
|
<p>
|
||||||
|
Note that port is separated from address with a colon, which creates
|
||||||
|
troubles with IPv6 addresses. Generally, we interpret the last
|
||||||
|
colon as splitting port. To allow to give IPv6 addresses,
|
||||||
|
trick like used in IPv6 HTTP URLs may be used:
|
||||||
|
|
||||||
|
<tscreen><verb>
|
||||||
|
dst [::1]
|
||||||
|
</verb></tscreen>
|
||||||
|
are sockets connected to ::1 on any port
|
||||||
|
|
||||||
|
<p>
|
||||||
|
Another way is <tt>dst ::1/128</tt>. The slash helps to understand that
|
||||||
|
colon is part of IPv6 address.
|
||||||
|
|
||||||
|
<p>
|
||||||
|
Now we can add another alias for <tt/dst 10.0.0.1/:
|
||||||
|
<tt/dst [10.0.0.1]/. :-)
|
||||||
|
|
||||||
|
<p> Address may be a DNS name. In this case all the addresses are looked
|
||||||
|
up (in all the address families, if it is not limited by option <tt/-f/
|
||||||
|
or special address prefix <tt/inet:/, <tt/inet6:/) and resulting
|
||||||
|
expression is <tt/or/ over all of them.
|
||||||
|
|
||||||
|
<item> B. Port expressions:
|
||||||
|
<tscreen><verb>
|
||||||
|
dport >= :1024
|
||||||
|
dport != :22
|
||||||
|
sport < :32000
|
||||||
|
</verb></tscreen>
|
||||||
|
etc.
|
||||||
|
|
||||||
|
All the relations: <tt/</, <tt/>/, <tt/=/, <tt/>=/, <tt/<=/, <tt/==/,
|
||||||
|
<tt/!=/, <tt/eq/, <tt/ge/, <tt/lt/, <tt/ne/...
|
||||||
|
Use variant which you like more, but do not forget to escape special
|
||||||
|
characters when typing them in command line. :-)
|
||||||
|
|
||||||
|
Note that port number syntactically coincides to the case A!
|
||||||
|
You may even add an IP address, but it will not participate
|
||||||
|
in comparison, except for <tt/==/ and <tt/!=/, which are equivalent
|
||||||
|
to corresponding predicates of type A. F.e.
|
||||||
|
<p>
|
||||||
|
<tt/dst 10.0.0.1:22/
|
||||||
|
is equivalent to <tt/dport eq 10.0.0.1:22/
|
||||||
|
and
|
||||||
|
<tt/not dst 10.0.0.1:22/ is equivalent to
|
||||||
|
<tt/dport neq 10.0.0.1:22/
|
||||||
|
|
||||||
|
<item>C. Keyword <tt/autobound/. It matches to sockets bound automatically
|
||||||
|
on local system.
|
||||||
|
|
||||||
|
</itemize>
|
||||||
|
|
||||||
|
|
||||||
|
<sect> Examples
|
||||||
|
|
||||||
|
<p>
|
||||||
|
<itemize>
|
||||||
|
<item>1. List all the tcp sockets in state <tt/FIN-WAIT-1/ for our apache
|
||||||
|
to network 193.233.7/24 and look at their timers:
|
||||||
|
|
||||||
|
<tscreen><verb>
|
||||||
|
ss -o state fin-wait-1 \( sport = :http or sport = :https \) \
|
||||||
|
dst 193.233.7/24
|
||||||
|
</verb></tscreen>
|
||||||
|
|
||||||
|
Oops, forgot to say that missing logical operation is
|
||||||
|
equivalent to <tt/and/.
|
||||||
|
|
||||||
|
<item> 2. Well, now look at the rest...
|
||||||
|
|
||||||
|
<tscreen><verb>
|
||||||
|
ss -o excl fin-wait-1
|
||||||
|
ss state fin-wait-1 \( sport neq :http and sport neq :https \) \
|
||||||
|
or not dst 193.233.7/24
|
||||||
|
</verb></tscreen>
|
||||||
|
|
||||||
|
Note that we have to do _two_ calls of ss to do this.
|
||||||
|
State match is always anded to address/port match.
|
||||||
|
The reason for this is purely technical: ss does fast skip of
|
||||||
|
not matching states before parsing addresses and I consider the
|
||||||
|
ability to skip fastly gobs of time-wait and syn-recv sockets
|
||||||
|
as more important than logical generality.
|
||||||
|
|
||||||
|
<item> 3. So, let's look at all our sockets using autobound ports:
|
||||||
|
|
||||||
|
<tscreen><verb>
|
||||||
|
ss -a -A all autobound
|
||||||
|
</verb></tscreen>
|
||||||
|
|
||||||
|
|
||||||
|
<item> 4. And eventually find all the local processes connected
|
||||||
|
to local X servers:
|
||||||
|
|
||||||
|
<tscreen><verb>
|
||||||
|
ss -xp dst "/tmp/.X11-unix/*"
|
||||||
|
</verb></tscreen>
|
||||||
|
|
||||||
|
Pardon, this does not work with current kernel, patching is required.
|
||||||
|
But we still can look at server side:
|
||||||
|
|
||||||
|
<tscreen><verb>
|
||||||
|
ss -x src "/tmp/.X11-unix/*"
|
||||||
|
</verb></tscreen>
|
||||||
|
|
||||||
|
</itemize>
|
||||||
|
|
||||||
|
|
||||||
|
<sect> Returning to ground: real manual
|
||||||
|
|
||||||
|
<p>
|
||||||
|
<sect1> Command arguments
|
||||||
|
|
||||||
|
<p> General format of arguments to <tt/ss/ is:
|
||||||
|
|
||||||
|
<tscreen><verb>
|
||||||
|
ss [ OPTIONS ] [ STATE-FILTER ] [ ADDRESS-FILTER ]
|
||||||
|
</verb></tscreen>
|
||||||
|
|
||||||
|
<sect2><tt/OPTIONS/
|
||||||
|
<p> <tt/OPTIONS/ is list of single letter options, using common unix
|
||||||
|
conventions.
|
||||||
|
|
||||||
|
<itemize>
|
||||||
|
<item><tt/-h/ - show help page
|
||||||
|
<item><tt/-?/ - the same, of course
|
||||||
|
<item><tt/-v/, <tt/-V/ - print version of <tt/ss/ and exit
|
||||||
|
<item><tt/-s/ - print summary statistics. This option does not parse
|
||||||
|
socket lists obtaining summary from various sources. It is useful
|
||||||
|
when amount of sockets is so huge that parsing <tt>/proc/net/tcp</tt>
|
||||||
|
is painful.
|
||||||
|
<item><tt/-D FILE/ - do not display anything, just dump raw information
|
||||||
|
about TCP sockets to <tt/FILE/ after applying filters. If <tt/FILE/ is <tt/-/
|
||||||
|
<tt/stdout/ is used.
|
||||||
|
<item><tt/-F FILE/ - read continuation of filter from <tt/FILE/.
|
||||||
|
Each line of <tt/FILE/ is interpreted like single command line option.
|
||||||
|
If <tt/FILE/ is <tt/-/ <tt/stdin/ is used.
|
||||||
|
<item><tt/-r/ - try to resolve numeric address/ports
|
||||||
|
<item><tt/-n/ - do not try to resolve ports
|
||||||
|
<item><tt/-o/ - show some optional information, f.e. TCP timers
|
||||||
|
<item><tt/-i/ - show some information specific to TCP (RTO, congestion
|
||||||
|
window, slow start threshold etc.)
|
||||||
|
<item><tt/-e/ - show even more optional information
|
||||||
|
<item><tt/-m/ - show extended information on memory used by the socket.
|
||||||
|
It is available only with <tt/tcp_diag/ enabled.
|
||||||
|
<item><tt/-p/ - show list of processes owning the socket
|
||||||
|
<item><tt/-f FAMILY/ - default address family used for parsing addresses.
|
||||||
|
Also this option limits listing to sockets supporting
|
||||||
|
given address family. Currently the following families
|
||||||
|
are supported: <tt/unix/, <tt/inet/, <tt/inet6/, <tt/link/,
|
||||||
|
<tt/netlink/.
|
||||||
|
<item><tt/-4/ - alias for <tt/-f inet/
|
||||||
|
<item><tt/-6/ - alias for <tt/-f inet6/
|
||||||
|
<item><tt/-0/ - alias for <tt/-f link/
|
||||||
|
<item><tt/-A LIST-OF-TABLES/ - list of socket tables to dump, separated
|
||||||
|
by commas. The following identifiers are understood:
|
||||||
|
<tt/all/, <tt/inet/, <tt/tcp/, <tt/udp/, <tt/raw/,
|
||||||
|
<tt/unix/, <tt/packet/, <tt/netlink/, <tt/unix_dgram/,
|
||||||
|
<tt/unix_stream/, <tt/packet_raw/, <tt/packet_dgram/.
|
||||||
|
<item><tt/-x/ - alias for <tt/-A unix/
|
||||||
|
<item><tt/-t/ - alias for <tt/-A tcp/
|
||||||
|
<item><tt/-u/ - alias for <tt/-A udp/
|
||||||
|
<item><tt/-w/ - alias for <tt/-A raw/
|
||||||
|
<item><tt/-a/ - show sockets of all the states. By default sockets
|
||||||
|
in states <tt/LISTEN/, <tt/TIME-WAIT/, <tt/SYN-RECV/
|
||||||
|
and <tt/CLOSE/ are skipped.
|
||||||
|
<item><tt/-l/ - show only sockets in state <tt/LISTEN/
|
||||||
|
</itemize>
|
||||||
|
|
||||||
|
<sect2><tt/STATE-FILTER/
|
||||||
|
|
||||||
|
<p><tt/STATE-FILTER/ allows to construct arbitrary set of
|
||||||
|
states to match. Its syntax is sequence of keywords <tt/state/
|
||||||
|
and <tt/exclude/ followed by identifier of state.
|
||||||
|
Available identifiers are:
|
||||||
|
|
||||||
|
<p>
|
||||||
|
<itemize>
|
||||||
|
<item> All standard TCP states: <tt/established/, <tt/syn-sent/,
|
||||||
|
<tt/syn-recv/, <tt/fin-wait-1/, <tt/fin-wait-2/, <tt/time-wait/,
|
||||||
|
<tt/closed/, <tt/close-wait/, <tt/last-ack/, <tt/listen/ and <tt/closing/.
|
||||||
|
|
||||||
|
<item><tt/all/ - for all the states
|
||||||
|
<item><tt/connected/ - all the states except for <tt/listen/ and <tt/closed/
|
||||||
|
<item><tt/synchronized/ - all the <tt/connected/ states except for
|
||||||
|
<tt/syn-sent/
|
||||||
|
<item><tt/bucket/ - states, which are maintained as minisockets, i.e.
|
||||||
|
<tt/time-wait/ and <tt/syn-recv/.
|
||||||
|
<item><tt/big/ - opposite to <tt/bucket/
|
||||||
|
</itemize>
|
||||||
|
|
||||||
|
<sect2><tt/ADDRESS_FILTER/
|
||||||
|
|
||||||
|
<p><tt/ADDRESS_FILTER/ is boolean expression with operations <tt/and/, <tt/or/
|
||||||
|
and <tt/not/, which can be abbreviated in C style f.e. as <tt/&/,
|
||||||
|
<tt/&&/.
|
||||||
|
|
||||||
|
<p>
|
||||||
|
Predicates check socket addresses, both local and remote.
|
||||||
|
There are the following kinds of predicates:
|
||||||
|
|
||||||
|
<itemize>
|
||||||
|
<item> <tt/dst ADDRESS_PATTERN/ - matches remote address and port
|
||||||
|
<item> <tt/src ADDRESS_PATTERN/ - matches local address and port
|
||||||
|
<item> <tt/dport RELOP PORT/ - compares remote port to a number
|
||||||
|
<item> <tt/sport RELOP PORT/ - compares local port to a number
|
||||||
|
<item> <tt/autobound/ - checks that socket is bound to an ephemeral
|
||||||
|
port
|
||||||
|
</itemize>
|
||||||
|
|
||||||
|
<p><tt/RELOP/ is some of <tt/<=/, <tt/>=/, <tt/==/ etc.
|
||||||
|
To make this more convenient for use in unix shell, alphabetic
|
||||||
|
FORTRAN-like notations <tt/le/, <tt/gt/ etc. are accepted as well.
|
||||||
|
|
||||||
|
<p>The format and semantics of <tt/ADDRESS_PATTERN/ depends on address
|
||||||
|
family.
|
||||||
|
|
||||||
|
<itemize>
|
||||||
|
<item><tt/inet/ - <tt/ADDRESS_PATTERN/ consists of IP prefix, optionally
|
||||||
|
followed by colon and port. If prefix or port part is absent or replaced
|
||||||
|
with <tt/*/, this means wildcard match.
|
||||||
|
<item><tt/inet6/ - The same as <tt/inet/, only prefix refers to an IPv6
|
||||||
|
address. Unlike <tt/inet/ colon becomes ambiguous, so that <tt/ss/ allows
|
||||||
|
to use scheme, like used in URLs, where address is surrounded with
|
||||||
|
<tt/[/ ... <tt/]/.
|
||||||
|
<item><tt/unix/ - <tt/ADDRESS_PATTERN/ is shell-style wildcard.
|
||||||
|
<item><tt/packet/ - format looks like <tt/inet/, only interface index
|
||||||
|
stays instead of port and link layer protocol id instead of address.
|
||||||
|
<item><tt/netlink/ - format looks like <tt/inet/, only socket pid
|
||||||
|
stays instead of port and netlink channel instead of address.
|
||||||
|
</itemize>
|
||||||
|
|
||||||
|
<p><tt/PORT/ is syntactically <tt/ADDRESS_PATTERN/ with wildcard
|
||||||
|
address part. Certainly, it is undefined for UNIX sockets.
|
||||||
|
|
||||||
|
<sect1> Environment variables
|
||||||
|
|
||||||
|
<p>
|
||||||
|
<tt/ss/ allows to change source of information using various
|
||||||
|
environment variables:
|
||||||
|
|
||||||
|
<p>
|
||||||
|
<itemize>
|
||||||
|
<item> <tt/PROC_SLABINFO/ to override <tt>/proc/slabinfo</tt>
|
||||||
|
<item> <tt/PROC_NET_TCP/ to override <tt>/proc/net/tcp</tt>
|
||||||
|
<item> <tt/PROC_NET_UDP/ to override <tt>/proc/net/udp</tt>
|
||||||
|
<item> etc.
|
||||||
|
</itemize>
|
||||||
|
|
||||||
|
<p>
|
||||||
|
Variable <tt/PROC_ROOT/ allows to change root of all the <tt>/proc/</tt>
|
||||||
|
hierarchy.
|
||||||
|
|
||||||
|
<p>
|
||||||
|
Variable <tt/TCPDIAG_FILE/ prescribes to open a file instead of
|
||||||
|
requesting kernel to dump information about TCP sockets.
|
||||||
|
|
||||||
|
|
||||||
|
<p> This option is used mainly to investigate bug reports,
|
||||||
|
when dumps of files usually found in <tt>/proc/</tt> are received
|
||||||
|
by e-mail.
|
||||||
|
|
||||||
|
<sect1> Output format
|
||||||
|
|
||||||
|
<p>Six columns. The first is <tt/Netid/, it denotes socket type and
|
||||||
|
transport protocol, when it is ambiguous: <tt/tcp/, <tt/udp/, <tt/raw/,
|
||||||
|
<tt/u_str/ is abbreviation for <tt/unix_stream/, <tt/u_dgr/ for UNIX
|
||||||
|
datagram sockets, <tt/nl/ for netlink, <tt/p_raw/ and <tt/p_dgr/ for
|
||||||
|
raw and datagram packet sockets. This column is optional, it will
|
||||||
|
be hidden, if filter selects a unique netid.
|
||||||
|
|
||||||
|
<p>
|
||||||
|
The second column is <tt/State/. Socket state is displayed here.
|
||||||
|
The names are standard TCP names, except for <tt/UNCONN/, which
|
||||||
|
cannot happen for TCP, but normal for not connected sockets
|
||||||
|
of other types. Again, this column can be hidden.
|
||||||
|
|
||||||
|
<p>
|
||||||
|
Then two columns (<tt/Recv-Q/ and <tt/Send-Q/) showing amount of data
|
||||||
|
queued for receive and transmit.
|
||||||
|
|
||||||
|
<p>
|
||||||
|
And the last two columns display local address and port of the socket
|
||||||
|
and its peer address, if the socket is connected.
|
||||||
|
|
||||||
|
<p>
|
||||||
|
If options <tt/-o/, <tt/-e/ or <tt/-p/ were given, options are
|
||||||
|
displayed not in fixed positions but separated by spaces pairs:
|
||||||
|
<tt/option:value/. If value is not a single number, it is presented
|
||||||
|
as list of values, enclosed to <tt/(/ ... <tt/)/ and separated with
|
||||||
|
commas. F.e.
|
||||||
|
|
||||||
|
<tscreen><verb>
|
||||||
|
timer:(keepalive,111min,0)
|
||||||
|
</verb></tscreen>
|
||||||
|
is typical format for TCP timer (option <tt/-o/).
|
||||||
|
|
||||||
|
<tscreen><verb>
|
||||||
|
users:((X,113,3))
|
||||||
|
</verb></tscreen>
|
||||||
|
is typical for list of users (option <tt/-p/).
|
||||||
|
|
||||||
|
|
||||||
|
<sect>Some numbers
|
||||||
|
|
||||||
|
<p>
|
||||||
|
Well, let us use <tt/pidentd/ and a tool <tt/ibench/ to measure
|
||||||
|
its performance. It is 30 requests per second here. Nothing to test,
|
||||||
|
it is too slow. OK, let us patch pidentd with patch from directory
|
||||||
|
Patches. After this it handles about 4300 requests per second
|
||||||
|
and becomes handy tool to pollute socket tables with lots of timewait
|
||||||
|
buckets.
|
||||||
|
|
||||||
|
<p>
|
||||||
|
So, each test starts from pollution tables with 30000 sockets
|
||||||
|
and then doing full dump of the table piped to wc and measuring
|
||||||
|
timings with time:
|
||||||
|
|
||||||
|
<p>Results:
|
||||||
|
|
||||||
|
<itemize>
|
||||||
|
<item> <tt/netstat -at/ - 15.6 seconds
|
||||||
|
<item> <tt/ss -atr/, but without <tt/tcp_diag/ - 5.4 seconds
|
||||||
|
<item> <tt/ss -atr/ with <tt/tcp_diag/ - 0.47 seconds
|
||||||
|
</itemize>
|
||||||
|
|
||||||
|
No comments. Though one comment is necessary, most of time
|
||||||
|
without <tt/tcp_diag/ is wasted inside kernel with completely
|
||||||
|
blocked networking. More than 10 seconds, yes. <tt/tcp_diag/
|
||||||
|
does the same work for 100 milliseconds of system time.
|
||||||
|
|
||||||
|
</article>
|
||||||
|
|
@ -0,0 +1,514 @@
|
||||||
|
\documentclass[12pt,twoside]{article}
|
||||||
|
|
||||||
|
\usepackage[hidelinks]{hyperref} % \url
|
||||||
|
\usepackage{booktabs} % nicer tabulars
|
||||||
|
\usepackage{fancyvrb}
|
||||||
|
\usepackage{fullpage}
|
||||||
|
\usepackage{float}
|
||||||
|
|
||||||
|
\newcommand{\iface}{\textit}
|
||||||
|
\newcommand{\cmd}{\texttt}
|
||||||
|
\newcommand{\man}{\textit}
|
||||||
|
\newcommand{\qdisc}{\texttt}
|
||||||
|
\newcommand{\filter}{\texttt}
|
||||||
|
|
||||||
|
\begin{document}
|
||||||
|
\title{QoS in Linux with TC and Filters}
|
||||||
|
\author{Phil Sutter (phil@nwl.cc)}
|
||||||
|
\date{January 2016}
|
||||||
|
\maketitle
|
||||||
|
|
||||||
|
Standard practice when transmitting packets over a medium which may block (due
|
||||||
|
to congestion, e.g.) is to use a queue which temporarily holds these packets. In
|
||||||
|
Linux, this queueing approach is where QoS happens: A Queueing Discipline
|
||||||
|
(qdisc) holds multiple packet queues with different priorities for dequeueing to
|
||||||
|
the network driver. The classification (i.e. deciding which queue a packet
|
||||||
|
should go into) is typically done based on Type Of Service (IPv4) or Traffic
|
||||||
|
Class (IPv6) header fields but depending on qdisc implementation, might be
|
||||||
|
controlled by the user as well.
|
||||||
|
|
||||||
|
Qdiscs come in two flavors, classful or classless. While classless qdiscs are
|
||||||
|
not as flexible as classful ones, they also require much less customizing. Often
|
||||||
|
it is enough to just attach them to an interface, without exact knowledge of
|
||||||
|
what is done internally. Classful qdiscs are the exact opposite: flexible in
|
||||||
|
application, they are often not even usable without insightful configuration.
|
||||||
|
|
||||||
|
As the name implies, classful qdiscs provide configurable classes to sort
|
||||||
|
traffic into. In its basic form, this is not much different than, say, the
|
||||||
|
classless \qdisc{pfifo\_fast} which holds three queues and classifies per
|
||||||
|
packet upon priority field. Though typically classes go beyond that by
|
||||||
|
supporting nesting and additional characteristics like e.g. maximum traffic
|
||||||
|
rate or quantum.
|
||||||
|
|
||||||
|
When it comes to controlling the classification process, filters come into play.
|
||||||
|
They attach to the parent of a set of classes (i.e. either the qdisc itself or
|
||||||
|
a parent class) and specify how a packet (or its associated flow) has to look
|
||||||
|
like in order to suit a given class. To overcome this simplification, it is
|
||||||
|
possible to attach multiple filters to the same parent, which then consults each
|
||||||
|
of them in a row until the first one accepts the packet.
|
||||||
|
|
||||||
|
Before getting into detail about what filters there are and how to use them, a
|
||||||
|
simple setup of a qdisc with classes is necessary:
|
||||||
|
\begin{figure}[H]
|
||||||
|
\begin{Verbatim}
|
||||||
|
.-------------------------------------------------------.
|
||||||
|
| |
|
||||||
|
| HTB |
|
||||||
|
| |
|
||||||
|
| .----------------------------------------------------.|
|
||||||
|
| | ||
|
||||||
|
| | Class 1:1 ||
|
||||||
|
| | ||
|
||||||
|
| | .---------------..---------------..---------------.||
|
||||||
|
| | | || || |||
|
||||||
|
| | | Class 1:10 || Class 1:20 || Class 1:30 |||
|
||||||
|
| | | || || |||
|
||||||
|
| | | .------------.|| .------------.|| .------------.|||
|
||||||
|
| | | | ||| | ||| | ||||
|
||||||
|
| | | | fq_codel ||| | fq_codel ||| | fq_codel ||||
|
||||||
|
| | | | ||| | ||| | ||||
|
||||||
|
| | | '------------'|| '------------'|| '------------'|||
|
||||||
|
| | '---------------''---------------''---------------'||
|
||||||
|
| '----------------------------------------------------'|
|
||||||
|
'-------------------------------------------------------'
|
||||||
|
\end{Verbatim}
|
||||||
|
\end{figure}
|
||||||
|
\noindent
|
||||||
|
The following commands establish the basic setup shown:
|
||||||
|
\begin{Verbatim}
|
||||||
|
(1) # tc qdisc replace dev eth0 root handle 1: htb default 30
|
||||||
|
(2) # tc class add dev eth0 parent 1: classid 1:1 htb rate 95mbit
|
||||||
|
(3) # alias tclass='tc class add dev eth0 parent 1:1'
|
||||||
|
(4) # tclass classid 1:10 htb rate 1mbit ceil 20mbit prio 1
|
||||||
|
(4) # tclass classid 1:20 htb rate 90mbit ceil 95mbit prio 2
|
||||||
|
(4) # tclass classid 1:30 htb rate 1mbit ceil 95mbit prio 3
|
||||||
|
(5) # tc qdisc add dev eth0 parent 1:10 fq_codel
|
||||||
|
(5) # tc qdisc add dev eth0 parent 1:20 fq_codel
|
||||||
|
(5) # tc qdisc add dev eth0 parent 1:30 fq_codel
|
||||||
|
\end{Verbatim}
|
||||||
|
A little explanation for the unfamiliar reader:
|
||||||
|
\begin{enumerate}
|
||||||
|
\item Replace the root qdisc of \iface{eth0} by an instance of \qdisc{HTB}.
|
||||||
|
Specifying the handle is necessary so it can be referenced in consecutive
|
||||||
|
calls to \cmd{tc}. The default class for unclassified traffic is set to
|
||||||
|
30.
|
||||||
|
\item Create a single top-level class with handle 1:1 which limits the total
|
||||||
|
bandwidth allowed to 95mbit/s. It is assumed that \iface{eth0} is a 100mbit/s link,
|
||||||
|
staying a little below that helps to keep the main point of enqueueing in
|
||||||
|
the qdisc layer instead of the interface hardware queue or at another
|
||||||
|
bottleneck in the network.
|
||||||
|
\item Define an alias for the common part of the remaining three calls in order
|
||||||
|
to improve readability. This means all remaining classes are attached to the
|
||||||
|
common parent class from (2).
|
||||||
|
\item Create three child classes for different uses: Class 1:10 has highest
|
||||||
|
priority but is tightly limited in bandwidth - fine for interactive
|
||||||
|
connections. Class 1:20 has mid priority and high guaranteed bandwidth, for
|
||||||
|
high priority bulk traffic. Finally, there's the default class 1:30 with
|
||||||
|
lowest priority, low guaranteed bandwidth and the ability to use the full
|
||||||
|
link in case it's unused otherwise. This should be fine for uninteresting
|
||||||
|
traffic not explicitly taken care of.
|
||||||
|
\item Attach a leaf qdisc to each of the child classes created in (4). Since
|
||||||
|
\qdisc{HTB} by default attaches \qdisc{pfifo} as leaf qdisc, this step is optional. Still,
|
||||||
|
the fairness between different flows provided by the classless \qdisc{fq\_codel} is
|
||||||
|
worth the effort.
|
||||||
|
\end{enumerate}
|
||||||
|
More information about the qdiscs and fine-tuning parameters can be found in
|
||||||
|
\man{tc-htb(8)} and \man{tc-fq\_codel(8)}.
|
||||||
|
|
||||||
|
Without any additional setup done, now all traffic leaving \iface{eth0} is shaped to
|
||||||
|
95mbit/s and directed through class 1:30. This can be verified by looking at the
|
||||||
|
\texttt{Sent} field of the class statistics printed via \cmd{tc -s class show dev eth0}:
|
||||||
|
Only the root class 1:1 and its child 1:30 should show any traffic.
|
||||||
|
|
||||||
|
|
||||||
|
\section*{Finally time to start filtering!}
|
||||||
|
|
||||||
|
Let's begin with a simple one, i.e. reestablishing what \qdisc{pfifo\_fast} did
|
||||||
|
automatically based on TOS/Priority field. Linux internally translates the
|
||||||
|
header field into the priority field of struct skbuff, which
|
||||||
|
\qdisc{pfifo\_fast} uses for
|
||||||
|
classification. \man{tc-prio(8)} contains a table listing the priority (and
|
||||||
|
ultimately, \qdisc{pfifo\_fast} queue index) each TOS value is being translated into.
|
||||||
|
Here is a shorter version:
|
||||||
|
\begin{center}
|
||||||
|
\begin{tabular}{lll}
|
||||||
|
TOS Values & Linux Priority (Number) & Queue Index \\
|
||||||
|
\midrule
|
||||||
|
0x0 - 0x6 & Best Effort (0) & 1 \\
|
||||||
|
0x8 - 0xe & Bulk (2) & 2 \\
|
||||||
|
0x10 - 0x16 & Interactive (6) & 0 \\
|
||||||
|
0x18 - 0x1e & Interactive Bulk (4) & 1 \\
|
||||||
|
\end{tabular}
|
||||||
|
\end{center}
|
||||||
|
Using the \filter{basic} filter, it is possible to match packets based on that skbuff
|
||||||
|
field, which has the added benefit of being IP version agnostic. Since the
|
||||||
|
\qdisc{HTB} setup above defaults to class ID 1:30, the Bulk priority can be
|
||||||
|
ignored. The \filter{basic} filter allows to combine matches, therefore we get along
|
||||||
|
with only two filters:
|
||||||
|
\begin{Verbatim}
|
||||||
|
# tc filter add dev eth0 parent 1: basic \
|
||||||
|
match 'meta(priority eq 6)' classid 1:10
|
||||||
|
# tc filter add dev eth0 parent 1: basic \
|
||||||
|
match 'meta(priority eq 0)' \
|
||||||
|
or 'meta(priority eq 4)' classid 1:20
|
||||||
|
\end{Verbatim}
|
||||||
|
A detailed description of the \filter{basic} filter and the ematch syntax it uses can be
|
||||||
|
found in \man{tc-basic(8)} and \man{tc-ematch(8)}.
|
||||||
|
|
||||||
|
Obviously, this first example cries for optimization. A simple one would be to
|
||||||
|
just change the default class from 1:30 to 1:20, so filters are only needed for
|
||||||
|
Bulk and Interactive priorities:
|
||||||
|
\begin{Verbatim}
|
||||||
|
# tc filter add dev eth0 parent 1: basic \
|
||||||
|
match 'meta(priority eq 6)' classid 1:10
|
||||||
|
# tc filter add dev eth0 parent 1: basic \
|
||||||
|
match 'meta(priority eq 2)' classid 1:20
|
||||||
|
\end{Verbatim}
|
||||||
|
Given that class IDs are arbitrary, choosing them wisely allows for a direct
|
||||||
|
mapping. So first, recreate the qdisc and classes configuration:
|
||||||
|
\begin{Verbatim}
|
||||||
|
# tc qdisc replace dev eth0 root handle 1: htb default 10
|
||||||
|
# tc class add dev eth0 parent 1: classid 1:1 htb rate 95mbit
|
||||||
|
# alias tclass='tc class add dev eth0 parent 1:1'
|
||||||
|
# tclass classid 1:16 htb rate 1mbit ceil 20mbit prio 1
|
||||||
|
# tclass classid 1:10 htb rate 90mbit ceil 95mbit prio 2
|
||||||
|
# tclass classid 1:12 htb rate 1mbit ceil 95mbit prio 3
|
||||||
|
# tc qdisc add dev eth0 parent 1:16 fq_codel
|
||||||
|
# tc qdisc add dev eth0 parent 1:10 fq_codel
|
||||||
|
# tc qdisc add dev eth0 parent 1:12 fq_codel
|
||||||
|
\end{Verbatim}
|
||||||
|
This is basically identical to above, but with changed leaf class IDs and the
|
||||||
|
second priority class being the default. Using the \filter{flow} filter with its \texttt{map}
|
||||||
|
functionality, a single filter command is enough:
|
||||||
|
\begin{Verbatim}
|
||||||
|
# tc filter add dev eth0 parent 1: handle 0x1337 flow \
|
||||||
|
map key priority baseclass 1:10
|
||||||
|
\end{Verbatim}
|
||||||
|
The \filter{flow} filter now uses the priority value to construct a destination class ID
|
||||||
|
by adding it to the value of \texttt{baseclass}. While this works for priority values of
|
||||||
|
0, 2 and 6, it will result in non-existent class ID 1:14 for Interactive Bulk
|
||||||
|
traffic. In that case, the \qdisc{HTB} default applies so that traffic goes into class
|
||||||
|
ID 1:10 just as intended. Please note that specifying a handle is a mandatory
|
||||||
|
requirement by the \filter{flow} filter, although I didn't see where one would use that
|
||||||
|
later. For more information about \filter{flow}, see \man{tc-flow(8)}.
|
||||||
|
|
||||||
|
While \filter{flow} and \filter{basic} filters are relatively easy to apply and understand, they
|
||||||
|
are as well quite limited to their intended purpose. A more flexible option is
|
||||||
|
the \filter{u32} filter, which allows to match on arbitrary parts of the packet data -
|
||||||
|
yet only on that, not any meta data associated to it by the kernel (with the
|
||||||
|
exception of firewall mark value). So in order to continue this little
|
||||||
|
exercise with \filter{u32}, we have to base classification directly upon the actual TOS
|
||||||
|
value. An intuitive attempt might look like this:
|
||||||
|
\begin{Verbatim}
|
||||||
|
# alias tcfilter='tc filter add dev eth0 parent 1:'
|
||||||
|
# tcfilter u32 match ip dsfield 0x10 0x1e classid 1:16
|
||||||
|
# tcfilter u32 match ip dsfield 0x12 0x1e classid 1:16
|
||||||
|
# tcfilter u32 match ip dsfield 0x14 0x1e classid 1:16
|
||||||
|
# tcfilter u32 match ip dsfield 0x16 0x1e classid 1:16
|
||||||
|
# tcfilter u32 match ip dsfield 0x8 0x1e classid 1:12
|
||||||
|
# tcfilter u32 match ip dsfield 0xa 0x1e classid 1:12
|
||||||
|
# tcfilter u32 match ip dsfield 0xc 0x1e classid 1:12
|
||||||
|
# tcfilter u32 match ip dsfield 0xe 0x1e classid 1:12
|
||||||
|
\end{Verbatim}
|
||||||
|
The obvious drawback here is the amount of filters needed. And without the
|
||||||
|
default class, eight more filters would be necessary. This also has performance
|
||||||
|
implications: A packet with TOS value 0xe will be checked eight times in total
|
||||||
|
in order to determine its destination class. While there's not much to be done
|
||||||
|
about the number of filters, at least the performance problem can be eliminated
|
||||||
|
by using \filter{u32}'s hash table support:
|
||||||
|
\begin{Verbatim}
|
||||||
|
# tc filter add dev eth0 parent 1: prio 99 handle 1: u32 divisor 16
|
||||||
|
\end{Verbatim}
|
||||||
|
This creates a hash table with 16 buckets. The table size is arbitrary, but not
|
||||||
|
random: Since the first bit of the TOS field is not interesting, it can be
|
||||||
|
ignored and therefore the range of values to consider is just [0;15], i.e. a
|
||||||
|
number of 16 different values. The next step is to populate the hash table:
|
||||||
|
\begin{Verbatim}
|
||||||
|
# alias tcfilter='tc filter add dev eth0 parent 1: prio 99'
|
||||||
|
# tcfilter u32 match u8 0 0 ht 1:0: classid 1:16
|
||||||
|
# tcfilter u32 match u8 0 0 ht 1:1: classid 1:16
|
||||||
|
# tcfilter u32 match u8 0 0 ht 1:2: classid 1:16
|
||||||
|
# tcfilter u32 match u8 0 0 ht 1:3: classid 1:16
|
||||||
|
# tcfilter u32 match u8 0 0 ht 1:4: classid 1:12
|
||||||
|
# tcfilter u32 match u8 0 0 ht 1:5: classid 1:12
|
||||||
|
# tcfilter u32 match u8 0 0 ht 1:6: classid 1:12
|
||||||
|
# tcfilter u32 match u8 0 0 ht 1:7: classid 1:12
|
||||||
|
# tcfilter u32 match u8 0 0 ht 1:8: classid 1:16
|
||||||
|
# tcfilter u32 match u8 0 0 ht 1:9: classid 1:16
|
||||||
|
# tcfilter u32 match u8 0 0 ht 1:a: classid 1:16
|
||||||
|
# tcfilter u32 match u8 0 0 ht 1:b: classid 1:16
|
||||||
|
# tcfilter u32 match u8 0 0 ht 1:c: classid 1:10
|
||||||
|
# tcfilter u32 match u8 0 0 ht 1:d: classid 1:10
|
||||||
|
# tcfilter u32 match u8 0 0 ht 1:e: classid 1:10
|
||||||
|
# tcfilter u32 match u8 0 0 ht 1:f: classid 1:10
|
||||||
|
\end{Verbatim}
|
||||||
|
The parameter \texttt{ht} denotes the hash table and bucket the filter should be added
|
||||||
|
to. Since the first TOS bit is ignored, its value has to be divided by two in
|
||||||
|
order to get to the bucket it maps to. E.g. a TOS value of 0x10 will therefore
|
||||||
|
map to bucket 0x8. For the sake of completeness, all possible values are mapped
|
||||||
|
and therefore a configurable default class is not required. Note that the used
|
||||||
|
match expression is not functionally necessary, but syntactically mandatory. Therefore anything that
|
||||||
|
matches any packet will suffice. Finally, a filter which links to the defined
|
||||||
|
hash table is needed:
|
||||||
|
\begin{Verbatim}
|
||||||
|
# tc filter add dev eth0 parent 1: prio 1 protocol ip u32 \
|
||||||
|
link 1: hashkey mask 0x001e0000 match u8 0 0
|
||||||
|
\end{Verbatim}
|
||||||
|
Here again, the actual match statement is not necessary, but syntactically
|
||||||
|
required. All the magic lies within the \texttt{hashkey} parameter, which defines which
|
||||||
|
part of the packet should be used directly as hash key. Here's a drawing of the
|
||||||
|
first four bytes of the IPv4 header, with the area selected by \texttt{hashkey mask}
|
||||||
|
highlighted:
|
||||||
|
\begin{figure}[H]
|
||||||
|
\begin{Verbatim}
|
||||||
|
0 1 2 3
|
||||||
|
.-----------------------------------------------------------------.
|
||||||
|
| | | ######## | | |
|
||||||
|
| Version| IHL | #DSCP### | ECN| Total Length |
|
||||||
|
| | | ######## | | |
|
||||||
|
'-----------------------------------------------------------------'
|
||||||
|
\end{Verbatim}
|
||||||
|
\end{figure}
|
||||||
|
\noindent
|
||||||
|
This may look confusing at first, but keep in mind that bit- as well as
|
||||||
|
byte-ordering here is LSB while the mask value is written in MSB we humans use.
|
||||||
|
Therefore reading the mask is done like so, starting from left:
|
||||||
|
\begin{enumerate}
|
||||||
|
\item Skip the first byte (which contains Version and IHL fields).
|
||||||
|
\item Skip the lowest bit of the second byte (0x1e is even).
|
||||||
|
\item Mark the four following bits (0x1e is 11110 in binary).
|
||||||
|
\item Skip the remaining three bits of the second byte as well as the remaining two
|
||||||
|
bytes.
|
||||||
|
\end{enumerate}
|
||||||
|
Before doing the lookup, the kernel right-shifts the masked value by the amount
|
||||||
|
of trailing zero-bits in \texttt{mask}, which implicitly also does the division by two which the
|
||||||
|
hash table depends on. With this setup, every packet has to pass exactly two
|
||||||
|
filters to be classified. Note that this filter is limited to IPv4 packets: Due
|
||||||
|
to the related Traffic Class field being at a different offset in the packet, it
|
||||||
|
would not work for IPv6. To use the same setup for IPv6 as well, a second
|
||||||
|
entry-level filter is necessary:
|
||||||
|
\begin{Verbatim}
|
||||||
|
# tc filter add dev eth0 parent 1: prio 2 protocol ipv6 u32 \
|
||||||
|
link 1: hashkey mask 0x01e00000 match u8 0 0
|
||||||
|
\end{Verbatim}
|
||||||
|
For illustration purposes, here again is a drawing of the first four bytes of
|
||||||
|
the IPv6 header, again with masked area highlighted:
|
||||||
|
\begin{figure}[H]
|
||||||
|
\begin{Verbatim}
|
||||||
|
0 1 2 3
|
||||||
|
.-----------------------------------------------------------------.
|
||||||
|
| | ######## | |
|
||||||
|
| Version| #Traffic Class| Flow Label |
|
||||||
|
| | ######## | |
|
||||||
|
'-----------------------------------------------------------------'
|
||||||
|
\end{Verbatim}
|
||||||
|
\end{figure}
|
||||||
|
\noindent
|
||||||
|
Reading the mask value is analogous to IPv4 with the added complexity that
|
||||||
|
Traffic Class spans over two bytes. Yet, for comparison there's a simple trick:
|
||||||
|
IPv6 has the interesting field shifted by four bits to the left, and the new
|
||||||
|
mask's value is shifted by the same amount. For further information about
|
||||||
|
\filter{u32} and what can be done with it, consult its man page
|
||||||
|
\man{tc-u32(8)}.
|
||||||
|
|
||||||
|
Of course, the kernel provides many more filters than just \filter{basic},
|
||||||
|
\filter{flow} and \filter{u32} which have been presented above. As of now, the
|
||||||
|
remaining ones are:
|
||||||
|
\begin{description}
|
||||||
|
\item[bpf]
|
||||||
|
Filtering using Berkeley Packet Filter programs. The program's return
|
||||||
|
code determines the packet's destination class ID.
|
||||||
|
|
||||||
|
\item[cgroup]
|
||||||
|
Filter packets based on control groups. This is only useful for packets
|
||||||
|
originating from the local host, as control groups only exist in that
|
||||||
|
scope.
|
||||||
|
|
||||||
|
\item[flower]
|
||||||
|
An extended variant of the flow filter.
|
||||||
|
|
||||||
|
\item[fw]
|
||||||
|
Matches on firewall mark values previously assigned to the packet by
|
||||||
|
netfilter (or a filter action, see below for details). This allows to
|
||||||
|
export the classification algorithm into netfilter, which is very
|
||||||
|
convenient if appropriate rules exist on the same system in there
|
||||||
|
already.
|
||||||
|
|
||||||
|
\item[route]
|
||||||
|
Filter packets based on matching routing table entry. Basically
|
||||||
|
equivalent to the \texttt{fw} filter above, to make use of an already existing
|
||||||
|
extensive routing table setup.
|
||||||
|
|
||||||
|
\item[rsvp, rsvp6]
|
||||||
|
Implementation of the Resource Reservation Protocol in Linux, to react
|
||||||
|
upon requests sent by an RSVP daemon.
|
||||||
|
|
||||||
|
\item[tcindex]
|
||||||
|
Match packets based on tcindex value, which is usually set by the dsmark
|
||||||
|
qdisc. This is part of an approach to support Differentiated Services in
|
||||||
|
Linux, which is another topic on its own.
|
||||||
|
\end{description}
|
||||||
|
|
||||||
|
|
||||||
|
\section*{Filter Actions}
|
||||||
|
|
||||||
|
The tc filter framework provides the infrastructure to another extensible set of
|
||||||
|
tools as well, namely tc actions. As the name suggests, they allow to do things
|
||||||
|
with packets (or associated data). (The list of) Actions are part of a given
|
||||||
|
filter. If it matches, each action it contains is executed in order before
|
||||||
|
returning the classification result. Since the action has direct access to the
|
||||||
|
latter, it is in theory possible for an action to react upon or even change the
|
||||||
|
filtering result - as long as the packet matched, of course. Yet none of the
|
||||||
|
currently in-tree actions make use of this.
|
||||||
|
|
||||||
|
The Generic Actions framework originally evolved out of the filters' ability to
|
||||||
|
police traffic to a given maximum bandwidth. One common use case for that is to
|
||||||
|
limit ingress traffic, dropping packets which exceed the threshold. A classic
|
||||||
|
setup example is like so:
|
||||||
|
\begin{Verbatim}
|
||||||
|
# tc qdisc add dev eth0 handle ffff: ingress
|
||||||
|
# tc filter add dev eth0 parent ffff: u32 \
|
||||||
|
match u32 0 0 \
|
||||||
|
police rate 1mbit burst 100k
|
||||||
|
\end{Verbatim}
|
||||||
|
The ingress qdisc is not a real one, but merely a point of reference for filters
|
||||||
|
to attach to which should get applied to incoming traffic. The \filter{u32} filter added
|
||||||
|
above matches on any packet and therefore limits the total incoming bandwidth to
|
||||||
|
1mbit/s, allowing bursts of up to 100kbytes. Using the new syntax, the filter
|
||||||
|
command changes slightly:
|
||||||
|
\begin{Verbatim}
|
||||||
|
# tc filter add dev eth0 parent ffff: u32 \
|
||||||
|
match u32 0 0 \
|
||||||
|
action police rate 1mbit burst 100k
|
||||||
|
\end{Verbatim}
|
||||||
|
The important detail is that this syntax allows to define multiple actions.
|
||||||
|
E.g. for testing purposes, it is possible to redirect exceeding traffic to the
|
||||||
|
loopback interface instead of dropping it:
|
||||||
|
\begin{Verbatim}
|
||||||
|
# tc filter add dev eth0 parent ffff: u32 \
|
||||||
|
match u32 0 0 \
|
||||||
|
action police rate 1mbit burst 100k conform-exceed pipe \
|
||||||
|
action mirred egress redirect dev lo
|
||||||
|
\end{Verbatim}
|
||||||
|
The added parameter \texttt{conform-exceed pipe} tells the police action to allow for
|
||||||
|
further actions to handle the exceeding packet.
|
||||||
|
|
||||||
|
Apart from \texttt{police} and \texttt{mirred} actions, there are a few more. Here's a full
|
||||||
|
list of the currently implemented ones:
|
||||||
|
\begin{description}
|
||||||
|
\item[bpf]
|
||||||
|
Apply a Berkeley Packet Filter program to the packet.
|
||||||
|
|
||||||
|
\item[connmark]
|
||||||
|
Set the packet's firewall mark to that of its connection. This works by
|
||||||
|
searching the conntrack table for a matching entry. If found, the mark
|
||||||
|
is restored.
|
||||||
|
|
||||||
|
\item[csum]
|
||||||
|
Trigger recalculation of packet checksums. The supported protocols are:
|
||||||
|
IPv4, ICMP, IGMP, TCP, UDP and UDPLite.
|
||||||
|
|
||||||
|
\item[ipt]
|
||||||
|
Pass the packet to an iptables target. This allows to use iptables
|
||||||
|
extensions directly instead of having to go the extra mile via setting
|
||||||
|
an arbitrary firewall mark and matching on that from within netfilter.
|
||||||
|
|
||||||
|
\item[mirred]
|
||||||
|
Mirror or redirect packets. This is often combined with the ifb pseudo
|
||||||
|
device to share a common QoS setup between multiple interfaces or even
|
||||||
|
ingress traffic.
|
||||||
|
|
||||||
|
\item[nat]
|
||||||
|
Perform stateless Network Address Translation. This is certainly not
|
||||||
|
complete and therefore inferior to NAT using iptables: Although the
|
||||||
|
kernel module distinguishes between TCP, UDP and ICMP traffic, it does not
|
||||||
|
handle typical problematic protocols such as active FTP or SIP.
|
||||||
|
|
||||||
|
\item[pedit]
|
||||||
|
Generic packet editing. This allows to alter arbitrary bytes of the
|
||||||
|
packet, either by specifying an offset into the packet or by naming a
|
||||||
|
packet header and field name to change. Currently, the latter is
|
||||||
|
implemented only for IPv4.
|
||||||
|
|
||||||
|
\item[police]
|
||||||
|
Apply a bandwidth rate limiting policy. Packets exceeding it are dropped
|
||||||
|
by default, but may optionally be handled differently.
|
||||||
|
|
||||||
|
\item[simple]
|
||||||
|
This is an example rather than a real action. All it does is print a
|
||||||
|
user-defined string together with a packet counter. Useful maybe for
|
||||||
|
debugging when filter statistics are not available or too complicated.
|
||||||
|
|
||||||
|
\item[skbedit]
|
||||||
|
Edit associated packet data, supports changing queue mapping, priority
|
||||||
|
field and firewall mark value.
|
||||||
|
|
||||||
|
\item[vlan]
|
||||||
|
Add/remove a VLAN header to/from the packet. This might serve as
|
||||||
|
alternative to using 802.1Q pseudo-interfaces in combination with
|
||||||
|
routing rules when e.g. packets for a given destination need to be
|
||||||
|
encapsulated.
|
||||||
|
\end{description}
|
||||||
|
|
||||||
|
|
||||||
|
\section*{Intermediate Functional Block}
|
||||||
|
|
||||||
|
The Intermediate Functional Block (\texttt{ifb}) pseudo network interface acts as a QoS
|
||||||
|
concentrator for multiple different sources of traffic. Packets from or to other
|
||||||
|
interfaces have to be redirected to it using the \texttt{mirred} action in order to be
|
||||||
|
handled, regularly routed traffic will be dropped. This way, a single stack of
|
||||||
|
qdiscs, classes and filters can be shared between multiple interfaces.
|
||||||
|
|
||||||
|
Here's a simple example to feed incoming traffic from multiple interfaces
|
||||||
|
through a Stochastic Fairness Queue (\qdisc{sfq}):
|
||||||
|
\begin{Verbatim}
|
||||||
|
(1) # modprobe ifb
|
||||||
|
(2) # ip link set ifb0 up
|
||||||
|
(3) # tc qdisc add dev ifb0 root sfq
|
||||||
|
\end{Verbatim}
|
||||||
|
The first step is to load the \texttt{ifb} kernel module (1). By default, this will
|
||||||
|
create two ifb devices: \iface{ifb0} and \iface{ifb1}. After setting
|
||||||
|
\iface{ifb0} up in (2), the root
|
||||||
|
qdisc is replaced by \qdisc{sfq} in (3). Finally, one can start redirecting ingress
|
||||||
|
traffic to \iface{ifb0}, e.g. from \iface{eth0}:
|
||||||
|
\begin{Verbatim}
|
||||||
|
# tc qdisc add dev eth0 handle ffff: ingress
|
||||||
|
# tc filter add dev eth0 parent ffff: u32 \
|
||||||
|
match u32 0 0 \
|
||||||
|
action mirred egress redirect dev ifb0
|
||||||
|
\end{Verbatim}
|
||||||
|
The same can be done for other interfaces, just replacing \iface{eth0} in the two
|
||||||
|
commands above. One thing to keep in mind here is the asymmetrical routing this
|
||||||
|
creates within the host doing the QoS: Incoming packets enter the system via
|
||||||
|
\iface{ifb0}, while corresponding replies leave directly via \iface{eth0}. This can be observed
|
||||||
|
using \cmd{tcpdump} on \iface{ifb0}, which shows the input part of the traffic only. What's
|
||||||
|
more confusing is that \cmd{tcpdump} on \iface{eth0} shows both incoming and outgoing traffic,
|
||||||
|
but the redirection is still effective - a simple proof is setting
|
||||||
|
\iface{ifb0} down,
|
||||||
|
which will interrupt the communication. Obviously \cmd{tcpdump} catches the packets to
|
||||||
|
dump before they enter the ingress qdisc, which is why it sees them while the
|
||||||
|
kernel itself doesn't.
|
||||||
|
|
||||||
|
|
||||||
|
\section*{Conclusion}
|
||||||
|
|
||||||
|
Once the steep learning curve has been mastered, the conglomerate of (classful)
|
||||||
|
qdiscs, filters and actions provides a highly sophisticated and flexible
|
||||||
|
infrastructure to perform QoS, which plays nicely along with routing and
|
||||||
|
firewalling setups.
|
||||||
|
|
||||||
|
|
||||||
|
\section*{Further Reading}
|
||||||
|
|
||||||
|
A good starting point for novice users and experienced ones diving into unknown
|
||||||
|
areas is the extensive HOWTO at \url{http://lartc.org}. The iproute2 package ships
|
||||||
|
some examples (usually in /usr/share/doc/, depending on distribution) as well as
|
||||||
|
man pages for \cmd{tc} in general, qdiscs and filters. The latter have been added
|
||||||
|
just recently though, so if your distribution does not ship iproute2 version
|
||||||
|
4.3.0 yet, these are not in there. Apart from that, the internet is a spring of
|
||||||
|
HOWTOs and scripts people wrote - though these should be taken with a grain of
|
||||||
|
salt: The complexity of the matter often leads to copying others' solutions
|
||||||
|
without much validation, which allows for less optimal or even obsolete
|
||||||
|
implementations to survive much longer than desired.
|
||||||
|
|
||||||
|
\end{document}
|
||||||
|
|
@ -5,4 +5,3 @@
|
||||||
4 meta
|
4 meta
|
||||||
7 canid
|
7 canid
|
||||||
8 ipset
|
8 ipset
|
||||||
9 ipt
|
|
||||||
|
|
|
||||||
|
|
@ -14,12 +14,18 @@
|
||||||
13 dnrouted
|
13 dnrouted
|
||||||
14 xorp
|
14 xorp
|
||||||
15 ntk
|
15 ntk
|
||||||
16 dhcp
|
16 dhcp
|
||||||
18 keepalived
|
|
||||||
42 babel
|
42 babel
|
||||||
99 openr
|
|
||||||
186 bgp
|
#
|
||||||
187 isis
|
# Used by me for gated
|
||||||
188 ospf
|
#
|
||||||
189 rip
|
254 gated/aggr
|
||||||
192 eigrp
|
253 gated/bgp
|
||||||
|
252 gated/ospf
|
||||||
|
251 gated/ospfase
|
||||||
|
250 gated/rip
|
||||||
|
249 gated/static
|
||||||
|
248 gated/conn
|
||||||
|
247 gated/inet
|
||||||
|
246 gated/default
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,122 @@
|
||||||
|
# CHANGES
|
||||||
|
# -------
|
||||||
|
# v0.3a2- fixed bug in "if" operator. Thanks kad@dgtu.donetsk.ua.
|
||||||
|
# v0.3a- added TIME parameter. Example:
|
||||||
|
# TIME=00:00-19:00;64Kbit/6Kbit
|
||||||
|
# So, between 00:00 and 19:00 RATE will be 64Kbit.
|
||||||
|
# Just start "cbq.init timecheck" periodically from cron (every 10
|
||||||
|
# minutes for example).
|
||||||
|
# !!! Anyway you MUST start "cbq.init start" for CBQ initialize.
|
||||||
|
# v0.2 - Some cosmetic changes. Now it is more compatible with
|
||||||
|
# old bash version. Thanks to Stanislav V. Voronyi
|
||||||
|
# <stas@cnti.uanet.kharkov.ua>.
|
||||||
|
# v0.1 - First public release
|
||||||
|
#
|
||||||
|
# README
|
||||||
|
# ------
|
||||||
|
#
|
||||||
|
# First of all - this is just a SIMPLE EXAMPLE of CBQ power.
|
||||||
|
# Don't ask me "why" and "how" :)
|
||||||
|
#
|
||||||
|
# This is an example of using CBQ (Class Based Queueing) and policy-based
|
||||||
|
# filter for building smart ethernet shapers. All CBQ parameters are
|
||||||
|
# correct only for ETHERNET (eth0,1,2..) linux interfaces. It works for
|
||||||
|
# ARCNET too (just set bandwidth parameter to 2Mbit). It was tested
|
||||||
|
# on 2.1.125-2.1.129 linux kernels (KSI linux, Nostromo version) and
|
||||||
|
# ip-route utility by A.Kuznetsov (iproute2-ss981101 version).
|
||||||
|
# You can download ip-route from ftp://ftp.inr.ac.ru/ip-routing or
|
||||||
|
# get iproute2*.rpm (compiled with glibc) from ftp.ksi-linux.com.
|
||||||
|
#
|
||||||
|
#
|
||||||
|
# HOW IT WORKS
|
||||||
|
#
|
||||||
|
# Each shaper must be described by config file in $CBQ_PATH
|
||||||
|
# (/etc/sysconfig/cbq/) directory - one config file for each CBQ shaper.
|
||||||
|
#
|
||||||
|
# Some words about config file name:
|
||||||
|
# Each shaper has its personal ID - two byte HEX number. Really ID is
|
||||||
|
# CBQ class.
|
||||||
|
# So, filename looks like:
|
||||||
|
#
|
||||||
|
# cbq-1280.My_first_shaper
|
||||||
|
# ^^^ ^^^ ^^^^^^^^^^^^^
|
||||||
|
# | | |______ Shaper name - any word
|
||||||
|
# | |___________________ ID (0000-FFFF), let ID looks like shaper's rate
|
||||||
|
# |______________________ Filename must begin from "cbq-"
|
||||||
|
#
|
||||||
|
#
|
||||||
|
# Config file describes shaper parameters and source[destination]
|
||||||
|
# address[port].
|
||||||
|
# For example let's prepare /etc/sysconfig/cbq/cbq-1280.My_first_shaper:
|
||||||
|
#
|
||||||
|
# ----------8<---------------------
|
||||||
|
# DEVICE=eth0,10Mbit,1Mbit
|
||||||
|
# RATE=128Kbit
|
||||||
|
# WEIGHT=10Kbit
|
||||||
|
# PRIO=5
|
||||||
|
# RULE=192.168.1.0/24
|
||||||
|
# ----------8<---------------------
|
||||||
|
#
|
||||||
|
# This is minimal configuration, where:
|
||||||
|
# DEVICE: eth0 - device where we do control our traffic
|
||||||
|
# 10Mbit - REAL ethernet card bandwidth
|
||||||
|
# 1Mbit - "weight" of :1 class (parent for all shapers for eth0),
|
||||||
|
# as a rule of thumb weight=bandwidth/10.
|
||||||
|
# 100Mbit adapter's example: DEVICE=eth0,100Mbit,10Mbit
|
||||||
|
# *** If you want to build more than one shaper per device it's
|
||||||
|
# enough to describe bandwidth and weight once - cbq.init
|
||||||
|
# is smart :) You can put only 'DEVICE=eth0' into cbq-*
|
||||||
|
# config file for eth0.
|
||||||
|
#
|
||||||
|
# RATE: Shaper's speed - Kbit,Mbit or bps (bytes per second)
|
||||||
|
#
|
||||||
|
# WEIGHT: "weight" of shaper (CBQ class). Like for DEVICE - approx. RATE/10
|
||||||
|
#
|
||||||
|
# PRIO: shaper's priority from 1 to 8 where 1 is the highest one.
|
||||||
|
# I do always use "5" for all my shapers.
|
||||||
|
#
|
||||||
|
# RULE: [source addr][:source port],[dest addr][:dest port]
|
||||||
|
# Some examples:
|
||||||
|
# RULE=10.1.1.0/24:80 - all traffic for network 10.1.1.0 to port 80
|
||||||
|
# will be shaped.
|
||||||
|
# RULE=10.2.2.5 - shaper works only for IP address 10.2.2.5
|
||||||
|
# RULE=:25,10.2.2.128/25:5000 - all traffic from any address and port 25 to
|
||||||
|
# address 10.2.2.128 - 10.2.2.255 and port 5000
|
||||||
|
# will be shaped.
|
||||||
|
# RULE=10.5.5.5:80, - shaper active only for traffic from port 80 of
|
||||||
|
# address 10.5.5.5
|
||||||
|
# Multiple RULE fields per one config file are allowed. For example:
|
||||||
|
# RULE=10.1.1.2:80
|
||||||
|
# RULE=10.1.1.2:25
|
||||||
|
# RULE=10.1.1.2:110
|
||||||
|
#
|
||||||
|
# *** ATTENTION!!!
|
||||||
|
# All shapers do work only for outgoing traffic!
|
||||||
|
# So, if you want to build bidirectional shaper you must set it up for
|
||||||
|
# both ethernet cards. For example let's build shaper for our linux box like:
|
||||||
|
#
|
||||||
|
# --------- 192.168.1.1
|
||||||
|
# BACKBONE -----eth0-| linux |-eth1------*[our client]
|
||||||
|
# ---------
|
||||||
|
#
|
||||||
|
# Let all traffic from backbone to client will be shaped at 28Kbit and
|
||||||
|
# traffic from client to backbone - at 128Kbit. We need two config files:
|
||||||
|
#
|
||||||
|
# ---8<-----/etc/sysconfig/cbq/cbq-28.client-out----
|
||||||
|
# DEVICE=eth1,10Mbit,1Mbit
|
||||||
|
# RATE=28Kbit
|
||||||
|
# WEIGHT=2Kbit
|
||||||
|
# PRIO=5
|
||||||
|
# RULE=192.168.1.1
|
||||||
|
# ---8<---------------------------------------------
|
||||||
|
#
|
||||||
|
# ---8<-----/etc/sysconfig/cbq/cbq-128.client-in----
|
||||||
|
# DEVICE=eth0,10Mbit,1Mbit
|
||||||
|
# RATE=128Kbit
|
||||||
|
# WEIGHT=10Kbit
|
||||||
|
# PRIO=5
|
||||||
|
# RULE=192.168.1.1,
|
||||||
|
# ---8<---------------------------------------------
|
||||||
|
# ^pay attention to "," - this is source address!
|
||||||
|
#
|
||||||
|
# Enjoy.
|
||||||
|
|
@ -0,0 +1,49 @@
|
||||||
|
#! /bin/sh -x
|
||||||
|
#
|
||||||
|
# sample script on using the ingress capabilities
|
||||||
|
# this script shows how one can rate limit incoming SYNs
|
||||||
|
# Useful for TCP-SYN attack protection. You can use
|
||||||
|
# IPchains to have more powerful additions to the SYN (eg
|
||||||
|
# in addition the subnet)
|
||||||
|
#
|
||||||
|
#path to various utilities;
|
||||||
|
#change to reflect yours.
|
||||||
|
#
|
||||||
|
IPROUTE=/root/DS-6-beta/iproute2-990530-dsing
|
||||||
|
TC=$IPROUTE/tc/tc
|
||||||
|
IP=$IPROUTE/ip/ip
|
||||||
|
IPCHAINS=/root/DS-6-beta/ipchains-1.3.9/ipchains
|
||||||
|
INDEV=eth2
|
||||||
|
#
|
||||||
|
# tag all incoming SYN packets through $INDEV as mark value 1
|
||||||
|
############################################################
|
||||||
|
$IPCHAINS -A input -i $INDEV -y -m 1
|
||||||
|
############################################################
|
||||||
|
#
|
||||||
|
# install the ingress qdisc on the ingress interface
|
||||||
|
############################################################
|
||||||
|
$TC qdisc add dev $INDEV handle ffff: ingress
|
||||||
|
############################################################
|
||||||
|
|
||||||
|
#
|
||||||
|
#
|
||||||
|
# SYN packets are 40 bytes (320 bits) so three SYNs equals
|
||||||
|
# 960 bits (approximately 1kbit); so we rate limit below
|
||||||
|
# the incoming SYNs to 3/sec (not very useful really; but
|
||||||
|
#serves to show the point - JHS
|
||||||
|
############################################################
|
||||||
|
$TC filter add dev $INDEV parent ffff: protocol ip prio 50 handle 1 fw \
|
||||||
|
police rate 1kbit burst 40 mtu 9k drop flowid :1
|
||||||
|
############################################################
|
||||||
|
|
||||||
|
|
||||||
|
#
|
||||||
|
echo "---- qdisc parameters Ingress ----------"
|
||||||
|
$TC qdisc ls dev $INDEV
|
||||||
|
echo "---- Class parameters Ingress ----------"
|
||||||
|
$TC class ls dev $INDEV
|
||||||
|
echo "---- filter parameters Ingress ----------"
|
||||||
|
$TC filter ls dev $INDEV parent ffff:
|
||||||
|
|
||||||
|
#deleting the ingress qdisc
|
||||||
|
#$TC qdisc del dev $INDEV ingress
|
||||||
|
|
@ -1,18 +1,13 @@
|
||||||
eBPF toy code examples (running in kernel) to familiarize yourself
|
eBPF toy code examples (running in kernel) to familiarize yourself
|
||||||
with syntax and features:
|
with syntax and features:
|
||||||
|
|
||||||
- BTF defined map examples
|
- bpf_prog.c -> Classifier examples with using maps
|
||||||
|
- bpf_shared.c -> Ingress/egress map sharing example
|
||||||
|
- bpf_tailcall.c -> Using tail call chains
|
||||||
|
- bpf_cyclic.c -> Simple cycle as tail calls
|
||||||
- bpf_graft.c -> Demo on altering runtime behaviour
|
- bpf_graft.c -> Demo on altering runtime behaviour
|
||||||
- bpf_shared.c -> Ingress/egress map sharing example
|
|
||||||
- bpf_map_in_map.c -> Using map in map example
|
|
||||||
|
|
||||||
- legacy struct bpf_elf_map defined map examples
|
User space code example:
|
||||||
- legacy/bpf_shared.c -> Ingress/egress map sharing example
|
|
||||||
- legacy/bpf_tailcall.c -> Using tail call chains
|
|
||||||
- legacy/bpf_cyclic.c -> Simple cycle as tail calls
|
|
||||||
- legacy/bpf_graft.c -> Demo on altering runtime behaviour
|
|
||||||
- legacy/bpf_map_in_map.c -> Using map in map example
|
|
||||||
|
|
||||||
Note: Users should use new BTF way to defined the maps, the examples
|
- bpf_agent.c -> Counterpart to bpf_prog.c for user
|
||||||
in legacy folder which is using struct bpf_elf_map defined maps is not
|
space to transfer/read out map data
|
||||||
recommended.
|
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,258 @@
|
||||||
|
/*
|
||||||
|
* eBPF user space agent part
|
||||||
|
*
|
||||||
|
* Simple, _self-contained_ user space agent for the eBPF kernel
|
||||||
|
* bpf_prog.c program, which gets all map fds passed from tc via unix
|
||||||
|
* domain socket in one transaction and can thus keep referencing
|
||||||
|
* them from user space in order to read out (or possibly modify)
|
||||||
|
* map data. Here, just as a minimal example to display counters.
|
||||||
|
*
|
||||||
|
* The agent only uses the bpf(2) syscall API to read or possibly
|
||||||
|
* write to eBPF maps, it doesn't need to be aware of the low-level
|
||||||
|
* bytecode parts and/or ELF parsing bits.
|
||||||
|
*
|
||||||
|
* ! For more details, see header comment in bpf_prog.c !
|
||||||
|
*
|
||||||
|
* gcc bpf_agent.c -o bpf_agent -Wall -O2
|
||||||
|
*
|
||||||
|
* For example, a more complex user space agent could run on each
|
||||||
|
* host, reading and writing into eBPF maps used by tc classifier
|
||||||
|
* and actions. It would thus allow for implementing a distributed
|
||||||
|
* tc architecture, for example, which would push down central
|
||||||
|
* policies into eBPF maps, and thus altering run-time behaviour.
|
||||||
|
*
|
||||||
|
* -- Happy eBPF hacking! ;)
|
||||||
|
*/
|
||||||
|
|
||||||
|
#define _GNU_SOURCE
|
||||||
|
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <string.h>
|
||||||
|
#include <errno.h>
|
||||||
|
#include <unistd.h>
|
||||||
|
#include <stdint.h>
|
||||||
|
#include <assert.h>
|
||||||
|
|
||||||
|
#include <sys/un.h>
|
||||||
|
#include <sys/types.h>
|
||||||
|
#include <sys/stat.h>
|
||||||
|
#include <sys/socket.h>
|
||||||
|
|
||||||
|
/* Just some misc macros as min(), offsetof(), etc. */
|
||||||
|
#include "../../include/utils.h"
|
||||||
|
/* Common code from fd passing. */
|
||||||
|
#include "../../include/bpf_scm.h"
|
||||||
|
/* Common, shared definitions with bpf_prog.c */
|
||||||
|
#include "bpf_shared.h"
|
||||||
|
/* Mini syscall wrapper */
|
||||||
|
#include "bpf_sys.h"
|
||||||
|
|
||||||
|
/* Print one drop counter per online CPU from the map referenced by fd.
 *
 * The CPU index (0 .. number of online processors - 1) is used as the
 * lookup key and the value is read as a long.
 */
static void bpf_dump_drops(int fd)
{
	int cpu, max;

	max = sysconf(_SC_NPROCESSORS_ONLN);

	printf(" `- number of drops:");
	for (cpu = 0; cpu < max; cpu++) {
		long drops = 0;
		int ret;

		/* Do the lookup outside of assert(): when NDEBUG is defined
		 * the assert() argument is not evaluated at all, which would
		 * skip the lookup and print an uninitialized counter.
		 */
		ret = bpf_lookup_elem(fd, &cpu, &drops);
		assert(ret == 0);
		(void)ret;

		printf("\tcpu%d: %5ld", cpu, drops);
	}
	printf("\n");
}
|
||||||
|
|
||||||
|
static void bpf_dump_queue(int fd)
|
||||||
|
{
|
||||||
|
/* Just for the same of the example. */
|
||||||
|
int max_queue = 4, i;
|
||||||
|
|
||||||
|
printf(" | nic queues:");
|
||||||
|
for (i = 0; i < max_queue; i++) {
|
||||||
|
struct count_queue cq;
|
||||||
|
int ret;
|
||||||
|
|
||||||
|
memset(&cq, 0, sizeof(cq));
|
||||||
|
ret = bpf_lookup_elem(fd, &i, &cq);
|
||||||
|
assert(ret == 0 || (ret < 0 && errno == ENOENT));
|
||||||
|
|
||||||
|
printf("\tq%d:[pkts: %ld, mis: %ld]",
|
||||||
|
i, cq.total, cq.mismatch);
|
||||||
|
}
|
||||||
|
printf("\n");
|
||||||
|
}
|
||||||
|
|
||||||
|
static void bpf_dump_proto(int fd)
|
||||||
|
{
|
||||||
|
uint8_t protos[] = { IPPROTO_TCP, IPPROTO_UDP, IPPROTO_ICMP };
|
||||||
|
char *names[] = { "tcp", "udp", "icmp" };
|
||||||
|
int i;
|
||||||
|
|
||||||
|
printf(" ` protos:");
|
||||||
|
for (i = 0; i < ARRAY_SIZE(protos); i++) {
|
||||||
|
struct count_tuple ct;
|
||||||
|
int ret;
|
||||||
|
|
||||||
|
memset(&ct, 0, sizeof(ct));
|
||||||
|
ret = bpf_lookup_elem(fd, &protos[i], &ct);
|
||||||
|
assert(ret == 0 || (ret < 0 && errno == ENOENT));
|
||||||
|
|
||||||
|
printf("\t%s:[pkts: %ld, bytes: %ld]",
|
||||||
|
names[i], ct.packets, ct.bytes);
|
||||||
|
}
|
||||||
|
printf("\n");
|
||||||
|
}
|
||||||
|
|
||||||
|
static void bpf_dump_map_data(int *tfd)
|
||||||
|
{
|
||||||
|
int i;
|
||||||
|
|
||||||
|
for (i = 0; i < 30; i++) {
|
||||||
|
const int period = 5;
|
||||||
|
|
||||||
|
printf("data, period: %dsec\n", period);
|
||||||
|
|
||||||
|
bpf_dump_drops(tfd[BPF_MAP_ID_DROPS]);
|
||||||
|
bpf_dump_queue(tfd[BPF_MAP_ID_QUEUE]);
|
||||||
|
bpf_dump_proto(tfd[BPF_MAP_ID_PROTO]);
|
||||||
|
|
||||||
|
sleep(period);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static void bpf_info_loop(int *fds, struct bpf_map_aux *aux)
|
||||||
|
{
|
||||||
|
int i, tfd[BPF_MAP_ID_MAX];
|
||||||
|
|
||||||
|
printf("ver: %d\nobj: %s\ndev: %lu\nino: %lu\nmaps: %u\n",
|
||||||
|
aux->uds_ver, aux->obj_name, aux->obj_st.st_dev,
|
||||||
|
aux->obj_st.st_ino, aux->num_ent);
|
||||||
|
|
||||||
|
for (i = 0; i < aux->num_ent; i++) {
|
||||||
|
printf("map%d:\n", i);
|
||||||
|
printf(" `- fd: %u\n", fds[i]);
|
||||||
|
printf(" | serial: %u\n", aux->ent[i].id);
|
||||||
|
printf(" | type: %u\n", aux->ent[i].type);
|
||||||
|
printf(" | max elem: %u\n", aux->ent[i].max_elem);
|
||||||
|
printf(" | size key: %u\n", aux->ent[i].size_key);
|
||||||
|
printf(" ` size val: %u\n", aux->ent[i].size_value);
|
||||||
|
|
||||||
|
tfd[aux->ent[i].id] = fds[i];
|
||||||
|
}
|
||||||
|
|
||||||
|
bpf_dump_map_data(tfd);
|
||||||
|
}
|
||||||
|
|
||||||
|
static void bpf_map_get_from_env(int *tfd)
|
||||||
|
{
|
||||||
|
char key[64], *val;
|
||||||
|
int i;
|
||||||
|
|
||||||
|
for (i = 0; i < BPF_MAP_ID_MAX; i++) {
|
||||||
|
memset(key, 0, sizeof(key));
|
||||||
|
snprintf(key, sizeof(key), "BPF_MAP%d", i);
|
||||||
|
|
||||||
|
val = getenv(key);
|
||||||
|
assert(val != NULL);
|
||||||
|
|
||||||
|
tfd[i] = atoi(val);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static int bpf_map_set_recv(int fd, int *fds, struct bpf_map_aux *aux,
|
||||||
|
unsigned int entries)
|
||||||
|
{
|
||||||
|
struct bpf_map_set_msg msg;
|
||||||
|
int *cmsg_buf, min_fd, i;
|
||||||
|
char *amsg_buf, *mmsg_buf;
|
||||||
|
|
||||||
|
cmsg_buf = bpf_map_set_init(&msg, NULL, 0);
|
||||||
|
amsg_buf = (char *)msg.aux.ent;
|
||||||
|
mmsg_buf = (char *)&msg.aux;
|
||||||
|
|
||||||
|
for (i = 0; i < entries; i += min_fd) {
|
||||||
|
struct cmsghdr *cmsg;
|
||||||
|
int ret;
|
||||||
|
|
||||||
|
min_fd = min(BPF_SCM_MAX_FDS * 1U, entries - i);
|
||||||
|
|
||||||
|
bpf_map_set_init_single(&msg, min_fd);
|
||||||
|
|
||||||
|
ret = recvmsg(fd, &msg.hdr, 0);
|
||||||
|
if (ret <= 0)
|
||||||
|
return ret ? : -1;
|
||||||
|
|
||||||
|
cmsg = CMSG_FIRSTHDR(&msg.hdr);
|
||||||
|
if (!cmsg || cmsg->cmsg_type != SCM_RIGHTS)
|
||||||
|
return -EINVAL;
|
||||||
|
if (msg.hdr.msg_flags & MSG_CTRUNC)
|
||||||
|
return -EIO;
|
||||||
|
|
||||||
|
min_fd = (cmsg->cmsg_len - sizeof(*cmsg)) / sizeof(fd);
|
||||||
|
if (min_fd > entries || min_fd <= 0)
|
||||||
|
return -1;
|
||||||
|
|
||||||
|
memcpy(&fds[i], cmsg_buf, sizeof(fds[0]) * min_fd);
|
||||||
|
memcpy(&aux->ent[i], amsg_buf, sizeof(aux->ent[0]) * min_fd);
|
||||||
|
memcpy(aux, mmsg_buf, offsetof(struct bpf_map_aux, ent));
|
||||||
|
|
||||||
|
if (i + min_fd == aux->num_ent)
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
int main(int argc, char **argv)
|
||||||
|
{
|
||||||
|
int fds[BPF_SCM_MAX_FDS];
|
||||||
|
struct bpf_map_aux aux;
|
||||||
|
struct sockaddr_un addr;
|
||||||
|
int fd, ret, i;
|
||||||
|
|
||||||
|
/* When arguments are being passed, we take it as a path
|
||||||
|
* to a Unix domain socket, otherwise we grab the fds
|
||||||
|
* from the environment to demonstrate both possibilities.
|
||||||
|
*/
|
||||||
|
if (argc == 1) {
|
||||||
|
int tfd[BPF_MAP_ID_MAX];
|
||||||
|
|
||||||
|
bpf_map_get_from_env(tfd);
|
||||||
|
bpf_dump_map_data(tfd);
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
fd = socket(AF_UNIX, SOCK_DGRAM, 0);
|
||||||
|
if (fd < 0) {
|
||||||
|
fprintf(stderr, "Cannot open socket: %s\n",
|
||||||
|
strerror(errno));
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
|
||||||
|
memset(&addr, 0, sizeof(addr));
|
||||||
|
addr.sun_family = AF_UNIX;
|
||||||
|
strncpy(addr.sun_path, argv[argc - 1], sizeof(addr.sun_path));
|
||||||
|
|
||||||
|
ret = bind(fd, (struct sockaddr *)&addr, sizeof(addr));
|
||||||
|
if (ret < 0) {
|
||||||
|
fprintf(stderr, "Cannot bind to socket: %s\n",
|
||||||
|
strerror(errno));
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
|
||||||
|
memset(fds, 0, sizeof(fds));
|
||||||
|
memset(&aux, 0, sizeof(aux));
|
||||||
|
|
||||||
|
ret = bpf_map_set_recv(fd, fds, &aux, BPF_SCM_MAX_FDS);
|
||||||
|
if (ret >= 0)
|
||||||
|
bpf_info_loop(fds, &aux);
|
||||||
|
|
||||||
|
for (i = 0; i < aux.num_ent; i++)
|
||||||
|
close(fds[i]);
|
||||||
|
|
||||||
|
close(fd);
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
@ -1,4 +1,4 @@
|
||||||
#include "../../../include/bpf_api.h"
|
#include "../../include/bpf_api.h"
|
||||||
|
|
||||||
/* Cyclic dependency example to test the kernel's runtime upper
|
/* Cyclic dependency example to test the kernel's runtime upper
|
||||||
* bound on loops. Also demonstrates on how to use direct-actions,
|
* bound on loops. Also demonstrates on how to use direct-actions,
|
||||||
|
|
@ -33,13 +33,13 @@
|
||||||
* [...]
|
* [...]
|
||||||
*/
|
*/
|
||||||
|
|
||||||
struct {
|
struct bpf_elf_map __section_maps jmp_tc = {
|
||||||
__uint(type, BPF_MAP_TYPE_PROG_ARRAY);
|
.type = BPF_MAP_TYPE_PROG_ARRAY,
|
||||||
__uint(key_size, sizeof(uint32_t));
|
.size_key = sizeof(uint32_t),
|
||||||
__uint(value_size, sizeof(uint32_t));
|
.size_value = sizeof(uint32_t),
|
||||||
__uint(max_entries, 1);
|
.pinning = PIN_GLOBAL_NS,
|
||||||
__uint(pinning, LIBBPF_PIN_BY_NAME);
|
.max_elem = 1,
|
||||||
} jmp_tc __section(".maps");
|
};
|
||||||
|
|
||||||
__section("aaa")
|
__section("aaa")
|
||||||
int cls_aaa(struct __sk_buff *skb)
|
int cls_aaa(struct __sk_buff *skb)
|
||||||
|
|
|
||||||
|
|
@ -1,55 +0,0 @@
|
||||||
#include "../../include/bpf_api.h"
|
|
||||||
|
|
||||||
struct inner_map {
|
|
||||||
__uint(type, BPF_MAP_TYPE_ARRAY);
|
|
||||||
__uint(key_size, sizeof(uint32_t));
|
|
||||||
__uint(value_size, sizeof(uint32_t));
|
|
||||||
__uint(max_entries, 1);
|
|
||||||
} map_inner __section(".maps");
|
|
||||||
|
|
||||||
struct {
|
|
||||||
__uint(type, BPF_MAP_TYPE_ARRAY_OF_MAPS);
|
|
||||||
__uint(key_size, sizeof(uint32_t));
|
|
||||||
__uint(value_size, sizeof(uint32_t));
|
|
||||||
__uint(max_entries, 1);
|
|
||||||
__uint(pinning, LIBBPF_PIN_BY_NAME);
|
|
||||||
__array(values, struct inner_map);
|
|
||||||
} map_outer __section(".maps") = {
|
|
||||||
.values = {
|
|
||||||
[0] = &map_inner,
|
|
||||||
},
|
|
||||||
};
|
|
||||||
|
|
||||||
/* Egress program: look up the inner map through slot 0 of map_outer and
 * atomically add 1 to the value stored under key 0. Always returns
 * BPF_H_DEFAULT.
 */
__section("egress")
int emain(struct __sk_buff *skb)
{
	struct bpf_elf_map *inner;
	int key = 0, *val;

	inner = map_lookup_elem(&map_outer, &key);
	if (!inner)
		return BPF_H_DEFAULT;

	val = map_lookup_elem(inner, &key);
	if (val)
		lock_xadd(val, 1);

	return BPF_H_DEFAULT;
}
|
|
||||||
|
|
||||||
/* Ingress program: look up the inner map through slot 0 of map_outer and
 * print the value stored under key 0 via printt(). Always returns
 * BPF_H_DEFAULT.
 */
__section("ingress")
int imain(struct __sk_buff *skb)
{
	struct bpf_elf_map *inner;
	int key = 0, *val;

	inner = map_lookup_elem(&map_outer, &key);
	if (!inner)
		return BPF_H_DEFAULT;

	val = map_lookup_elem(inner, &key);
	if (val)
		printt("map val: %d\n", *val);

	return BPF_H_DEFAULT;
}
|
|
||||||
|
|
||||||
BPF_LICENSE("GPL");
|
|
||||||
|
|
@ -0,0 +1,501 @@
|
||||||
|
/*
|
||||||
|
* eBPF kernel space program part
|
||||||
|
*
|
||||||
|
* Toy eBPF program for demonstration purposes, some parts derived from
|
||||||
|
* kernel tree's samples/bpf/sockex2_kern.c example.
|
||||||
|
*
|
||||||
|
* More background on eBPF, kernel tree: Documentation/networking/filter.txt
|
||||||
|
*
|
||||||
|
* Note, this file is rather large, and most classifier and actions are
|
||||||
|
* likely smaller to accomplish one specific use-case and are tailored
|
||||||
|
* for high performance. For performance reasons, you might also have the
|
||||||
|
* classifier and action already merged inside the classifier.
|
||||||
|
*
|
||||||
|
* In order to show various features it serves as a bigger programming
|
||||||
|
* example, which you should feel free to rip apart and experiment with.
|
||||||
|
*
|
||||||
|
* Compilation, configuration example:
|
||||||
|
*
|
||||||
|
* Note: as long as the BPF backend in LLVM is still experimental,
|
||||||
|
* you need to build LLVM with --enable-experimental-targets=BPF
|
||||||
|
* Also, make sure your 4.1+ kernel is compiled with CONFIG_BPF_SYSCALL=y,
|
||||||
|
* and you have libelf.h and gelf.h headers and can link tc against -lelf.
|
||||||
|
*
|
||||||
|
* In case you need to sync kernel headers, go to your kernel source tree:
|
||||||
|
* # make headers_install INSTALL_HDR_PATH=/usr/
|
||||||
|
*
|
||||||
|
* $ export PATH=/home/<...>/llvm/Debug+Asserts/bin/:$PATH
|
||||||
|
* $ clang -O2 -emit-llvm -c bpf_prog.c -o - | llc -march=bpf -filetype=obj -o bpf.o
|
||||||
|
* $ objdump -h bpf.o
|
||||||
|
* [...]
|
||||||
|
* 3 classifier 000007f8 0000000000000000 0000000000000000 00000040 2**3
|
||||||
|
* CONTENTS, ALLOC, LOAD, RELOC, READONLY, CODE
|
||||||
|
* 4 action-mark 00000088 0000000000000000 0000000000000000 00000838 2**3
|
||||||
|
* CONTENTS, ALLOC, LOAD, RELOC, READONLY, CODE
|
||||||
|
* 5 action-rand 00000098 0000000000000000 0000000000000000 000008c0 2**3
|
||||||
|
* CONTENTS, ALLOC, LOAD, RELOC, READONLY, CODE
|
||||||
|
* 6 maps 00000030 0000000000000000 0000000000000000 00000958 2**2
|
||||||
|
* CONTENTS, ALLOC, LOAD, DATA
|
||||||
|
* 7 license 00000004 0000000000000000 0000000000000000 00000988 2**0
|
||||||
|
* CONTENTS, ALLOC, LOAD, DATA
|
||||||
|
* [...]
|
||||||
|
* # echo 1 > /proc/sys/net/core/bpf_jit_enable
|
||||||
|
* $ gcc bpf_agent.c -o bpf_agent -Wall -O2
|
||||||
|
* # ./bpf_agent /tmp/bpf-uds (e.g. on a different terminal)
|
||||||
|
* # tc filter add dev em1 parent 1: bpf obj bpf.o exp /tmp/bpf-uds flowid 1:1 \
|
||||||
|
* action bpf obj bpf.o sec action-mark \
|
||||||
|
* action bpf obj bpf.o sec action-rand ok
|
||||||
|
* # tc filter show dev em1
|
||||||
|
* filter parent 1: protocol all pref 49152 bpf
|
||||||
|
* filter parent 1: protocol all pref 49152 bpf handle 0x1 flowid 1:1 bpf.o:[classifier]
|
||||||
|
* action order 1: bpf bpf.o:[action-mark] default-action pipe
|
||||||
|
* index 52 ref 1 bind 1
|
||||||
|
*
|
||||||
|
* action order 2: bpf bpf.o:[action-rand] default-action pipe
|
||||||
|
* index 53 ref 1 bind 1
|
||||||
|
*
|
||||||
|
* action order 3: gact action pass
|
||||||
|
* random type none pass val 0
|
||||||
|
* index 38 ref 1 bind 1
|
||||||
|
*
|
||||||
|
* The same program can also be installed on ingress side (as opposed to above
|
||||||
|
* egress configuration), e.g.:
|
||||||
|
*
|
||||||
|
* # tc qdisc add dev em1 handle ffff: ingress
|
||||||
|
* # tc filter add dev em1 parent ffff: bpf obj ...
|
||||||
|
*
|
||||||
|
* Notes on BPF agent:
|
||||||
|
*
|
||||||
|
* In the above example, the bpf_agent creates the unix domain socket
|
||||||
|
* natively. "tc exec" can also spawn a shell and hold the sockets there:
|
||||||
|
*
|
||||||
|
* # tc exec bpf imp /tmp/bpf-uds
|
||||||
|
* # tc filter add dev em1 parent 1: bpf obj bpf.o exp /tmp/bpf-uds flowid 1:1 \
|
||||||
|
* action bpf obj bpf.o sec action-mark \
|
||||||
|
* action bpf obj bpf.o sec action-rand ok
|
||||||
|
* sh-4.2# (shell spawned from tc exec)
|
||||||
|
* sh-4.2# bpf_agent
|
||||||
|
* [...]
|
||||||
|
*
|
||||||
|
* This will read out fds over environment and produce the same data dump
|
||||||
|
* as below. This has the advantage that the spawned shell owns the fds
|
||||||
|
* and thus if the agent is restarted, it can reattach to the same fds, also
|
||||||
|
* various programs can easily read/modify the data simultaneously from user
|
||||||
|
* space side.
|
||||||
|
*
|
||||||
|
* If the shell is unnecessary, the agent can also just be spawned directly
|
||||||
|
* via tc exec:
|
||||||
|
*
|
||||||
|
* # tc exec bpf imp /tmp/bpf-uds run bpf_agent
|
||||||
|
* # tc filter add dev em1 parent 1: bpf obj bpf.o exp /tmp/bpf-uds flowid 1:1 \
|
||||||
|
* action bpf obj bpf.o sec action-mark \
|
||||||
|
* action bpf obj bpf.o sec action-rand ok
|
||||||
|
*
|
||||||
|
* BPF agent example output:
|
||||||
|
*
|
||||||
|
* ver: 1
|
||||||
|
* obj: bpf.o
|
||||||
|
* dev: 64770
|
||||||
|
* ino: 6045133
|
||||||
|
* maps: 3
|
||||||
|
* map0:
|
||||||
|
* `- fd: 4
|
||||||
|
* | serial: 1
|
||||||
|
* | type: 1
|
||||||
|
* | max elem: 256
|
||||||
|
* | size key: 1
|
||||||
|
* ` size val: 16
|
||||||
|
* map1:
|
||||||
|
* `- fd: 5
|
||||||
|
* | serial: 2
|
||||||
|
* | type: 1
|
||||||
|
* | max elem: 1024
|
||||||
|
* | size key: 4
|
||||||
|
* ` size val: 16
|
||||||
|
* map2:
|
||||||
|
* `- fd: 6
|
||||||
|
* | serial: 3
|
||||||
|
* | type: 2
|
||||||
|
* | max elem: 64
|
||||||
|
* | size key: 4
|
||||||
|
* ` size val: 8
|
||||||
|
* data, period: 5sec
|
||||||
|
* `- number of drops: cpu0: 0 cpu1: 0 cpu2: 0 cpu3: 0
|
||||||
|
* | nic queues: q0:[pkts: 0, mis: 0] q1:[pkts: 0, mis: 0] q2:[pkts: 0, mis: 0] q3:[pkts: 0, mis: 0]
|
||||||
|
* ` protos: tcp:[pkts: 0, bytes: 0] udp:[pkts: 0, bytes: 0] icmp:[pkts: 0, bytes: 0]
|
||||||
|
* data, period: 5sec
|
||||||
|
* `- number of drops: cpu0: 5 cpu1: 0 cpu2: 0 cpu3: 1
|
||||||
|
* | nic queues: q0:[pkts: 0, mis: 0] q1:[pkts: 0, mis: 0] q2:[pkts: 24, mis: 14] q3:[pkts: 0, mis: 0]
|
||||||
|
* ` protos: tcp:[pkts: 13, bytes: 1989] udp:[pkts: 10, bytes: 710] icmp:[pkts: 0, bytes: 0]
|
||||||
|
* data, period: 5sec
|
||||||
|
* `- number of drops: cpu0: 5 cpu1: 0 cpu2: 3 cpu3: 3
|
||||||
|
* | nic queues: q0:[pkts: 0, mis: 0] q1:[pkts: 0, mis: 0] q2:[pkts: 39, mis: 21] q3:[pkts: 0, mis: 0]
|
||||||
|
* ` protos: tcp:[pkts: 20, bytes: 3549] udp:[pkts: 18, bytes: 1278] icmp:[pkts: 0, bytes: 0]
|
||||||
|
* [...]
|
||||||
|
*
|
||||||
|
* This now means, the below classifier and action pipeline has been loaded
|
||||||
|
* as eBPF bytecode into the kernel, the kernel has verified that the
|
||||||
|
* execution of the bytecode is "safe", and it has JITed the programs
|
||||||
|
* afterwards, so that upon invocation they're running on native speed. tc
|
||||||
|
* has transferred all map file descriptors to the bpf_agent via IPC and
|
||||||
|
* even after tc exits, the agent can read out or modify all map data.
|
||||||
|
*
|
||||||
|
* Note that the export to the uds is done only once in the classifier and
|
||||||
|
* not in the action. It's enough to export the (here) shared descriptors
|
||||||
|
* once.
|
||||||
|
*
|
||||||
|
* If you need to disassemble the generated JIT image (echo with 2), the
|
||||||
|
* kernel tree has under tools/net/ a small helper, you can invoke e.g.
|
||||||
|
* `bpf_jit_disasm -o`.
|
||||||
|
*
|
||||||
|
* Please find in the code below further comments.
|
||||||
|
*
|
||||||
|
* -- Happy eBPF hacking! ;)
|
||||||
|
*/
|
||||||
|
#include <stdint.h>
|
||||||
|
#include <stdbool.h>
|
||||||
|
#include <sys/types.h>
|
||||||
|
#include <sys/socket.h>
|
||||||
|
#include <asm/types.h>
|
||||||
|
#include <linux/in.h>
|
||||||
|
#include <linux/if.h>
|
||||||
|
#include <linux/if_ether.h>
|
||||||
|
#include <linux/ip.h>
|
||||||
|
#include <linux/ipv6.h>
|
||||||
|
#include <linux/if_tunnel.h>
|
||||||
|
#include <linux/filter.h>
|
||||||
|
#include <linux/bpf.h>
|
||||||
|
|
||||||
|
/* Common, shared definitions with bpf_agent.c. */
|
||||||
|
#include "bpf_shared.h"
|
||||||
|
/* BPF helper functions for our example. */
|
||||||
|
#include "../../include/bpf_api.h"
|
||||||
|
|
||||||
|
/* Could be defined here as well, or included from the header. */
|
||||||
|
#define TC_ACT_UNSPEC (-1)
|
||||||
|
#define TC_ACT_OK 0
|
||||||
|
#define TC_ACT_RECLASSIFY 1
|
||||||
|
#define TC_ACT_SHOT 2
|
||||||
|
#define TC_ACT_PIPE 3
|
||||||
|
#define TC_ACT_STOLEN 4
|
||||||
|
#define TC_ACT_QUEUED 5
|
||||||
|
#define TC_ACT_REPEAT 6
|
||||||
|
|
||||||
|
/* Other, misc stuff. */
|
||||||
|
#define IP_MF 0x2000
|
||||||
|
#define IP_OFFSET 0x1FFF
|
||||||
|
|
||||||
|
/* eBPF map definitions, all placed in section "maps". */
|
||||||
|
/* Hash map with a one-byte key and a struct count_tuple value, holding
 * up to 256 entries. Identified towards user space as BPF_MAP_ID_PROTO.
 */
struct bpf_elf_map __section("maps") map_proto = {
	.id		= BPF_MAP_ID_PROTO,
	.type		= BPF_MAP_TYPE_HASH,
	.flags		= BPF_F_NO_PREALLOC,
	.size_key	= sizeof(uint8_t),
	.size_value	= sizeof(struct count_tuple),
	.max_elem	= 256,
};
|
||||||
|
|
||||||
|
/* Hash map with a 32-bit key and a struct count_queue value, holding up
 * to 1024 entries. Identified towards user space as BPF_MAP_ID_QUEUE.
 */
struct bpf_elf_map __section("maps") map_queue = {
	.id		= BPF_MAP_ID_QUEUE,
	.type		= BPF_MAP_TYPE_HASH,
	.flags		= BPF_F_NO_PREALLOC,
	.size_key	= sizeof(uint32_t),
	.size_value	= sizeof(struct count_queue),
	.max_elem	= 1024,
};
|
||||||
|
|
||||||
|
/* Array map with 64 slots, each holding one long. Identified towards
 * user space as BPF_MAP_ID_DROPS.
 */
struct bpf_elf_map __section("maps") map_drops = {
	.id		= BPF_MAP_ID_DROPS,
	.type		= BPF_MAP_TYPE_ARRAY,
	.size_key	= sizeof(uint32_t),
	.size_value	= sizeof(long),
	.max_elem	= 64,
};
|
||||||
|
|
||||||
|
/* Helper functions and definitions for the flow dissector used by the
|
||||||
|
* example classifier. This resembles the kernel's flow dissector to
|
||||||
|
* some extent and is just used as an example to show what's possible
|
||||||
|
* with eBPF.
|
||||||
|
*/
|
||||||
|
struct sockaddr;
|
||||||
|
|
||||||
|
struct vlan_hdr {
|
||||||
|
__be16 h_vlan_TCI;
|
||||||
|
__be16 h_vlan_encapsulated_proto;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct flow_keys {
|
||||||
|
__u32 src;
|
||||||
|
__u32 dst;
|
||||||
|
union {
|
||||||
|
__u32 ports;
|
||||||
|
__u16 port16[2];
|
||||||
|
};
|
||||||
|
__s32 th_off;
|
||||||
|
__u8 ip_proto;
|
||||||
|
};
|
||||||
|
|
||||||
|
/* Return the byte offset to add for the given IP protocol: 4 for
 * IPPROTO_AH, 0 for everything else (TCP, UDP, DCCP, ESP, SCTP, UDP-Lite
 * and any protocol not listed).
 */
static __inline__ int flow_ports_offset(__u8 ip_proto)
{
	return ip_proto == IPPROTO_AH ? 4 : 0;
}
|
||||||
|
|
||||||
|
static __inline__ bool flow_is_frag(struct __sk_buff *skb, int nh_off)
|
||||||
|
{
|
||||||
|
return !!(load_half(skb, nh_off + offsetof(struct iphdr, frag_off)) &
|
||||||
|
(IP_MF | IP_OFFSET));
|
||||||
|
}
|
||||||
|
|
||||||
|
static __inline__ int flow_parse_ipv4(struct __sk_buff *skb, int nh_off,
|
||||||
|
__u8 *ip_proto, struct flow_keys *flow)
|
||||||
|
{
|
||||||
|
__u8 ip_ver_len;
|
||||||
|
|
||||||
|
if (unlikely(flow_is_frag(skb, nh_off)))
|
||||||
|
*ip_proto = 0;
|
||||||
|
else
|
||||||
|
*ip_proto = load_byte(skb, nh_off + offsetof(struct iphdr,
|
||||||
|
protocol));
|
||||||
|
if (*ip_proto != IPPROTO_GRE) {
|
||||||
|
flow->src = load_word(skb, nh_off + offsetof(struct iphdr, saddr));
|
||||||
|
flow->dst = load_word(skb, nh_off + offsetof(struct iphdr, daddr));
|
||||||
|
}
|
||||||
|
|
||||||
|
ip_ver_len = load_byte(skb, nh_off + 0 /* offsetof(struct iphdr, ihl) */);
|
||||||
|
if (likely(ip_ver_len == 0x45))
|
||||||
|
nh_off += 20;
|
||||||
|
else
|
||||||
|
nh_off += (ip_ver_len & 0xF) << 2;
|
||||||
|
|
||||||
|
return nh_off;
|
||||||
|
}
|
||||||
|
|
||||||
|
static __inline__ __u32 flow_addr_hash_ipv6(struct __sk_buff *skb, int off)
|
||||||
|
{
|
||||||
|
__u32 w0 = load_word(skb, off);
|
||||||
|
__u32 w1 = load_word(skb, off + sizeof(w0));
|
||||||
|
__u32 w2 = load_word(skb, off + sizeof(w0) * 2);
|
||||||
|
__u32 w3 = load_word(skb, off + sizeof(w0) * 3);
|
||||||
|
|
||||||
|
return w0 ^ w1 ^ w2 ^ w3;
|
||||||
|
}
|
||||||
|
|
||||||
|
static __inline__ int flow_parse_ipv6(struct __sk_buff *skb, int nh_off,
|
||||||
|
__u8 *ip_proto, struct flow_keys *flow)
|
||||||
|
{
|
||||||
|
*ip_proto = load_byte(skb, nh_off + offsetof(struct ipv6hdr, nexthdr));
|
||||||
|
|
||||||
|
flow->src = flow_addr_hash_ipv6(skb, nh_off + offsetof(struct ipv6hdr, saddr));
|
||||||
|
flow->dst = flow_addr_hash_ipv6(skb, nh_off + offsetof(struct ipv6hdr, daddr));
|
||||||
|
|
||||||
|
return nh_off + sizeof(struct ipv6hdr);
|
||||||
|
}
|
||||||
|
|
||||||
|
static __inline__ bool flow_dissector(struct __sk_buff *skb,
|
||||||
|
struct flow_keys *flow)
|
||||||
|
{
|
||||||
|
int poff, nh_off = BPF_LL_OFF + ETH_HLEN;
|
||||||
|
__be16 proto = skb->protocol;
|
||||||
|
__u8 ip_proto;
|
||||||
|
|
||||||
|
/* TODO: check for skb->vlan_tci, skb->vlan_proto first */
|
||||||
|
if (proto == htons(ETH_P_8021AD)) {
|
||||||
|
proto = load_half(skb, nh_off +
|
||||||
|
offsetof(struct vlan_hdr, h_vlan_encapsulated_proto));
|
||||||
|
nh_off += sizeof(struct vlan_hdr);
|
||||||
|
}
|
||||||
|
if (proto == htons(ETH_P_8021Q)) {
|
||||||
|
proto = load_half(skb, nh_off +
|
||||||
|
offsetof(struct vlan_hdr, h_vlan_encapsulated_proto));
|
||||||
|
nh_off += sizeof(struct vlan_hdr);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (likely(proto == htons(ETH_P_IP)))
|
||||||
|
nh_off = flow_parse_ipv4(skb, nh_off, &ip_proto, flow);
|
||||||
|
else if (proto == htons(ETH_P_IPV6))
|
||||||
|
nh_off = flow_parse_ipv6(skb, nh_off, &ip_proto, flow);
|
||||||
|
else
|
||||||
|
return false;
|
||||||
|
|
||||||
|
switch (ip_proto) {
|
||||||
|
case IPPROTO_GRE: {
|
||||||
|
struct gre_hdr {
|
||||||
|
__be16 flags;
|
||||||
|
__be16 proto;
|
||||||
|
};
|
||||||
|
|
||||||
|
__u16 gre_flags = load_half(skb, nh_off +
|
||||||
|
offsetof(struct gre_hdr, flags));
|
||||||
|
__u16 gre_proto = load_half(skb, nh_off +
|
||||||
|
offsetof(struct gre_hdr, proto));
|
||||||
|
|
||||||
|
if (gre_flags & (GRE_VERSION | GRE_ROUTING))
|
||||||
|
break;
|
||||||
|
|
||||||
|
nh_off += 4;
|
||||||
|
if (gre_flags & GRE_CSUM)
|
||||||
|
nh_off += 4;
|
||||||
|
if (gre_flags & GRE_KEY)
|
||||||
|
nh_off += 4;
|
||||||
|
if (gre_flags & GRE_SEQ)
|
||||||
|
nh_off += 4;
|
||||||
|
|
||||||
|
if (gre_proto == ETH_P_8021Q) {
|
||||||
|
gre_proto = load_half(skb, nh_off +
|
||||||
|
offsetof(struct vlan_hdr,
|
||||||
|
h_vlan_encapsulated_proto));
|
||||||
|
nh_off += sizeof(struct vlan_hdr);
|
||||||
|
}
|
||||||
|
if (gre_proto == ETH_P_IP)
|
||||||
|
nh_off = flow_parse_ipv4(skb, nh_off, &ip_proto, flow);
|
||||||
|
else if (gre_proto == ETH_P_IPV6)
|
||||||
|
nh_off = flow_parse_ipv6(skb, nh_off, &ip_proto, flow);
|
||||||
|
else
|
||||||
|
return false;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
case IPPROTO_IPIP:
|
||||||
|
nh_off = flow_parse_ipv4(skb, nh_off, &ip_proto, flow);
|
||||||
|
break;
|
||||||
|
case IPPROTO_IPV6:
|
||||||
|
nh_off = flow_parse_ipv6(skb, nh_off, &ip_proto, flow);
|
||||||
|
default:
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
nh_off += flow_ports_offset(ip_proto);
|
||||||
|
|
||||||
|
flow->ports = load_word(skb, nh_off);
|
||||||
|
flow->th_off = nh_off;
|
||||||
|
flow->ip_proto = ip_proto;
|
||||||
|
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
static __inline__ void cls_update_proto_map(const struct __sk_buff *skb,
|
||||||
|
const struct flow_keys *flow)
|
||||||
|
{
|
||||||
|
uint8_t proto = flow->ip_proto;
|
||||||
|
struct count_tuple *ct, _ct;
|
||||||
|
|
||||||
|
ct = map_lookup_elem(&map_proto, &proto);
|
||||||
|
if (likely(ct)) {
|
||||||
|
lock_xadd(&ct->packets, 1);
|
||||||
|
lock_xadd(&ct->bytes, skb->len);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* No hit yet, we need to create a new entry. */
|
||||||
|
_ct.packets = 1;
|
||||||
|
_ct.bytes = skb->len;
|
||||||
|
|
||||||
|
map_update_elem(&map_proto, &proto, &_ct, BPF_ANY);
|
||||||
|
}
|
||||||
|
|
||||||
|
static __inline__ void cls_update_queue_map(const struct __sk_buff *skb)
|
||||||
|
{
|
||||||
|
uint32_t queue = skb->queue_mapping;
|
||||||
|
struct count_queue *cq, _cq;
|
||||||
|
bool mismatch;
|
||||||
|
|
||||||
|
mismatch = skb->queue_mapping != get_smp_processor_id();
|
||||||
|
|
||||||
|
cq = map_lookup_elem(&map_queue, &queue);
|
||||||
|
if (likely(cq)) {
|
||||||
|
lock_xadd(&cq->total, 1);
|
||||||
|
if (mismatch)
|
||||||
|
lock_xadd(&cq->mismatch, 1);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* No hit yet, we need to create a new entry. */
|
||||||
|
_cq.total = 1;
|
||||||
|
_cq.mismatch = mismatch ? 1 : 0;
|
||||||
|
|
||||||
|
map_update_elem(&map_queue, &queue, &_cq, BPF_ANY);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* eBPF program definitions, placed in various sections, which can
|
||||||
|
* have custom section names. If custom names are in use, it's
|
||||||
|
* required to point tc to the correct section, e.g.
|
||||||
|
*
|
||||||
|
* tc filter add [...] bpf obj cls.o sec cls-tos [...]
|
||||||
|
*
|
||||||
|
* in case the program resides in __section("cls-tos").
|
||||||
|
*
|
||||||
|
* Default section for cls_bpf is: "classifier", for act_bpf is:
|
||||||
|
* "action". Naturally, if for example multiple actions are present
|
||||||
|
* in the same file, they need to have distinct section names.
|
||||||
|
*
|
||||||
|
* It is however not required to have multiple programs sharing
|
||||||
|
* a file.
|
||||||
|
*/
|
||||||
|
__section("classifier")
|
||||||
|
int cls_main(struct __sk_buff *skb)
|
||||||
|
{
|
||||||
|
struct flow_keys flow;
|
||||||
|
|
||||||
|
if (!flow_dissector(skb, &flow))
|
||||||
|
return 0; /* No match in cls_bpf. */
|
||||||
|
|
||||||
|
cls_update_proto_map(skb, &flow);
|
||||||
|
cls_update_queue_map(skb);
|
||||||
|
|
||||||
|
return flow.ip_proto;
|
||||||
|
}
|
||||||
|
|
||||||
|
static __inline__ void act_update_drop_map(void)
|
||||||
|
{
|
||||||
|
uint32_t *count, cpu = get_smp_processor_id();
|
||||||
|
|
||||||
|
count = map_lookup_elem(&map_drops, &cpu);
|
||||||
|
if (count)
|
||||||
|
/* Only this cpu is accessing this element. */
|
||||||
|
(*count)++;
|
||||||
|
}
|
||||||
|
|
||||||
|
__section("action-mark")
|
||||||
|
int act_mark_main(struct __sk_buff *skb)
|
||||||
|
{
|
||||||
|
/* You could also mangle skb data here with the helper function
|
||||||
|
* BPF_FUNC_skb_store_bytes, etc. Or, alternatively you could
|
||||||
|
* do that already in the classifier itself as a merged combination
|
||||||
|
* of classifier'n'action model.
|
||||||
|
*/
|
||||||
|
|
||||||
|
if (skb->mark == 0xcafe) {
|
||||||
|
act_update_drop_map();
|
||||||
|
return TC_ACT_SHOT;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Default configured tc opcode. */
|
||||||
|
return TC_ACT_UNSPEC;
|
||||||
|
}
|
||||||
|
|
||||||
|
__section("action-rand")
|
||||||
|
int act_rand_main(struct __sk_buff *skb)
|
||||||
|
{
|
||||||
|
/* Sorry, we're near event horizon ... */
|
||||||
|
if ((get_prandom_u32() & 3) == 0) {
|
||||||
|
act_update_drop_map();
|
||||||
|
return TC_ACT_SHOT;
|
||||||
|
}
|
||||||
|
|
||||||
|
return TC_ACT_UNSPEC;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Last but not least, the file contains a license. Some future helper
|
||||||
|
* functions may only be available with a GPL license.
|
||||||
|
*/
|
||||||
|
BPF_LICENSE("GPL");
|
||||||
|
|
@ -18,13 +18,13 @@
|
||||||
* instance is being created.
|
* instance is being created.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
struct {
|
struct bpf_elf_map __section_maps map_sh = {
|
||||||
__uint(type, BPF_MAP_TYPE_ARRAY);
|
.type = BPF_MAP_TYPE_ARRAY,
|
||||||
__uint(key_size, sizeof(uint32_t));
|
.size_key = sizeof(uint32_t),
|
||||||
__uint(value_size, sizeof(uint32_t));
|
.size_value = sizeof(uint32_t),
|
||||||
__uint(max_entries, 1);
|
.pinning = PIN_OBJECT_NS, /* or PIN_GLOBAL_NS, or PIN_NONE */
|
||||||
__uint(pinning, LIBBPF_PIN_BY_NAME); /* or LIBBPF_PIN_NONE */
|
.max_elem = 1,
|
||||||
} map_sh __section(".maps");
|
};
|
||||||
|
|
||||||
__section("egress")
|
__section("egress")
|
||||||
int emain(struct __sk_buff *skb)
|
int emain(struct __sk_buff *skb)
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,22 @@
|
||||||
|
#ifndef __BPF_SHARED__
|
||||||
|
#define __BPF_SHARED__
|
||||||
|
|
||||||
|
enum {
|
||||||
|
BPF_MAP_ID_PROTO,
|
||||||
|
BPF_MAP_ID_QUEUE,
|
||||||
|
BPF_MAP_ID_DROPS,
|
||||||
|
__BPF_MAP_ID_MAX,
|
||||||
|
#define BPF_MAP_ID_MAX __BPF_MAP_ID_MAX
|
||||||
|
};
|
||||||
|
|
||||||
|
struct count_tuple {
|
||||||
|
long packets; /* type long for lock_xadd() */
|
||||||
|
long bytes;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct count_queue {
|
||||||
|
long total;
|
||||||
|
long mismatch;
|
||||||
|
};
|
||||||
|
|
||||||
|
#endif /* __BPF_SHARED__ */
|
||||||
|
|
@ -0,0 +1,23 @@
|
||||||
|
#ifndef __BPF_SYS__
|
||||||
|
#define __BPF_SYS__
|
||||||
|
|
||||||
|
#include <sys/syscall.h>
|
||||||
|
#include <linux/bpf.h>
|
||||||
|
|
||||||
|
static inline __u64 bpf_ptr_to_u64(const void *ptr)
|
||||||
|
{
|
||||||
|
return (__u64) (unsigned long) ptr;
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline int bpf_lookup_elem(int fd, void *key, void *value)
|
||||||
|
{
|
||||||
|
union bpf_attr attr = {
|
||||||
|
.map_fd = fd,
|
||||||
|
.key = bpf_ptr_to_u64(key),
|
||||||
|
.value = bpf_ptr_to_u64(value),
|
||||||
|
};
|
||||||
|
|
||||||
|
return syscall(__NR_bpf, BPF_MAP_LOOKUP_ELEM, &attr, sizeof(attr));
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif /* __BPF_SYS__ */
|
||||||
|
|
@ -1,5 +1,4 @@
|
||||||
/* SPDX-License-Identifier: GPL-2.0 */
|
#include "../../include/bpf_api.h"
|
||||||
#include "../../../include/bpf_api.h"
|
|
||||||
|
|
||||||
#define ENTRY_INIT 3
|
#define ENTRY_INIT 3
|
||||||
#define ENTRY_0 0
|
#define ENTRY_0 0
|
||||||
|
|
@ -1,66 +0,0 @@
|
||||||
#include "../../../include/bpf_api.h"
|
|
||||||
|
|
||||||
/* This example demonstrates how classifier run-time behaviour
|
|
||||||
* can be altered with tail calls. We start out with an empty
|
|
||||||
* jmp_tc array, then add section aaa to the array slot 0, and
|
|
||||||
* later on atomically replace it with section bbb. Note that
|
|
||||||
* as shown in other examples, the tc loader can prepopulate
|
|
||||||
* tail called sections, here we start out with an empty one
|
|
||||||
* on purpose to show it can also be done this way.
|
|
||||||
*
|
|
||||||
* tc filter add dev foo parent ffff: bpf obj graft.o
|
|
||||||
* tc exec bpf dbg
|
|
||||||
* [...]
|
|
||||||
* Socket Thread-20229 [001] ..s. 138993.003923: : fallthrough
|
|
||||||
* <idle>-0 [001] ..s. 138993.202265: : fallthrough
|
|
||||||
* Socket Thread-20229 [001] ..s. 138994.004149: : fallthrough
|
|
||||||
* [...]
|
|
||||||
*
|
|
||||||
* tc exec bpf graft m:globals/jmp_tc key 0 obj graft.o sec aaa
|
|
||||||
* tc exec bpf dbg
|
|
||||||
* [...]
|
|
||||||
* Socket Thread-19818 [002] ..s. 139012.053587: : aaa
|
|
||||||
* <idle>-0 [002] ..s. 139012.172359: : aaa
|
|
||||||
* Socket Thread-19818 [001] ..s. 139012.173556: : aaa
|
|
||||||
* [...]
|
|
||||||
*
|
|
||||||
* tc exec bpf graft m:globals/jmp_tc key 0 obj graft.o sec bbb
|
|
||||||
* tc exec bpf dbg
|
|
||||||
* [...]
|
|
||||||
* Socket Thread-19818 [002] ..s. 139022.102967: : bbb
|
|
||||||
* <idle>-0 [002] ..s. 139022.155640: : bbb
|
|
||||||
* Socket Thread-19818 [001] ..s. 139022.156730: : bbb
|
|
||||||
* [...]
|
|
||||||
*/
|
|
||||||
|
|
||||||
struct bpf_elf_map __section_maps jmp_tc = {
|
|
||||||
.type = BPF_MAP_TYPE_PROG_ARRAY,
|
|
||||||
.size_key = sizeof(uint32_t),
|
|
||||||
.size_value = sizeof(uint32_t),
|
|
||||||
.pinning = PIN_GLOBAL_NS,
|
|
||||||
.max_elem = 1,
|
|
||||||
};
|
|
||||||
|
|
||||||
__section("aaa")
|
|
||||||
int cls_aaa(struct __sk_buff *skb)
|
|
||||||
{
|
|
||||||
printt("aaa\n");
|
|
||||||
return TC_H_MAKE(1, 42);
|
|
||||||
}
|
|
||||||
|
|
||||||
__section("bbb")
|
|
||||||
int cls_bbb(struct __sk_buff *skb)
|
|
||||||
{
|
|
||||||
printt("bbb\n");
|
|
||||||
return TC_H_MAKE(1, 43);
|
|
||||||
}
|
|
||||||
|
|
||||||
__section_cls_entry
|
|
||||||
int cls_entry(struct __sk_buff *skb)
|
|
||||||
{
|
|
||||||
tail_call(skb, &jmp_tc, 0);
|
|
||||||
printt("fallthrough\n");
|
|
||||||
return BPF_H_DEFAULT;
|
|
||||||
}
|
|
||||||
|
|
||||||
BPF_LICENSE("GPL");
|
|
||||||
|
|
@ -1,56 +0,0 @@
|
||||||
#include "../../../include/bpf_api.h"
|
|
||||||
|
|
||||||
#define MAP_INNER_ID 42
|
|
||||||
|
|
||||||
struct bpf_elf_map __section_maps map_inner = {
|
|
||||||
.type = BPF_MAP_TYPE_ARRAY,
|
|
||||||
.size_key = sizeof(uint32_t),
|
|
||||||
.size_value = sizeof(uint32_t),
|
|
||||||
.id = MAP_INNER_ID,
|
|
||||||
.inner_idx = 0,
|
|
||||||
.pinning = PIN_GLOBAL_NS,
|
|
||||||
.max_elem = 1,
|
|
||||||
};
|
|
||||||
|
|
||||||
struct bpf_elf_map __section_maps map_outer = {
|
|
||||||
.type = BPF_MAP_TYPE_ARRAY_OF_MAPS,
|
|
||||||
.size_key = sizeof(uint32_t),
|
|
||||||
.size_value = sizeof(uint32_t),
|
|
||||||
.inner_id = MAP_INNER_ID,
|
|
||||||
.pinning = PIN_GLOBAL_NS,
|
|
||||||
.max_elem = 1,
|
|
||||||
};
|
|
||||||
|
|
||||||
__section("egress")
|
|
||||||
int emain(struct __sk_buff *skb)
|
|
||||||
{
|
|
||||||
struct bpf_elf_map *map_inner;
|
|
||||||
int key = 0, *val;
|
|
||||||
|
|
||||||
map_inner = map_lookup_elem(&map_outer, &key);
|
|
||||||
if (map_inner) {
|
|
||||||
val = map_lookup_elem(map_inner, &key);
|
|
||||||
if (val)
|
|
||||||
lock_xadd(val, 1);
|
|
||||||
}
|
|
||||||
|
|
||||||
return BPF_H_DEFAULT;
|
|
||||||
}
|
|
||||||
|
|
||||||
__section("ingress")
|
|
||||||
int imain(struct __sk_buff *skb)
|
|
||||||
{
|
|
||||||
struct bpf_elf_map *map_inner;
|
|
||||||
int key = 0, *val;
|
|
||||||
|
|
||||||
map_inner = map_lookup_elem(&map_outer, &key);
|
|
||||||
if (map_inner) {
|
|
||||||
val = map_lookup_elem(map_inner, &key);
|
|
||||||
if (val)
|
|
||||||
printt("map val: %d\n", *val);
|
|
||||||
}
|
|
||||||
|
|
||||||
return BPF_H_DEFAULT;
|
|
||||||
}
|
|
||||||
|
|
||||||
BPF_LICENSE("GPL");
|
|
||||||
|
|
@ -1,53 +0,0 @@
|
||||||
#include "../../../include/bpf_api.h"
|
|
||||||
|
|
||||||
/* Minimal, stand-alone toy map pinning example:
|
|
||||||
*
|
|
||||||
* clang -target bpf -O2 [...] -o bpf_shared.o -c bpf_shared.c
|
|
||||||
* tc filter add dev foo parent 1: bpf obj bpf_shared.o sec egress
|
|
||||||
* tc filter add dev foo parent ffff: bpf obj bpf_shared.o sec ingress
|
|
||||||
*
|
|
||||||
* Both classifiers will share the very same map instance in this example,
|
|
||||||
* so map content can be accessed from ingress *and* egress side!
|
|
||||||
*
|
|
||||||
* This example has a pinning of PIN_OBJECT_NS, so it's private and
|
|
||||||
* thus shared among various program sections within the object.
|
|
||||||
*
|
|
||||||
* A setting of PIN_GLOBAL_NS would place it into a global namespace,
|
|
||||||
* so that it can be shared among different object files. A setting
|
|
||||||
* of PIN_NONE (= 0) means no sharing, so each tc invocation a new map
|
|
||||||
* instance is being created.
|
|
||||||
*/
|
|
||||||
|
|
||||||
struct bpf_elf_map __section_maps map_sh = {
|
|
||||||
.type = BPF_MAP_TYPE_ARRAY,
|
|
||||||
.size_key = sizeof(uint32_t),
|
|
||||||
.size_value = sizeof(uint32_t),
|
|
||||||
.pinning = PIN_OBJECT_NS, /* or PIN_GLOBAL_NS, or PIN_NONE */
|
|
||||||
.max_elem = 1,
|
|
||||||
};
|
|
||||||
|
|
||||||
__section("egress")
|
|
||||||
int emain(struct __sk_buff *skb)
|
|
||||||
{
|
|
||||||
int key = 0, *val;
|
|
||||||
|
|
||||||
val = map_lookup_elem(&map_sh, &key);
|
|
||||||
if (val)
|
|
||||||
lock_xadd(val, 1);
|
|
||||||
|
|
||||||
return BPF_H_DEFAULT;
|
|
||||||
}
|
|
||||||
|
|
||||||
__section("ingress")
|
|
||||||
int imain(struct __sk_buff *skb)
|
|
||||||
{
|
|
||||||
int key = 0, *val;
|
|
||||||
|
|
||||||
val = map_lookup_elem(&map_sh, &key);
|
|
||||||
if (val)
|
|
||||||
printt("map val: %d\n", *val);
|
|
||||||
|
|
||||||
return BPF_H_DEFAULT;
|
|
||||||
}
|
|
||||||
|
|
||||||
BPF_LICENSE("GPL");
|
|
||||||
|
|
@ -0,0 +1,983 @@
|
||||||
|
#!/bin/bash
|
||||||
|
#
|
||||||
|
# cbq.init v0.7.3
|
||||||
|
# Copyright (C) 1999 Pavel Golubev <pg@ksi-linux.com>
|
||||||
|
# Copyright (C) 2001-2004 Lubomir Bulej <pallas@kadan.cz>
|
||||||
|
#
|
||||||
|
# chkconfig: 2345 11 89
|
||||||
|
# description: sets up CBQ-based traffic control
|
||||||
|
#
|
||||||
|
# This program is free software; you can redistribute it and/or modify
|
||||||
|
# it under the terms of the GNU General Public License as published by
|
||||||
|
# the Free Software Foundation; either version 2 of the License, or
|
||||||
|
# (at your option) any later version.
|
||||||
|
#
|
||||||
|
# This program is distributed in the hope that it will be useful,
|
||||||
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
# GNU General Public License for more details.
|
||||||
|
#
|
||||||
|
# You should have received a copy of the GNU General Public License
|
||||||
|
# along with this program; if not, see <http://www.gnu.org/licenses/>.
|
||||||
|
#
|
||||||
|
# To get the latest version, check on Freshmeat for actual location:
|
||||||
|
#
|
||||||
|
# http://freshmeat.net/projects/cbq.init
|
||||||
|
#
|
||||||
|
#
|
||||||
|
# VERSION HISTORY
|
||||||
|
# ---------------
|
||||||
|
# v0.7.3- Deepak Singhal <singhal at users.sourceforge.net>
|
||||||
|
# - fix timecheck to not ignore regular TIME rules after
|
||||||
|
# encountering a TIME rule that spans over midnight
|
||||||
|
# - Nathan Shafer <nicodemus at users.sourceforge.net>
|
||||||
|
# - allow symlinks to class files
|
||||||
|
# - Seth J. Blank <antifreeze at users.sourceforge.net>
|
||||||
|
# - replace hardcoded ip/tc location with variables
|
||||||
|
# - Mark Davis <mark.davis at gmx.de>
|
||||||
|
# - allow setting of PRIO_{MARK,RULE,REALM} in class file
|
||||||
|
# - Fernando Sanch <toptnc at users.sourceforge.net>
|
||||||
|
# - allow underscores in interface names
|
||||||
|
# v0.7.2- Paulo Sedrez
|
||||||
|
# - fix time2abs to allow hours with leading zero in TIME rules
|
||||||
|
# - Svetlin Simeonov <zvero at yahoo.com>
|
||||||
|
# - fix cbq_device_list to allow VLAN interfaces
|
||||||
|
# - Mark Davis <mark.davis at gmx.de>
|
||||||
|
# - ignore *~ backup files when looking for classes
|
||||||
|
# - Mike Boyer <boyer at administrative.com>
|
||||||
|
# - fix to allow arguments to be passed to "restart" command
|
||||||
|
# v0.7.1- Lubomir Bulej <pallas at kadan.cz>
|
||||||
|
# - default value for PERTURB
|
||||||
|
# - fixed small bug in RULE parser to correctly parse rules with
|
||||||
|
# identical source and destination fields
|
||||||
|
# - faster initial scanning of DEVICE fields
|
||||||
|
# v0.7 - Lubomir Bulej <pallas at kadan.cz>
|
||||||
|
# - lots of various cleanups and reorganizations; the parsing is now
|
||||||
|
# some 40% faster, but the class ID must be in range 0x0002-0xffff
|
||||||
|
# (again). Because of the number of internal changes and the above
|
||||||
|
# class ID restriction, I bumped the version to 0.7 to indicate
|
||||||
|
# something might have got broken :)
|
||||||
|
# - changed PRIO_{U32,FW,ROUTE} to PRIO_{RULE,MARK,REALM}
|
||||||
|
# for consistency with filter keywords
|
||||||
|
# - exposed "compile" command
|
||||||
|
# - Catalin Petrescu <taz at dntis.ro>
|
||||||
|
# - support for port masks in RULE (u32) filter
|
||||||
|
# - Jordan Vrtanoski <obeliks at mt.net.mk>
|
||||||
|
# - support for week days in TIME rules
|
||||||
|
# v0.6.4- Lubomir Bulej <pallas at kadan.cz>
|
||||||
|
# - added PRIO_* variables to allow easy control of filter priorities
|
||||||
|
# - added caching to speed up CBQ start, the cache is invalidated
|
||||||
|
# whenever any of the configuration files changes
|
||||||
|
# - updated the readme section + some cosmetic fixes
|
||||||
|
# v0.6.3- Lubomir Bulej <pallas at kadan.cz>
|
||||||
|
# - removed setup of (unnecessary) class 1:1 - all classes
|
||||||
|
# now use qdisc's default class 1:0 as their parent
|
||||||
|
# - minor fix in the timecheck branch - classes
|
||||||
|
# without leaf qdisc were not updated
|
||||||
|
# - minor fix to avoid timecheck failure when run
|
||||||
|
# at time with minutes equal to 08 or 09
|
||||||
|
# - respect CBQ_PATH setting in environment
|
||||||
|
# - made PRIO=5 default, rendering it optional in configs
|
||||||
|
# - added support for route filter, see notes about REALM keyword
|
||||||
|
# - added support for fw filter, see notes about MARK keyword
|
||||||
|
# - added filter display to "list" and "stats" commands
|
||||||
|
# - readme section update + various cosmetic fixes
|
||||||
|
# v0.6.2- Catalin Petrescu <taz at dntis.ro>
|
||||||
|
# - added tunnels interface handling
|
||||||
|
# v0.6.1- Pavel Golubev <pg at ksi-linux.com>
|
||||||
|
# - added sch_prio module loading
|
||||||
|
# (thanks johan at iglo.virtual.or.id for reminding)
|
||||||
|
# - resolved errors resulting from stricter syntax checking in bash2
|
||||||
|
# - Lubomir Bulej <pallas at kadan.cz>
|
||||||
|
# - various cosmetic fixes
|
||||||
|
# v0.6 - Lubomir Bulej <pallas at kadan.cz>
|
||||||
|
# - attempt to limit number of spawned processes by utilizing
|
||||||
|
# more of sed power (use sed instead of grep+cut)
|
||||||
|
# - simplified TIME parser, using bash builtins
|
||||||
|
# - added initial support for SFQ as leaf qdisc
|
||||||
|
# - reworked the documentation part a little
|
||||||
|
# - incorporated pending patches and ideas submitted by
|
||||||
|
# following people for versions 0.3 into version 0.6
|
||||||
|
# - Miguel Freitas <miguel at cetuc.puc-rio.br>
|
||||||
|
# - in case of overlapping TIME parameters, the last match is taken
|
||||||
|
# - Juanjo Ciarlante <jjo at mendoza.gov.ar>
|
||||||
|
# - chkconfig tags, list + stats startup parameters
|
||||||
|
# - optional tc & ip command logging (into /var/run/cbq-*)
|
||||||
|
# - Rafal Maszkowski <rzm at icm.edu.pl>
|
||||||
|
# - PEAK parameter for setting TBF's burst peak rate
|
||||||
|
# - fix for many config files (use find instead of ls)
|
||||||
|
# v0.5.1- Lubomir Bulej <pallas at kadan.cz>
|
||||||
|
# - fixed little but serious bug in RULE parser
|
||||||
|
# v0.5 - Lubomir Bulej <pallas at kadan.cz>
|
||||||
|
# - added options PARENT, LEAF, ISOLATED and BOUNDED. This allows
|
||||||
|
# (with some attention to config file ordering) for creating
|
||||||
|
# hierarchical structures of shapers with classes able (or unable)
|
||||||
|
# to borrow bandwidth from their parents.
|
||||||
|
# - class ID check allows hexadecimal numbers
|
||||||
|
# - rewritten & simplified RULE parser
|
||||||
|
# - cosmetic changes to improve readability
|
||||||
|
# - reorganization to avoid duplicate code (timecheck etc.)
|
||||||
|
# - timecheck doesn't check classes without TIME fields anymore
|
||||||
|
# v0.4 - Lubomir Bulej <pallas at kadan.cz>
|
||||||
|
# - small bugfix in RULE parsing code
|
||||||
|
# - simplified configuration parsing code
|
||||||
|
# - several small cosmetic changes
|
||||||
|
# - TIME parameter can be now specified more than once allowing you to
|
||||||
|
# differentiate RATE throughout the whole day. Time overlapping is
|
||||||
|
# not checked, first match is taken. Midnight wrap (eg. 20:00-6:00)
|
||||||
|
# is allowed and taken care of.
|
||||||
|
# v0.3a4- fixed small bug in IF operator. Thanks to
|
||||||
|
# Rafal Maszkowski <rzm at icm.edu.pl>
|
||||||
|
# v0.3a3- fixed grep bug when using more than 10 eth devices. Thanks to David
|
||||||
|
# Trcka <trcka at poda.cz>.
|
||||||
|
# v0.3a2- fixed bug in "if" operator. Thanks kad at dgtu.donetsk.ua.
|
||||||
|
# v0.3a - added TIME parameter. Example: TIME=00:00-19:00;64Kbit/6Kbit
|
||||||
|
# So, between 00:00 and 19:00 the RATE will be 64Kbit.
|
||||||
|
# Just start "cbq.init timecheck" periodically from cron
|
||||||
|
# (every 10 minutes for example). DON'T FORGET though, to run
|
||||||
|
# "cbq.init start" for CBQ to initialize.
|
||||||
|
# v0.2 - Some cosmetic changes. Now it is more compatible with old bash
|
||||||
|
# version. Thanks to Stanislav V. Voronyi <stas at cnti.uanet.kharkov.ua>.
|
||||||
|
# v0.1 - First public release
|
||||||
|
#
|
||||||
|
#
|
||||||
|
# README
|
||||||
|
# ------
|
||||||
|
#
|
||||||
|
# First of all - this is just a SIMPLE EXAMPLE of CBQ power.
|
||||||
|
# Don't ask me "why" and "how" :)
|
||||||
|
#
|
||||||
|
# This script is meant to simplify setup and management of relatively simple
|
||||||
|
# CBQ-based traffic control on Linux. Access to advanced networking features
|
||||||
|
# of Linux kernel is provided by "ip" and "tc" utilities from A. Kuznetsov's
|
||||||
|
# iproute2 package, available at ftp://ftp.inr.ac.ru/ip-routing. Because the
|
||||||
|
# utilities serve primarily to translate user wishes to RTNETLINK commands,
|
||||||
|
# their interface is rather spartan, intolerant and requires quite a lot of
|
||||||
|
# typing. And typing is what this script attempts to reduce :)
|
||||||
|
#
|
||||||
|
# The advanced networking stuff in Linux is pretty flexible and this script
|
||||||
|
# aims to bring some of its features to the not-so-hard-core Linux users. Of
|
||||||
|
# course, there is a tradeoff between simplicity and flexibility and you may
|
||||||
|
# realize that the flexibility suffered too much for your needs -- time to
|
||||||
|
# face "ip" and "tc" interface.
|
||||||
|
#
|
||||||
|
# To speed up the "start" command, simple caching was introduced in version
|
||||||
|
# 0.6.4. The caching works so that the sequence of "tc" commands for given
|
||||||
|
# configuration is stored in a file (/var/cache/cbq.init by default) which
|
||||||
|
# is used next time the "start" command is run to avoid repeated parsing of
|
||||||
|
# configuration files. This cache is invalidated whenever any of the CBQ
|
||||||
|
# configuration files changes. If you want to run "cbq.init start" without
|
||||||
|
# caching, run it as "cbq.init start nocache". If you want to force cache
|
||||||
|
# invalidation, run it as "cbq.init start invalidate". Caching is disabled
|
||||||
|
# if you have logging enabled (ie. CBQ_DEBUG is not empty).
|
||||||
|
#
|
||||||
|
# If you only want cbq.init to translate your configuration to "tc" commands,
|
||||||
|
# use "compile" command which will output "tc" commands required to build
|
||||||
|
# your configuration. Bear in mind that "compile" does not check if the "tc"
|
||||||
|
# commands were successful - this is done (in certain places) only when the
|
||||||
|
# "start nocache" command is used, which is also useful when creating the
|
||||||
|
# configuration to check whether it is completely valid.
|
||||||
|
#
|
||||||
|
# All CBQ parameters are valid for Ethernet interfaces only. The script was
|
||||||
|
# tested on various Linux kernel versions from series 2.1 to 2.4 and several
|
||||||
|
# distributions with KSI Linux (Nostromo version) as the premier one.
|
||||||
|
#
|
||||||
|
#
|
||||||
|
# HOW DOES IT WORK?
|
||||||
|
# -----------------
|
||||||
|
#
|
||||||
|
# Every traffic class must be described by a file in the $CBQ_PATH directory
|
||||||
|
# (/etc/sysconfig/cbq by default) - one file per class.
|
||||||
|
#
|
||||||
|
# The config file names must obey mandatory format: cbq-<clsid>.<name> where
|
||||||
|
# <clsid> is two-byte hexadecimal number in range <0002-FFFF> (which in fact
|
||||||
|
# is a CBQ class ID) and <name> is the name of the class -- anything to help
|
||||||
|
# you distinguish the configuration files. For small amount of classes it is
|
||||||
|
# often possible (and convenient) to let <clsid> resemble bandwidth of the
|
||||||
|
# class.
|
||||||
|
#
|
||||||
|
# Example of valid config name:
|
||||||
|
# cbq-1280.My_first_shaper
|
||||||
|
#
|
||||||
|
#
|
||||||
|
# The configuration file may contain the following parameters:
|
||||||
|
#
|
||||||
|
### Device parameters
|
||||||
|
#
|
||||||
|
# DEVICE=<ifname>,<bandwidth>[,<weight>] mandatory
|
||||||
|
# DEVICE=eth0,10Mbit,1Mbit
|
||||||
|
#
|
||||||
|
# <ifname> is the name of the interface you want to control
|
||||||
|
# traffic on, e.g. eth0
|
||||||
|
# <bandwidth> is the physical bandwidth of the device, e.g. for
|
||||||
|
# ethernet 10Mbit or 100Mbit, for arcnet 2Mbit
|
||||||
|
# <weight> is tuning parameter that should be proportional to
|
||||||
|
# <bandwidth>. As a rule of thumb: <weight> = <bandwidth> / 10
|
||||||
|
#
|
||||||
|
# When you have more classes on one interface, it is enough to specify
|
||||||
|
# <bandwidth> [and <weight>] only once, therefore in other files you only
|
||||||
|
# need to set DEVICE=<ifname>.
|
||||||
|
#
|
||||||
|
### Class parameters
|
||||||
|
#
|
||||||
|
# RATE=<speed> mandatory
|
||||||
|
# RATE=5Mbit
|
||||||
|
#
|
||||||
|
# Bandwidth allocated to the class. Traffic going through the class is
|
||||||
|
# shaped to conform to specified rate. You can use Kbit, Mbit or bps,
|
||||||
|
# Kbps and Mbps as suffixes. If you don't specify any unit, bits/sec
|
||||||
|
# are used. Also note that "bps" means "bytes per second", not bits.
|
||||||
|
#
|
||||||
|
# WEIGHT=<speed> mandatory
|
||||||
|
# WEIGHT=500Kbit
|
||||||
|
#
|
||||||
|
# Tuning parameter that should be proportional to RATE. As a rule
|
||||||
|
# of thumb, use WEIGHT ~= RATE / 10.
|
||||||
|
#
|
||||||
|
# PRIO=<1-8> optional, default 5
|
||||||
|
# PRIO=5
|
||||||
|
#
|
||||||
|
# Priority of class traffic. The higher the number, the lesser
|
||||||
|
# the priority. Priority of 5 is just fine.
|
||||||
|
#
|
||||||
|
# PARENT=<clsid> optional, default not set
|
||||||
|
# PARENT=1280
|
||||||
|
#
|
||||||
|
# Specifies ID of the parent class to which you want this class be
|
||||||
|
# attached. You might want to use LEAF=none for the parent class as
|
||||||
|
# mentioned below. By using this parameter and carefully ordering the
|
||||||
|
# configuration files, it is possible to create simple hierarchical
|
||||||
|
# structures of CBQ classes. The ordering is important so that parent
|
||||||
|
# classes are constructed prior to their children.
|
||||||
|
#
|
||||||
|
# LEAF=none|tbf|sfq optional, default "tbf"
|
||||||
|
#
|
||||||
|
# Tells the script to attach specified leaf queueing discipline to CBQ
|
||||||
|
# class. By default, TBF is used. Note that attaching TBF to CBQ class
|
||||||
|
# shapes the traffic to conform to TBF parameters and prevents the class
|
||||||
|
# from borrowing bandwidth from its parent even if you have BOUNDED set
|
||||||
|
# to "no". To allow the class to borrow bandwidth (provided it is not
|
||||||
|
# bounded), you must set LEAF to "none" or "sfq".
|
||||||
|
#
|
||||||
|
# If you want to ensure (approximately) fair sharing of bandwidth among
|
||||||
|
# several hosts in the same class, you might want to specify LEAF=sfq to
|
||||||
|
# attach SFQ as leaf queueing discipline to that class.
|
||||||
|
#
|
||||||
|
# BOUNDED=yes|no optional, default "yes"
|
||||||
|
#
|
||||||
|
# If set to "yes", the class is not allowed to borrow bandwidth from
|
||||||
|
# its parent class in overlimit situation. If set to "no", the class
|
||||||
|
# will be allowed to borrow bandwidth from its parent.
|
||||||
|
#
|
||||||
|
# Note: Don't forget to set LEAF to "none" or "sfq", otherwise the class will
|
||||||
|
# have TBF attached to itself and will not be able to borrow unused
|
||||||
|
# bandwidth from its parent.
|
||||||
|
#
|
||||||
|
# ISOLATED=yes|no optional, default "no"
|
||||||
|
#
|
||||||
|
# If set to "yes", the class will not lend unused bandwidth to
|
||||||
|
# its children.
|
||||||
|
#
|
||||||
|
### TBF qdisc parameters
|
||||||
|
#
|
||||||
|
# BUFFER=<bytes>[/<bytes>] optional, default "10Kb/8"
|
||||||
|
#
|
||||||
|
# This parameter controls the depth of the token bucket. In other
|
||||||
|
# words it represents the maximal burst size the class can send.
|
||||||
|
# The optional part of parameter is used to determine the length
|
||||||
|
# of intervals in packet sizes, for which the transmission times
|
||||||
|
# are kept.
|
||||||
|
#
|
||||||
|
# LIMIT=<bytes> optional, default "15Kb"
|
||||||
|
#
|
||||||
|
# This parameter determines the maximal length of backlog. If
|
||||||
|
# the queue contains more data than specified by LIMIT, the
|
||||||
|
# newly arriving packets are dropped. The length of backlog
|
||||||
|
# determines queue latency in case of congestion.
|
||||||
|
#
|
||||||
|
# PEAK=<speed> optional, default not set
|
||||||
|
#
|
||||||
|
# Maximal peak rate for short-term burst traffic. This allows you
|
||||||
|
# to control the absolute peak rate the class can send at, because
|
||||||
|
# single TBF that allows 256Kbit/s would of course allow rate of
|
||||||
|
# 512Kbit for half a second or 1Mbit for a quarter of second.
|
||||||
|
#
|
||||||
|
# MTU=<bytes> optional, default "1500"
|
||||||
|
#
|
||||||
|
# Maximum number of bytes that can be sent at once over the
|
||||||
|
# physical medium. This parameter is required when you specify
|
||||||
|
# PEAK parameter. It defaults to MTU of ethernet - for other
|
||||||
|
# media types you might want to change it.
|
||||||
|
#
|
||||||
|
# Note: Setting TBF as leaf qdisc will effectively prevent the class from
|
||||||
|
# borrowing bandwidth from the ancestor class, because even if the
|
||||||
|
# class allows more traffic to pass through, it is then shaped to
|
||||||
|
# conform to TBF.
|
||||||
|
#
|
||||||
|
### SFQ qdisc parameters
|
||||||
|
#
|
||||||
|
# The SFQ queueing discipline is a cheap way for sharing class bandwidth
|
||||||
|
# among several hosts. As it is stochastic, the fairness is approximate but
|
||||||
|
# it will do the job in most cases. If you want real fairness, you should
|
||||||
|
# probably use WRR (weighted round robin) or WFQ queueing disciplines. Note
|
||||||
|
# that SFQ does not do any traffic shaping - the shaping is done by the CBQ
|
||||||
|
# class the SFQ is attached to.
|
||||||
|
#
|
||||||
|
# QUANTUM=<bytes> optional, default not set
|
||||||
|
#
|
||||||
|
# This parameter should not be set lower than link MTU, for ethernet
|
||||||
|
# it is 1500b, or (with MAC header) 1514b which is the value used
|
||||||
|
# in Alexey Kuznetsov's examples.
|
||||||
|
#
|
||||||
|
# PERTURB=<seconds> optional, default "10"
|
||||||
|
#
|
||||||
|
# Period of hash function perturbation. If unset, hash reconfiguration
|
||||||
|
# will never take place which is what you probably don't want. The
|
||||||
|
# default value of 10 seconds is probably a good one.
|
||||||
|
#
|
||||||
|
### Filter parameters
|
||||||
|
#
|
||||||
|
# RULE=[[saddr[/prefix]][:port[/mask]],][daddr[/prefix]][:port[/mask]]
|
||||||
|
#
|
||||||
|
# These parameters make up "u32" filter rules that select traffic for
|
||||||
|
# each of the classes. You can use multiple RULE fields per config.
|
||||||
|
#
|
||||||
|
# The optional port mask should only be used by advanced users who
|
||||||
|
# understand how the u32 filter works.
|
||||||
|
#
|
||||||
|
# Some examples:
|
||||||
|
#
|
||||||
|
# RULE=10.1.1.0/24:80
|
||||||
|
# selects traffic going to port 80 in network 10.1.1.0
|
||||||
|
#
|
||||||
|
# RULE=10.2.2.5
|
||||||
|
# selects traffic going to any port on single host 10.2.2.5
|
||||||
|
#
|
||||||
|
# RULE=10.2.2.5:20/0xfffe
|
||||||
|
# selects traffic going to ports 20 and 21 on host 10.2.2.5
|
||||||
|
#
|
||||||
|
# RULE=:25,10.2.2.128/26:5000
|
||||||
|
# selects traffic going from anywhere on port 25 to
|
||||||
|
# port 5000 in network 10.2.2.128
|
||||||
|
#
|
||||||
|
# RULE=10.5.5.5:80,
|
||||||
|
# selects traffic going from port 80 of single host 10.5.5.5
|
||||||
|
#
|
||||||
|
#
|
||||||
|
#
|
||||||
|
# REALM=[srealm,][drealm]
|
||||||
|
#
|
||||||
|
# These parameters make up "route" filter rules that classify traffic
|
||||||
|
# according to packet source/destination realms. For information about
|
||||||
|
# realms, see Alexey Kuznetsov's IP Command Reference. This script
|
||||||
|
# does not define any realms, it just builds "tc filter" commands
|
||||||
|
# for you if you need to classify traffic this way.
|
||||||
|
#
|
||||||
|
# Realm is either a decimal number or a string referencing entry in
|
||||||
|
# /etc/iproute2/rt_realms (usually).
|
||||||
|
#
|
||||||
|
# Some examples:
|
||||||
|
#
|
||||||
|
# REALM=russia,internet
|
||||||
|
# selects traffic going from realm "russia" to realm "internet"
|
||||||
|
#
|
||||||
|
# REALM=freenet,
|
||||||
|
# selects traffic going from realm "freenet"
|
||||||
|
#
|
||||||
|
# REALM=10
|
||||||
|
# selects traffic going to realm 10
|
||||||
|
#
|
||||||
|
#
|
||||||
|
#
|
||||||
|
# MARK=<mark>
|
||||||
|
#
|
||||||
|
# These parameters make up "fw" filter rules that select traffic for
|
||||||
|
# each of the classes according to firewall "mark". Mark is a decimal
|
||||||
|
# number packets are tagged with if firewall rules say so. You can
|
||||||
|
# use multiple MARK fields per config.
|
||||||
|
#
|
||||||
|
#
|
||||||
|
# Note: Rules for different filter types can be combined. Attention must be
|
||||||
|
# paid to the priority of filter rules, which can be set below using
|
||||||
|
# PRIO_{RULE,MARK,REALM} variables.
|
||||||
|
#
|
||||||
|
### Time ranging parameters
|
||||||
|
#
|
||||||
|
# TIME=[<dow>,<dow>, ...,<dow>/]<from>-<till>;<rate>/<weight>[/<peak>]
|
||||||
|
# TIME=0,1,2,5/18:00-06:00;256Kbit/25Kbit
|
||||||
|
# TIME=60123/18:00-06:00;256Kbit/25Kbit
|
||||||
|
# TIME=18:00-06:00;256Kbit/25Kbit
|
||||||
|
#
|
||||||
|
# This parameter allows you to differentiate the class bandwidth
|
||||||
|
# throughout the day. You can specify multiple TIME parameters, if
|
||||||
|
# the times overlap, last match is taken. The fields <rate>, <weight>
|
||||||
|
# and <peak> correspond to parameters RATE, WEIGHT and PEAK (which
|
||||||
|
# is optional and applies to TBF leaf qdisc only).
|
||||||
|
#
|
||||||
|
# You can also specify days of week when the TIME rule applies. <dow>
|
||||||
|
# is numeric, 0 corresponds to Sunday, 1 corresponds to Monday, etc.
|
||||||
|
#
|
||||||
|
###
|
||||||
|
#
|
||||||
|
# Sample configuration file: cbq-1280.My_first_shaper
|
||||||
|
#
|
||||||
|
# --------------------------------------------------------------------------
|
||||||
|
# DEVICE=eth0,10Mbit,1Mbit
|
||||||
|
# RATE=128Kbit
|
||||||
|
# WEIGHT=10Kbit
|
||||||
|
# PRIO=5
|
||||||
|
# RULE=192.168.1.0/24
|
||||||
|
# --------------------------------------------------------------------------
|
||||||
|
#
|
||||||
|
# The configuration says that we will control traffic on 10Mbit ethernet
|
||||||
|
# device eth0 and the traffic going to network 192.168.1.0 will be
|
||||||
|
# processed with priority 5 and shaped to rate of 128Kbit.
|
||||||
|
#
|
||||||
|
# Note that you can control outgoing traffic only. If you want to control
|
||||||
|
# traffic in both directions, you must set up CBQ for both interfaces.
|
||||||
|
#
|
||||||
|
# Consider the following example:
|
||||||
|
#
|
||||||
|
# +---------+ 192.168.1.1
|
||||||
|
# BACKBONE -----eth0-| linux |-eth1------*-[client]
|
||||||
|
# +---------+
|
||||||
|
#
|
||||||
|
# Imagine you want to shape traffic from backbone to the client to 28Kbit
|
||||||
|
# and traffic in the opposite direction to 128Kbit. You need to setup CBQ
|
||||||
|
# on both eth0 and eth1 interfaces, thus you need two config files:
|
||||||
|
#
|
||||||
|
# cbq-028.backbone-client
|
||||||
|
# --------------------------------------------------------------------------
|
||||||
|
# DEVICE=eth1,10Mbit,1Mbit
|
||||||
|
# RATE=28Kbit
|
||||||
|
# WEIGHT=2Kbit
|
||||||
|
# PRIO=5
|
||||||
|
# RULE=192.168.1.1
|
||||||
|
# --------------------------------------------------------------------------
|
||||||
|
#
|
||||||
|
# cbq-128.client-backbone
|
||||||
|
# --------------------------------------------------------------------------
|
||||||
|
# DEVICE=eth0,10Mbit,1Mbit
|
||||||
|
# RATE=128Kbit
|
||||||
|
# WEIGHT=10Kbit
|
||||||
|
# PRIO=5
|
||||||
|
# RULE=192.168.1.1,
|
||||||
|
# --------------------------------------------------------------------------
|
||||||
|
#
|
||||||
|
# Pay attention to comma "," in the RULE field - it denotes source address!
|
||||||
|
#
|
||||||
|
# Enjoy.
|
||||||
|
#
|
||||||
|
#############################################################################
|
||||||
|
|
||||||
|
export LC_ALL=C
|
||||||
|
|
||||||
|
### Command locations
|
||||||
|
TC=/sbin/tc
|
||||||
|
IP=/sbin/ip
|
||||||
|
MP=/sbin/modprobe
|
||||||
|
|
||||||
|
### Default filter priorities (must be different)
|
||||||
|
PRIO_RULE_DEFAULT=${PRIO_RULE:-100}
|
||||||
|
PRIO_MARK_DEFAULT=${PRIO_MARK:-200}
|
||||||
|
PRIO_REALM_DEFAULT=${PRIO_REALM:-300}
|
||||||
|
|
||||||
|
### Default CBQ_PATH & CBQ_CACHE settings
|
||||||
|
CBQ_PATH=${CBQ_PATH:-/etc/sysconfig/cbq}
|
||||||
|
CBQ_CACHE=${CBQ_CACHE:-/var/cache/cbq.init}
|
||||||
|
|
||||||
|
### Uncomment to enable logfile for debugging
|
||||||
|
#CBQ_DEBUG="/var/run/cbq-$1"
|
||||||
|
|
||||||
|
### Modules to probe for. Uncomment the last CBQ_PROBE
|
||||||
|
### line if you have QoS support compiled into kernel
|
||||||
|
CBQ_PROBE="sch_cbq sch_tbf sch_sfq sch_prio"
|
||||||
|
CBQ_PROBE="$CBQ_PROBE cls_fw cls_u32 cls_route"
|
||||||
|
#CBQ_PROBE=""
|
||||||
|
|
||||||
|
### Keywords required for qdisc & class configuration
|
||||||
|
CBQ_WORDS="DEVICE|RATE|WEIGHT|PRIO|PARENT|LEAF|BOUNDED|ISOLATED"
|
||||||
|
CBQ_WORDS="$CBQ_WORDS|PRIO_MARK|PRIO_RULE|PRIO_REALM|BUFFER"
|
||||||
|
CBQ_WORDS="$CBQ_WORDS|LIMIT|PEAK|MTU|QUANTUM|PERTURB"
|
||||||
|
|
||||||
|
### Source AVPKT if it exists
|
||||||
|
[ -r /etc/sysconfig/cbq/avpkt ] && . /etc/sysconfig/cbq/avpkt
|
||||||
|
AVPKT=${AVPKT:-3000}
|
||||||
|
|
||||||
|
|
||||||
|
#############################################################################
|
||||||
|
############################# SUPPORT FUNCTIONS #############################
|
||||||
|
#############################################################################
|
||||||
|
|
||||||
|
### Get list of network devices
|
||||||
|
cbq_device_list () {
|
||||||
|
ip link show| sed -n "/^[0-9]/ \
|
||||||
|
{ s/^[0-9]\+: \([a-z0-9._]\+\)[:@].*/\1/; p; }"
|
||||||
|
} # cbq_device_list
|
||||||
|
|
||||||
|
|
||||||
|
### Remove root class from device $1
|
||||||
|
cbq_device_off () {
|
||||||
|
tc qdisc del dev $1 root 2> /dev/null
|
||||||
|
} # cbq_device_off
|
||||||
|
|
||||||
|
|
||||||
|
### Remove CBQ from all devices
|
||||||
|
cbq_off () {
|
||||||
|
for dev in `cbq_device_list`; do
|
||||||
|
cbq_device_off $dev
|
||||||
|
done
|
||||||
|
} # cbq_off
|
||||||
|
|
||||||
|
|
||||||
|
### Prefixed message
|
||||||
|
cbq_message () {
|
||||||
|
echo -e "**CBQ: $@"
|
||||||
|
} # cbq_message
|
||||||
|
|
||||||
|
### Failure message
|
||||||
|
cbq_failure () {
|
||||||
|
cbq_message "$@"
|
||||||
|
exit 1
|
||||||
|
} # cbq_failure
|
||||||
|
|
||||||
|
### Failure w/ cbq-off
|
||||||
|
cbq_fail_off () {
|
||||||
|
cbq_message "$@"
|
||||||
|
cbq_off
|
||||||
|
exit 1
|
||||||
|
} # cbq_fail_off
|
||||||
|
|
||||||
|
|
||||||
|
### Convert time to absolute value
|
||||||
|
cbq_time2abs () {
|
||||||
|
local min=${1##*:}; min=${min##0}
|
||||||
|
local hrs=${1%%:*}; hrs=${hrs##0}
|
||||||
|
echo $[hrs*60 + min]
|
||||||
|
} # cbq_time2abs
|
||||||
|
|
||||||
|
|
||||||
|
### Display CBQ setup
|
||||||
|
cbq_show () {
|
||||||
|
for dev in `cbq_device_list`; do
|
||||||
|
[ `tc qdisc show dev $dev| wc -l` -eq 0 ] && continue
|
||||||
|
echo -e "### $dev: queueing disciplines\n"
|
||||||
|
tc $1 qdisc show dev $dev; echo
|
||||||
|
|
||||||
|
[ `tc class show dev $dev| wc -l` -eq 0 ] && continue
|
||||||
|
echo -e "### $dev: traffic classes\n"
|
||||||
|
tc $1 class show dev $dev; echo
|
||||||
|
|
||||||
|
[ `tc filter show dev $dev| wc -l` -eq 0 ] && continue
|
||||||
|
echo -e "### $dev: filtering rules\n"
|
||||||
|
tc $1 filter show dev $dev; echo
|
||||||
|
done
|
||||||
|
} # cbq_show
|
||||||
|
|
||||||
|
|
||||||
|
### Check configuration and load DEVICES, DEVFIELDS and CLASSLIST from $1
|
||||||
|
cbq_init () {
|
||||||
|
### Get a list of configured classes
|
||||||
|
CLASSLIST=`find $1 -maxdepth 1 \( -type f -or -type l \) -name 'cbq-*' \
|
||||||
|
-not -name '*~' -printf "%f\n"| sort`
|
||||||
|
[ -z "$CLASSLIST" ] &&
|
||||||
|
cbq_failure "no configuration files found in $1!"
|
||||||
|
|
||||||
|
### Gather all DEVICE fields from $1/cbq-*
|
||||||
|
DEVFIELDS=`find $1 -maxdepth 1 \( -type f -or -type l \) -name 'cbq-*' \
|
||||||
|
-not -name '*~' | xargs sed -n 's/#.*//; \
|
||||||
|
s/[[:space:]]//g; /^DEVICE=[^,]*,[^,]*\(,[^,]*\)\?/ \
|
||||||
|
{ s/.*=//; p; }'| sort -u`
|
||||||
|
[ -z "$DEVFIELDS" ] &&
|
||||||
|
cbq_failure "no DEVICE field found in $1/cbq-*!"
|
||||||
|
|
||||||
|
### Check for different DEVICE fields for the same device
|
||||||
|
DEVICES=`echo "$DEVFIELDS"| sed 's/,.*//'| sort -u`
|
||||||
|
[ `echo "$DEVICES"| wc -l` -ne `echo "$DEVFIELDS"| wc -l` ] &&
|
||||||
|
cbq_failure "different DEVICE fields for single device!\n$DEVFIELDS"
|
||||||
|
} # cbq_init
|
||||||
|
|
||||||
|
|
||||||
|
### Load class configuration from $1/$2
|
||||||
|
cbq_load_class () {
|
||||||
|
CLASS=`echo $2| sed 's/^cbq-0*//; s/^\([0-9a-fA-F]\+\).*/\1/'`
|
||||||
|
CFILE=`sed -n 's/#.*//; s/[[:space:]]//g; /^[[:alnum:]_]\+=[[:alnum:].,:;/*@-_]\+$/ p' $1/$2`
|
||||||
|
|
||||||
|
### Check class number
|
||||||
|
IDVAL=`/usr/bin/printf "%d" 0x$CLASS 2> /dev/null`
|
||||||
|
[ $? -ne 0 -o $IDVAL -lt 2 -o $IDVAL -gt 65535 ] &&
|
||||||
|
cbq_fail_off "class ID of $2 must be in range <0002-FFFF>!"
|
||||||
|
|
||||||
|
### Set defaults & load class
|
||||||
|
RATE=""; WEIGHT=""; PARENT=""; PRIO=5
|
||||||
|
LEAF=tbf; BOUNDED=yes; ISOLATED=no
|
||||||
|
BUFFER=10Kb/8; LIMIT=15Kb; MTU=1500
|
||||||
|
PEAK=""; PERTURB=10; QUANTUM=""
|
||||||
|
|
||||||
|
PRIO_RULE=$PRIO_RULE_DEFAULT
|
||||||
|
PRIO_MARK=$PRIO_MARK_DEFAULT
|
||||||
|
PRIO_REALM=$PRIO_REALM_DEFAULT
|
||||||
|
|
||||||
|
eval `echo "$CFILE"| grep -E "^($CBQ_WORDS)="`
|
||||||
|
|
||||||
|
### Require RATE/WEIGHT
|
||||||
|
[ -z "$RATE" -o -z "$WEIGHT" ] &&
|
||||||
|
cbq_fail_off "missing RATE or WEIGHT in $2!"
|
||||||
|
|
||||||
|
### Class device
|
||||||
|
DEVICE=${DEVICE%%,*}
|
||||||
|
[ -z "$DEVICE" ] && cbq_fail_off "missing DEVICE field in $2!"
|
||||||
|
|
||||||
|
BANDWIDTH=`echo "$DEVFIELDS"| sed -n "/^$DEVICE,/ \
|
||||||
|
{ s/[^,]*,\([^,]*\).*/\1/; p; q; }"`
|
||||||
|
|
||||||
|
### Convert to "tc" options
|
||||||
|
PEAK=${PEAK:+peakrate $PEAK}
|
||||||
|
PERTURB=${PERTURB:+perturb $PERTURB}
|
||||||
|
QUANTUM=${QUANTUM:+quantum $QUANTUM}
|
||||||
|
|
||||||
|
[ "$BOUNDED" = "no" ] && BOUNDED="" || BOUNDED="bounded"
|
||||||
|
[ "$ISOLATED" = "yes" ] && ISOLATED="isolated" || ISOLATED=""
|
||||||
|
} # cbq_load_class
|
||||||
|
|
||||||
|
|
||||||
|
#############################################################################
|
||||||
|
#################################### INIT ###################################
|
||||||
|
#############################################################################
|
||||||
|
|
||||||
|
### Check for presence of ip-route2 in usual place
|
||||||
|
[ -x $TC -a -x $IP ] ||
|
||||||
|
cbq_failure "ip-route2 utilities not installed or executable!"
|
||||||
|
|
||||||
|
|
||||||
|
### ip/tc wrappers
|
||||||
|
if [ "$1" = "compile" ]; then
|
||||||
|
### no module probing
|
||||||
|
CBQ_PROBE=""
|
||||||
|
|
||||||
|
ip () {
|
||||||
|
$IP "$@"
|
||||||
|
} # ip
|
||||||
|
|
||||||
|
### echo-only version of "tc" command
|
||||||
|
tc () {
|
||||||
|
echo "$TC $@"
|
||||||
|
} # tc
|
||||||
|
|
||||||
|
elif [ -n "$CBQ_DEBUG" ]; then
|
||||||
|
echo -e "# `date`" > $CBQ_DEBUG
|
||||||
|
|
||||||
|
### Logging version of "ip" command
|
||||||
|
ip () {
|
||||||
|
echo -e "\n# ip $@" >> $CBQ_DEBUG
|
||||||
|
$IP "$@" 2>&1 | tee -a $CBQ_DEBUG
|
||||||
|
} # ip
|
||||||
|
|
||||||
|
### Logging version of "tc" command
|
||||||
|
tc () {
|
||||||
|
echo -e "\n# tc $@" >> $CBQ_DEBUG
|
||||||
|
$TC "$@" 2>&1 | tee -a $CBQ_DEBUG
|
||||||
|
} # tc
|
||||||
|
else
|
||||||
|
### Default wrappers
|
||||||
|
|
||||||
|
ip () {
|
||||||
|
$IP "$@"
|
||||||
|
} # ip
|
||||||
|
|
||||||
|
tc () {
|
||||||
|
$TC "$@"
|
||||||
|
} # tc
|
||||||
|
fi # ip/tc wrappers
|
||||||
|
|
||||||
|
|
||||||
|
case "$1" in
|
||||||
|
|
||||||
|
#############################################################################
|
||||||
|
############################### START/COMPILE ###############################
|
||||||
|
#############################################################################
|
||||||
|
|
||||||
|
start|compile)
|
||||||
|
|
||||||
|
### Probe QoS modules (start only)
|
||||||
|
for module in $CBQ_PROBE; do
|
||||||
|
$MP $module || cbq_failure "failed to load module $module"
|
||||||
|
done
|
||||||
|
|
||||||
|
### If we are in compile/nocache/logging mode, don't bother with cache
|
||||||
|
if [ "$1" != "compile" -a "$2" != "nocache" -a -z "$CBQ_DEBUG" ]; then
|
||||||
|
VALID=1
|
||||||
|
|
||||||
|
### validate the cache
|
||||||
|
[ "$2" = "invalidate" -o ! -f $CBQ_CACHE ] && VALID=0
|
||||||
|
if [ $VALID -eq 1 ]; then
|
||||||
|
[ `find $CBQ_PATH -maxdepth 1 -newer $CBQ_CACHE| \
|
||||||
|
wc -l` -gt 0 ] && VALID=0
|
||||||
|
fi
|
||||||
|
|
||||||
|
### compile the config if the cache is invalid
|
||||||
|
if [ $VALID -ne 1 ]; then
|
||||||
|
$0 compile > $CBQ_CACHE ||
|
||||||
|
cbq_fail_off "failed to compile CBQ configuration!"
|
||||||
|
fi
|
||||||
|
|
||||||
|
### run the cached commands
|
||||||
|
exec /bin/sh $CBQ_CACHE 2> /dev/null
|
||||||
|
fi
|
||||||
|
|
||||||
|
### Load DEVICES, DEVFIELDS and CLASSLIST
|
||||||
|
cbq_init $CBQ_PATH
|
||||||
|
|
||||||
|
|
||||||
|
### Setup root qdisc on all configured devices
|
||||||
|
for dev in $DEVICES; do
|
||||||
|
### Retrieve device bandwidth and, optionally, weight
|
||||||
|
DEVTEMP=`echo "$DEVFIELDS"| sed -n "/^$dev,/ { s/$dev,//; p; q; }"`
|
||||||
|
DEVBWDT=${DEVTEMP%%,*}; DEVWGHT=${DEVTEMP##*,}
|
||||||
|
[ "$DEVBWDT" = "$DEVWGHT" ] && DEVWGHT=""
|
||||||
|
|
||||||
|
### Device bandwidth is required
|
||||||
|
if [ -z "$DEVBWDT" ]; then
|
||||||
|
cbq_message "could not determine bandwidth for device $dev!"
|
||||||
|
cbq_failure "please set up the DEVICE fields properly!"
|
||||||
|
fi
|
||||||
|
|
||||||
|
### Check if the device is there
|
||||||
|
ip link show $dev &> /dev/null ||
|
||||||
|
cbq_fail_off "device $dev not found!"
|
||||||
|
|
||||||
|
### Remove old root qdisc from device
|
||||||
|
cbq_device_off $dev
|
||||||
|
|
||||||
|
|
||||||
|
### Setup root qdisc + class for device
|
||||||
|
tc qdisc add dev $dev root handle 1 cbq \
|
||||||
|
bandwidth $DEVBWDT avpkt $AVPKT cell 8
|
||||||
|
|
||||||
|
### Set weight of the root class if set
|
||||||
|
[ -n "$DEVWGHT" ] &&
|
||||||
|
tc class change dev $dev root cbq weight $DEVWGHT allot 1514
|
||||||
|
|
||||||
|
[ "$1" = "compile" ] && echo
|
||||||
|
done # dev
|
||||||
|
|
||||||
|
|
||||||
|
### Setup traffic classes
|
||||||
|
for classfile in $CLASSLIST; do
|
||||||
|
cbq_load_class $CBQ_PATH $classfile
|
||||||
|
|
||||||
|
### Create the class
|
||||||
|
tc class add dev $DEVICE parent 1:$PARENT classid 1:$CLASS cbq \
|
||||||
|
bandwidth $BANDWIDTH rate $RATE weight $WEIGHT prio $PRIO \
|
||||||
|
allot 1514 cell 8 maxburst 20 avpkt $AVPKT $BOUNDED $ISOLATED ||
|
||||||
|
cbq_fail_off "failed to add class $CLASS with parent $PARENT on $DEVICE!"
|
||||||
|
|
||||||
|
### Create leaf qdisc if set
|
||||||
|
if [ "$LEAF" = "tbf" ]; then
|
||||||
|
tc qdisc add dev $DEVICE parent 1:$CLASS handle $CLASS tbf \
|
||||||
|
rate $RATE buffer $BUFFER limit $LIMIT mtu $MTU $PEAK
|
||||||
|
elif [ "$LEAF" = "sfq" ]; then
|
||||||
|
tc qdisc add dev $DEVICE parent 1:$CLASS handle $CLASS sfq \
|
||||||
|
$PERTURB $QUANTUM
|
||||||
|
fi
|
||||||
|
|
||||||
|
|
||||||
|
### Create fw filter for MARK fields
|
||||||
|
for mark in `echo "$CFILE"| sed -n '/^MARK/ { s/.*=//; p; }'`; do
|
||||||
|
### Attach fw filter to root class
|
||||||
|
tc filter add dev $DEVICE parent 1:0 protocol ip \
|
||||||
|
prio $PRIO_MARK handle $mark fw classid 1:$CLASS
|
||||||
|
done ### mark
|
||||||
|
|
||||||
|
### Create route filter for REALM fields
|
||||||
|
for realm in `echo "$CFILE"| sed -n '/^REALM/ { s/.*=//; p; }'`; do
|
||||||
|
### Split realm into source & destination realms
|
||||||
|
SREALM=${realm%%,*}; DREALM=${realm##*,}
|
||||||
|
[ "$SREALM" = "$DREALM" ] && SREALM=""
|
||||||
|
|
||||||
|
### Convert asterisks to empty strings
|
||||||
|
SREALM=${SREALM#\*}; DREALM=${DREALM#\*}
|
||||||
|
|
||||||
|
### Attach route filter to the root class
|
||||||
|
tc filter add dev $DEVICE parent 1:0 protocol ip \
|
||||||
|
prio $PRIO_REALM route ${SREALM:+from $SREALM} \
|
||||||
|
${DREALM:+to $DREALM} classid 1:$CLASS
|
||||||
|
done ### realm
|
||||||
|
|
||||||
|
### Create u32 filter for RULE fields
|
||||||
|
for rule in `echo "$CFILE"| sed -n '/^RULE/ { s/.*=//; p; }'`; do
|
||||||
|
### Split rule into source & destination
|
||||||
|
SRC=${rule%%,*}; DST=${rule##*,}
|
||||||
|
[ "$SRC" = "$rule" ] && SRC=""
|
||||||
|
|
||||||
|
|
||||||
|
### Split destination into address, port & mask fields
|
||||||
|
DADDR=${DST%%:*}; DTEMP=${DST##*:}
|
||||||
|
[ "$DADDR" = "$DST" ] && DTEMP=""
|
||||||
|
|
||||||
|
DPORT=${DTEMP%%/*}; DMASK=${DTEMP##*/}
|
||||||
|
[ "$DPORT" = "$DTEMP" ] && DMASK="0xffff"
|
||||||
|
|
||||||
|
|
||||||
|
### Split up source (if specified)
|
||||||
|
SADDR=""; SPORT=""
|
||||||
|
if [ -n "$SRC" ]; then
|
||||||
|
SADDR=${SRC%%:*}; STEMP=${SRC##*:}
|
||||||
|
[ "$SADDR" = "$SRC" ] && STEMP=""
|
||||||
|
|
||||||
|
SPORT=${STEMP%%/*}; SMASK=${STEMP##*/}
|
||||||
|
[ "$SPORT" = "$STEMP" ] && SMASK="0xffff"
|
||||||
|
fi
|
||||||
|
|
||||||
|
|
||||||
|
### Convert asterisks to empty strings
|
||||||
|
SADDR=${SADDR#\*}; DADDR=${DADDR#\*}
|
||||||
|
|
||||||
|
### Compose u32 filter rules
|
||||||
|
u32_s="${SPORT:+match ip sport $SPORT $SMASK}"
|
||||||
|
u32_s="${SADDR:+match ip src $SADDR} $u32_s"
|
||||||
|
u32_d="${DPORT:+match ip dport $DPORT $DMASK}"
|
||||||
|
u32_d="${DADDR:+match ip dst $DADDR} $u32_d"
|
||||||
|
|
||||||
|
### Uncomment the following if you want to see parsed rules
|
||||||
|
#echo "$rule: $u32_s $u32_d"
|
||||||
|
|
||||||
|
### Attach u32 filter to the appropriate class
|
||||||
|
tc filter add dev $DEVICE parent 1:0 protocol ip \
|
||||||
|
prio $PRIO_RULE u32 $u32_s $u32_d classid 1:$CLASS
|
||||||
|
done ### rule
|
||||||
|
|
||||||
|
[ "$1" = "compile" ] && echo
|
||||||
|
done ### classfile
|
||||||
|
;;
|
||||||
|
|
||||||
|
|
||||||
|
#############################################################################
|
||||||
|
################################# TIME CHECK ################################
|
||||||
|
#############################################################################
|
||||||
|
|
||||||
|
timecheck)
|
||||||
|
|
||||||
|
### Get time + weekday
|
||||||
|
TIME_TMP=`date +%w/%k:%M`
|
||||||
|
TIME_DOW=${TIME_TMP%%/*}
|
||||||
|
TIME_NOW=${TIME_TMP##*/}
|
||||||
|
|
||||||
|
### Load DEVICES, DEVFIELDS and CLASSLIST
|
||||||
|
cbq_init $CBQ_PATH
|
||||||
|
|
||||||
|
### Run through all classes
|
||||||
|
for classfile in $CLASSLIST; do
|
||||||
|
### Gather all TIME rules from class config
|
||||||
|
TIMESET=`sed -n 's/#.*//; s/[[:space:]]//g; /^TIME/ { s/.*=//; p; }' \
|
||||||
|
$CBQ_PATH/$classfile`
|
||||||
|
[ -z "$TIMESET" ] && continue
|
||||||
|
|
||||||
|
MATCH=0; CHANGE=0
|
||||||
|
for timerule in $TIMESET; do
|
||||||
|
TIME_ABS=`cbq_time2abs $TIME_NOW`
|
||||||
|
|
||||||
|
### Split TIME rule to pieces
|
||||||
|
TIMESPEC=${timerule%%;*}; PARAMS=${timerule##*;}
|
||||||
|
WEEKDAYS=${TIMESPEC%%/*}; INTERVAL=${TIMESPEC##*/}
|
||||||
|
BEG_TIME=${INTERVAL%%-*}; END_TIME=${INTERVAL##*-}
|
||||||
|
|
||||||
|
### Check the day-of-week (if present)
|
||||||
|
[ "$WEEKDAYS" != "$INTERVAL" -a \
|
||||||
|
-n "${WEEKDAYS##*$TIME_DOW*}" ] && continue
|
||||||
|
|
||||||
|
### Compute interval boundaries
|
||||||
|
BEG_ABS=`cbq_time2abs $BEG_TIME`
|
||||||
|
END_ABS=`cbq_time2abs $END_TIME`
|
||||||
|
|
||||||
|
### Midnight wrap fixup
|
||||||
|
if [ $BEG_ABS -gt $END_ABS ]; then
|
||||||
|
[ $TIME_ABS -le $END_ABS ] &&
|
||||||
|
TIME_ABS=$[TIME_ABS + 24*60]
|
||||||
|
|
||||||
|
END_ABS=$[END_ABS + 24*60]
|
||||||
|
fi
|
||||||
|
|
||||||
|
### If the time matches, remember params and set MATCH flag
|
||||||
|
if [ $TIME_ABS -ge $BEG_ABS -a $TIME_ABS -lt $END_ABS ]; then
|
||||||
|
TMP_RATE=${PARAMS%%/*}; PARAMS=${PARAMS#*/}
|
||||||
|
TMP_WGHT=${PARAMS%%/*}; TMP_PEAK=${PARAMS##*/}
|
||||||
|
|
||||||
|
[ "$TMP_PEAK" = "$TMP_WGHT" ] && TMP_PEAK=""
|
||||||
|
TMP_PEAK=${TMP_PEAK:+peakrate $TMP_PEAK}
|
||||||
|
|
||||||
|
MATCH=1
|
||||||
|
fi
|
||||||
|
done ### timerule
|
||||||
|
|
||||||
|
|
||||||
|
cbq_load_class $CBQ_PATH $classfile
|
||||||
|
|
||||||
|
### Get current RATE of CBQ class
|
||||||
|
RATE_NOW=`tc class show dev $DEVICE| sed -n \
|
||||||
|
"/cbq 1:$CLASS / { s/.*rate //; s/ .*//; p; q; }"`
|
||||||
|
[ -z "$RATE_NOW" ] && continue
|
||||||
|
|
||||||
|
### Time interval matched
|
||||||
|
if [ $MATCH -ne 0 ]; then
|
||||||
|
|
||||||
|
### Check if there is any change in class RATE
|
||||||
|
if [ "$RATE_NOW" != "$TMP_RATE" ]; then
|
||||||
|
NEW_RATE="$TMP_RATE"
|
||||||
|
NEW_WGHT="$TMP_WGHT"
|
||||||
|
NEW_PEAK="$TMP_PEAK"
|
||||||
|
CHANGE=1
|
||||||
|
fi
|
||||||
|
|
||||||
|
### Match not found, reset to default RATE if necessary
|
||||||
|
elif [ "$RATE_NOW" != "$RATE" ]; then
|
||||||
|
NEW_WGHT="$WEIGHT"
|
||||||
|
NEW_RATE="$RATE"
|
||||||
|
NEW_PEAK="$PEAK"
|
||||||
|
CHANGE=1
|
||||||
|
fi
|
||||||
|
|
||||||
|
### If there are no changes, go for next class
|
||||||
|
[ $CHANGE -eq 0 ] && continue
|
||||||
|
|
||||||
|
### Replace CBQ class
|
||||||
|
tc class replace dev $DEVICE classid 1:$CLASS cbq \
|
||||||
|
bandwidth $BANDWIDTH rate $NEW_RATE weight $NEW_WGHT prio $PRIO \
|
||||||
|
allot 1514 cell 8 maxburst 20 avpkt $AVPKT $BOUNDED $ISOLATED
|
||||||
|
|
||||||
|
### Replace leaf qdisc (if any)
|
||||||
|
if [ "$LEAF" = "tbf" ]; then
|
||||||
|
tc qdisc replace dev $DEVICE handle $CLASS tbf \
|
||||||
|
rate $NEW_RATE buffer $BUFFER limit $LIMIT mtu $MTU $NEW_PEAK
|
||||||
|
fi
|
||||||
|
|
||||||
|
cbq_message "$TIME_NOW: class $CLASS on $DEVICE changed rate ($RATE_NOW -> $NEW_RATE)"
|
||||||
|
done ### class file
|
||||||
|
;;
|
||||||
|
|
||||||
|
|
||||||
|
#############################################################################
|
||||||
|
################################## THE REST #################################
|
||||||
|
#############################################################################
|
||||||
|
|
||||||
|
stop)
|
||||||
|
cbq_off
|
||||||
|
;;
|
||||||
|
|
||||||
|
list)
|
||||||
|
cbq_show
|
||||||
|
;;
|
||||||
|
|
||||||
|
stats)
|
||||||
|
cbq_show -s
|
||||||
|
;;
|
||||||
|
|
||||||
|
restart)
|
||||||
|
shift
|
||||||
|
$0 stop
|
||||||
|
$0 start "$@"
|
||||||
|
;;
|
||||||
|
|
||||||
|
*)
|
||||||
|
echo "Usage: `basename $0` {start|compile|stop|restart|timecheck|list|stats}"
|
||||||
|
esac
|
||||||
|
|
@ -0,0 +1,76 @@
|
||||||
|
#! /bin/sh
|
||||||
|
|
||||||
|
TC=/home/root/tc
|
||||||
|
IP=/home/root/ip
|
||||||
|
DEVICE=eth1
|
||||||
|
BANDWIDTH="bandwidth 10Mbit"
|
||||||
|
|
||||||
|
# Attach CBQ on $DEVICE. It will have handle 1:.
|
||||||
|
# $BANDWIDTH is real $DEVICE bandwidth (10Mbit).
|
||||||
|
# avpkt is average packet size.
|
||||||
|
# mpu is minimal packet size.
|
||||||
|
|
||||||
|
$TC qdisc add dev $DEVICE root handle 1: cbq \
|
||||||
|
$BANDWIDTH avpkt 1000 mpu 64
|
||||||
|
|
||||||
|
# Create root class with classid 1:1. This step is not necessary.
|
||||||
|
# bandwidth is the same as on CBQ itself.
|
||||||
|
# rate == all the bandwidth
|
||||||
|
# allot is MTU + MAC header
|
||||||
|
# maxburst measures allowed class burstiness (please, read S. Floyd and VJ papers)
|
||||||
|
# est 1sec 8sec means, that kernel will evaluate average rate
|
||||||
|
# on this class with period 1sec and time constant 8sec.
|
||||||
|
# This rate is viewed with "tc -s class ls dev $DEVICE"
|
||||||
|
|
||||||
|
$TC class add dev $DEVICE parent 1:0 classid :1 est 1sec 8sec cbq \
|
||||||
|
$BANDWIDTH rate 10Mbit allot 1514 maxburst 50 avpkt 1000
|
||||||
|
|
||||||
|
# Bulk.
|
||||||
|
# New parameters are:
|
||||||
|
# weight, which is set to be proportional to
|
||||||
|
# "rate". It is not necessary, weight=1 will work as well.
|
||||||
|
# defmap and split say that best effort traffic, not classified
|
||||||
|
# by another means will fall to this class.
|
||||||
|
|
||||||
|
$TC class add dev $DEVICE parent 1:1 classid :2 est 1sec 8sec cbq \
|
||||||
|
$BANDWIDTH rate 4Mbit allot 1514 weight 500Kbit \
|
||||||
|
prio 6 maxburst 50 avpkt 1000 split 1:0 defmap ff3d
|
||||||
|
|
||||||
|
# OPTIONAL.
|
||||||
|
# Attach "sfq" qdisc to this class, quantum is MTU, perturb
|
||||||
|
# gives period of hash function perturbation in seconds.
|
||||||
|
#
|
||||||
|
$TC qdisc add dev $DEVICE parent 1:2 sfq quantum 1514b perturb 15
|
||||||
|
|
||||||
|
# Interactive-burst class
|
||||||
|
|
||||||
|
$TC class add dev $DEVICE parent 1:1 classid :3 est 2sec 16sec cbq \
|
||||||
|
$BANDWIDTH rate 1Mbit allot 1514 weight 100Kbit \
|
||||||
|
prio 2 maxburst 100 avpkt 1000 split 1:0 defmap c0
|
||||||
|
|
||||||
|
$TC qdisc add dev $DEVICE parent 1:3 sfq quantum 1514b perturb 15
|
||||||
|
|
||||||
|
# Background.
|
||||||
|
|
||||||
|
$TC class add dev $DEVICE parent 1:1 classid :4 est 1sec 8sec cbq \
|
||||||
|
$BANDWIDTH rate 100Kbit allot 1514 weight 10Mbit \
|
||||||
|
prio 7 maxburst 10 avpkt 1000 split 1:0 defmap 2
|
||||||
|
|
||||||
|
$TC qdisc add dev $DEVICE parent 1:4 sfq quantum 1514b perturb 15
|
||||||
|
|
||||||
|
# Realtime class for RSVP
|
||||||
|
|
||||||
|
$TC class add dev $DEVICE parent 1:1 classid 1:7FFE cbq \
|
||||||
|
rate 5Mbit $BANDWIDTH allot 1514b avpkt 1000 \
|
||||||
|
maxburst 20
|
||||||
|
|
||||||
|
# Reclassified realtime traffic
|
||||||
|
#
|
||||||
|
# New element: split is not 1:0, but 1:7FFE. It means,
|
||||||
|
# that only real-time packets, which violated policing filters
|
||||||
|
# or exceeded reshaping buffers will fall to it.
|
||||||
|
|
||||||
|
$TC class add dev $DEVICE parent 1:7FFE classid 1:7FFF est 4sec 32sec cbq \
|
||||||
|
rate 1Mbit $BANDWIDTH allot 1514b avpkt 1000 weight 10Kbit \
|
||||||
|
prio 6 maxburst 10 split 1:7FFE defmap ffff
|
||||||
|
|
||||||
|
|
@ -0,0 +1,446 @@
|
||||||
|
#!/bin/bash
|
||||||
|
#
|
||||||
|
# dhclient-script for Linux.
|
||||||
|
#
|
||||||
|
# This program is free software; you can redistribute it and/or
|
||||||
|
# modify it under the terms of the GNU General Public License
|
||||||
|
# as published by the Free Software Foundation; either version
|
||||||
|
# 2 of the License, or (at your option) any later version.
|
||||||
|
#
|
||||||
|
# Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
|
||||||
|
#
|
||||||
|
# Probably, I did not understand, what this funny feature as "alias"
|
||||||
|
# means exactly. For now I suppose, that it is a static address, which
|
||||||
|
# we should install and preserve.
|
||||||
|
#
|
||||||
|
|
||||||
|
exec >> /var/log/DHS.log 2>&1
|
||||||
|
|
||||||
|
echo dhc-script $* reason=$reason
|
||||||
|
set | grep "^\(old_\|new_\|check_\)"
|
||||||
|
|
||||||
|
LOG () {
|
||||||
|
echo LOG $* ;
|
||||||
|
}
|
||||||
|
|
||||||
|
# convert 8bit mask to length
|
||||||
|
# arg: $1 = mask
|
||||||
|
#
|
||||||
|
Mask8ToLen() {
|
||||||
|
local l=0;
|
||||||
|
|
||||||
|
while [ $l -le 7 ]; do
|
||||||
|
if [ $[ ( 1 << $l ) + $1 ] -eq 256 ]; then
|
||||||
|
return $[ 8 - $l ]
|
||||||
|
fi
|
||||||
|
l=$[ $l + 1 ]
|
||||||
|
done
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
# convert inet dotted quad mask to length
|
||||||
|
# arg: $1 = dotquad mask
|
||||||
|
#
|
||||||
|
MaskToLen() {
|
||||||
|
local masklen=0
|
||||||
|
local mask8=$1
|
||||||
|
|
||||||
|
case $1 in
|
||||||
|
0.0.0.0)
|
||||||
|
return 0;
|
||||||
|
;;
|
||||||
|
255.*.0.0)
|
||||||
|
masklen=8
|
||||||
|
mask8=${mask8#255.}
|
||||||
|
mask8=${mask8%.0.0}
|
||||||
|
;;
|
||||||
|
255.255.*.0)
|
||||||
|
masklen=16
|
||||||
|
mask8=${mask8#255.255.}
|
||||||
|
mask8=${mask8%.0}
|
||||||
|
;;
|
||||||
|
255.255.255.*)
|
||||||
|
masklen=24
|
||||||
|
mask8=${mask8#255.255.255.}
|
||||||
|
;;
|
||||||
|
*)
|
||||||
|
return 255
|
||||||
|
;;
|
||||||
|
esac
|
||||||
|
Mask8ToLen $mask8
|
||||||
|
return $[ $? + $masklen ]
|
||||||
|
}
|
||||||
|
|
||||||
|
# calculate ABC "natural" mask
|
||||||
|
# arg: $1 = dotquad address
|
||||||
|
#
|
||||||
|
ABCMask () {
|
||||||
|
local class;
|
||||||
|
|
||||||
|
class=${1%%.*}
|
||||||
|
|
||||||
|
if [ "$1" = "255.255.255.255" ]; then
|
||||||
|
echo $1
|
||||||
|
elif [ "$1" = "0.0.0.0" ]; then
|
||||||
|
echo $1
|
||||||
|
elif [ $class -ge 224 ]; then
|
||||||
|
echo 240.0.0.0
|
||||||
|
elif [ $class -ge 192 ]; then
|
||||||
|
echo 255.255.255.0
|
||||||
|
elif [ $class -ge 128 ]; then
|
||||||
|
echo 255.255.0.0
|
||||||
|
else
|
||||||
|
echo 255.0.0.0
|
||||||
|
fi
|
||||||
|
}
|
||||||
|
|
||||||
|
# calculate ABC "natural" mask length
|
||||||
|
# arg: $1 = dotquad address
|
||||||
|
#
|
||||||
|
ABCMaskLen () {
|
||||||
|
local class;
|
||||||
|
|
||||||
|
class=${1%%.*}
|
||||||
|
|
||||||
|
if [ "$1" = "255.255.255.255" ]; then
|
||||||
|
return 32
|
||||||
|
elif [ "$1" = "0.0.0.0" ]; then
|
||||||
|
return 0
|
||||||
|
elif [ $class -ge 224 ]; then
|
||||||
|
return 4;
|
||||||
|
elif [ $class -ge 192 ]; then
|
||||||
|
return 24;
|
||||||
|
elif [ $class -ge 128 ]; then
|
||||||
|
return 16;
|
||||||
|
else
|
||||||
|
return 8;
|
||||||
|
fi
|
||||||
|
}
|
||||||
|
|
||||||
|
# Delete IP address
|
||||||
|
# args: $1 = interface
|
||||||
|
# $2 = address
|
||||||
|
# $3 = mask
|
||||||
|
# $4 = broadcast
|
||||||
|
# $5 = label
|
||||||
|
#
|
||||||
|
DelINETAddr () {
|
||||||
|
local masklen=32
|
||||||
|
local addrid=$1
|
||||||
|
|
||||||
|
LOG DelINETAddr $*
|
||||||
|
|
||||||
|
if [ "$5" ]; then
|
||||||
|
addrid=$addrid:$5
|
||||||
|
fi
|
||||||
|
LOG ifconfig $addrid down
|
||||||
|
ifconfig $addrid down
|
||||||
|
}
|
||||||
|
|
||||||
|
# Add IP address
|
||||||
|
# args: $1 = interface
|
||||||
|
# $2 = address
|
||||||
|
# $3 = mask
|
||||||
|
# $4 = broadcast
|
||||||
|
# $5 = label
|
||||||
|
#
|
||||||
|
AddINETAddr () {
|
||||||
|
local mask_arg
|
||||||
|
local brd_arg
|
||||||
|
local addrid=$1
|
||||||
|
|
||||||
|
LOG AddINETAddr $*
|
||||||
|
|
||||||
|
if [ "$5" ]; then
|
||||||
|
addrid=$addrid:$5
|
||||||
|
fi
|
||||||
|
if [ "$3" ]; then
|
||||||
|
mask_arg="netmask $3"
|
||||||
|
fi
|
||||||
|
if [ "$4" ]; then
|
||||||
|
brd_arg="broadcast $4"
|
||||||
|
fi
|
||||||
|
|
||||||
|
LOG ifconfig $addrid $2 $mask_arg $brd_arg up
|
||||||
|
ifconfig $addrid $2 $mask_arg $brd_arg up
|
||||||
|
}
|
||||||
|
|
||||||
|
# Add default routes
|
||||||
|
# args: $1 = routers list
|
||||||
|
#
|
||||||
|
AddDefaultRoutes() {
|
||||||
|
local router
|
||||||
|
|
||||||
|
if [ "$1" ]; then
|
||||||
|
LOG AddDefaultRoutes $*
|
||||||
|
for router in $1; do
|
||||||
|
LOG route add default gw $router
|
||||||
|
route add default gw $router
|
||||||
|
done ;
|
||||||
|
fi
|
||||||
|
}
|
||||||
|
|
||||||
|
# Delete default routes
|
||||||
|
# args: $1 = routers list
|
||||||
|
#
|
||||||
|
DelDefaultRoutes() {
|
||||||
|
local router
|
||||||
|
|
||||||
|
if [ "$1" ]; then
|
||||||
|
LOG DelDefaultRoutes $*
|
||||||
|
|
||||||
|
for router in $1; do
|
||||||
|
LOG route del default gw $router
|
||||||
|
route del default gw $router
|
||||||
|
done
|
||||||
|
fi
|
||||||
|
}
|
||||||
|
|
||||||
|
# ping a host
|
||||||
|
# args: $1 = dotquad address of the host
|
||||||
|
#
|
||||||
|
PingNode() {
|
||||||
|
LOG PingNode $*
|
||||||
|
if ping -q -c 1 -w 2 $1 ; then
|
||||||
|
return 0;
|
||||||
|
fi
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
# Check (and add route, if alive) default routers
|
||||||
|
# args: $1 = routers list
|
||||||
|
# returns: 0 if at least one router is alive.
|
||||||
|
#
|
||||||
|
CheckRouterList() {
|
||||||
|
local router
|
||||||
|
local succeed=1
|
||||||
|
|
||||||
|
LOG CheckRouterList $*
|
||||||
|
|
||||||
|
for router in $1; do
|
||||||
|
if PingNode $router ; then
|
||||||
|
succeed=0
|
||||||
|
route add default gw $router
|
||||||
|
fi
|
||||||
|
done
|
||||||
|
return $succeed
|
||||||
|
}
|
||||||
|
|
||||||
|
# Delete/create static routes.
|
||||||
|
# args: $1 = operation (del/add)
|
||||||
|
# $2 = routes list in format "dst1 nexthop1 dst2 ..."
|
||||||
|
#
|
||||||
|
# BEWARE: this feature of DHCP is obsolete, because it does not
|
||||||
|
# support subnetting.
|
||||||
|
#
|
||||||
|
X-StaticRouteList() {
|
||||||
|
local op=$1
|
||||||
|
local lst="$2"
|
||||||
|
local masklen
|
||||||
|
|
||||||
|
LOG X-StaticRouteList $*
|
||||||
|
|
||||||
|
if [ "$lst" ]; then
|
||||||
|
set $lst
|
||||||
|
while [ $# -gt 1 ]; do
|
||||||
|
route $op -net $1 netmask `ABCMask "$1"` gw $2
|
||||||
|
shift; shift;
|
||||||
|
done
|
||||||
|
fi
|
||||||
|
}
|
||||||
|
|
||||||
|
# Create static routes.
|
||||||
|
# arg: $1 = routes list in format "dst1 nexthop1 dst2 ..."
|
||||||
|
#
|
||||||
|
AddStaticRouteList() {
|
||||||
|
LOG AddStaticRouteList $*
|
||||||
|
X-StaticRouteList add "$1"
|
||||||
|
}
|
||||||
|
|
||||||
|
# Delete static routes.
|
||||||
|
# arg: $1 = routes list in format "dst1 nexthop1 dst2 ..."
|
||||||
|
#
|
||||||
|
DelStaticRouteList() {
|
||||||
|
LOG DelStaticRouteList $*
|
||||||
|
X-StaticRouteList del "$1"
|
||||||
|
}
|
||||||
|
|
||||||
|
# Broadcast unsolicited ARP to update neighbours' caches.
|
||||||
|
# args: $1 = interface
|
||||||
|
# $2 = address
|
||||||
|
#
|
||||||
|
UnsolicitedARP() {
|
||||||
|
if [ -f /sbin/arping ]; then
|
||||||
|
/sbin/arping -A -c 1 -I "$1" "$2" &
|
||||||
|
(sleep 2 ; /sbin/arping -U -c 1 -I "$1" "$2" ) &
|
||||||
|
fi
|
||||||
|
}
|
||||||
|
|
||||||
|
# Duplicate address detection.
|
||||||
|
# args: $1 = interface
|
||||||
|
# $2 = test address
|
||||||
|
# returns: 0, if DAD succeeded.
|
||||||
|
DAD() {
|
||||||
|
if [ -f /sbin/arping ]; then
|
||||||
|
/sbin/arping -c 2 -w 3 -D -I "$1" "$2"
|
||||||
|
return $?
|
||||||
|
fi
|
||||||
|
return 0
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
# Setup resolver.
|
||||||
|
# args: NO
|
||||||
|
# domain and nameserver list are passed in global variables.
|
||||||
|
#
|
||||||
|
# NOTE: we try to be careful and not to break user supplied resolv.conf.
|
||||||
|
# The script mangles it, only if it has dhcp magic signature.
|
||||||
|
#
|
||||||
|
UpdateDNS() {
|
||||||
|
local nameserver
|
||||||
|
local idstring="#### Generated by DHCPCD"
|
||||||
|
|
||||||
|
LOG UpdateDNS $*
|
||||||
|
|
||||||
|
if [ "$new_domain_name" = "" -a "$new_domain_name_servers" = "" ]; then
|
||||||
|
return 0;
|
||||||
|
fi
|
||||||
|
|
||||||
|
echo $idstring > /etc/resolv.conf.dhcp
|
||||||
|
if [ "$new_domain_name" ]; then
|
||||||
|
echo search $new_domain_name >> /etc/resolv.conf.dhcp
|
||||||
|
fi
|
||||||
|
echo options ndots:1 >> /etc/resolv.conf.dhcp
|
||||||
|
|
||||||
|
if [ "$new_domain_name_servers" ]; then
|
||||||
|
for nameserver in $new_domain_name_servers; do
|
||||||
|
echo nameserver $nameserver >> /etc/resolv.conf.dhcp
|
||||||
|
done
|
||||||
|
else
|
||||||
|
echo nameserver 127.0.0.1 >> /etc/resolv.conf.dhcp
|
||||||
|
fi
|
||||||
|
|
||||||
|
if [ -f /etc/resolv.conf ]; then
|
||||||
|
if [ "`head -1 /etc/resolv.conf`" != "$idstring" ]; then
|
||||||
|
return 0
|
||||||
|
fi
|
||||||
|
# Skip the rewrite when neither the domain nor the server list changed.
# The trailing backslash is required: the test continues on the next line.
if [ "$old_domain_name" = "$new_domain_name" -a \
|
||||||
|
"$new_domain_name_servers" = "$old_domain_name_servers" ]; then
|
||||||
|
return 0
|
||||||
|
fi
|
||||||
|
fi
|
||||||
|
mv /etc/resolv.conf.dhcp /etc/resolv.conf
|
||||||
|
}
|
||||||
|
|
||||||
|
case $reason in
|
||||||
|
NBI)
|
||||||
|
exit 1
|
||||||
|
;;
|
||||||
|
|
||||||
|
MEDIUM)
|
||||||
|
exit 0
|
||||||
|
;;
|
||||||
|
|
||||||
|
PREINIT)
|
||||||
|
ifconfig $interface:dhcp down
|
||||||
|
ifconfig $interface:dhcp1 down
|
||||||
|
if [ -d /proc/sys/net/ipv4/conf/$interface ]; then
|
||||||
|
ifconfig $interface:dhcp 10.10.10.10 netmask 255.255.255.255
|
||||||
|
ifconfig $interface:dhcp down
|
||||||
|
if [ -d /proc/sys/net/ipv4/conf/$interface ]; then
|
||||||
|
LOG The interface $interface already configured.
|
||||||
|
fi
|
||||||
|
fi
|
||||||
|
ifconfig $interface:dhcp up
|
||||||
|
exit 0
|
||||||
|
;;
|
||||||
|
|
||||||
|
ARPSEND)
|
||||||
|
exit 0
|
||||||
|
;;
|
||||||
|
|
||||||
|
ARPCHECK)
|
||||||
|
if DAD "$interface" "$check_ip_address" ; then
|
||||||
|
exit 0
|
||||||
|
fi
|
||||||
|
exit 1
|
||||||
|
;;
|
||||||
|
|
||||||
|
BOUND|RENEW|REBIND|REBOOT)
|
||||||
|
if [ "$old_ip_address" -a "$alias_ip_address" -a \
|
||||||
|
"$alias_ip_address" != "$old_ip_address" ]; then
|
||||||
|
DelINETAddr "$interface" "$alias_ip_address" "$alias_subnet_mask" "$alias_broadcast_address" dhcp1
|
||||||
|
fi
|
||||||
|
if [ "$old_ip_address" -a "$old_ip_address" != "$new_ip_address" ]; then
|
||||||
|
DelINETAddr "$interface" "$old_ip_address" "$old_subnet_mask" "$old_broadcast_address" dhcp
|
||||||
|
DelDefaultRoutes "$old_routers"
|
||||||
|
DelStaticRouteList "$old_static_routes"
|
||||||
|
fi
|
||||||
|
if [ "$old_ip_address" = "" -o "$old_ip_address" != "$new_ip_address" -o \
|
||||||
|
"$reason" = "BOUND" -o "$reason" = "REBOOT" ]; then
|
||||||
|
AddINETAddr "$interface" "$new_ip_address" "$new_subnet_mask" "$new_broadcast_address" dhcp
|
||||||
|
AddStaticRouteList "$new_static_routes"
|
||||||
|
AddDefaultRoutes "$new_routers"
|
||||||
|
UnsolicitedARP "$interface" "$new_ip_address"
|
||||||
|
fi
|
||||||
|
if [ "$new_ip_address" != "$alias_ip_address" -a "$alias_ip_address" ]; then
|
||||||
|
AddINETAddr "$interface" "$alias_ip_address" "$alias_subnet_mask" "$alias_broadcast_address" dhcp1
|
||||||
|
fi
|
||||||
|
UpdateDNS
|
||||||
|
exit 0
|
||||||
|
;;
|
||||||
|
|
||||||
|
EXPIRE|FAIL)
|
||||||
|
if [ "$alias_ip_address" ]; then
|
||||||
|
DelINETAddr "$interface" "$alias_ip_address" "$alias_subnet_mask" "$alias_broadcast_address" dhcp1
|
||||||
|
fi
|
||||||
|
if [ "$old_ip_address" ]; then
|
||||||
|
DelINETAddr "$interface" "$old_ip_address" "$old_subnet_mask" "$old_broadcast_address" dhcp
|
||||||
|
DelDefaultRoutes "$old_routers"
|
||||||
|
DelStaticRouteList "$old_static_routes"
|
||||||
|
fi
|
||||||
|
if [ "$alias_ip_address" ]; then
|
||||||
|
AddINETAddr "$interface" "$alias_ip_address" "$alias_subnet_mask" "$alias_broadcast_address" dhcp1
|
||||||
|
fi
|
||||||
|
exit 0
|
||||||
|
;;
|
||||||
|
|
||||||
|
TIMEOUT)
|
||||||
|
if [ "$alias_ip_address" ]; then
|
||||||
|
DelINETAddr "$interface" "$alias_ip_address" "$alias_subnet_mask" "$alias_broadcast_address" dhcp1
|
||||||
|
fi
|
||||||
|
# Seems, <null address> means, that no more old leases found.
|
||||||
|
# Or does it mean bug in dhcpcd? 8) Fail for now.
|
||||||
|
if [ "$new_ip_address" = "<null address>" ]; then
|
||||||
|
if [ "$old_ip_address" ]; then
|
||||||
|
DelINETAddr "$interface" "$old_ip_address" "$old_subnet_mask" "$old_broadcast_address" dhcp
|
||||||
|
fi
|
||||||
|
if [ "$alias_ip_address" ]; then
|
||||||
|
AddINETAddr "$interface" "$alias_ip_address" "$alias_subnet_mask" "$alias_broadcast_address" dhcp1
|
||||||
|
fi
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
if DAD "$interface" "$new_ip_address" ; then
|
||||||
|
AddINETAddr "$interface" "$new_ip_address" "$new_subnet_mask" "$new_broadcast_address" dhcp
|
||||||
|
UnsolicitedARP "$interface" "$new_ip_address"
|
||||||
|
if [ "$alias_ip_address" -a "$alias_ip_address" != "$new_ip_address" ]; then
|
||||||
|
AddINETAddr "$interface" "$alias_ip_address" "$alias_subnet_mask" "$alias_broadcast_address" dhcp1
|
||||||
|
UnsolicitedARP "$interface" "$alias_ip_address"
|
||||||
|
fi
|
||||||
|
if CheckRouterList "$new_routers" ; then
|
||||||
|
AddStaticRouteList "$new_static_routes"
|
||||||
|
UpdateDNS
|
||||||
|
exit 0
|
||||||
|
fi
|
||||||
|
fi
|
||||||
|
DelINETAddr "$interface" "$new_ip_address" "$new_subnet_mask" "$new_broadcast_address" dhcp
|
||||||
|
DelDefaultRoutes "$old_routers"
|
||||||
|
DelStaticRouteList "$old_static_routes"
|
||||||
|
if [ "$alias_ip_address" ]; then
|
||||||
|
AddINETAddr "$interface" "$alias_ip_address" "$alias_subnet_mask" "$alias_broadcast_address" dhcp1
|
||||||
|
fi
|
||||||
|
exit 1
|
||||||
|
;;
|
||||||
|
esac
|
||||||
|
|
||||||
|
exit 0
|
||||||
|
|
@ -0,0 +1,68 @@
|
||||||
|
#! /bin/sh -x
|
||||||
|
#
|
||||||
|
# sample script on using the ingress capabilities
|
||||||
|
# This script just tags on the ingress interface using Ipchains
|
||||||
|
# the result is used for fast classification and re-marking
|
||||||
|
# on the egress interface
|
||||||
|
#
|
||||||
|
#path to various utilities;
|
||||||
|
#change to reflect yours.
|
||||||
|
#
|
||||||
|
IPROUTE=/root/DS-6-beta/iproute2-990530-dsing
|
||||||
|
TC=$IPROUTE/tc/tc
|
||||||
|
IP=$IPROUTE/ip/ip
|
||||||
|
IPCHAINS=/root/DS-6-beta/ipchains-1.3.9/ipchains
|
||||||
|
INDEV=eth2
|
||||||
|
EGDEV="dev eth1"
|
||||||
|
#
|
||||||
|
# tag all incoming packets from host 10.2.0.24 to value 1
|
||||||
|
# tag all incoming packets from host 10.2.0.3 to value 2
|
||||||
|
# tag the rest of incoming packets from subnet 10.2.0.0/24 to value 3
|
||||||
|
#These values are used in the egress
|
||||||
|
#
|
||||||
|
############################################################
|
||||||
|
$IPCHAINS -A input -s 10.2.0.4/24 -m 3
|
||||||
|
$IPCHAINS -A input -i $INDEV -s 10.2.0.24 -m 1
|
||||||
|
$IPCHAINS -A input -i $INDEV -s 10.2.0.3 -m 2
|
||||||
|
|
||||||
|
######################## Egress side ########################
|
||||||
|
|
||||||
|
|
||||||
|
# attach a dsmarker
|
||||||
|
#
|
||||||
|
$TC qdisc add $EGDEV handle 1:0 root dsmark indices 64 set_tc_index
|
||||||
|
#
|
||||||
|
# values of the DSCP to change depending on the class
|
||||||
|
#
|
||||||
|
#becomes EF
|
||||||
|
$TC class change $EGDEV classid 1:1 dsmark mask 0x3 \
|
||||||
|
value 0xb8
|
||||||
|
#becomes AF11
|
||||||
|
$TC class change $EGDEV classid 1:2 dsmark mask 0x3 \
|
||||||
|
value 0x28
|
||||||
|
#becomes AF21
|
||||||
|
$TC class change $EGDEV classid 1:3 dsmark mask 0x3 \
|
||||||
|
value 0x48
|
||||||
|
#
|
||||||
|
#
|
||||||
|
# The class mapping
|
||||||
|
#
|
||||||
|
$TC filter add $EGDEV parent 1:0 protocol ip prio 4 handle 1 fw classid 1:1
|
||||||
|
$TC filter add $EGDEV parent 1:0 protocol ip prio 4 handle 2 fw classid 1:2
|
||||||
|
$TC filter add $EGDEV parent 1:0 protocol ip prio 4 handle 3 fw classid 1:3
|
||||||
|
#
|
||||||
|
|
||||||
|
#
|
||||||
|
echo "---- qdisc parameters Ingress ----------"
|
||||||
|
$TC qdisc ls dev $INDEV
|
||||||
|
echo "---- Class parameters Ingress ----------"
|
||||||
|
$TC class ls dev $INDEV
|
||||||
|
echo "---- filter parameters Ingress ----------"
|
||||||
|
$TC filter ls dev $INDEV parent 1:0
|
||||||
|
|
||||||
|
echo "---- qdisc parameters Egress ----------"
|
||||||
|
$TC qdisc ls $EGDEV
|
||||||
|
echo "---- Class parameters Egress ----------"
|
||||||
|
$TC class ls $EGDEV
|
||||||
|
echo "---- filter parameters Egress ----------"
|
||||||
|
$TC filter ls $EGDEV parent 1:0
|
||||||
|
|
@ -0,0 +1,87 @@
|
||||||
|
#! /bin/sh -x
|
||||||
|
#
|
||||||
|
# sample script on using the ingress capabilities
|
||||||
|
# This script tags the fwmark on the ingress interface using IPchains
|
||||||
|
# the result is used first for policing on the Ingress interface then
|
||||||
|
# for fast classification and re-marking
|
||||||
|
# on the egress interface
|
||||||
|
#
|
||||||
|
#path to various utilities;
|
||||||
|
#change to reflect yours.
|
||||||
|
#
|
||||||
|
IPROUTE=/root/DS-6-beta/iproute2-990530-dsing
|
||||||
|
TC=$IPROUTE/tc/tc
|
||||||
|
IP=$IPROUTE/ip/ip
|
||||||
|
IPCHAINS=/root/DS-6-beta/ipchains-1.3.9/ipchains
|
||||||
|
INDEV=eth2
|
||||||
|
EGDEV="dev eth1"
|
||||||
|
#
|
||||||
|
# tag all incoming packets from host 10.2.0.24 to value 1
|
||||||
|
# tag all incoming packets from host 10.2.0.3 to value 2
|
||||||
|
# tag the rest of incoming packets from subnet 10.2.0.0/24 to value 3
|
||||||
|
#These values are used in the egress
|
||||||
|
############################################################
|
||||||
|
$IPCHAINS -A input -s 10.2.0.0/24 -m 3
|
||||||
|
$IPCHAINS -A input -i $INDEV -s 10.2.0.24 -m 1
|
||||||
|
$IPCHAINS -A input -i $INDEV -s 10.2.0.3 -m 2
|
||||||
|
############################################################
|
||||||
|
#
|
||||||
|
# install the ingress qdisc on the ingress interface
|
||||||
|
############################################################
|
||||||
|
$TC qdisc add dev $INDEV handle ffff: ingress
|
||||||
|
############################################################
|
||||||
|
|
||||||
|
#
|
||||||
|
# attach a fw classifier to the ingress which polices anything marked
|
||||||
|
# by ipchains to tag value 3 (The rest of the subnet packets -- not
|
||||||
|
# tag 1 or 2) to not go beyond 1.5Mbps
|
||||||
|
# Allow up to at least 60 packets to burst (assuming maximum packet
|
||||||
|
# size of 1.5 KB) in the long run and up to about 6 packets in the
|
||||||
|
# short run
|
||||||
|
|
||||||
|
############################################################
|
||||||
|
$TC filter add dev $INDEV parent ffff: protocol ip prio 50 handle 3 fw \
|
||||||
|
police rate 1500kbit burst 90k mtu 9k drop flowid :1
|
||||||
|
############################################################
|
||||||
|
|
||||||
|
######################## Egress side ########################
|
||||||
|
|
||||||
|
|
||||||
|
# attach a dsmarker
|
||||||
|
#
|
||||||
|
$TC qdisc add $EGDEV handle 1:0 root dsmark indices 64
|
||||||
|
#
|
||||||
|
# values of the DSCP to change depending on the class
|
||||||
|
#
|
||||||
|
$TC class change $EGDEV classid 1:1 dsmark mask 0x3 \
|
||||||
|
value 0xb8
|
||||||
|
$TC class change $EGDEV classid 1:2 dsmark mask 0x3 \
|
||||||
|
value 0x28
|
||||||
|
$TC class change $EGDEV classid 1:3 dsmark mask 0x3 \
|
||||||
|
value 0x48
|
||||||
|
#
|
||||||
|
#
|
||||||
|
# The class mapping
|
||||||
|
#
|
||||||
|
$TC filter add $EGDEV parent 1:0 protocol ip prio 4 handle 1 fw classid 1:1
|
||||||
|
$TC filter add $EGDEV parent 1:0 protocol ip prio 4 handle 2 fw classid 1:2
|
||||||
|
$TC filter add $EGDEV parent 1:0 protocol ip prio 4 handle 3 fw classid 1:3
|
||||||
|
#
|
||||||
|
|
||||||
|
#
|
||||||
|
echo "---- qdisc parameters Ingress ----------"
|
||||||
|
$TC qdisc ls dev $INDEV
|
||||||
|
echo "---- Class parameters Ingress ----------"
|
||||||
|
$TC class ls dev $INDEV
|
||||||
|
echo "---- filter parameters Ingress ----------"
|
||||||
|
$TC filter ls dev $INDEV parent ffff:
|
||||||
|
|
||||||
|
echo "---- qdisc parameters Egress ----------"
|
||||||
|
$TC qdisc ls $EGDEV
|
||||||
|
echo "---- Class parameters Egress ----------"
|
||||||
|
$TC class ls $EGDEV
|
||||||
|
echo "---- filter parameters Egress ----------"
|
||||||
|
$TC filter ls $EGDEV parent 1:0
|
||||||
|
#
|
||||||
|
#deleting the ingress qdisc
|
||||||
|
#$TC qdisc del $DEV ingress
|
||||||
|
|
@ -0,0 +1,170 @@
|
||||||
|
#! /bin/sh -x
|
||||||
|
#
|
||||||
|
# sample script on using the ingress capabilities using u32 classifier
|
||||||
|
# This script tags tcindex based on metering on the ingress
|
||||||
|
# interface the result is used for fast classification and re-marking
|
||||||
|
# on the egress interface
|
||||||
|
# This is an example of a color aware mode marker with PIR configured
|
||||||
|
# based on draft-wahjak-mcm-00.txt (section 3.1)
|
||||||
|
#
|
||||||
|
# The colors are defined using the Diffserv Fields
|
||||||
|
#path to various utilities;
|
||||||
|
#change to reflect yours.
|
||||||
|
#
|
||||||
|
IPROUTE=/usr/src/iproute2-current
|
||||||
|
TC=$IPROUTE/tc/tc
|
||||||
|
IP=$IPROUTE/ip/ip
|
||||||
|
INDEV=eth0
|
||||||
|
EGDEV="dev eth1"
|
||||||
|
CIR1=1500kbit
|
||||||
|
CIR2=1000kbit
|
||||||
|
|
||||||
|
#The CBS is about 60 MTU sized packets
|
||||||
|
CBS1=90k
|
||||||
|
CBS2=90k
|
||||||
|
|
||||||
|
############################################################
|
||||||
|
#
|
||||||
|
# install the ingress qdisc on the ingress interface
|
||||||
|
$TC qdisc add dev $INDEV handle ffff: ingress
|
||||||
|
############################################################
|
||||||
|
#
|
||||||
|
# Create u32 filters
|
||||||
|
$TC filter add dev $INDEV parent ffff: protocol ip prio 4 handle 1: u32 \
|
||||||
|
divisor 1
|
||||||
|
############################################################
|
||||||
|
|
||||||
|
# The meters: Note that we have shared meters in this case as identified
|
||||||
|
# by the index parameter
|
||||||
|
meter1=" police index 1 rate $CIR1 burst $CBS1 "
|
||||||
|
meter2=" police index 2 rate $CIR2 burst $CBS1 "
|
||||||
|
meter3=" police index 3 rate $CIR2 burst $CBS2 "
|
||||||
|
meter4=" police index 4 rate $CIR1 burst $CBS2 "
|
||||||
|
meter5=" police index 5 rate $CIR1 burst $CBS2 "
|
||||||
|
|
||||||
|
# All packets are marked with a tcindex value which is used on the egress
|
||||||
|
# tcindex 1 maps to AF41, 2->AF42, 3->AF43, 4->BE
|
||||||
|
|
||||||
|
# *********************** AF41 ***************************
|
||||||
|
#AF41 (DSCP 0x22) is passed on with a tcindex value 1
|
||||||
|
#if it doesnt exceed its CIR/CBS
|
||||||
|
#policer 1 is used.
|
||||||
|
#
|
||||||
|
$TC filter add dev $INDEV parent ffff: protocol ip prio 4 u32 \
|
||||||
|
match ip tos 0x88 0xfc \
|
||||||
|
$meter1 \
|
||||||
|
continue flowid :1
|
||||||
|
#
|
||||||
|
# if it exceeds the above but not the extra rate/burst below, it gets a
|
||||||
|
# tcindex value of 2
|
||||||
|
# policer 2 is used
|
||||||
|
#
|
||||||
|
$TC filter add dev $INDEV parent ffff: protocol ip prio 5 u32 \
|
||||||
|
match ip tos 0x88 0xfc \
|
||||||
|
$meter2 \
|
||||||
|
continue flowid :2
|
||||||
|
#
|
||||||
|
# if it exceeds the above but not the rule below, it gets a tcindex value
|
||||||
|
# of 3 (policer 3)
|
||||||
|
#
|
||||||
|
$TC filter add dev $INDEV parent ffff: protocol ip prio 6 u32 \
|
||||||
|
match ip tos 0x88 0xfc \
|
||||||
|
$meter3 \
|
||||||
|
drop flowid :3
|
||||||
|
#
|
||||||
|
|
||||||
|
# *********************** AF42 ***************************
|
||||||
|
#AF42 (DSCP 0x24) is passed on with a tcindex value 2
|
||||||
|
#if it doesnt exceed its CIR/CBS
|
||||||
|
#policer 2 is used. Note that this is shared with the AF41
|
||||||
|
#
|
||||||
|
#
|
||||||
|
$TC filter add dev $INDEV parent ffff: protocol ip prio 5 u32 \
|
||||||
|
match ip tos 0x90 0xfc \
|
||||||
|
$meter2 \
|
||||||
|
continue flowid :2
|
||||||
|
#
|
||||||
|
# if it exceeds the above but not the rule below, it gets a tcindex value
|
||||||
|
# of 3 (policer 3)
|
||||||
|
#
|
||||||
|
$TC filter add dev $INDEV parent ffff: protocol ip prio 6 u32 \
|
||||||
|
match ip tos 0x90 0xfc \
|
||||||
|
$meter3 \
|
||||||
|
drop flowid :3
|
||||||
|
#
|
||||||
|
# *********************** AF43 ***************************
|
||||||
|
#
|
||||||
|
#AF43 (DSCP 0x26) is passed on with a tcindex value 3
|
||||||
|
#if it doesnt exceed its CIR/CBS
|
||||||
|
#policer 3 is used. Note that this is shared with the AF41 and AF42
|
||||||
|
#
|
||||||
|
$TC filter add dev $INDEV parent ffff: protocol ip prio 6 u32 \
|
||||||
|
match ip tos 0x98 0xfc \
|
||||||
|
$meter3 \
|
||||||
|
drop flowid :3
|
||||||
|
#
|
||||||
|
# *********************** BE ***************************
|
||||||
|
#
|
||||||
|
# Anything else (not from the AF4*) gets discarded if it
|
||||||
|
# exceeds 1Mbps and by default goes to BE if it doesnt
|
||||||
|
# Note that the BE class is also used by the AF4* in the worst
|
||||||
|
# case
|
||||||
|
#
|
||||||
|
$TC filter add dev $INDEV parent ffff: protocol ip prio 7 u32 \
|
||||||
|
match ip src 0/0\
|
||||||
|
$meter4 \
|
||||||
|
drop flowid :4
|
||||||
|
|
||||||
|
######################## Egress side ########################
|
||||||
|
|
||||||
|
# attach a dsmarker
|
||||||
|
#
|
||||||
|
$TC qdisc add $EGDEV handle 1:0 root dsmark indices 64
|
||||||
|
#
|
||||||
|
# values of the DSCP to change depending on the class
|
||||||
|
#note that the ECN bits are masked out
|
||||||
|
#
|
||||||
|
#AF41 (0x88 is 0x22 shifted to the left by two bits)
|
||||||
|
#
|
||||||
|
$TC class change $EGDEV classid 1:1 dsmark mask 0x3 \
|
||||||
|
value 0x88
|
||||||
|
#AF42
|
||||||
|
$TC class change $EGDEV classid 1:2 dsmark mask 0x3 \
|
||||||
|
value 0x90
|
||||||
|
#AF43
|
||||||
|
$TC class change $EGDEV classid 1:3 dsmark mask 0x3 \
|
||||||
|
value 0x98
|
||||||
|
#BE
|
||||||
|
# BE gets its own class 1:4 (the tcindex filter for handle 4 targets 1:4);
# writing to 1:3 here would overwrite the AF43 marking set just above.
$TC class change $EGDEV classid 1:4 dsmark mask 0x3 \
|
||||||
|
value 0x0
|
||||||
|
#
|
||||||
|
#
|
||||||
|
# The class mapping
|
||||||
|
#
|
||||||
|
$TC filter add $EGDEV parent 1:0 protocol ip prio 1 \
|
||||||
|
handle 1 tcindex classid 1:1
|
||||||
|
$TC filter add $EGDEV parent 1:0 protocol ip prio 1 \
|
||||||
|
handle 2 tcindex classid 1:2
|
||||||
|
$TC filter add $EGDEV parent 1:0 protocol ip prio 1 \
|
||||||
|
handle 3 tcindex classid 1:3
|
||||||
|
$TC filter add $EGDEV parent 1:0 protocol ip prio 1 \
|
||||||
|
handle 4 tcindex classid 1:4
|
||||||
|
#
|
||||||
|
|
||||||
|
#
|
||||||
|
echo "---- qdisc parameters Ingress ----------"
|
||||||
|
$TC qdisc ls dev $INDEV
|
||||||
|
echo "---- Class parameters Ingress ----------"
|
||||||
|
$TC class ls dev $INDEV
|
||||||
|
echo "---- filter parameters Ingress ----------"
|
||||||
|
$TC filter ls dev $INDEV parent ffff:
|
||||||
|
|
||||||
|
echo "---- qdisc parameters Egress ----------"
|
||||||
|
$TC qdisc ls $EGDEV
|
||||||
|
echo "---- Class parameters Egress ----------"
|
||||||
|
$TC class ls $EGDEV
|
||||||
|
echo "---- filter parameters Egress ----------"
|
||||||
|
$TC filter ls $EGDEV parent 1:0
|
||||||
|
#
|
||||||
|
#deleting the ingress qdisc
|
||||||
|
#$TC qdisc del $INDEV ingress
|
||||||
|
|
@ -0,0 +1,132 @@
|
||||||
|
#! /bin/sh -x
|
||||||
|
#
|
||||||
|
# sample script on using the ingress capabilities
|
||||||
|
# This script fwmark tags(IPchains) based on metering on the ingress
|
||||||
|
# interface the result is used for fast classification and re-marking
|
||||||
|
# on the egress interface
|
||||||
|
# This is an example of a color blind mode marker with no PIR configured
|
||||||
|
# based on draft-wahjak-mcm-00.txt (section 3.1)
|
||||||
|
#
|
||||||
|
#path to various utilities;
|
||||||
|
#change to reflect yours.
|
||||||
|
#
|
||||||
|
IPROUTE=/root/DS-6-beta/iproute2-990530-dsing
|
||||||
|
TC=$IPROUTE/tc/tc
|
||||||
|
IP=$IPROUTE/ip/ip
|
||||||
|
IPCHAINS=/root/DS-6-beta/ipchains-1.3.9/ipchains
|
||||||
|
INDEV=eth2
|
||||||
|
EGDEV="dev eth1"
|
||||||
|
CIR1=1500kbit
|
||||||
|
CIR2=1000kbit
|
||||||
|
|
||||||
|
#The CBS is about 60 MTU sized packets
|
||||||
|
CBS1=90k
|
||||||
|
CBS2=90k
|
||||||
|
|
||||||
|
meter1="police rate $CIR1 burst $CBS1 "
|
||||||
|
meter2="police rate $CIR1 burst $CBS2 "
|
||||||
|
meter3="police rate $CIR2 burst $CBS1 "
|
||||||
|
meter4="police rate $CIR2 burst $CBS2 "
|
||||||
|
meter5="police rate $CIR2 burst $CBS2 "
|
||||||
|
#
|
||||||
|
# tag the rest of incoming packets from subnet 10.2.0.0/24 to fw value 1
|
||||||
|
# tag all incoming packets from any other subnet to fw tag 2
|
||||||
|
############################################################
|
||||||
|
$IPCHAINS -A input -i $INDEV -s 0/0 -m 2
|
||||||
|
$IPCHAINS -A input -i $INDEV -s 10.2.0.0/24 -m 1
|
||||||
|
#
|
||||||
|
############################################################
|
||||||
|
# install the ingress qdisc on the ingress interface
|
||||||
|
$TC qdisc add dev $INDEV handle ffff: ingress
|
||||||
|
#
|
||||||
|
############################################################
|
||||||
|
|
||||||
|
# All packets are marked with a tcindex value which is used on the egress
|
||||||
|
# tcindex 1 maps to AF41, 2->AF42, 3->AF43, 4->BE
|
||||||
|
#
|
||||||
|
############################################################
|
||||||
|
#
|
||||||
|
# anything with fw tag of 1 is passed on with a tcindex value 1
|
||||||
|
#if it doesnt exceed its allocated rate (CIR/CBS)
|
||||||
|
#
|
||||||
|
$TC filter add dev $INDEV parent ffff: protocol ip prio 4 handle 1 fw \
|
||||||
|
$meter1 \
|
||||||
|
continue flowid 4:1
|
||||||
|
#
|
||||||
|
# if it exceeds the above but not the extra rate/burst below, it gets a
|
||||||
|
#tcindex value of 2
|
||||||
|
#
|
||||||
|
$TC filter add dev $INDEV parent ffff: protocol ip prio 5 handle 1 fw \
|
||||||
|
$meter2 \
|
||||||
|
continue flowid 4:2
|
||||||
|
#
|
||||||
|
# if it exceeds the above but not the rule below, it gets a tcindex value
|
||||||
|
# of 3
|
||||||
|
#
|
||||||
|
$TC filter add dev $INDEV parent ffff: protocol ip prio 6 handle 1 fw \
|
||||||
|
$meter3 \
|
||||||
|
drop flowid 4:3
|
||||||
|
#
|
||||||
|
# Anything else (not from the subnet 10.2.0.0/24) gets discarded if it
|
||||||
|
# exceeds 1Mbps and by default goes to BE if it doesnt
|
||||||
|
#
|
||||||
|
$TC filter add dev $INDEV parent ffff: protocol ip prio 6 handle 2 fw \
|
||||||
|
$meter5 \
|
||||||
|
drop flowid 4:4
|
||||||
|
|
||||||
|
|
||||||
|
######################## Egress side ########################
|
||||||
|
|
||||||
|
|
||||||
|
# attach a dsmarker
|
||||||
|
#
|
||||||
|
$TC qdisc add $EGDEV handle 1:0 root dsmark indices 64
|
||||||
|
#
|
||||||
|
# values of the DSCP to change depending on the class
|
||||||
|
#note that the ECN bits are masked out
|
||||||
|
#
|
||||||
|
#AF41 (0x88 is 0x22 shifted to the left by two bits)
|
||||||
|
#
|
||||||
|
$TC class change $EGDEV classid 1:1 dsmark mask 0x3 \
|
||||||
|
value 0x88
|
||||||
|
#AF42
|
||||||
|
$TC class change $EGDEV classid 1:2 dsmark mask 0x3 \
|
||||||
|
value 0x90
|
||||||
|
#AF43
|
||||||
|
$TC class change $EGDEV classid 1:3 dsmark mask 0x3 \
|
||||||
|
value 0x98
|
||||||
|
#BE
|
||||||
|
$TC class change $EGDEV classid 1:4 dsmark mask 0x3 \
|
||||||
|
value 0x0
|
||||||
|
#
|
||||||
|
#
|
||||||
|
# The class mapping (using tcindex; could easily have
|
||||||
|
# replaced it with the fw classifier instead)
|
||||||
|
#
|
||||||
|
$TC filter add $EGDEV parent 1:0 protocol ip prio 1 \
|
||||||
|
handle 1 tcindex classid 1:1
|
||||||
|
$TC filter add $EGDEV parent 1:0 protocol ip prio 1 \
|
||||||
|
handle 2 tcindex classid 1:2
|
||||||
|
$TC filter add $EGDEV parent 1:0 protocol ip prio 1 \
|
||||||
|
handle 3 tcindex classid 1:3
|
||||||
|
$TC filter add $EGDEV parent 1:0 protocol ip prio 1 \
|
||||||
|
handle 4 tcindex classid 1:4
|
||||||
|
#
|
||||||
|
|
||||||
|
#
|
||||||
|
echo "---- qdisc parameters Ingress ----------"
|
||||||
|
$TC qdisc ls dev $INDEV
|
||||||
|
echo "---- Class parameters Ingress ----------"
|
||||||
|
$TC class ls dev $INDEV
|
||||||
|
echo "---- filter parameters Ingress ----------"
|
||||||
|
$TC filter ls dev $INDEV parent ffff:
|
||||||
|
|
||||||
|
echo "---- qdisc parameters Egress ----------"
|
||||||
|
$TC qdisc ls $EGDEV
|
||||||
|
echo "---- Class parameters Egress ----------"
|
||||||
|
$TC class ls $EGDEV
|
||||||
|
echo "---- filter parameters Egress ----------"
|
||||||
|
$TC filter ls $EGDEV parent 1:0
|
||||||
|
#
|
||||||
|
#deleting the ingress qdisc
|
||||||
|
#$TC qdisc del $INDEV ingress
|
||||||
|
|
@ -0,0 +1,198 @@
|
||||||
|
#! /bin/sh -x
|
||||||
|
#
|
||||||
|
# sample script on using the ingress capabilities using u32 classifier
|
||||||
|
# This script tags tcindex based on metering on the ingress
|
||||||
|
# interface the result is used for fast classification and re-marking
|
||||||
|
# on the egress interface
|
||||||
|
# This is an example of a color aware mode marker with PIR configured
|
||||||
|
# based on draft-wahjak-mcm-00.txt (section 3.2)
|
||||||
|
#
|
||||||
|
# The colors are defined using the Diffserv Fields
|
||||||
|
#path to various utilities;
|
||||||
|
#change to reflect yours.
|
||||||
|
#
|
||||||
|
IPROUTE=/root/DS-6-beta/iproute2-990530-dsing
|
||||||
|
TC=$IPROUTE/tc/tc
|
||||||
|
IP=$IPROUTE/ip/ip
|
||||||
|
IPCHAINS=/root/DS-6-beta/ipchains-1.3.9/ipchains
|
||||||
|
INDEV=eth2
|
||||||
|
EGDEV="dev eth1"
|
||||||
|
CIR1=1000kbit
|
||||||
|
CIR2=500kbit
|
||||||
|
# the PIR is what is in excess of the CIR
|
||||||
|
PIR1=1000kbit
|
||||||
|
PIR2=500kbit
|
||||||
|
|
||||||
|
#The CBS is about 60 MTU sized packets
|
||||||
|
CBS1=90k
|
||||||
|
CBS2=90k
|
||||||
|
#the EBS is about 20 max sized packets
|
||||||
|
EBS1=30k
|
||||||
|
EBS2=30k
|
||||||
|
|
||||||
|
# The meters: Note that we have shared meters in this case as identified
|
||||||
|
# by the index parameter
|
||||||
|
meter1=" police index 1 rate $CIR1 burst $CBS1 "
|
||||||
|
meter1a=" police index 2 rate $PIR1 burst $EBS1 "
|
||||||
|
meter2=" police index 3 rate $CIR2 burst $CBS1 "
|
||||||
|
meter2a=" police index 4 rate $PIR2 burst $EBS1 "
|
||||||
|
meter3=" police index 5 rate $CIR2 burst $CBS2 "
|
||||||
|
meter3a=" police index 6 rate $PIR2 burst $EBS2 "
|
||||||
|
meter4=" police index 7 rate $CIR1 burst $CBS2 "
|
||||||
|
|
||||||
|
############################################################
|
||||||
|
#
|
||||||
|
# install the ingress qdisc on the ingress interface
|
||||||
|
$TC qdisc add dev $INDEV handle ffff: ingress
|
||||||
|
############################################################
|
||||||
|
#
|
||||||
|
# All packets are marked with a tcindex value which is used on the egress
|
||||||
|
# tcindex 1 maps to AF41, 2->AF42, 3->AF43, 4->BE
|
||||||
|
#
|
||||||
|
# *********************** AF41 ***************************
|
||||||
|
#AF41 (DSCP 0x22) is passed on with a tcindex value 1
|
||||||
|
#if it doesnt exceed its CIR/CBS + PIR/EBS
|
||||||
|
#policer 1 is used.
|
||||||
|
#
|
||||||
|
$TC filter add dev $INDEV parent ffff: protocol ip prio 1 u32 \
|
||||||
|
match ip tos 0x88 0xfc \
|
||||||
|
$meter1 \
|
||||||
|
continue flowid :1
|
||||||
|
$TC filter add dev $INDEV parent ffff: protocol ip prio 2 u32 \
|
||||||
|
match ip tos 0x88 0xfc \
|
||||||
|
$meter1a \
|
||||||
|
continue flowid :1
|
||||||
|
#
|
||||||
|
# if it exceeds the above but not the extra rate/burst below, it gets a
|
||||||
|
# tcindex value of 2
|
||||||
|
# policer 2 is used
|
||||||
|
#
|
||||||
|
$TC filter add dev $INDEV parent ffff: protocol ip prio 3 u32 \
|
||||||
|
match ip tos 0x88 0xfc \
|
||||||
|
$meter2 \
|
||||||
|
continue flowid :2
|
||||||
|
$TC filter add dev $INDEV parent ffff: protocol ip prio 4 u32 \
|
||||||
|
match ip tos 0x88 0xfc \
|
||||||
|
$meter2a \
|
||||||
|
continue flowid :2
|
||||||
|
#
|
||||||
|
# if it exceeds the above but not the rule below, it gets a tcindex value
|
||||||
|
# of 3 (policer 3)
|
||||||
|
#
|
||||||
|
$TC filter add dev $INDEV parent ffff: protocol ip prio 5 u32 \
|
||||||
|
match ip tos 0x88 0xfc \
|
||||||
|
$meter3 \
|
||||||
|
continue flowid :3
|
||||||
|
$TC filter add dev $INDEV parent ffff: protocol ip prio 6 u32 \
|
||||||
|
match ip tos 0x88 0xfc \
|
||||||
|
$meter3a \
|
||||||
|
drop flowid :3
|
||||||
|
#
|
||||||
|
# *********************** AF42 ***************************
|
||||||
|
#AF42 (DSCP 0x24) is passed on with a tcindex value 2
|
||||||
|
#if it doesnt exceed its CIR/CBS + PIR/EBS
|
||||||
|
#policer 2 is used. Note that this is shared with the AF41
|
||||||
|
#
|
||||||
|
#
|
||||||
|
$TC filter add dev $INDEV parent ffff: protocol ip prio 8 u32 \
|
||||||
|
match ip tos 0x90 0xfc \
|
||||||
|
$meter2 \
|
||||||
|
continue flowid :2
|
||||||
|
$TC filter add dev $INDEV parent ffff: protocol ip prio 9 u32 \
|
||||||
|
match ip tos 0x90 0xfc \
|
||||||
|
$meter2a \
|
||||||
|
continue flowid :2
|
||||||
|
#
|
||||||
|
# if it exceeds the above but not the rule below, it gets a tcindex value
|
||||||
|
# of 3 (policer 3)
|
||||||
|
#
|
||||||
|
$TC filter add dev $INDEV parent ffff: protocol ip prio 10 u32 \
|
||||||
|
match ip tos 0x90 0xfc \
|
||||||
|
$meter3 \
|
||||||
|
continue flowid :3
|
||||||
|
$TC filter add dev $INDEV parent ffff: protocol ip prio 11 u32 \
|
||||||
|
match ip tos 0x90 0xfc \
|
||||||
|
$meter3a \
|
||||||
|
drop flowid :3
|
||||||
|
|
||||||
|
#
|
||||||
|
# *********************** AF43 ***************************
|
||||||
|
#
|
||||||
|
#AF43 (DSCP 0x26) is passed on with a tcindex value 3
|
||||||
|
#if it doesnt exceed its CIR/CBS + PIR/EBS
|
||||||
|
#policer 3 is used. Note that this is shared with the AF41 and AF42
|
||||||
|
#
|
||||||
|
$TC filter add dev $INDEV parent ffff: protocol ip prio 13 u32 \
|
||||||
|
match ip tos 0x98 0xfc \
|
||||||
|
$meter3 \
|
||||||
|
continue flowid :3
|
||||||
|
$TC filter add dev $INDEV parent ffff: protocol ip prio 14 u32 \
|
||||||
|
match ip tos 0x98 0xfc \
|
||||||
|
$meter3a \
|
||||||
|
drop flowid :3
|
||||||
|
#
|
||||||
|
## *********************** BE ***************************
|
||||||
|
##
|
||||||
|
## Anything else (not from the AF4*) gets discarded if it
|
||||||
|
## exceeds 1Mbps and by default goes to BE if it doesnt
|
||||||
|
## Note that the BE class is also used by the AF4* in the worst
|
||||||
|
## case
|
||||||
|
##
|
||||||
|
$TC filter add dev $INDEV parent ffff: protocol ip prio 16 u32 \
|
||||||
|
match ip src 0/0\
|
||||||
|
$meter4 \
|
||||||
|
drop flowid :4
|
||||||
|
|
||||||
|
######################## Egress side ########################
|
||||||
|
|
||||||
|
# attach a dsmarker
|
||||||
|
#
|
||||||
|
$TC qdisc add $EGDEV handle 1:0 root dsmark indices 64
|
||||||
|
#
|
||||||
|
# values of the DSCP to change depending on the class
|
||||||
|
#note that the ECN bits are masked out
|
||||||
|
#
|
||||||
|
#AF41 (0x88 is 0x22 shifted to the left by two bits)
|
||||||
|
#
|
||||||
|
$TC class change $EGDEV classid 1:1 dsmark mask 0x3 \
|
||||||
|
value 0x88
|
||||||
|
#AF42
|
||||||
|
$TC class change $EGDEV classid 1:2 dsmark mask 0x3 \
|
||||||
|
value 0x90
|
||||||
|
#AF43
|
||||||
|
$TC class change $EGDEV classid 1:3 dsmark mask 0x3 \
|
||||||
|
value 0x98
|
||||||
|
#BE (tcindex 4 is mapped to classid 1:4 by the class mapping filters below)
|
||||||
|
$TC class change $EGDEV classid 1:4 dsmark mask 0x3 \
|
||||||
|
value 0x0
|
||||||
|
#
|
||||||
|
#
|
||||||
|
# The class mapping
|
||||||
|
#
|
||||||
|
$TC filter add $EGDEV parent 1:0 protocol ip prio 1 \
|
||||||
|
handle 1 tcindex classid 1:1
|
||||||
|
$TC filter add $EGDEV parent 1:0 protocol ip prio 1 \
|
||||||
|
handle 2 tcindex classid 1:2
|
||||||
|
$TC filter add $EGDEV parent 1:0 protocol ip prio 1 \
|
||||||
|
handle 3 tcindex classid 1:3
|
||||||
|
$TC filter add $EGDEV parent 1:0 protocol ip prio 1 \
|
||||||
|
handle 4 tcindex classid 1:4
|
||||||
|
#
|
||||||
|
|
||||||
|
#
|
||||||
|
echo "---- qdisc parameters Ingress ----------"
|
||||||
|
$TC qdisc ls dev $INDEV
|
||||||
|
echo "---- Class parameters Ingress ----------"
|
||||||
|
$TC class ls dev $INDEV
|
||||||
|
echo "---- filter parameters Ingress ----------"
|
||||||
|
$TC filter ls dev $INDEV parent ffff:
|
||||||
|
|
||||||
|
echo "---- qdisc parameters Egress ----------"
|
||||||
|
$TC qdisc ls $EGDEV
|
||||||
|
echo "---- Class parameters Egress ----------"
|
||||||
|
$TC class ls $EGDEV
|
||||||
|
echo "---- filter parameters Egress ----------"
|
||||||
|
$TC filter ls $EGDEV parent 1:0
|
||||||
|
#
|
||||||
|
#deleting the ingress qdisc
|
||||||
|
#$TC qdisc del $INDEV ingress
|
||||||
|
|
@ -0,0 +1,144 @@
|
||||||
|
#! /bin/sh -x
|
||||||
|
#
|
||||||
|
# sample script on using the ingress capabilities
|
||||||
|
# This script fwmark tags(IPchains) based on metering on the ingress
|
||||||
|
# interface the result is used for fast classification and re-marking
|
||||||
|
# on the egress interface
|
||||||
|
# This is an example of a color blind mode marker with no PIR configured
|
||||||
|
# based on draft-wahjak-mcm-00.txt (section 3.1)
|
||||||
|
#
|
||||||
|
#path to various utilities;
|
||||||
|
#change to reflect yours.
|
||||||
|
#
|
||||||
|
IPROUTE=/root/DS-6-beta/iproute2-990530-dsing
|
||||||
|
TC=$IPROUTE/tc/tc
|
||||||
|
IP=$IPROUTE/ip/ip
|
||||||
|
IPCHAINS=/root/DS-6-beta/ipchains-1.3.9/ipchains
|
||||||
|
INDEV=eth2
|
||||||
|
EGDEV="dev eth1"
|
||||||
|
CIR1=1500kbit
|
||||||
|
CIR2=500kbit
|
||||||
|
|
||||||
|
#The CBS is about 60 MTU sized packets
|
||||||
|
CBS1=90k
|
||||||
|
CBS2=90k
|
||||||
|
|
||||||
|
meter1="police rate $CIR1 burst $CBS1 "
|
||||||
|
meter1a="police rate $CIR2 burst $CBS1 "
|
||||||
|
meter2="police rate $CIR1 burst $CBS2 "
|
||||||
|
meter2a="police rate $CIR2 burst $CBS2 "
|
||||||
|
meter3="police rate $CIR2 burst $CBS1 "
|
||||||
|
meter3a="police rate $CIR2 burst $CBS1 "
|
||||||
|
meter4="police rate $CIR2 burst $CBS2 "
|
||||||
|
meter5="police rate $CIR1 burst $CBS2 "
|
||||||
|
#
|
||||||
|
# tag the rest of incoming packets from subnet 10.2.0.0/24 to fw value 1
|
||||||
|
# tag all incoming packets from any other subnet to fw tag 2
|
||||||
|
############################################################
|
||||||
|
$IPCHAINS -A input -i $INDEV -s 0/0 -m 2
|
||||||
|
$IPCHAINS -A input -i $INDEV -s 10.2.0.0/24 -m 1
|
||||||
|
#
|
||||||
|
############################################################
|
||||||
|
# install the ingress qdisc on the ingress interface
|
||||||
|
$TC qdisc add dev $INDEV handle ffff: ingress
|
||||||
|
#
|
||||||
|
############################################################
|
||||||
|
|
||||||
|
# All packets are marked with a tcindex value which is used on the egress
|
||||||
|
# tcindex 1 maps to AF41, 2->AF42, 3->AF43, 4->BE
|
||||||
|
#
|
||||||
|
############################################################
|
||||||
|
#
|
||||||
|
# anything with fw tag of 1 is passed on with a tcindex value 1
|
||||||
|
#if it doesnt exceed its allocated rate (CIR/CBS)
|
||||||
|
#
|
||||||
|
$TC filter add dev $INDEV parent ffff: protocol ip prio 1 handle 1 fw \
|
||||||
|
$meter1 \
|
||||||
|
continue flowid 4:1
|
||||||
|
$TC filter add dev $INDEV parent ffff: protocol ip prio 2 handle 1 fw \
|
||||||
|
$meter1a \
|
||||||
|
continue flowid 4:1
|
||||||
|
#
|
||||||
|
# if it exceeds the above but not the extra rate/burst below, it gets a
|
||||||
|
#tcindex value of 2
|
||||||
|
#
|
||||||
|
$TC filter add dev $INDEV parent ffff: protocol ip prio 3 handle 1 fw \
|
||||||
|
$meter2 \
|
||||||
|
continue flowid 4:2
|
||||||
|
$TC filter add dev $INDEV parent ffff: protocol ip prio 4 handle 1 fw \
|
||||||
|
$meter2a \
|
||||||
|
continue flowid 4:2
|
||||||
|
#
|
||||||
|
# if it exceeds the above but not the rule below, it gets a tcindex value
|
||||||
|
# of 3
|
||||||
|
#
|
||||||
|
$TC filter add dev $INDEV parent ffff: protocol ip prio 5 handle 1 fw \
|
||||||
|
$meter3 \
|
||||||
|
continue flowid 4:3
|
||||||
|
$TC filter add dev $INDEV parent ffff: protocol ip prio 6 handle 1 fw \
|
||||||
|
$meter3a \
|
||||||
|
drop flowid 4:3
|
||||||
|
#
|
||||||
|
# Anything else (not from the subnet 10.2.0.0/24) gets discarded if it
|
||||||
|
# exceeds CIR1 (1500kbit, meter5) and by default goes to BE if it doesn't
|
||||||
|
#
|
||||||
|
$TC filter add dev $INDEV parent ffff: protocol ip prio 7 handle 2 fw \
|
||||||
|
$meter5 \
|
||||||
|
drop flowid 4:4
|
||||||
|
|
||||||
|
|
||||||
|
######################## Egress side ########################
|
||||||
|
|
||||||
|
|
||||||
|
# attach a dsmarker
|
||||||
|
#
|
||||||
|
$TC qdisc add $EGDEV handle 1:0 root dsmark indices 64
|
||||||
|
#
|
||||||
|
# values of the DSCP to change depending on the class
|
||||||
|
#note that the ECN bits are masked out
|
||||||
|
#
|
||||||
|
#AF41 (0x88 is 0x22 shifted to the left by two bits)
|
||||||
|
#
|
||||||
|
$TC class change $EGDEV classid 1:1 dsmark mask 0x3 \
|
||||||
|
value 0x88
|
||||||
|
#AF42
|
||||||
|
$TC class change $EGDEV classid 1:2 dsmark mask 0x3 \
|
||||||
|
value 0x90
|
||||||
|
#AF43
|
||||||
|
$TC class change $EGDEV classid 1:3 dsmark mask 0x3 \
|
||||||
|
value 0x98
|
||||||
|
#BE
|
||||||
|
$TC class change $EGDEV classid 1:4 dsmark mask 0x3 \
|
||||||
|
value 0x0
|
||||||
|
#
|
||||||
|
#
|
||||||
|
# The class mapping (using tcindex; could easily have
|
||||||
|
# replaced it with the fw classifier instead)
|
||||||
|
#
|
||||||
|
$TC filter add $EGDEV parent 1:0 protocol ip prio 1 \
|
||||||
|
handle 1 tcindex classid 1:1
|
||||||
|
$TC filter add $EGDEV parent 1:0 protocol ip prio 1 \
|
||||||
|
handle 2 tcindex classid 1:2
|
||||||
|
$TC filter add $EGDEV parent 1:0 protocol ip prio 1 \
|
||||||
|
handle 3 tcindex classid 1:3
|
||||||
|
$TC filter add $EGDEV parent 1:0 protocol ip prio 1 \
|
||||||
|
handle 4 tcindex classid 1:4
|
||||||
|
#
|
||||||
|
|
||||||
|
#
|
||||||
|
echo "---- qdisc parameters Ingress ----------"
|
||||||
|
$TC qdisc ls dev $INDEV
|
||||||
|
echo "---- Class parameters Ingress ----------"
|
||||||
|
$TC class ls dev $INDEV
|
||||||
|
echo "---- filter parameters Ingress ----------"
|
||||||
|
$TC filter ls dev $INDEV parent ffff:
|
||||||
|
|
||||||
|
echo "---- qdisc parameters Egress ----------"
|
||||||
|
$TC qdisc ls $EGDEV
|
||||||
|
echo "---- Class parameters Egress ----------"
|
||||||
|
$TC class ls $EGDEV
|
||||||
|
echo "---- filter parameters Egress ----------"
|
||||||
|
$TC filter ls $EGDEV parent 1:0
|
||||||
|
#
|
||||||
|
#deleting the ingress qdisc
|
||||||
|
#$TC qdisc del $INDEV ingress
|
||||||
|
|
@ -0,0 +1,145 @@
|
||||||
|
#! /bin/sh
|
||||||
|
#
|
||||||
|
# sample script on using the ingress capabilities using u32 classifier
|
||||||
|
# This script tags tcindex based on metering on the ingress
|
||||||
|
# interface the result is used for fast classification and re-marking
|
||||||
|
# on the egress interface
|
||||||
|
# This is an example of a color blind mode marker with PIR configured
|
||||||
|
# based on draft-wahjak-mcm-00.txt (section 3.2)
|
||||||
|
#
|
||||||
|
#path to various utilities;
|
||||||
|
#change to reflect yours.
|
||||||
|
#
|
||||||
|
IPROUTE=/root/DS-6-beta/iproute2-990530-dsing
|
||||||
|
TC=$IPROUTE/tc/tc
|
||||||
|
IP=$IPROUTE/ip/ip
|
||||||
|
INDEV=eth2
|
||||||
|
EGDEV="dev eth1"
|
||||||
|
CIR1=1000kbit
|
||||||
|
CIR2=1000kbit
|
||||||
|
# The PIR is the excess (in addition to the CIR i.e if always
|
||||||
|
# going to the PIR --> average rate is CIR+PIR)
|
||||||
|
PIR1=1000kbit
|
||||||
|
PIR2=500kbit
|
||||||
|
|
||||||
|
#The CBS is about 60 MTU sized packets
|
||||||
|
CBS1=90k
|
||||||
|
CBS2=90k
|
||||||
|
#the EBS is about 10 max sized packets
|
||||||
|
EBS1=15k
|
||||||
|
EBS2=15k
|
||||||
|
# The meters
|
||||||
|
meter1=" police rate $CIR1 burst $CBS1 "
|
||||||
|
meter1a=" police rate $PIR1 burst $EBS1 "
|
||||||
|
meter2=" police rate $CIR2 burst $CBS1 "
|
||||||
|
meter2a="police rate $PIR2 burst $EBS1 " # excess (PIR) meter: uses the EBS like meter1a/meter3a, not the CBS
|
||||||
|
meter3=" police rate $CIR2 burst $CBS2 "
|
||||||
|
meter3a=" police rate $PIR2 burst $EBS2 "
|
||||||
|
meter4=" police rate $CIR1 burst $CBS2 "
|
||||||
|
meter5=" police rate $CIR1 burst $CBS2 "
|
||||||
|
|
||||||
|
|
||||||
|
# install the ingress qdisc on the ingress interface
|
||||||
|
############################################################
|
||||||
|
$TC qdisc add dev $INDEV handle ffff: ingress
|
||||||
|
############################################################
|
||||||
|
#
|
||||||
|
############################################################
|
||||||
|
|
||||||
|
# All packets are marked with a tcindex value which is used on the egress
|
||||||
|
# NOTE: tcindex 1 maps to AF41, 2->AF42, 3->AF43, 4->BE
|
||||||
|
#
|
||||||
|
#anything from subnet 10.2.0.0/24 is passed on with a tcindex value 1
|
||||||
|
#if it doesnt exceed its CIR/CBS + PIR/EBS
|
||||||
|
#
|
||||||
|
$TC filter add dev $INDEV parent ffff: protocol ip prio 1 u32 \
|
||||||
|
match ip src 10.2.0.0/24 $meter1 \
|
||||||
|
continue flowid :1
|
||||||
|
$TC filter add dev $INDEV parent ffff: protocol ip prio 2 u32 \
|
||||||
|
match ip src 10.2.0.0/24 $meter1a \
|
||||||
|
continue flowid :1
|
||||||
|
|
||||||
|
#
|
||||||
|
# if it exceeds the above but not the extra rate/burst below, it gets a
|
||||||
|
#tcindex value of 2
|
||||||
|
#
|
||||||
|
$TC filter add dev $INDEV parent ffff: protocol ip prio 3 u32 \
|
||||||
|
match ip src 10.2.0.0/24 $meter2 \
|
||||||
|
continue flowid :2
|
||||||
|
$TC filter add dev $INDEV parent ffff: protocol ip prio 4 u32 \
|
||||||
|
match ip src 10.2.0.0/24 $meter2a \
|
||||||
|
continue flowid :2
|
||||||
|
#
|
||||||
|
# if it exceeds the above but not the rule below, it gets a tcindex value
|
||||||
|
# of 3
|
||||||
|
#
|
||||||
|
$TC filter add dev $INDEV parent ffff: protocol ip prio 5 u32 \
|
||||||
|
match ip src 10.2.0.0/24 $meter3 \
|
||||||
|
continue flowid :3
|
||||||
|
$TC filter add dev $INDEV parent ffff: protocol ip prio 6 u32 \
|
||||||
|
match ip src 10.2.0.0/24 $meter3a \
|
||||||
|
drop flowid :3
|
||||||
|
#
|
||||||
|
#
|
||||||
|
# Anything else (not from the subnet 10.2.0.0/24) gets discarded if it
|
||||||
|
# exceeds 1Mbps and by default goes to BE if it doesnt
|
||||||
|
#
|
||||||
|
$TC filter add dev $INDEV parent ffff: protocol ip prio 7 u32 \
|
||||||
|
match ip src 0/0 $meter5 \
|
||||||
|
drop flowid :4
|
||||||
|
|
||||||
|
|
||||||
|
######################## Egress side ########################
|
||||||
|
|
||||||
|
|
||||||
|
# attach a dsmarker
|
||||||
|
#
|
||||||
|
$TC qdisc add $EGDEV handle 1:0 root dsmark indices 64
|
||||||
|
#
|
||||||
|
# values of the DSCP to change depending on the class
|
||||||
|
#note that the ECN bits are masked out
|
||||||
|
#
|
||||||
|
#AF41 (0x88 is 0x22 shifted to the left by two bits)
|
||||||
|
#
|
||||||
|
$TC class change $EGDEV classid 1:1 dsmark mask 0x3 \
|
||||||
|
value 0x88
|
||||||
|
#AF42
|
||||||
|
$TC class change $EGDEV classid 1:2 dsmark mask 0x3 \
|
||||||
|
value 0x90
|
||||||
|
#AF43
|
||||||
|
$TC class change $EGDEV classid 1:3 dsmark mask 0x3 \
|
||||||
|
value 0x98
|
||||||
|
#BE (tcindex 4 is mapped to classid 1:4 by the class mapping filters below)
|
||||||
|
$TC class change $EGDEV classid 1:4 dsmark mask 0x3 \
|
||||||
|
value 0x0
|
||||||
|
#
|
||||||
|
#
|
||||||
|
# The class mapping
|
||||||
|
#
|
||||||
|
$TC filter add $EGDEV parent 1:0 protocol ip prio 1 \
|
||||||
|
handle 1 tcindex classid 1:1
|
||||||
|
$TC filter add $EGDEV parent 1:0 protocol ip prio 1 \
|
||||||
|
handle 2 tcindex classid 1:2
|
||||||
|
$TC filter add $EGDEV parent 1:0 protocol ip prio 1 \
|
||||||
|
handle 3 tcindex classid 1:3
|
||||||
|
$TC filter add $EGDEV parent 1:0 protocol ip prio 1 \
|
||||||
|
handle 4 tcindex classid 1:4
|
||||||
|
#
|
||||||
|
|
||||||
|
#
|
||||||
|
echo "---- qdisc parameters Ingress ----------"
|
||||||
|
$TC qdisc ls dev $INDEV
|
||||||
|
echo "---- Class parameters Ingress ----------"
|
||||||
|
$TC class ls dev $INDEV
|
||||||
|
echo "---- filter parameters Ingress ----------"
|
||||||
|
$TC filter ls dev $INDEV parent ffff:
|
||||||
|
|
||||||
|
echo "---- qdisc parameters Egress ----------"
|
||||||
|
$TC qdisc ls $EGDEV
|
||||||
|
echo "---- Class parameters Egress ----------"
|
||||||
|
$TC class ls $EGDEV
|
||||||
|
echo "---- filter parameters Egress ----------"
|
||||||
|
$TC filter ls $EGDEV parent 1:0
|
||||||
|
#
|
||||||
|
#deleting the ingress qdisc
|
||||||
|
#$TC qdisc del $INDEV ingress
|
||||||
|
|
@ -0,0 +1,98 @@
|
||||||
|
|
||||||
|
Note all these are mere examples which can be customized to your needs
|
||||||
|
|
||||||
|
AFCBQ
|
||||||
|
-----
|
||||||
|
AF PHB built using CBQ, DSMARK, GRED (default in GRIO mode), RED for BE
|
||||||
|
and the tcindex classifier with some algorithmic mapping
|
||||||
|
|
||||||
|
EFCBQ
|
||||||
|
-----
|
||||||
|
EF PHB built using CBQ (for rate control and prioritization),
|
||||||
|
DSMARK( to remark DSCPs), tcindex classifier and RED for the BE
|
||||||
|
traffic.
|
||||||
|
|
||||||
|
EFPRIO
|
||||||
|
------
|
||||||
|
EF PHB using the PRIO scheduler, Token Bucket to rate control EF,
|
||||||
|
tcindex classifier, DSMARK to remark, and RED for the BE traffic
|
||||||
|
|
||||||
|
EDGE scripts
|
||||||
|
==============
|
||||||
|
|
||||||
|
CB-3(1|2)-(u32/chains)
|
||||||
|
======================
|
||||||
|
|
||||||
|
|
||||||
|
The major difference is that the classifier is u32 in the -u32 scripts
|
||||||
|
and ipchains (fwmark) in the -chains scripts. CB stands for Color Blind
|
||||||
|
and 31 is for the mode where only a CIR and CBS are defined whereas
|
||||||
|
32 stands for a mode where a CIR/CBS + PIR/EBS are defined.
|
||||||
|
|
||||||
|
Color Blind (CB)
|
||||||
|
================
|
||||||
|
We look at one special subnet that we are interested in for simplicity
|
||||||
|
reasons to demonstrate the capability. We send the packets from that
|
||||||
|
subnet to AF4*, BE or end up dropping depending on the metering results.
|
||||||
|
|
||||||
|
|
||||||
|
The algorithm overview is as follows:
|
||||||
|
|
||||||
|
*classify:
|
||||||
|
|
||||||
|
**case: subnet X
|
||||||
|
----------------
|
||||||
|
if !exceed meter1 tag as AF41
|
||||||
|
else
|
||||||
|
if !exceed meter2 tag as AF42
|
||||||
|
else
|
||||||
|
if !exceed meter 3 tag as AF43
|
||||||
|
else
|
||||||
|
drop
|
||||||
|
|
||||||
|
default case: Any other subnet
|
||||||
|
-------------------------------
|
||||||
|
if !exceed meter 5 tag as BE
|
||||||
|
else
|
||||||
|
drop
|
||||||
|
|
||||||
|
|
||||||
|
On the egress side, change the DSCPs of the packets to reflect AF4* and BE
|
||||||
|
based on the tags from the ingress.
|
||||||
|
|
||||||
|
-------------------------------------------------------------
|
||||||
|
|
||||||
|
Color Aware
|
||||||
|
===========
|
||||||
|
|
||||||
|
Define some meters with policing and give them IDs, e.g.
|
||||||
|
|
||||||
|
meter1=police index 1 rate $CIR1 burst $CBS1
|
||||||
|
meter2=police index 2 rate $CIR2 burst $CBS2 etc
|
||||||
|
|
||||||
|
General overview:
|
||||||
|
classify based on the DSCPs and use the policer ids to decide tagging
|
||||||
|
|
||||||
|
|
||||||
|
*classify on ingress:
|
||||||
|
|
||||||
|
switch (dscp) {
|
||||||
|
case AF41: /* tos&0xfc == 0x88 */
|
||||||
|
if (!exceed meter1) { tag as AF41; break; }
|
||||||
|
case AF42: /* tos&0xfc == 0x90 */
|
||||||
|
if (!exceed meter2) {
|
||||||
|
tag as AF42;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
case AF43: /* tos&0xfc == 0x98 */
|
||||||
|
if (!exceed meter3) {
|
||||||
|
tag as AF43;
|
||||||
|
break;
|
||||||
|
} else
|
||||||
|
drop;
|
||||||
|
default:
|
||||||
|
if (!exceed meter4) tag as BE;
|
||||||
|
else drop;
|
||||||
|
}
|
||||||
|
|
||||||
|
On the Egress side mark the proper AF tags
|
||||||
|
|
@ -0,0 +1,105 @@
|
||||||
|
#!/usr/bin/perl
|
||||||
|
#
|
||||||
|
#
|
||||||
|
# AF using CBQ for a single interface eth0
|
||||||
|
# 4 AF classes using GRED and one BE using RED
|
||||||
|
# Things you might want to change:
|
||||||
|
# - the device bandwidth (set at 10Mbits)
|
||||||
|
# - the bandwidth allocated for each AF class and the BE class
|
||||||
|
# - the drop probability associated with each AF virtual queue
|
||||||
|
#
|
||||||
|
# AF DSCP values used (based on AF draft 04)
|
||||||
|
# -----------------------------------------
|
||||||
|
# AF DSCP values
|
||||||
|
# AF1 1. 0x0a 2. 0x0c 3. 0x0e
|
||||||
|
# AF2 1. 0x12 2. 0x14 3. 0x16
|
||||||
|
# AF3 1. 0x1a 2. 0x1c 3. 0x1e
|
||||||
|
# AF4 1. 0x22 2. 0x24 3. 0x26
|
||||||
|
|
||||||
|
#
|
||||||
|
#
|
||||||
|
# A simple DSCP-class relationship formula used to generate
|
||||||
|
# values in the for loop of this script; $drop stands for the
|
||||||
|
# DP
|
||||||
|
# $dscp = ($class*8+$drop*2)
|
||||||
|
#
|
||||||
|
# if you use GRIO buffer sharing, then GRED priority is set as follows:
|
||||||
|
# $gprio=$drop+1;
|
||||||
|
#
|
||||||
|
|
||||||
|
$TC = "/usr/src/iproute2-current/tc/tc";
|
||||||
|
$DEV = "dev lo";
|
||||||
|
$DEV = "dev eth1";
|
||||||
|
$DEV = "dev eth0";
|
||||||
|
# the BE-class number
|
||||||
|
$beclass = "5";
|
||||||
|
|
||||||
|
#GRIO buffer sharing on or off?
|
||||||
|
$GRIO = "";
|
||||||
|
$GRIO = "grio";
|
||||||
|
# The bandwidth of your device
|
||||||
|
$linerate="10Mbit";
|
||||||
|
# The BE and AF rates
|
||||||
|
%rate_table=();
|
||||||
|
$berate="1500Kbit";
|
||||||
|
$rate_table{"AF1rate"}="1500Kbit";
|
||||||
|
$rate_table{"AF2rate"}="1500Kbit";
|
||||||
|
$rate_table{"AF3rate"}="1500Kbit";
|
||||||
|
$rate_table{"AF4rate"}="1500Kbit";
|
||||||
|
#
|
||||||
|
#
|
||||||
|
#
|
||||||
|
print "\n# --- General setup ---\n";
|
||||||
|
print "$TC qdisc add $DEV handle 1:0 root dsmark indices 64 set_tc_index\n";
|
||||||
|
print "$TC filter add $DEV parent 1:0 protocol ip prio 1 tcindex mask 0xfc " .
|
||||||
|
"shift 2 pass_on\n";
|
||||||
|
#"shift 2\n";
|
||||||
|
print "$TC qdisc add $DEV parent 1:0 handle 2:0 cbq bandwidth $linerate ".
|
||||||
|
"cell 8 avpkt 1000 mpu 64\n";
|
||||||
|
print "$TC filter add $DEV parent 2:0 protocol ip prio 1 tcindex ".
|
||||||
|
"mask 0xf0 shift 4 pass_on\n";
|
||||||
|
for $class (1..4) {
|
||||||
|
print "\n# --- AF Class $class specific setup---\n";
|
||||||
|
$AFrate=sprintf("AF%drate",$class);
|
||||||
|
print "$TC class add $DEV parent 2:0 classid 2:$class cbq ".
|
||||||
|
"bandwidth $linerate rate $rate_table{$AFrate} avpkt 1000 prio ".
|
||||||
|
(6-$class)." bounded allot 1514 weight 1 maxburst 21\n";
|
||||||
|
print "$TC filter add $DEV parent 2:0 protocol ip prio 1 handle $class ".
|
||||||
|
"tcindex classid 2:$class\n";
|
||||||
|
print "$TC qdisc add $DEV parent 2:$class gred setup DPs 3 default 2 ".
|
||||||
|
"$GRIO\n";
|
||||||
|
#
|
||||||
|
# per DP setup
|
||||||
|
#
|
||||||
|
for $drop (1..3) {
|
||||||
|
print "\n# --- AF Class $class DP $drop---\n";
|
||||||
|
$dscp = $class*8+$drop*2;
|
||||||
|
$tcindex = sprintf("1%x%x",$class,$drop);
|
||||||
|
print "$TC filter add $DEV parent 1:0 protocol ip prio 1 ".
|
||||||
|
"handle $dscp tcindex classid 1:$tcindex\n";
|
||||||
|
$prob = $drop*0.02;
|
||||||
|
if ($GRIO) {
|
||||||
|
$gprio = $drop+1;
|
||||||
|
print "$TC qdisc change $DEV parent 2:$class gred limit 60KB min 15KB ".
|
||||||
|
"max 45KB burst 20 avpkt 1000 bandwidth $linerate DP $drop ".
|
||||||
|
"probability $prob ".
|
||||||
|
"prio $gprio\n";
|
||||||
|
} else {
|
||||||
|
print "$TC qdisc change $DEV parent 2:$class gred limit 60KB min 15KB ".
|
||||||
|
"max 45KB burst 20 avpkt 1000 bandwidth $linerate DP $drop ".
|
||||||
|
"probability $prob \n";
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
#
|
||||||
|
#
|
||||||
|
print "\n#------BE Queue setup------\n";
|
||||||
|
print "$TC filter add $DEV parent 1:0 protocol ip prio 2 ".
|
||||||
|
"handle 0 tcindex mask 0 classid 1:1\n";
|
||||||
|
print "$TC class add $DEV parent 2:0 classid 2:$beclass cbq ".
|
||||||
|
"bandwidth $linerate rate $berate avpkt 1000 prio 6 " .
|
||||||
|
"bounded allot 1514 weight 1 maxburst 21 \n";
|
||||||
|
# Attach the BE filter and RED qdisc to 2:$beclass, the class created above, instead of a hard-coded 2:5.
print "$TC filter add $DEV parent 2:0 protocol ip prio 1 handle 0 tcindex ".
|
||||||
|
"classid 2:$beclass\n";
|
||||||
|
print "$TC qdisc add $DEV parent 2:$beclass red limit 60KB min 15KB max 45KB ".
|
||||||
|
"burst 20 avpkt 1000 bandwidth $linerate probability 0.4\n";
|
||||||
|
|
@ -0,0 +1,25 @@
|
||||||
|
#!/usr/bin/perl
|
||||||
|
$TC = "/root/DS-6-beta/iproute2-990530-dsing/tc/tc";
|
||||||
|
$DEV = "dev eth1";
|
||||||
|
$efrate="1.5Mbit";
|
||||||
|
$MTU="1.5kB";
|
||||||
|
print "$TC qdisc add $DEV handle 1:0 root dsmark indices 64 set_tc_index\n";
|
||||||
|
print "$TC filter add $DEV parent 1:0 protocol ip prio 1 tcindex ".
|
||||||
|
"mask 0xfc shift 2\n";
|
||||||
|
print "$TC qdisc add $DEV parent 1:0 handle 2:0 prio\n";
|
||||||
|
#
|
||||||
|
# EF class: Maximum about one MTU sized packet allowed on the queue
|
||||||
|
#
|
||||||
|
print "$TC qdisc add $DEV parent 2:1 tbf rate $efrate burst $MTU limit 1.6kB\n";
|
||||||
|
print "$TC filter add $DEV parent 2:0 protocol ip prio 1 ".
|
||||||
|
"handle 0x2e tcindex classid 2:1 pass_on\n";
|
||||||
|
#
|
||||||
|
# BE class
|
||||||
|
#
|
||||||
|
print "#BE class(2:2) \n";
|
||||||
|
print "$TC qdisc add $DEV parent 2:2 red limit 60KB ".
|
||||||
|
"min 15KB max 45KB burst 20 avpkt 1000 bandwidth 10Mbit ".
|
||||||
|
"probability 0.4\n";
|
||||||
|
#
|
||||||
|
print "$TC filter add $DEV parent 2:0 protocol ip prio 2 ".
|
||||||
|
"handle 0 tcindex mask 0 classid 2:2 pass_on\n";
|
||||||
|
|
@ -0,0 +1,31 @@
|
||||||
|
#!/usr/bin/perl
|
||||||
|
#
|
||||||
|
$TC = "/root/DS-6-beta/iproute2-990530-dsing/tc/tc";
|
||||||
|
$DEV = "dev eth1";
|
||||||
|
print "$TC qdisc add $DEV handle 1:0 root dsmark indices 64 set_tc_index\n";
|
||||||
|
print "$TC filter add $DEV parent 1:0 protocol ip prio 1 tcindex ".
|
||||||
|
"mask 0xfc shift 2\n";
|
||||||
|
print "$TC qdisc add $DEV parent 1:0 handle 2:0 cbq bandwidth ".
|
||||||
|
"10Mbit cell 8 avpkt 1000 mpu 64\n";
|
||||||
|
#
|
||||||
|
# EF class
|
||||||
|
#
|
||||||
|
print "$TC class add $DEV parent 2:0 classid 2:1 cbq bandwidth ".
|
||||||
|
"10Mbit rate 1500Kbit avpkt 1000 prio 1 bounded isolated ".
|
||||||
|
"allot 1514 weight 1 maxburst 10 \n";
|
||||||
|
# packet fifo for EF?
|
||||||
|
print "$TC qdisc add $DEV parent 2:1 pfifo limit 5\n";
|
||||||
|
print "$TC filter add $DEV parent 2:0 protocol ip prio 1 ".
|
||||||
|
"handle 0x2e tcindex classid 2:1 pass_on\n";
|
||||||
|
#
|
||||||
|
# BE class
|
||||||
|
#
|
||||||
|
print "#BE class(2:2) \n";
|
||||||
|
print "$TC class add $DEV parent 2:0 classid 2:2 cbq bandwidth ".
|
||||||
|
"10Mbit rate 5Mbit avpkt 1000 prio 7 allot 1514 weight 1 ".
|
||||||
|
"maxburst 21 borrow split 2:0 defmap 0xffff \n";
|
||||||
|
print "$TC qdisc add $DEV parent 2:2 red limit 60KB ".
|
||||||
|
"min 15KB max 45KB burst 20 avpkt 1000 bandwidth 10Mbit ".
|
||||||
|
"probability 0.4\n";
|
||||||
|
print "$TC filter add $DEV parent 2:0 protocol ip prio 2 ".
|
||||||
|
"handle 0 tcindex mask 0 classid 2:2 pass_on\n";
|
||||||
|
|
@ -0,0 +1,125 @@
|
||||||
|
|
||||||
|
These were the tests done to validate the Diffserv scripts.
|
||||||
|
This document will be updated continuously. If you do more
|
||||||
|
thorough validation testing please post the details to the
|
||||||
|
diffserv mailing list.
|
||||||
|
Nevertheless, these tests should serve for basic validation.
|
||||||
|
|
||||||
|
AFCBQ, EFCBQ, EFPRIO
|
||||||
|
----------------------
|
||||||
|
|
||||||
|
generate all possible DSCPs and observe that they
|
||||||
|
get sent to the proper classes. In the case of AF also
|
||||||
|
to the correct Virtual Queues.
|
||||||
|
|
||||||
|
Edge1
|
||||||
|
-----
|
||||||
|
generate TOS values 0x0,0x10,0xbb each with IP addresses
|
||||||
|
10.2.0.24 (mark 1), 10.2.0.3 (mark 2) and 10.2.0.30 (mark 3)
|
||||||
|
and observe that they get marked as expected.
|
||||||
|
|
||||||
|
Edge2
|
||||||
|
-----
|
||||||
|
|
||||||
|
-Repeat the tests in Edge1
|
||||||
|
-ftp with data direction from 10.2.0.2
|
||||||
|
*observe that the metering/policing works correctly (and the marking
|
||||||
|
as well). In this case the mark used will be 3
|
||||||
|
|
||||||
|
Edge31-cb-chains
|
||||||
|
----------------
|
||||||
|
|
||||||
|
-ftp with data direction from 10.2.0.2
|
||||||
|
|
||||||
|
*observe that the metering/policing works correctly (and the marking
|
||||||
|
as well). In this case the mark used will be 1.
|
||||||
|
|
||||||
|
Metering: The data throughput should not exceed 2*CIR1 + 2*CIR2
|
||||||
|
which is roughly: 5mbps
|
||||||
|
|
||||||
|
Marking: there should be a variation of marked packets:
|
||||||
|
AF41(TOS=0x88) AF42(0x90) AF43(0x98) and BE (0x0)
|
||||||
|
|
||||||
|
More tests required to see the interaction of several sources (other
|
||||||
|
than subnet 10.2.0.0/24).
|
||||||
|
|
||||||
|
Edge31-ca-u32
|
||||||
|
--------------
|
||||||
|
|
||||||
|
Generate data using modified tcpblast from 10.2.0.2 (behind eth2) to the
|
||||||
|
discard port of 10.1.0.2 (behind eth1)
|
||||||
|
|
||||||
|
1) generate with src tos = 0x88
|
||||||
|
Metering: Allocated throughput should not exceed 2*CIR1 + 2*CIR2
|
||||||
|
approximately 5mbps
|
||||||
|
Marking: Should vary between 0x88,0x90,0x98 and 0x0
|
||||||
|
|
||||||
|
2) generate with src tos = 0x90
|
||||||
|
Metering: Allocated throughput should not exceed CIR1 + 2*CIR2
|
||||||
|
approximately 3.5mbps
|
||||||
|
Marking: Should vary between 0x90,0x98 and 0x0
|
||||||
|
|
||||||
|
3) generate with src tos = 0x98
|
||||||
|
Metering: Allocated throughput should not exceed CIR1 + CIR2
|
||||||
|
approximately 2.5mbps
|
||||||
|
Marking: Should vary between 0x98 and 0x0
|
||||||
|
|
||||||
|
4) generate with src tos any other than the above
|
||||||
|
Metering: Allocated throughput should not exceed CIR1
|
||||||
|
approximately 1.5mbps
|
||||||
|
Marking: Should be consistent at 0x0
|
||||||
|
|
||||||
|
TODO: Testing on how each color shares when all 4 types of packets
|
||||||
|
are going through the edge device
|
||||||
|
|
||||||
|
Edge32-cb-u32, Edge32-cb-chains
|
||||||
|
-------------------------------
|
||||||
|
|
||||||
|
-ftp with data direction from 10.2.0.2
|
||||||
|
|
||||||
|
*observe that the metering/policing works correctly (and the marking
|
||||||
|
as well).
|
||||||
|
|
||||||
|
Metering:
|
||||||
|
The data throughput should not exceed 2*CIR1 + 2*CIR2
|
||||||
|
+ 2*PIR2 + PIR1 for u32 which is roughly: 6mbps
|
||||||
|
The data throughput should not exceed 2*CIR1 + 5*CIR2
|
||||||
|
for chains which is roughly: 6mbps
|
||||||
|
|
||||||
|
Marking: there should be a variation of marked packets:
|
||||||
|
AF41(TOS=0x88) AF42(0x90) AF43(0x98) and BE (0x0)
|
||||||
|
|
||||||
|
TODO:
|
||||||
|
-More tests required to see the interaction of several sources (other
|
||||||
|
than subnet 10.2.0.0/24).
|
||||||
|
-More tests needed to capture stats on how many times the CIR was exceeded
|
||||||
|
but the data was not remarked etc.
|
||||||
|
|
||||||
|
Edge32-ca-u32
|
||||||
|
--------------
|
||||||
|
|
||||||
|
Generate data using modified tcpblast from 10.2.0.2 (behind eth2) to the
|
||||||
|
discard port of 10.1.0.2 (behind eth1)
|
||||||
|
|
||||||
|
1) generate with src tos = 0x88
|
||||||
|
Metering: Allocated throughput should not exceed 2*CIR1 + 2*CIR2
|
||||||
|
+PIR1 -- approximately 4mbps
|
||||||
|
Marking: Should vary between 0x88,0x90,0x98 and 0x0
|
||||||
|
|
||||||
|
2) generate with src tos = 0x90
|
||||||
|
Metering: Allocated throughput should not exceed CIR1 + 2*CIR2
|
||||||
|
+ 2* PIR2 approximately 3mbps
|
||||||
|
Marking: Should vary between 0x90,0x98 and 0x0
|
||||||
|
|
||||||
|
3) generate with src tos = 0x98
|
||||||
|
Metering: Allocated throughput should not exceed PIR1+ CIR1 + CIR2
|
||||||
|
approximately 2.5mbps
|
||||||
|
Marking: Should vary between 0x98 and 0x0
|
||||||
|
|
||||||
|
4) generate with src tos any other than the above
|
||||||
|
Metering: Allocated throughput should not exceed CIR1
|
||||||
|
approximately 1mbps
|
||||||
|
Marking: Should be consistent at 0x0
|
||||||
|
|
||||||
|
TODO: Testing on how each color shares when all 4 types of packets
|
||||||
|
are going through the edge device
|
||||||
|
|
@ -0,0 +1,134 @@
|
||||||
|
#!/bin/sh
|
||||||
|
|
||||||
|
#
|
||||||
|
# Setup address label from /etc/gai.conf
|
||||||
|
#
|
||||||
|
# Written by YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>, 2010.
|
||||||
|
#
|
||||||
|
|
||||||
|
IP=ip
|
||||||
|
DEFAULT_GAICONF=/etc/gai.conf
|
||||||
|
verbose=
|
||||||
|
debug=
|
||||||
|
|
||||||
|
function run ()
|
||||||
|
{
|
||||||
|
if [ x"$verbose" != x"" ]; then
|
||||||
|
echo "$@"
|
||||||
|
fi
|
||||||
|
if [ x"$debug" = x"" ]; then
|
||||||
|
"$@"
|
||||||
|
fi
|
||||||
|
}
|
||||||
|
|
||||||
|
function do_load_config ()
|
||||||
|
{
|
||||||
|
file=$1; shift
|
||||||
|
flush=1
|
||||||
|
cat $file | while read command prefix label; do
|
||||||
|
if [ x"$command" = x"#label" ]; then
|
||||||
|
if [ ${flush} = 1 ]; then
|
||||||
|
run ${IP} -6 addrlabel flush
|
||||||
|
flush=0
|
||||||
|
fi
|
||||||
|
run ${IP} -6 addrlabel add prefix $prefix label $label
|
||||||
|
fi
|
||||||
|
done
|
||||||
|
}
|
||||||
|
|
||||||
|
function do_list_config ()
|
||||||
|
{
|
||||||
|
${IP} -6 addrlabel list | while read p pfx l lbl; do
|
||||||
|
echo label ${pfx} ${lbl}
|
||||||
|
done
|
||||||
|
}
|
||||||
|
|
||||||
|
function help ()
|
||||||
|
{
|
||||||
|
echo "Usage: $0 [-v] {--list | --config [ ${DEFAULT_GAICONF} ] | --default}"
|
||||||
|
exit 1
|
||||||
|
}
|
||||||
|
|
||||||
|
TEMP=`getopt -o c::dlv -l config::,default,list,verbose -n gaiconf -- "$@"`
|
||||||
|
|
||||||
|
if [ $? != 0 ]; then
|
||||||
|
echo "Terminating..." >&2
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
|
||||||
|
TEMPFILE=`mktemp`
|
||||||
|
|
||||||
|
eval set -- "$TEMP"
|
||||||
|
|
||||||
|
while true ; do
|
||||||
|
case "$1" in
|
||||||
|
-c|--config)
|
||||||
|
if [ x"$cmd" != x"" ]; then
|
||||||
|
help
|
||||||
|
fi
|
||||||
|
case "$2" in
|
||||||
|
"") gai_conf="${DEFAULT_GAICONF}"
|
||||||
|
shift 2
|
||||||
|
;;
|
||||||
|
*) gai_conf="$2"
|
||||||
|
shift 2
|
||||||
|
esac
|
||||||
|
cmd=config
|
||||||
|
;;
|
||||||
|
-d|--default)
|
||||||
|
if [ x"$cmd" != x"" ]; then
|
||||||
|
help
|
||||||
|
fi
|
||||||
|
gai_conf=${TEMPFILE}
|
||||||
|
cmd=config
|
||||||
|
;;
|
||||||
|
-l|--list)
|
||||||
|
if [ x"$cmd" != x"" ]; then
|
||||||
|
help
|
||||||
|
fi
|
||||||
|
cmd=list
|
||||||
|
shift
|
||||||
|
;;
|
||||||
|
-v)
|
||||||
|
verbose=1
|
||||||
|
shift
|
||||||
|
;;
|
||||||
|
--)
|
||||||
|
shift;
|
||||||
|
break
|
||||||
|
;;
|
||||||
|
*)
|
||||||
|
echo "Internal error!" >&2
|
||||||
|
exit 1
|
||||||
|
;;
|
||||||
|
esac
|
||||||
|
done
|
||||||
|
|
||||||
|
case "$cmd" in
|
||||||
|
config)
|
||||||
|
if [ x"$gai_conf" = x"${TEMPFILE}" ]; then
|
||||||
|
sed -e 's/^[[:space:]]*//' <<END_OF_DEFAULT >${TEMPFILE}
|
||||||
|
label ::1/128 0
|
||||||
|
label ::/0 1
|
||||||
|
label 2002::/16 2
|
||||||
|
label ::/96 3
|
||||||
|
label ::ffff:0:0/96 4
|
||||||
|
label fec0::/10 5
|
||||||
|
label fc00::/7 6
|
||||||
|
label 2001:0::/32 7
|
||||||
|
END_OF_DEFAULT
|
||||||
|
fi
|
||||||
|
do_load_config "$gai_conf"
|
||||||
|
;;
|
||||||
|
list)
|
||||||
|
do_list_config
|
||||||
|
;;
|
||||||
|
*)
|
||||||
|
help
|
||||||
|
;;
|
||||||
|
esac
|
||||||
|
|
||||||
|
rm -f "${TEMPFILE}"
|
||||||
|
|
||||||
|
exit 0
|
||||||
|
|
||||||
|
|
@ -1,7 +1,6 @@
|
||||||
# SPDX-License-Identifier: GPL-2.0
|
|
||||||
GENLOBJ=genl.o
|
GENLOBJ=genl.o
|
||||||
|
|
||||||
include ../config.mk
|
include ../Config
|
||||||
SHARED_LIBS ?= y
|
SHARED_LIBS ?= y
|
||||||
|
|
||||||
CFLAGS += -fno-strict-aliasing
|
CFLAGS += -fno-strict-aliasing
|
||||||
|
|
|
||||||
136
genl/ctrl.c
136
genl/ctrl.c
|
|
@ -13,6 +13,7 @@
|
||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
#include <stdlib.h>
|
#include <stdlib.h>
|
||||||
#include <unistd.h>
|
#include <unistd.h>
|
||||||
|
#include <syslog.h>
|
||||||
#include <fcntl.h>
|
#include <fcntl.h>
|
||||||
#include <sys/socket.h>
|
#include <sys/socket.h>
|
||||||
#include <netinet/in.h>
|
#include <netinet/in.h>
|
||||||
|
|
@ -28,18 +29,84 @@
|
||||||
static int usage(void)
|
static int usage(void)
|
||||||
{
|
{
|
||||||
fprintf(stderr,"Usage: ctrl <CMD>\n" \
|
fprintf(stderr,"Usage: ctrl <CMD>\n" \
|
||||||
"CMD := get <PARMS> | list | monitor | policy <PARMS>\n" \
|
"CMD := get <PARMS> | list | monitor\n" \
|
||||||
"PARMS := name <name> | id <id>\n" \
|
"PARMS := name <name> | id <id>\n" \
|
||||||
"Examples:\n" \
|
"Examples:\n" \
|
||||||
"\tctrl ls\n" \
|
"\tctrl ls\n" \
|
||||||
"\tctrl monitor\n" \
|
"\tctrl monitor\n" \
|
||||||
"\tctrl get name foobar\n" \
|
"\tctrl get name foobar\n" \
|
||||||
"\tctrl get id 0xF\n"
|
"\tctrl get id 0xF\n");
|
||||||
"\tctrl policy name foobar\n"
|
|
||||||
"\tctrl policy id 0xF\n");
|
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
int genl_ctrl_resolve_family(const char *family)
|
||||||
|
{
|
||||||
|
struct rtnl_handle rth;
|
||||||
|
int ret = 0;
|
||||||
|
struct {
|
||||||
|
struct nlmsghdr n;
|
||||||
|
struct genlmsghdr g;
|
||||||
|
char buf[4096];
|
||||||
|
} req = {
|
||||||
|
.n.nlmsg_len = NLMSG_LENGTH(GENL_HDRLEN),
|
||||||
|
.n.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK,
|
||||||
|
.n.nlmsg_type = GENL_ID_CTRL,
|
||||||
|
.g.cmd = CTRL_CMD_GETFAMILY,
|
||||||
|
};
|
||||||
|
struct nlmsghdr *nlh = &req.n;
|
||||||
|
struct genlmsghdr *ghdr = &req.g;
|
||||||
|
|
||||||
|
if (rtnl_open_byproto(&rth, 0, NETLINK_GENERIC) < 0) {
|
||||||
|
fprintf(stderr, "Cannot open generic netlink socket\n");
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
|
||||||
|
addattr_l(nlh, 128, CTRL_ATTR_FAMILY_NAME, family, strlen(family) + 1);
|
||||||
|
|
||||||
|
if (rtnl_talk(&rth, nlh, nlh, sizeof(req)) < 0) {
|
||||||
|
fprintf(stderr, "Error talking to the kernel\n");
|
||||||
|
goto errout;
|
||||||
|
}
|
||||||
|
|
||||||
|
{
|
||||||
|
struct rtattr *tb[CTRL_ATTR_MAX + 1];
|
||||||
|
int len = nlh->nlmsg_len;
|
||||||
|
struct rtattr *attrs;
|
||||||
|
|
||||||
|
if (nlh->nlmsg_type != GENL_ID_CTRL) {
|
||||||
|
fprintf(stderr, "Not a controller message, nlmsg_len=%d "
|
||||||
|
"nlmsg_type=0x%x\n", nlh->nlmsg_len, nlh->nlmsg_type);
|
||||||
|
goto errout;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (ghdr->cmd != CTRL_CMD_NEWFAMILY) {
|
||||||
|
fprintf(stderr, "Unknown controller command %d\n", ghdr->cmd);
|
||||||
|
goto errout;
|
||||||
|
}
|
||||||
|
|
||||||
|
len -= NLMSG_LENGTH(GENL_HDRLEN);
|
||||||
|
|
||||||
|
if (len < 0) {
|
||||||
|
fprintf(stderr, "wrong controller message len %d\n", len);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
attrs = (struct rtattr *) ((char *) ghdr + GENL_HDRLEN);
|
||||||
|
parse_rtattr(tb, CTRL_ATTR_MAX, attrs, len);
|
||||||
|
|
||||||
|
if (tb[CTRL_ATTR_FAMILY_ID] == NULL) {
|
||||||
|
fprintf(stderr, "Missing family id TLV\n");
|
||||||
|
goto errout;
|
||||||
|
}
|
||||||
|
|
||||||
|
ret = rta_getattr_u16(tb[CTRL_ATTR_FAMILY_ID]);
|
||||||
|
}
|
||||||
|
|
||||||
|
errout:
|
||||||
|
rtnl_close(&rth);
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
static void print_ctrl_cmd_flags(FILE *fp, __u32 fl)
|
static void print_ctrl_cmd_flags(FILE *fp, __u32 fl)
|
||||||
{
|
{
|
||||||
fprintf(fp, "\n\t\tCapabilities (0x%x):\n ", fl);
|
fprintf(fp, "\n\t\tCapabilities (0x%x):\n ", fl);
|
||||||
|
|
@ -105,7 +172,8 @@ static int print_ctrl_grp(FILE *fp, struct rtattr *arg, __u32 ctrl_ver)
|
||||||
/*
|
/*
|
||||||
* The controller sends one nlmsg per family
|
* The controller sends one nlmsg per family
|
||||||
*/
|
*/
|
||||||
static int print_ctrl(struct rtnl_ctrl_data *ctrl,
|
static int print_ctrl(const struct sockaddr_nl *who,
|
||||||
|
struct rtnl_ctrl_data *ctrl,
|
||||||
struct nlmsghdr *n, void *arg)
|
struct nlmsghdr *n, void *arg)
|
||||||
{
|
{
|
||||||
struct rtattr *tb[CTRL_ATTR_MAX + 1];
|
struct rtattr *tb[CTRL_ATTR_MAX + 1];
|
||||||
|
|
@ -125,8 +193,7 @@ static int print_ctrl(struct rtnl_ctrl_data *ctrl,
|
||||||
ghdr->cmd != CTRL_CMD_DELFAMILY &&
|
ghdr->cmd != CTRL_CMD_DELFAMILY &&
|
||||||
ghdr->cmd != CTRL_CMD_NEWFAMILY &&
|
ghdr->cmd != CTRL_CMD_NEWFAMILY &&
|
||||||
ghdr->cmd != CTRL_CMD_NEWMCAST_GRP &&
|
ghdr->cmd != CTRL_CMD_NEWMCAST_GRP &&
|
||||||
ghdr->cmd != CTRL_CMD_DELMCAST_GRP &&
|
ghdr->cmd != CTRL_CMD_DELMCAST_GRP) {
|
||||||
ghdr->cmd != CTRL_CMD_GETPOLICY) {
|
|
||||||
fprintf(stderr, "Unknown controller command %d\n", ghdr->cmd);
|
fprintf(stderr, "Unknown controller command %d\n", ghdr->cmd);
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
@ -139,7 +206,7 @@ static int print_ctrl(struct rtnl_ctrl_data *ctrl,
|
||||||
}
|
}
|
||||||
|
|
||||||
attrs = (struct rtattr *) ((char *) ghdr + GENL_HDRLEN);
|
attrs = (struct rtattr *) ((char *) ghdr + GENL_HDRLEN);
|
||||||
parse_rtattr_flags(tb, CTRL_ATTR_MAX, attrs, len, NLA_F_NESTED);
|
parse_rtattr(tb, CTRL_ATTR_MAX, attrs, len);
|
||||||
|
|
||||||
if (tb[CTRL_ATTR_FAMILY_NAME]) {
|
if (tb[CTRL_ATTR_FAMILY_NAME]) {
|
||||||
char *name = RTA_DATA(tb[CTRL_ATTR_FAMILY_NAME]);
|
char *name = RTA_DATA(tb[CTRL_ATTR_FAMILY_NAME]);
|
||||||
|
|
@ -162,36 +229,6 @@ static int print_ctrl(struct rtnl_ctrl_data *ctrl,
|
||||||
__u32 *ma = RTA_DATA(tb[CTRL_ATTR_MAXATTR]);
|
__u32 *ma = RTA_DATA(tb[CTRL_ATTR_MAXATTR]);
|
||||||
fprintf(fp, " max attribs: %d ",*ma);
|
fprintf(fp, " max attribs: %d ",*ma);
|
||||||
}
|
}
|
||||||
if (tb[CTRL_ATTR_OP_POLICY]) {
|
|
||||||
const struct rtattr *pos;
|
|
||||||
|
|
||||||
rtattr_for_each_nested(pos, tb[CTRL_ATTR_OP_POLICY]) {
|
|
||||||
struct rtattr *ptb[CTRL_ATTR_POLICY_DUMP_MAX + 1];
|
|
||||||
struct rtattr *pattrs = RTA_DATA(pos);
|
|
||||||
int plen = RTA_PAYLOAD(pos);
|
|
||||||
|
|
||||||
parse_rtattr_flags(ptb, CTRL_ATTR_POLICY_DUMP_MAX,
|
|
||||||
pattrs, plen, NLA_F_NESTED);
|
|
||||||
|
|
||||||
fprintf(fp, " op %d policies:",
|
|
||||||
pos->rta_type & ~NLA_F_NESTED);
|
|
||||||
|
|
||||||
if (ptb[CTRL_ATTR_POLICY_DO]) {
|
|
||||||
__u32 *v = RTA_DATA(ptb[CTRL_ATTR_POLICY_DO]);
|
|
||||||
|
|
||||||
fprintf(fp, " do=%d", *v);
|
|
||||||
}
|
|
||||||
|
|
||||||
if (ptb[CTRL_ATTR_POLICY_DUMP]) {
|
|
||||||
__u32 *v = RTA_DATA(ptb[CTRL_ATTR_POLICY_DUMP]);
|
|
||||||
|
|
||||||
fprintf(fp, " dump=%d", *v);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (tb[CTRL_ATTR_POLICY])
|
|
||||||
nl_print_policy(tb[CTRL_ATTR_POLICY], fp);
|
|
||||||
|
|
||||||
/* end of family definitions .. */
|
/* end of family definitions .. */
|
||||||
fprintf(fp,"\n");
|
fprintf(fp,"\n");
|
||||||
if (tb[CTRL_ATTR_OPS]) {
|
if (tb[CTRL_ATTR_OPS]) {
|
||||||
|
|
@ -240,9 +277,10 @@ static int print_ctrl(struct rtnl_ctrl_data *ctrl,
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
static int print_ctrl2(struct nlmsghdr *n, void *arg)
|
static int print_ctrl2(const struct sockaddr_nl *who,
|
||||||
|
struct nlmsghdr *n, void *arg)
|
||||||
{
|
{
|
||||||
return print_ctrl(NULL, n, arg);
|
return print_ctrl(who, NULL, n, arg);
|
||||||
}
|
}
|
||||||
|
|
||||||
static int ctrl_list(int cmd, int argc, char **argv)
|
static int ctrl_list(int cmd, int argc, char **argv)
|
||||||
|
|
@ -261,16 +299,13 @@ static int ctrl_list(int cmd, int argc, char **argv)
|
||||||
.g.cmd = CTRL_CMD_GETFAMILY,
|
.g.cmd = CTRL_CMD_GETFAMILY,
|
||||||
};
|
};
|
||||||
struct nlmsghdr *nlh = &req.n;
|
struct nlmsghdr *nlh = &req.n;
|
||||||
struct nlmsghdr *answer = NULL;
|
|
||||||
|
|
||||||
if (rtnl_open_byproto(&rth, 0, NETLINK_GENERIC) < 0) {
|
if (rtnl_open_byproto(&rth, 0, NETLINK_GENERIC) < 0) {
|
||||||
fprintf(stderr, "Cannot open generic netlink socket\n");
|
fprintf(stderr, "Cannot open generic netlink socket\n");
|
||||||
exit(1);
|
exit(1);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (cmd == CTRL_CMD_GETFAMILY || cmd == CTRL_CMD_GETPOLICY) {
|
if (cmd == CTRL_CMD_GETFAMILY) {
|
||||||
req.g.cmd = cmd;
|
|
||||||
|
|
||||||
if (argc != 2) {
|
if (argc != 2) {
|
||||||
fprintf(stderr, "Wrong number of params\n");
|
fprintf(stderr, "Wrong number of params\n");
|
||||||
return -1;
|
return -1;
|
||||||
|
|
@ -278,7 +313,7 @@ static int ctrl_list(int cmd, int argc, char **argv)
|
||||||
|
|
||||||
if (matches(*argv, "name") == 0) {
|
if (matches(*argv, "name") == 0) {
|
||||||
NEXT_ARG();
|
NEXT_ARG();
|
||||||
strlcpy(d, *argv, sizeof(d));
|
strncpy(d, *argv, sizeof (d) - 1);
|
||||||
addattr_l(nlh, 128, CTRL_ATTR_FAMILY_NAME,
|
addattr_l(nlh, 128, CTRL_ATTR_FAMILY_NAME,
|
||||||
d, strlen(d) + 1);
|
d, strlen(d) + 1);
|
||||||
} else if (matches(*argv, "id") == 0) {
|
} else if (matches(*argv, "id") == 0) {
|
||||||
|
|
@ -295,22 +330,20 @@ static int ctrl_list(int cmd, int argc, char **argv)
|
||||||
fprintf(stderr, "Wrong params\n");
|
fprintf(stderr, "Wrong params\n");
|
||||||
goto ctrl_done;
|
goto ctrl_done;
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
if (cmd == CTRL_CMD_GETFAMILY) {
|
if (rtnl_talk(&rth, nlh, nlh, sizeof(req)) < 0) {
|
||||||
if (rtnl_talk(&rth, nlh, &answer) < 0) {
|
|
||||||
fprintf(stderr, "Error talking to the kernel\n");
|
fprintf(stderr, "Error talking to the kernel\n");
|
||||||
goto ctrl_done;
|
goto ctrl_done;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (print_ctrl2(answer, (void *) stdout) < 0) {
|
if (print_ctrl2(NULL, nlh, (void *) stdout) < 0) {
|
||||||
fprintf(stderr, "Dump terminated\n");
|
fprintf(stderr, "Dump terminated\n");
|
||||||
goto ctrl_done;
|
goto ctrl_done;
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if (cmd == CTRL_CMD_UNSPEC || cmd == CTRL_CMD_GETPOLICY) {
|
if (cmd == CTRL_CMD_UNSPEC) {
|
||||||
nlh->nlmsg_flags = NLM_F_ROOT|NLM_F_MATCH|NLM_F_REQUEST;
|
nlh->nlmsg_flags = NLM_F_ROOT|NLM_F_MATCH|NLM_F_REQUEST;
|
||||||
nlh->nlmsg_seq = rth.dump = ++rth.seq;
|
nlh->nlmsg_seq = rth.dump = ++rth.seq;
|
||||||
|
|
||||||
|
|
@ -325,7 +358,6 @@ static int ctrl_list(int cmd, int argc, char **argv)
|
||||||
|
|
||||||
ret = 0;
|
ret = 0;
|
||||||
ctrl_done:
|
ctrl_done:
|
||||||
free(answer);
|
|
||||||
rtnl_close(&rth);
|
rtnl_close(&rth);
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
@ -361,8 +393,6 @@ static int parse_ctrl(struct genl_util *a, int argc, char **argv)
|
||||||
matches(*argv, "show") == 0 ||
|
matches(*argv, "show") == 0 ||
|
||||||
matches(*argv, "lst") == 0)
|
matches(*argv, "lst") == 0)
|
||||||
return ctrl_list(CTRL_CMD_UNSPEC, argc-1, argv+1);
|
return ctrl_list(CTRL_CMD_UNSPEC, argc-1, argv+1);
|
||||||
if (matches(*argv, "policy") == 0)
|
|
||||||
return ctrl_list(CTRL_CMD_GETPOLICY, argc-1, argv+1);
|
|
||||||
if (matches(*argv, "help") == 0)
|
if (matches(*argv, "help") == 0)
|
||||||
return usage();
|
return usage();
|
||||||
|
|
||||||
|
|
|
||||||
39
genl/genl.c
39
genl/genl.c
|
|
@ -13,6 +13,7 @@
|
||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
#include <stdlib.h>
|
#include <stdlib.h>
|
||||||
#include <unistd.h>
|
#include <unistd.h>
|
||||||
|
#include <syslog.h>
|
||||||
#include <fcntl.h>
|
#include <fcntl.h>
|
||||||
#include <dlfcn.h>
|
#include <dlfcn.h>
|
||||||
#include <sys/socket.h>
|
#include <sys/socket.h>
|
||||||
|
|
@ -22,19 +23,21 @@
|
||||||
#include <errno.h>
|
#include <errno.h>
|
||||||
#include <linux/netlink.h>
|
#include <linux/netlink.h>
|
||||||
#include <linux/rtnetlink.h> /* until we put our own header */
|
#include <linux/rtnetlink.h> /* until we put our own header */
|
||||||
#include "version.h"
|
#include "SNAPSHOT.h"
|
||||||
#include "utils.h"
|
#include "utils.h"
|
||||||
#include "genl_utils.h"
|
#include "genl_utils.h"
|
||||||
|
|
||||||
int show_stats;
|
int show_stats = 0;
|
||||||
int show_details;
|
int show_details = 0;
|
||||||
int show_raw;
|
int show_raw = 0;
|
||||||
|
int resolve_hosts = 0;
|
||||||
|
|
||||||
static void *BODY;
|
static void *BODY;
|
||||||
static struct genl_util *genl_list;
|
static struct genl_util * genl_list;
|
||||||
|
|
||||||
|
|
||||||
static int print_nofopt(struct nlmsghdr *n, void *arg)
|
static int print_nofopt(const struct sockaddr_nl *who, struct nlmsghdr *n,
|
||||||
|
void *arg)
|
||||||
{
|
{
|
||||||
fprintf((FILE *) arg, "unknown genl type ..\n");
|
fprintf((FILE *) arg, "unknown genl type ..\n");
|
||||||
return 0;
|
return 0;
|
||||||
|
|
@ -43,9 +46,8 @@ static int print_nofopt(struct nlmsghdr *n, void *arg)
|
||||||
static int parse_nofopt(struct genl_util *f, int argc, char **argv)
|
static int parse_nofopt(struct genl_util *f, int argc, char **argv)
|
||||||
{
|
{
|
||||||
if (argc) {
|
if (argc) {
|
||||||
fprintf(stderr,
|
fprintf(stderr, "Unknown genl \"%s\", hence option \"%s\" "
|
||||||
"Unknown genl \"%s\", hence option \"%s\" is unparsable\n",
|
"is unparsable\n", f->name, *argv);
|
||||||
f->name, *argv);
|
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -98,10 +100,9 @@ static void usage(void) __attribute__((noreturn));
|
||||||
|
|
||||||
static void usage(void)
|
static void usage(void)
|
||||||
{
|
{
|
||||||
fprintf(stderr,
|
fprintf(stderr, "Usage: genl [ OPTIONS ] OBJECT | help }\n"
|
||||||
"Usage: genl [ OPTIONS ] OBJECT [help] }\n"
|
"where OBJECT := { ctrl etc }\n"
|
||||||
"where OBJECT := { ctrl etc }\n"
|
" OPTIONS := { -s[tatistics] | -d[etails] | -r[aw] }\n");
|
||||||
" OPTIONS := { -s[tatistics] | -d[etails] | -r[aw] | -V[ersion] | -h[elp] }\n");
|
|
||||||
exit(-1);
|
exit(-1);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -118,26 +119,24 @@ int main(int argc, char **argv)
|
||||||
} else if (matches(argv[1], "-raw") == 0) {
|
} else if (matches(argv[1], "-raw") == 0) {
|
||||||
++show_raw;
|
++show_raw;
|
||||||
} else if (matches(argv[1], "-Version") == 0) {
|
} else if (matches(argv[1], "-Version") == 0) {
|
||||||
printf("genl utility, iproute2-%s\n", version);
|
printf("genl utility, iproute2-ss%s\n", SNAPSHOT);
|
||||||
exit(0);
|
exit(0);
|
||||||
} else if (matches(argv[1], "-help") == 0) {
|
} else if (matches(argv[1], "-help") == 0) {
|
||||||
usage();
|
usage();
|
||||||
} else {
|
} else {
|
||||||
fprintf(stderr,
|
fprintf(stderr, "Option \"%s\" is unknown, try "
|
||||||
"Option \"%s\" is unknown, try \"genl -help\".\n",
|
"\"genl -help\".\n", argv[1]);
|
||||||
argv[1]);
|
|
||||||
exit(-1);
|
exit(-1);
|
||||||
}
|
}
|
||||||
argc--; argv++;
|
argc--; argv++;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (argc > 1) {
|
if (argc > 1) {
|
||||||
struct genl_util *a;
|
|
||||||
int ret;
|
int ret;
|
||||||
|
struct genl_util *a = NULL;
|
||||||
a = get_genl_kind(argv[1]);
|
a = get_genl_kind(argv[1]);
|
||||||
if (!a) {
|
if (!a) {
|
||||||
fprintf(stderr, "bad genl %s\n", argv[1]);
|
fprintf(stderr,"bad genl %s\n", argv[1]);
|
||||||
exit(-1);
|
exit(-1);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -1,15 +1,17 @@
|
||||||
/* SPDX-License-Identifier: GPL-2.0 */
|
|
||||||
#ifndef _TC_UTIL_H_
|
#ifndef _TC_UTIL_H_
|
||||||
#define _TC_UTIL_H_ 1
|
#define _TC_UTIL_H_ 1
|
||||||
|
|
||||||
#include <linux/genetlink.h>
|
|
||||||
#include "utils.h"
|
#include "utils.h"
|
||||||
|
#include "linux/genetlink.h"
|
||||||
|
|
||||||
struct genl_util {
|
struct genl_util
|
||||||
|
{
|
||||||
struct genl_util *next;
|
struct genl_util *next;
|
||||||
char name[16];
|
char name[16];
|
||||||
int (*parse_genlopt)(struct genl_util *fu, int argc, char **argv);
|
int (*parse_genlopt)(struct genl_util *fu, int argc, char **argv);
|
||||||
int (*print_genlopt)(struct nlmsghdr *n, void *arg);
|
int (*print_genlopt)(const struct sockaddr_nl *who, struct nlmsghdr *n, void *arg);
|
||||||
};
|
};
|
||||||
|
|
||||||
|
extern int genl_ctrl_resolve_family(const char *family);
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
|
||||||
|
|
@ -1,4 +1,3 @@
|
||||||
/* SPDX-License-Identifier: GPL-2.0 */
|
|
||||||
/*
|
/*
|
||||||
* This file creates a dummy version of dynamic loading
|
* This file creates a dummy version of dynamic loading
|
||||||
* for environments where dynamic linking
|
* for environments where dynamic linking
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1 @@
|
||||||
|
static const char SNAPSHOT[] = "170705";
|
||||||
|
|
@ -1,4 +1,3 @@
|
||||||
/* SPDX-License-Identifier: GPL-2.0 */
|
|
||||||
#ifndef __BPF_API__
|
#ifndef __BPF_API__
|
||||||
#define __BPF_API__
|
#define __BPF_API__
|
||||||
|
|
||||||
|
|
@ -19,19 +18,6 @@
|
||||||
|
|
||||||
#include "bpf_elf.h"
|
#include "bpf_elf.h"
|
||||||
|
|
||||||
/** libbpf pin type. */
|
|
||||||
enum libbpf_pin_type {
|
|
||||||
LIBBPF_PIN_NONE,
|
|
||||||
/* PIN_BY_NAME: pin maps by name (in /sys/fs/bpf by default) */
|
|
||||||
LIBBPF_PIN_BY_NAME,
|
|
||||||
};
|
|
||||||
|
|
||||||
/** Type helper macros. */
|
|
||||||
|
|
||||||
#define __uint(name, val) int (*name)[val]
|
|
||||||
#define __type(name, val) typeof(val) *name
|
|
||||||
#define __array(name, val) typeof(val) *name[]
|
|
||||||
|
|
||||||
/** Misc macros. */
|
/** Misc macros. */
|
||||||
|
|
||||||
#ifndef __stringify
|
#ifndef __stringify
|
||||||
|
|
|
||||||
|
|
@ -1,4 +1,3 @@
|
||||||
/* SPDX-License-Identifier: GPL-2.0 */
|
|
||||||
#ifndef __BPF_ELF__
|
#ifndef __BPF_ELF__
|
||||||
#define __BPF_ELF__
|
#define __BPF_ELF__
|
||||||
|
|
||||||
|
|
@ -37,17 +36,6 @@ struct bpf_elf_map {
|
||||||
__u32 flags;
|
__u32 flags;
|
||||||
__u32 id;
|
__u32 id;
|
||||||
__u32 pinning;
|
__u32 pinning;
|
||||||
__u32 inner_id;
|
|
||||||
__u32 inner_idx;
|
|
||||||
};
|
};
|
||||||
|
|
||||||
#define BPF_ANNOTATE_KV_PAIR(name, type_key, type_val) \
|
|
||||||
struct ____btf_map_##name { \
|
|
||||||
type_key key; \
|
|
||||||
type_val value; \
|
|
||||||
}; \
|
|
||||||
struct ____btf_map_##name \
|
|
||||||
__attribute__ ((section(".maps." #name), used)) \
|
|
||||||
____btf_map_##name = { }
|
|
||||||
|
|
||||||
#endif /* __BPF_ELF__ */
|
#endif /* __BPF_ELF__ */
|
||||||
|
|
|
||||||
|
|
@ -1,10 +1,8 @@
|
||||||
/* SPDX-License-Identifier: GPL-2.0 */
|
|
||||||
#ifndef __BPF_SCM__
|
#ifndef __BPF_SCM__
|
||||||
#define __BPF_SCM__
|
#define __BPF_SCM__
|
||||||
|
|
||||||
#include <sys/types.h>
|
#include <sys/types.h>
|
||||||
#include <sys/socket.h>
|
#include <sys/socket.h>
|
||||||
#include <sys/un.h>
|
|
||||||
|
|
||||||
#include "utils.h"
|
#include "utils.h"
|
||||||
#include "bpf_elf.h"
|
#include "bpf_elf.h"
|
||||||
|
|
|
||||||
|
|
@ -14,7 +14,6 @@
|
||||||
#define __BPF_UTIL__
|
#define __BPF_UTIL__
|
||||||
|
|
||||||
#include <linux/bpf.h>
|
#include <linux/bpf.h>
|
||||||
#include <linux/btf.h>
|
|
||||||
#include <linux/filter.h>
|
#include <linux/filter.h>
|
||||||
#include <linux/magic.h>
|
#include <linux/magic.h>
|
||||||
#include <linux/elf-em.h>
|
#include <linux/elf-em.h>
|
||||||
|
|
@ -57,29 +56,13 @@ struct bpf_cfg_ops {
|
||||||
void (*ebpf_cb)(void *nl, int fd, const char *annotation);
|
void (*ebpf_cb)(void *nl, int fd, const char *annotation);
|
||||||
};
|
};
|
||||||
|
|
||||||
enum bpf_mode {
|
|
||||||
CBPF_BYTECODE,
|
|
||||||
CBPF_FILE,
|
|
||||||
EBPF_OBJECT,
|
|
||||||
EBPF_PINNED,
|
|
||||||
BPF_MODE_MAX,
|
|
||||||
};
|
|
||||||
|
|
||||||
struct bpf_cfg_in {
|
struct bpf_cfg_in {
|
||||||
const char *object;
|
const char *object;
|
||||||
const char *section;
|
const char *section;
|
||||||
const char *uds;
|
const char *uds;
|
||||||
enum bpf_prog_type type;
|
|
||||||
enum bpf_mode mode;
|
|
||||||
__u32 ifindex;
|
|
||||||
bool verbose;
|
|
||||||
int argc;
|
int argc;
|
||||||
char **argv;
|
char **argv;
|
||||||
struct sock_filter opcodes[BPF_MAXINSNS];
|
struct sock_filter *ops;
|
||||||
union {
|
|
||||||
int n_opcodes;
|
|
||||||
int prog_fd;
|
|
||||||
};
|
|
||||||
};
|
};
|
||||||
|
|
||||||
/* ALU ops on registers, bpf_add|sub|...: dst_reg += src_reg */
|
/* ALU ops on registers, bpf_add|sub|...: dst_reg += src_reg */
|
||||||
|
|
@ -261,46 +244,27 @@ struct bpf_cfg_in {
|
||||||
.off = 0, \
|
.off = 0, \
|
||||||
.imm = 0 })
|
.imm = 0 })
|
||||||
|
|
||||||
int bpf_parse_common(struct bpf_cfg_in *cfg, const struct bpf_cfg_ops *ops);
|
int bpf_parse_common(enum bpf_prog_type type, struct bpf_cfg_in *cfg,
|
||||||
int bpf_load_common(struct bpf_cfg_in *cfg, const struct bpf_cfg_ops *ops,
|
const struct bpf_cfg_ops *ops, void *nl);
|
||||||
void *nl);
|
|
||||||
int bpf_parse_and_load_common(struct bpf_cfg_in *cfg,
|
|
||||||
const struct bpf_cfg_ops *ops, void *nl);
|
|
||||||
|
|
||||||
const char *bpf_prog_to_default_section(enum bpf_prog_type type);
|
const char *bpf_prog_to_default_section(enum bpf_prog_type type);
|
||||||
|
|
||||||
int bpf_graft_map(const char *map_path, uint32_t *key, int argc, char **argv);
|
int bpf_graft_map(const char *map_path, uint32_t *key, int argc, char **argv);
|
||||||
int bpf_trace_pipe(void);
|
int bpf_trace_pipe(void);
|
||||||
|
|
||||||
void bpf_print_ops(struct rtattr *bpf_ops, __u16 len);
|
void bpf_print_ops(FILE *f, struct rtattr *bpf_ops, __u16 len);
|
||||||
|
|
||||||
int bpf_prog_load_dev(enum bpf_prog_type type, const struct bpf_insn *insns,
|
int bpf_prog_load(enum bpf_prog_type type, const struct bpf_insn *insns,
|
||||||
size_t size_insns, const char *license, __u32 ifindex,
|
size_t size_insns, const char *license, char *log,
|
||||||
char *log, size_t size_log);
|
size_t size_log);
|
||||||
int bpf_program_load(enum bpf_prog_type type, const struct bpf_insn *insns,
|
|
||||||
size_t size_insns, const char *license, char *log,
|
|
||||||
size_t size_log);
|
|
||||||
|
|
||||||
int bpf_prog_attach_fd(int prog_fd, int target_fd, enum bpf_attach_type type);
|
int bpf_prog_attach_fd(int prog_fd, int target_fd, enum bpf_attach_type type);
|
||||||
int bpf_prog_detach_fd(int target_fd, enum bpf_attach_type type);
|
int bpf_prog_detach_fd(int target_fd, enum bpf_attach_type type);
|
||||||
int bpf_program_attach(int prog_fd, int target_fd, enum bpf_attach_type type);
|
|
||||||
|
|
||||||
int bpf_dump_prog_info(FILE *f, uint32_t id);
|
|
||||||
|
|
||||||
#ifdef HAVE_ELF
|
#ifdef HAVE_ELF
|
||||||
int bpf_send_map_fds(const char *path, const char *obj);
|
int bpf_send_map_fds(const char *path, const char *obj);
|
||||||
int bpf_recv_map_fds(const char *path, int *fds, struct bpf_map_aux *aux,
|
int bpf_recv_map_fds(const char *path, int *fds, struct bpf_map_aux *aux,
|
||||||
unsigned int entries);
|
unsigned int entries);
|
||||||
#ifdef HAVE_LIBBPF
|
|
||||||
int iproute2_bpf_elf_ctx_init(struct bpf_cfg_in *cfg);
|
|
||||||
int iproute2_bpf_fetch_ancillary(void);
|
|
||||||
int iproute2_get_root_path(char *root_path, size_t len);
|
|
||||||
bool iproute2_is_pin_map(const char *libbpf_map_name, char *pathname);
|
|
||||||
bool iproute2_is_map_in_map(const char *libbpf_map_name, struct bpf_elf_map *imap,
|
|
||||||
struct bpf_elf_map *omap, char *omap_name);
|
|
||||||
int iproute2_find_map_name_by_id(unsigned int map_id, char *name);
|
|
||||||
int iproute2_load_libbpf(struct bpf_cfg_in *cfg);
|
|
||||||
#endif /* HAVE_LIBBPF */
|
|
||||||
#else
|
#else
|
||||||
static inline int bpf_send_map_fds(const char *path, const char *obj)
|
static inline int bpf_send_map_fds(const char *path, const char *obj)
|
||||||
{
|
{
|
||||||
|
|
@ -313,15 +277,5 @@ static inline int bpf_recv_map_fds(const char *path, int *fds,
|
||||||
{
|
{
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
#ifdef HAVE_LIBBPF
|
|
||||||
static inline int iproute2_load_libbpf(struct bpf_cfg_in *cfg)
|
|
||||||
{
|
|
||||||
fprintf(stderr, "No ELF library support compiled in.\n");
|
|
||||||
return -1;
|
|
||||||
}
|
|
||||||
#endif /* HAVE_LIBBPF */
|
|
||||||
#endif /* HAVE_ELF */
|
#endif /* HAVE_ELF */
|
||||||
|
|
||||||
const char *get_libbpf_version(void);
|
|
||||||
|
|
||||||
#endif /* __BPF_UTIL__ */
|
#endif /* __BPF_UTIL__ */
|
||||||
|
|
|
||||||
|
|
@ -1,6 +0,0 @@
|
||||||
#ifndef __CG_MAP_H__
|
|
||||||
#define __CG_MAP_H__
|
|
||||||
|
|
||||||
const char *cg_id_to_path(__u64 id);
|
|
||||||
|
|
||||||
#endif /* __CG_MAP_H__ */
|
|
||||||
|
|
@ -1,9 +1,6 @@
|
||||||
/* SPDX-License-Identifier: GPL-2.0 */
|
|
||||||
#ifndef __COLOR_H__
|
#ifndef __COLOR_H__
|
||||||
#define __COLOR_H__ 1
|
#define __COLOR_H__ 1
|
||||||
|
|
||||||
#include <stdbool.h>
|
|
||||||
|
|
||||||
enum color_attr {
|
enum color_attr {
|
||||||
COLOR_IFNAME,
|
COLOR_IFNAME,
|
||||||
COLOR_MAC,
|
COLOR_MAC,
|
||||||
|
|
@ -11,17 +8,11 @@ enum color_attr {
|
||||||
COLOR_INET6,
|
COLOR_INET6,
|
||||||
COLOR_OPERSTATE_UP,
|
COLOR_OPERSTATE_UP,
|
||||||
COLOR_OPERSTATE_DOWN,
|
COLOR_OPERSTATE_DOWN,
|
||||||
COLOR_NONE
|
COLOR_CLEAR
|
||||||
};
|
};
|
||||||
|
|
||||||
enum color_opt {
|
void enable_color(void);
|
||||||
COLOR_OPT_NEVER = 0,
|
void set_color_palette(void);
|
||||||
COLOR_OPT_AUTO = 1,
|
|
||||||
COLOR_OPT_ALWAYS = 2
|
|
||||||
};
|
|
||||||
|
|
||||||
bool check_enable_color(int color, int json);
|
|
||||||
bool matches_color(const char *arg, int *val);
|
|
||||||
int color_fprintf(FILE *fp, enum color_attr attr, const char *fmt, ...);
|
int color_fprintf(FILE *fp, enum color_attr attr, const char *fmt, ...);
|
||||||
enum color_attr ifa_family_color(__u8 ifa_family);
|
enum color_attr ifa_family_color(__u8 ifa_family);
|
||||||
enum color_attr oper_state_color(__u8 state);
|
enum color_attr oper_state_color(__u8 state);
|
||||||
|
|
|
||||||
|
|
@ -1,4 +1,3 @@
|
||||||
/* SPDX-License-Identifier: GPL-2.0 */
|
|
||||||
/*
|
/*
|
||||||
* Stub dlfcn implementation for systems that lack shared library support
|
* Stub dlfcn implementation for systems that lack shared library support
|
||||||
* but obviously can still reference compiled-in symbols.
|
* but obviously can still reference compiled-in symbols.
|
||||||
|
|
|
||||||
|
|
@ -1,4 +1,3 @@
|
||||||
/* SPDX-License-Identifier: GPL-2.0 */
|
|
||||||
#ifndef _IP6TABLES_USER_H
|
#ifndef _IP6TABLES_USER_H
|
||||||
#define _IP6TABLES_USER_H
|
#define _IP6TABLES_USER_H
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -1,4 +1,3 @@
|
||||||
/* SPDX-License-Identifier: GPL-2.0 */
|
|
||||||
#ifndef _IPTABLES_USER_H
|
#ifndef _IPTABLES_USER_H
|
||||||
#define _IPTABLES_USER_H
|
#define _IPTABLES_USER_H
|
||||||
|
|
||||||
|
|
@ -12,7 +11,7 @@ extern int do_command4(int argc, char *argv[], char **table,
|
||||||
struct xtc_handle **handle, bool restore);
|
struct xtc_handle **handle, bool restore);
|
||||||
extern int delete_chain4(const xt_chainlabel chain, int verbose,
|
extern int delete_chain4(const xt_chainlabel chain, int verbose,
|
||||||
struct xtc_handle *handle);
|
struct xtc_handle *handle);
|
||||||
extern int flush_entries4(const xt_chainlabel chain, int verbose,
|
extern int flush_entries4(const xt_chainlabel chain, int verbose,
|
||||||
struct xtc_handle *handle);
|
struct xtc_handle *handle);
|
||||||
extern int for_each_chain4(int (*fn)(const xt_chainlabel, int, struct xtc_handle *),
|
extern int for_each_chain4(int (*fn)(const xt_chainlabel, int, struct xtc_handle *),
|
||||||
int verbose, int builtinstoo, struct xtc_handle *handle);
|
int verbose, int builtinstoo, struct xtc_handle *handle);
|
||||||
|
|
|
||||||
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue