Merge ../iproute2-next

This commit is contained in:
Stephen Hemminger 2019-01-07 11:36:41 -08:00
commit 724ec5aeb0
72 changed files with 1639 additions and 1038 deletions

View File

@ -40,12 +40,6 @@ DEFINES+=-DCONFDIR=\"$(CONFDIR)\" \
-DNETNS_RUN_DIR=\"$(NETNS_RUN_DIR)\" \
-DNETNS_ETC_DIR=\"$(NETNS_ETC_DIR)\"
#options for decnet
ADDLIB+=dnet_ntop.o dnet_pton.o
#options for ipx
ADDLIB+=ipx_ntop.o ipx_pton.o
#options for mpls
ADDLIB+=mpls_ntop.o mpls_pton.o

View File

@ -1,33 +0,0 @@
Here are a few quick points about DECnet support...
o iproute2 is the tool of choice for configuring the DECnet support for
Linux. For many features, it is the only tool which can be used to
configure them.
o No name resolution is available as yet, all addresses must be
entered numerically.
o Remember to set the hardware address of the interface using:
ip link set ethX address xx:xx:xx:xx:xx:xx
(where xx:xx:xx:xx:xx:xx is the MAC address for your DECnet node
address)
if your Ethernet card won't listen to more than one unicast
mac address at once. If the Linux DECnet stack doesn't talk to
any other DECnet nodes, then check this with tcpdump and if it's
a problem, change the mac address (but do this _before_ starting
any other network protocol on the interface)
o Whilst you can use ip addr add to add more than one DECnet address to an
interface, don't expect addresses which are not the same as the
kernel's node address to work properly with 2.4 kernels. This should
be fine with 2.6 kernels as the routing code has been extensively
modified and improved.
o The DECnet support is currently self contained. It does not depend on
the libdnet library.
Steve Whitehouse <steve@chygwyn.com>

View File

@ -42,7 +42,7 @@ in rsvp/cbqinit.eth1.
Terminology and advice about setting CBQ parameters may be found in Sally Floyd's
papers.
papers.
Pairs X:Y are class handles, X:0 are qdisc handles.

View File

@ -7,9 +7,9 @@ This tool is a generalized and more feature-complete replacement for the old
In addition to routing cache statistics, it supports any kind of statistics
the linux kernel exports via a file in /proc/net/stat. In a stock 2.6.9
kernel, this is
per-protocol neighbour cache statistics
(ipv4, ipv6, atm, decnet)
kernel, this is
per-protocol neighbour cache statistics
(ipv4, ipv6, atm)
routing cache statistics
(ipv4)
connection tracking statistics
@ -29,7 +29,7 @@ In order to get a list of supported statistics files, you can run
lnstat -d
It will display something like
/proc/net/stat/arp_cache:
1: entries
2: allocs
@ -52,19 +52,19 @@ arp_cach|rt_cache|arp_cach|
You can specify the interval (e.g. 10 seconds) by:
lnstat -i 10
You can specify to only use one particular statistics file:
lnstat -f ip_conntrack
You can specify individual field widths
You can specify individual field widths
lnstat -k arp_cache:entries,rt_cache:entries -w 20,8
You can specify not to print a header at all
lnstat -s 0
You can specify to print a header only at start of the program
@ -76,6 +76,5 @@ You can specify to print a header at start and every 20 lines:
lnstat -s 20
You can specify the number of samples you want to take (e.g. 5):
lnstat -c 5
lnstat -c 5

View File

@ -302,7 +302,7 @@ _tc_qdisc_options()
;;
gred)
_tc_once_attr 'setup vqs default grio vq prio limit min max avpkt \
burst probability bandwidth'
burst probability bandwidth ecn harddrop'
return 0
;;
hhf)

View File

@ -97,6 +97,8 @@ static int batch(const char *name)
return EXIT_FAILURE;
}
rtnl_set_strict_dump(&rth);
cmdlineno = 0;
while (getcmdline(&line, &len, stdin) != -1) {
char *largv[100];
@ -205,6 +207,8 @@ main(int argc, char **argv)
if (rtnl_open(&rth, 0) < 0)
exit(1);
rtnl_set_strict_dump(&rth);
if (argc > 1)
return do_cmd(argv[1], argc-1, argv+1);

View File

@ -30,7 +30,7 @@
#include "rt_names.h"
#include "utils.h"
static unsigned int filter_index, filter_vlan, filter_state;
static unsigned int filter_index, filter_vlan, filter_state, filter_master;
static void usage(void)
{
@ -256,20 +256,49 @@ int print_fdb(struct nlmsghdr *n, void *arg)
return 0;
}
/*
 * Request-filter callback used when the fdb dump is sent as a link dump.
 *
 * If a device filter is active, its index is written into the ifinfomsg
 * header located at the start of the message payload.  If a bridge master
 * filter is active, it is appended to the request as a 32-bit IFLA_MASTER
 * attribute.
 *
 * @nlh:    netlink message being built
 * @reqlen: size limit handed to addattr32() for the whole request
 *
 * Returns 0 on success, or the non-zero result of addattr32() if the
 * attribute could not be added.
 */
static int fdb_linkdump_filter(struct nlmsghdr *nlh, int reqlen)
{
	if (filter_index != 0) {
		struct ifinfomsg *hdr = NLMSG_DATA(nlh);

		hdr->ifi_index = filter_index;
	}

	/* Without a master filter there is no attribute to append. */
	if (filter_master == 0)
		return 0;

	return addattr32(nlh, reqlen, IFLA_MASTER, filter_master);
}
/*
 * Request-filter callback used when the fdb dump is sent as a neighbour
 * dump (the strict-checking path).
 *
 * If a device filter is active, its index is written into the ndmsg header
 * located at the start of the message payload.  If a bridge master filter
 * is active, it is appended to the request as a 32-bit NDA_MASTER attribute.
 *
 * @nlh:    netlink message being built
 * @reqlen: size limit handed to addattr32() for the whole request
 *
 * Returns 0 on success, or the non-zero result of addattr32() if the
 * attribute could not be added.
 */
static int fdb_dump_filter(struct nlmsghdr *nlh, int reqlen)
{
	if (filter_index != 0) {
		struct ndmsg *hdr = NLMSG_DATA(nlh);

		hdr->ndm_ifindex = filter_index;
	}

	/* Without a master filter there is no attribute to append. */
	if (filter_master == 0)
		return 0;

	return addattr32(nlh, reqlen, NDA_MASTER, filter_master);
}
static int fdb_show(int argc, char **argv)
{
struct {
struct nlmsghdr n;
struct ifinfomsg ifm;
char buf[256];
} req = {
.n.nlmsg_len = NLMSG_LENGTH(sizeof(struct ifinfomsg)),
.ifm.ifi_family = PF_BRIDGE,
};
char *filter_dev = NULL;
char *br = NULL;
int msg_size = sizeof(struct ifinfomsg);
int rc;
while (argc > 0) {
if ((strcmp(*argv, "brport") == 0) || strcmp(*argv, "dev") == 0) {
@ -304,8 +333,7 @@ static int fdb_show(int argc, char **argv)
fprintf(stderr, "Cannot find bridge device \"%s\"\n", br);
return -1;
}
addattr32(&req.n, sizeof(req), IFLA_MASTER, br_ifindex);
msg_size += RTA_LENGTH(4);
filter_master = br_ifindex;
}
/*we'll keep around filter_dev for older kernels */
@ -313,10 +341,14 @@ static int fdb_show(int argc, char **argv)
filter_index = ll_name_to_index(filter_dev);
if (!filter_index)
return nodev(filter_dev);
req.ifm.ifi_index = filter_index;
}
if (rtnl_dump_request(&rth, RTM_GETNEIGH, &req.ifm, msg_size) < 0) {
if (rth.flags & RTNL_HANDLE_F_STRICT_CHK)
rc = rtnl_neighdump_req(&rth, PF_BRIDGE, fdb_dump_filter);
else
rc = rtnl_linkdump_req_filter_fn(&rth, PF_BRIDGE,
fdb_linkdump_filter);
if (rc < 0) {
perror("Cannot send dump request");
exit(1);
}

View File

@ -1920,10 +1920,80 @@ static int cmd_dev_eswitch(struct dl *dl)
return -ENOENT;
}
static void pr_out_param_value(struct dl *dl, int nla_type, struct nlattr *nl)
/*
 * One row of a parameter value conversion table: for the parameter called
 * @name, the string @vstr and the number @vuint denote the same value.
 */
struct param_val_conv {
	const char *name;
	const char *vstr;
	uint32_t vuint;
};

/*
 * Report whether any of the first @len rows of @param_val_conv belongs to
 * the parameter @name.
 */
static bool param_val_conv_exists(const struct param_val_conv *param_val_conv,
				  uint32_t len, const char *name)
{
	uint32_t idx = 0;

	while (idx < len) {
		const struct param_val_conv *row = &param_val_conv[idx++];

		if (strcmp(row->name, name) == 0)
			return true;
	}

	return false;
}

/*
 * Translate the string @vstr of parameter @name into its numeric value.
 *
 * The first row matching both @name and @vstr wins; its number is stored
 * in *@vuint and 0 is returned.  If no row matches, *@vuint is left
 * untouched and -ENOENT is returned.
 */
static int
param_val_conv_uint_get(const struct param_val_conv *param_val_conv,
			uint32_t len, const char *name, const char *vstr,
			uint32_t *vuint)
{
	const struct param_val_conv *row;
	uint32_t idx;

	for (idx = 0; idx < len; idx++) {
		row = &param_val_conv[idx];
		if (strcmp(row->name, name) != 0)
			continue;
		if (strcmp(row->vstr, vstr) != 0)
			continue;

		*vuint = row->vuint;
		return 0;
	}

	return -ENOENT;
}

/*
 * Translate the numeric value @vuint of parameter @name into its string.
 *
 * The first row matching both @name and @vuint wins; its string is stored
 * in *@vstr and 0 is returned.  If no row matches, *@vstr is left untouched
 * and -ENOENT is returned.
 */
static int
param_val_conv_str_get(const struct param_val_conv *param_val_conv,
		       uint32_t len, const char *name, uint32_t vuint,
		       const char **vstr)
{
	const struct param_val_conv *row;
	uint32_t idx;

	for (idx = 0; idx < len; idx++) {
		row = &param_val_conv[idx];
		if (strcmp(row->name, name) != 0)
			continue;
		if (row->vuint != vuint)
			continue;

		*vstr = row->vstr;
		return 0;
	}

	return -ENOENT;
}
/*
 * Conversion table between the string and numeric forms of parameter
 * values.  Each row ties one string to one number for the named parameter.
 */
static const struct param_val_conv param_val_conv[] = {
	{ .name = "fw_load_policy", .vstr = "driver",
	  .vuint = DEVLINK_PARAM_FW_LOAD_POLICY_VALUE_DRIVER },
	{ .name = "fw_load_policy", .vstr = "flash",
	  .vuint = DEVLINK_PARAM_FW_LOAD_POLICY_VALUE_FLASH },
};

/* Number of rows in param_val_conv[]. */
#define PARAM_VAL_CONV_LEN ARRAY_SIZE(param_val_conv)
static void pr_out_param_value(struct dl *dl, const char *nla_name,
int nla_type, struct nlattr *nl)
{
struct nlattr *nla_value[DEVLINK_ATTR_MAX + 1] = {};
struct nlattr *val_attr;
const char *vstr;
bool conv_exists;
int err;
err = mnl_attr_parse_nested(nl, attr_cb, nla_value);
@ -1939,15 +2009,51 @@ static void pr_out_param_value(struct dl *dl, int nla_type, struct nlattr *nl)
param_cmode_name(mnl_attr_get_u8(nla_value[DEVLINK_ATTR_PARAM_VALUE_CMODE])));
val_attr = nla_value[DEVLINK_ATTR_PARAM_VALUE_DATA];
conv_exists = param_val_conv_exists(param_val_conv, PARAM_VAL_CONV_LEN,
nla_name);
switch (nla_type) {
case MNL_TYPE_U8:
pr_out_uint(dl, "value", mnl_attr_get_u8(val_attr));
if (conv_exists) {
err = param_val_conv_str_get(param_val_conv,
PARAM_VAL_CONV_LEN,
nla_name,
mnl_attr_get_u8(val_attr),
&vstr);
if (err)
return;
pr_out_str(dl, "value", vstr);
} else {
pr_out_uint(dl, "value", mnl_attr_get_u8(val_attr));
}
break;
case MNL_TYPE_U16:
pr_out_uint(dl, "value", mnl_attr_get_u16(val_attr));
if (conv_exists) {
err = param_val_conv_str_get(param_val_conv,
PARAM_VAL_CONV_LEN,
nla_name,
mnl_attr_get_u16(val_attr),
&vstr);
if (err)
return;
pr_out_str(dl, "value", vstr);
} else {
pr_out_uint(dl, "value", mnl_attr_get_u16(val_attr));
}
break;
case MNL_TYPE_U32:
pr_out_uint(dl, "value", mnl_attr_get_u32(val_attr));
if (conv_exists) {
err = param_val_conv_str_get(param_val_conv,
PARAM_VAL_CONV_LEN,
nla_name,
mnl_attr_get_u32(val_attr),
&vstr);
if (err)
return;
pr_out_str(dl, "value", vstr);
} else {
pr_out_uint(dl, "value", mnl_attr_get_u32(val_attr));
}
break;
case MNL_TYPE_STRING:
pr_out_str(dl, "value", mnl_attr_get_str(val_attr));
@ -1962,6 +2068,7 @@ static void pr_out_param(struct dl *dl, struct nlattr **tb, bool array)
{
struct nlattr *nla_param[DEVLINK_ATTR_MAX + 1] = {};
struct nlattr *param_value_attr;
const char *nla_name;
int nla_type;
int err;
@ -1980,8 +2087,8 @@ static void pr_out_param(struct dl *dl, struct nlattr **tb, bool array)
nla_type = mnl_attr_get_u8(nla_param[DEVLINK_ATTR_PARAM_TYPE]);
pr_out_str(dl, "name",
mnl_attr_get_str(nla_param[DEVLINK_ATTR_PARAM_NAME]));
nla_name = mnl_attr_get_str(nla_param[DEVLINK_ATTR_PARAM_NAME]);
pr_out_str(dl, "name", nla_name);
if (!nla_param[DEVLINK_ATTR_PARAM_GENERIC])
pr_out_str(dl, "type", "driver-specific");
@ -1992,7 +2099,7 @@ static void pr_out_param(struct dl *dl, struct nlattr **tb, bool array)
mnl_attr_for_each_nested(param_value_attr,
nla_param[DEVLINK_ATTR_PARAM_VALUES_LIST]) {
pr_out_entry_start(dl);
pr_out_param_value(dl, nla_type, param_value_attr);
pr_out_param_value(dl, nla_name, nla_type, param_value_attr);
pr_out_entry_end(dl);
}
pr_out_array_end(dl);
@ -2097,6 +2204,7 @@ static int cmd_dev_param_set(struct dl *dl)
{
struct param_ctx ctx = {};
struct nlmsghdr *nlh;
bool conv_exists;
uint32_t val_u32;
uint16_t val_u16;
uint8_t val_u8;
@ -2124,10 +2232,22 @@ static int cmd_dev_param_set(struct dl *dl)
NLM_F_REQUEST | NLM_F_ACK);
dl_opts_put(nlh, dl);
conv_exists = param_val_conv_exists(param_val_conv, PARAM_VAL_CONV_LEN,
dl->opts.param_name);
mnl_attr_put_u8(nlh, DEVLINK_ATTR_PARAM_TYPE, ctx.nla_type);
switch (ctx.nla_type) {
case MNL_TYPE_U8:
err = strtouint8_t(dl->opts.param_value, &val_u8);
if (conv_exists) {
err = param_val_conv_uint_get(param_val_conv,
PARAM_VAL_CONV_LEN,
dl->opts.param_name,
dl->opts.param_value,
&val_u32);
val_u8 = val_u32;
} else {
err = strtouint8_t(dl->opts.param_value, &val_u8);
}
if (err)
goto err_param_value_parse;
if (val_u8 == ctx.value.vu8)
@ -2135,7 +2255,16 @@ static int cmd_dev_param_set(struct dl *dl)
mnl_attr_put_u8(nlh, DEVLINK_ATTR_PARAM_VALUE_DATA, val_u8);
break;
case MNL_TYPE_U16:
err = strtouint16_t(dl->opts.param_value, &val_u16);
if (conv_exists) {
err = param_val_conv_uint_get(param_val_conv,
PARAM_VAL_CONV_LEN,
dl->opts.param_name,
dl->opts.param_value,
&val_u32);
val_u16 = val_u32;
} else {
err = strtouint16_t(dl->opts.param_value, &val_u16);
}
if (err)
goto err_param_value_parse;
if (val_u16 == ctx.value.vu16)
@ -2143,7 +2272,14 @@ static int cmd_dev_param_set(struct dl *dl)
mnl_attr_put_u16(nlh, DEVLINK_ATTR_PARAM_VALUE_DATA, val_u16);
break;
case MNL_TYPE_U32:
err = strtouint32_t(dl->opts.param_value, &val_u32);
if (conv_exists)
err = param_val_conv_uint_get(param_val_conv,
PARAM_VAL_CONV_LEN,
dl->opts.param_name,
dl->opts.param_value,
&val_u32);
else
err = strtouint32_t(dl->opts.param_value, &val_u32);
if (err)
goto err_param_value_parse;
if (val_u32 == ctx.value.vu32)

View File

@ -6,8 +6,8 @@ What is it?
-----------
An extension to the filtering/classification architecture of Linux Traffic
Control.
Up to 2.6.8 the only action that could be "attached" to a filter was policing.
Control.
Up to 2.6.8 the only action that could be "attached" to a filter was policing.
i.e you could say something like:
-----
@ -17,7 +17,7 @@ tc filter add dev lo parent ffff: protocol ip prio 10 u32 match ip src \
which implies "if a packet is seen on the ingress of the lo device with
a source IP address of 127.0.0.1/32 we give it a classification id of 1:1 and
we execute a policing action which rate limits its bandwidth utilization
we execute a policing action which rate limits its bandwidth utilization
to 1.5Mbps".
The new extensions allow for more than just policing actions to be added.
@ -29,9 +29,9 @@ syntax which will work fine. Of course to get the required effect you need
both newer tc and kernel. If you are reading this you have the
right tc ;->
A side effect is that we can now get stateless firewalling to work with tc.
A side effect is that we can now get stateless firewalling to work with tc.
Essentially this is now an alternative to iptables.
I won't go into details of my dislike for iptables at times, but
I won't go into details of my dislike for iptables at times, but
scalability is one of the main issues; however, if you need stateful
classification - use netfilter (for now).
@ -61,7 +61,7 @@ tc filter add dev lo parent 1:0 protocol ip prio 10 u32 \
match ip src 127.0.0.1/32 flowid 1:1 \
action police mtu 4000 rate 1500kbit burst 90k
" generic Actions" (gact) at the moment are:
" generic Actions" (gact) at the moment are:
{ drop, pass, reclassify, continue}
(If you have others not listed here, give me a reason and we will add them)
+drop says to drop the packet
@ -93,43 +93,43 @@ decimal 12, then use flowid 1:c.
3) A feature i call pipe
The motivation is derived from Unix pipe mechanism but applied to packets.
Essentially take a matching packet and pass it through
Essentially take a matching packet and pass it through
action1 | action2 | action3 etc.
You could do something similar to this with the tc policer and the "continue"
operator but this rather restricts it to just the policer and requires
multiple rules (and lookups, hence quite inefficient);
operator but this rather restricts it to just the policer and requires
multiple rules (and lookups, hence quite inefficient);
as an example -- and please note that this is just an example _not_ The
as an example -- and please note that this is just an example _not_ The
Word You've Been Waiting For (yes i have had problems giving examples
which ended becoming dogma in documents and people modifying them a little
to look clever);
to look clever);
i selected the metering rates to be small so that i can show better how
i selected the metering rates to be small so that i can show better how
things work.
The script below does the following:
- an incoming packet from 10.0.0.21 is first given a firewall mark of 1.
- It is then metered to make sure it does not exceed its allocated rate of
The script below does the following:
- an incoming packet from 10.0.0.21 is first given a firewall mark of 1.
- It is then metered to make sure it does not exceed its allocated rate of
1Kbps. If it doesn't exceed rate, this is where we terminate action execution.
- If it does exceed its rate, its "color" changes to a mark of 2 and it is
- If it does exceed its rate, its "color" changes to a mark of 2 and it is
then passed through a second meter.
-The second meter is shared across all flows on that device [i am surprised
that this seems to be not a well-known feature of the policer; Bert was telling
-The second meter is shared across all flows on that device [i am surprised
that this seems to be not a well-known feature of the policer; Bert was telling
me that someone was writing a qdisc just to do sharing across multiple devices;
it must be the summer heat again; we've had someone doing that every year around
summer -- the key to sharing is to use an operator "index" in your policer
rules (example "index 20"). All your rules have to use the same index to
summer -- the key to sharing is to use an operator "index" in your policer
rules (example "index 20"). All your rules have to use the same index to
share.]
-If the second meter is exceeded the color of the flow changes further to 3.
-We then pass the packet to another meter which is shared across all devices
in the system. If this meter is exceeded we drop the packet.
Note the mark can be used further up the system to do things like policy
Note the mark can be used further up the system to do things like policy
or more interesting things on the egress.
------------------ cut here -------------------------------
@ -161,31 +161,31 @@ action ipt -j mark --set-mark 3 \
# and then attempt to borrow from a meter used by all devices in the
# system. Should this be exceeded, drop the packet on the floor.
action police index 20 mtu 5000 rate 1kbit burst 90k drop
---------------------------------
---------------------------------
Now let's see the actions installed with
Now let's see the actions installed with
"tc filter show parent ffff: dev eth0"
-------- output -----------
jroot# tc filter show parent ffff: dev eth0
filter protocol ip pref 1 u32
filter protocol ip pref 1 u32 fh 800: ht divisor 1
filter protocol ip pref 1 u32 fh 800::800 order 2048 key ht 800 bkt 0 flowid 1:15
filter protocol ip pref 1 u32
filter protocol ip pref 1 u32 fh 800: ht divisor 1
filter protocol ip pref 1 u32 fh 800::800 order 2048 key ht 800 bkt 0 flowid 1:15
action order 1: tablename: mangle hook: NF_IP_PRE_ROUTING
action order 1: tablename: mangle hook: NF_IP_PRE_ROUTING
target MARK set 0x1 index 2
action order 2: police 1 action pipe rate 1Kbit burst 9Kb mtu 2Kb
action order 2: police 1 action pipe rate 1Kbit burst 9Kb mtu 2Kb
action order 3: tablename: mangle hook: NF_IP_PRE_ROUTING
action order 3: tablename: mangle hook: NF_IP_PRE_ROUTING
target MARK set 0x2 index 1
action order 4: police 30 action pipe rate 1Kbit burst 10Kb mtu 5000b
action order 4: police 30 action pipe rate 1Kbit burst 10Kb mtu 5000b
action order 5: tablename: mangle hook: NF_IP_PRE_ROUTING
action order 5: tablename: mangle hook: NF_IP_PRE_ROUTING
target MARK set 0x3 index 3
action order 6: police 20 action drop rate 1Kbit burst 90Kb mtu 5000b
action order 6: police 20 action drop rate 1Kbit burst 90Kb mtu 5000b
match 0a000015/ffffffff at 12
-------------------------------
@ -209,31 +209,31 @@ Now lets take a look at the stats with "tc -s filter show parent ffff: dev eth0"
--------------
jroot# tc -s filter show parent ffff: dev eth0
filter protocol ip pref 1 u32
filter protocol ip pref 1 u32 fh 800: ht divisor 1
filter protocol ip pref 1 u32
filter protocol ip pref 1 u32 fh 800: ht divisor 1
filter protocol ip pref 1 u32 fh 800::800 order 2048 key ht 800 bkt 0 flowid 1:1
5
5
action order 1: tablename: mangle hook: NF_IP_PRE_ROUTING
action order 1: tablename: mangle hook: NF_IP_PRE_ROUTING
target MARK set 0x1 index 2
Sent 188832 bytes 2248 pkts (dropped 0, overlimits 0)
Sent 188832 bytes 2248 pkts (dropped 0, overlimits 0)
action order 2: police 1 action pipe rate 1Kbit burst 9Kb mtu 2Kb
Sent 188832 bytes 2248 pkts (dropped 0, overlimits 2122)
action order 2: police 1 action pipe rate 1Kbit burst 9Kb mtu 2Kb
Sent 188832 bytes 2248 pkts (dropped 0, overlimits 2122)
action order 3: tablename: mangle hook: NF_IP_PRE_ROUTING
action order 3: tablename: mangle hook: NF_IP_PRE_ROUTING
target MARK set 0x2 index 1
Sent 178248 bytes 2122 pkts (dropped 0, overlimits 0)
Sent 178248 bytes 2122 pkts (dropped 0, overlimits 0)
action order 4: police 30 action pipe rate 1Kbit burst 10Kb mtu 5000b
Sent 178248 bytes 2122 pkts (dropped 0, overlimits 1945)
action order 4: police 30 action pipe rate 1Kbit burst 10Kb mtu 5000b
Sent 178248 bytes 2122 pkts (dropped 0, overlimits 1945)
action order 5: tablename: mangle hook: NF_IP_PRE_ROUTING
action order 5: tablename: mangle hook: NF_IP_PRE_ROUTING
target MARK set 0x3 index 3
Sent 163380 bytes 1945 pkts (dropped 0, overlimits 0)
Sent 163380 bytes 1945 pkts (dropped 0, overlimits 0)
action order 6: police 20 action drop rate 1Kbit burst 90Kb mtu 5000b
Sent 163380 bytes 1945 pkts (dropped 0, overlimits 437)
action order 6: police 20 action drop rate 1Kbit burst 90Kb mtu 5000b
Sent 163380 bytes 1945 pkts (dropped 0, overlimits 437)
match 0a000015/ffffffff at 12
-------------------------------
@ -254,4 +254,3 @@ At the moment the focus has been on getting the architecture in place.
Expect new things in the spare time i have to work on this
(particularly around end of year when i typically get time off
from work).

View File

@ -1,13 +1,13 @@
gact <ACTION> [RAND] [INDEX]
Where:
ACTION := reclassify | drop | continue | pass | ok
Where:
ACTION := reclassify | drop | continue | pass | ok
RAND := random <RANDTYPE> <ACTION> <VAL>
RANDTYPE := netrand | determ
VAL := value not exceeding 10000
INDEX := index value used
ACTION semantics
- pass and ok are equivalent to accept
- continue allows to restart classification lookup
@ -42,14 +42,14 @@ filter u32 fh 800::800 order 2048 key ht 800 bkt 0 flowid 1:16 (rule hit 32 suc
random type none pass val 0
index 1 ref 1 bind 1 installed 59 sec used 35 sec
Sent 1680 bytes 20 pkts (dropped 20, overlimits 0 )
----
# example 2
#allow 1 out 10 randomly using the netrand generator
tc filter add dev eth0 parent ffff: protocol ip prio 6 u32 match ip src \
10.0.0.9/32 flowid 1:16 action drop random netrand ok 10
ping -c 20 10.0.0.9
----
@ -59,14 +59,14 @@ filter protocol ip pref 6 u32 filter protocol ip pref 6 u32 fh 800: ht divisor 1
random type netrand pass val 10
index 5 ref 1 bind 1 installed 49 sec used 25 sec
Sent 1680 bytes 20 pkts (dropped 16, overlimits 0 )
--------
#alternative: deterministically accept every second packet
tc filter add dev eth0 parent ffff: protocol ip prio 6 u32 match ip src \
10.0.0.9/32 flowid 1:16 action drop random determ ok 2
ping -c 20 10.0.0.9
tc -s filter show parent ffff: dev eth0
-----
filter protocol ip pref 6 u32 filter protocol ip pref 6 u32 fh 800: ht divisor 1filter protocol ip pref 6 u32 fh 800::800 order 2048 key ht 800 bkt 0 flowid 1:16 (rule hit 20 success 20)
@ -76,4 +76,3 @@ filter protocol ip pref 6 u32 filter protocol ip pref 6 u32 fh 800: ht divisor 1
index 4 ref 1 bind 1 installed 118 sec used 82 sec
Sent 1680 bytes 20 pkts (dropped 10, overlimits 0 )
-----

View File

@ -6,18 +6,18 @@ with a _lot_ less code.
Known IMQ/IFB USES
------------------
As far as i know the reasons listed below are why people use IMQ.
As far as i know the reasons listed below are why people use IMQ.
It would be nice to know of anything else that i missed.
1) qdiscs/policies that are per device as opposed to system wide.
IFB allows for sharing.
2) Allows for queueing incoming traffic for shaping instead of
dropping. I am not aware of any study that shows policing is
dropping. I am not aware of any study that shows policing is
worse than shaping in achieving the end goal of rate control.
I would be interested if anyone is experimenting.
3) Very interesting use: if you are serving p2p you may want to give
3) Very interesting use: if you are serving p2p you may want to give
preference to your own locally originated traffic (when responses come back)
vs someone using your system to do BitTorrent. So QoSing based on state
comes in as the solution. What people did to achieve this was stick
@ -25,17 +25,17 @@ the IMQ somewhere prelocal hook.
I think this is a pretty neat feature to have in Linux in general.
(i.e not just for IMQ).
But i won't go back to putting netfilter hooks in the device to satisfy
this. I also don't think it's worth hacking ifb some more to be
this. I also don't think it's worth hacking ifb some more to be
aware of say L3 info and play ip rule tricks to achieve this.
--> Instead the plan is to have a conntrack related action. This action will
selectively either query/create conntrack state on incoming packets.
Packets could then be redirected to ifb based on what happens -> eg
on incoming packets; if we find they are of known state we could send to
selectively either query/create conntrack state on incoming packets.
Packets could then be redirected to ifb based on what happens -> eg
on incoming packets; if we find they are of known state we could send to
a different queue than one which didn't have existing state. This
all however is dependent on whatever rules the admin enters.
At the moment this 3rd function does not exist yet. I have decided that
instead of sitting on the patch for another year, to release it and then
instead of sitting on the patch for another year, to release it and then
if there is pressure i will add this feature.
An example, to provide functionality that most people use IMQ for below:
@ -43,10 +43,10 @@ An example, to provide functionality that most people use IMQ for below:
--------
export TC="/sbin/tc"
$TC qdisc add dev ifb0 root handle 1: prio
$TC qdisc add dev ifb0 root handle 1: prio
$TC qdisc add dev ifb0 parent 1:1 handle 10: sfq
$TC qdisc add dev ifb0 parent 1:2 handle 20: tbf rate 20kbit buffer 1600 limit 3000
$TC qdisc add dev ifb0 parent 1:3 handle 30: sfq
$TC qdisc add dev ifb0 parent 1:3 handle 30: sfq
$TC filter add dev ifb0 protocol ip pref 1 parent 1: handle 1 fw classid 1:1
$TC filter add dev ifb0 protocol ip pref 2 parent 1: handle 2 fw classid 1:2
@ -54,7 +54,7 @@ ifconfig ifb0 up
$TC qdisc add dev eth0 ingress
# redirect all IP packets arriving in eth0 to ifb0
# redirect all IP packets arriving in eth0 to ifb0
# use mark 1 --> puts them onto class 1:1
$TC filter add dev eth0 parent ffff: protocol ip prio 10 u32 \
match u32 0 0 flowid 1:1 \
@ -77,44 +77,44 @@ PING 10.22 (10.0.0.22): 56 data bytes
--- 10.22 ping statistics ---
3 packets transmitted, 3 packets received, 0% packet loss
round-trip min/avg/max = 0.6/1.3/2.8 ms
[root@jzny action-tests]#
[root@jzny action-tests]#
-----
Now look at some stats:
---
[root@jmandrake]:~# $TC -s filter show parent ffff: dev eth0
filter protocol ip pref 10 u32
filter protocol ip pref 10 u32 fh 800: ht divisor 1
filter protocol ip pref 10 u32 fh 800::800 order 2048 key ht 800 bkt 0 flowid 1:1
filter protocol ip pref 10 u32
filter protocol ip pref 10 u32 fh 800: ht divisor 1
filter protocol ip pref 10 u32 fh 800::800 order 2048 key ht 800 bkt 0 flowid 1:1
match 00000000/00000000 at 0
action order 1: tablename: mangle hook: NF_IP_PRE_ROUTING
target MARK set 0x1
index 1 ref 1 bind 1 installed 4195sec used 27sec
Sent 252 bytes 3 pkts (dropped 0, overlimits 0)
action order 1: tablename: mangle hook: NF_IP_PRE_ROUTING
target MARK set 0x1
index 1 ref 1 bind 1 installed 4195sec used 27sec
Sent 252 bytes 3 pkts (dropped 0, overlimits 0)
action order 2: mirred (Egress Redirect to device ifb0) stolen
index 1 ref 1 bind 1 installed 165 sec used 27 sec
Sent 252 bytes 3 pkts (dropped 0, overlimits 0)
Sent 252 bytes 3 pkts (dropped 0, overlimits 0)
[root@jmandrake]:~# $TC -s qdisc
qdisc sfq 30: dev ifb0 limit 128p quantum 1514b
Sent 0 bytes 0 pkts (dropped 0, overlimits 0)
qdisc tbf 20: dev ifb0 rate 20Kbit burst 1575b lat 2147.5s
Sent 210 bytes 3 pkts (dropped 0, overlimits 0)
qdisc sfq 10: dev ifb0 limit 128p quantum 1514b
Sent 294 bytes 3 pkts (dropped 0, overlimits 0)
qdisc sfq 30: dev ifb0 limit 128p quantum 1514b
Sent 0 bytes 0 pkts (dropped 0, overlimits 0)
qdisc tbf 20: dev ifb0 rate 20Kbit burst 1575b lat 2147.5s
Sent 210 bytes 3 pkts (dropped 0, overlimits 0)
qdisc sfq 10: dev ifb0 limit 128p quantum 1514b
Sent 294 bytes 3 pkts (dropped 0, overlimits 0)
qdisc prio 1: dev ifb0 bands 3 priomap 1 2 2 2 1 2 0 0 1 1 1 1 1 1 1 1
Sent 504 bytes 6 pkts (dropped 0, overlimits 0)
qdisc ingress ffff: dev eth0 ----------------
Sent 308 bytes 5 pkts (dropped 0, overlimits 0)
Sent 504 bytes 6 pkts (dropped 0, overlimits 0)
qdisc ingress ffff: dev eth0 ----------------
Sent 308 bytes 5 pkts (dropped 0, overlimits 0)
[root@jmandrake]:~# ifconfig ifb0
ifb0 Link encap:Ethernet HWaddr 00:00:00:00:00:00
ifb0 Link encap:Ethernet HWaddr 00:00:00:00:00:00
inet6 addr: fe80::200:ff:fe00:0/64 Scope:Link
UP BROADCAST RUNNING NOARP MTU:1500 Metric:1
RX packets:6 errors:0 dropped:3 overruns:0 frame:0
TX packets:3 errors:0 dropped:0 overruns:0 carrier:0
collisions:0 txqueuelen:32
collisions:0 txqueuelen:32
RX bytes:504 (504.0 b) TX bytes:252 (252.0 b)
-----

View File

@ -7,10 +7,10 @@ flow to be mirrored. High end switches typically can select based
on more than just a port (eg a 5 tuple classifier). They may also be
capable of redirecting.
Usage:
Usage:
mirred <DIRECTION> <ACTION> [index INDEX] <dev DEVICENAME>
where:
mirred <DIRECTION> <ACTION> [index INDEX] <dev DEVICENAME>
where:
DIRECTION := <ingress | egress>
ACTION := <mirror | redirect>
INDEX is the specific policy instance id
@ -18,7 +18,7 @@ DEVICENAME is the devicename
Direction:
- Ingress is not supported at the moment. It will be in the
future as well as mirror/redirecting to a socket.
future as well as mirror/redirecting to a socket.
Action:
- Mirror takes a copy of the packet and sends it to specified
@ -29,14 +29,14 @@ steals the packet and redirects to specified destination dev.
What NOT to do if you don't want your machine to crash:
------------------------------------------------------
Do not create loops!
Do not create loops!
Loops are not hard to create in the egress qdiscs.
Here are simple rules to follow if you don't want to get
hurt:
A) Do not have the same packet go to same netdevice twice
in a single graph of policies. Your machine will just hang!
This is design intent _not a bug_ to teach you some lessons.
This is design intent _not a bug_ to teach you some lessons.
In the future if there are easy ways to do this in the kernel
without affecting other packets not interested in this feature
@ -51,7 +51,7 @@ B) Do not redirect from one IFB device to another.
Remember that IFB is a very specialized case of packet redirecting
device. Instead of redirecting it puts packets at the exact spot
on the stack it found them from.
Redirecting from ifbX->ifbY will actually not crash your machine but your
Redirecting from ifbX->ifbY will actually not crash your machine but your
packets will all be dropped (this is much simpler to detect
and resolve and is only affecting users of ifb as opposed to the
whole stack).
@ -64,7 +64,7 @@ Some examples:
1) Mirror all packets arriving on eth0 to be sent out on eth1.
You may have a sniffer or some accounting box hooked up on eth1.
---
tc qdisc add dev eth0 ingress
tc filter add dev eth0 parent ffff: protocol ip prio 10 u32 \
@ -100,7 +100,7 @@ stack (i.e ping would work).
3) Even more funky example:
#
#allow 1 out 10 packets on ingress of lo to randomly make it to the
#allow 1 out 10 packets on ingress of lo to randomly make it to the
# host A (Randomness uses the netrand generator)
#
---
@ -111,9 +111,9 @@ action mirred egress mirror dev eth0
---
4)
# for packets from 10.0.0.9 going out on eth0 (could be local
# IP or something # we are forwarding) -
# if exceeding a 100Kbps rate, then redirect to eth1
# for packets from 10.0.0.9 going out on eth0 (could be local
# IP or something # we are forwarding) -
# if exceeding a 100Kbps rate, then redirect to eth1
#
---
@ -158,7 +158,7 @@ Essentially a good debugging/logging interface (sort of like
BSD's specialized log device does without needing one).
If you replace mirror with redirect, those packets will be
blackholed and will never make it out.
blackholed and will never make it out.
cheers,
jamal

View File

@ -64,6 +64,7 @@ _PRINT_FUNC(null, const char*);
_PRINT_FUNC(string, const char*);
_PRINT_FUNC(uint, unsigned int);
_PRINT_FUNC(u64, uint64_t);
_PRINT_FUNC(hhu, unsigned char);
_PRINT_FUNC(hu, unsigned short);
_PRINT_FUNC(hex, unsigned int);
_PRINT_FUNC(0xhex, unsigned long long);

View File

@ -38,6 +38,7 @@ void jsonw_float_fmt(json_writer_t *self, const char *fmt, double num);
void jsonw_uint(json_writer_t *self, unsigned int number);
void jsonw_u64(json_writer_t *self, uint64_t number);
void jsonw_xint(json_writer_t *self, uint64_t number);
void jsonw_hhu(json_writer_t *self, unsigned char num);
void jsonw_hu(json_writer_t *self, unsigned short number);
void jsonw_int(json_writer_t *self, int number);
void jsonw_s64(json_writer_t *self, int64_t number);
@ -52,6 +53,7 @@ void jsonw_float_field(json_writer_t *self, const char *prop, double num);
void jsonw_uint_field(json_writer_t *self, const char *prop, unsigned int num);
void jsonw_u64_field(json_writer_t *self, const char *prop, uint64_t num);
void jsonw_xint_field(json_writer_t *self, const char *prop, uint64_t num);
void jsonw_hhu_field(json_writer_t *self, const char *prop, unsigned char num);
void jsonw_hu_field(json_writer_t *self, const char *prop, unsigned short num);
void jsonw_int_field(json_writer_t *self, const char *prop, int num);
void jsonw_s64_field(json_writer_t *self, const char *prop, int64_t num);

View File

@ -23,6 +23,7 @@ struct rtnl_handle {
FILE *dump_fp;
#define RTNL_HANDLE_F_LISTEN_ALL_NSID 0x01
#define RTNL_HANDLE_F_SUPPRESS_NLERR 0x02
#define RTNL_HANDLE_F_STRICT_CHK 0x04
int flags;
};
@ -46,16 +47,22 @@ int rtnl_open_byproto(struct rtnl_handle *rth, unsigned int subscriptions,
__attribute__((warn_unused_result));
void rtnl_close(struct rtnl_handle *rth);
void rtnl_set_strict_dump(struct rtnl_handle *rth);
int rtnl_addrdump_req(struct rtnl_handle *rth, int family)
typedef int (*req_filter_fn_t)(struct nlmsghdr *nlh, int reqlen);
int rtnl_addrdump_req(struct rtnl_handle *rth, int family,
req_filter_fn_t filter_fn)
__attribute__((warn_unused_result));
int rtnl_addrlbldump_req(struct rtnl_handle *rth, int family)
__attribute__((warn_unused_result));
int rtnl_routedump_req(struct rtnl_handle *rth, int family)
int rtnl_routedump_req(struct rtnl_handle *rth, int family,
req_filter_fn_t filter_fn)
__attribute__((warn_unused_result));
int rtnl_ruledump_req(struct rtnl_handle *rth, int family)
__attribute__((warn_unused_result));
int rtnl_neighdump_req(struct rtnl_handle *rth, int family)
int rtnl_neighdump_req(struct rtnl_handle *rth, int family,
req_filter_fn_t filter_fn)
__attribute__((warn_unused_result));
int rtnl_neightbldump_req(struct rtnl_handle *rth, int family)
__attribute__((warn_unused_result));
@ -71,8 +78,6 @@ int rtnl_linkdump_req(struct rtnl_handle *rth, int fam)
int rtnl_linkdump_req_filter(struct rtnl_handle *rth, int fam, __u32 filt_mask)
__attribute__((warn_unused_result));
typedef int (*req_filter_fn_t)(struct nlmsghdr *nlh, int reqlen);
int rtnl_linkdump_req_filter_fn(struct rtnl_handle *rth, int fam,
req_filter_fn_t fn)
__attribute__((warn_unused_result));

View File

@ -133,6 +133,14 @@ enum bpf_map_type {
BPF_MAP_TYPE_STACK,
};
/* Note that tracing related programs such as
* BPF_PROG_TYPE_{KPROBE,TRACEPOINT,PERF_EVENT,RAW_TRACEPOINT}
* are not subject to a stable API since kernel internal data
* structures can change from release to release and may
* therefore break existing tracing BPF programs. Tracing BPF
* programs correspond to /a/ specific kernel which is to be
* analyzed, and not /a/ specific kernel /and/ all future ones.
*/
enum bpf_prog_type {
BPF_PROG_TYPE_UNSPEC,
BPF_PROG_TYPE_SOCKET_FILTER,
@ -232,6 +240,20 @@ enum bpf_attach_type {
*/
#define BPF_F_STRICT_ALIGNMENT (1U << 0)
/* If BPF_F_ANY_ALIGNMENT is used in BPF_PROG_LOAD command, the
* verifier will allow any alignment whatsoever. On platforms
* with strict alignment requirements for loads and stores (such
* as sparc and mips) the verifier validates that all loads and
* stores provably follow this requirement. This flag turns that
* checking and enforcement off.
*
* It is mostly used for testing when we want to validate the
* context and memory access aspects of the verifier, but because
* of an unaligned access the alignment check would trigger before
* the one we are interested in.
*/
#define BPF_F_ANY_ALIGNMENT (1U << 1)
/* when bpf_ldimm64->src_reg == BPF_PSEUDO_MAP_FD, bpf_ldimm64->imm == fd */
#define BPF_PSEUDO_MAP_FD 1
@ -257,9 +279,6 @@ enum bpf_attach_type {
/* Specify numa node during map creation */
#define BPF_F_NUMA_NODE (1U << 2)
/* flags for BPF_PROG_QUERY */
#define BPF_F_QUERY_EFFECTIVE (1U << 0)
#define BPF_OBJ_NAME_LEN 16U
/* Flags for accessing BPF object */
@ -269,6 +288,12 @@ enum bpf_attach_type {
/* Flag for stack_map, store build_id+offset instead of pointer */
#define BPF_F_STACK_BUILD_ID (1U << 5)
/* Zero-initialize hash function seed. This should only be used for testing. */
#define BPF_F_ZERO_SEED (1U << 6)
/* flags for BPF_PROG_QUERY */
#define BPF_F_QUERY_EFFECTIVE (1U << 0)
enum bpf_stack_build_id_status {
/* user space needs an empty entry to identify end of a trace */
BPF_STACK_BUILD_ID_EMPTY = 0,
@ -326,7 +351,7 @@ union bpf_attr {
__u32 log_level; /* verbosity level of verifier */
__u32 log_size; /* size of user buffer */
__aligned_u64 log_buf; /* user supplied buffer */
__u32 kern_version; /* checked when prog_type=kprobe */
__u32 kern_version; /* not used */
__u32 prog_flags;
char prog_name[BPF_OBJ_NAME_LEN];
__u32 prog_ifindex; /* ifindex of netdev to prep for */
@ -335,6 +360,13 @@ union bpf_attr {
* (context accesses, allowed helpers, etc).
*/
__u32 expected_attach_type;
__u32 prog_btf_fd; /* fd pointing to BTF type data */
__u32 func_info_rec_size; /* userspace bpf_func_info size */
__aligned_u64 func_info; /* func info */
__u32 func_info_cnt; /* number of bpf_func_info records */
__u32 line_info_rec_size; /* userspace bpf_line_info size */
__aligned_u64 line_info; /* line info */
__u32 line_info_cnt; /* number of bpf_line_info records */
};
struct { /* anonymous struct used by BPF_OBJ_* commands */
@ -353,8 +385,11 @@ union bpf_attr {
struct { /* anonymous struct used by BPF_PROG_TEST_RUN command */
__u32 prog_fd;
__u32 retval;
__u32 data_size_in;
__u32 data_size_out;
__u32 data_size_in; /* input: len of data_in */
__u32 data_size_out; /* input/output: len of data_out
* returns ENOSPC if data_out
* is too small.
*/
__aligned_u64 data_in;
__aligned_u64 data_out;
__u32 repeat;
@ -475,18 +510,6 @@ union bpf_attr {
* Return
* 0 on success, or a negative error in case of failure.
*
* int bpf_map_pop_elem(struct bpf_map *map, void *value)
* Description
* Pop an element from *map*.
* Return
* 0 on success, or a negative error in case of failure.
*
* int bpf_map_peek_elem(struct bpf_map *map, void *value)
* Description
* Get an element from *map* without removing it.
* Return
* 0 on success, or a negative error in case of failure.
*
* int bpf_probe_read(void *dst, u32 size, const void *src)
* Description
* For tracing programs, safely attempt to read *size* bytes from
@ -1910,9 +1933,9 @@ union bpf_attr {
* is set to metric from route (IPv4/IPv6 only), and ifindex
* is set to the device index of the nexthop from the FIB lookup.
*
* *plen* argument is the size of the passed in struct.
* *flags* argument can be a combination of one or more of the
* following values:
* *plen* argument is the size of the passed in struct.
* *flags* argument can be a combination of one or more of the
* following values:
*
* **BPF_FIB_LOOKUP_DIRECT**
* Do a direct table lookup vs full lookup using FIB
@ -1921,9 +1944,9 @@ union bpf_attr {
* Perform lookup from an egress perspective (default is
* ingress).
*
* *ctx* is either **struct xdp_md** for XDP programs or
* **struct sk_buff** tc cls_act programs.
* Return
* *ctx* is either **struct xdp_md** for XDP programs or
* **struct sk_buff** tc cls_act programs.
* Return
* * < 0 if any input argument is invalid
* * 0 on success (packet is forwarded, nexthop neighbor exists)
* * > 0 one of **BPF_FIB_LKUP_RET_** codes explaining why the
@ -2068,8 +2091,8 @@ union bpf_attr {
* translated to a keycode using the rc keymap, and reported as
* an input key down event. After a period a key up event is
* generated. This period can be extended by calling either
* **bpf_rc_keydown** () again with the same values, or calling
* **bpf_rc_repeat** ().
* **bpf_rc_keydown**\ () again with the same values, or calling
* **bpf_rc_repeat**\ ().
*
* Some protocols include a toggle bit, in case the button was
* released and pressed again between consecutive scancodes.
@ -2152,21 +2175,22 @@ union bpf_attr {
* The *flags* meaning is specific for each map type,
* and has to be 0 for cgroup local storage.
*
* Depending on the bpf program type, a local storage area
* can be shared between multiple instances of the bpf program,
* Depending on the BPF program type, a local storage area
* can be shared between multiple instances of the BPF program,
* running simultaneously.
*
* The user is responsible for synchronization.
* For example, by using the BPF_STX_XADD instruction to alter
* For example, by using the **BPF_STX_XADD** instruction to alter
* the shared data.
* Return
* Pointer to the local storage area.
* A pointer to the local storage area.
*
* int bpf_sk_select_reuseport(struct sk_reuseport_md *reuse, struct bpf_map *map, void *key, u64 flags)
* Description
* Select a SO_REUSEPORT sk from a BPF_MAP_TYPE_REUSEPORT_ARRAY map
* It checks the selected sk is matching the incoming
* request in the skb.
* Select a **SO_REUSEPORT** socket from a
* **BPF_MAP_TYPE_REUSEPORT_ARRAY** *map*.
* It checks the selected socket is matching the incoming
* request in the socket buffer.
* Return
* 0 on success, or a negative error in case of failure.
*
@ -2174,7 +2198,7 @@ union bpf_attr {
* Description
* Look for TCP socket matching *tuple*, optionally in a child
* network namespace *netns*. The return value must be checked,
* and if non-NULL, released via **bpf_sk_release**\ ().
* and if non-**NULL**, released via **bpf_sk_release**\ ().
*
* The *ctx* should point to the context of the program, such as
* the skb or socket (depending on the hook in use). This is used
@ -2202,15 +2226,15 @@ union bpf_attr {
* This helper is available only if the kernel was compiled with
* **CONFIG_NET** configuration option.
* Return
* Pointer to *struct bpf_sock*, or NULL in case of failure.
* For sockets with reuseport option, the *struct bpf_sock*
* result is from reuse->socks[] using the hash of the tuple.
* Pointer to **struct bpf_sock**, or **NULL** in case of failure.
* For sockets with reuseport option, the **struct bpf_sock**
* result is from **reuse->socks**\ [] using the hash of the tuple.
*
* struct bpf_sock *bpf_sk_lookup_udp(void *ctx, struct bpf_sock_tuple *tuple, u32 tuple_size, u64 netns, u64 flags)
* Description
* Look for UDP socket matching *tuple*, optionally in a child
* network namespace *netns*. The return value must be checked,
* and if non-NULL, released via **bpf_sk_release**\ ().
* and if non-**NULL**, released via **bpf_sk_release**\ ().
*
* The *ctx* should point to the context of the program, such as
* the skb or socket (depending on the hook in use). This is used
@ -2238,33 +2262,71 @@ union bpf_attr {
* This helper is available only if the kernel was compiled with
* **CONFIG_NET** configuration option.
* Return
* Pointer to *struct bpf_sock*, or NULL in case of failure.
* For sockets with reuseport option, the *struct bpf_sock*
* result is from reuse->socks[] using the hash of the tuple.
* Pointer to **struct bpf_sock**, or **NULL** in case of failure.
* For sockets with reuseport option, the **struct bpf_sock**
* result is from **reuse->socks**\ [] using the hash of the tuple.
*
* int bpf_sk_release(struct bpf_sock *sk)
* int bpf_sk_release(struct bpf_sock *sock)
* Description
* Release the reference held by *sock*. *sock* must be a non-NULL
* pointer that was returned from bpf_sk_lookup_xxx\ ().
* Release the reference held by *sock*. *sock* must be a
* non-**NULL** pointer that was returned from
* **bpf_sk_lookup_xxx**\ ().
* Return
* 0 on success, or a negative error in case of failure.
*
* int bpf_map_pop_elem(struct bpf_map *map, void *value)
* Description
* Pop an element from *map*.
* Return
* 0 on success, or a negative error in case of failure.
*
* int bpf_map_peek_elem(struct bpf_map *map, void *value)
* Description
* Get an element from *map* without removing it.
* Return
* 0 on success, or a negative error in case of failure.
*
* int bpf_msg_push_data(struct sk_buff *skb, u32 start, u32 len, u64 flags)
* Description
* For socket policies, insert *len* bytes into msg at offset
* For socket policies, insert *len* bytes into *msg* at offset
* *start*.
*
* If a program of type **BPF_PROG_TYPE_SK_MSG** is run on a
* *msg* it may want to insert metadata or options into the msg.
* *msg* it may want to insert metadata or options into the *msg*.
* This can later be read and used by any of the lower layer BPF
* hooks.
*
* This helper may fail if under memory pressure (a malloc
* fails) in these cases BPF programs will get an appropriate
* error and BPF programs will need to handle them.
*
* Return
* 0 on success, or a negative error in case of failure.
*
* int bpf_msg_pop_data(struct sk_msg_buff *msg, u32 start, u32 pop, u64 flags)
* Description
* Will remove *pop* bytes from a *msg* starting at byte *start*.
* This may result in **ENOMEM** errors under certain situations if
* an allocation and copy are required due to a full ring buffer.
* However, the helper will try to avoid doing the allocation
* if possible. Other errors can occur if input parameters are
* invalid either due to *start* byte not being valid part of *msg*
* payload and/or *pop* value being too large.
* Return
* 0 on success, or a negative error in case of failure.
*
* int bpf_rc_pointer_rel(void *ctx, s32 rel_x, s32 rel_y)
* Description
* This helper is used in programs implementing IR decoding, to
* report a successfully decoded pointer movement.
*
* The *ctx* should point to the lirc sample as passed into
* the program.
*
* This helper is only available if the kernel was compiled with
* the **CONFIG_BPF_LIRC_MODE2** configuration option set to
* "**y**".
* Return
* 0
*/
#define __BPF_FUNC_MAPPER(FN) \
FN(unspec), \
@ -2357,7 +2419,9 @@ union bpf_attr {
FN(map_push_elem), \
FN(map_pop_elem), \
FN(map_peek_elem), \
FN(msg_push_data),
FN(msg_push_data), \
FN(msg_pop_data), \
FN(rc_pointer_rel),
/* integer value in 'imm' field of BPF_CALL instruction selects which helper
* function eBPF program intends to call
@ -2474,6 +2538,8 @@ struct __sk_buff {
__u32 data_meta;
__bpf_md_ptr(struct bpf_flow_keys *, flow_keys);
__u64 tstamp;
__u32 wire_len;
};
struct bpf_tunnel_key {
@ -2599,6 +2665,7 @@ struct sk_msg_md {
__u32 local_ip6[4]; /* Stored in network byte order */
__u32 remote_port; /* Stored in network byte order */
__u32 local_port; /* stored in host byte order */
__u32 size; /* Total size of sk_msg */
};
struct sk_reuseport_md {
@ -2649,6 +2716,18 @@ struct bpf_prog_info {
__u32 nr_jited_func_lens;
__aligned_u64 jited_ksyms;
__aligned_u64 jited_func_lens;
__u32 btf_id;
__u32 func_info_rec_size;
__aligned_u64 func_info;
__u32 nr_func_info;
__u32 nr_line_info;
__aligned_u64 line_info;
__aligned_u64 jited_line_info;
__u32 nr_jited_line_info;
__u32 line_info_rec_size;
__u32 jited_line_info_rec_size;
__u32 nr_prog_tags;
__aligned_u64 prog_tags;
} __attribute__((aligned(8)));
struct bpf_map_info {
@ -2960,4 +3039,19 @@ struct bpf_flow_keys {
};
};
/* Layout of one function-info record. union bpf_attr carries an array of
 * these through its func_info member, with func_info_rec_size giving the
 * userspace size of this struct and func_info_cnt the number of records.
 */
struct bpf_func_info {
/* NOTE(review): presumably the instruction offset at which the described
 * function starts -- confirm against the kernel BTF documentation.
 */
__u32 insn_off;
/* NOTE(review): presumably a BTF type id resolved against the BTF object
 * named by prog_btf_fd -- confirm.
 */
__u32 type_id;
};
/* Decoders for bpf_line_info.line_col: the low 10 bits (mask 0x3ff) hold
 * the column, everything above them holds the line number.
 */
#define BPF_LINE_INFO_LINE_NUM(line_col) ((line_col) >> 10)
#define BPF_LINE_INFO_LINE_COL(line_col) ((line_col) & 0x3ff)
/* Layout of one line-info record. union bpf_attr carries an array of
 * these through its line_info member, with line_info_rec_size giving the
 * userspace size of this struct and line_info_cnt the number of records.
 */
struct bpf_line_info {
/* NOTE(review): presumably the instruction offset this record describes
 * -- confirm.
 */
__u32 insn_off;
/* NOTE(review): file_name_off and line_off look like offsets into a
 * string table (source file name / source line text) -- confirm.
 */
__u32 file_name_off;
__u32 line_off;
/* Packed line number and column; unpack with BPF_LINE_INFO_LINE_NUM()
 * and BPF_LINE_INFO_LINE_COL().
 */
__u32 line_col;
};
#endif /* __LINUX_BPF_H__ */

View File

@ -34,13 +34,16 @@ struct btf_type {
* bits 0-15: vlen (e.g. # of struct's members)
* bits 16-23: unused
* bits 24-27: kind (e.g. int, ptr, array...etc)
* bits 28-31: unused
* bits 28-30: unused
* bit 31: kind_flag, currently used by
* struct, union and fwd
*/
__u32 info;
/* "size" is used by INT, ENUM, STRUCT and UNION.
* "size" tells the size of the type it is describing.
*
* "type" is used by PTR, TYPEDEF, VOLATILE, CONST and RESTRICT.
* "type" is used by PTR, TYPEDEF, VOLATILE, CONST, RESTRICT,
* FUNC and FUNC_PROTO.
* "type" is a type_id referring to another type.
*/
union {
@ -51,6 +54,7 @@ struct btf_type {
#define BTF_INFO_KIND(info) (((info) >> 24) & 0x0f)
#define BTF_INFO_VLEN(info) ((info) & 0xffff)
#define BTF_INFO_KFLAG(info) ((info) >> 31)
#define BTF_KIND_UNKN 0 /* Unknown */
#define BTF_KIND_INT 1 /* Integer */
@ -64,8 +68,10 @@ struct btf_type {
#define BTF_KIND_VOLATILE 9 /* Volatile */
#define BTF_KIND_CONST 10 /* Const */
#define BTF_KIND_RESTRICT 11 /* Restrict */
#define BTF_KIND_MAX 11
#define NR_BTF_KINDS 12
#define BTF_KIND_FUNC 12 /* Function */
#define BTF_KIND_FUNC_PROTO 13 /* Function Proto */
#define BTF_KIND_MAX 13
#define NR_BTF_KINDS 14
/* For some specific BTF_KIND, "struct btf_type" is immediately
* followed by extra data.
@ -107,7 +113,29 @@ struct btf_array {
struct btf_member {
__u32 name_off;
__u32 type;
__u32 offset; /* offset in bits */
/* If the type info kind_flag is set, the btf_member offset
* contains both member bitfield size and bit offset. The
* bitfield size is set for bitfield members. If the type
* info kind_flag is not set, the offset contains only bit
* offset.
*/
__u32 offset;
};
/* If the struct/union type info kind_flag is set, the
* following two macros are used to access bitfield_size
* and bit_offset from btf_member.offset.
*/
#define BTF_MEMBER_BITFIELD_SIZE(val) ((val) >> 24)
#define BTF_MEMBER_BIT_OFFSET(val) ((val) & 0xffffff)
/* BTF_KIND_FUNC_PROTO is followed by multiple "struct btf_param".
* The exact number of btf_param is stored in the vlen (of the
* info in "struct btf_type").
*/
struct btf_param {
__u32 name_off;
__u32 type;
};
#endif /* __LINUX_BTF_H__ */

View File

@ -163,6 +163,11 @@ enum devlink_param_cmode {
DEVLINK_PARAM_CMODE_MAX = __DEVLINK_PARAM_CMODE_MAX - 1
};
/* Numeric values of the firmware-load-policy setting. Numbering is spelled
 * out so the wire values are visible at a glance.
 */
enum devlink_param_fw_load_policy_value {
	DEVLINK_PARAM_FW_LOAD_POLICY_VALUE_DRIVER	= 0,
	DEVLINK_PARAM_FW_LOAD_POLICY_VALUE_FLASH	= 1,
};
enum devlink_attr {
/* don't change the order or add anything between, this is ABI! */
DEVLINK_ATTR_UNSPEC,

View File

@ -292,4 +292,25 @@ struct br_mcast_stats {
__u64 mcast_bytes[BR_MCAST_DIR_SIZE];
__u64 mcast_packets[BR_MCAST_DIR_SIZE];
};
/* bridge boolean options
* BR_BOOLOPT_NO_LL_LEARN - disable learning from link-local packets
*
* IMPORTANT: if adding a new option do not forget to handle
* it in br_boolopt_toggle/get and bridge sysfs
*/
enum br_boolopt_id {
	BR_BOOLOPT_NO_LL_LEARN = 0,
	BR_BOOLOPT_MAX,		/* number of options defined; keep last */
};
/* struct br_boolopt_multi - change multiple bridge boolean options
*
* @optval: new option values (bit per option)
* @optmask: options to change (bit per option)
*/
struct br_boolopt_multi {
__u32 optval;
__u32 optmask;
};
#endif /* _LINUX_IF_BRIDGE_H */

View File

@ -286,6 +286,7 @@ enum {
IFLA_BR_MCAST_IGMP_VERSION,
IFLA_BR_MCAST_MLD_VERSION,
IFLA_BR_VLAN_STATS_PER_PORT,
IFLA_BR_MULTI_BOOLOPT,
__IFLA_BR_MAX,
};
@ -531,6 +532,7 @@ enum {
IFLA_VXLAN_LABEL,
IFLA_VXLAN_GPE,
IFLA_VXLAN_TTL_INHERIT,
IFLA_VXLAN_DF,
__IFLA_VXLAN_MAX
};
#define IFLA_VXLAN_MAX (__IFLA_VXLAN_MAX - 1)
@ -540,6 +542,14 @@ struct ifla_vxlan_port_range {
__be16 high;
};
/* Possible settings for the VXLAN "DF" option. __VXLAN_DF_END is a
 * terminator one past the last real value; VXLAN_DF_MAX is derived from
 * it and therefore always names the highest valid setting.
 */
enum ifla_vxlan_df {
	VXLAN_DF_UNSET		= 0,
	VXLAN_DF_SET		= 1,
	VXLAN_DF_INHERIT	= 2,

	__VXLAN_DF_END,
	VXLAN_DF_MAX		= __VXLAN_DF_END - 1,
};
/* GENEVE section */
enum {
IFLA_GENEVE_UNSPEC,
@ -555,10 +565,19 @@ enum {
IFLA_GENEVE_UDP_ZERO_CSUM6_RX,
IFLA_GENEVE_LABEL,
IFLA_GENEVE_TTL_INHERIT,
IFLA_GENEVE_DF,
__IFLA_GENEVE_MAX
};
#define IFLA_GENEVE_MAX (__IFLA_GENEVE_MAX - 1)
/* Possible settings for the GENEVE "DF" option. __GENEVE_DF_END is a
 * terminator one past the last real value; GENEVE_DF_MAX is derived from
 * it and therefore always names the highest valid setting.
 */
enum ifla_geneve_df {
	GENEVE_DF_UNSET		= 0,
	GENEVE_DF_SET		= 1,
	GENEVE_DF_INHERIT	= 2,

	__GENEVE_DF_END,
	GENEVE_DF_MAX		= __GENEVE_DF_END - 1,
};
/* PPP section */
enum {
IFLA_PPP_UNSPEC,

View File

@ -59,6 +59,7 @@
#define TUNGETVNETBE _IOR('T', 223, int)
#define TUNSETSTEERINGEBPF _IOR('T', 224, int)
#define TUNSETFILTEREBPF _IOR('T', 225, int)
#define TUNSETCARRIER _IOW('T', 226, int)
/* TUNSETIFF ifr flags */
#define IFF_TUN 0x0001

View File

@ -28,6 +28,7 @@ enum {
NDA_MASTER,
NDA_LINK_NETNSID,
NDA_SRC_VNI,
NDA_PROTOCOL, /* Originator of entry */
__NDA_MAX
};

View File

@ -16,6 +16,8 @@ enum {
NETNSA_NSID,
NETNSA_PID,
NETNSA_FD,
NETNSA_TARGET_NSID,
NETNSA_CURRENT_NSID,
__NETNSA_MAX,
};

View File

@ -33,10 +33,6 @@
#define NF_DROP_ERR(x) (((-x) << 16) | NF_DROP)
/* only for userspace compatibility */
/* Generic cache responses from hook functions.
<= 0x2000 is used for protocol-flags. */
#define NFC_UNKNOWN 0x4000
#define NFC_ALTERED 0x8000
/* NF_VERDICT_BITS should be 8 now, but userspace might break if this changes */
#define NF_VERDICT_BITS 16

View File

@ -13,8 +13,9 @@
#include <linux/types.h>
/* The protocol version */
#define IPSET_PROTOCOL 6
/* The protocol versions */
#define IPSET_PROTOCOL 7
#define IPSET_PROTOCOL_MIN 6
/* The max length of strings including NUL: set and type identifiers */
#define IPSET_MAXNAMELEN 32
@ -38,17 +39,19 @@ enum ipset_cmd {
IPSET_CMD_TEST, /* 11: Test an element in a set */
IPSET_CMD_HEADER, /* 12: Get set header data only */
IPSET_CMD_TYPE, /* 13: Get set type */
IPSET_CMD_GET_BYNAME, /* 14: Get set index by name */
IPSET_CMD_GET_BYINDEX, /* 15: Get set name by index */
IPSET_MSG_MAX, /* Netlink message commands */
/* Commands in userspace: */
IPSET_CMD_RESTORE = IPSET_MSG_MAX, /* 14: Enter restore mode */
IPSET_CMD_HELP, /* 15: Get help */
IPSET_CMD_VERSION, /* 16: Get program version */
IPSET_CMD_QUIT, /* 17: Quit from interactive mode */
IPSET_CMD_RESTORE = IPSET_MSG_MAX, /* 16: Enter restore mode */
IPSET_CMD_HELP, /* 17: Get help */
IPSET_CMD_VERSION, /* 18: Get program version */
IPSET_CMD_QUIT, /* 19: Quit from interactive mode */
IPSET_CMD_MAX,
IPSET_CMD_COMMIT = IPSET_CMD_MAX, /* 18: Commit buffered commands */
IPSET_CMD_COMMIT = IPSET_CMD_MAX, /* 20: Commit buffered commands */
};
/* Attributes at command level */
@ -66,6 +69,7 @@ enum {
IPSET_ATTR_LINENO, /* 9: Restore lineno */
IPSET_ATTR_PROTOCOL_MIN, /* 10: Minimal supported version number */
IPSET_ATTR_REVISION_MIN = IPSET_ATTR_PROTOCOL_MIN, /* type rev min */
IPSET_ATTR_INDEX, /* 11: Kernel index of set */
__IPSET_ATTR_CMD_MAX,
};
#define IPSET_ATTR_CMD_MAX (__IPSET_ATTR_CMD_MAX - 1)
@ -223,6 +227,7 @@ enum ipset_adt {
/* Sets are identified by an index in kernel space. Tweak with ip_set_id_t
* and IPSET_INVALID_ID if you want to increase the max number of sets.
* Also, IPSET_ATTR_INDEX must be changed.
*/
typedef __u16 ip_set_id_t;

View File

@ -12,34 +12,6 @@
#include <limits.h> /* for INT_MIN, INT_MAX */
/* IP Cache bits. */
/* Src IP address. */
#define NFC_IP_SRC 0x0001
/* Dest IP address. */
#define NFC_IP_DST 0x0002
/* Input device. */
#define NFC_IP_IF_IN 0x0004
/* Output device. */
#define NFC_IP_IF_OUT 0x0008
/* TOS. */
#define NFC_IP_TOS 0x0010
/* Protocol. */
#define NFC_IP_PROTO 0x0020
/* IP options. */
#define NFC_IP_OPTIONS 0x0040
/* Frag & flags. */
#define NFC_IP_FRAG 0x0080
/* Per-protocol information: only matters if proto match. */
/* TCP flags. */
#define NFC_IP_TCPFLAGS 0x0100
/* Source port. */
#define NFC_IP_SRC_PT 0x0200
/* Dest port. */
#define NFC_IP_DST_PT 0x0400
/* Something else about the proto */
#define NFC_IP_PROTO_UNKNOWN 0x2000
/* IP Hooks */
/* After promisc drops, checksum checks. */
#define NF_IP_PRE_ROUTING 0

View File

@ -15,35 +15,6 @@
#include <limits.h> /* for INT_MIN, INT_MAX */
/* IP Cache bits. */
/* Src IP address. */
#define NFC_IP6_SRC 0x0001
/* Dest IP address. */
#define NFC_IP6_DST 0x0002
/* Input device. */
#define NFC_IP6_IF_IN 0x0004
/* Output device. */
#define NFC_IP6_IF_OUT 0x0008
/* TOS. */
#define NFC_IP6_TOS 0x0010
/* Protocol. */
#define NFC_IP6_PROTO 0x0020
/* IP options. */
#define NFC_IP6_OPTIONS 0x0040
/* Frag & flags. */
#define NFC_IP6_FRAG 0x0080
/* Per-protocol information: only matters if proto match. */
/* TCP flags. */
#define NFC_IP6_TCPFLAGS 0x0100
/* Source port. */
#define NFC_IP6_SRC_PT 0x0200
/* Dest port. */
#define NFC_IP6_DST_PT 0x0400
/* Something else about the proto */
#define NFC_IP6_PROTO_UNKNOWN 0x2000
/* IP6 Hooks */
/* After promisc drops, checksum checks. */
#define NF_IP6_PRE_ROUTING 0

View File

@ -485,6 +485,11 @@ enum {
TCA_FLOWER_IN_HW_COUNT,
TCA_FLOWER_KEY_PORT_SRC_MIN, /* be16 */
TCA_FLOWER_KEY_PORT_SRC_MAX, /* be16 */
TCA_FLOWER_KEY_PORT_DST_MIN, /* be16 */
TCA_FLOWER_KEY_PORT_DST_MAX, /* be16 */
__TCA_FLOWER_MAX,
};
@ -518,6 +523,8 @@ enum {
TCA_FLOWER_KEY_FLAGS_FRAG_IS_FIRST = (1 << 1),
};
#define TCA_FLOWER_MASK_FLAGS_RANGE (1 << 0) /* Range-based match */
/* Match-all classifier */
enum {

View File

@ -291,11 +291,38 @@ enum {
TCA_GRED_DPS,
TCA_GRED_MAX_P,
TCA_GRED_LIMIT,
TCA_GRED_VQ_LIST, /* nested TCA_GRED_VQ_ENTRY */
__TCA_GRED_MAX,
};
#define TCA_GRED_MAX (__TCA_GRED_MAX - 1)
/* Attribute ids used inside a TCA_GRED_VQ_LIST nest. */
enum {
	TCA_GRED_VQ_ENTRY_UNSPEC = 0,
	TCA_GRED_VQ_ENTRY	 = 1,	/* nested TCA_GRED_VQ_* */

	__TCA_GRED_VQ_ENTRY_MAX,	/* terminator; keep last */
};
#define TCA_GRED_VQ_ENTRY_MAX (__TCA_GRED_VQ_ENTRY_MAX - 1)
/* Attribute ids used inside a TCA_GRED_VQ_ENTRY nest; the trailing
 * comment on each entry gives the payload width.
 */
enum {
	TCA_GRED_VQ_UNSPEC		= 0,
	TCA_GRED_VQ_PAD			= 1,
	TCA_GRED_VQ_DP			= 2,	/* u32 */
	TCA_GRED_VQ_STAT_BYTES		= 3,	/* u64 */
	TCA_GRED_VQ_STAT_PACKETS	= 4,	/* u32 */
	TCA_GRED_VQ_STAT_BACKLOG	= 5,	/* u32 */
	TCA_GRED_VQ_STAT_PROB_DROP	= 6,	/* u32 */
	TCA_GRED_VQ_STAT_PROB_MARK	= 7,	/* u32 */
	TCA_GRED_VQ_STAT_FORCED_DROP	= 8,	/* u32 */
	TCA_GRED_VQ_STAT_FORCED_MARK	= 9,	/* u32 */
	TCA_GRED_VQ_STAT_PDROP		= 10,	/* u32 */
	TCA_GRED_VQ_STAT_OTHER		= 11,	/* u32 */
	TCA_GRED_VQ_FLAGS		= 12,	/* u32 */

	__TCA_GRED_VQ_MAX		/* terminator; keep last */
};
#define TCA_GRED_VQ_MAX (__TCA_GRED_VQ_MAX - 1)
struct tc_gred_qopt {
__u32 limit; /* HARD maximal queue length (bytes) */
__u32 qth_min; /* Min average length threshold (bytes) */
@ -864,6 +891,8 @@ enum {
TCA_FQ_LOW_RATE_THRESHOLD, /* per packet delay under this rate */
TCA_FQ_CE_THRESHOLD, /* DCTCP-like CE-marking threshold */
__TCA_FQ_MAX
};
@ -882,6 +911,7 @@ struct tc_fq_qd_stats {
__u32 inactive_flows;
__u32 throttled_flows;
__u32 unthrottle_latency_ns;
__u64 ce_mark; /* packets above ce_threshold */
};
/* Heavy-Hitter Filter */

View File

@ -129,6 +129,7 @@ typedef __s32 sctp_assoc_t;
#define SCTP_STREAM_SCHEDULER_VALUE 124
#define SCTP_INTERLEAVING_SUPPORTED 125
#define SCTP_SENDMSG_CONNECT 126
#define SCTP_EVENT 127
/* PR-SCTP policies */
#define SCTP_PR_SCTP_NONE 0x0000
@ -632,7 +633,9 @@ union sctp_notification {
*/
enum sctp_sn_type {
SCTP_SN_TYPE_BASE = (1<<15),
SCTP_SN_TYPE_BASE = (1<<15),
SCTP_DATA_IO_EVENT = SCTP_SN_TYPE_BASE,
#define SCTP_DATA_IO_EVENT SCTP_DATA_IO_EVENT
SCTP_ASSOC_CHANGE,
#define SCTP_ASSOC_CHANGE SCTP_ASSOC_CHANGE
SCTP_PEER_ADDR_CHANGE,
@ -657,6 +660,8 @@ enum sctp_sn_type {
#define SCTP_ASSOC_RESET_EVENT SCTP_ASSOC_RESET_EVENT
SCTP_STREAM_CHANGE_EVENT,
#define SCTP_STREAM_CHANGE_EVENT SCTP_STREAM_CHANGE_EVENT
SCTP_SN_TYPE_MAX = SCTP_STREAM_CHANGE_EVENT,
#define SCTP_SN_TYPE_MAX SCTP_SN_TYPE_MAX
};
/* Notification error codes used to fill up the error fields in some
@ -1144,6 +1149,12 @@ struct sctp_add_streams {
uint16_t sas_outstrms;
};
/* NOTE(review): presumably the argument structure for the SCTP_EVENT
 * socket option defined earlier in this header -- confirm against the
 * kernel SCTP documentation.
 */
struct sctp_event {
/* Association the setting refers to. */
sctp_assoc_t se_assoc_id;
/* NOTE(review): presumably one of the enum sctp_sn_type values -- confirm. */
uint16_t se_type;
/* NOTE(review): presumably non-zero enables and zero disables the event
 * -- confirm.
 */
uint8_t se_on;
};
/* SCTP Stream schedulers */
enum sctp_sched_type {
SCTP_SS_FCFS,

View File

@ -243,6 +243,7 @@ enum
LINUX_MIB_TCPREQQFULLDROP, /* TCPReqQFullDrop */
LINUX_MIB_TCPRETRANSFAIL, /* TCPRetransFail */
LINUX_MIB_TCPRCVCOALESCE, /* TCPRcvCoalesce */
LINUX_MIB_TCPBACKLOGCOALESCE, /* TCPBacklogCoalesce */
LINUX_MIB_TCPOFOQUEUE, /* TCPOFOQueue */
LINUX_MIB_TCPOFODROP, /* TCPOFODrop */
LINUX_MIB_TCPOFOMERGE, /* TCPOFOMerge */

View File

@ -266,6 +266,7 @@ enum {
TCP_NLA_BYTES_RETRANS, /* Data bytes retransmitted */
TCP_NLA_DSACK_DUPS, /* DSACK blocks received */
TCP_NLA_REORD_SEEN, /* reordering events seen */
TCP_NLA_SRTT, /* smoothed RTT in usecs */
};
/* for TCP_MD5SIG socket option */

View File

@ -116,13 +116,6 @@ struct dn_naddr
unsigned char a_addr[DN_MAXADDL];
};
#define IPX_NODE_LEN 6
struct ipx_addr {
u_int32_t ipx_net;
u_int8_t ipx_node[IPX_NODE_LEN];
};
#ifndef AF_MPLS
# define AF_MPLS 28
#endif
@ -201,12 +194,6 @@ int matches(const char *arg, const char *pattern);
int inet_addr_match(const inet_prefix *a, const inet_prefix *b, int bits);
int inet_addr_match_rta(const inet_prefix *m, const struct rtattr *rta);
const char *dnet_ntop(int af, const void *addr, char *str, size_t len);
int dnet_pton(int af, const char *src, void *addr);
const char *ipx_ntop(int af, const void *addr, char *str, size_t len);
int ipx_pton(int af, const char *src, void *addr);
const char *mpls_ntop(int af, const void *addr, char *str, size_t len);
int mpls_pton(int af, const char *src, void *addr, size_t alen);

View File

@ -53,7 +53,7 @@ static void usage(void)
" vrf | sr }\n"
" OPTIONS := { -V[ersion] | -s[tatistics] | -d[etails] | -r[esolve] |\n"
" -h[uman-readable] | -iec | -j[son] | -p[retty] |\n"
" -f[amily] { inet | inet6 | ipx | dnet | mpls | bridge | link } |\n"
" -f[amily] { inet | inet6 | mpls | bridge | link } |\n"
" -4 | -6 | -I | -D | -M | -B | -0 |\n"
" -l[oops] { maximum-addr-flush-attempts } | -br[ief] |\n"
" -o[neline] | -t[imestamp] | -ts[hort] | -b[atch] [filename] |\n"
@ -225,8 +225,6 @@ int main(int argc, char **argv)
preferred_family = AF_INET6;
} else if (strcmp(opt, "-0") == 0) {
preferred_family = AF_PACKET;
} else if (strcmp(opt, "-I") == 0) {
preferred_family = AF_IPX;
} else if (strcmp(opt, "-D") == 0) {
preferred_family = AF_DECnet;
} else if (strcmp(opt, "-M") == 0) {
@ -310,6 +308,8 @@ int main(int argc, char **argv)
if (rtnl_open(&rth, 0) < 0)
exit(1);
rtnl_set_strict_dump(&rth);
if (strlen(basename) > 2)
return do_cmd(basename+2, argc, argv);

View File

@ -84,8 +84,7 @@ int do_seg6(int argc, char **argv);
int iplink_get(char *name, __u32 filt_mask);
int iplink_ifla_xstats(int argc, char **argv);
int ip_linkaddr_list(int family, req_filter_fn_t filter_fn,
struct nlmsg_chain *linfo, struct nlmsg_chain *ainfo);
int ip_link_list(req_filter_fn_t filter_fn, struct nlmsg_chain *linfo);
void free_nlmsg_chain(struct nlmsg_chain *info);
static inline int rtm_get_table(struct rtmsg *r, struct rtattr **tb)

View File

@ -1679,6 +1679,15 @@ static void ipaddr_filter(struct nlmsg_chain *linfo, struct nlmsg_chain *ainfo)
}
}
/* Request hook with the req_filter_fn_t signature, passed to
 * rtnl_addrdump_req(): copies the interface index from the global
 * filter into the ifaddrmsg header of the outgoing request.
 * reqlen is not used. Always returns 0.
 */
static int ipaddr_dump_filter(struct nlmsghdr *nlh, int reqlen)
{
	struct ifaddrmsg *req_hdr = NLMSG_DATA(nlh);

	req_hdr->ifa_index = filter.ifindex;

	return 0;
}
static int ipaddr_flush(void)
{
int round = 0;
@ -1689,7 +1698,8 @@ static int ipaddr_flush(void)
filter.flushe = sizeof(flushb);
while ((max_flush_loops == 0) || (round < max_flush_loops)) {
if (rtnl_addrdump_req(&rth, filter.family) < 0) {
if (rtnl_addrdump_req(&rth, filter.family,
ipaddr_dump_filter) < 0) {
perror("Cannot send dump request");
exit(1);
}
@ -1762,12 +1772,41 @@ static int iplink_filter_req(struct nlmsghdr *nlh, int reqlen)
return 0;
}
/* Fetch the link message for a single interface and append it to linfo.
 *
 * Sends an RTM_GETLINK request for ifindex @index (address family taken
 * from the global filter), asking for VF information and, unless
 * show_stats is set, asking the kernel to skip statistics.
 *
 * Returns 0 on success, 1 if the request fails or the reply cannot be
 * stored (an error message is printed in both cases).
 */
static int ipaddr_link_get(int index, struct nlmsg_chain *linfo)
{
	struct iplink_req req = {
		.n.nlmsg_len = NLMSG_LENGTH(sizeof(struct ifinfomsg)),
		.n.nlmsg_flags = NLM_F_REQUEST,
		.n.nlmsg_type = RTM_GETLINK,
		.i.ifi_family = filter.family,
		.i.ifi_index = index,
	};
	__u32 filt_mask = RTEXT_FILTER_VF;
	struct nlmsghdr *answer;
	int ret = 0;

	if (!show_stats)
		filt_mask |= RTEXT_FILTER_SKIP_STATS;

	addattr32(&req.n, sizeof(req), IFLA_EXT_MASK, filt_mask);

	if (rtnl_talk(&rth, &req.n, &answer) < 0) {
		perror("Cannot send link request");
		return 1;
	}

	if (store_nlmsg(answer, linfo) < 0) {
		fprintf(stderr, "Failed to process link information\n");
		ret = 1;
	}

	/* The reply handed back by rtnl_talk() is heap-allocated and owned
	 * by the caller; store_nlmsg() keeps its own copy in the chain, so
	 * the original must be released on success and failure alike.
	 */
	free(answer);

	return ret;
}
/* fills in linfo with link data and optionally ainfo with address info
* caller can walk lists as desired and must call free_nlmsg_chain for
* both when done
*/
int ip_linkaddr_list(int family, req_filter_fn_t filter_fn,
struct nlmsg_chain *linfo, struct nlmsg_chain *ainfo)
int ip_link_list(req_filter_fn_t filter_fn, struct nlmsg_chain *linfo)
{
if (rtnl_linkdump_req_filter_fn(&rth, preferred_family,
filter_fn) < 0) {
@ -1780,16 +1819,19 @@ int ip_linkaddr_list(int family, req_filter_fn_t filter_fn,
return 1;
}
if (ainfo) {
if (rtnl_addrdump_req(&rth, family) < 0) {
perror("Cannot send dump request");
return 1;
}
return 0;
}
if (rtnl_dump_filter(&rth, store_nlmsg, ainfo) < 0) {
fprintf(stderr, "Dump terminated\n");
return 1;
}
static int ip_addr_list(struct nlmsg_chain *ainfo)
{
if (rtnl_addrdump_req(&rth, filter.family, ipaddr_dump_filter) < 0) {
perror("Cannot send dump request");
return 1;
}
if (rtnl_dump_filter(&rth, store_nlmsg, ainfo) < 0) {
fprintf(stderr, "Dump terminated\n");
return 1;
}
return 0;
@ -1798,7 +1840,7 @@ int ip_linkaddr_list(int family, req_filter_fn_t filter_fn,
static int ipaddr_list_flush_or_save(int argc, char **argv, int action)
{
struct nlmsg_chain linfo = { NULL, NULL};
struct nlmsg_chain _ainfo = { NULL, NULL}, *ainfo = NULL;
struct nlmsg_chain _ainfo = { NULL, NULL}, *ainfo = &_ainfo;
struct nlmsg_list *l;
char *filter_dev = NULL;
int no_link = 0;
@ -1906,7 +1948,8 @@ static int ipaddr_list_flush_or_save(int argc, char **argv, int action)
if (ipadd_save_prep())
exit(1);
if (rtnl_addrdump_req(&rth, preferred_family) < 0) {
if (rtnl_addrdump_req(&rth, preferred_family,
ipaddr_dump_filter) < 0) {
perror("Cannot send dump request");
exit(1);
}
@ -1940,19 +1983,23 @@ static int ipaddr_list_flush_or_save(int argc, char **argv, int action)
goto out;
}
if (filter.family != AF_PACKET) {
ainfo = &_ainfo;
if (filter.oneline)
no_link = 1;
if (filter.ifindex) {
if (ipaddr_link_get(filter.ifindex, &linfo) != 0)
goto out;
} else {
if (ip_link_list(iplink_filter_req, &linfo) != 0)
goto out;
}
if (ip_linkaddr_list(filter.family, iplink_filter_req,
&linfo, ainfo) != 0)
goto out;
if (filter.family != AF_PACKET) {
if (filter.oneline)
no_link = 1;
if (ip_addr_list(ainfo) != 0)
goto out;
if (filter.family != AF_PACKET)
ipaddr_filter(&linfo, ainfo);
}
for (l = linfo.head; l; l = l->next) {
struct nlmsghdr *n = &l->h;
@ -1971,8 +2018,7 @@ static int ipaddr_list_flush_or_save(int argc, char **argv, int action)
fflush(stdout);
out:
if (ainfo)
free_nlmsg_chain(ainfo);
free_nlmsg_chain(ainfo);
free_nlmsg_chain(&linfo);
delete_json_obj();
return 0;

View File

@ -24,6 +24,7 @@ static void print_explain(FILE *f)
" remote ADDR\n"
" [ ttl TTL ]\n"
" [ tos TOS ]\n"
" [ df DF ]\n"
" [ flowlabel LABEL ]\n"
" [ dstport PORT ]\n"
" [ [no]external ]\n"
@ -35,6 +36,7 @@ static void print_explain(FILE *f)
" ADDR := IP_ADDRESS\n"
" TOS := { NUMBER | inherit }\n"
" TTL := { 1..255 | auto | inherit }\n"
" DF := { unset | set | inherit }\n"
" LABEL := 0-1048575\n"
);
}
@ -115,6 +117,22 @@ static int geneve_parse_opt(struct link_util *lu, int argc, char **argv,
tos = uval;
} else
tos = 1;
} else if (!matches(*argv, "df")) {
enum ifla_geneve_df df;
NEXT_ARG();
check_duparg(&attrs, IFLA_GENEVE_DF, "df", *argv);
if (strcmp(*argv, "unset") == 0)
df = GENEVE_DF_UNSET;
else if (strcmp(*argv, "set") == 0)
df = GENEVE_DF_SET;
else if (strcmp(*argv, "inherit") == 0)
df = GENEVE_DF_INHERIT;
else
invarg("DF must be 'unset', 'set' or 'inherit'",
*argv);
addattr8(n, 1024, IFLA_GENEVE_DF, df);
} else if (!matches(*argv, "label") ||
!matches(*argv, "flowlabel")) {
__u32 uval;
@ -287,6 +305,17 @@ static void geneve_print_opt(struct link_util *lu, FILE *f, struct rtattr *tb[])
print_string(PRINT_FP, NULL, "tos %s ", "inherit");
}
if (tb[IFLA_GENEVE_DF]) {
enum ifla_geneve_df df = rta_getattr_u8(tb[IFLA_GENEVE_DF]);
if (df == GENEVE_DF_UNSET)
print_string(PRINT_JSON, "df", "df %s ", "unset");
else if (df == GENEVE_DF_SET)
print_string(PRINT_ANY, "df", "df %s ", "set");
else if (df == GENEVE_DF_INHERIT)
print_string(PRINT_ANY, "df", "df %s ", "inherit");
}
if (tb[IFLA_GENEVE_LABEL]) {
__u32 label = rta_getattr_u32(tb[IFLA_GENEVE_LABEL]);

View File

@ -31,6 +31,7 @@ static void print_explain(FILE *f)
" [ local ADDR ]\n"
" [ ttl TTL ]\n"
" [ tos TOS ]\n"
" [ df DF ]\n"
" [ flowlabel LABEL ]\n"
" [ dev PHYS_DEV ]\n"
" [ dstport PORT ]\n"
@ -52,6 +53,7 @@ static void print_explain(FILE *f)
" ADDR := { IP_ADDRESS | any }\n"
" TOS := { NUMBER | inherit }\n"
" TTL := { 1..255 | auto | inherit }\n"
" DF := { unset | set | inherit }\n"
" LABEL := 0-1048575\n"
);
}
@ -170,6 +172,22 @@ static int vxlan_parse_opt(struct link_util *lu, int argc, char **argv,
} else
tos = 1;
addattr8(n, 1024, IFLA_VXLAN_TOS, tos);
} else if (!matches(*argv, "df")) {
enum ifla_vxlan_df df;
NEXT_ARG();
check_duparg(&attrs, IFLA_VXLAN_DF, "df", *argv);
if (strcmp(*argv, "unset") == 0)
df = VXLAN_DF_UNSET;
else if (strcmp(*argv, "set") == 0)
df = VXLAN_DF_SET;
else if (strcmp(*argv, "inherit") == 0)
df = VXLAN_DF_INHERIT;
else
invarg("DF must be 'unset', 'set' or 'inherit'",
*argv);
addattr8(n, 1024, IFLA_VXLAN_DF, df);
} else if (!matches(*argv, "label") ||
!matches(*argv, "flowlabel")) {
__u32 uval;
@ -538,6 +556,17 @@ static void vxlan_print_opt(struct link_util *lu, FILE *f, struct rtattr *tb[])
print_string(PRINT_FP, NULL, "ttl %s ", "auto");
}
if (tb[IFLA_VXLAN_DF]) {
enum ifla_vxlan_df df = rta_getattr_u8(tb[IFLA_VXLAN_DF]);
if (df == VXLAN_DF_UNSET)
print_string(PRINT_JSON, "df", "df %s ", "unset");
else if (df == VXLAN_DF_SET)
print_string(PRINT_ANY, "df", "df %s ", "set");
else if (df == VXLAN_DF_INHERIT)
print_string(PRINT_ANY, "df", "df %s ", "inherit");
}
if (tb[IFLA_VXLAN_LABEL]) {
__u32 label = rta_getattr_u32(tb[IFLA_VXLAN_LABEL]);

View File

@ -220,21 +220,36 @@ void ipmroute_reset_filter(int ifindex)
filter.iif = ifindex;
}
/* Dump-request hook: when a table filter is set, attach it to the
 * request as RTA_TABLE.  Returns 0 or the error from addattr32().
 */
static int iproute_dump_filter(struct nlmsghdr *nlh, int reqlen)
{
	if (!filter.tb)
		return 0;

	return addattr32(nlh, reqlen, RTA_TABLE, filter.tb);
}
static int mroute_list(int argc, char **argv)
{
char *id = NULL;
int family;
int family = preferred_family;
ipmroute_reset_filter(0);
if (preferred_family == AF_UNSPEC)
family = AF_INET;
else
family = AF_INET6;
if (family == AF_INET) {
if (family == AF_INET || family == AF_UNSPEC) {
family = RTNL_FAMILY_IPMR;
filter.af = RTNL_FAMILY_IPMR;
filter.tb = RT_TABLE_DEFAULT; /* for backward compatibility */
} else
} else if (family == AF_INET6) {
family = RTNL_FAMILY_IP6MR;
filter.af = RTNL_FAMILY_IP6MR;
} else {
/* family does not have multicast routing */
return 0;
}
filter.msrc.family = filter.mdst.family = family;
@ -283,7 +298,7 @@ static int mroute_list(int argc, char **argv)
filter.iif = idx;
}
if (rtnl_routedump_req(&rth, filter.af) < 0) {
if (rtnl_routedump_req(&rth, filter.af, iproute_dump_filter) < 0) {
perror("Cannot send dump request");
return 1;
}

View File

@ -40,6 +40,8 @@ static struct
int flushp;
int flushe;
int master;
int protocol;
__u8 ndm_flags;
} filter;
static void usage(void) __attribute__((noreturn));
@ -48,7 +50,7 @@ static void usage(void)
{
fprintf(stderr, "Usage: ip neigh { add | del | change | replace }\n"
" { ADDR [ lladdr LLADDR ] [ nud STATE ] | proxy ADDR } [ dev DEV ]\n");
fprintf(stderr, " [ router ] [ extern_learn ]\n\n");
fprintf(stderr, " [ router ] [ extern_learn ] [ protocol PROTO ]\n\n");
fprintf(stderr, " ip neigh { show | flush } [ proxy ] [ to PREFIX ] [ dev DEV ] [ nud STATE ]\n");
fprintf(stderr, " [ vrf NAME ]\n\n");
fprintf(stderr, "STATE := { permanent | noarp | stale | reachable | none |\n"
@ -148,6 +150,14 @@ static int ipneigh_modify(int cmd, int flags, int argc, char **argv)
NEXT_ARG();
dev = *argv;
dev_ok = 1;
} else if (matches(*argv, "protocol") == 0) {
__u32 proto;
NEXT_ARG();
if (rtnl_rtprot_a2n(&proto, *argv))
invarg("\"protocol\" value is invalid\n", *argv);
if (addattr8(&req.n, sizeof(req), NDA_PROTOCOL, proto))
return -1;
} else {
if (strcmp(*argv, "to") == 0) {
NEXT_ARG();
@ -244,6 +254,7 @@ int print_neigh(struct nlmsghdr *n, void *arg)
int len = n->nlmsg_len;
struct rtattr *tb[NDA_MAX+1];
static int logit = 1;
__u8 protocol = 0;
if (n->nlmsg_type != RTM_NEWNEIGH && n->nlmsg_type != RTM_DELNEIGH &&
n->nlmsg_type != RTM_GETNEIGH) {
@ -285,6 +296,12 @@ int print_neigh(struct nlmsghdr *n, void *arg)
if (inet_addr_match_rta(&filter.pfx, tb[NDA_DST]))
return 0;
if (tb[NDA_PROTOCOL])
protocol = rta_getattr_u8(tb[NDA_PROTOCOL]);
if (filter.protocol && filter.protocol != protocol)
return 0;
if (filter.unused_only && tb[NDA_CACHEINFO]) {
struct nda_cacheinfo *ci = RTA_DATA(tb[NDA_CACHEINFO]);
@ -379,6 +396,13 @@ int print_neigh(struct nlmsghdr *n, void *arg)
if (r->ndm_state)
print_neigh_state(r->ndm_state);
if (protocol) {
SPRINT_BUF(b1);
print_string(PRINT_ANY, "protocol", " proto %s ",
rtnl_rtprot_n2a(protocol, b1, sizeof(b1)));
}
print_string(PRINT_FP, NULL, "\n", "");
close_json_object();
fflush(stdout);
@ -393,16 +417,29 @@ void ipneigh_reset_filter(int ifindex)
filter.index = ifindex;
}
/* Dump-request hook for neighbour dumps: copies the filter's ndm_flags
 * into the request header and appends NDA_IFINDEX / NDA_MASTER when the
 * matching filter field is non-zero.  Returns 0 or the first error
 * reported by addattr32().
 */
static int ipneigh_dump_filter(struct nlmsghdr *nlh, int reqlen)
{
	struct ndmsg *hdr = NLMSG_DATA(nlh);
	int rc = 0;

	hdr->ndm_flags = filter.ndm_flags;

	if (filter.index)
		rc = addattr32(nlh, reqlen, NDA_IFINDEX, filter.index);

	if (rc == 0 && filter.master)
		rc = addattr32(nlh, reqlen, NDA_MASTER, filter.master);

	return rc;
}
static int do_show_or_flush(int argc, char **argv, int flush)
{
struct {
struct nlmsghdr n;
struct ndmsg ndm;
char buf[256];
} req = {
.n.nlmsg_type = RTM_GETNEIGH,
.n.nlmsg_len = NLMSG_LENGTH(sizeof(struct ndmsg)),
};
char *filter_dev = NULL;
int state_given = 0;
@ -433,7 +470,6 @@ static int do_show_or_flush(int argc, char **argv, int flush)
ifindex = ll_name_to_index(*argv);
if (!ifindex)
invarg("Device does not exist\n", *argv);
addattr32(&req.n, sizeof(req), NDA_MASTER, ifindex);
filter.master = ifindex;
} else if (strcmp(*argv, "vrf") == 0) {
int ifindex;
@ -444,7 +480,6 @@ static int do_show_or_flush(int argc, char **argv, int flush)
invarg("Not a valid VRF name\n", *argv);
if (!name_is_vrf(*argv))
invarg("Not a valid VRF name\n", *argv);
addattr32(&req.n, sizeof(req), NDA_MASTER, ifindex);
filter.master = ifindex;
} else if (strcmp(*argv, "unused") == 0) {
filter.unused_only = 1;
@ -466,9 +501,19 @@ static int do_show_or_flush(int argc, char **argv, int flush)
if (state == 0)
state = 0x100;
filter.state |= state;
} else if (strcmp(*argv, "proxy") == 0)
req.ndm.ndm_flags = NTF_PROXY;
else {
} else if (strcmp(*argv, "proxy") == 0) {
filter.ndm_flags = NTF_PROXY;
} else if (matches(*argv, "protocol") == 0) {
__u32 prot;
NEXT_ARG();
if (rtnl_rtprot_a2n(&prot, *argv)) {
if (strcmp(*argv, "all"))
invarg("invalid \"protocol\"\n", *argv);
prot = 0;
}
filter.protocol = prot;
} else {
if (strcmp(*argv, "to") == 0) {
NEXT_ARG();
}
@ -488,11 +533,8 @@ static int do_show_or_flush(int argc, char **argv, int flush)
filter.index = ll_name_to_index(filter_dev);
if (!filter.index)
return nodev(filter_dev);
addattr32(&req.n, sizeof(req), NDA_IFINDEX, filter.index);
}
req.ndm.ndm_family = filter.family;
if (flush) {
int round = 0;
char flushb[4096-512];
@ -502,7 +544,8 @@ static int do_show_or_flush(int argc, char **argv, int flush)
filter.flushe = sizeof(flushb);
while (round < MAX_ROUNDS) {
if (rtnl_dump_request_n(&rth, &req.n) < 0) {
if (rtnl_neighdump_req(&rth, filter.family,
ipneigh_dump_filter) < 0) {
perror("Cannot send dump request");
exit(1);
}
@ -535,7 +578,7 @@ static int do_show_or_flush(int argc, char **argv, int flush)
return 1;
}
if (rtnl_dump_request_n(&rth, &req.n) < 0) {
if (rtnl_neighdump_req(&rth, filter.family, ipneigh_dump_filter) < 0) {
perror("Cannot send dump request");
exit(1);
}

View File

@ -83,7 +83,7 @@ static void usage(void)
"INFO_SPEC := NH OPTIONS FLAGS [ nexthop NH ]...\n"
"NH := [ encap ENCAPTYPE ENCAPHDR ] [ via [ FAMILY ] ADDRESS ]\n"
" [ dev STRING ] [ weight NUMBER ] NHFLAGS\n"
"FAMILY := [ inet | inet6 | ipx | dnet | mpls | bridge | link ]\n"
"FAMILY := [ inet | inet6 | mpls | bridge | link ]\n"
"OPTIONS := FLAGS [ mtu NUMBER ] [ advmss NUMBER ] [ as [ to ] ADDRESS ]\n"
" [ rtt TIME ] [ rttvar TIME ] [ reordering NUMBER ]\n"
" [ window NUMBER ] [ cwnd NUMBER ] [ initcwnd NUMBER ]\n"
@ -1535,24 +1535,6 @@ static int iproute_modify(int cmd, unsigned int flags, int argc, char **argv)
return 0;
}
static int rtnl_rtcache_request(struct rtnl_handle *rth, int family)
{
struct {
struct nlmsghdr nlh;
struct rtmsg rtm;
} req = {
.nlh.nlmsg_len = sizeof(req),
.nlh.nlmsg_type = RTM_GETROUTE,
.nlh.nlmsg_flags = NLM_F_ROOT | NLM_F_REQUEST,
.nlh.nlmsg_seq = rth->dump = ++rth->seq,
.rtm.rtm_family = family,
.rtm.rtm_flags = RTM_F_CLONED,
};
struct sockaddr_nl nladdr = { .nl_family = AF_NETLINK };
return sendto(rth->fd, (void *)&req, sizeof(req), 0, (struct sockaddr *)&nladdr, sizeof(nladdr));
}
static int iproute_flush_cache(void)
{
#define ROUTE_FLUSH_PATH "/proc/sys/net/ipv4/route/flush"
@ -1622,7 +1604,7 @@ static int save_route_prep(void)
return 0;
}
static int iproute_flush(int do_ipv6, rtnl_filter_t filter_fn)
static int iproute_flush(int family, rtnl_filter_t filter_fn)
{
time_t start = time(0);
char flushb[4096-512];
@ -1630,12 +1612,12 @@ static int iproute_flush(int do_ipv6, rtnl_filter_t filter_fn)
int ret;
if (filter.cloned) {
if (do_ipv6 != AF_INET6) {
if (family != AF_INET6) {
iproute_flush_cache();
if (show_stats)
printf("*** IPv4 routing cache is flushed.\n");
}
if (do_ipv6 == AF_INET)
if (family == AF_INET)
return 0;
}
@ -1644,7 +1626,7 @@ static int iproute_flush(int do_ipv6, rtnl_filter_t filter_fn)
filter.flushe = sizeof(flushb);
for (;;) {
if (rtnl_routedump_req(&rth, do_ipv6) < 0) {
if (rtnl_routedump_req(&rth, family, NULL) < 0) {
perror("Cannot send dump request");
return -2;
}
@ -1656,7 +1638,7 @@ static int iproute_flush(int do_ipv6, rtnl_filter_t filter_fn)
if (filter.flushed == 0) {
if (show_stats) {
if (round == 0 &&
(!filter.cloned || do_ipv6 == AF_INET6))
(!filter.cloned || family == AF_INET6))
printf("Nothing to flush.\n");
else
printf("*** Flush is complete after %d round%s ***\n",
@ -1684,9 +1666,33 @@ static int iproute_flush(int do_ipv6, rtnl_filter_t filter_fn)
}
}
/* Dump-request hook for route dumps: fills rtm_protocol from the
 * filter, sets RTM_F_CLONED when filter.cloned is set, and appends
 * RTA_TABLE / RTA_OIF for non-zero filter.tb / filter.oif.
 * Returns 0 or the first error reported by addattr32().
 */
static int iproute_dump_filter(struct nlmsghdr *nlh, int reqlen)
{
	struct rtmsg *hdr = NLMSG_DATA(nlh);
	int rc = 0;

	hdr->rtm_protocol = filter.protocol;
	if (filter.cloned)
		hdr->rtm_flags |= RTM_F_CLONED;

	if (filter.tb)
		rc = addattr32(nlh, reqlen, RTA_TABLE, filter.tb);

	if (rc == 0 && filter.oif)
		rc = addattr32(nlh, reqlen, RTA_OIF, filter.oif);

	return rc;
}
static int iproute_list_flush_or_save(int argc, char **argv, int action)
{
int do_ipv6 = preferred_family;
int dump_family = preferred_family;
char *id = NULL;
char *od = NULL;
unsigned int mark = 0;
@ -1805,13 +1811,13 @@ static int iproute_list_flush_or_save(int argc, char **argv, int action)
NEXT_ARG();
family = read_family(*argv);
if (family == AF_UNSPEC)
family = do_ipv6;
family = dump_family;
else
NEXT_ARG();
get_prefix(&filter.rvia, *argv, family);
} else if (strcmp(*argv, "src") == 0) {
NEXT_ARG();
get_prefix(&filter.rprefsrc, *argv, do_ipv6);
get_prefix(&filter.rprefsrc, *argv, dump_family);
} else if (matches(*argv, "realms") == 0) {
__u32 realm;
@ -1831,15 +1837,15 @@ static int iproute_list_flush_or_save(int argc, char **argv, int action)
NEXT_ARG();
if (matches(*argv, "root") == 0) {
NEXT_ARG();
get_prefix(&filter.rsrc, *argv, do_ipv6);
get_prefix(&filter.rsrc, *argv, dump_family);
} else if (matches(*argv, "match") == 0) {
NEXT_ARG();
get_prefix(&filter.msrc, *argv, do_ipv6);
get_prefix(&filter.msrc, *argv, dump_family);
} else {
if (matches(*argv, "exact") == 0) {
NEXT_ARG();
}
get_prefix(&filter.msrc, *argv, do_ipv6);
get_prefix(&filter.msrc, *argv, dump_family);
filter.rsrc = filter.msrc;
}
} else {
@ -1848,23 +1854,23 @@ static int iproute_list_flush_or_save(int argc, char **argv, int action)
}
if (matches(*argv, "root") == 0) {
NEXT_ARG();
get_prefix(&filter.rdst, *argv, do_ipv6);
get_prefix(&filter.rdst, *argv, dump_family);
} else if (matches(*argv, "match") == 0) {
NEXT_ARG();
get_prefix(&filter.mdst, *argv, do_ipv6);
get_prefix(&filter.mdst, *argv, dump_family);
} else {
if (matches(*argv, "exact") == 0) {
NEXT_ARG();
}
get_prefix(&filter.mdst, *argv, do_ipv6);
get_prefix(&filter.mdst, *argv, dump_family);
filter.rdst = filter.mdst;
}
}
argc--; argv++;
}
if (do_ipv6 == AF_UNSPEC && filter.tb)
do_ipv6 = AF_INET;
if (dump_family == AF_UNSPEC && filter.tb)
dump_family = AF_INET;
if (id || od) {
int idx;
@ -1887,18 +1893,11 @@ static int iproute_list_flush_or_save(int argc, char **argv, int action)
filter.mark = mark;
if (action == IPROUTE_FLUSH)
return iproute_flush(do_ipv6, filter_fn);
return iproute_flush(dump_family, filter_fn);
if (!filter.cloned) {
if (rtnl_routedump_req(&rth, do_ipv6) < 0) {
perror("Cannot send dump request");
return -2;
}
} else {
if (rtnl_rtcache_request(&rth, do_ipv6) < 0) {
perror("Cannot send dump request");
return -2;
}
if (rtnl_routedump_req(&rth, dump_family, iproute_dump_filter) < 0) {
perror("Cannot send dump request");
return -2;
}
new_json_obj(json);

View File

@ -79,6 +79,9 @@ static struct
inet_prefix dst;
int protocol;
int protocolmask;
struct fib_rule_port_range sport;
struct fib_rule_port_range dport;
__u8 ipproto;
} filter;
static inline int frh_get_table(struct fib_rule_hdr *frh, struct rtattr **tb)
@ -175,6 +178,39 @@ static bool filter_nlmsg(struct nlmsghdr *n, struct rtattr **tb, int host_len)
return false;
}
if (filter.ipproto) {
__u8 ipproto = 0;
if (tb[FRA_IP_PROTO])
ipproto = rta_getattr_u8(tb[FRA_IP_PROTO]);
if (filter.ipproto != ipproto)
return false;
}
if (filter.sport.start) {
const struct fib_rule_port_range *r;
if (!tb[FRA_SPORT_RANGE])
return false;
r = RTA_DATA(tb[FRA_SPORT_RANGE]);
if (r->start != filter.sport.start ||
r->end != filter.sport.end)
return false;
}
if (filter.dport.start) {
const struct fib_rule_port_range *r;
if (!tb[FRA_DPORT_RANGE])
return false;
r = RTA_DATA(tb[FRA_DPORT_RANGE]);
if (r->start != filter.dport.start ||
r->end != filter.dport.end)
return false;
}
if (filter.tun_id) {
__u64 tun_id = 0;
@ -633,6 +669,36 @@ static int iprule_list_flush_or_save(int argc, char **argv, int action)
filter.protocolmask = 0;
}
filter.protocol = prot;
} else if (strcmp(*argv, "ipproto") == 0) {
int ipproto;
NEXT_ARG();
ipproto = inet_proto_a2n(*argv);
if (ipproto < 0)
invarg("Invalid \"ipproto\" value\n", *argv);
filter.ipproto = ipproto;
} else if (strcmp(*argv, "sport") == 0) {
struct fib_rule_port_range r;
int ret;
NEXT_ARG();
ret = sscanf(*argv, "%hu-%hu", &r.start, &r.end);
if (ret == 1)
r.end = r.start;
else if (ret != 2)
invarg("invalid port range\n", *argv);
filter.sport = r;
} else if (strcmp(*argv, "dport") == 0) {
struct fib_rule_port_range r;
int ret;
NEXT_ARG();
ret = sscanf(*argv, "%hu-%hu", &r.start, &r.end);
if (ret == 1)
r.end = r.start;
else if (ret != 2)
invarg("invalid dport range\n", *argv);
filter.dport = r;
} else{
if (matches(*argv, "dst") == 0 ||
matches(*argv, "to") == 0) {

View File

@ -589,7 +589,7 @@ static int ipvrf_show(int argc, char **argv)
return 0;
}
if (ip_linkaddr_list(0, ipvrf_filter_req, &linfo, NULL) == 0) {
if (ip_link_list(ipvrf_filter_req, &linfo) == 0) {
struct nlmsg_list *l;
unsigned nvrf = 0;
int n;

View File

@ -1,101 +0,0 @@
/* SPDX-License-Identifier: GPL-2.0 */
#include <errno.h>
#include <string.h>
#include <sys/types.h>
#include <netinet/in.h>
#include "utils.h"
/* Reassemble a 16-bit value from its in-memory bytes, treating the
 * first byte as the low-order one.
 */
static __inline__ u_int16_t dn_ntohs(u_int16_t addr)
{
	u_int8_t raw[sizeof(addr)];

	memcpy(raw, &addr, sizeof(raw));
	return (u_int16_t)(raw[0] | (raw[1] << 8));
}
/* Emit one decimal digit of *addr at the given scale (1, 10, 100, ...).
 *
 * A zero digit is suppressed unless a digit was already emitted
 * (*started) or this is the units position.  When a digit is written,
 * *pos is advanced, *started is set and the digit's value is removed
 * from *addr.
 *
 * Returns 1 if the buffer is already full (*pos == len), 0 otherwise.
 */
static __inline__ int do_digit(char *str, u_int16_t *addr, u_int16_t scale, size_t *pos, size_t len, int *started)
{
	const u_int16_t digit = *addr / scale;
	int emit;

	if (*pos == len)
		return 1;

	emit = digit > 0 || *started || scale == 1;
	if (!emit)
		return 0;

	*str = digit + '0';
	*started = 1;
	*pos += 1;
	*addr -= digit * scale;
	return 0;
}
/* Format a 2-byte DECnet node address as "area.node" into str.
 *
 * The 16-bit address holds the area in its upper 6 bits and the node
 * in its lower 10 bits.
 *
 * Returns NULL if dna->a_len is not 2, otherwise str.  When len is 0
 * nothing is written.  When the text does not fit, the output is
 * truncated but is always NUL-terminated (previously a truncated
 * result was left without a terminator).
 */
static const char *dnet_ntop1(const struct dn_naddr *dna, char *str, size_t len)
{
	u_int16_t addr, area;
	size_t pos = 0;
	int started = 0;

	if (dna->a_len != 2)
		return NULL;

	if (len == 0)
		return str;

	memcpy(&addr, dna->a_addr, sizeof(addr));
	addr = dn_ntohs(addr);
	area = addr >> 10;
	addr &= 0x03ff;

	if (do_digit(str + pos, &area, 10, &pos, len, &started))
		goto truncated;

	if (do_digit(str + pos, &area, 1, &pos, len, &started))
		goto truncated;

	if (pos == len)
		goto truncated;

	*(str + pos) = '.';
	pos++;
	started = 0;

	if (do_digit(str + pos, &addr, 1000, &pos, len, &started))
		goto truncated;

	if (do_digit(str + pos, &addr, 100, &pos, len, &started))
		goto truncated;

	if (do_digit(str + pos, &addr, 10, &pos, len, &started))
		goto truncated;

	if (do_digit(str + pos, &addr, 1, &pos, len, &started))
		goto truncated;

	if (pos == len)
		goto truncated;

	*(str + pos) = 0;
	return str;

truncated:
	/* len > 0 is guaranteed here; never hand back an unterminated string */
	str[len - 1] = '\0';
	return str;
}
/* Address-family front end for DECnet address formatting.
 *
 * For AF_DECnet, clears errno and returns the result of dnet_ntop1().
 * For any other family, sets errno to EAFNOSUPPORT and returns NULL.
 */
const char *dnet_ntop(int af, const void *addr, char *str, size_t len)
{
	if (af == AF_DECnet) {
		errno = 0;
		return dnet_ntop1((struct dn_naddr *)addr, str, len);
	}

	errno = EAFNOSUPPORT;
	return NULL;
}

View File

@ -1,75 +0,0 @@
/* SPDX-License-Identifier: GPL-2.0 */
#include <errno.h>
#include <string.h>
#include <sys/types.h>
#include <netinet/in.h>
#include "utils.h"
/* Reassemble a 16-bit value from its in-memory bytes, treating the
 * first byte as the low-order one.
 */
static __inline__ u_int16_t dn_htons(u_int16_t addr)
{
	u_int8_t raw[sizeof(addr)];

	memcpy(raw, &addr, sizeof(raw));
	return (u_int16_t)(raw[0] | (raw[1] << 8));
}
/* Parse a run of leading decimal digits from src into *dst.
 *
 * Parsing stops at the first non-digit or at the end of the string.
 * Returns the number of digits consumed (0 if src does not start with
 * a digit, in which case *dst is 0).
 *
 * Values that do not fit in 16 bits saturate at 0xffff instead of
 * wrapping, so a caller's range check cannot be bypassed by an
 * oversized number such as "65537".
 */
static int dnet_num(const char *src, u_int16_t * dst)
{
	unsigned int value = 0;
	int ndigits = 0;

	for (; *src >= '0' && *src <= '9'; src++) {
		value = value * 10 + (unsigned int)(*src - '0');
		if (value > 0xffff)
			value = 0xffff;
		ndigits++;
	}

	*dst = (u_int16_t)value;
	return ndigits;
}
/* Parse "area.node" text into a 2-byte DECnet address.
 *
 * The area must be at most 63 and the node at most 1023; the two are
 * packed as (area << 10) | node.  Returns 1 on success, 0 when the
 * text is not accepted (dna is left untouched in that case).
 */
static int dnet_pton1(const char *src, struct dn_naddr *dna)
{
	u_int16_t area = 0, node = 0;
	u_int16_t packed;
	int ndigits;

	ndigits = dnet_num(src, &area);
	if (ndigits == 0 || area > 63 || src[ndigits] != '.')
		return 0;

	ndigits = dnet_num(src + ndigits + 1, &node);
	if (ndigits == 0 || node > 1023)
		return 0;

	dna->a_len = 2;
	packed = dn_htons((area << 10) | node);
	memcpy(dna->a_addr, &packed, sizeof(packed));

	return 1;
}
/* Address-family front end for DECnet address parsing.
 *
 * For AF_DECnet, clears errno and returns the result of dnet_pton1().
 * For any other family, sets errno to EAFNOSUPPORT and returns -1.
 */
int dnet_pton(int af, const char *src, void *addr)
{
	if (af != AF_DECnet) {
		errno = EAFNOSUPPORT;
		return -1;
	}

	errno = 0;
	return dnet_pton1(src, (struct dn_naddr *)addr);
}

View File

@ -1,71 +0,0 @@
/* SPDX-License-Identifier: GPL-2.0 */
#include <errno.h>
#include <sys/types.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include "utils.h"
/* Write nibble number `scale` of addr (0 = least significant) to *str
 * as one upper-case hexadecimal digit and advance *pos.
 *
 * Returns 1 without writing if the buffer is already full
 * (*pos == len), 0 otherwise.
 */
static __inline__ int do_digit(char *str, u_int32_t addr, u_int32_t scale, size_t *pos, size_t len)
{
	const u_int32_t nibble = (addr >> (scale * 4)) & 0x0f;

	if (*pos == len)
		return 1;

	*str = (nibble > 9) ? (nibble + 'A' - 10) : (nibble + '0');
	++*pos;
	return 0;
}
/* Format an IPX address as "NNNNNNNN.nnnnnnnnnnnn": eight hex digits
 * of the network number (converted with ntohl), a dot, then two hex
 * digits for each of the six node bytes.
 *
 * Always returns str.  When len is 0 nothing is written.  When the
 * text does not fit, the output is truncated but is always
 * NUL-terminated (previously a truncated result was left without a
 * terminator).
 */
static const char *ipx_ntop1(const struct ipx_addr *addr, char *str, size_t len)
{
	int i;
	size_t pos = 0;

	if (len == 0)
		return str;

	for (i = 7; i >= 0; i--)
		if (do_digit(str + pos, ntohl(addr->ipx_net), i, &pos, len))
			goto truncated;

	if (pos == len)
		goto truncated;

	*(str + pos) = '.';
	pos++;

	for (i = 0; i < 6; i++) {
		if (do_digit(str + pos, addr->ipx_node[i], 1, &pos, len))
			goto truncated;
		if (do_digit(str + pos, addr->ipx_node[i], 0, &pos, len))
			goto truncated;
	}

	if (pos == len)
		goto truncated;

	*(str + pos) = 0;
	return str;

truncated:
	/* len > 0 is guaranteed here; never hand back an unterminated string */
	str[len - 1] = '\0';
	return str;
}
/* Address-family front end for IPX address formatting.
 *
 * For AF_IPX, clears errno and returns the result of ipx_ntop1().
 * For any other family, sets errno to EAFNOSUPPORT and returns NULL.
 */
const char *ipx_ntop(int af, const void *addr, char *str, size_t len)
{
	if (af == AF_IPX) {
		errno = 0;
		return ipx_ntop1((struct ipx_addr *)addr, str, len);
	}

	errno = EAFNOSUPPORT;
	return NULL;
}

View File

@ -1,97 +0,0 @@
/* SPDX-License-Identifier: GPL-2.0 */
#include <errno.h>
#include <string.h>
#include <sys/types.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include "utils.h"
/* Parse up to eight leading hex digits of str into *net, shifting each
 * digit into the low nibble.  *net is not cleared here; it accumulates
 * onto whatever value the caller passed in.
 *
 * The network part ends at the end of the string or at a '.'.
 * Returns 0 on success, -1 on any other character.
 *
 * A full eight-digit network number followed by '.' is accepted;
 * previously the loop stopped after eight digits and the '.' that
 * followed was reported as an error.
 */
static int ipx_getnet(u_int32_t *net, const char *str)
{
	int i;
	u_int32_t tmp;

	for (i = 0; *str && (i < 8); i++) {
		if ((tmp = get_hex(*str)) == (u_int32_t)-1) {
			if (*str == '.')
				return 0;
			else
				return -1;
		}

		str++;
		(*net) <<= 4;
		(*net) |= tmp;
	}

	/* either the whole string was consumed or the node part follows */
	if (*str == 0 || *str == '.')
		return 0;

	return -1;
}
/* Parse six bytes, each written as two hex digits, from str into node.
 * A ':' following a byte is skipped.
 *
 * Returns 0 on success, -1 as soon as get_hex() rejects a character
 * (node may be partially written in that case).
 */
static int ipx_getnode(u_int8_t *node, const char *str)
{
	int idx;

	for (idx = 0; idx < 6; idx++) {
		u_int32_t hi, lo;

		hi = get_hex(*str++);
		if (hi == (u_int32_t)-1)
			return -1;
		node[idx] = (u_int8_t)hi;
		node[idx] <<= 4;

		lo = get_hex(*str++);
		if (lo == (u_int32_t)-1)
			return -1;
		node[idx] |= (u_int8_t)lo;

		if (*str == ':')
			str++;
	}

	return 0;
}
/* Parse "NET" or "NET.NODE" text into *addr.
 *
 * addr is zeroed first.  The network part is parsed by ipx_getnet()
 * and stored after htonl(); the node part, if a '.' is present, is
 * parsed by ipx_getnode().  Returns 1 on success, 0 on a parse error.
 */
static int ipx_pton1(const char *src, struct ipx_addr *addr)
{
	const char *dot = strchr(src, '.');

	memset(addr, 0, sizeof(struct ipx_addr));

	if (ipx_getnet(&addr->ipx_net, src))
		return 0;

	addr->ipx_net = htonl(addr->ipx_net);

	/* network-only address: nothing more to parse */
	if (dot == NULL)
		return 1;

	return ipx_getnode(addr->ipx_node, dot + 1) ? 0 : 1;
}
/* Address-family front end for IPX address parsing.
 *
 * For AF_IPX, clears errno and returns the result of ipx_pton1().
 * For any other family, sets errno to EAFNOSUPPORT and returns -1.
 */
int ipx_pton(int af, const char *src, void *addr)
{
	if (af != AF_IPX) {
		errno = EAFNOSUPPORT;
		return -1;
	}

	errno = 0;
	return ipx_pton1(src, (struct ipx_addr *)addr);
}

View File

@ -118,6 +118,7 @@ void close_json_array(enum output_type type, const char *str)
}
_PRINT_FUNC(int, int);
_PRINT_FUNC(s64, int64_t);
_PRINT_FUNC(hhu, unsigned char);
_PRINT_FUNC(hu, unsigned short);
_PRINT_FUNC(uint, unsigned int);
_PRINT_FUNC(u64, uint64_t);

View File

@ -211,6 +211,11 @@ void jsonw_float(json_writer_t *self, double num)
jsonw_printf(self, "%g", num);
}
/* Write num to the JSON writer using the "%hhu" conversion. */
void jsonw_hhu(json_writer_t *self, unsigned char num)
{
jsonw_printf(self, "%hhu", num);
}
void jsonw_hu(json_writer_t *self, unsigned short num)
{
jsonw_printf(self, "%hu", num);
@ -288,6 +293,12 @@ void jsonw_xint_field(json_writer_t *self, const char *prop, uint64_t num)
jsonw_xint(self, num);
}
/* Write a named field: emits prop via jsonw_name(), then num via
 * jsonw_hhu().
 */
void jsonw_hhu_field(json_writer_t *self, const char *prop, unsigned char num)
{
jsonw_name(self, prop);
jsonw_hhu(self, num);
}
void jsonw_hu_field(json_writer_t *self, const char *prop, unsigned short num)
{
jsonw_name(self, prop);

View File

@ -28,6 +28,8 @@
#include "libnetlink.h"
#define __aligned(x) __attribute__((aligned(x)))
#ifndef SOL_NETLINK
#define SOL_NETLINK 270
#endif
@ -67,6 +69,14 @@ static int err_attr_cb(const struct nlattr *attr, void *data)
return MNL_CB_OK;
}
/* Print an extended-ack message to stderr as "Error: <msg>." or
 * "Warning: <msg>.", adding the trailing period only when msg does not
 * already end with one.
 *
 * A NULL or empty msg prints nothing; this also avoids indexing
 * msg[strlen(msg) - 1] on an empty string.
 */
static void print_ext_ack_msg(bool is_err, const char *msg)
{
	size_t len;

	if (!msg || *msg == '\0')
		return;

	len = strlen(msg);

	fprintf(stderr, "%s: %s", is_err ? "Error" : "Warning", msg);
	if (msg[len - 1] != '.')
		fprintf(stderr, ".");
	fprintf(stderr, "\n");
}
/* dump netlink extended ack error message */
int nl_dump_ext_ack(const struct nlmsghdr *nlh, nl_ext_ack_fn_t errfn)
{
@ -108,12 +118,29 @@ int nl_dump_ext_ack(const struct nlmsghdr *nlh, nl_ext_ack_fn_t errfn)
if (msg && *msg != '\0') {
bool is_err = !!err->error;
fprintf(stderr, "%s: %s",
is_err ? "Error" : "Warning", msg);
if (msg[strlen(msg) - 1] != '.')
fprintf(stderr, ".");
fprintf(stderr, "\n");
print_ext_ack_msg(is_err, msg);
return is_err ? 1 : 0;
}
return 0;
}
static int nl_dump_ext_ack_done(const struct nlmsghdr *nlh, int error)
{
struct nlattr *tb[NLMSGERR_ATTR_MAX + 1] = {};
unsigned int hlen = sizeof(int);
const char *msg = NULL;
if (mnl_attr_parse(nlh, hlen, err_attr_cb, tb) != MNL_CB_OK)
return 0;
if (tb[NLMSGERR_ATTR_MSG])
msg = mnl_attr_get_str(tb[NLMSGERR_ATTR_MSG]);
if (msg && *msg != '\0') {
bool is_err = !!error;
print_ext_ack_msg(is_err, msg);
return is_err ? 1 : 0;
}
@ -127,8 +154,25 @@ int nl_dump_ext_ack(const struct nlmsghdr *nlh, nl_ext_ack_fn_t errfn)
{
return 0;
}
/* Stub variant: ignores its arguments and always returns 0. */
static int nl_dump_ext_ack_done(const struct nlmsghdr *nlh, int error)
{
return 0;
}
#endif
/* Try to enable NETLINK_GET_STRICT_CHK on the handle's socket.
 * Older kernels may not support strict dump and filtering, so a
 * setsockopt() failure is silently ignored; only on success is
 * RTNL_HANDLE_F_STRICT_CHK recorded in rth->flags.
 */
void rtnl_set_strict_dump(struct rtnl_handle *rth)
{
	int enable = 1;
	int rc;

	rc = setsockopt(rth->fd, SOL_NETLINK, NETLINK_GET_STRICT_CHK,
			&enable, sizeof(enable));
	if (rc >= 0)
		rth->flags |= RTNL_HANDLE_F_STRICT_CHK;
}
void rtnl_close(struct rtnl_handle *rth)
{
if (rth->fd >= 0) {
@ -202,19 +246,29 @@ int rtnl_open(struct rtnl_handle *rth, unsigned int subscriptions)
return rtnl_open_byproto(rth, subscriptions, NETLINK_ROUTE);
}
int rtnl_addrdump_req(struct rtnl_handle *rth, int family)
int rtnl_addrdump_req(struct rtnl_handle *rth, int family,
req_filter_fn_t filter_fn)
{
struct {
struct nlmsghdr nlh;
struct ifaddrmsg ifm;
char buf[128];
} req = {
.nlh.nlmsg_len = sizeof(req),
.nlh.nlmsg_len = NLMSG_LENGTH(sizeof(struct ifaddrmsg)),
.nlh.nlmsg_type = RTM_GETADDR,
.nlh.nlmsg_flags = NLM_F_DUMP | NLM_F_REQUEST,
.nlh.nlmsg_seq = rth->dump = ++rth->seq,
.ifm.ifa_family = family,
};
if (filter_fn) {
int err;
err = filter_fn(&req.nlh, sizeof(req));
if (err)
return err;
}
return send(rth->fd, &req, sizeof(req), 0);
}
@ -224,7 +278,7 @@ int rtnl_addrlbldump_req(struct rtnl_handle *rth, int family)
struct nlmsghdr nlh;
struct ifaddrlblmsg ifal;
} req = {
.nlh.nlmsg_len = sizeof(req),
.nlh.nlmsg_len = NLMSG_LENGTH(sizeof(struct ifaddrlblmsg)),
.nlh.nlmsg_type = RTM_GETADDRLABEL,
.nlh.nlmsg_flags = NLM_F_DUMP | NLM_F_REQUEST,
.nlh.nlmsg_seq = rth->dump = ++rth->seq,
@ -234,19 +288,29 @@ int rtnl_addrlbldump_req(struct rtnl_handle *rth, int family)
return send(rth->fd, &req, sizeof(req), 0);
}
int rtnl_routedump_req(struct rtnl_handle *rth, int family)
int rtnl_routedump_req(struct rtnl_handle *rth, int family,
req_filter_fn_t filter_fn)
{
struct {
struct nlmsghdr nlh;
struct rtmsg rtm;
char buf[128];
} req = {
.nlh.nlmsg_len = sizeof(req),
.nlh.nlmsg_len = NLMSG_LENGTH(sizeof(struct rtmsg)),
.nlh.nlmsg_type = RTM_GETROUTE,
.nlh.nlmsg_flags = NLM_F_DUMP | NLM_F_REQUEST,
.nlh.nlmsg_seq = rth->dump = ++rth->seq,
.rtm.rtm_family = family,
};
if (filter_fn) {
int err;
err = filter_fn(&req.nlh, sizeof(req));
if (err)
return err;
}
return send(rth->fd, &req, sizeof(req), 0);
}
@ -256,7 +320,7 @@ int rtnl_ruledump_req(struct rtnl_handle *rth, int family)
struct nlmsghdr nlh;
struct fib_rule_hdr frh;
} req = {
.nlh.nlmsg_len = sizeof(req),
.nlh.nlmsg_len = NLMSG_LENGTH(sizeof(struct fib_rule_hdr)),
.nlh.nlmsg_type = RTM_GETRULE,
.nlh.nlmsg_flags = NLM_F_DUMP | NLM_F_REQUEST,
.nlh.nlmsg_seq = rth->dump = ++rth->seq,
@ -266,19 +330,29 @@ int rtnl_ruledump_req(struct rtnl_handle *rth, int family)
return send(rth->fd, &req, sizeof(req), 0);
}
int rtnl_neighdump_req(struct rtnl_handle *rth, int family)
int rtnl_neighdump_req(struct rtnl_handle *rth, int family,
req_filter_fn_t filter_fn)
{
struct {
struct nlmsghdr nlh;
struct ndmsg ndm;
char buf[256];
} req = {
.nlh.nlmsg_len = sizeof(req),
.nlh.nlmsg_len = NLMSG_LENGTH(sizeof(struct ndmsg)),
.nlh.nlmsg_type = RTM_GETNEIGH,
.nlh.nlmsg_flags = NLM_F_DUMP | NLM_F_REQUEST,
.nlh.nlmsg_seq = rth->dump = ++rth->seq,
.ndm.ndm_family = family,
};
if (filter_fn) {
int err;
err = filter_fn(&req.nlh, sizeof(req));
if (err)
return err;
}
return send(rth->fd, &req, sizeof(req), 0);
}
@ -288,7 +362,7 @@ int rtnl_neightbldump_req(struct rtnl_handle *rth, int family)
struct nlmsghdr nlh;
struct ndtmsg ndtmsg;
} req = {
.nlh.nlmsg_len = sizeof(req),
.nlh.nlmsg_len = NLMSG_LENGTH(sizeof(struct ndtmsg)),
.nlh.nlmsg_type = RTM_GETNEIGHTBL,
.nlh.nlmsg_flags = NLM_F_DUMP | NLM_F_REQUEST,
.nlh.nlmsg_seq = rth->dump = ++rth->seq,
@ -304,7 +378,7 @@ int rtnl_mdbdump_req(struct rtnl_handle *rth, int family)
struct nlmsghdr nlh;
struct br_port_msg bpm;
} req = {
.nlh.nlmsg_len = sizeof(req),
.nlh.nlmsg_len = NLMSG_LENGTH(sizeof(struct br_port_msg)),
.nlh.nlmsg_type = RTM_GETMDB,
.nlh.nlmsg_flags = NLM_F_DUMP | NLM_F_REQUEST,
.nlh.nlmsg_seq = rth->dump = ++rth->seq,
@ -319,8 +393,9 @@ int rtnl_netconfdump_req(struct rtnl_handle *rth, int family)
struct {
struct nlmsghdr nlh;
struct netconfmsg ncm;
char buf[0] __aligned(NLMSG_ALIGNTO);
} req = {
.nlh.nlmsg_len = sizeof(req),
.nlh.nlmsg_len = NLMSG_LENGTH(NLMSG_ALIGN(sizeof(struct netconfmsg))),
.nlh.nlmsg_type = RTM_GETNETCONF,
.nlh.nlmsg_flags = NLM_F_DUMP | NLM_F_REQUEST,
.nlh.nlmsg_seq = rth->dump = ++rth->seq,
@ -335,8 +410,9 @@ int rtnl_nsiddump_req(struct rtnl_handle *rth, int family)
struct {
struct nlmsghdr nlh;
struct rtgenmsg rtm;
char buf[0] __aligned(NLMSG_ALIGNTO);
} req = {
.nlh.nlmsg_len = sizeof(req),
.nlh.nlmsg_len = NLMSG_LENGTH(NLMSG_ALIGN(sizeof(struct rtgenmsg))),
.nlh.nlmsg_type = RTM_GETNSID,
.nlh.nlmsg_flags = NLM_F_DUMP | NLM_F_REQUEST,
.nlh.nlmsg_seq = rth->dump = ++rth->seq,
@ -346,41 +422,11 @@ int rtnl_nsiddump_req(struct rtnl_handle *rth, int family)
return send(rth->fd, &req, sizeof(req), 0);
}
int rtnl_linkdump_req(struct rtnl_handle *rth, int family)
{
return rtnl_linkdump_req_filter(rth, family, RTEXT_FILTER_VF);
}
int rtnl_linkdump_req_filter(struct rtnl_handle *rth, int family,
__u32 filt_mask)
static int __rtnl_linkdump_req(struct rtnl_handle *rth, int family)
{
struct {
struct nlmsghdr nlh;
struct ifinfomsg ifm;
/* attribute has to be NLMSG aligned */
struct rtattr ext_req __attribute__ ((aligned(NLMSG_ALIGNTO)));
__u32 ext_filter_mask;
} req = {
.nlh.nlmsg_len = sizeof(req),
.nlh.nlmsg_type = RTM_GETLINK,
.nlh.nlmsg_flags = NLM_F_DUMP | NLM_F_REQUEST,
.nlh.nlmsg_seq = rth->dump = ++rth->seq,
.ifm.ifi_family = family,
.ext_req.rta_type = IFLA_EXT_MASK,
.ext_req.rta_len = RTA_LENGTH(sizeof(__u32)),
.ext_filter_mask = filt_mask,
};
return send(rth->fd, &req, sizeof(req), 0);
}
int rtnl_linkdump_req_filter_fn(struct rtnl_handle *rth, int family,
req_filter_fn_t filter_fn)
{
struct {
struct nlmsghdr nlh;
struct ifinfomsg ifm;
char buf[1024];
} req = {
.nlh.nlmsg_len = NLMSG_LENGTH(sizeof(struct ifinfomsg)),
.nlh.nlmsg_type = RTM_GETLINK,
@ -388,16 +434,73 @@ int rtnl_linkdump_req_filter_fn(struct rtnl_handle *rth, int family,
.nlh.nlmsg_seq = rth->dump = ++rth->seq,
.ifm.ifi_family = family,
};
int err;
if (!filter_fn)
return -EINVAL;
return send(rth->fd, &req, sizeof(req), 0);
}
err = filter_fn(&req.nlh, sizeof(req));
if (err)
return err;
int rtnl_linkdump_req(struct rtnl_handle *rth, int family)
{
if (family == AF_UNSPEC)
return rtnl_linkdump_req_filter(rth, family, RTEXT_FILTER_VF);
return send(rth->fd, &req, req.nlh.nlmsg_len, 0);
return __rtnl_linkdump_req(rth, family);
}
/*
 * Send an RTM_GETLINK dump request for @family.
 *
 * For AF_UNSPEC and AF_BRIDGE the request carries an IFLA_EXT_MASK
 * attribute holding @filt_mask; every other family is handed to
 * __rtnl_linkdump_req(), which sends the request without the attribute.
 *
 * Returns whatever send() / __rtnl_linkdump_req() returns.
 */
int rtnl_linkdump_req_filter(struct rtnl_handle *rth, int family,
			     __u32 filt_mask)
{
	struct {
		struct nlmsghdr nlh;
		struct ifinfomsg ifm;
		/* attribute has to be NLMSG aligned */
		struct rtattr ext_req __aligned(NLMSG_ALIGNTO);
		__u32 ext_filter_mask;
	} msg;

	if (family != AF_UNSPEC && family != AF_BRIDGE)
		return __rtnl_linkdump_req(rth, family);

	memset(&msg, 0, sizeof(msg));

	/* netlink header; the new sequence number is also remembered as
	 * the dump sequence on the handle
	 */
	msg.nlh.nlmsg_len = sizeof(msg);
	msg.nlh.nlmsg_type = RTM_GETLINK;
	msg.nlh.nlmsg_flags = NLM_F_DUMP | NLM_F_REQUEST;
	rth->dump = ++rth->seq;
	msg.nlh.nlmsg_seq = rth->dump;

	msg.ifm.ifi_family = family;

	/* single trailing attribute: the extended filter mask */
	msg.ext_req.rta_type = IFLA_EXT_MASK;
	msg.ext_req.rta_len = RTA_LENGTH(sizeof(__u32));
	msg.ext_filter_mask = filt_mask;

	return send(rth->fd, &msg, sizeof(msg), 0);
}
/*
 * Send an RTM_GETLINK dump request for @family, letting @filter_fn append
 * attributes to the message first.
 *
 * Only AF_UNSPEC requests go through @filter_fn; any other family is
 * handed to __rtnl_linkdump_req() and @filter_fn is not consulted.
 *
 * Returns -EINVAL when @filter_fn is NULL (AF_UNSPEC only), the non-zero
 * value returned by @filter_fn, or otherwise the result of send().
 */
int rtnl_linkdump_req_filter_fn(struct rtnl_handle *rth, int family,
				req_filter_fn_t filter_fn)
{
	struct {
		struct nlmsghdr nlh;
		struct ifinfomsg ifm;
		char buf[1024];
	} msg;
	int rc;

	if (family != AF_UNSPEC)
		return __rtnl_linkdump_req(rth, family);

	memset(&msg, 0, sizeof(msg));
	msg.nlh.nlmsg_len = NLMSG_LENGTH(sizeof(struct ifinfomsg));
	msg.nlh.nlmsg_type = RTM_GETLINK;
	msg.nlh.nlmsg_flags = NLM_F_DUMP | NLM_F_REQUEST;
	/* the sequence number is consumed even if the callback is missing */
	rth->dump = ++rth->seq;
	msg.nlh.nlmsg_seq = rth->dump;
	msg.ifm.ifi_family = family;

	if (!filter_fn)
		return -EINVAL;

	/* the callback gets the whole buffer size as its limit */
	rc = filter_fn(&msg.nlh, sizeof(msg));
	if (rc)
		return rc;

	/* send only what the header says is in use, not the whole buffer */
	return send(rth->fd, &msg, msg.nlh.nlmsg_len, 0);
}
int rtnl_statsdump_req_filter(struct rtnl_handle *rth, int fam, __u32 filt_mask)
@ -512,6 +615,10 @@ static int rtnl_dump_done(struct nlmsghdr *h)
}
if (len < 0) {
/* check for any messages returned from kernel */
if (nl_dump_ext_ack_done(h, len))
return len;
errno = -len;
switch (errno) {
case ENOENT:

View File

@ -600,18 +600,6 @@ static int __get_addr_1(inet_prefix *addr, const char *name, int family)
return 0;
}
if (family == AF_DECnet) {
struct dn_naddr dna;
addr->family = AF_DECnet;
if (dnet_pton(AF_DECnet, name, &dna) <= 0)
return -1;
memcpy(addr->data, dna.a_addr, 2);
addr->bytelen = 2;
addr->bitlen = -1;
return 0;
}
if (family == AF_MPLS) {
unsigned int maxlabels;
int i;
@ -1000,15 +988,6 @@ const char *rt_addr_n2a_r(int af, int len,
return inet_ntop(af, addr, buf, buflen);
case AF_MPLS:
return mpls_ntop(af, addr, buf, buflen);
case AF_IPX:
return ipx_ntop(af, addr, buf, buflen);
case AF_DECnet:
{
struct dn_naddr dna = { 2, { 0, 0, } };
memcpy(dna.a_addr, addr, 2);
return dnet_ntop(af, &dna, buf, buflen);
}
case AF_PACKET:
return ll_addr_n2a(addr, len, ARPHRD_VOID, buf, buflen);
case AF_BRIDGE:
@ -1050,8 +1029,6 @@ int read_family(const char *name)
family = AF_INET;
else if (strcmp(name, "inet6") == 0)
family = AF_INET6;
else if (strcmp(name, "dnet") == 0)
family = AF_DECnet;
else if (strcmp(name, "link") == 0)
family = AF_PACKET;
else if (strcmp(name, "ipx") == 0)
@ -1069,8 +1046,6 @@ const char *family_name(int family)
return "inet";
if (family == AF_INET6)
return "inet6";
if (family == AF_DECnet)
return "dnet";
if (family == AF_PACKET)
return "link";
if (family == AF_IPX)

View File

@ -496,6 +496,8 @@ the following additional arguments are supported:
] [
.BI tos " TOS "
] [
.BI df " DF "
] [
.BI flowlabel " FLOWLABEL "
] [
.BI dstport " PORT "
@ -565,6 +567,18 @@ parameter.
.BI tos " TOS"
- specifies the TOS value to use in outgoing packets.
.sp
.BI df " DF"
- specifies the usage of the Don't Fragment flag (DF) bit in outgoing packets
with IPv4 headers. The value
.B inherit
causes the bit to be copied from the original IP header. The values
.B unset
and
.B set
cause the bit to be always unset or always set, respectively. By default, the
bit is not set.
.sp
.BI flowlabel " FLOWLABEL"
- specifies the flow label to use in outgoing packets.
@ -1166,6 +1180,8 @@ the following additional arguments are supported:
] [
.BI tos " TOS "
] [
.BI df " DF "
] [
.BI flowlabel " FLOWLABEL "
] [
.BI dstport " PORT"
@ -1198,6 +1214,18 @@ ttl. Default option is "0".
.BI tos " TOS"
- specifies the TOS value to use in outgoing packets.
.sp
.BI df " DF"
- specifies the usage of the Don't Fragment flag (DF) bit in outgoing packets
with IPv4 headers. The value
.B inherit
causes the bit to be copied from the original IP header. The values
.B unset
and
.B set
cause the bit to be always unset or always set, respectively. By default, the
bit is not set.
.sp
.BI flowlabel " FLOWLABEL"
- specifies the flow label to use in outgoing packets.

View File

@ -107,7 +107,7 @@ replace " } "
.ti -8
.IR FAMILY " := [ "
.BR inet " | " inet6 " | " ipx " | " dnet " | " mpls " | " bridge " | " link " ]"
.BR inet " | " inet6 " | " mpls " | " bridge " | " link " ]"
.ti -8
.IR OPTIONS " := " FLAGS " [ "

View File

@ -34,7 +34,7 @@ ip \- show / manipulate routing, network devices, interfaces and tunnels
\fB\-r\fR[\fIesolve\fR] |
\fB\-iec\fR |
\fB\-f\fR[\fIamily\fR] {
.BR inet " | " inet6 " | " ipx " | " dnet " | " link " } | "
.BR inet " | " inet6 " | " link " } | "
\fB-4\fR |
\fB-6\fR |
\fB-I\fR |
@ -94,7 +94,7 @@ Zero (0) means loop until all addresses are removed.
.TP
.BR "\-f" , " \-family " <FAMILY>
Specifies the protocol family to use. The protocol family identifier can be one of
.BR "inet" , " inet6" , " bridge" , " ipx" , " dnet" , " mpls"
.BR "inet" , " inet6" , " bridge" , " mpls"
or
.BR link .
If this option is not present,
@ -125,16 +125,6 @@ shortcut for
shortcut for
.BR "\-family bridge" .
.TP
.B \-D
shortcut for
.BR "\-family decnet" .
.TP
.B \-I
shortcut for
.BR "\-family ipx" .
.TP
.B \-M
shortcut for

View File

@ -1,6 +1,6 @@
.TH RDMA\-DEV 8 "06 Jul 2017" "iproute2" "Linux"
.SH NAME
rdmak-dev \- RDMA device configuration
rdma-dev \- RDMA device configuration
.SH SYNOPSIS
.sp
.ad l
@ -22,10 +22,18 @@ rdmak-dev \- RDMA device configuration
.B rdma dev show
.RI "[ " DEV " ]"
.ti -8
.B rdma dev set
.RI "[ " DEV " ]"
.BR name
.BR NEWNAME
.ti -8
.B rdma dev help
.SH "DESCRIPTION"
.SS rdma dev set - rename rdma device
.SS rdma dev show - display rdma device attributes
.PP
@ -45,6 +53,11 @@ rdma dev show mlx5_3
Shows the state of specified RDMA device.
.RE
.PP
rdma dev set mlx5_3 name rdma_0
.RS 4
Renames the mlx5_3 device to rdma_0.
.RE
.PP
.SH SEE ALSO
.BR rdma (8),

View File

@ -56,8 +56,9 @@ flower \- flow based traffic control filter
.IR MASKED_IP_TTL " | { "
.BR dst_ip " | " src_ip " } "
.IR PREFIX " | { "
.BR dst_port " | " src_port " } "
.IR port_number " } | "
.BR dst_port " | " src_port " } { "
.IR port_number " | "
.IR min_port_number-max_port_number " } | "
.B tcp_flags
.IR MASKED_TCP_FLAGS " | "
.B type
@ -220,10 +221,12 @@ must be a valid IPv4 or IPv6 address, depending on the \fBprotocol\fR
option to tc filter, optionally followed by a slash and the prefix length.
If the prefix is missing, \fBtc\fR assumes a full-length host match.
.TP
.BI dst_port " NUMBER"
.IR \fBdst_port " { " NUMBER " | " " MIN_VALUE-MAX_VALUE " }
.TQ
.BI src_port " NUMBER"
Match on layer 4 protocol source or destination port number. Only available for
.IR \fBsrc_port " { " NUMBER " | " " MIN_VALUE-MAX_VALUE " }
Match on layer 4 protocol source or destination port number. Alternatively, the
minimum and maximum values can be specified to match on a range of layer 4
protocol source or destination port numbers. Only available for
.BR ip_proto " values " udp ", " tcp " and " sctp
which have to be specified in beforehand.
.TP

View File

@ -15,23 +15,28 @@ BYTES ] [
.B maxrate
RATE ] [
.B buckets
NUMBER ] [
NUMBER ] [
.B orphan_mask
NUMBER ] [
.B pacing
|
.B nopacing
]
] [
.B ce_threshold
TIME ]
.SH DESCRIPTION
FQ (Fair Queue) is a classless packet scheduler meant to be mostly
used for locally generated traffic. It is designed to achieve per flow pacing.
FQ does flow separation, and is able to respect pacing requirements set by TCP stack.
All packets belonging to a socket are considered as a 'flow'.
For non local packets (router workload), packet rxhash is used as fallback.
For non local packets (router workload), packet hash is used as fallback.
An application can specify a maximum pacing rate using the
.B SO_MAX_PACING_RATE
setsockopt call. This packet scheduler adds delay between packets to
respect rate limitation set by TCP stack.
respect rate limitation set on each socket. Note that after linux-4.20, linux adopted EDT (Earliest Departure Time)
and TCP directly sets the appropriate Departure Time for each skb.
Dequeueing happens in a round-robin fashion.
A special FIFO queue is reserved for high priority packets (
@ -72,18 +77,28 @@ is ignored only if it is larger than this value.
The size of the hash table used for flow lookups. Each bucket is assigned a
red-black tree for efficient collision sorting.
Default: 1024.
.SS orphan_mask
For packets not owned by a socket, fq is able to mask a part of skb->hash
and reduce number of buckets associated with the traffic. This is a DDOS
prevention mechanism, and the default is 1023 (meaning no more than 1024 flows
are allocated for these packets)
.SS [no]pacing
Enable or disable flow pacing. Default is enabled.
.SS ce_threshold
sets a threshold above which all packets are marked with ECN Congestion
Experienced. This is useful for DCTCP-style congestion control algorithms that
require marking at very shallow queueing thresholds.
.SH EXAMPLES
#tc qdisc add dev eth0 root fq
#tc qdisc add dev eth0 root est 1sec 4sec fq ce_threshold 4ms
.br
#tc -s -d qdisc
#tc -s -d qdisc sh dev eth0
.br
qdisc fq 8003: dev eth0 root refcnt 2 limit 10000p flow_limit 100p buckets 1024 quantum 3028 initial_quantum 15140
Sent 503727981 bytes 1146972 pkt (dropped 0, overlimits 0 requeues 54452)
backlog 0b 0p requeues 54452
1289 flows (1289 inactive, 0 throttled)
0 gc, 31 highprio, 27411 throttled
qdisc fq 800e: root refcnt 9 limit 10000p flow_limit 1000p buckets 1024 orphan_mask 1023 quantum 3028 initial_quantum 15140 low_rate_threshold 550Kbit refill_delay 40.0ms ce_threshold 4.0ms
Sent 533368436185 bytes 352296695 pkt (dropped 0, overlimits 0 requeues 1339864)
rate 39220Mbit 3238202pps backlog 12417828b 358p requeues 1339864
1052 flows (852 inactive, 0 throttled)
112 gc, 0 highprio, 212 throttled, 21501 ns latency, 470241 ce_mark
.br
.SH SEE ALSO
.BR tc (8),

View File

@ -424,7 +424,7 @@ static int do_one_request(struct nlmsghdr *n)
static void load_initial_table(void)
{
if (rtnl_neighdump_req(&rth, AF_INET) < 0) {
if (rtnl_neighdump_req(&rth, AF_INET, NULL) < 0) {
perror("dump request failed");
exit(1);
}

View File

@ -1,4 +1,4 @@
Notes about distribution tables from Nistnet
Notes about distribution tables from Nistnet
-------------------------------------------------------------------------------
I. About the distribution tables

View File

@ -14,6 +14,7 @@
static int dev_help(struct rd *rd)
{
pr_out("Usage: %s dev show [DEV]\n", rd->filename);
pr_out(" %s dev set [DEV] name DEVNAME\n", rd->filename);
return 0;
}
@ -258,17 +259,51 @@ static int dev_one_show(struct rd *rd)
return rd_exec_cmd(rd, cmds, "parameter");
}
/*
 * "rdma dev set DEV name NEWNAME": build an RDMA_NLDEV_CMD_SET request
 * carrying the device index and the next command-line argument as the
 * device name, then send it.
 *
 * Returns -EINVAL when no argument is left, otherwise the result of
 * rd_send_msg().
 */
static int dev_set_name(struct rd *rd)
{
	const uint16_t nl_flags = NLM_F_REQUEST | NLM_F_ACK;
	uint32_t msg_seq;

	if (!rd_no_arg(rd)) {
		rd_prepare_msg(rd, RDMA_NLDEV_CMD_SET, &msg_seq, nl_flags);
		mnl_attr_put_u32(rd->nlh, RDMA_NLDEV_ATTR_DEV_INDEX,
				 rd->dev_idx);
		mnl_attr_put_strz(rd->nlh, RDMA_NLDEV_ATTR_DEV_NAME,
				  rd_argv(rd));
		return rd_send_msg(rd);
	}

	pr_err("Please provide device new name.\n");
	return -EINVAL;
}
/*
 * Dispatch the parameter that follows "rdma dev set DEV".
 * The NULL entry maps to the usage text; "name" maps to the rename handler.
 */
static int dev_one_set(struct rd *rd)
{
	const struct rd_cmd set_params[] = {
		{ NULL,		dev_help},
		{ "name",	dev_set_name},
		{ 0 }
	};

	return rd_exec_cmd(rd, set_params, "parameter");
}
static int dev_show(struct rd *rd)
{
return rd_exec_dev(rd, dev_one_show);
}
/*
 * Entry point for "rdma dev set": hands dev_one_set to
 * rd_exec_require_dev() and returns its result.
 */
static int dev_set(struct rd *rd)
{
return rd_exec_require_dev(rd, dev_one_set);
}
int cmd_dev(struct rd *rd)
{
const struct rd_cmd cmds[] = {
{ NULL, dev_show },
{ "show", dev_show },
{ "list", dev_show },
{ "set", dev_set },
{ "help", dev_help },
{ 0 }
};

View File

@ -19,7 +19,7 @@ static int link_help(struct rd *rd)
static const char *caps_to_str(uint32_t idx)
{
#define RDMA_PORT_FLAGS(x) \
#define RDMA_PORT_FLAGS_LOW(x) \
x(RESERVED, 0) \
x(SM, 1) \
x(NOTICE, 2) \
@ -53,13 +53,39 @@ static const char *caps_to_str(uint32_t idx)
x(MULT_FDB, 30) \
x(HIERARCHY_INFO, 31)
enum { RDMA_PORT_FLAGS(RDMA_BITMAP_ENUM) };
#define RDMA_PORT_FLAGS_HIGH(x) \
x(SET_NODE_DESC, 0) \
x(EXT_INFO, 1) \
x(VIRT, 2) \
x(SWITCH_POR_STATE_TABLE, 3) \
x(LINK_WIDTH_2X, 4) \
x(LINK_SPEED_HDR, 5)
/*
* Separation below is needed to allow compilation of rdmatool
* on 32bits systems. On such systems, C-enum is limited to be
* int and can't hold more than 32 bits.
*/
enum { RDMA_PORT_FLAGS_LOW(RDMA_BITMAP_ENUM) };
enum { RDMA_PORT_FLAGS_HIGH(RDMA_BITMAP_ENUM) };
static const char * const
rdma_port_names[] = { RDMA_PORT_FLAGS(RDMA_BITMAP_NAMES) };
#undef RDMA_PORT_FLAGS
rdma_port_names_low[] = { RDMA_PORT_FLAGS_LOW(RDMA_BITMAP_NAMES) };
static const char * const
rdma_port_names_high[] = { RDMA_PORT_FLAGS_HIGH(RDMA_BITMAP_NAMES) };
uint32_t high_idx;
#undef RDMA_PORT_FLAGS_LOW
#undef RDMA_PORT_FLAGS_HIGH
return rdma_port_names[idx];
if (idx < ARRAY_SIZE(rdma_port_names_low) && rdma_port_names_low[idx])
return rdma_port_names_low[idx];
high_idx = idx - ARRAY_SIZE(rdma_port_names_low);
if (high_idx < ARRAY_SIZE(rdma_port_names_high) &&
rdma_port_names_high[high_idx])
return rdma_port_names_high[high_idx];
return "UNKNOWN";
}
static void link_print_caps(struct rd *rd, struct nlattr **tb)

View File

@ -74,6 +74,13 @@ struct rd_cmd {
int (*func)(struct rd *rd);
};
/*
* Parser interface
*/
bool rd_no_arg(struct rd *rd);
void rd_arg_inc(struct rd *rd);
char *rd_argv(struct rd *rd);
/*
* Commands interface
@ -83,11 +90,14 @@ int cmd_link(struct rd *rd);
int cmd_res(struct rd *rd);
int rd_exec_cmd(struct rd *rd, const struct rd_cmd *c, const char *str);
int rd_exec_dev(struct rd *rd, int (*cb)(struct rd *rd));
int rd_exec_require_dev(struct rd *rd, int (*cb)(struct rd *rd));
int rd_exec_link(struct rd *rd, int (*cb)(struct rd *rd), bool strict_port);
void rd_free(struct rd *rd);
int rd_set_arg_to_devname(struct rd *rd);
int rd_argc(struct rd *rd);
int strcmpx(const char *str1, const char *str2);
/*
* Device manipulation
*/
@ -108,12 +118,14 @@ int rd_recv_msg(struct rd *rd, mnl_cb_t callback, void *data, uint32_t seq);
void rd_prepare_msg(struct rd *rd, uint32_t cmd, uint32_t *seq, uint16_t flags);
int rd_dev_init_cb(const struct nlmsghdr *nlh, void *data);
int rd_attr_cb(const struct nlattr *attr, void *data);
int rd_attr_check(const struct nlattr *attr, int *typep);
/*
* Print helpers
*/
void print_driver_table(struct rd *rd, struct nlattr *tb);
void newline(struct rd *rd);
void newline_indent(struct rd *rd);
#define MAX_LINE_LENGTH 80
#endif /* _RDMA_TOOL_H_ */

View File

@ -18,14 +18,14 @@ int rd_argc(struct rd *rd)
return rd->argc;
}
static char *rd_argv(struct rd *rd)
char *rd_argv(struct rd *rd)
{
if (!rd_argc(rd))
return NULL;
return *rd->argv;
}
static int strcmpx(const char *str1, const char *str2)
int strcmpx(const char *str1, const char *str2)
{
if (strlen(str1) > strlen(str2))
return -1;
@ -39,7 +39,7 @@ static bool rd_argv_match(struct rd *rd, const char *pattern)
return strcmpx(rd_argv(rd), pattern) == 0;
}
static void rd_arg_inc(struct rd *rd)
void rd_arg_inc(struct rd *rd)
{
if (!rd_argc(rd))
return;
@ -47,7 +47,7 @@ static void rd_arg_inc(struct rd *rd)
rd->argv++;
}
static bool rd_no_arg(struct rd *rd)
bool rd_no_arg(struct rd *rd)
{
return rd_argc(rd) == 0;
}
@ -404,7 +404,7 @@ static const enum mnl_attr_data_type nldev_policy[RDMA_NLDEV_ATTR_MAX] = {
[RDMA_NLDEV_ATTR_DRIVER_U64] = MNL_TYPE_U64,
};
static int rd_attr_check(const struct nlattr *attr, int *typep)
int rd_attr_check(const struct nlattr *attr, int *typep)
{
int type;
@ -577,6 +577,16 @@ out:
return ret;
}
/*
 * Like rd_exec_dev(), but refuse to run when no argument is left on the
 * command line: an error is printed and -EINVAL is returned instead.
 */
int rd_exec_require_dev(struct rd *rd, int (*cb)(struct rd *rd))
{
	if (!rd_no_arg(rd))
		return rd_exec_dev(rd, cb);

	pr_err("Please provide device name.\n");
	return -EINVAL;
}
int rd_exec_cmd(struct rd *rd, const struct rd_cmd *cmds, const char *str)
{
const struct rd_cmd *c;
@ -696,7 +706,7 @@ void newline(struct rd *rd)
pr_out("\n");
}
static void newline_indent(struct rd *rd)
void newline_indent(struct rd *rd)
{
newline(rd);
if (!rd->json_output)

View File

@ -473,24 +473,57 @@ static int flower_port_attr_type(__u8 ip_proto, enum flower_endpoint endpoint)
return -1;
}
/*
 * Look up the netlink attribute types that carry a port range.
 *
 * For TCP, UDP and SCTP the SRC or DST MIN/MAX attribute types are stored
 * in *min_port_type / *max_port_type (SRC when @type is
 * FLOWER_ENDPOINT_SRC, DST otherwise) and 0 is returned.  For any other
 * protocol -1 is returned and the outputs are left untouched.
 */
static int flower_port_range_attr_type(__u8 ip_proto, enum flower_endpoint type,
				       __be16 *min_port_type,
				       __be16 *max_port_type)
{
	switch (ip_proto) {
	case IPPROTO_TCP:
	case IPPROTO_UDP:
	case IPPROTO_SCTP:
		break;
	default:
		return -1;
	}

	if (type != FLOWER_ENDPOINT_SRC) {
		*min_port_type = TCA_FLOWER_KEY_PORT_DST_MIN;
		*max_port_type = TCA_FLOWER_KEY_PORT_DST_MAX;
		return 0;
	}

	*min_port_type = TCA_FLOWER_KEY_PORT_SRC_MIN;
	*max_port_type = TCA_FLOWER_KEY_PORT_SRC_MAX;
	return 0;
}
static int flower_parse_port(char *str, __u8 ip_proto,
enum flower_endpoint endpoint,
struct nlmsghdr *n)
{
__u16 min, max;
int ret;
int type;
__be16 port;
type = flower_port_attr_type(ip_proto, endpoint);
if (type < 0)
ret = sscanf(str, "%hu-%hu", &min, &max);
if (ret == 1) {
int type;
type = flower_port_attr_type(ip_proto, endpoint);
if (type < 0)
return -1;
addattr16(n, MAX_MSG, type, htons(min));
} else if (ret == 2) {
__be16 min_port_type, max_port_type;
if (max <= min) {
fprintf(stderr, "max value should be greater than min value\n");
return -1;
}
if (flower_port_range_attr_type(ip_proto, endpoint,
&min_port_type, &max_port_type))
return -1;
addattr16(n, MAX_MSG, min_port_type, htons(min));
addattr16(n, MAX_MSG, max_port_type, htons(max));
} else {
return -1;
ret = get_be16(&port, str, 10);
if (ret)
return -1;
addattr16(n, MAX_MSG, type, port);
}
return 0;
}
@ -1490,6 +1523,29 @@ static void flower_print_port(char *name, struct rtattr *attr)
print_hu(PRINT_ANY, name, namefrm, rta_getattr_be16(attr));
}
/*
 * Print a port range given its MIN and MAX attributes.
 *
 * Nothing is printed unless both attributes are present.  In JSON context
 * an object called @name with "start" and "end" members is emitted;
 * otherwise a single "<name> <min>-<max>" item is printed.
 */
static void flower_print_port_range(char *name, struct rtattr *min_attr,
				    struct rtattr *max_attr)
{
	if (!min_attr || !max_attr)
		return;

	if (is_json_context()) {
		open_json_object(name);
		print_hu(PRINT_JSON, "start", NULL, rta_getattr_be16(min_attr));
		print_hu(PRINT_JSON, "end", NULL, rta_getattr_be16(max_attr));
		close_json_object();
	} else {
		SPRINT_BUF(namefrm);
		SPRINT_BUF(out);

		/*
		 * Both buffers have a fixed size and @name comes from the
		 * caller, so format with an explicit bound instead of sprintf.
		 */
		snprintf(out, sizeof(out), "%u-%u",
			 rta_getattr_be16(min_attr),
			 rta_getattr_be16(max_attr));
		snprintf(namefrm, sizeof(namefrm), "\n %s %%s", name);
		print_string(PRINT_ANY, name, namefrm, out);
	}
}
static void flower_print_tcp_flags(const char *name, struct rtattr *flags_attr,
struct rtattr *mask_attr)
{
@ -1678,6 +1734,7 @@ static int flower_print_opt(struct filter_util *qu, FILE *f,
struct rtattr *opt, __u32 handle)
{
struct rtattr *tb[TCA_FLOWER_MAX + 1];
__be16 min_port_type, max_port_type;
int nl_type, nl_mask_type;
__be16 eth_type = 0;
__u8 ip_proto = 0xff;
@ -1796,6 +1853,16 @@ static int flower_print_opt(struct filter_util *qu, FILE *f,
if (nl_type >= 0)
flower_print_port("src_port", tb[nl_type]);
if (!flower_port_range_attr_type(ip_proto, FLOWER_ENDPOINT_DST,
&min_port_type, &max_port_type))
flower_print_port_range("dst_port",
tb[min_port_type], tb[max_port_type]);
if (!flower_port_range_attr_type(ip_proto, FLOWER_ENDPOINT_SRC,
&min_port_type, &max_port_type))
flower_print_port_range("src_port",
tb[min_port_type], tb[max_port_type]);
flower_print_tcp_flags("tcp_flags", tb[TCA_FLOWER_KEY_TCP_FLAGS],
tb[TCA_FLOWER_KEY_TCP_FLAGS_MASK]);

View File

@ -188,8 +188,7 @@ static int choke_print_opt(struct qdisc_util *qu, FILE *f, struct rtattr *opt)
fprintf(f, "limit %up min %up max %up ",
qopt->limit, qopt->qth_min, qopt->qth_max);
if (qopt->flags & TC_RED_ECN)
fprintf(f, "ecn ");
tc_red_print_flags(qopt->flags);
if (show_details) {
fprintf(f, "ewma %u ", qopt->Wlog);

View File

@ -56,6 +56,7 @@ static void explain(void)
fprintf(stderr, " [ [no]pacing ] [ refill_delay TIME ]\n");
fprintf(stderr, " [ low_rate_threshold RATE ]\n");
fprintf(stderr, " [ orphan_mask MASK]\n");
fprintf(stderr, " [ ce_threshold TIME ]\n");
}
static unsigned int ilog2(unsigned int val)
@ -83,6 +84,7 @@ static int fq_parse_opt(struct qdisc_util *qu, int argc, char **argv,
unsigned int defrate;
unsigned int refill_delay;
unsigned int orphan_mask;
unsigned int ce_threshold;
bool set_plimit = false;
bool set_flow_plimit = false;
bool set_quantum = false;
@ -92,6 +94,7 @@ static int fq_parse_opt(struct qdisc_util *qu, int argc, char **argv,
bool set_refill_delay = false;
bool set_orphan_mask = false;
bool set_low_rate_threshold = false;
bool set_ce_threshold = false;
int pacing = -1;
struct rtattr *tail;
@ -135,6 +138,13 @@ static int fq_parse_opt(struct qdisc_util *qu, int argc, char **argv,
return -1;
}
set_low_rate_threshold = true;
} else if (strcmp(*argv, "ce_threshold") == 0) {
NEXT_ARG();
if (get_time(&ce_threshold, *argv)) {
fprintf(stderr, "Illegal \"ce_threshold\"\n");
return -1;
}
set_ce_threshold = true;
} else if (strcmp(*argv, "defrate") == 0) {
NEXT_ARG();
if (strchr(*argv, '%')) {
@ -226,6 +236,9 @@ static int fq_parse_opt(struct qdisc_util *qu, int argc, char **argv,
if (set_orphan_mask)
addattr_l(n, 1024, TCA_FQ_ORPHAN_MASK,
&orphan_mask, sizeof(refill_delay));
if (set_ce_threshold)
addattr_l(n, 1024, TCA_FQ_CE_THRESHOLD,
&ce_threshold, sizeof(ce_threshold));
addattr_nest_end(n, tail);
return 0;
}
@ -239,6 +252,7 @@ static int fq_print_opt(struct qdisc_util *qu, FILE *f, struct rtattr *opt)
unsigned int rate, quantum;
unsigned int refill_delay;
unsigned int orphan_mask;
unsigned int ce_threshold;
SPRINT_BUF(b1);
@ -310,21 +324,28 @@ static int fq_print_opt(struct qdisc_util *qu, FILE *f, struct rtattr *opt)
fprintf(f, "refill_delay %s ", sprint_time(refill_delay, b1));
}
if (tb[TCA_FQ_CE_THRESHOLD] &&
RTA_PAYLOAD(tb[TCA_FQ_CE_THRESHOLD]) >= sizeof(__u32)) {
ce_threshold = rta_getattr_u32(tb[TCA_FQ_CE_THRESHOLD]);
if (ce_threshold != ~0U)
fprintf(f, "ce_threshold %s ", sprint_time(ce_threshold, b1));
}
return 0;
}
static int fq_print_xstats(struct qdisc_util *qu, FILE *f,
struct rtattr *xstats)
{
struct tc_fq_qd_stats *st;
struct tc_fq_qd_stats *st, _st;
if (xstats == NULL)
return 0;
if (RTA_PAYLOAD(xstats) < sizeof(*st))
return -1;
memset(&_st, 0, sizeof(_st));
memcpy(&_st, RTA_DATA(xstats), min(RTA_PAYLOAD(xstats), sizeof(*st)));
st = RTA_DATA(xstats);
st = &_st;
fprintf(f, " %u flows (%u inactive, %u throttled)",
st->flows, st->inactive_flows, st->throttled_flows);
@ -343,6 +364,9 @@ static int fq_print_xstats(struct qdisc_util *qu, FILE *f,
if (st->unthrottle_latency_ns)
fprintf(f, ", %u ns latency", st->unthrottle_latency_ns);
if (st->ce_mark)
fprintf(f, ", %llu ce_mark", st->ce_mark);
if (st->flows_plimit)
fprintf(f, ", %llu flows_plimit", st->flows_plimit);

View File

@ -37,10 +37,10 @@
static void explain(void)
{
fprintf(stderr, "Usage: tc qdisc { add | replace | change } ... gred setup vqs NUMBER\n");
fprintf(stderr, " default DEFAULT_VQ [ grio ] [ limit BYTES ]\n");
fprintf(stderr, " default DEFAULT_VQ [ grio ] [ limit BYTES ] [ecn] [harddrop]\n");
fprintf(stderr, " tc qdisc change ... gred vq VQ [ prio VALUE ] limit BYTES\n");
fprintf(stderr, " min BYTES max BYTES avpkt BYTES [ burst PACKETS ]\n");
fprintf(stderr, " [ probability PROBABILITY ] [ bandwidth KBPS ]\n");
fprintf(stderr, " [ probability PROBABILITY ] [ bandwidth KBPS ] [ecn] [harddrop]\n");
}
static int init_gred(struct qdisc_util *qu, int argc, char **argv,
@ -87,6 +87,10 @@ static int init_gred(struct qdisc_util *qu, int argc, char **argv,
fprintf(stderr, "Illegal \"limit\"\n");
return -1;
}
} else if (strcmp(*argv, "ecn") == 0) {
opt.flags |= TC_RED_ECN;
} else if (strcmp(*argv, "harddrop") == 0) {
opt.flags |= TC_RED_HARDDROP;
} else if (strcmp(*argv, "help") == 0) {
explain();
return -1;
@ -117,15 +121,16 @@ static int init_gred(struct qdisc_util *qu, int argc, char **argv,
*/
static int gred_parse_opt(struct qdisc_util *qu, int argc, char **argv, struct nlmsghdr *n, const char *dev)
{
struct rtattr *tail, *entry, *vqs;
int ok = 0;
struct tc_gred_qopt opt = { 0 };
unsigned int burst = 0;
unsigned int avpkt = 0;
unsigned int flags = 0;
double probability = 0.02;
unsigned int rate = 0;
int parm;
__u8 sbuf[256];
struct rtattr *tail;
__u32 max_P;
opt.DP = MAX_DPs;
@ -208,6 +213,10 @@ static int gred_parse_opt(struct qdisc_util *qu, int argc, char **argv, struct n
return -1;
}
ok++;
} else if (strcmp(*argv, "ecn") == 0) {
flags |= TC_RED_ECN;
} else if (strcmp(*argv, "harddrop") == 0) {
flags |= TC_RED_HARDDROP;
} else if (strcmp(*argv, "help") == 0) {
explain();
return -1;
@ -261,22 +270,167 @@ static int gred_parse_opt(struct qdisc_util *qu, int argc, char **argv, struct n
addattr_l(n, 1024, TCA_GRED_STAB, sbuf, 256);
max_P = probability * pow(2, 32);
addattr32(n, 1024, TCA_GRED_MAX_P, max_P);
vqs = addattr_nest(n, 1024, TCA_GRED_VQ_LIST);
entry = addattr_nest(n, 1024, TCA_GRED_VQ_ENTRY);
addattr32(n, 1024, TCA_GRED_VQ_DP, opt.DP);
addattr32(n, 1024, TCA_GRED_VQ_FLAGS, flags);
addattr_nest_end(n, entry);
addattr_nest_end(n, vqs);
addattr_nest_end(n, tail);
return 0;
}
/*
 * Per-virtual-queue data collected from the TCA_GRED_VQ_LIST attribute.
 * gred_parse_vqs() fills one of these per queue and gred_print_stats()
 * prints from it.
 */
struct tc_gred_info {
/* true when the entry carried a TCA_GRED_VQ_FLAGS attribute */
bool flags_present;
/* from TCA_GRED_VQ_STAT_BYTES */
__u64 bytes;
/* from TCA_GRED_VQ_STAT_PACKETS */
__u32 packets;
/* from TCA_GRED_VQ_STAT_BACKLOG */
__u32 backlog;
/* from TCA_GRED_VQ_STAT_PROB_DROP */
__u32 prob_drop;
/* from TCA_GRED_VQ_STAT_PROB_MARK */
__u32 prob_mark;
/* from TCA_GRED_VQ_STAT_FORCED_DROP */
__u32 forced_drop;
/* from TCA_GRED_VQ_STAT_FORCED_MARK */
__u32 forced_mark;
/* from TCA_GRED_VQ_STAT_PDROP */
__u32 pdrop;
/* from TCA_GRED_VQ_STAT_OTHER */
__u32 other;
/* from TCA_GRED_VQ_FLAGS; only meaningful when flags_present is set */
__u32 flags;
};
static void
gred_parse_vqs(struct tc_gred_info *info, struct rtattr *vqs)
{
int rem = RTA_PAYLOAD(vqs);
unsigned int offset = 0;
while (rem > offset) {
struct rtattr *tb_entry[TCA_GRED_VQ_ENTRY_MAX + 1] = {};
struct rtattr *tb[TCA_GRED_VQ_MAX + 1] = {};
struct rtattr *entry;
unsigned int len;
unsigned int dp;
entry = RTA_DATA(vqs) + offset;
parse_rtattr(tb_entry, TCA_GRED_VQ_ENTRY_MAX, entry,
rem - offset);
len = RTA_LENGTH(RTA_PAYLOAD(entry));
offset += len;
if (!tb_entry[TCA_GRED_VQ_ENTRY]) {
fprintf(stderr,
"ERROR: Failed to parse Virtual Queue entry\n");
continue;
}
parse_rtattr_nested(tb, TCA_GRED_VQ_MAX,
tb_entry[TCA_GRED_VQ_ENTRY]);
if (!tb[TCA_GRED_VQ_DP]) {
fprintf(stderr,
"ERROR: Virtual Queue without DP attribute\n");
continue;
}
dp = rta_getattr_u32(tb[TCA_GRED_VQ_DP]);
if (tb[TCA_GRED_VQ_STAT_BYTES])
info[dp].bytes =
rta_getattr_u32(tb[TCA_GRED_VQ_STAT_BYTES]);
if (tb[TCA_GRED_VQ_STAT_PACKETS])
info[dp].packets =
rta_getattr_u32(tb[TCA_GRED_VQ_STAT_PACKETS]);
if (tb[TCA_GRED_VQ_STAT_BACKLOG])
info[dp].backlog =
rta_getattr_u32(tb[TCA_GRED_VQ_STAT_BACKLOG]);
if (tb[TCA_GRED_VQ_STAT_PROB_DROP])
info[dp].prob_drop =
rta_getattr_u32(tb[TCA_GRED_VQ_STAT_PROB_DROP]);
if (tb[TCA_GRED_VQ_STAT_PROB_MARK])
info[dp].prob_mark =
rta_getattr_u32(tb[TCA_GRED_VQ_STAT_PROB_MARK]);
if (tb[TCA_GRED_VQ_STAT_FORCED_DROP])
info[dp].forced_drop =
rta_getattr_u32(tb[TCA_GRED_VQ_STAT_FORCED_DROP]);
if (tb[TCA_GRED_VQ_STAT_FORCED_MARK])
info[dp].forced_mark =
rta_getattr_u32(tb[TCA_GRED_VQ_STAT_FORCED_MARK]);
if (tb[TCA_GRED_VQ_STAT_PDROP])
info[dp].pdrop =
rta_getattr_u32(tb[TCA_GRED_VQ_STAT_PDROP]);
if (tb[TCA_GRED_VQ_STAT_OTHER])
info[dp].other =
rta_getattr_u32(tb[TCA_GRED_VQ_STAT_OTHER]);
info[dp].flags_present = !!tb[TCA_GRED_VQ_FLAGS];
if (tb[TCA_GRED_VQ_FLAGS])
info[dp].flags =
rta_getattr_u32(tb[TCA_GRED_VQ_FLAGS]);
}
}
/*
 * Print the statistics of one virtual queue.
 *
 * @info: per-queue data parsed from TCA_GRED_VQ_LIST, or NULL; when NULL
 *        the byte and drop counters are taken from @qopt instead and the
 *        "Marked packets" line is not printed
 * @qopt: the queue's tc_gred_qopt; always supplies qave, backlog and the
 *        packet count
 */
static void
gred_print_stats(struct tc_gred_info *info, struct tc_gred_qopt *qopt)
{
/* prefer the byte counter from @info when it is available */
__u64 bytes = info ? info->bytes : qopt->bytesin;
SPRINT_BUF(b1);
if (!is_json_context())
printf("\n Queue size: ");
/* each size is emitted twice: raw number for JSON, formatted text otherwise */
print_uint(PRINT_JSON, "qave", NULL, qopt->qave);
print_string(PRINT_FP, NULL, "average %s ",
sprint_size(qopt->qave, b1));
print_uint(PRINT_JSON, "backlog", NULL, qopt->backlog);
print_string(PRINT_FP, NULL, "current %s ",
sprint_size(qopt->backlog, b1));
if (!is_json_context())
printf("\n Dropped packets: ");
if (info) {
print_uint(PRINT_ANY, "forced_drop", "forced %u ",
info->forced_drop);
print_uint(PRINT_ANY, "prob_drop", "early %u ",
info->prob_drop);
print_uint(PRINT_ANY, "pdrop", "pdrop %u ", info->pdrop);
print_uint(PRINT_ANY, "other", "other %u ", info->other);
/* mark counters exist only in @info */
if (!is_json_context())
printf("\n Marked packets: ");
print_uint(PRINT_ANY, "forced_mark", "forced %u ",
info->forced_mark);
print_uint(PRINT_ANY, "prob_mark", "early %u ",
info->prob_mark);
} else {
/* no @info: use the counters carried in @qopt */
print_uint(PRINT_ANY, "forced_drop", "forced %u ",
qopt->forced);
print_uint(PRINT_ANY, "prob_drop", "early %u ", qopt->early);
print_uint(PRINT_ANY, "pdrop", "pdrop %u ", qopt->pdrop);
print_uint(PRINT_ANY, "other", "other %u ", qopt->other);
}
if (!is_json_context())
printf("\n Total packets: ");
/* the packet count is read from @qopt in both cases */
print_uint(PRINT_ANY, "packets", "%u ", qopt->packets);
print_uint(PRINT_JSON, "bytes", NULL, bytes);
print_string(PRINT_FP, NULL, "(%s) ", sprint_size(bytes, b1));
}
static int gred_print_opt(struct qdisc_util *qu, FILE *f, struct rtattr *opt)
{
struct tc_gred_info infos[MAX_DPs] = {};
struct rtattr *tb[TCA_GRED_MAX + 1];
struct tc_gred_sopt *sopt;
struct tc_gred_qopt *qopt;
bool vq_info = false;
__u32 *max_p = NULL;
__u32 *limit = NULL;
unsigned int i;
SPRINT_BUF(b1);
SPRINT_BUF(b2);
SPRINT_BUF(b3);
if (opt == NULL)
return 0;
@ -302,47 +456,69 @@ static int gred_print_opt(struct qdisc_util *qu, FILE *f, struct rtattr *opt)
return -1;
}
/* Bad hack! should really return a proper message as shown above*/
fprintf(f, "vqs %u default %u %s",
sopt->DPs,
sopt->def_DP,
sopt->grio ? "grio " : "");
if (limit)
fprintf(f, "limit %s ",
sprint_size(*limit, b1));
for (i = 0; i < MAX_DPs; i++, qopt++) {
if (qopt->DP >= MAX_DPs) continue;
fprintf(f, "\n vq %u prio %hhu limit %s min %s max %s ",
qopt->DP,
qopt->prio,
sprint_size(qopt->limit, b1),
sprint_size(qopt->qth_min, b2),
sprint_size(qopt->qth_max, b3));
if (show_details) {
fprintf(f, "ewma %u ", qopt->Wlog);
if (max_p)
fprintf(f, "probability %lg ", max_p[i] / pow(2, 32));
else
fprintf(f, "Plog %u ", qopt->Plog);
fprintf(f, "Scell_log %u ", qopt->Scell_log);
}
if (show_stats) {
fprintf(f, "\n Queue size: average %s current %s ",
sprint_size(qopt->qave, b1),
sprint_size(qopt->backlog, b2));
fprintf(f, "\n Dropped packets: forced %u early %u pdrop %u other %u ",
qopt->forced,
qopt->early,
qopt->pdrop,
qopt->other);
fprintf(f, "\n Total packets: %u (%s) ",
qopt->packets,
sprint_size(qopt->bytesin, b1));
}
if (tb[TCA_GRED_VQ_LIST]) {
gred_parse_vqs(infos, tb[TCA_GRED_VQ_LIST]);
vq_info = true;
}
print_uint(PRINT_ANY, "dp_cnt", "vqs %u ", sopt->DPs);
print_uint(PRINT_ANY, "dp_default", "default %u ", sopt->def_DP);
if (sopt->grio)
print_bool(PRINT_ANY, "grio", "grio ", true);
else
print_bool(PRINT_ANY, "grio", NULL, false);
if (limit) {
print_uint(PRINT_JSON, "limit", NULL, *limit);
print_string(PRINT_FP, NULL, "limit %s ",
sprint_size(*limit, b1));
}
tc_red_print_flags(sopt->flags);
open_json_array(PRINT_JSON, "vqs");
for (i = 0; i < MAX_DPs; i++, qopt++) {
if (qopt->DP >= MAX_DPs)
continue;
open_json_object(NULL);
print_uint(PRINT_ANY, "vq", "\n vq %u ", qopt->DP);
print_hhu(PRINT_ANY, "prio", "prio %hhu ", qopt->prio);
print_uint(PRINT_JSON, "limit", NULL, qopt->limit);
print_string(PRINT_FP, NULL, "limit %s ",
sprint_size(qopt->limit, b1));
print_uint(PRINT_JSON, "min", NULL, qopt->qth_min);
print_string(PRINT_FP, NULL, "min %s ",
sprint_size(qopt->qth_min, b1));
print_uint(PRINT_JSON, "max", NULL, qopt->qth_max);
print_string(PRINT_FP, NULL, "max %s ",
sprint_size(qopt->qth_max, b1));
if (infos[i].flags_present)
tc_red_print_flags(infos[i].flags);
if (show_details) {
print_uint(PRINT_ANY, "ewma", "ewma %u ", qopt->Wlog);
if (max_p)
print_float(PRINT_ANY, "probability",
"probability %lg ",
max_p[i] / pow(2, 32));
else
print_uint(PRINT_ANY, "Plog", "Plog %u ",
qopt->Plog);
print_uint(PRINT_ANY, "Scell_log", "Scell_log %u ",
qopt->Scell_log);
}
if (show_stats)
gred_print_stats(vq_info ? &infos[i] : NULL, qopt);
close_json_object();
}
close_json_array(PRINT_JSON, "vqs");
return 0;
}

View File

@ -189,18 +189,8 @@ static int red_print_opt(struct qdisc_util *qu, FILE *f, struct rtattr *opt)
print_uint(PRINT_JSON, "max", NULL, qopt->qth_max);
print_string(PRINT_FP, NULL, "max %s ", sprint_size(qopt->qth_max, b3));
if (qopt->flags & TC_RED_ECN)
print_bool(PRINT_ANY, "ecn", "ecn ", true);
else
print_bool(PRINT_ANY, "ecn", NULL, false);
if (qopt->flags & TC_RED_HARDDROP)
print_bool(PRINT_ANY, "harddrop", "harddrop ", true);
else
print_bool(PRINT_ANY, "harddrop", NULL, false);
if (qopt->flags & TC_RED_ADAPTATIVE)
print_bool(PRINT_ANY, "adaptive", "adaptive ", true);
else
print_bool(PRINT_ANY, "adaptive", NULL, false);
tc_red_print_flags(qopt->flags);
if (show_details) {
print_uint(PRINT_ANY, "ewma", "ewma %u ", qopt->Wlog);
if (max_P)

View File

@ -235,8 +235,7 @@ static int sfq_print_opt(struct qdisc_util *qu, FILE *f, struct rtattr *opt)
sprint_size(qopt_ext->qth_min, b2),
sprint_size(qopt_ext->qth_max, b3),
qopt_ext->max_P / pow(2, 32));
if (qopt_ext->flags & TC_RED_ECN)
fprintf(f, "ecn ");
tc_red_print_flags(qopt_ext->flags);
if (show_stats) {
fprintf(f, "\n prob_mark %u prob_mark_head %u prob_drop %u",
qopt_ext->stats.prob_mark,

View File

@ -20,7 +20,9 @@
#include <arpa/inet.h>
#include <string.h>
#include "utils.h"
#include "tc_core.h"
#include "tc_util.h"
#include "tc_red.h"
/*
@ -97,3 +99,21 @@ int tc_red_eval_idle_damping(int Wlog, unsigned int avpkt, unsigned int bps, __u
sbuf[255] = 31;
return clog;
}
/*
 * Print the RED flag bits shared by the RED-based qdiscs.
 *
 * For each of TC_RED_ECN, TC_RED_HARDDROP and TC_RED_ADAPTATIVE (in that
 * order) one print_bool() call is made with PRINT_ANY and the keys "ecn",
 * "harddrop" and "adaptive": when the bit is set the value is true and the
 * keyword (with a trailing space) is passed as the format string; when it
 * is clear the value is false and the format string is NULL.
 */
void tc_red_print_flags(__u32 flags)
{
	static const struct {
		__u32 mask;		/* flag bit to test */
		const char *key;	/* key handed to print_bool() */
		const char *word;	/* format string used when the bit is set */
	} red_flag_tbl[] = {
		{ TC_RED_ECN,		"ecn",		"ecn " },
		{ TC_RED_HARDDROP,	"harddrop",	"harddrop " },
		{ TC_RED_ADAPTATIVE,	"adaptive",	"adaptive " },
	};
	unsigned int idx;

	for (idx = 0; idx < sizeof(red_flag_tbl) / sizeof(red_flag_tbl[0]); idx++) {
		if (flags & red_flag_tbl[idx].mask)
			print_bool(PRINT_ANY, red_flag_tbl[idx].key,
				   red_flag_tbl[idx].word, true);
		else
			print_bool(PRINT_ANY, red_flag_tbl[idx].key,
				   NULL, false);
	}
}

View File

@ -6,5 +6,6 @@ int tc_red_eval_P(unsigned qmin, unsigned qmax, double prob);
int tc_red_eval_ewma(unsigned qmin, unsigned burst, unsigned avpkt);
int tc_red_eval_idle_damping(int wlog, unsigned avpkt, unsigned bandwidth,
__u8 *sbuf);
void tc_red_print_flags(__u32 flags);
#endif

View File

@ -1,73 +0,0 @@
#!/bin/bash
# vim: ft=sh
#
# Classifier testbed driver.
#
# For every qdisc named in QDISCS and every file matching tests/cls/*.c this
# script builds a small qdisc/class tree on $DEV, lists it, executes the
# classifier test file, and finally deletes the root qdisc again.
#
# The ts_* helpers are not defined here; presumably they are provided by
# lib/generic.sh, which is sourced below. $DEV and $TC are not set in this
# script and must come from the calling environment.

. lib/generic.sh

QDISCS="cbq htb dsmark"

# Nothing to run without the classifier test directory.
if [ ! -d tests/cls ]; then
	ts_log "tests/cls folder does not exist"
	ts_skip
fi

for q in ${QDISCS}; do
	ts_log "Preparing classifier testbed with qdisc $q"

	for c in tests/cls/*.c; do

		# Build the qdisc-specific tree. Each `continue` below applies to
		# the innermost loop, i.e. it skips only the current test file.
		case "$q" in
		cbq)
			ts_tc "cls-testbed" "cbq root qdisc creation" \
				qdisc add dev $DEV root handle 10:0 \
				cbq bandwidth 100Mbit avpkt 1400 mpu 64
			ts_tc "cls-testbed" "cbq root class creation" \
				class add dev $DEV parent 10:0 classid 10:12 \
				cbq bandwidth 100mbit rate 100mbit allot 1514 prio 3 \
				maxburst 1 avpkt 500 bounded
			;;
		htb)
			# NOTE(review): status 0 is treated as "not available" here;
			# confirm against ts_qdisc_available's return convention.
			ts_qdisc_available "htb"
			if [ $? -eq 0 ]; then
				ts_log "cls-testbed: HTB is unsupported by $TC, skipping"
				continue;
			fi
			ts_tc "cls-testbed" "htb root qdisc creation" \
				qdisc add dev $DEV root handle 10:0 htb
			ts_tc "cls-testbed" "htb root class creation" \
				class add dev $DEV parent 10:0 classid 10:12 \
				htb rate 100Mbit quantum 1514
			;;
		dsmark)
			# NOTE(review): same return-convention assumption as for htb.
			ts_qdisc_available "dsmark"
			if [ $? -eq 0 ]; then
				ts_log "cls-testbed: dsmark is unsupported by $TC, skipping"
				continue;
			fi
			ts_tc "cls-testbed" "dsmark root qdisc creation" \
				qdisc add dev $DEV root handle 20:0 \
				dsmark indices 64 default_index 1 set_tc_index
			ts_tc "cls-testbed" "dsmark class creation" \
				class change dev $DEV parent 20:0 classid 20:12 \
				dsmark mask 0xff value 2
			ts_tc "cls-testbed" "prio inner qdisc creation" \
				qdisc add dev $DEV parent 20:0 handle 10:0 prio
			;;
		*)
			ts_err "cls-testbed: no testbed configuration found for qdisc $q"
			continue
			;;
		esac

		# List the tree on the device it was just built on. These two
		# commands previously hard-coded "eth0" while everything else
		# used $DEV.
		ts_tc "cls-testbed" "tree listing" qdisc list dev $DEV
		ts_tc "cls-testbed" "tree class listing" class list dev $DEV

		ts_log "cls-testbed: starting classifier test $c"
		# The test file is executed directly.
		$c

		# Single catch-all arm: every qdisc is torn down the same way.
		case "$q" in
		*)
			ts_tc "cls-testbed" "generic qdisc tree deletion" \
				qdisc del dev $DEV root
			;;
		esac
	done
done