Merge branch 'master' into next

Signed-off-by: David Ahern <dsahern@gmail.com>
This commit is contained in:
David Ahern 2020-02-28 22:42:49 +00:00
commit b6de0bf7db
20 changed files with 604 additions and 90 deletions

View File

@ -3,8 +3,8 @@
# Copyright 2016 Quentin Monnet <quentin.monnet@6wind.com>
QDISC_KIND=' choke codel bfifo pfifo pfifo_head_drop fq fq_codel gred hhf \
mqprio multiq netem pfifo_fast pie red rr sfb sfq tbf atm cbq drr \
dsmark hfsc htb prio qfq '
mqprio multiq netem pfifo_fast pie fq_pie red rr sfb sfq tbf atm \
cbq drr dsmark hfsc htb prio qfq '
FILTER_KIND=' basic bpf cgroup flow flower fw route rsvp tcindex u32 matchall '
ACTION_KIND=' gact mirred bpf sample '
@ -326,6 +326,14 @@ _tc_qdisc_options()
_tc_one_of_list 'dq_rate_estimator no_dq_rate_estimator'
return 0
;;
fq_pie)
_tc_once_attr 'limit flows target tupdate \
alpha beta quantum memory_limit ecn_prob'
_tc_one_of_list 'ecn noecn'
_tc_one_of_list 'bytemode nobytemode'
_tc_one_of_list 'dq_rate_estimator no_dq_rate_estimator'
return 0
;;
red)
_tc_once_attr 'limit min max avpkt burst adaptive probability \
bandwidth ecn harddrop'

View File

@ -3066,11 +3066,13 @@ static int cmd_dev_flash(struct dl *dl)
/* In child, just execute the flash and pass returned
* value through pipe once it is done.
*/
int cc;
close(pipe_r);
err = _mnlg_socket_send(dl->nlg, nlh);
write(pipe_w, &err, sizeof(err));
cc = write(pipe_w, &err, sizeof(err));
close(pipe_w);
exit(0);
exit(cc != sizeof(err));
}
close(pipe_w);

View File

@ -1045,9 +1045,9 @@ union bpf_attr {
* supports redirection to the egress interface, and accepts no
* flag at all.
*
* The same effect can be attained with the more generic
* **bpf_redirect_map**\ (), which requires specific maps to be
* used but offers better performance.
* The same effect can also be attained with the more generic
* **bpf_redirect_map**\ (), which uses a BPF map to store the
* redirect target instead of providing it directly to the helper.
* Return
* For XDP, the helper returns **XDP_REDIRECT** on success or
* **XDP_ABORTED** on error. For other program types, the values
@ -1611,13 +1611,11 @@ union bpf_attr {
* the caller. Any higher bits in the *flags* argument must be
* unset.
*
* When used to redirect packets to net devices, this helper
* provides a high performance increase over **bpf_redirect**\ ().
* This is due to various implementation details of the underlying
* mechanisms, one of which is the fact that **bpf_redirect_map**\
* () tries to send packet as a "bulk" to the device.
* See also bpf_redirect(), which only supports redirecting to an
* ifindex, but doesn't require a map to do so.
* Return
* **XDP_REDIRECT** on success, or **XDP_ABORTED** on error.
* **XDP_REDIRECT** on success, or the value of the two lower bits
* of the *flags* argument on error.
*
* int bpf_sk_redirect_map(struct sk_buff *skb, struct bpf_map *map, u32 key, u64 flags)
* Description

View File

@ -87,6 +87,7 @@
#define NSFS_MAGIC 0x6e736673
#define BPF_FS_MAGIC 0xcafe4a11
#define AAFS_MAGIC 0x5a3c69f0
#define ZONEFS_MAGIC 0x5a4f4653
/* Since UDF 2.01 is ISO 13346 based... */
#define UDF_SUPER_MAGIC 0x15013346

View File

@ -743,7 +743,7 @@ static void bridge_print_stats_attr(struct rtattr *attr, int ifindex)
print_string(PRINT_FP, NULL, "%-16s ", "");
print_u64(PRINT_ANY, "tx_v1", "TX: v1 %llu ",
mstats->igmp_v1reports[BR_MCAST_DIR_TX]);
print_u64(PRINT_ANY, "tx_v2", "v2 %llu",
print_u64(PRINT_ANY, "tx_v2", "v2 %llu ",
mstats->igmp_v2reports[BR_MCAST_DIR_TX]);
print_u64(PRINT_ANY, "tx_v3", "v3 %llu\n",
mstats->igmp_v3reports[BR_MCAST_DIR_TX]);

View File

@ -933,9 +933,6 @@ int print_route(struct nlmsghdr *n, void *arg)
if (tb[RTA_IIF] && filter.iifmask != -1)
print_rta_if(fp, tb[RTA_IIF], "iif");
if (tb[RTA_MULTIPATH])
print_rta_multipath(fp, r, tb[RTA_MULTIPATH]);
if (tb[RTA_PREF])
print_rt_pref(fp, rta_getattr_u8(tb[RTA_PREF]));
@ -951,6 +948,14 @@ int print_route(struct nlmsghdr *n, void *arg)
propagate ? "enabled" : "disabled");
}
if (tb[RTA_MULTIPATH])
print_rta_multipath(fp, r, tb[RTA_MULTIPATH]);
/* If you are adding new route RTA_XXXX then place it above
* the RTA_MULTIPATH else it will appear that the last nexthop
* in the ECMP has new attributes
*/
print_string(PRINT_FP, NULL, "\n", NULL);
close_json_object();
fflush(fp);

View File

@ -94,7 +94,7 @@ static int gre_parse_opt(struct link_util *lu, int argc, char **argv,
__u8 metadata = 0;
__u32 fwmark = 0;
__u32 erspan_idx = 0;
__u8 erspan_ver = 0;
__u8 erspan_ver = 1;
__u8 erspan_dir = 0;
__u16 erspan_hwid = 0;

View File

@ -106,7 +106,7 @@ static int gre_parse_opt(struct link_util *lu, int argc, char **argv,
__u8 metadata = 0;
__u32 fwmark = 0;
__u32 erspan_idx = 0;
__u8 erspan_ver = 0;
__u8 erspan_ver = 1;
__u8 erspan_dir = 0;
__u16 erspan_hwid = 0;

View File

@ -66,24 +66,9 @@ static void usage(void)
"Usage: ip xfrm policy count\n"
"Usage: ip xfrm policy set [ hthresh4 LBITS RBITS ] [ hthresh6 LBITS RBITS ]\n"
"SELECTOR := [ src ADDR[/PLEN] ] [ dst ADDR[/PLEN] ] [ dev DEV ] [ UPSPEC ]\n"
"UPSPEC := proto { { ");
fprintf(stderr, "%s | %s | %s | %s } ",
strxf_proto(IPPROTO_TCP),
strxf_proto(IPPROTO_UDP),
strxf_proto(IPPROTO_SCTP),
strxf_proto(IPPROTO_DCCP));
fprintf(stderr,
"[ sport PORT ] [ dport PORT ] |\n"
" { %s | %s | %s } ",
strxf_proto(IPPROTO_ICMP),
strxf_proto(IPPROTO_ICMPV6),
strxf_proto(IPPROTO_MH));
fprintf(stderr,
"[ type NUMBER ] [ code NUMBER ] |\n"
" %s",
strxf_proto(IPPROTO_GRE));
fprintf(stderr,
" [ key { DOTTED-QUAD | NUMBER } ] | PROTO }\n"
"UPSPEC := proto { { tcp | udp | sctp | dccp } [ sport PORT ] [ dport PORT ] |\n"
" { icmp | ipv6-icmp | mobility-header } [ type NUMBER ] [ code NUMBER ] |\n"
" gre [ key { DOTTED-QUAD | NUMBER } ] | PROTO }\n"
"DIR := in | out | fwd\n"
"PTYPE := main | sub\n"
"ACTION := allow | block\n"

View File

@ -106,27 +106,9 @@ static void usage(void)
"EXTRA-FLAG-LIST := [ EXTRA-FLAG-LIST ] EXTRA-FLAG\n"
"EXTRA-FLAG := dont-encap-dscp\n"
"SELECTOR := [ src ADDR[/PLEN] ] [ dst ADDR[/PLEN] ] [ dev DEV ] [ UPSPEC ]\n"
"UPSPEC := proto { { ");
fprintf(stderr,
"%s | %s | %s | %s",
strxf_proto(IPPROTO_TCP),
strxf_proto(IPPROTO_UDP),
strxf_proto(IPPROTO_SCTP),
strxf_proto(IPPROTO_DCCP));
fprintf(stderr,
" } [ sport PORT ] [ dport PORT ] |\n"
" { ");
fprintf(stderr,
"%s | %s | %s",
strxf_proto(IPPROTO_ICMP),
strxf_proto(IPPROTO_ICMPV6),
strxf_proto(IPPROTO_MH));
fprintf(stderr,
" } [ type NUMBER ] [ code NUMBER ] |\n");
fprintf(stderr,
" %s", strxf_proto(IPPROTO_GRE));
fprintf(stderr,
" [ key { DOTTED-QUAD | NUMBER } ] | PROTO }\n"
"UPSPEC := proto { { tcp | udp | sctp | dccp } [ sport PORT ] [ dport PORT ] |\n"
" { icmp | ipv6-icmp | mobility-header } [ type NUMBER ] [ code NUMBER ] |\n"
" gre [ key { DOTTED-QUAD | NUMBER } ] | PROTO }\n"
"LIMIT-LIST := [ LIMIT-LIST ] limit LIMIT\n"
"LIMIT := { time-soft | time-hard | time-use-soft | time-use-hard } SECONDS |\n"
" { byte-soft | byte-hard } SIZE | { packet-soft | packet-hard } COUNT\n"
@ -1149,6 +1131,9 @@ static int xfrm_state_keep(struct nlmsghdr *n, void *arg)
if (!xfrm_state_filter_match(xsinfo))
return 0;
if (xsinfo->id.proto == IPPROTO_IPIP)
return 0;
if (xb->offset > xb->size) {
fprintf(stderr, "State buffer overflow\n");
return -1;

View File

@ -22,7 +22,7 @@ ip \- show / manipulate routing, network devices, interfaces and tunnels
.BR link " | " address " | " addrlabel " | " route " | " rule " | " neigh " | "\
ntable " | " tunnel " | " tuntap " | " maddress " | " mroute " | " mrule " | "\
monitor " | " xfrm " | " netns " | " l2tp " | " tcp_metrics " | " token " | "\
macsec " }"
macsec " | " vrf " }"
.sp
.ti -8
@ -312,6 +312,10 @@ readability.
.B tuntap
- manage TUN/TAP devices.
.TP
.B vrf
- manage virtual routing and forwarding devices.
.TP
.B xfrm
- manage IPSec policies.
@ -410,6 +414,7 @@ was written by Alexey N. Kuznetsov and added in Linux 2.2.
.BR ip-tcp_metrics (8),
.BR ip-token (8),
.BR ip-tunnel (8),
.BR ip-vrf (8),
.BR ip-xfrm (8)
.br
.RB "IP Command reference " ip-cref.ps

View File

@ -9,7 +9,7 @@ rdma-statistic \- RDMA statistic counter configuration
.B rdma
.RI "[ " OPTIONS " ]"
.B statistic
.RI " { " COMMAND " | "
.RI "{ " COMMAND " | "
.BR help " }"
.sp
@ -23,6 +23,7 @@ rdma-statistic \- RDMA statistic counter configuration
.RI "[ " OBJECT " ]"
.B show link
.RI "[ " DEV/PORT_INDEX " ]"
.RI "[ " FILTER_NAME " " FILTER_VALUE " ]"
.ti -8
.B rdma statistic
@ -34,7 +35,7 @@ rdma-statistic \- RDMA statistic counter configuration
.IR OBJECT
.B set
.IR COUNTER_SCOPE
.RI "[ " DEV/PORT_INDEX "]"
.RI "[ " DEV/PORT_INDEX " ]"
.B auto
.RI "{ " CRITERIA " | "
.BR off " }"
@ -44,7 +45,7 @@ rdma-statistic \- RDMA statistic counter configuration
.IR OBJECT
.B bind
.IR COUNTER_SCOPE
.RI "[ " DEV/PORT_INDEX "]"
.RI "[ " DEV/PORT_INDEX " ]"
.RI "[ " OBJECT-ID " ]"
.RI "[ " COUNTER-ID " ]"
@ -53,7 +54,7 @@ rdma-statistic \- RDMA statistic counter configuration
.IR OBJECT
.B unbind
.IR COUNTER_SCOPE
.RI "[ " DEV/PORT_INDEX "]"
.RI "[ " DEV/PORT_INDEX " ]"
.RI "[ " COUNTER-ID " ]"
.RI "[ " OBJECT-ID " ]"
@ -69,6 +70,10 @@ rdma-statistic \- RDMA statistic counter configuration
.IR CRITERIA " := "
.RB "{ " type " }"
.ti -8
.IR FILTER_NAME " := "
.RB "{ " cntn " | " lqpn " | " pid " }"
.SH "DESCRIPTION"
.SS rdma statistic [object] show - Queries the specified RDMA device for RDMA and driver-specific statistics. Show the default hw counters if object is not specified
@ -79,6 +84,9 @@ rdma-statistic \- RDMA statistic counter configuration
.I "PORT_INDEX"
- specifies counters on this RDMA port to show.
.I "FILTER_NAME"
- specifies a filter to show only the results matching it.
.SS rdma statistic <object> set - configure counter statistic auto-mode for a specific device/port
In auto mode, all objects belonging to one category are bound automatically to a single counter set. Not applicable for MRs.

View File

@ -19,7 +19,7 @@ rdma \- RDMA tool
.ti -8
.IR OBJECT " := { "
.BR dev " | " link " | " system " | " statistic " }"
.BR dev " | " link " | " resource " | " system " | " statistic " }"
.sp
.ti -8
@ -70,6 +70,10 @@ Generate JSON output.
.B link
- RDMA port related.
.TP
.B resource
- RDMA resource configuration.
.TP
.B sys
- RDMA subsystem related.

166
man/man8/tc-fq_pie.8 Normal file
View File

@ -0,0 +1,166 @@
.TH FQ-PIE 8 "23 January 2020" "iproute2" "Linux"
.SH NAME
FQ-PIE - Flow Queue Proportional Integral controller Enhanced
.SH SYNOPSIS
.B tc qdisc ... fq_pie
[ \fBlimit\fR PACKETS ] [ \fBflows\fR NUMBER ]
.br
\
[ \fBtarget\fR TIME ] [ \fBtupdate\fR TIME ]
.br
\
[ \fBalpha\fR NUMBER ] [ \fBbeta\fR NUMBER ]
.br
\
[ \fBquantum\fR BYTES ] [ \fBmemory_limit\fR BYTES ]
.br
\
[ \fBecn_prob\fR PERCENTAGE ] [ [\fBno\fR]\fBecn\fR ]
.br
\
[ [\fBno\fR]\fBbytemode\fR ] [ [\fBno_\fR]\fBdq_rate_estimator\fR ]
.SH DESCRIPTION
FQ-PIE (Flow Queuing with Proportional Integral controller Enhanced) is a
queuing discipline that combines Flow Queuing with the PIE AQM scheme. FQ-PIE
uses a Jenkins hash function to classify incoming packets into different flows
and is used to provide a fair share of the bandwidth to all the flows using the
qdisc. Each such flow is managed by the PIE algorithm.
.SH ALGORITHM
The FQ-PIE algorithm consists of two logical parts: the scheduler which selects
which queue to dequeue a packet from, and the PIE AQM which works on each of the
queues. The major work of FQ-PIE is mostly in the scheduling part. The
interaction between the scheduler and the PIE algorithm is straightforward.
During the enqueue stage, a hashing-based scheme is used, where flows are hashed
into a number of buckets with each bucket having its own queue. The number of
buckets is configurable, and presently defaults to 1024 in the implementation.
The flow hashing is performed on the 5-tuple of source and destination IP
addresses, port numbers and IP protocol number. Once the packet has been
successfully classified into a queue, it is handed over to the PIE algorithm
for enqueuing. It is then added to the tail of the selected queue, and the
queue's byte count is updated by the packet size. If the queue is not currently
active (i.e., if it is not in either the list of new or the list of old queues),
it is added to the end of the list of new queues, and its number of credits
is initialized to the configured quantum. Otherwise, the queue is left in its
current queue list.
During the dequeue stage, the scheduler first looks at the list of new queues;
for the queue at the head of that list, if that queue has a negative number of
credits (i.e., it has already dequeued at least a quantum of bytes), it is given
an additional quantum of credits, the queue is put onto the end of the list of
old queues, and the routine selects the next queue and starts again. Otherwise,
that queue is selected for dequeue again. If the list of new queues is empty,
the scheduler proceeds down the list of old queues in the same fashion
(checking the credits, and either selecting the queue for dequeuing, or adding
credits and putting the queue back at the end of the list). After having
selected a queue from which to dequeue a packet, the PIE algorithm is invoked
on that queue.
Finally, if the PIE algorithm does not return a packet, then the queue must be
empty and the scheduler does one of two things:
If the queue selected for dequeue came from the list of new queues, it is moved
to the end of the list of old queues. If instead it came from the list of old
queues, that queue is removed from the list, to be added back (as a new queue)
the next time a packet arrives that hashes to that queue. Then (since no packet
was available for dequeue), the whole dequeue process is restarted from the
beginning.
If, instead, the scheduler did get a packet back from the PIE algorithm, it
subtracts the size of the packet from the byte credits for the selected queue
and returns the packet as the result of the dequeue operation.
.SH PARAMETERS
.SS limit
It is the limit on the queue size in packets. Incoming packets are dropped when
the limit is reached. The default value is 10240 packets.
.SS flows
It is the number of flows into which the incoming packets are classified. Due
to the stochastic nature of hashing, multiple flows may end up being hashed
into the same slot. Newer flows have priority over older ones. This
parameter can be set only at load time since memory has to be allocated for
the hash table. The default value is 1024.
.SS target
It is the queue delay which the PIE algorithm tries to maintain. The default
target delay is 15ms.
.SS tupdate
It is the time interval at which the system drop probability is calculated.
The default is 15ms.
.SS alpha
.SS beta
alpha and beta are parameters chosen to control the drop probability. These
should be in the range between 0 and 32.
.SS quantum
quantum signifies the number of bytes that may be dequeued from a queue before
switching to the next queue in the deficit round robin scheme.
.SS memory_limit
It is the maximum total memory allowed for packets of all flows. The default is
32Mb.
.SS ecn_prob
It is the drop probability threshold below which packets will be ECN marked
instead of getting dropped. The default is 10%. Setting this parameter requires
\fBecn\fR to be enabled.
.SS \fR[\fBno\fR]\fBecn\fR
It has the same semantics as \fBpie\fR and can be used to mark packets
instead of dropping them. If \fBecn\fR has been enabled, \fBnoecn\fR can
be used to turn it off and vice versa.
.SS \fR[\fBno\fR]\fBbytemode\fR
It is used to scale drop probability proportional to packet size.
\fBbytemode\fR turns on bytemode, \fBnobytemode\fR turns off
bytemode. By default, \fBbytemode\fR is turned off.
.SS \fR[\fBno_\fR]\fBdq_rate_estimator\fR
\fBdq_rate_estimator\fR can be used to calculate queue delay using Little's
Law, \fBno_dq_rate_estimator\fR can be used to calculate queue delay
using timestamp. By default, \fBdq_rate_estimator\fR is turned off.
.SH EXAMPLES
# tc qdisc add dev eth0 root fq_pie
.br
# tc -s qdisc show dev eth0
.br
qdisc fq_pie 8001: root refcnt 2 limit 10240p flows 1024 target 15.0ms tupdate
16.0ms alpha 2 beta 20 quantum 1514b memory_limit 32Mb ecn_prob 10
Sent 159173586 bytes 105261 pkt (dropped 24, overlimits 0 requeues 0)
backlog 75700b 50p requeues 0
pkts_in 105311 overlimit 0 overmemory 0 dropped 24 ecn_mark 0
new_flow_count 7332 new_flows_len 0 old_flows_len 4 memory_used 108800
# tc qdisc add dev eth0 root fq_pie dq_rate_estimator
.br
# tc -s qdisc show dev eth0
.br
qdisc fq_pie 8001: root refcnt 2 limit 10240p flows 1024 target 15.0ms tupdate
16.0ms alpha 2 beta 20 quantum 1514b memory_limit 32Mb ecn_prob 10
dq_rate_estimator
Sent 8263620 bytes 5550 pkt (dropped 4, overlimits 0 requeues 0)
backlog 805448b 532p requeues 0
pkts_in 6082 overlimit 0 overmemory 0 dropped 4 ecn_mark 0
new_flow_count 94 new_flows_len 0 old_flows_len 8 memory_used 1157632
.SH SEE ALSO
.BR tc (8),
.BR tc-pie (8),
.BR tc-fq_codel (8)
.SH SOURCES
RFC 8033: https://tools.ietf.org/html/rfc8033
.SH AUTHORS
FQ-PIE was implemented by Mohit P. Tahiliani. Please report corrections to the
Linux Networking mailing list <netdev@vger.kernel.org>.

View File

@ -284,6 +284,13 @@ bandwidth to all the flows using the queue. Each such flow is managed by the
CoDel queuing discipline. Reordering within a flow is avoided since Codel
internally uses a FIFO queue.
.TP
fq_pie
FQ-PIE (Flow Queuing with Proportional Integral controller Enhanced) is a
queuing discipline that combines Flow Queuing with the PIE AQM scheme. FQ-PIE
uses a Jenkins hash function to classify incoming packets into different flows
and is used to provide a fair share of the bandwidth to all the flows using the
qdisc. Each such flow is managed by the PIE algorithm.
.TP
gred
Generalized Random Early Detection combines multiple RED queues in order to
achieve multiple drop priorities. This is required to realize Assured
@ -855,6 +862,7 @@ was written by Alexey N. Kuznetsov and added in Linux 2.2.
.BR tc-flower (8),
.BR tc-fq (8),
.BR tc-fq_codel (8),
.BR tc-fq_pie (8),
.BR tc-fw (8),
.BR tc-hfsc (7),
.BR tc-hfsc (8),

View File

@ -142,14 +142,19 @@ static void load_good_table(FILE *fp)
}
/* idbuf is as big as buf, so this is safe */
nr = sscanf(buf, "%s%llu%lg", idbuf, &val, &rate);
if (nr < 2)
abort();
if (nr < 2) {
fprintf(stderr, "%s:%d: error parsing history file\n",
__FILE__, __LINE__);
exit(-2);
}
if (nr < 3)
rate = 0;
if (useless_number(idbuf))
continue;
if ((n = malloc(sizeof(*n))) == NULL)
abort();
if ((n = malloc(sizeof(*n))) == NULL) {
perror("nstat: malloc");
exit(-1);
}
n->id = strdup(idbuf);
n->val = val;
n->rate = rate;
@ -190,8 +195,11 @@ static void load_ugly_table(FILE *fp)
int count1, count2, skip = 0;
p = strchr(buf, ':');
if (!p)
abort();
if (!p) {
fprintf(stderr, "%s:%d: error parsing history file\n",
__FILE__, __LINE__);
exit(-2);
}
count1 = count_spaces(buf);
*p = 0;
idbuf[0] = 0;
@ -211,8 +219,10 @@ static void load_ugly_table(FILE *fp)
strncat(idbuf, p, sizeof(idbuf) - off - 1);
}
n = malloc(sizeof(*n));
if (!n)
abort();
if (!n) {
perror("nstat: malloc");
exit(-1);
}
n->id = strdup(idbuf);
n->rate = 0;
n->next = db;
@ -221,18 +231,27 @@ static void load_ugly_table(FILE *fp)
}
n = db;
nread = getline(&buf, &buflen, fp);
if (nread == -1)
abort();
if (nread == -1) {
fprintf(stderr, "%s:%d: error parsing history file\n",
__FILE__, __LINE__);
exit(-2);
}
count2 = count_spaces(buf);
if (count2 > count1)
skip = count2 - count1;
do {
p = strrchr(buf, ' ');
if (!p)
abort();
if (!p) {
fprintf(stderr, "%s:%d: error parsing history file\n",
__FILE__, __LINE__);
exit(-2);
}
*p = 0;
if (sscanf(p+1, "%llu", &n->val) != 1)
abort();
if (sscanf(p+1, "%llu", &n->val) != 1) {
fprintf(stderr, "%s:%d: error parsing history file\n",
__FILE__, __LINE__);
exit(-2);
}
/* Trick to skip "dummy" trailing ICMP MIB in 2.4 */
if (skip)
skip--;

View File

@ -23,6 +23,7 @@ static int stat_help(struct rd *rd)
pr_out("where OBJECT: = { qp }\n");
pr_out(" CRITERIA : = { type }\n");
pr_out(" COUNTER_SCOPE: = { link | dev }\n");
pr_out(" FILTER_NAME: = { cntn | lqpn | pid }\n");
pr_out("Examples:\n");
pr_out(" %s statistic qp show\n", rd->filename);
pr_out(" %s statistic qp show link mlx5_2/1\n", rd->filename);

View File

@ -70,6 +70,7 @@ TCMODULES += q_codel.o
TCMODULES += q_fq_codel.o
TCMODULES += q_fq.o
TCMODULES += q_pie.o
TCMODULES += q_fq_pie.o
TCMODULES += q_cake.o
TCMODULES += q_hhf.o
TCMODULES += q_clsact.o

318
tc/q_fq_pie.c Normal file
View File

@ -0,0 +1,318 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
* Flow Queue PIE
*
* Copyright (C) 2019 Mohit P. Tahiliani <tahiliani@nitk.edu.in>
* Copyright (C) 2019 Sachin D. Patil <sdp.sachin@gmail.com>
* Copyright (C) 2019 V. Saicharan <vsaicharan1998@gmail.com>
* Copyright (C) 2019 Mohit Bhasi <mohitbhasi1998@gmail.com>
* Copyright (C) 2019 Leslie Monis <lesliemonis@gmail.com>
* Copyright (C) 2019 Gautam Ramakrishnan <gautamramk@gmail.com>
*/
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <fcntl.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <arpa/inet.h>
#include <string.h>
#include "utils.h"
#include "tc_util.h"
/* Write the fq_pie option synopsis to stderr. */
static void explain(void)
{
	static const char usage[] =
		"Usage: ... fq_pie [ limit PACKETS ] [ flows NUMBER ]\n"
		" [ target TIME ] [ tupdate TIME ]\n"
		" [ alpha NUMBER ] [ beta NUMBER ]\n"
		" [ quantum BYTES ] [ memory_limit BYTES ]\n"
		" [ ecn_prob PERCENTAGE ] [ [no]ecn ]\n"
		" [ [no]bytemode ] [ [no_]dq_rate_estimator ]\n";

	/* The text contains no conversion specifiers, so it is emitted verbatim. */
	fputs(usage, stderr);
}
/* Largest values accepted for the "alpha" and "beta" options. */
#define ALPHA_MAX 32
#define BETA_MAX 32

/*
 * Parse the fq_pie options found in argv and append them to the netlink
 * request @n inside a nested TCA_OPTIONS attribute.
 *
 * @qu and @dev are not used by this function.
 *
 * Returns 0 once the nest has been closed.  Returns -1 after printing a
 * diagnostic to stderr when a value cannot be parsed, is out of range,
 * or the keyword is unknown; "help" prints the usage text and also
 * returns -1.
 *
 * NOTE(review): the numeric options are only emitted when non-zero, so an
 * explicit value of 0 (e.g. "alpha 0") is indistinguishable from the
 * option not having been given and is never sent.
 */
static int fq_pie_parse_opt(struct qdisc_util *qu, int argc, char **argv,
			    struct nlmsghdr *n, const char *dev)
{
	struct rtattr *tail;

	/* Numeric options: 0 doubles as "not specified". */
	unsigned int limit = 0, flows = 0;
	unsigned int target = 0, tupdate = 0;
	unsigned int alpha = 0, beta = 0;
	unsigned int quantum = 0, memory_limit = 0;
	unsigned int ecn_prob = 0;

	/* Boolean options: -1 means "not specified". */
	int ecn = -1, bytemode = -1, dq_rate_estimator = -1;

	for (; argc > 0; argc--, argv++) {
		if (!strcmp(*argv, "limit")) {
			NEXT_ARG();
			if (get_unsigned(&limit, *argv, 0)) {
				fprintf(stderr, "Illegal \"limit\"\n");
				return -1;
			}
		} else if (!strcmp(*argv, "flows")) {
			NEXT_ARG();
			if (get_unsigned(&flows, *argv, 0)) {
				fprintf(stderr, "Illegal \"flows\"\n");
				return -1;
			}
		} else if (!strcmp(*argv, "target")) {
			NEXT_ARG();
			if (get_time(&target, *argv)) {
				fprintf(stderr, "Illegal \"target\"\n");
				return -1;
			}
		} else if (!strcmp(*argv, "tupdate")) {
			NEXT_ARG();
			if (get_time(&tupdate, *argv)) {
				fprintf(stderr, "Illegal \"tupdate\"\n");
				return -1;
			}
		} else if (!strcmp(*argv, "alpha")) {
			NEXT_ARG();
			if (get_unsigned(&alpha, *argv, 0) ||
			    alpha > ALPHA_MAX) {
				fprintf(stderr, "Illegal \"alpha\"\n");
				return -1;
			}
		} else if (!strcmp(*argv, "beta")) {
			NEXT_ARG();
			if (get_unsigned(&beta, *argv, 0) ||
			    beta > BETA_MAX) {
				fprintf(stderr, "Illegal \"beta\"\n");
				return -1;
			}
		} else if (!strcmp(*argv, "quantum")) {
			NEXT_ARG();
			if (get_size(&quantum, *argv)) {
				fprintf(stderr, "Illegal \"quantum\"\n");
				return -1;
			}
		} else if (!strcmp(*argv, "memory_limit")) {
			NEXT_ARG();
			if (get_size(&memory_limit, *argv)) {
				fprintf(stderr, "Illegal \"memory_limit\"\n");
				return -1;
			}
		} else if (!strcmp(*argv, "ecn_prob")) {
			NEXT_ARG();
			/* Values of 100 and above are rejected. */
			if (get_unsigned(&ecn_prob, *argv, 0) ||
			    ecn_prob >= 100) {
				fprintf(stderr, "Illegal \"ecn_prob\"\n");
				return -1;
			}
		} else if (!strcmp(*argv, "ecn")) {
			ecn = 1;
		} else if (!strcmp(*argv, "noecn")) {
			ecn = 0;
		} else if (!strcmp(*argv, "bytemode")) {
			bytemode = 1;
		} else if (!strcmp(*argv, "nobytemode")) {
			bytemode = 0;
		} else if (!strcmp(*argv, "dq_rate_estimator")) {
			dq_rate_estimator = 1;
		} else if (!strcmp(*argv, "no_dq_rate_estimator")) {
			dq_rate_estimator = 0;
		} else if (!strcmp(*argv, "help")) {
			explain();
			return -1;
		} else {
			fprintf(stderr, "What is \"%s\"?\n", *argv);
			explain();
			return -1;
		}
	}

	/* Everything that was specified goes into one nested attribute. */
	tail = addattr_nest(n, 1024, TCA_OPTIONS | NLA_F_NESTED);

	if (limit)
		addattr_l(n, 1024, TCA_FQ_PIE_LIMIT, &limit, sizeof(limit));
	if (flows)
		addattr_l(n, 1024, TCA_FQ_PIE_FLOWS, &flows, sizeof(flows));
	if (target)
		addattr_l(n, 1024, TCA_FQ_PIE_TARGET, &target, sizeof(target));
	if (tupdate)
		addattr_l(n, 1024, TCA_FQ_PIE_TUPDATE, &tupdate,
			  sizeof(tupdate));
	if (alpha)
		addattr_l(n, 1024, TCA_FQ_PIE_ALPHA, &alpha, sizeof(alpha));
	if (beta)
		addattr_l(n, 1024, TCA_FQ_PIE_BETA, &beta, sizeof(beta));
	if (quantum)
		addattr_l(n, 1024, TCA_FQ_PIE_QUANTUM, &quantum,
			  sizeof(quantum));
	if (memory_limit)
		addattr_l(n, 1024, TCA_FQ_PIE_MEMORY_LIMIT, &memory_limit,
			  sizeof(memory_limit));
	if (ecn_prob)
		addattr_l(n, 1024, TCA_FQ_PIE_ECN_PROB, &ecn_prob,
			  sizeof(ecn_prob));

	/* Flags are sent for both the "on" and the "off" spelling. */
	if (ecn != -1)
		addattr_l(n, 1024, TCA_FQ_PIE_ECN, &ecn, sizeof(ecn));
	if (bytemode != -1)
		addattr_l(n, 1024, TCA_FQ_PIE_BYTEMODE, &bytemode,
			  sizeof(bytemode));
	if (dq_rate_estimator != -1)
		addattr_l(n, 1024, TCA_FQ_PIE_DQ_RATE_ESTIMATOR,
			  &dq_rate_estimator, sizeof(dq_rate_estimator));

	addattr_nest_end(n, tail);
	return 0;
}
/*
 * Print the fq_pie options carried in the nested attribute @opt.
 *
 * Each attribute is printed only when it is present and its payload is at
 * least sizeof(__u32).  target/tupdate and quantum/memory_limit are
 * emitted as the raw number under PRINT_JSON and through sprint_time() /
 * sprint_size() under PRINT_FP.  The three flags are printed only when
 * their value is non-zero.
 *
 * @qu and @f are not used by this function.  Always returns 0.
 */
static int fq_pie_print_opt(struct qdisc_util *qu, FILE *f, struct rtattr *opt)
{
	struct rtattr *tb[TCA_FQ_PIE_MAX + 1];
	struct rtattr *attr;

	SPRINT_BUF(b1);

	if (!opt)
		return 0;

	parse_rtattr_nested(tb, TCA_FQ_PIE_MAX, opt);

	attr = tb[TCA_FQ_PIE_LIMIT];
	if (attr && RTA_PAYLOAD(attr) >= sizeof(__u32)) {
		unsigned int limit = rta_getattr_u32(attr);

		print_uint(PRINT_ANY, "limit", "limit %up ", limit);
	}

	attr = tb[TCA_FQ_PIE_FLOWS];
	if (attr && RTA_PAYLOAD(attr) >= sizeof(__u32)) {
		unsigned int flows = rta_getattr_u32(attr);

		print_uint(PRINT_ANY, "flows", "flows %u ", flows);
	}

	attr = tb[TCA_FQ_PIE_TARGET];
	if (attr && RTA_PAYLOAD(attr) >= sizeof(__u32)) {
		unsigned int target = rta_getattr_u32(attr);

		print_uint(PRINT_JSON, "target", NULL, target);
		print_string(PRINT_FP, NULL, "target %s ",
			     sprint_time(target, b1));
	}

	attr = tb[TCA_FQ_PIE_TUPDATE];
	if (attr && RTA_PAYLOAD(attr) >= sizeof(__u32)) {
		unsigned int tupdate = rta_getattr_u32(attr);

		print_uint(PRINT_JSON, "tupdate", NULL, tupdate);
		print_string(PRINT_FP, NULL, "tupdate %s ",
			     sprint_time(tupdate, b1));
	}

	attr = tb[TCA_FQ_PIE_ALPHA];
	if (attr && RTA_PAYLOAD(attr) >= sizeof(__u32)) {
		unsigned int alpha = rta_getattr_u32(attr);

		print_uint(PRINT_ANY, "alpha", "alpha %u ", alpha);
	}

	attr = tb[TCA_FQ_PIE_BETA];
	if (attr && RTA_PAYLOAD(attr) >= sizeof(__u32)) {
		unsigned int beta = rta_getattr_u32(attr);

		print_uint(PRINT_ANY, "beta", "beta %u ", beta);
	}

	attr = tb[TCA_FQ_PIE_QUANTUM];
	if (attr && RTA_PAYLOAD(attr) >= sizeof(__u32)) {
		unsigned int quantum = rta_getattr_u32(attr);

		print_uint(PRINT_JSON, "quantum", NULL, quantum);
		print_string(PRINT_FP, NULL, "quantum %s ",
			     sprint_size(quantum, b1));
	}

	attr = tb[TCA_FQ_PIE_MEMORY_LIMIT];
	if (attr && RTA_PAYLOAD(attr) >= sizeof(__u32)) {
		unsigned int memory_limit = rta_getattr_u32(attr);

		print_uint(PRINT_JSON, "memory_limit", NULL, memory_limit);
		print_string(PRINT_FP, NULL, "memory_limit %s ",
			     sprint_size(memory_limit, b1));
	}

	attr = tb[TCA_FQ_PIE_ECN_PROB];
	if (attr && RTA_PAYLOAD(attr) >= sizeof(__u32)) {
		unsigned int ecn_prob = rta_getattr_u32(attr);

		print_uint(PRINT_ANY, "ecn_prob", "ecn_prob %u ", ecn_prob);
	}

	/* Flags: nothing is printed when the attribute value is zero. */
	attr = tb[TCA_FQ_PIE_ECN];
	if (attr && RTA_PAYLOAD(attr) >= sizeof(__u32)) {
		if (rta_getattr_u32(attr))
			print_bool(PRINT_ANY, "ecn", "ecn ", true);
	}

	attr = tb[TCA_FQ_PIE_BYTEMODE];
	if (attr && RTA_PAYLOAD(attr) >= sizeof(__u32)) {
		if (rta_getattr_u32(attr))
			print_bool(PRINT_ANY, "bytemode", "bytemode ", true);
	}

	attr = tb[TCA_FQ_PIE_DQ_RATE_ESTIMATOR];
	if (attr && RTA_PAYLOAD(attr) >= sizeof(__u32)) {
		if (rta_getattr_u32(attr))
			print_bool(PRINT_ANY, "dq_rate_estimator",
				   "dq_rate_estimator ", true);
	}

	return 0;
}
static int fq_pie_print_xstats(struct qdisc_util *qu, FILE *f,
struct rtattr *xstats)
{
struct tc_fq_pie_xstats _st = {}, *st;
if (xstats == NULL)
return 0;
st = RTA_DATA(xstats);
if (RTA_PAYLOAD(xstats) < sizeof(*st)) {
memcpy(&_st, st, RTA_PAYLOAD(xstats));
st = &_st;
}
print_uint(PRINT_ANY, "pkts_in", " pkts_in %u",
st->packets_in);
print_uint(PRINT_ANY, "overlimit", " overlimit %u",
st->overlimit);
print_uint(PRINT_ANY, "overmemory", " overmemory %u",
st->overmemory);
print_uint(PRINT_ANY, "dropped", " dropped %u",
st->dropped);
print_uint(PRINT_ANY, "ecn_mark", " ecn_mark %u",
st->ecn_mark);
print_nl();
print_uint(PRINT_ANY, "new_flow_count", " new_flow_count %u",
st->new_flow_count);
print_uint(PRINT_ANY, "new_flows_len", " new_flows_len %u",
st->new_flows_len);
print_uint(PRINT_ANY, "old_flows_len", " old_flows_len %u",
st->old_flows_len);
print_uint(PRINT_ANY, "memory_used", " memory_used %u",
st->memory_usage);
return 0;
}
/*
 * Descriptor binding the "fq_pie" id to its option parser, option printer
 * and extended-statistics printer.
 */
struct qdisc_util fq_pie_qdisc_util = {
	.id		= "fq_pie",
	.parse_qopt	= fq_pie_parse_opt,
	.print_qopt	= fq_pie_print_opt,
	.print_xstats	= fq_pie_print_xstats,
};

View File

@ -12,37 +12,37 @@ export TCPDIAG_FILE="$(dirname $0)/ss1.dump"
ts_log "[Testing ssfilter]"
ts_ss "$0" "Match dport = 22" -Htna dport = 22
test_on "ESTAB 0 0 10.0.0.1:36266 10.0.0.1:22"
test_on "ESTAB 0 0 10.0.0.1:36266 10.0.0.1:22"
ts_ss "$0" "Match dport 22" -Htna dport 22
test_on "ESTAB 0 0 10.0.0.1:36266 10.0.0.1:22"
test_on "ESTAB 0 0 10.0.0.1:36266 10.0.0.1:22"
ts_ss "$0" "Match (dport)" -Htna '( dport = 22 )'
test_on "ESTAB 0 0 10.0.0.1:36266 10.0.0.1:22"
test_on "ESTAB 0 0 10.0.0.1:36266 10.0.0.1:22"
ts_ss "$0" "Match src = 0.0.0.0" -Htna src = 0.0.0.0
test_on "LISTEN 0 128 0.0.0.0:22 0.0.0.0:*"
test_on "LISTEN 0 128 0.0.0.0:22 0.0.0.0:\*"
ts_ss "$0" "Match src 0.0.0.0" -Htna src 0.0.0.0
test_on "LISTEN 0 128 0.0.0.0:22 0.0.0.0:*"
test_on "LISTEN 0 128 0.0.0.0:22 0.0.0.0:\*"
ts_ss "$0" "Match src sport" -Htna src 0.0.0.0 sport = 22
test_on "LISTEN 0 128 0.0.0.0:22 0.0.0.0:*"
test_on "LISTEN 0 128 0.0.0.0:22 0.0.0.0:\*"
ts_ss "$0" "Match src and sport" -Htna src 0.0.0.0 and sport = 22
test_on "LISTEN 0 128 0.0.0.0:22 0.0.0.0:*"
test_on "LISTEN 0 128 0.0.0.0:22 0.0.0.0:\*"
ts_ss "$0" "Match src and sport and dport" -Htna src 10.0.0.1 and sport = 22 and dport = 50312
test_on "ESTAB 0 0 10.0.0.1:22 10.0.0.2:50312"
test_on "ESTAB 0 0 10.0.0.1:22 10.0.0.2:50312"
ts_ss "$0" "Match src and sport and (dport)" -Htna 'src 10.0.0.1 and sport = 22 and ( dport = 50312 )'
test_on "ESTAB 0 0 10.0.0.1:22 10.0.0.2:50312"
test_on "ESTAB 0 0 10.0.0.1:22 10.0.0.2:50312"
ts_ss "$0" "Match src and (sport and dport)" -Htna 'src 10.0.0.1 and ( sport = 22 and dport = 50312 )'
test_on "ESTAB 0 0 10.0.0.1:22 10.0.0.2:50312"
test_on "ESTAB 0 0 10.0.0.1:22 10.0.0.2:50312"
ts_ss "$0" "Match (src and sport) and dport" -Htna '( src 10.0.0.1 and sport = 22 ) and dport = 50312'
test_on "ESTAB 0 0 10.0.0.1:22 10.0.0.2:50312"
test_on "ESTAB 0 0 10.0.0.1:22 10.0.0.2:50312"
ts_ss "$0" "Match (src or src) and dst" -Htna '( src 0.0.0.0 or src 10.0.0.1 ) and dst 10.0.0.2'
test_on "ESTAB 0 0 10.0.0.1:22 10.0.0.2:50312"
test_on "ESTAB 0 0 10.0.0.1:22 10.0.0.2:50312"