diff --git a/man/man8/tc-ctinfo.8 b/man/man8/tc-ctinfo.8 new file mode 100644 index 00000000..9015b844 --- /dev/null +++ b/man/man8/tc-ctinfo.8 @@ -0,0 +1,170 @@ +.TH "ctinfo action in tc" 8 "4 Jun 2019" "iproute2" "Linux" +.SH NAME +ctinfo \- tc connmark processing action +.SH SYNOPSIS +.B tc ... action ctinfo +[ +.B dscp +MASK [STATEMASK] ] [ +.B cpmark +[MASK] ] [ +.B zone +ZONE ] [ +.B CONTROL +] [ +.B index +INDEX +] + +.SH DESCRIPTION +CTINFO (Conntrack Information) is a tc action for retrieving data from +conntrack marks into various fields. At present it has two independent +processing modes which may be viewed as sub-functions. + +DSCP mode copies a DSCP stored in conntrack's connmark into the IPv4/v6 diffserv +field. The copying may conditionally occur based on a flag also stored in the +connmark. DSCP mode was designed to assist in restoring packet classifications on +ingress, classifications which may then be used by qdiscs such as CAKE. It may be +used in any circumstance where ingress classification needs to be maintained across +links that otherwise bleach or remap according to their own policies. + +CPMARK (copymark) mode copies the conntrack connmark into the packet's mark field. Without +additional parameters it is functionally completely equivalent to the existing +connmark action. An optional mask may be specified to mask which bits of the +connmark are restored. This may be useful when DSCP and CPMARK modes are combined. + +Simple statistics (tc -s) on DSCP restores and CPMARK copies are maintained, where the values for +"set" indicate a count of packets altered for that mode. DSCP includes an error count +where the destination packet's diffserv field was unwritable. +.SH PARAMETERS +.SS DSCP mode parameters: +.IP mask +A mask of 6 contiguous bits indicating where the DSCP value is located in the 32 bit +conntrack mark field. A mask must be provided for this mode. mask is a 32 bit +unsigned value.
+.IP statemask +A mask of at least 1 bit indicating where a conditional restore flag is located in the +32 bit conntrack mark field. The statemask bit(s) must NOT overlap the mask bits. The +DSCP will be restored if the conntrack mark logically ANDed with the statemask yields +a non-zero result. statemask is an optional unsigned 32 bit value. +.SS CPMARK mode parameters: +.IP mask +Store the logically ANDed result of conntrack mark and mask into the packet's mark +field. Default is 0xffffffff, i.e. the whole mark field. mask is an optional unsigned 32 bit +value. +.SS Overall action parameters: +.IP zone +Specify the conntrack zone when doing conntrack lookups for packets. +zone is a 16bit unsigned decimal value. +Default is 0. +.IP CONTROL +The following keywords control how the tree of qdiscs, classes, +filters and actions is further traversed after this action. +.RS +.TP +.B reclassify +Restart with the first filter in the current list. +.TP +.B pipe +Continue with the next action attached to the same filter. This is the default. +.TP +.B drop +Drop the packet. +.TP +.B shot +synonym for +.B drop +.TP +.B continue +Continue classification with the next filter in line. +.TP +.B pass +Finish classification process and return to calling qdisc for further packet +processing. +.RE +.IP index +Specify an index for this action in order to be able to identify it in later +commands. index is a 32bit unsigned decimal value.
+.SH EXAMPLES +Example showing conditional restoration of DSCP on ingress via an IFB +.RS +.EX + +#Set up the ingress qdisc on the real interface +.br +tc qdisc add dev eth0 handle ffff: ingress + +#Put CAKE qdisc on the IFB interface +.br +tc qdisc add dev ifb4eth0 root cake bandwidth 40mbit + +#Set interface UP +.br +ip link set dev ifb4eth0 up + +#Add 2 actions, ctinfo to restore dscp & mirred to redirect the packets to IFB +.br +tc filter add dev eth0 parent ffff: protocol all prio 10 u32 \\ + match u32 0 0 flowid 1:1 action \\ + ctinfo dscp 0xfc000000 0x01000000 \\ + mirred egress redirect dev ifb4eth0 + +tc -s filter show dev eth0 ingress + + filter parent ffff: protocol all pref 10 u32 chain 0 + filter parent ffff: protocol all pref 10 u32 chain 0 fh 800: ht divisor 1 + filter parent ffff: protocol all pref 10 u32 chain 0 fh 800::800 order 2048 key ht 800 bkt 0 flowid 1:1 not_in_hw + match 00000000/00000000 at 0 + action order 1: ctinfo zone 0 pipe + index 2 ref 1 bind 1 dscp 0xfc000000 0x01000000 installed 72 sec used 0 sec DSCP set 1333 error 0 CPMARK set 0 + Action statistics: + Sent 658484 bytes 1833 pkt (dropped 0, overlimits 0 requeues 0) + backlog 0b 0p requeues 0 + + action order 2: mirred (Egress Redirect to device ifb4eth0) stolen + index 1 ref 1 bind 1 installed 72 sec used 0 sec + Action statistics: + Sent 658484 bytes 1833 pkt (dropped 0, overlimits 0 requeues 0) + backlog 0b 0p requeues 0 +.EE +.RE + +Example showing conditional restoration of DSCP on egress + +This may appear nonsensical since iptables marking of egress packets is easy +to achieve; however, the iptables flow classification rules may be extensive, +and so a set-once-and-forget approach may be useful, especially on CPU +constrained devices.
+.RS
+.EX
+
+# Send unmarked connections to a marking chain which needs to store a DSCP
+and set statemask bit in the connmark
+.br
+iptables -t mangle -A POSTROUTING -o eth0 -m connmark \\
+ --mark 0x00000000/0x01000000 -g CLASS_MARKING_CHAIN
+
+# Apply marked DSCP to the packets
+.br
+tc filter add dev eth0 protocol all prio 10 u32 \\
+ match u32 0 0 flowid 1:1 action \\
+ ctinfo dscp 0xfc000000 0x01000000
+
+tc -s filter show dev eth0
+ filter parent 800e: protocol all pref 10 u32 chain 0
+ filter parent 800e: protocol all pref 10 u32 chain 0 fh 800: ht divisor 1
+ filter parent 800e: protocol all pref 10 u32 chain 0 fh 800::800 order 2048 key ht 800 bkt 0 flowid 1:1 not_in_hw
+ match 00000000/00000000 at 0
+ action order 1: ctinfo zone 0 pipe
+ index 1 ref 1 bind 1 dscp 0xfc000000 0x01000000 installed 7414 sec used 0 sec DSCP set 53404 error 0 CPMARK set 0
+ Action statistics:
+ Sent 32890260 bytes 120441 pkt (dropped 0, overlimits 0 requeues 0)
+ backlog 0b 0p requeues 0
+.br
+.SH SEE ALSO
+.BR tc (8),
+.BR tc-cake (8),
+.BR tc-connmark (8),
+.BR tc-mirred (8)
+.SH AUTHORS
+ctinfo was written by Kevin Darbyshire-Bryant.
diff --git a/tc/Makefile b/tc/Makefile
index 1a305cf4..60abddee 100644
--- a/tc/Makefile
+++ b/tc/Makefile
@@ -48,6 +48,7 @@ TCMODULES += m_csum.o
 TCMODULES += m_simple.o
 TCMODULES += m_vlan.o
 TCMODULES += m_connmark.o
+TCMODULES += m_ctinfo.o
 TCMODULES += m_bpf.o
 TCMODULES += m_tunnel_key.o
 TCMODULES += m_sample.o
diff --git a/tc/m_ctinfo.c b/tc/m_ctinfo.c
new file mode 100644
index 00000000..5e451f87
--- /dev/null
+++ b/tc/m_ctinfo.c
@@ -0,0 +1,300 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * m_ctinfo.c netfilter ctinfo mark action
+ *
+ * Copyright (c) 2019 Kevin Darbyshire-Bryant
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <string.h>
+#include <strings.h>
+#include "utils.h"
+#include "tc_util.h"
+#include <linux/tc_act/tc_ctinfo.h>
+
+/* Print the ctinfo command-line summary on stderr. */
+static void
+explain(void)
+{
+	fprintf(stderr,
+		"Usage: ... ctinfo [dscp mask [statemask]] [cpmark [mask]] [zone ZONE] [CONTROL] [index <INDEX>]\n"
+		"where :\n"
+		"\tdscp MASK bitmask location of stored DSCP\n"
+		"\t STATEMASK bitmask to determine conditional restoring\n"
+		"\tcpmark MASK mask applied to mark on restoration\n"
+		"\tZONE is the conntrack zone\n"
+		"\tCONTROL := reclassify | pipe | drop | continue | ok |\n"
+		"\t goto chain <CHAIN_INDEX>\n");
+}
+
+/* Print the usage summary and terminate with exit status -1. */
+static void
+usage(void)
+{
+	explain();
+	exit(-1);
+}
+
+/*
+ * Parse the command-line words of a ctinfo action and append them to the
+ * netlink message @n as a nested attribute of type @tca_id.
+ *
+ * Accepted form (options are recognised in this order only):
+ *   ctinfo [dscp MASK [STATEMASK]] [cpmark [MASK]] [zone ZONE]
+ *          [CONTROL] [index INDEX]
+ *
+ * On success, *argc_p and *argv_p are advanced past the consumed words and
+ * 0 is returned. -1 is returned when "ctinfo" is missing or a value fails
+ * to parse or validate.
+ */
+static int
+parse_ctinfo(struct action_util *a, int *argc_p, char ***argv_p, int tca_id,
+	     struct nlmsghdr *n)
+{
+	unsigned int cpmarkmask = 0, dscpmask = 0, dscpstatemask = 0;
+	bool dscp_given = false, cpmark_given = false;
+	struct tc_ctinfo sel = {};
+	unsigned short zone = 0;
+	char **argv = *argv_p;
+	struct rtattr *tail;
+	int argc = *argc_p;
+	int ok = 0;
+	int lsb;
+
+	while (argc > 0) {
+		if (matches(*argv, "ctinfo") == 0) {
+			ok = 1;
+			NEXT_ARG_FWD();
+		} else if (matches(*argv, "help") == 0) {
+			usage();
+		} else {
+			break;
+		}
+	}
+
+	if (!ok) {
+		explain();
+		return -1;
+	}
+
+	if (argc && matches(*argv, "dscp") == 0) {
+		dscp_given = true;
+		NEXT_ARG();
+		if (get_u32(&dscpmask, *argv, 0)) {
+			fprintf(stderr,
+				"ctinfo: Illegal dscp \"mask\"\n");
+			return -1;
+		}
+		/* The statemask is optional: consume the next word only if
+		 * it parses as a number, otherwise leave it for the option
+		 * parsers below.
+		 */
+		if (NEXT_ARG_OK()) {
+			NEXT_ARG_FWD();
+			if (!get_u32(&dscpstatemask, *argv, 0))
+				NEXT_ARG_FWD(); /* was a statemask */
+		} else {
+			NEXT_ARG_FWD();
+		}
+	}
+
+	/* cpmark has an optional mask parameter, so the next arg might not
+	 * exist, or it might be the next option, or it may actually be a
+	 * 32bit mask.
+	 */
+	if (argc && matches(*argv, "cpmark") == 0) {
+		cpmark_given = true;
+		cpmarkmask = ~0;
+		if (NEXT_ARG_OK()) {
+			NEXT_ARG_FWD();
+			if (!get_u32(&cpmarkmask, *argv, 0))
+				NEXT_ARG_FWD(); /* was a mask */
+		} else {
+			NEXT_ARG_FWD();
+		}
+	}
+
+	if (argc && matches(*argv, "zone") == 0) {
+		NEXT_ARG();
+		if (get_u16(&zone, *argv, 10)) {
+			fprintf(stderr, "ctinfo: Illegal \"zone\"\n");
+			return -1;
+		}
+		NEXT_ARG_FWD();
+	}
+
+	parse_action_control_dflt(&argc, &argv, &sel.action,
+				  false, TC_ACT_PIPE);
+
+	if (argc && matches(*argv, "index") == 0) {
+		NEXT_ARG();
+		if (get_u32(&sel.index, *argv, 10)) {
+			fprintf(stderr, "ctinfo: Illegal \"index\"\n");
+			return -1;
+		}
+		NEXT_ARG_FWD();
+	}
+
+	if (dscpmask & dscpstatemask) {
+		fprintf(stderr,
+			"ctinfo: dscp mask & statemask must NOT overlap\n");
+		return -1;
+	}
+
+	/* Shifting the mask down by the position of its lowest set bit must
+	 * leave exactly 0x3f, i.e. six contiguous bits. A zero mask given
+	 * with "dscp" is rejected instead of being silently ignored.
+	 */
+	lsb = ffs(dscpmask);
+	if (dscp_given && (!lsb || (dscpmask >> (lsb - 1)) != 0x3f)) {
+		fprintf(stderr,
+			"ctinfo: dscp mask must be 6 contiguous bits long\n");
+		return -1;
+	}
+
+	tail = addattr_nest(n, MAX_MSG, tca_id);
+	addattr_l(n, MAX_MSG, TCA_CTINFO_ACT, &sel, sizeof(sel));
+	addattr16(n, MAX_MSG, TCA_CTINFO_ZONE, zone);
+
+	if (dscpmask)
+		addattr32(n, MAX_MSG,
+			  TCA_CTINFO_PARMS_DSCP_MASK, dscpmask);
+
+	if (dscpstatemask)
+		addattr32(n, MAX_MSG,
+			  TCA_CTINFO_PARMS_DSCP_STATEMASK, dscpstatemask);
+
+	/* Keyed on the keyword, not the value, so an explicit "cpmark 0"
+	 * is passed on rather than dropped.
+	 */
+	if (cpmark_given)
+		addattr32(n, MAX_MSG,
+			  TCA_CTINFO_PARMS_CPMARK_MASK, cpmarkmask);
+
+	addattr_nest_end(n, tail);
+
+	*argc_p = argc;
+	*argv_p = argv;
+	return 0;
+}
+
+/*
+ * Print the TCA_CTINFO_TM data via print_tm() and the DSCP / CPMARK
+ * counters carried in @tb; each item is printed only when present.
+ */
+static void print_ctinfo_stats(FILE *f, struct rtattr *tb[TCA_CTINFO_MAX + 1])
+{
+	struct tcf_t *tm;
+
+	if (tb[TCA_CTINFO_TM]) {
+		tm = RTA_DATA(tb[TCA_CTINFO_TM]);
+
+		print_tm(f, tm);
+	}
+
+	if (tb[TCA_CTINFO_STATS_DSCP_SET])
+		print_lluint(PRINT_ANY, "dscpset", " DSCP set %llu",
+			     rta_getattr_u64(tb[TCA_CTINFO_STATS_DSCP_SET]));
+	if (tb[TCA_CTINFO_STATS_DSCP_ERROR])
+		print_lluint(PRINT_ANY, "dscperror", " error %llu",
+			     rta_getattr_u64(tb[TCA_CTINFO_STATS_DSCP_ERROR]));
+
+	if (tb[TCA_CTINFO_STATS_CPMARK_SET])
+		print_lluint(PRINT_ANY, "cpmarkset", " CPMARK set %llu",
+			     rta_getattr_u64(tb[TCA_CTINFO_STATS_CPMARK_SET]));
+}
+
+/*
+ * Decode the nested ctinfo attributes in @arg and print the action's
+ * configuration; statistics are added when show_stats is set.
+ *
+ * Returns 0 on success, -1 if @arg is NULL or TCA_CTINFO_ACT is missing.
+ */
+static int print_ctinfo(struct action_util *au, FILE *f, struct rtattr *arg)
+{
+	unsigned int cpmarkmask = ~0, dscpmask = 0, dscpstatemask = 0;
+	struct rtattr *tb[TCA_CTINFO_MAX + 1];
+	unsigned short zone = 0;
+	struct tc_ctinfo *ci;
+
+	if (arg == NULL)
+		return -1;
+
+	parse_rtattr_nested(tb, TCA_CTINFO_MAX, arg);
+	if (!tb[TCA_CTINFO_ACT]) {
+		print_string(PRINT_FP, NULL, "%s",
+			     "[NULL ctinfo action parameters]");
+		return -1;
+	}
+
+	ci = RTA_DATA(tb[TCA_CTINFO_ACT]);
+
+	if (tb[TCA_CTINFO_PARMS_DSCP_MASK]) {
+		if (RTA_PAYLOAD(tb[TCA_CTINFO_PARMS_DSCP_MASK]) >=
+		    sizeof(__u32))
+			dscpmask = rta_getattr_u32(
+					tb[TCA_CTINFO_PARMS_DSCP_MASK]);
+		else
+			print_string(PRINT_FP, NULL, "%s",
+				     "[invalid dscp mask parameter]");
+	}
+
+	if (tb[TCA_CTINFO_PARMS_DSCP_STATEMASK]) {
+		if (RTA_PAYLOAD(tb[TCA_CTINFO_PARMS_DSCP_STATEMASK]) >=
+		    sizeof(__u32))
+			dscpstatemask = rta_getattr_u32(
+					tb[TCA_CTINFO_PARMS_DSCP_STATEMASK]);
+		else
+			print_string(PRINT_FP, NULL, "%s",
+				     "[invalid dscp statemask parameter]");
+	}
+
+	if (tb[TCA_CTINFO_PARMS_CPMARK_MASK]) {
+		if (RTA_PAYLOAD(tb[TCA_CTINFO_PARMS_CPMARK_MASK]) >=
+		    sizeof(__u32))
+			cpmarkmask = rta_getattr_u32(
+					tb[TCA_CTINFO_PARMS_CPMARK_MASK]);
+		else
+			print_string(PRINT_FP, NULL, "%s",
+				     "[invalid cpmark mask parameter]");
+	}
+
+	if (tb[TCA_CTINFO_ZONE] && RTA_PAYLOAD(tb[TCA_CTINFO_ZONE]) >=
+	    sizeof(__u16))
+		zone = rta_getattr_u16(tb[TCA_CTINFO_ZONE]);
+
+	print_string(PRINT_ANY, "kind", "%s ", "ctinfo");
+	print_hu(PRINT_ANY, "zone", "zone %u", zone);
+	print_action_control(f, " ", ci->action, "");
+
+	print_string(PRINT_FP, NULL, "%s", _SL_);
+	print_uint(PRINT_ANY, "index", "\t index %u", ci->index);
+	print_int(PRINT_ANY, "ref", " ref %d", ci->refcnt);
+	print_int(PRINT_ANY, "bind", " bind %d", ci->bindcnt);
+
+	if (tb[TCA_CTINFO_PARMS_DSCP_MASK]) {
+		print_0xhex(PRINT_ANY, "dscpmask", " dscp %#010llx", dscpmask);
+		print_0xhex(PRINT_ANY, "dscpstatemask", " %#010llx",
+			    dscpstatemask);
+	}
+
+	if (tb[TCA_CTINFO_PARMS_CPMARK_MASK])
+		print_0xhex(PRINT_ANY, "cpmark", " cpmark %#010llx",
+			    cpmarkmask);
+
+	if (show_stats)
+		print_ctinfo_stats(f, tb);
+
+	print_string(PRINT_FP, NULL, "%s", _SL_);
+
+	return 0;
+}
+
+/* Action descriptor exposing the parse and print callbacks as "ctinfo". */
+struct action_util ctinfo_action_util = {
+	.id = "ctinfo",
+	.parse_aopt = parse_ctinfo,
+	.print_aopt = print_ctinfo,
+};