/*
 * From a066bac8a2775bc43d54ae7173057f75f543c44b Mon Sep 17 00:00:00 2001
 * From: Vinicius Costa Gomes
 * Date: Fri, 5 Oct 2018 16:25:17 -0700
 * Subject: [PATCH 1/5] utils: Implement get_s64()
 *
 * Add this helper to read signed 64-bit integers from a string.
 *
 * Signed-off-by: Vinicius Costa Gomes
 * Signed-off-by: David Ahern
 * ---
 *  include/utils.h |  1 +
 *  lib/utils.c     | 21 +++++++++++++++++++++
 *  2 files changed, 22 insertions(+)
 *
 * include/utils.h (index eba67b6e..258d630e, hunk at line 144):
 *   the prototype below is added directly above the get_u64() prototype.
 * lib/utils.c (index 406ab8bd..cd1e0a98, hunk at line 383):
 *   the definition below is added between get_u8() and get_s32().
 */

/* include/utils.h */
int get_s64(__s64 *val, const char *arg, int base);

/* lib/utils.c */

/*
 * get_s64 - parse a signed 64-bit integer from a string.
 * @val:  where the parsed value is stored; written only on success.
 * @arg:  NUL-terminated text to parse; NULL or "" is rejected.
 * @base: numeric base handed to strtoll() (0 lets strtoll() auto-detect).
 *
 * The whole string must be consumed by the conversion; trailing characters
 * make the call fail.
 *
 * Returns 0 on success, -1 on empty input, malformed input or overflow.
 */
int get_s64(__s64 *val, const char *arg, int base)
{
	/*
	 * Must be long long: strtoll() returns long long, and where long is
	 * only 32 bits a plain "long" would truncate the result before any
	 * of the range checks below could see it.
	 */
	long long res;
	char *ptr;

	errno = 0;

	if (!arg || !*arg)
		return -1;

	res = strtoll(arg, &ptr, base);

	/* Nothing converted, or unparsed characters left behind. */
	if (!ptr || ptr == arg || *ptr)
		return -1;

	/* strtoll() saturates and sets ERANGE when the text overflows. */
	if ((res == LLONG_MIN || res == LLONG_MAX) && errno == ERANGE)
		return -1;

	/* Only reachable if long long is wider than 64 bits. */
	if (res > INT64_MAX || res < INT64_MIN)
		return -1;

	*val = res;
	return 0;
}

/*
 * From de63cd90444ac9fdf238f950c16cafe351846691 Mon Sep 17 00:00:00 2001
 * From: Vinicius Costa Gomes
 * Date: Fri, 5 Oct 2018 16:25:18 -0700
 * Subject: [PATCH 2/5] include: Add helper to retrieve a __s64 from a
 *  netlink msg
 *
 * This allows signed 64-bit integers to be retrieved from a netlink message.
 */
/*
 * (continuation of [PATCH 2/5] include: Add helper to retrieve a __s64
 * from a netlink msg)
 *
 * Signed-off-by: Vinicius Costa Gomes
 * Signed-off-by: David Ahern
 * ---
 *  include/libnetlink.h | 7 +++++++
 *  1 file changed, 7 insertions(+)
 *
 * include/libnetlink.h (index 8e411ebc..a133470c, hunk at line 203):
 *   rta_getattr_s64() is added between rta_getattr_u64() and
 *   rta_getattr_str().
 *
 * From d791f3ad869659da4b151eded060840a88d9656a Mon Sep 17 00:00:00 2001
 * From: Jesus Sanchez-Palencia
 * Date: Fri, 5 Oct 2018 16:25:19 -0700
 * Subject: [PATCH 3/5] libnetlink: Add helper for getting a __s32 from
 *  netlink msgs
 *
 * This function retrieves a signed 32-bit integer from a netlink message
 * and returns it.
 *
 * Signed-off-by: Jesus Sanchez-Palencia
 * Signed-off-by: David Ahern
 * ---
 *  include/libnetlink.h | 4 ++++
 *  1 file changed, 4 insertions(+)
 *
 * include/libnetlink.h (index a133470c..04264b87, hunk at line 203):
 *   rta_getattr_s32() is added between rta_getattr_u64() and
 *   rta_getattr_s64().
 */

/*
 * rta_getattr_s32 - read a signed 32-bit payload from a route attribute.
 * @rta: attribute whose payload starts at RTA_DATA(rta).
 *
 * The value is copied out with memcpy(), like the 64-bit getters beside it,
 * instead of dereferencing a cast pointer. The caller must make sure the
 * payload holds at least sizeof(__s32) bytes; no length check is done here.
 */
static inline __s32 rta_getattr_s32(const struct rtattr *rta)
{
	__s32 tmp;

	memcpy(&tmp, RTA_DATA(rta), sizeof(tmp));
	return tmp;
}

/*
 * rta_getattr_s64 - read a signed 64-bit payload from a route attribute.
 * @rta: attribute whose payload starts at RTA_DATA(rta).
 *
 * The caller must make sure the payload holds at least sizeof(__s64) bytes;
 * no length check is done here.
 */
static inline __s64 rta_getattr_s64(const struct rtattr *rta)
{
	__s64 tmp;

	memcpy(&tmp, RTA_DATA(rta), sizeof(tmp));
	return tmp;
}

/*
 * From 0dd16449356f7ba88e5374392b577f0504b3f025 Mon Sep 17 00:00:00 2001
 * From: Vinicius Costa Gomes
 * Date: Fri, 5 Oct 2018 16:25:21 -0700
 * Subject: [PATCH 4/5] tc: Add support for configuring the taprio scheduler
 *
 * This traffic scheduler allows traffic classes states (transmission
 * allowed/not allowed, in the simplest case) to be scheduled, according to
 * a pre-generated time sequence. This is the basis of the IEEE 802.1Qbv
 * specification.
 */
Example configuration:

tc qdisc replace dev enp3s0 parent root handle 100 taprio \
          num_tc 3 \
          map 2 2 1 0 2 2 2 2 2 2 2 2 2 2 2 2 \
          queues 1@0 1@1 2@2 \
          base-time 1528743495910289987 \
          sched-entry S 01 300000 \
          sched-entry S 02 300000 \
          sched-entry S 04 300000 \
          clockid CLOCK_TAI

The configuration format is similar to mqprio. The main difference is
the presence of a schedule, built by multiple "sched-entry"
definitions, each entry has the following format:

     sched-entry <CMD> <GATE MASK> <INTERVAL>

The only supported <CMD> is "S", which means "SetGateStates",
following the IEEE 802.1Qbv-2015 definition (Table 8-6). <GATE MASK>
is a bitmask where each bit is associated with a traffic class, so
bit 0 (the least significant bit) being "on" means that traffic class
0 is "active" for that schedule entry. <INTERVAL> is a time duration
in nanoseconds that specifies for how long the state defined by <CMD>
and <GATE MASK> should be held before moving to the next entry.

This schedule is circular, that is, after the last entry is executed
it starts from the first one, indefinitely.

The other parameters can be defined as follows:

 - base-time: specifies the instant when the schedule starts, if
  'base-time' is a time in the past, the schedule will start at

              base-time + (N * cycle-time)

   where N is the smallest integer so the resulting time is greater
   than "now", and "cycle-time" is the sum of all the intervals of the
   entries in the schedule;

 - clockid: specifies the reference clock to be used;

The parameters should be similar to what the IEEE 802.1Q family of
specifications defines.

Signed-off-by: Vinicius Costa Gomes Signed-off-by: Jesus Sanchez-Palencia Signed-off-by: David Ahern --- tc/Makefile | 1 + tc/q_taprio.c | 400 ++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 401 insertions(+) create mode 100644 tc/q_taprio.c diff --git a/tc/Makefile b/tc/Makefile index 5a1a7ff9..25a28284 100644 --- a/tc/Makefile +++ b/tc/Makefile @@ -74,6 +74,7 @@ TCMODULES += e_bpf.o TCMODULES += f_matchall.o TCMODULES += q_cbs.o TCMODULES += q_etf.o +TCMODULES += q_taprio.o TCSO := ifeq ($(TC_CONFIG_ATM),y) diff --git a/tc/q_taprio.c b/tc/q_taprio.c new file mode 100644 index 00000000..562dacb8 --- /dev/null +++ b/tc/q_taprio.c @@ -0,0 +1,400 @@ +/* + * q_taprio.c Time Aware Priority Scheduler + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * Authors: Vinicius Costa Gomes + * Jesus Sanchez-Palencia + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "utils.h" +#include "tc_util.h" +#include "list.h" + +struct sched_entry { + struct list_head list; + uint32_t index; + uint32_t interval; + uint32_t gatemask; + uint8_t cmd; +}; + +#define CLOCKID_INVALID (-1) +static const struct static_clockid { + const char *name; + clockid_t clockid; +} clockids_sysv[] = { + { "REALTIME", CLOCK_REALTIME }, + { "TAI", CLOCK_TAI }, + { "BOOTTIME", CLOCK_BOOTTIME }, + { "MONOTONIC", CLOCK_MONOTONIC }, + { NULL } +}; + +static void explain(void) +{ + fprintf(stderr, "Usage: ... taprio clockid CLOCKID\n"); + fprintf(stderr, " [num_tc NUMBER] [map P0 P1 ...] "); + fprintf(stderr, " [queues COUNT@OFFSET COUNT@OFFSET COUNT@OFFSET ...] "); + fprintf(stderr, " [ [sched-entry index cmd gate-mask interval] ... 
] "); + fprintf(stderr, " [base-time time] "); + fprintf(stderr, "\nCLOCKID must be a valid SYS-V id (i.e. CLOCK_TAI)"); + fprintf(stderr, "\n"); +} + +static void explain_clockid(const char *val) +{ + fprintf(stderr, "taprio: illegal value for \"clockid\": \"%s\".\n", val); + fprintf(stderr, "It must be a valid SYS-V id (i.e. CLOCK_TAI)\n"); +} + +static int get_clockid(__s32 *val, const char *arg) +{ + const struct static_clockid *c; + + /* Drop the CLOCK_ prefix if that is being used. */ + if (strcasestr(arg, "CLOCK_") != NULL) + arg += sizeof("CLOCK_") - 1; + + for (c = clockids_sysv; c->name; c++) { + if (strcasecmp(c->name, arg) == 0) { + *val = c->clockid; + + return 0; + } + } + + return -1; +} + +static const char* get_clock_name(clockid_t clockid) +{ + const struct static_clockid *c; + + for (c = clockids_sysv; c->name; c++) { + if (clockid == c->clockid) + return c->name; + } + + return "invalid"; +} + +static const char *entry_cmd_to_str(__u8 cmd) +{ + switch (cmd) { + case TC_TAPRIO_CMD_SET_GATES: + return "S"; + default: + return "Invalid"; + } +} + +static int str_to_entry_cmd(const char *str) +{ + if (strcmp(str, "S") == 0) + return TC_TAPRIO_CMD_SET_GATES; + + return -1; +} + +static int add_sched_list(struct list_head *sched_entries, struct nlmsghdr *n) +{ + struct sched_entry *e; + + list_for_each_entry(e, sched_entries, list) { + struct rtattr *a; + + a = addattr_nest(n, 1024, TCA_TAPRIO_SCHED_ENTRY); + + addattr_l(n, 1024, TCA_TAPRIO_SCHED_ENTRY_CMD, &e->cmd, sizeof(e->cmd)); + addattr_l(n, 1024, TCA_TAPRIO_SCHED_ENTRY_GATE_MASK, &e->gatemask, sizeof(e->gatemask)); + addattr_l(n, 1024, TCA_TAPRIO_SCHED_ENTRY_INTERVAL, &e->interval, sizeof(e->interval)); + + addattr_nest_end(n, a); + } + + return 0; +} + +static void explain_sched_entry(void) +{ + fprintf(stderr, "Usage: ... taprio ... 
sched-entry \n"); +} + +static struct sched_entry *create_entry(uint32_t gatemask, uint32_t interval, uint8_t cmd) +{ + struct sched_entry *e; + + e = calloc(1, sizeof(*e)); + if (!e) + return NULL; + + e->gatemask = gatemask; + e->interval = interval; + e->cmd = cmd; + + return e; +} + +static int taprio_parse_opt(struct qdisc_util *qu, int argc, + char **argv, struct nlmsghdr *n, const char *dev) +{ + __s32 clockid = CLOCKID_INVALID; + struct tc_mqprio_qopt opt = { }; + struct list_head sched_entries; + struct rtattr *tail; + __s64 base_time = 0; + int err, idx; + + INIT_LIST_HEAD(&sched_entries); + + while (argc > 0) { + idx = 0; + if (strcmp(*argv, "num_tc") == 0) { + NEXT_ARG(); + if (get_u8(&opt.num_tc, *argv, 10)) { + fprintf(stderr, "Illegal \"num_tc\"\n"); + return -1; + } + } else if (strcmp(*argv, "map") == 0) { + while (idx < TC_QOPT_MAX_QUEUE && NEXT_ARG_OK()) { + NEXT_ARG(); + if (get_u8(&opt.prio_tc_map[idx], *argv, 10)) { + PREV_ARG(); + break; + } + idx++; + } + for ( ; idx < TC_QOPT_MAX_QUEUE; idx++) + opt.prio_tc_map[idx] = 0; + } else if (strcmp(*argv, "queues") == 0) { + char *tmp, *tok; + + while (idx < TC_QOPT_MAX_QUEUE && NEXT_ARG_OK()) { + NEXT_ARG(); + + tmp = strdup(*argv); + if (!tmp) + break; + + tok = strtok(tmp, "@"); + if (get_u16(&opt.count[idx], tok, 10)) { + free(tmp); + PREV_ARG(); + break; + } + tok = strtok(NULL, "@"); + if (get_u16(&opt.offset[idx], tok, 10)) { + free(tmp); + PREV_ARG(); + break; + } + free(tmp); + idx++; + } + } else if (strcmp(*argv, "sched-entry") == 0) { + uint32_t mask, interval; + struct sched_entry *e; + uint8_t cmd; + + NEXT_ARG(); + err = str_to_entry_cmd(*argv); + if (err < 0) { + explain_sched_entry(); + return -1; + } + cmd = err; + + NEXT_ARG(); + if (get_u32(&mask, *argv, 16)) { + explain_sched_entry(); + return -1; + } + + NEXT_ARG(); + if (get_u32(&interval, *argv, 0)) { + explain_sched_entry(); + return -1; + } + + e = create_entry(mask, interval, cmd); + if (!e) { + fprintf(stderr, "taprio: 
not enough memory for new schedule entry\n"); + return -1; + } + + list_add_tail(&e->list, &sched_entries); + + } else if (strcmp(*argv, "base-time") == 0) { + NEXT_ARG(); + if (get_s64(&base_time, *argv, 10)) { + PREV_ARG(); + break; + } + } else if (strcmp(*argv, "clockid") == 0) { + NEXT_ARG(); + if (clockid != CLOCKID_INVALID) { + fprintf(stderr, "taprio: duplicate \"clockid\" specification\n"); + return -1; + } + if (get_clockid(&clockid, *argv)) { + explain_clockid(*argv); + return -1; + } + } else if (strcmp(*argv, "help") == 0) { + explain(); + return -1; + } else { + fprintf(stderr, "Unknown argument\n"); + return -1; + } + argc--; argv++; + } + + tail = NLMSG_TAIL(n); + addattr_l(n, 1024, TCA_OPTIONS, NULL, 0); + + if (opt.num_tc > 0) + addattr_l(n, 1024, TCA_TAPRIO_ATTR_PRIOMAP, &opt, sizeof(opt)); + + if (base_time) + addattr_l(n, 1024, TCA_TAPRIO_ATTR_SCHED_BASE_TIME, &base_time, sizeof(base_time)); + + addattr_l(n, 1024, TCA_TAPRIO_ATTR_SCHED_CLOCKID, &clockid, sizeof(clockid)); + + if (!list_empty(&sched_entries)) { + struct rtattr *entry_list; + entry_list = addattr_nest(n, 1024, TCA_TAPRIO_ATTR_SCHED_ENTRY_LIST | NLA_F_NESTED); + + err = add_sched_list(&sched_entries, n); + if (err < 0) { + fprintf(stderr, "Could not add schedule to netlink message\n"); + return -1; + } + + addattr_nest_end(n, entry_list); + } + + tail->rta_len = (void *) NLMSG_TAIL(n) - (void *) tail; + + return 0; +} + +static int print_sched_list(FILE *f, struct rtattr *list) +{ + struct rtattr *item; + int rem; + + if (list == NULL) + return 0; + + rem = RTA_PAYLOAD(list); + + open_json_array(PRINT_JSON, "schedule"); + + for (item = RTA_DATA(list); RTA_OK(item, rem); item = RTA_NEXT(item, rem)) { + struct rtattr *tb[TCA_TAPRIO_SCHED_ENTRY_MAX + 1]; + __u32 index = 0, gatemask = 0, interval = 0; + __u8 command = 0; + + parse_rtattr_nested(tb, TCA_TAPRIO_SCHED_ENTRY_MAX, item); + + if (tb[TCA_TAPRIO_SCHED_ENTRY_INDEX]) + index = rta_getattr_u32(tb[TCA_TAPRIO_SCHED_ENTRY_INDEX]); 
+ + if (tb[TCA_TAPRIO_SCHED_ENTRY_CMD]) + command = rta_getattr_u8(tb[TCA_TAPRIO_SCHED_ENTRY_CMD]); + + if (tb[TCA_TAPRIO_SCHED_ENTRY_GATE_MASK]) + gatemask = rta_getattr_u32(tb[TCA_TAPRIO_SCHED_ENTRY_GATE_MASK]); + + if (tb[TCA_TAPRIO_SCHED_ENTRY_INTERVAL]) + interval = rta_getattr_u32(tb[TCA_TAPRIO_SCHED_ENTRY_INTERVAL]); + + open_json_object(NULL); + print_uint(PRINT_ANY, "index", "\tindex %u", index); + print_string(PRINT_ANY, "cmd", " cmd %s", entry_cmd_to_str(command)); + print_0xhex(PRINT_ANY, "gatemask", " gatemask %#x", gatemask); + print_uint(PRINT_ANY, "interval", " interval %u", interval); + close_json_object(); + + print_string(PRINT_FP, NULL, "%s", _SL_); + } + + close_json_array(PRINT_ANY, ""); + + return 0; +} + +static int taprio_print_opt(struct qdisc_util *qu, FILE *f, struct rtattr *opt) +{ + struct rtattr *tb[TCA_TAPRIO_ATTR_MAX + 1]; + struct tc_mqprio_qopt *qopt = 0; + __s32 clockid = CLOCKID_INVALID; + __s64 base_time = 0; + int i; + + if (opt == NULL) + return 0; + + parse_rtattr_nested(tb, TCA_TAPRIO_ATTR_MAX, opt); + + if (tb[TCA_TAPRIO_ATTR_PRIOMAP] == NULL) + return -1; + + qopt = RTA_DATA(tb[TCA_TAPRIO_ATTR_PRIOMAP]); + + print_uint(PRINT_ANY, "tc", "tc %u ", qopt->num_tc); + + open_json_array(PRINT_ANY, "map"); + for (i = 0; i <= TC_PRIO_MAX; i++) + print_uint(PRINT_ANY, NULL, " %u", qopt->prio_tc_map[i]); + close_json_array(PRINT_ANY, ""); + + print_string(PRINT_FP, NULL, "%s", _SL_); + + open_json_array(PRINT_ANY, "queues"); + for (i = 0; i < qopt->num_tc; i++) { + open_json_object(NULL); + print_uint(PRINT_ANY, "offset", " offset %u", qopt->offset[i]); + print_uint(PRINT_ANY, "count", " count %u", qopt->count[i]); + close_json_object(); + } + close_json_array(PRINT_ANY, ""); + + print_string(PRINT_FP, NULL, "%s", _SL_); + + if (tb[TCA_TAPRIO_ATTR_SCHED_BASE_TIME]) + base_time = rta_getattr_s64(tb[TCA_TAPRIO_ATTR_SCHED_BASE_TIME]); + + if (tb[TCA_TAPRIO_ATTR_SCHED_CLOCKID]) + clockid = 
rta_getattr_s32(tb[TCA_TAPRIO_ATTR_SCHED_CLOCKID]); + + print_string(PRINT_ANY, "clockid", "clockid %s", get_clock_name(clockid)); + + print_lluint(PRINT_ANY, "base_time", " base-time %lld", base_time); + + print_string(PRINT_FP, NULL, "%s", _SL_); + + return print_sched_list(f, tb[TCA_TAPRIO_ATTR_SCHED_ENTRY_LIST]); +} + +struct qdisc_util taprio_qdisc_util = { + .id = "taprio", + .parse_qopt = taprio_parse_opt, + .print_qopt = taprio_print_opt, +}; From 579acb4bc52f84c629df5fcd2f9a054f41b48c57 Mon Sep 17 00:00:00 2001 From: Vinicius Costa Gomes Date: Fri, 5 Oct 2018 16:25:22 -0700 Subject: [PATCH 5/5] taprio: Add manpage for tc-taprio(8) This documents the parameters and provides an example of usage. Signed-off-by: Vinicius Costa Gomes Signed-off-by: David Ahern --- man/man8/tc-taprio.8 | 142 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 142 insertions(+) create mode 100644 man/man8/tc-taprio.8 diff --git a/man/man8/tc-taprio.8 b/man/man8/tc-taprio.8 new file mode 100644 index 00000000..92055b43 --- /dev/null +++ b/man/man8/tc-taprio.8 @@ -0,0 +1,142 @@ +.TH TAPRIO 8 "25 Sept 2018" "iproute2" "Linux" +.SH NAME +TAPRIO \- Time Aware Priority Shaper +.SH SYNOPSIS +.B tc qdisc ... dev +dev +.B parent +classid +.B [ handle +major: +.B ] taprio num_tc +tcs +.ti +8 +.B map +P0 P1 P2 ... +.B queues +count1@offset1 count2@offset2 ... +.ti +8 +.B base-time +base-time +.B clockid +clockid +.ti +8 +.B sched-entry + +.ti +8 +.B sched-entry + +.ti +8 +.B sched-entry + +.ti +8 +.B sched-entry + + +.SH DESCRIPTION +The TAPRIO qdisc implements a simplified version of the scheduling +state machine defined by IEEE 802.1Q-2018 Section 8.6.9, which allows +configuration of a sequence of gate states, where each gate state +allows outgoing traffic for a subset (potentially empty) of traffic +classes. + +How traffic is mapped to different hardware queues is similar to +.BR mqprio(8) +and so the +.B map +and +.Q queues +parameters have the same meaning. 
+
+The other parameters specify the schedule, and at what point in time
+it should start (it can behave as if the schedule started in the past).
+
+.SH PARAMETERS
+.TP
+num_tc
+.br
+Number of traffic classes to use. Up to 16 classes supported.
+
+.TP
+map
+.br
+The priority to traffic class map. Maps priorities 0..15 to a specified
+traffic class. See
+.BR mqprio(8)
+for more details.
+
+.TP
+queues
+.br
+Provide count and offset of queue range for each traffic class. In the
+format,
+.B count@offset.
+Queue ranges for each traffic class cannot overlap and must be a
+contiguous range of queues.
+
+.TP
+base-time
+.br
+Specifies the instant in nanoseconds, using the reference of
+.B clockid,
+defining the time when the schedule starts. If 'base-time' is a time
+in the past, the schedule will start at
+
+base-time + (N * cycle-time)
+
+where N is the smallest integer so the resulting time is greater than
+"now", and "cycle-time" is the sum of all the intervals of the entries
+in the schedule.
+
+.TP
+clockid
+.br
+Specifies the clock to be used by qdisc's internal timer for measuring
+time and scheduling events.
+
+.TP
+sched-entry
+.br
+There may be multiple
+.B sched-entry
+parameters in a single schedule. Each one has the
+
+sched-entry <command> <gate mask> <interval>
+
+format. The only supported <command> is "S", which
+means "SetGateStates", following the IEEE 802.1Q-2018 definition
+(Table 8-7). <gate mask> is a bitmask where each bit is associated
+with a traffic class, so bit 0 (the least significant bit) being "on"
+means that traffic class 0 is "active" for that schedule entry.
+<interval> is a time duration, in nanoseconds, that specifies for how
+long the state defined by <command> and <gate mask> should be held
+before moving to the next entry.
+
+.SH EXAMPLES
+
+The following example shows a traffic schedule with three traffic
+classes ("num_tc 3"), kept as three separate traffic classes that
+we are going to call TC 0, TC 1 and TC 2.
+We could read the "map" parameter below as: traffic with priority 3 is
+classified as TC 0, priority 2 is classified as TC 1 and the rest is
+classified as TC 2.
+
+The schedule will start at instant 1528743495910289987 using the
+reference CLOCK_TAI. The schedule is composed of three entries, each of
+300us duration.
+
+.EX
+# tc qdisc replace dev eth0 parent root handle 100 taprio \\
+              num_tc 3 \\
+              map 2 2 1 0 2 2 2 2 2 2 2 2 2 2 2 2 \\
+              queues 1@0 1@1 2@2 \\
+              base-time 1528743495910289987 \\
+              sched-entry S 01 300000 \\
+              sched-entry S 02 300000 \\
+              sched-entry S 04 300000 \\
+              clockid CLOCK_TAI
+.EE
+
+
+.SH AUTHORS
+Vinicius Costa Gomes