diff --git a/include/libnetlink.h b/include/libnetlink.h
index 8e411ebc..04264b87 100644
--- a/include/libnetlink.h
+++ b/include/libnetlink.h
@@ -203,6 +203,19 @@ static inline __u64 rta_getattr_u64(const struct rtattr *rta)
 	memcpy(&tmp, RTA_DATA(rta), sizeof(__u64));
 	return tmp;
 }
+/* Read the attribute payload as a signed 32-bit value. */
+static inline __s32 rta_getattr_s32(const struct rtattr *rta)
+{
+	return *(__s32 *)RTA_DATA(rta);
+}
+/* Read the attribute payload as a signed 64-bit value, copied out with memcpy(). */
+static inline __s64 rta_getattr_s64(const struct rtattr *rta)
+{
+	__s64 tmp;
+
+	memcpy(&tmp, RTA_DATA(rta), sizeof(tmp));
+	return tmp;
+}
 static inline const char *rta_getattr_str(const struct rtattr *rta)
 {
 	return (const char *)RTA_DATA(rta);
diff --git a/include/utils.h b/include/utils.h
index eba67b6e..258d630e 100644
--- a/include/utils.h
+++ b/include/utils.h
@@ -144,6 +144,7 @@ int get_time_rtt(unsigned *val, const char *arg, int *raw);
 #define get_byte get_u8
 #define get_ushort get_u16
 #define get_short get_s16
+int get_s64(__s64 *val, const char *arg, int base);
 int get_u64(__u64 *val, const char *arg, int base);
 int get_u32(__u32 *val, const char *arg, int base);
 int get_s32(__s32 *val, const char *arg, int base);
diff --git a/lib/utils.c b/lib/utils.c
index 406ab8bd..cd1e0a98 100644
--- a/lib/utils.c
+++ b/lib/utils.c
@@ -383,6 +383,32 @@ int get_u8(__u8 *val, const char *arg, int base)
 	return 0;
 }
 
+/*
+ * Parse arg as a signed 64-bit integer in the given base.
+ * Returns 0 and stores the value in *val on success; returns -1 if arg is
+ * NULL or empty, has trailing characters, or is out of range.
+ */
+int get_s64(__s64 *val, const char *arg, int base)
+{
+	long long res;	/* must match strtoll(); "long" truncates where long is 32 bits */
+	char *ptr;
+
+	errno = 0;
+
+	if (!arg || !*arg)
+		return -1;
+	res = strtoll(arg, &ptr, base);
+	if (!ptr || ptr == arg || *ptr)
+		return -1;
+	if ((res == LLONG_MIN || res == LLONG_MAX) && errno == ERANGE)
+		return -1;
+	if (res > INT64_MAX || res < INT64_MIN)
+		return -1;
+
+	*val = res;
+	return 0;
+}
+
 int get_s32(__s32 *val, const char *arg, int base)
 {
 	long res;
diff --git a/man/man8/tc-taprio.8 b/man/man8/tc-taprio.8
new file mode 100644
index 00000000..92055b43
--- /dev/null
+++ b/man/man8/tc-taprio.8
@@ -0,0 +1,142 @@
+.TH TAPRIO 8 "25 Sept 2018" "iproute2" "Linux"
+.SH NAME
+TAPRIO \- Time Aware
Priority Shaper
+.SH SYNOPSIS
+.B tc qdisc ... dev
+dev
+.B parent
+classid
+.B [ handle
+major:
+.B ] taprio num_tc
+tcs
+.ti +8
+.B map
+P0 P1 P2 ...
+.B queues
+count1@offset1 count2@offset2 ...
+.ti +8
+.B base-time
+base-time
+.B clockid
+clockid
+.ti +8
+.B sched-entry
+<command 1> <gate mask 1> <interval 1>
+.ti +8
+.B sched-entry
+<command 2> <gate mask 2> <interval 2>
+.ti +8
+.B sched-entry
+<command 3> <gate mask 3> <interval 3>
+.ti +8
+.B sched-entry
+<command N> <gate mask N> <interval N>
+
+.SH DESCRIPTION
+The TAPRIO qdisc implements a simplified version of the scheduling
+state machine defined by IEEE 802.1Q-2018 Section 8.6.9, which allows
+configuration of a sequence of gate states, where each gate state
+allows outgoing traffic for a subset (potentially empty) of traffic
+classes.
+
+How traffic is mapped to different hardware queues is similar to
+.BR mqprio (8)
+and so the
+.B map
+and
+.B queues
+parameters have the same meaning.
+
+The other parameters specify the schedule, and at what point in time
+it should start (it can behave as if the schedule started in the past).
+
+.SH PARAMETERS
+.TP
+num_tc
+.br
+Number of traffic classes to use. Up to 16 classes supported.
+
+.TP
+map
+.br
+The priority to traffic class map. Maps priorities 0..15 to a specified
+traffic class. See
+.BR mqprio (8)
+for more details.
+
+.TP
+queues
+.br
+Provide count and offset of queue range for each traffic class. In the
+format,
+.B count@offset.
+Queue ranges for each traffic class cannot overlap and must be a
+contiguous range of queues.
+
+.TP
+base-time
+.br
+Specifies the instant in nanoseconds, using the reference of
+.B clockid,
+defining the time when the schedule starts. If 'base-time' is a time
+in the past, the schedule will start at
+
+base-time + (N * cycle-time)
+
+where N is the smallest integer so the resulting time is greater than
+"now", and "cycle-time" is the sum of all the intervals of the entries
+in the schedule.
+
+.TP
+clockid
+.br
+Specifies the clock to be used by qdisc's internal timer for measuring
+time and scheduling events.
+
+.TP
+sched-entry
+.br
+There may be multiple
+.B sched-entry
+parameters in a single schedule. Each one has the
+
+sched-entry <command> <gate mask> <interval>
+
+format. The only supported <command> is "S", which
+means "SetGateStates", following the IEEE 802.1Q-2018 definition
+(Table 8-7). <gate mask> is a bitmask where each bit is associated
+with a traffic class, so bit 0 (the least significant bit) being "on"
+means that traffic class 0 is "active" for that schedule entry.
+<interval> is a time duration, in nanoseconds, that specifies for how
+long the state defined by <command> and <gate mask> should be held
+before moving to the next entry.
+
+.SH EXAMPLES
+
+The following example shows a traffic schedule with three traffic
+classes ("num_tc 3"), which are separate traffic classes that
+we are going to call TC 0, TC 1 and TC 2. We could read the
+"map" parameter below as: traffic with priority 3 is classified as TC
+0, priority 2 is classified as TC 1 and the rest is classified as TC
+2.
+
+The schedule will start at instant 1528743495910289987 using the
+reference CLOCK_TAI. The schedule is composed of three entries each of
+300us duration.
+
+.EX
+# tc qdisc replace dev eth0 parent root handle 100 taprio \\
+              num_tc 3 \\
+              map 2 2 1 0 2 2 2 2 2 2 2 2 2 2 2 2 \\
+              queues 1@0 1@1 2@2 \\
+              base-time 1528743495910289987 \\
+              sched-entry S 01 300000 \\
+              sched-entry S 02 300000 \\
+              sched-entry S 04 300000 \\
+              clockid CLOCK_TAI
+.EE
+
+
+.SH AUTHORS
+Vinicius Costa Gomes
diff --git a/tc/Makefile b/tc/Makefile
index 5a1a7ff9..25a28284 100644
--- a/tc/Makefile
+++ b/tc/Makefile
@@ -74,6 +74,7 @@ TCMODULES += e_bpf.o
 TCMODULES += f_matchall.o
 TCMODULES += q_cbs.o
 TCMODULES += q_etf.o
+TCMODULES += q_taprio.o
 
 TCSO :=
 ifeq ($(TC_CONFIG_ATM),y)
diff --git a/tc/q_taprio.c b/tc/q_taprio.c
new file mode 100644
index 00000000..562dacb8
--- /dev/null
+++ b/tc/q_taprio.c
@@ -0,0 +1,426 @@
+/*
+ * q_taprio.c	Time Aware Priority Scheduler
+ *
+ *		This program is free software; you can redistribute it and/or
+ *		modify it under the terms of the GNU General Public License
+ *		as published by the Free Software Foundation; either version
+ *		2 of the License, or (at your option) any later version.
+ *
+ * Authors:	Vinicius Costa Gomes
+ *		Jesus Sanchez-Palencia
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <inttypes.h>
+#include <sys/socket.h>
+#include <netinet/in.h>
+#include <arpa/inet.h>
+#include <string.h>
+#include <time.h>
+
+#include "utils.h"
+#include "tc_util.h"
+#include "list.h"
+
+/* One parsed "sched-entry" argument, queued until the netlink message is built. */
+struct sched_entry {
+	struct list_head list;
+	uint32_t index;
+	uint32_t interval;
+	uint32_t gatemask;
+	uint8_t cmd;
+};
+
+#define CLOCKID_INVALID (-1)
+/* Clock names accepted for "clockid" (with or without the CLOCK_ prefix). */
+static const struct static_clockid {
+	const char *name;
+	clockid_t clockid;
+} clockids_sysv[] = {
+	{ "REALTIME", CLOCK_REALTIME },
+	{ "TAI", CLOCK_TAI },
+	{ "BOOTTIME", CLOCK_BOOTTIME },
+	{ "MONOTONIC", CLOCK_MONOTONIC },
+	{ NULL }
+};
+
+/* Print the qdisc usage summary to stderr. */
+static void explain(void)
+{
+	fprintf(stderr, "Usage: ... taprio clockid CLOCKID\n");
+	fprintf(stderr, " [num_tc NUMBER] [map P0 P1 ...] ");
+	fprintf(stderr, " [queues COUNT@OFFSET COUNT@OFFSET COUNT@OFFSET ...] ");
+	fprintf(stderr, " [ [sched-entry cmd gate-mask interval] ... ] ");
+	fprintf(stderr, " [base-time time] ");
+	fprintf(stderr, "\nCLOCKID must be a valid SYS-V id (i.e. CLOCK_TAI)");
+	fprintf(stderr, "\n");
+}
+
+/* Report an unrecognised "clockid" value to stderr. */
+static void explain_clockid(const char *val)
+{
+	fprintf(stderr, "taprio: illegal value for \"clockid\": \"%s\".\n", val);
+	fprintf(stderr, "It must be a valid SYS-V id (i.e. CLOCK_TAI)\n");
+}
+
+/*
+ * Translate a clock name into its clockid.  The name is matched
+ * case-insensitively against clockids_sysv[]; a leading "CLOCK_" is optional.
+ * Returns 0 and stores the id in *val on success, -1 if the name is unknown.
+ */
+static int get_clockid(__s32 *val, const char *arg)
+{
+	const struct static_clockid *c;
+
+	/* Drop the CLOCK_ prefix only when it really is a prefix. */
+	if (strncasecmp(arg, "CLOCK_", sizeof("CLOCK_") - 1) == 0)
+		arg += sizeof("CLOCK_") - 1;
+
+	for (c = clockids_sysv; c->name; c++) {
+		if (strcasecmp(c->name, arg) == 0) {
+			*val = c->clockid;
+
+			return 0;
+		}
+	}
+
+	return -1;
+}
+
+/* Map a clockid back to its short name, or "invalid" if it is not in the table. */
+static const char *get_clock_name(clockid_t clockid)
+{
+	const struct static_clockid *c;
+
+	for (c = clockids_sysv; c->name; c++) {
+		if (clockid == c->clockid)
+			return c->name;
+	}
+
+	return "invalid";
+}
+
+/* Printable form of a schedule entry command. */
+static const char *entry_cmd_to_str(__u8 cmd)
+{
+	switch (cmd) {
+	case TC_TAPRIO_CMD_SET_GATES:
+		return "S";
+	default:
+		return "Invalid";
+	}
+}
+
+/* Parse a schedule entry command; returns the command value or -1. */
+static int str_to_entry_cmd(const char *str)
+{
+	if (strcmp(str, "S") == 0)
+		return TC_TAPRIO_CMD_SET_GATES;
+
+	return -1;
+}
+
+/* Append one nested TCA_TAPRIO_SCHED_ENTRY attribute per queued entry. */
+static int add_sched_list(struct list_head *sched_entries, struct nlmsghdr *n)
+{
+	struct sched_entry *e;
+
+	list_for_each_entry(e, sched_entries, list) {
+		struct rtattr *a;
+
+		a = addattr_nest(n, 1024, TCA_TAPRIO_SCHED_ENTRY);
+
+		addattr_l(n, 1024, TCA_TAPRIO_SCHED_ENTRY_CMD, &e->cmd, sizeof(e->cmd));
+		addattr_l(n, 1024, TCA_TAPRIO_SCHED_ENTRY_GATE_MASK, &e->gatemask, sizeof(e->gatemask));
+		addattr_l(n, 1024, TCA_TAPRIO_SCHED_ENTRY_INTERVAL, &e->interval, sizeof(e->interval));
+
+		addattr_nest_end(n, a);
+	}
+
+	return 0;
+}
+
+/* Print the expected "sched-entry" syntax to stderr. */
+static void explain_sched_entry(void)
+{
+	fprintf(stderr, "Usage: ... taprio ... sched-entry <cmd> <gate mask> <interval>\n");
+}
+
+/* Allocate a zeroed schedule entry and fill it in; returns NULL on OOM. */
+static struct sched_entry *create_entry(uint32_t gatemask, uint32_t interval, uint8_t cmd)
+{
+	struct sched_entry *e;
+
+	e = calloc(1, sizeof(*e));
+	if (!e)
+		return NULL;
+
+	e->gatemask = gatemask;
+	e->interval = interval;
+	e->cmd = cmd;
+
+	return e;
+}
+
+/*
+ * Parse the taprio command line and append the resulting TCA_OPTIONS
+ * attribute (priomap, base time, clockid and schedule entries) to n.
+ * Returns 0 on success, -1 after printing a diagnostic on bad input.
+ */
+static int taprio_parse_opt(struct qdisc_util *qu, int argc,
+			    char **argv, struct nlmsghdr *n, const char *dev)
+{
+	__s32 clockid = CLOCKID_INVALID;
+	struct tc_mqprio_qopt opt = { };
+	struct list_head sched_entries;
+	struct rtattr *tail;
+	__s64 base_time = 0;
+	int err, idx;
+
+	INIT_LIST_HEAD(&sched_entries);
+
+	while (argc > 0) {
+		idx = 0;
+		if (strcmp(*argv, "num_tc") == 0) {
+			NEXT_ARG();
+			if (get_u8(&opt.num_tc, *argv, 10)) {
+				fprintf(stderr, "Illegal \"num_tc\"\n");
+				return -1;
+			}
+		} else if (strcmp(*argv, "map") == 0) {
+			while (idx < TC_QOPT_MAX_QUEUE && NEXT_ARG_OK()) {
+				NEXT_ARG();
+				if (get_u8(&opt.prio_tc_map[idx], *argv, 10)) {
+					PREV_ARG();
+					break;
+				}
+				idx++;
+			}
+			for ( ; idx < TC_QOPT_MAX_QUEUE; idx++)
+				opt.prio_tc_map[idx] = 0;
+		} else if (strcmp(*argv, "queues") == 0) {
+			char *tmp, *tok;
+
+			while (idx < TC_QOPT_MAX_QUEUE && NEXT_ARG_OK()) {
+				NEXT_ARG();
+
+				tmp = strdup(*argv);
+				if (!tmp)
+					break;
+
+				tok = strtok(tmp, "@");
+				if (get_u16(&opt.count[idx], tok, 10)) {
+					free(tmp);
+					PREV_ARG();
+					break;
+				}
+				tok = strtok(NULL, "@");
+				if (get_u16(&opt.offset[idx], tok, 10)) {
+					free(tmp);
+					PREV_ARG();
+					break;
+				}
+				free(tmp);
+				idx++;
+			}
+		} else if (strcmp(*argv, "sched-entry") == 0) {
+			uint32_t mask, interval;
+			struct sched_entry *e;
+			uint8_t cmd;
+
+			NEXT_ARG();
+			err = str_to_entry_cmd(*argv);
+			if (err < 0) {
+				explain_sched_entry();
+				return -1;
+			}
+			cmd = err;
+
+			NEXT_ARG();
+			if (get_u32(&mask, *argv, 16)) {
+				explain_sched_entry();
+				return -1;
+			}
+
+			NEXT_ARG();
+			if (get_u32(&interval, *argv, 0)) {
+				explain_sched_entry();
+				return -1;
+			}
+
+			e = create_entry(mask, interval, cmd);
+			if (!e) {
+				fprintf(stderr, "taprio: not enough memory for new schedule entry\n");
+				return -1;
+			}
+
+			list_add_tail(&e->list, &sched_entries);
+
+		} else if (strcmp(*argv, "base-time") == 0) {
+			NEXT_ARG();
+			if (get_s64(&base_time, *argv, 10)) {
+				fprintf(stderr, "taprio: illegal value for \"base-time\": \"%s\"\n", *argv);
+				return -1;
+			}
+		} else if (strcmp(*argv, "clockid") == 0) {
+			NEXT_ARG();
+			if (clockid != CLOCKID_INVALID) {
+				fprintf(stderr, "taprio: duplicate \"clockid\" specification\n");
+				return -1;
+			}
+			if (get_clockid(&clockid, *argv)) {
+				explain_clockid(*argv);
+				return -1;
+			}
+		} else if (strcmp(*argv, "help") == 0) {
+			explain();
+			return -1;
+		} else {
+			fprintf(stderr, "Unknown argument\n");
+			return -1;
+		}
+		argc--; argv++;
+	}
+
+	tail = NLMSG_TAIL(n);
+	addattr_l(n, 1024, TCA_OPTIONS, NULL, 0);
+
+	if (opt.num_tc > 0)
+		addattr_l(n, 1024, TCA_TAPRIO_ATTR_PRIOMAP, &opt, sizeof(opt));
+
+	if (base_time)
+		addattr_l(n, 1024, TCA_TAPRIO_ATTR_SCHED_BASE_TIME, &base_time, sizeof(base_time));
+
+	addattr_l(n, 1024, TCA_TAPRIO_ATTR_SCHED_CLOCKID, &clockid, sizeof(clockid));
+
+	if (!list_empty(&sched_entries)) {
+		struct rtattr *entry_list;
+		entry_list = addattr_nest(n, 1024, TCA_TAPRIO_ATTR_SCHED_ENTRY_LIST | NLA_F_NESTED);
+
+		err = add_sched_list(&sched_entries, n);
+		if (err < 0) {
+			fprintf(stderr, "Could not add schedule to netlink message\n");
+			return -1;
+		}
+
+		addattr_nest_end(n, entry_list);
+	}
+
+	tail->rta_len = (void *) NLMSG_TAIL(n) - (void *) tail;
+
+	return 0;
+}
+
+/* Print every entry of a schedule entry list attribute; a NULL list prints nothing. */
+static int print_sched_list(FILE *f, struct rtattr *list)
+{
+	struct rtattr *item;
+	int rem;
+
+	if (list == NULL)
+		return 0;
+
+	rem = RTA_PAYLOAD(list);
+
+	open_json_array(PRINT_JSON, "schedule");
+
+	for (item = RTA_DATA(list); RTA_OK(item, rem); item = RTA_NEXT(item, rem)) {
+		struct rtattr *tb[TCA_TAPRIO_SCHED_ENTRY_MAX + 1];
+		__u32 index = 0, gatemask = 0, interval = 0;
+		__u8 command = 0;
+
+		parse_rtattr_nested(tb, TCA_TAPRIO_SCHED_ENTRY_MAX, item);
+
+		if (tb[TCA_TAPRIO_SCHED_ENTRY_INDEX])
+			index = rta_getattr_u32(tb[TCA_TAPRIO_SCHED_ENTRY_INDEX]);
+
+		if (tb[TCA_TAPRIO_SCHED_ENTRY_CMD])
+			command = rta_getattr_u8(tb[TCA_TAPRIO_SCHED_ENTRY_CMD]);
+
+		if (tb[TCA_TAPRIO_SCHED_ENTRY_GATE_MASK])
+			gatemask = rta_getattr_u32(tb[TCA_TAPRIO_SCHED_ENTRY_GATE_MASK]);
+
+		if (tb[TCA_TAPRIO_SCHED_ENTRY_INTERVAL])
+			interval = rta_getattr_u32(tb[TCA_TAPRIO_SCHED_ENTRY_INTERVAL]);
+
+		open_json_object(NULL);
+		print_uint(PRINT_ANY, "index", "\tindex %u", index);
+		print_string(PRINT_ANY, "cmd", " cmd %s", entry_cmd_to_str(command));
+		print_0xhex(PRINT_ANY, "gatemask", " gatemask %#x", gatemask);
+		print_uint(PRINT_ANY, "interval", " interval %u", interval);
+		close_json_object();
+
+		print_string(PRINT_FP, NULL, "%s", _SL_);
+	}
+
+	close_json_array(PRINT_ANY, "");
+
+	return 0;
+}
+
+/*
+ * Print the qdisc configuration carried in opt: traffic class map, queue
+ * ranges, clockid, base time and the schedule.  Returns -1 if the priomap
+ * attribute is missing.
+ */
+static int taprio_print_opt(struct qdisc_util *qu, FILE *f, struct rtattr *opt)
+{
+	struct rtattr *tb[TCA_TAPRIO_ATTR_MAX + 1];
+	struct tc_mqprio_qopt *qopt = NULL;
+	__s32 clockid = CLOCKID_INVALID;
+	__s64 base_time = 0;
+	int i;
+
+	if (opt == NULL)
+		return 0;
+
+	parse_rtattr_nested(tb, TCA_TAPRIO_ATTR_MAX, opt);
+
+	if (tb[TCA_TAPRIO_ATTR_PRIOMAP] == NULL)
+		return -1;
+
+	qopt = RTA_DATA(tb[TCA_TAPRIO_ATTR_PRIOMAP]);
+
+	print_uint(PRINT_ANY, "tc", "tc %u ", qopt->num_tc);
+
+	open_json_array(PRINT_ANY, "map");
+	for (i = 0; i <= TC_PRIO_MAX; i++)
+		print_uint(PRINT_ANY, NULL, " %u", qopt->prio_tc_map[i]);
+	close_json_array(PRINT_ANY, "");
+
+	print_string(PRINT_FP, NULL, "%s", _SL_);
+
+	open_json_array(PRINT_ANY, "queues");
+	for (i = 0; i < qopt->num_tc; i++) {
+		open_json_object(NULL);
+		print_uint(PRINT_ANY, "offset", " offset %u", qopt->offset[i]);
+		print_uint(PRINT_ANY, "count", " count %u", qopt->count[i]);
+		close_json_object();
+	}
+	close_json_array(PRINT_ANY, "");
+
+	print_string(PRINT_FP, NULL, "%s", _SL_);
+
+	if (tb[TCA_TAPRIO_ATTR_SCHED_BASE_TIME])
+		base_time = rta_getattr_s64(tb[TCA_TAPRIO_ATTR_SCHED_BASE_TIME]);
+
+	if (tb[TCA_TAPRIO_ATTR_SCHED_CLOCKID])
+		clockid = rta_getattr_s32(tb[TCA_TAPRIO_ATTR_SCHED_CLOCKID]);
+
+	print_string(PRINT_ANY, "clockid", "clockid %s", get_clock_name(clockid));
+
+	print_lluint(PRINT_ANY, "base_time", " base-time %lld", base_time);
+
+	print_string(PRINT_FP, NULL, "%s", _SL_);
+
+	return print_sched_list(f, tb[TCA_TAPRIO_ATTR_SCHED_ENTRY_LIST]);
+}
+
+struct qdisc_util taprio_qdisc_util = {
+	.id		= "taprio",
+	.parse_qopt	= taprio_parse_opt,
+	.print_qopt	= taprio_print_opt,
+};