From 8fded9ffad4d3bd0e93418cc8ceb9efff501a873 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Mon, 13 Mar 2017 08:31:58 -0700 Subject: [PATCH 01/22] update kernel headers from net-next Signed-off-by: Stephen Hemminger --- include/linux/ipv6.h | 1 + 1 file changed, 1 insertion(+) diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index 26ee549f..75eed608 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -183,6 +183,7 @@ enum { DEVCONF_SEG6_REQUIRE_HMAC, DEVCONF_ENHANCED_DAD, DEVCONF_ADDR_GEN_MODE, + DEVCONF_DISABLE_POLICY, DEVCONF_MAX }; From 2099b983854fb4c22b27bf7e375412ca67caf115 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Tue, 14 Mar 2017 16:36:15 -0700 Subject: [PATCH 02/22] update headers from net-next Get TCA_DUMP_INVISIBLE and SCTP changes. Signed-off-by: Stephen Hemminger --- include/linux/mpls_iptunnel.h | 2 ++ include/linux/rtnetlink.h | 2 ++ include/linux/sctp.h | 31 +++++++++++++++++++++++++++++++ 3 files changed, 35 insertions(+) diff --git a/include/linux/mpls_iptunnel.h b/include/linux/mpls_iptunnel.h index 4132c3c5..1a0e57b4 100644 --- a/include/linux/mpls_iptunnel.h +++ b/include/linux/mpls_iptunnel.h @@ -16,11 +16,13 @@ /* MPLS tunnel attributes * [RTA_ENCAP] = { * [MPLS_IPTUNNEL_DST] + * [MPLS_IPTUNNEL_TTL] * } */ enum { MPLS_IPTUNNEL_UNSPEC, MPLS_IPTUNNEL_DST, + MPLS_IPTUNNEL_TTL, __MPLS_IPTUNNEL_MAX, }; #define MPLS_IPTUNNEL_MAX (__MPLS_IPTUNNEL_MAX - 1) diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h index d42fe83c..86c605b9 100644 --- a/include/linux/rtnetlink.h +++ b/include/linux/rtnetlink.h @@ -319,6 +319,7 @@ enum rtattr_type_t { RTA_EXPIRES, RTA_PAD, RTA_UID, + RTA_TTL_PROPAGATE, __RTA_MAX }; @@ -545,6 +546,7 @@ enum { TCA_STATS2, TCA_STAB, TCA_PAD, + TCA_DUMP_INVISIBLE, __TCA_MAX }; diff --git a/include/linux/sctp.h b/include/linux/sctp.h index 5e08b3de..008cf5f9 100644 --- a/include/linux/sctp.h +++ b/include/linux/sctp.h @@ -115,6 +115,7 @@ typedef __s32 sctp_assoc_t; #define 
SCTP_PR_SUPPORTED 113 #define SCTP_DEFAULT_PRINFO 114 #define SCTP_PR_ASSOC_STATUS 115 +#define SCTP_RECONFIG_SUPPORTED 117 #define SCTP_ENABLE_STREAM_RESET 118 #define SCTP_RESET_STREAMS 119 #define SCTP_RESET_ASSOC 120 @@ -502,6 +503,28 @@ struct sctp_stream_reset_event { __u16 strreset_stream_list[]; }; +#define SCTP_ASSOC_RESET_DENIED 0x0004 +#define SCTP_ASSOC_RESET_FAILED 0x0008 +struct sctp_assoc_reset_event { + __u16 assocreset_type; + __u16 assocreset_flags; + __u32 assocreset_length; + sctp_assoc_t assocreset_assoc_id; + __u32 assocreset_local_tsn; + __u32 assocreset_remote_tsn; +}; + +#define SCTP_ASSOC_CHANGE_DENIED 0x0004 +#define SCTP_ASSOC_CHANGE_FAILED 0x0008 +struct sctp_stream_change_event { + __u16 strchange_type; + __u16 strchange_flags; + __u32 strchange_length; + sctp_assoc_t strchange_assoc_id; + __u16 strchange_instrms; + __u16 strchange_outstrms; +}; + /* * Described in Section 7.3 * Ancillary Data and Notification Interest Options @@ -518,6 +541,8 @@ struct sctp_event_subscribe { __u8 sctp_authentication_event; __u8 sctp_sender_dry_event; __u8 sctp_stream_reset_event; + __u8 sctp_assoc_reset_event; + __u8 sctp_stream_change_event; }; /* @@ -543,6 +568,8 @@ union sctp_notification { struct sctp_authkey_event sn_authkey_event; struct sctp_sender_dry_event sn_sender_dry_event; struct sctp_stream_reset_event sn_strreset_event; + struct sctp_assoc_reset_event sn_assocreset_event; + struct sctp_stream_change_event sn_strchange_event; }; /* Section 5.3.1 @@ -572,6 +599,10 @@ enum sctp_sn_type { #define SCTP_SENDER_DRY_EVENT SCTP_SENDER_DRY_EVENT SCTP_STREAM_RESET_EVENT, #define SCTP_STREAM_RESET_EVENT SCTP_STREAM_RESET_EVENT + SCTP_ASSOC_RESET_EVENT, +#define SCTP_ASSOC_RESET_EVENT SCTP_ASSOC_RESET_EVENT + SCTP_STREAM_CHANGE_EVENT, +#define SCTP_STREAM_CHANGE_EVENT SCTP_STREAM_CHANGE_EVENT }; /* Notification error codes used to fill up the error fields in some From 7c581a124d5c67ee7356d88777f421f9881d28da Mon Sep 17 00:00:00 2001 From: Jiri 
Kosina Date: Wed, 8 Mar 2017 13:04:42 +0100 Subject: [PATCH 03/22] iproute2: add support for invisible qdisc dumping Support the new TCA_DUMP_INVISIBLE netlink attribute that allows asking kernel to perform 'full qdisc dump', as for historical reasons some of the default qdiscs are being hidden by the kernel. The command syntax is extended by an optional 'invisible' argument to 'tc qdisc show'. Signed-off-by: Jiri Kosina --- tc/tc_qdisc.c | 25 +++++++++++++++++++++++-- 1 file changed, 23 insertions(+), 2 deletions(-) diff --git a/tc/tc_qdisc.c b/tc/tc_qdisc.c index 3a3701c2..1e9d9097 100644 --- a/tc/tc_qdisc.c +++ b/tc/tc_qdisc.c @@ -34,7 +34,7 @@ static int usage(void) fprintf(stderr, " [ stab [ help | STAB_OPTIONS] ]\n"); fprintf(stderr, " [ [ QDISC_KIND ] [ help | OPTIONS ] ]\n"); fprintf(stderr, "\n"); - fprintf(stderr, " tc qdisc show [ dev STRING ] [ ingress | clsact ]\n"); + fprintf(stderr, " tc qdisc show [ dev STRING ] [ ingress | clsact ] [ invisible ]\n"); fprintf(stderr, "Where:\n"); fprintf(stderr, "QDISC_KIND := { [p|b]fifo | tbf | prio | cbq | red | etc. }\n"); fprintf(stderr, "OPTIONS := ... try tc qdisc add help\n"); @@ -292,6 +292,7 @@ static int tc_qdisc_list(int argc, char **argv) { struct tcmsg t = { .tcm_family = AF_UNSPEC }; char d[16] = {}; + bool dump_invisible = false; while (argc > 0) { if (strcmp(*argv, "dev") == 0) { @@ -306,6 +307,8 @@ static int tc_qdisc_list(int argc, char **argv) t.tcm_parent = TC_H_INGRESS; } else if (matches(*argv, "help") == 0) { usage(); + } else if (strcmp(*argv, "invisible") == 0) { + dump_invisible = true; } else { fprintf(stderr, "What is \"%s\"? 
Try \"tc qdisc help\".\n", *argv); return -1; @@ -325,7 +328,25 @@ static int tc_qdisc_list(int argc, char **argv) filter_ifindex = t.tcm_ifindex; } - if (rtnl_dump_request(&rth, RTM_GETQDISC, &t, sizeof(t)) < 0) { + if (dump_invisible) { + struct { + struct nlmsghdr n; + struct tcmsg t; + char buf[256]; + } req = { + .n.nlmsg_type = RTM_GETQDISC, + .n.nlmsg_len = NLMSG_LENGTH(sizeof(struct tcmsg)), + }; + + req.t.tcm_family = AF_UNSPEC; + + addattr(&req.n, 256, TCA_DUMP_INVISIBLE); + if (rtnl_dump_request_n(&rth, &req.n) < 0) { + perror("Cannot send dump request"); + return 1; + } + + } else if (rtnl_dump_request(&rth, RTM_GETQDISC, &t, sizeof(t)) < 0) { perror("Cannot send dump request"); return 1; } From b285ba9ea405667266aebcd76a3d1f29d8058e8e Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Mon, 20 Mar 2017 10:18:50 -0700 Subject: [PATCH 04/22] update headers from net-next (post 4.11-rc3) Signed-off-by: Stephen Hemminger --- include/linux/pkt_sched.h | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/include/linux/pkt_sched.h b/include/linux/pkt_sched.h index df7451d3..099bf552 100644 --- a/include/linux/pkt_sched.h +++ b/include/linux/pkt_sched.h @@ -617,6 +617,14 @@ struct tc_drr_stats { #define TC_QOPT_BITMASK 15 #define TC_QOPT_MAX_QUEUE 16 +enum { + TC_MQPRIO_HW_OFFLOAD_NONE, /* no offload requested */ + TC_MQPRIO_HW_OFFLOAD_TCS, /* offload TCs, no queue counts */ + __TC_MQPRIO_HW_OFFLOAD_MAX +}; + +#define TC_MQPRIO_HW_OFFLOAD_MAX (__TC_MQPRIO_HW_OFFLOAD_MAX - 1) + struct tc_mqprio_qopt { __u8 num_tc; __u8 prio_tc_map[TC_QOPT_BITMASK + 1]; From 45f78b4deced9163072a0d09fcf69935862b7692 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Tue, 4 Apr 2017 14:57:29 -0700 Subject: [PATCH 05/22] update kernel headers from net-next Signed-off-by: Stephen Hemminger --- include/linux/bpf.h | 31 +++++++++++++++++- include/linux/devlink.h | 67 ++++++++++++++++++++++++++++++++++++++- include/linux/if_link.h | 7 ++++ include/linux/ipv6.h | 1 + 
include/linux/rtnetlink.h | 2 ++ include/linux/sctp.h | 1 + include/linux/sysctl.h | 1 + 7 files changed, 108 insertions(+), 2 deletions(-) diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 178e20c3..76bdb27b 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -81,6 +81,7 @@ enum bpf_cmd { BPF_OBJ_GET, BPF_PROG_ATTACH, BPF_PROG_DETACH, + BPF_PROG_TEST_RUN, }; enum bpf_map_type { @@ -96,6 +97,8 @@ enum bpf_map_type { BPF_MAP_TYPE_LRU_HASH, BPF_MAP_TYPE_LRU_PERCPU_HASH, BPF_MAP_TYPE_LPM_TRIE, + BPF_MAP_TYPE_ARRAY_OF_MAPS, + BPF_MAP_TYPE_HASH_OF_MAPS, }; enum bpf_prog_type { @@ -152,6 +155,7 @@ union bpf_attr { __u32 value_size; /* size of value in bytes */ __u32 max_entries; /* max number of entries in a map */ __u32 map_flags; /* prealloc or not */ + __u32 inner_map_fd; /* fd pointing to the inner map */ }; struct { /* anonymous struct used by BPF_MAP_*_ELEM commands */ @@ -186,6 +190,17 @@ union bpf_attr { __u32 attach_type; __u32 attach_flags; }; + + struct { /* anonymous struct used by BPF_PROG_TEST_RUN command */ + __u32 prog_fd; + __u32 retval; + __u32 data_size_in; + __u32 data_size_out; + __aligned_u64 data_in; + __aligned_u64 data_out; + __u32 repeat; + __u32 duration; + } test; } __attribute__((aligned(8))); /* BPF helper function descriptions: @@ -456,6 +471,18 @@ union bpf_attr { * Return: * > 0 length of the string including the trailing NUL on success * < 0 error + * + * u64 bpf_bpf_get_socket_cookie(skb) + * Get the cookie for the socket stored inside sk_buff. + * @skb: pointer to skb + * Return: 8 Bytes non-decreasing number on success or 0 if the socket + * field is missing inside sk_buff + * + * u32 bpf_get_socket_uid(skb) + * Get the owner uid of the socket stored inside sk_buff. 
+ * @skb: pointer to skb + * Return: uid of the socket owner on success or 0 if the socket pointer + * inside sk_buff is NULL */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -503,7 +530,9 @@ union bpf_attr { FN(get_numa_node_id), \ FN(skb_change_head), \ FN(xdp_adjust_head), \ - FN(probe_read_str), + FN(probe_read_str), \ + FN(get_socket_cookie), \ + FN(get_socket_uid), /* integer value in 'imm' field of BPF_CALL instruction selects which helper * function eBPF program intends to call diff --git a/include/linux/devlink.h b/include/linux/devlink.h index 2ad3585b..0c8af618 100644 --- a/include/linux/devlink.h +++ b/include/linux/devlink.h @@ -65,8 +65,12 @@ enum devlink_command { #define DEVLINK_CMD_ESWITCH_MODE_SET /* obsolete, never use this! */ \ DEVLINK_CMD_ESWITCH_SET - /* add new commands above here */ + DEVLINK_CMD_DPIPE_TABLE_GET, + DEVLINK_CMD_DPIPE_ENTRIES_GET, + DEVLINK_CMD_DPIPE_HEADERS_GET, + DEVLINK_CMD_DPIPE_TABLE_COUNTERS_SET, + /* add new commands above here */ __DEVLINK_CMD_MAX, DEVLINK_CMD_MAX = __DEVLINK_CMD_MAX - 1 }; @@ -148,10 +152,71 @@ enum devlink_attr { DEVLINK_ATTR_ESWITCH_MODE, /* u16 */ DEVLINK_ATTR_ESWITCH_INLINE_MODE, /* u8 */ + DEVLINK_ATTR_DPIPE_TABLES, /* nested */ + DEVLINK_ATTR_DPIPE_TABLE, /* nested */ + DEVLINK_ATTR_DPIPE_TABLE_NAME, /* string */ + DEVLINK_ATTR_DPIPE_TABLE_SIZE, /* u64 */ + DEVLINK_ATTR_DPIPE_TABLE_MATCHES, /* nested */ + DEVLINK_ATTR_DPIPE_TABLE_ACTIONS, /* nested */ + DEVLINK_ATTR_DPIPE_TABLE_COUNTERS_ENABLED, /* u8 */ + + DEVLINK_ATTR_DPIPE_ENTRIES, /* nested */ + DEVLINK_ATTR_DPIPE_ENTRY, /* nested */ + DEVLINK_ATTR_DPIPE_ENTRY_INDEX, /* u64 */ + DEVLINK_ATTR_DPIPE_ENTRY_MATCH_VALUES, /* nested */ + DEVLINK_ATTR_DPIPE_ENTRY_ACTION_VALUES, /* nested */ + DEVLINK_ATTR_DPIPE_ENTRY_COUNTER, /* u64 */ + + DEVLINK_ATTR_DPIPE_MATCH, /* nested */ + DEVLINK_ATTR_DPIPE_MATCH_VALUE, /* nested */ + DEVLINK_ATTR_DPIPE_MATCH_TYPE, /* u32 */ + + DEVLINK_ATTR_DPIPE_ACTION, /* nested */ + DEVLINK_ATTR_DPIPE_ACTION_VALUE, 
/* nested */ + DEVLINK_ATTR_DPIPE_ACTION_TYPE, /* u32 */ + + DEVLINK_ATTR_DPIPE_VALUE, + DEVLINK_ATTR_DPIPE_VALUE_MASK, + DEVLINK_ATTR_DPIPE_VALUE_MAPPING, /* u32 */ + + DEVLINK_ATTR_DPIPE_HEADERS, /* nested */ + DEVLINK_ATTR_DPIPE_HEADER, /* nested */ + DEVLINK_ATTR_DPIPE_HEADER_NAME, /* string */ + DEVLINK_ATTR_DPIPE_HEADER_ID, /* u32 */ + DEVLINK_ATTR_DPIPE_HEADER_FIELDS, /* nested */ + DEVLINK_ATTR_DPIPE_HEADER_GLOBAL, /* u8 */ + DEVLINK_ATTR_DPIPE_HEADER_INDEX, /* u32 */ + + DEVLINK_ATTR_DPIPE_FIELD, /* nested */ + DEVLINK_ATTR_DPIPE_FIELD_NAME, /* string */ + DEVLINK_ATTR_DPIPE_FIELD_ID, /* u32 */ + DEVLINK_ATTR_DPIPE_FIELD_BITWIDTH, /* u32 */ + DEVLINK_ATTR_DPIPE_FIELD_MAPPING_TYPE, /* u32 */ + + DEVLINK_ATTR_PAD, + /* add new attributes above here, update the policy in devlink.c */ __DEVLINK_ATTR_MAX, DEVLINK_ATTR_MAX = __DEVLINK_ATTR_MAX - 1 }; +/* Mapping between internal resource described by the field and system + * structure + */ +enum devlink_dpipe_field_mapping_type { + DEVLINK_DPIPE_FIELD_MAPPING_TYPE_NONE, + DEVLINK_DPIPE_FIELD_MAPPING_TYPE_IFINDEX, +}; + +/* Match type - specify the type of the match */ +enum devlink_dpipe_match_type { + DEVLINK_DPIPE_MATCH_TYPE_FIELD_EXACT, +}; + +/* Action type - specify the action type */ +enum devlink_dpipe_action_type { + DEVLINK_DPIPE_ACTION_TYPE_FIELD_MODIFY, +}; + #endif /* _LINUX_DEVLINK_H_ */ diff --git a/include/linux/if_link.h b/include/linux/if_link.h index b0bdbd6e..2bb46f08 100644 --- a/include/linux/if_link.h +++ b/include/linux/if_link.h @@ -536,11 +536,18 @@ enum { #define IFLA_PPP_MAX (__IFLA_PPP_MAX - 1) /* GTP section */ + +enum ifla_gtp_role { + GTP_ROLE_GGSN = 0, + GTP_ROLE_SGSN, +}; + enum { IFLA_GTP_UNSPEC, IFLA_GTP_FD0, IFLA_GTP_FD1, IFLA_GTP_PDP_HASHSIZE, + IFLA_GTP_ROLE, __IFLA_GTP_MAX, }; #define IFLA_GTP_MAX (__IFLA_GTP_MAX - 1) diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index 75eed608..5c08b222 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -184,6 
+184,7 @@ enum { DEVCONF_ENHANCED_DAD, DEVCONF_ADDR_GEN_MODE, DEVCONF_DISABLE_POLICY, + DEVCONF_ACCEPT_RA_RT_INFO_MIN_PLEN, DEVCONF_MAX }; diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h index 86c605b9..a96db837 100644 --- a/include/linux/rtnetlink.h +++ b/include/linux/rtnetlink.h @@ -122,6 +122,8 @@ enum { RTM_NEWNETCONF = 80, #define RTM_NEWNETCONF RTM_NEWNETCONF + RTM_DELNETCONF, +#define RTM_DELNETCONF RTM_DELNETCONF RTM_GETNETCONF = 82, #define RTM_GETNETCONF RTM_GETNETCONF diff --git a/include/linux/sctp.h b/include/linux/sctp.h index 008cf5f9..4a169feb 100644 --- a/include/linux/sctp.h +++ b/include/linux/sctp.h @@ -115,6 +115,7 @@ typedef __s32 sctp_assoc_t; #define SCTP_PR_SUPPORTED 113 #define SCTP_DEFAULT_PRINFO 114 #define SCTP_PR_ASSOC_STATUS 115 +#define SCTP_PR_STREAM_STATUS 116 #define SCTP_RECONFIG_SUPPORTED 117 #define SCTP_ENABLE_STREAM_RESET 118 #define SCTP_RESET_STREAMS 119 diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h index 41631d35..9b8de52d 100644 --- a/include/linux/sysctl.h +++ b/include/linux/sysctl.h @@ -568,6 +568,7 @@ enum { NET_IPV6_PROXY_NDP=23, NET_IPV6_ACCEPT_SOURCE_ROUTE=25, NET_IPV6_ACCEPT_RA_FROM_LOCAL=26, + NET_IPV6_ACCEPT_RA_RT_INFO_MIN_PLEN=27, __NET_IPV6_MAX }; From c44d18ea96add9309ae72f9ca57dd534585de53b Mon Sep 17 00:00:00 2001 From: Robert Shearman Date: Tue, 11 Apr 2017 09:37:20 +0100 Subject: [PATCH 06/22] iproute: Add support for ttl-propagation attribute Add support for setting and displaying the ttl-propagation attribute initially used by MPLS to control propagation of MPLS TTL to IPv4/IPv6 TTL/hop-limit on popping final label on a per-route basis. 
Signed-off-by: Robert Shearman Acked-by: David Ahern --- ip/iproute.c | 22 ++++++++++++++++++++++ man/man8/ip-route.8.in | 10 +++++++++- 2 files changed, 31 insertions(+), 1 deletion(-) diff --git a/ip/iproute.c b/ip/iproute.c index 5e23613d..eedcc2f6 100644 --- a/ip/iproute.c +++ b/ip/iproute.c @@ -77,6 +77,7 @@ static void usage(void) fprintf(stderr, "NODE_SPEC := [ TYPE ] PREFIX [ tos TOS ]\n"); fprintf(stderr, " [ table TABLE_ID ] [ proto RTPROTO ]\n"); fprintf(stderr, " [ scope SCOPE ] [ metric METRIC ]\n"); + fprintf(stderr, " [ ttl-propagate { enabled | disabled } ]\n"); fprintf(stderr, "INFO_SPEC := NH OPTIONS FLAGS [ nexthop NH ]...\n"); fprintf(stderr, "NH := [ encap ENCAPTYPE ENCAPHDR ] [ via [ FAMILY ] ADDRESS ]\n"); fprintf(stderr, " [ dev STRING ] [ weight NUMBER ] NHFLAGS\n"); @@ -715,6 +716,13 @@ int print_route(const struct sockaddr_nl *who, struct nlmsghdr *n, void *arg) fprintf(fp, "%u", pref); } } + if (tb[RTA_TTL_PROPAGATE]) { + fprintf(fp, "ttl-propagate "); + if (rta_getattr_u8(tb[RTA_TTL_PROPAGATE])) + fprintf(fp, "enabled"); + else + fprintf(fp, "disabled"); + } fprintf(fp, "\n"); fflush(fp); return 0; @@ -1185,6 +1193,20 @@ static int iproute_modify(int cmd, unsigned int flags, int argc, char **argv) if (rta->rta_len > RTA_LENGTH(0)) addraw_l(&req.n, 1024, RTA_DATA(rta), RTA_PAYLOAD(rta)); + } else if (strcmp(*argv, "ttl-propagate") == 0) { + __u8 ttl_prop; + + NEXT_ARG(); + if (matches(*argv, "enabled") == 0) + ttl_prop = 1; + else if (matches(*argv, "disabled") == 0) + ttl_prop = 0; + else + invarg("\"ttl-propagate\" value is invalid\n", + *argv); + + addattr8(&req.n, sizeof(req), RTA_TTL_PROPAGATE, + ttl_prop); } else { int type; inet_prefix dst; diff --git a/man/man8/ip-route.8.in b/man/man8/ip-route.8.in index d6e06649..fbe2711a 100644 --- a/man/man8/ip-route.8.in +++ b/man/man8/ip-route.8.in @@ -75,7 +75,9 @@ replace " } " .B scope .IR SCOPE " ] [ " .B metric -.IR METRIC " ]" +.IR METRIC " ] [ " +.B ttl-propagate +.RB "{ " enabled " 
| " disabled " } ]" .ti -8 .IR INFO_SPEC " := " "NH OPTIONS FLAGS" " [" @@ -710,6 +712,12 @@ is a set of encapsulation attributes specific to the the route will be deleted after the expires time. .B Only support IPv6 at present. + +.TP +.BR ttl-propagate " { " enabled " | " disabled " } " +Control whether TTL should be propagated from any encap into the +un-encapsulated packet, overriding any global configuration. Only +supported for MPLS at present. .RE .TP From 9688cf3b7ab6a5ee41a9a2892885afefc0ce5fb9 Mon Sep 17 00:00:00 2001 From: Robert Shearman Date: Tue, 11 Apr 2017 09:37:21 +0100 Subject: [PATCH 07/22] iproute: Add support for MPLS LWT ttl attribute Add support for setting and displaying the ttl attribute for MPLS IP lightweight tunnels. Signed-off-by: Robert Shearman Acked-by: David Ahern --- ip/iproute_lwtunnel.c | 31 +++++++++++++++++++++++++++++-- man/man8/ip-route.8.in | 9 ++++++++- 2 files changed, 37 insertions(+), 3 deletions(-) diff --git a/ip/iproute_lwtunnel.c b/ip/iproute_lwtunnel.c index 0fa1cab0..845a115e 100644 --- a/ip/iproute_lwtunnel.c +++ b/ip/iproute_lwtunnel.c @@ -84,6 +84,9 @@ static void print_encap_mpls(FILE *fp, struct rtattr *encap) if (tb[MPLS_IPTUNNEL_DST]) fprintf(fp, " %s ", format_host_rta(AF_MPLS, tb[MPLS_IPTUNNEL_DST])); + if (tb[MPLS_IPTUNNEL_TTL]) + fprintf(fp, "ttl %u ", + rta_getattr_u8(tb[MPLS_IPTUNNEL_TTL])); } static void print_encap_ip(FILE *fp, struct rtattr *encap) @@ -247,6 +250,7 @@ static int parse_encap_mpls(struct rtattr *rta, size_t len, inet_prefix addr; int argc = *argcp; char **argv = *argvp; + int ttl_ok = 0; if (get_addr(&addr, *argv, AF_MPLS)) { fprintf(stderr, @@ -258,8 +262,31 @@ static int parse_encap_mpls(struct rtattr *rta, size_t len, rta_addattr_l(rta, len, MPLS_IPTUNNEL_DST, &addr.data, addr.bytelen); - *argcp = argc; - *argvp = argv; + argc--; + argv++; + + while (argc > 0) { + if (strcmp(*argv, "ttl") == 0) { + __u8 ttl; + + NEXT_ARG(); + if (ttl_ok++) + duparg2("ttl", *argv); + if (get_u8(&ttl, 
*argv, 0)) + invarg("\"ttl\" value is invalid\n", *argv); + rta_addattr8(rta, len, MPLS_IPTUNNEL_TTL, ttl); + } else { + break; + } + argc--; argv++; + } + + /* argv is currently the first unparsed argument, + * but the lwt_parse_encap() caller will move to the next, + * so step back + */ + *argcp = argc + 1; + *argvp = argv - 1; return 0; } diff --git a/man/man8/ip-route.8.in b/man/man8/ip-route.8.in index fbe2711a..d2a44acf 100644 --- a/man/man8/ip-route.8.in +++ b/man/man8/ip-route.8.in @@ -181,7 +181,9 @@ throw " | " unreachable " | " prohibit " | " blackhole " | " nat " ]" .ti -8 .IR ENCAP_MPLS " := " .BR mpls " [ " -.IR LABEL " ]" +.IR LABEL " ] [" +.B ttl +.IR TTL " ]" .ti -8 .IR ENCAP_IP " := " @@ -666,6 +668,11 @@ is a set of encapsulation attributes specific to the .I MPLSLABEL - mpls label stack with labels separated by .I "/" +.sp + +.B ttl +.I TTL +- TTL to use for MPLS header or 0 to inherit from IP header .in -2 .sp From 3ad6d176389d4157bf4146cfea41337f08cfa62f Mon Sep 17 00:00:00 2001 From: David Ahern Date: Thu, 23 Mar 2017 19:51:20 -0700 Subject: [PATCH 08/22] netlink: Add flag to suppress print of nlmsg error Allow callers of the dump API to handle nlmsg errors (e.g., an unsupported feature). Setting RTNL_HANDLE_F_SUPPRESS_NLERR in the rtnl_handle avoids unnecessary messages to the users in some cases. For example, RTNETLINK answers: Operation not supported when probing for support of a new feature. 
Signed-off-by: David Ahern --- include/libnetlink.h | 1 + lib/libnetlink.c | 3 ++- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/include/libnetlink.h b/include/libnetlink.h index bd0267df..c43ab0a2 100644 --- a/include/libnetlink.h +++ b/include/libnetlink.h @@ -21,6 +21,7 @@ struct rtnl_handle { int proto; FILE *dump_fp; #define RTNL_HANDLE_F_LISTEN_ALL_NSID 0x01 +#define RTNL_HANDLE_F_SUPPRESS_NLERR 0x02 int flags; }; diff --git a/lib/libnetlink.c b/lib/libnetlink.c index 9303b668..5b75b2db 100644 --- a/lib/libnetlink.c +++ b/lib/libnetlink.c @@ -299,7 +299,8 @@ static void rtnl_dump_error(const struct rtnl_handle *rth, errno == EOPNOTSUPP)) return; - perror("RTNETLINK answers"); + if (!(rth->flags & RTNL_HANDLE_F_SUPPRESS_NLERR)) + perror("RTNETLINK answers"); } } From f052f5dfe0b87f31c95176611b7887802514208e Mon Sep 17 00:00:00 2001 From: David Ahern Date: Thu, 23 Mar 2017 19:51:21 -0700 Subject: [PATCH 09/22] ip netconf: Show all address families by default in dumps Currently, 'ip netconf' only shows ipv4 and ipv6 netconf settings. If IPv6 is not enabled, the dump ends with RTNETLINK answers: Operation not supported when IPv6 request is attempted. Further, if the mpls_router module is also loaded a separate request is needed to get MPLS settings. To make this better going forward, use the new PF_UNSPEC dump all option if the kernel supports it. If the kernel does not, it sets NLMSG_ERROR and returns EOPNOTSUPP which is trapped and we fall back to the existing output to maintain compatibility with existing kernels. 
Signed-off-by: David Ahern --- ip/ipnetconf.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/ip/ipnetconf.c b/ip/ipnetconf.c index af539f5e..dc085102 100644 --- a/ip/ipnetconf.c +++ b/ip/ipnetconf.c @@ -19,6 +19,7 @@ #include #include #include +#include #include "rt_names.h" #include "utils.h" @@ -197,16 +198,26 @@ static int do_show(int argc, char **argv) } rtnl_listen(&rth, print_netconf, stdout); } else { + rth.flags = RTNL_HANDLE_F_SUPPRESS_NLERR; dump: if (rtnl_wilddump_request(&rth, filter.family, RTM_GETNETCONF) < 0) { perror("Cannot send dump request"); exit(1); } if (rtnl_dump_filter(&rth, print_netconf2, stdout) < 0) { + /* kernel does not support netconf dump on AF_UNSPEC; + * fall back to requesting by family + */ + if (errno == EOPNOTSUPP && + filter.family == AF_UNSPEC) { + filter.family = AF_INET; + goto dump; + } + perror("RTNETLINK answers"); fprintf(stderr, "Dump terminated\n"); exit(1); } - if (preferred_family == AF_UNSPEC) { + if (preferred_family == AF_UNSPEC && filter.family == AF_INET) { preferred_family = AF_INET6; filter.family = AF_INET6; goto dump; From c6858ef43113404a528a66a1310d154297648ea1 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Thu, 23 Mar 2017 19:51:22 -0700 Subject: [PATCH 10/22] ip netconf: show all families on dev request Currently, specifying a device to ip netconf dumps only values for IPv4. Change this to dump data for all families unless a specific family is given. 
Signed-off-by: David Ahern --- ip/ipnetconf.c | 23 +++++++++++++---------- 1 file changed, 13 insertions(+), 10 deletions(-) diff --git a/ip/ipnetconf.c b/ip/ipnetconf.c index dc085102..696e3dd5 100644 --- a/ip/ipnetconf.c +++ b/ip/ipnetconf.c @@ -56,6 +56,7 @@ int print_netconf(const struct sockaddr_nl *who, struct rtnl_ctrl_data *ctrl, struct netconfmsg *ncm = NLMSG_DATA(n); int len = n->nlmsg_len; struct rtattr *tb[NETCONFA_MAX+1]; + int ifindex = 0; if (n->nlmsg_type == NLMSG_ERROR) return -1; @@ -77,6 +78,12 @@ int print_netconf(const struct sockaddr_nl *who, struct rtnl_ctrl_data *ctrl, parse_rtattr(tb, NETCONFA_MAX, netconf_rta(ncm), NLMSG_PAYLOAD(n, sizeof(*ncm))); + if (tb[NETCONFA_IFINDEX]) + ifindex = rta_getattr_u32(tb[NETCONFA_IFINDEX]); + + if (filter.ifindex && filter.ifindex != ifindex) + return 0; + switch (ncm->ncm_family) { case AF_INET: fprintf(fp, "ipv4 "); @@ -93,9 +100,7 @@ int print_netconf(const struct sockaddr_nl *who, struct rtnl_ctrl_data *ctrl, } if (tb[NETCONFA_IFINDEX]) { - int *ifindex = (int *)rta_getattr_str(tb[NETCONFA_IFINDEX]); - - switch (*ifindex) { + switch (ifindex) { case NETCONFA_IFINDEX_ALL: fprintf(fp, "all "); break; @@ -103,7 +108,7 @@ int print_netconf(const struct sockaddr_nl *who, struct rtnl_ctrl_data *ctrl, fprintf(fp, "default "); break; default: - fprintf(fp, "dev %s ", ll_index_to_name(*ifindex)); + fprintf(fp, "dev %s ", ll_index_to_name(ifindex)); break; } } @@ -169,8 +174,6 @@ static int do_show(int argc, char **argv) ipnetconf_reset_filter(0); filter.family = preferred_family; - if (filter.family == AF_UNSPEC) - filter.family = AF_INET; while (argc > 0) { if (strcmp(*argv, "dev") == 0) { @@ -186,11 +189,11 @@ static int do_show(int argc, char **argv) } ll_init_map(&rth); - if (filter.ifindex) { + + if (filter.ifindex && filter.family != AF_UNSPEC) { req.ncm.ncm_family = filter.family; - if (filter.ifindex) - addattr_l(&req.n, sizeof(req), NETCONFA_IFINDEX, - &filter.ifindex, sizeof(filter.ifindex)); + 
addattr_l(&req.n, sizeof(req), NETCONFA_IFINDEX, + &filter.ifindex, sizeof(filter.ifindex)); if (rtnl_send(&rth, &req.n, req.n.nlmsg_len) < 0) { perror("Can not send request"); From 0da8250be8a3bdac49d8a1e256ecf6e3767084fb Mon Sep 17 00:00:00 2001 From: David Ahern Date: Fri, 14 Apr 2017 16:09:56 -0700 Subject: [PATCH 11/22] ip vrf: Add command name next to pid 'ip vrf pids' is used to list processes bound to a vrf, but it only shows the pid leaving a lot of work for the user. Add the command name to the output. With this patch you get the more user friendly: $ ip vrf pids mgmt 1121 ntpd 1418 gdm-session-wor 1488 gnome-session 1491 dbus-launch 1492 dbus-daemon 1565 sshd ... Signed-off-by: David Ahern --- include/utils.h | 1 + ip/ipvrf.c | 24 ++++++++++++++---------- lib/fs.c | 42 ++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 57 insertions(+), 10 deletions(-) diff --git a/include/utils.h b/include/utils.h index 22369e0b..8c12e1e2 100644 --- a/include/utils.h +++ b/include/utils.h @@ -260,5 +260,6 @@ int get_real_family(int rtm_type, int rtm_family); int cmd_exec(const char *cmd, char **argv, bool do_fork); int make_path(const char *path, mode_t mode); char *find_cgroup2_mount(void); +int get_command_name(const char *pid, char *comm, size_t len); #endif /* __UTILS_H__ */ diff --git a/ip/ipvrf.c b/ip/ipvrf.c index 5e204a9e..0f611b44 100644 --- a/ip/ipvrf.c +++ b/ip/ipvrf.c @@ -111,27 +111,31 @@ static void read_cgroup_pids(const char *base_path, char *name) { char path[PATH_MAX]; char buf[4096]; - ssize_t n; - int fd; + FILE *fp; if (snprintf(path, sizeof(path), "%s/vrf/%s%s", base_path, name, CGRP_PROC_FILE) >= sizeof(path)) return; - fd = open(path, O_RDONLY); - if (fd < 0) + fp = fopen(path, "r"); + if (!fp) return; /* no cgroup file, nothing to show */ /* dump contents (pids) of cgroup.procs */ - while (1) { - n = read(fd, buf, sizeof(buf) - 1); - if (n <= 0) - break; + while (fgets(buf, sizeof(buf), fp)) { + char *nl, comm[32]; - printf("%s", buf); 
+ nl = strchr(buf, '\n'); + if (nl) + *nl = '\0'; + + if (get_command_name(buf, comm, sizeof(comm))) + strcpy(comm, ""); + + printf("%5s %s\n", buf, comm); } - close(fd); + fclose(fp); } /* recurse path looking for PATH[/NETNS]/vrf/NAME */ diff --git a/lib/fs.c b/lib/fs.c index 12a4657a..c59ac564 100644 --- a/lib/fs.c +++ b/lib/fs.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include #include @@ -149,3 +150,44 @@ out: return rc; } + +int get_command_name(const char *pid, char *comm, size_t len) +{ + char path[PATH_MAX]; + char line[128]; + FILE *fp; + + if (snprintf(path, sizeof(path), + "/proc/%s/status", pid) >= sizeof(path)) { + return -1; + } + + fp = fopen(path, "r"); + if (!fp) + return -1; + + comm[0] = '\0'; + while (fgets(line, sizeof(line), fp)) { + char *nl, *name; + + name = strstr(line, "Name:"); + if (!name) + continue; + + name += 5; + while (isspace(*name)) + name++; + + nl = strchr(name, '\n'); + if (nl) + *nl = '\0'; + + strncpy(comm, name, len - 1); + comm[len - 1] = '\0'; + break; + } + + fclose(fp); + + return 0; +} From 2c6a0636e26d2d2392f6a1870be300540b88b866 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Sun, 16 Apr 2017 10:18:10 -0700 Subject: [PATCH 12/22] Update kernel headers from 4.11 net-next Signed-off-by: Stephen Hemminger --- include/linux/bpf.h | 2 +- include/linux/netlink.h | 48 +++++++++++++++++++++++++++++++----- include/linux/netlink_diag.h | 10 ++++++++ 3 files changed, 53 insertions(+), 7 deletions(-) diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 76bdb27b..16dc501f 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -472,7 +472,7 @@ union bpf_attr { * > 0 length of the string including the trailing NUL on success * < 0 error * - * u64 bpf_bpf_get_socket_cookie(skb) + * u64 bpf_get_socket_cookie(skb) * Get the cookie for the socket stored inside sk_buff. 
* @skb: pointer to skb * Return: 8 Bytes non-decreasing number on success or 0 if the socket diff --git a/include/linux/netlink.h b/include/linux/netlink.h index a982b3c0..3a53b9aa 100644 --- a/include/linux/netlink.h +++ b/include/linux/netlink.h @@ -50,12 +50,12 @@ struct nlmsghdr { /* Flags values */ -#define NLM_F_REQUEST 1 /* It is request message. */ -#define NLM_F_MULTI 2 /* Multipart message, terminated by NLMSG_DONE */ -#define NLM_F_ACK 4 /* Reply with ack, with zero or error code */ -#define NLM_F_ECHO 8 /* Echo this request */ -#define NLM_F_DUMP_INTR 16 /* Dump was inconsistent due to sequence change */ -#define NLM_F_DUMP_FILTERED 32 /* Dump was filtered as requested */ +#define NLM_F_REQUEST 0x01 /* It is request message. */ +#define NLM_F_MULTI 0x02 /* Multipart message, terminated by NLMSG_DONE */ +#define NLM_F_ACK 0x04 /* Reply with ack, with zero or error code */ +#define NLM_F_ECHO 0x08 /* Echo this request */ +#define NLM_F_DUMP_INTR 0x10 /* Dump was inconsistent due to sequence change */ +#define NLM_F_DUMP_FILTERED 0x20 /* Dump was filtered as requested */ /* Modifiers to GET request */ #define NLM_F_ROOT 0x100 /* specify tree root */ @@ -69,6 +69,10 @@ struct nlmsghdr { #define NLM_F_CREATE 0x400 /* Create, if it does not exist */ #define NLM_F_APPEND 0x800 /* Add to end of list */ +/* Flags for ACK message */ +#define NLM_F_CAPPED 0x100 /* request was capped */ +#define NLM_F_ACK_TLVS 0x200 /* extended ACK TVLs were included */ + /* 4.4BSD ADD NLM_F_CREATE|NLM_F_EXCL 4.4BSD CHANGE NLM_F_REPLACE @@ -101,6 +105,37 @@ struct nlmsghdr { struct nlmsgerr { int error; struct nlmsghdr msg; + /* + * followed by the message contents unless NETLINK_CAP_ACK was set + * or the ACK indicates success (error == 0) + * message length is aligned with NLMSG_ALIGN() + */ + /* + * followed by TLVs defined in enum nlmsgerr_attrs + * if NETLINK_EXT_ACK was set + */ +}; + +/** + * enum nlmsgerr_attrs - nlmsgerr attributes + * @NLMSGERR_ATTR_UNUSED: unused + * 
@NLMSGERR_ATTR_MSG: error message string (string) + * @NLMSGERR_ATTR_OFFS: offset of the invalid attribute in the original + * message, counting from the beginning of the header (u32) + * @NLMSGERR_ATTR_COOKIE: arbitrary subsystem specific cookie to + * be used - in the success case - to identify a created + * object or operation or similar (binary) + * @__NLMSGERR_ATTR_MAX: number of attributes + * @NLMSGERR_ATTR_MAX: highest attribute number + */ +enum nlmsgerr_attrs { + NLMSGERR_ATTR_UNUSED, + NLMSGERR_ATTR_MSG, + NLMSGERR_ATTR_OFFS, + NLMSGERR_ATTR_COOKIE, + + __NLMSGERR_ATTR_MAX, + NLMSGERR_ATTR_MAX = __NLMSGERR_ATTR_MAX - 1 }; #define NETLINK_ADD_MEMBERSHIP 1 @@ -113,6 +148,7 @@ struct nlmsgerr { #define NETLINK_LISTEN_ALL_NSID 8 #define NETLINK_LIST_MEMBERSHIPS 9 #define NETLINK_CAP_ACK 10 +#define NETLINK_EXT_ACK 11 struct nl_pktinfo { __u32 group; diff --git a/include/linux/netlink_diag.h b/include/linux/netlink_diag.h index defd25fb..c8c8c7d2 100644 --- a/include/linux/netlink_diag.h +++ b/include/linux/netlink_diag.h @@ -38,6 +38,7 @@ enum { NETLINK_DIAG_GROUPS, NETLINK_DIAG_RX_RING, NETLINK_DIAG_TX_RING, + NETLINK_DIAG_FLAGS, __NETLINK_DIAG_MAX, }; @@ -50,5 +51,14 @@ enum { #define NDIAG_SHOW_GROUPS 0x00000002 /* show groups of a netlink socket */ /* deprecated since 4.6 */ #define NDIAG_SHOW_RING_CFG 0x00000004 /* show ring configuration */ +#define NDIAG_SHOW_FLAGS 0x00000008 /* show flags of a netlink socket */ + +/* flags */ +#define NDIAG_FLAG_CB_RUNNING 0x00000001 +#define NDIAG_FLAG_PKTINFO 0x00000002 +#define NDIAG_FLAG_BROADCAST_ERROR 0x00000004 +#define NDIAG_FLAG_NO_ENOBUFS 0x00000008 +#define NDIAG_FLAG_LISTEN_ALL_NSID 0x00000010 +#define NDIAG_FLAG_CAP_ACK 0x00000020 #endif From 85dd6ab5106079b7aec0985b5f14aa452658d34f Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Sun, 16 Apr 2017 10:18:53 -0700 Subject: [PATCH 13/22] add seg6.h kernel headers Signed-off-by: Stephen Hemminger --- include/linux/seg6.h | 54 
+++++++++++++++++++++++++++++++++++ include/linux/seg6_genl.h | 32 +++++++++++++++++++++ include/linux/seg6_hmac.h | 22 ++++++++++++++ include/linux/seg6_iptunnel.h | 39 +++++++++++++++++++++++++ 4 files changed, 147 insertions(+) create mode 100644 include/linux/seg6.h create mode 100644 include/linux/seg6_genl.h create mode 100644 include/linux/seg6_hmac.h create mode 100644 include/linux/seg6_iptunnel.h diff --git a/include/linux/seg6.h b/include/linux/seg6.h new file mode 100644 index 00000000..07152792 --- /dev/null +++ b/include/linux/seg6.h @@ -0,0 +1,54 @@ +/* + * SR-IPv6 implementation + * + * Author: + * David Lebrun + * + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#ifndef _LINUX_SEG6_H +#define _LINUX_SEG6_H + +#include +#include /* For struct in6_addr. */ + +/* + * SRH + */ +struct ipv6_sr_hdr { + __u8 nexthdr; + __u8 hdrlen; + __u8 type; + __u8 segments_left; + __u8 first_segment; + __u8 flags; + __u16 reserved; + + struct in6_addr segments[0]; +}; + +#define SR6_FLAG1_PROTECTED (1 << 6) +#define SR6_FLAG1_OAM (1 << 5) +#define SR6_FLAG1_ALERT (1 << 4) +#define SR6_FLAG1_HMAC (1 << 3) + +#define SR6_TLV_INGRESS 1 +#define SR6_TLV_EGRESS 2 +#define SR6_TLV_OPAQUE 3 +#define SR6_TLV_PADDING 4 +#define SR6_TLV_HMAC 5 + +#define sr_has_hmac(srh) ((srh)->flags & SR6_FLAG1_HMAC) + +struct sr6_tlv { + __u8 type; + __u8 len; + __u8 data[0]; +}; + +#endif diff --git a/include/linux/seg6_genl.h b/include/linux/seg6_genl.h new file mode 100644 index 00000000..99382f94 --- /dev/null +++ b/include/linux/seg6_genl.h @@ -0,0 +1,32 @@ +#ifndef _LINUX_SEG6_GENL_H +#define _LINUX_SEG6_GENL_H + +#define SEG6_GENL_NAME "SEG6" +#define SEG6_GENL_VERSION 0x1 + +enum { + SEG6_ATTR_UNSPEC, + SEG6_ATTR_DST, + SEG6_ATTR_DSTLEN, + SEG6_ATTR_HMACKEYID, + 
SEG6_ATTR_SECRET, + SEG6_ATTR_SECRETLEN, + SEG6_ATTR_ALGID, + SEG6_ATTR_HMACINFO, + __SEG6_ATTR_MAX, +}; + +#define SEG6_ATTR_MAX (__SEG6_ATTR_MAX - 1) + +enum { + SEG6_CMD_UNSPEC, + SEG6_CMD_SETHMAC, + SEG6_CMD_DUMPHMAC, + SEG6_CMD_SET_TUNSRC, + SEG6_CMD_GET_TUNSRC, + __SEG6_CMD_MAX, +}; + +#define SEG6_CMD_MAX (__SEG6_CMD_MAX - 1) + +#endif diff --git a/include/linux/seg6_hmac.h b/include/linux/seg6_hmac.h new file mode 100644 index 00000000..704f93e8 --- /dev/null +++ b/include/linux/seg6_hmac.h @@ -0,0 +1,22 @@ +#ifndef _LINUX_SEG6_HMAC_H +#define _LINUX_SEG6_HMAC_H + +#include +#include + +#define SEG6_HMAC_SECRET_LEN 64 +#define SEG6_HMAC_FIELD_LEN 32 + +struct sr6_tlv_hmac { + struct sr6_tlv tlvhdr; + __u16 reserved; + __be32 hmackeyid; + __u8 hmac[SEG6_HMAC_FIELD_LEN]; +}; + +enum { + SEG6_HMAC_ALGO_SHA1 = 1, + SEG6_HMAC_ALGO_SHA256 = 2, +}; + +#endif diff --git a/include/linux/seg6_iptunnel.h b/include/linux/seg6_iptunnel.h new file mode 100644 index 00000000..c9bba0e7 --- /dev/null +++ b/include/linux/seg6_iptunnel.h @@ -0,0 +1,39 @@ +/* + * SR-IPv6 implementation + * + * Author: + * David Lebrun + * + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#ifndef _LINUX_SEG6_IPTUNNEL_H +#define _LINUX_SEG6_IPTUNNEL_H + +#include /* For struct ipv6_sr_hdr. 
*/ + +enum { + SEG6_IPTUNNEL_UNSPEC, + SEG6_IPTUNNEL_SRH, + __SEG6_IPTUNNEL_MAX, +}; +#define SEG6_IPTUNNEL_MAX (__SEG6_IPTUNNEL_MAX - 1) + +struct seg6_iptunnel_encap { + int mode; + struct ipv6_sr_hdr srh[0]; +}; + +#define SEG6_IPTUN_ENCAP_SIZE(x) ((sizeof(*x)) + (((x)->srh->hdrlen + 1) << 3)) + +enum { + SEG6_IPTUN_MODE_INLINE, + SEG6_IPTUN_MODE_ENCAP, +}; + + +#endif From 93863328238c249077f9076e27688231930bd549 Mon Sep 17 00:00:00 2001 From: David Lebrun Date: Sat, 15 Apr 2017 12:17:15 +0200 Subject: [PATCH 14/22] ip: add ip sr command to control SR-IPv6 internal structures This patch adds commands to support the tunnel source properties ("ip sr tunsrc") and the HMAC key -> secret, algorithm binding ("ip sr hmac"). Signed-off-by: David Lebrun --- ip/Makefile | 2 +- ip/ip.c | 3 +- ip/ip_common.h | 1 + ip/ipseg6.c | 238 +++++++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 242 insertions(+), 2 deletions(-) create mode 100644 ip/ipseg6.c diff --git a/ip/Makefile b/ip/Makefile index 035d42c7..e08c170a 100644 --- a/ip/Makefile +++ b/ip/Makefile @@ -9,7 +9,7 @@ IPOBJ=ip.o ipaddress.o ipaddrlabel.o iproute.o iprule.o ipnetns.o \ link_iptnl.o link_gre6.o iplink_bond.o iplink_bond_slave.o iplink_hsr.o \ iplink_bridge.o iplink_bridge_slave.o ipfou.o iplink_ipvlan.o \ iplink_geneve.o iplink_vrf.o iproute_lwtunnel.o ipmacsec.o ipila.o \ - ipvrf.o iplink_xstats.o + ipvrf.o iplink_xstats.o ipseg6.o RTMONOBJ=rtmon.o diff --git a/ip/ip.c b/ip/ip.c index 07050b07..7c14a8ec 100644 --- a/ip/ip.c +++ b/ip/ip.c @@ -52,7 +52,7 @@ static void usage(void) "where OBJECT := { link | address | addrlabel | route | rule | neigh | ntable |\n" " tunnel | tuntap | maddress | mroute | mrule | monitor | xfrm |\n" " netns | l2tp | fou | macsec | tcp_metrics | token | netconf | ila |\n" -" vrf }\n" +" vrf | sr }\n" " OPTIONS := { -V[ersion] | -s[tatistics] | -d[etails] | -r[esolve] |\n" " -h[uman-readable] | -iec |\n" " -f[amily] { inet | inet6 | ipx | dnet | mpls | bridge | link } 
|\n" @@ -101,6 +101,7 @@ static const struct cmd { { "netns", do_netns }, { "netconf", do_ipnetconf }, { "vrf", do_ipvrf}, + { "sr", do_seg6 }, { "help", do_help }, { 0 } }; diff --git a/ip/ip_common.h b/ip/ip_common.h index 5a39623a..202fc399 100644 --- a/ip/ip_common.h +++ b/ip/ip_common.h @@ -60,6 +60,7 @@ int do_iptoken(int argc, char **argv); int do_ipvrf(int argc, char **argv); void vrf_reset(void); int netns_identify_pid(const char *pidstr, char *name, int len); +int do_seg6(int argc, char **argv); int iplink_get(unsigned int flags, char *name, __u32 filt_mask); int iplink_ifla_xstats(int argc, char **argv); diff --git a/ip/ipseg6.c b/ip/ipseg6.c new file mode 100644 index 00000000..a8f5c691 --- /dev/null +++ b/ip/ipseg6.c @@ -0,0 +1,238 @@ +/* + * seg6.c "ip sr/seg6" + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 2 as published by the Free Software Foundation; + * + * Author: David Lebrun + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include "utils.h" +#include "ip_common.h" +#include "libgenl.h" + +#define HMAC_KEY_PROMPT "Enter secret for HMAC key ID (blank to delete): " + +static void usage(void) +{ + fprintf(stderr, "Usage: ip sr { COMMAND | help }\n"); + fprintf(stderr, " ip sr hmac show\n"); + fprintf(stderr, " ip sr hmac set KEYID ALGO\n"); + fprintf(stderr, " ip sr tunsrc show\n"); + fprintf(stderr, " ip sr tunsrc set ADDRESS\n"); + fprintf(stderr, "where ALGO := { sha1 | sha256 }\n"); + exit(-1); +} + +static struct rtnl_handle grth = { .fd = -1 }; +static int genl_family = -1; + +#define SEG6_REQUEST(_req, _bufsiz, _cmd, _flags) \ + GENL_REQUEST(_req, _bufsiz, genl_family, 0, \ + SEG6_GENL_VERSION, _cmd, _flags) + +static struct { + unsigned int cmd; + struct in6_addr addr; + __u32 keyid; + const char *pass; + __u8 alg_id; +} opts; + +static int 
process_msg(const struct sockaddr_nl *who, struct nlmsghdr *n, + void *arg) +{ + struct rtattr *attrs[SEG6_ATTR_MAX + 1]; + struct genlmsghdr *ghdr; + FILE *fp = (FILE *)arg; + int len = n->nlmsg_len; + + if (n->nlmsg_type != genl_family) + return -1; + + len -= NLMSG_LENGTH(GENL_HDRLEN); + if (len < 0) + return -1; + + ghdr = NLMSG_DATA(n); + + parse_rtattr(attrs, SEG6_ATTR_MAX, (void *)ghdr + GENL_HDRLEN, len); + + switch (ghdr->cmd) { + case SEG6_CMD_DUMPHMAC: + { + char secret[64]; + char *algstr; + __u8 slen = rta_getattr_u8(attrs[SEG6_ATTR_SECRETLEN]); + __u8 alg_id = rta_getattr_u8(attrs[SEG6_ATTR_ALGID]); + + memset(secret, 0, 64); + + if (slen > 63) { + fprintf(stderr, "HMAC secret length %d > 63, " + "truncated\n", slen); + slen = 63; + } + memcpy(secret, RTA_DATA(attrs[SEG6_ATTR_SECRET]), slen); + + switch (alg_id) { + case SEG6_HMAC_ALGO_SHA1: + algstr = "sha1"; + break; + case SEG6_HMAC_ALGO_SHA256: + algstr = "sha256"; + break; + default: + algstr = ""; + } + + fprintf(fp, "hmac %u ", + rta_getattr_u32(attrs[SEG6_ATTR_HMACKEYID])); + fprintf(fp, "algo %s ", algstr); + fprintf(fp, "secret \"%s\" ", secret); + + fprintf(fp, "\n"); + break; + } + case SEG6_CMD_GET_TUNSRC: + { + fprintf(fp, "tunsrc addr %s\n", + rt_addr_n2a(AF_INET6, 16, + RTA_DATA(attrs[SEG6_ATTR_DST]))); + break; + } + } + + return 0; +} + +static int seg6_do_cmd(void) +{ + SEG6_REQUEST(req, 1024, opts.cmd, NLM_F_REQUEST); + int repl = 0, dump = 0; + + if (genl_family < 0) { + if (rtnl_open_byproto(&grth, 0, NETLINK_GENERIC) < 0) { + fprintf(stderr, "Cannot open generic netlink socket\n"); + exit(1); + } + genl_family = genl_resolve_family(&grth, SEG6_GENL_NAME); + if (genl_family < 0) + exit(1); + req.n.nlmsg_type = genl_family; + } + + switch (opts.cmd) { + case SEG6_CMD_SETHMAC: + { + addattr32(&req.n, sizeof(req), SEG6_ATTR_HMACKEYID, opts.keyid); + addattr8(&req.n, sizeof(req), SEG6_ATTR_SECRETLEN, + strlen(opts.pass)); + addattr8(&req.n, sizeof(req), SEG6_ATTR_ALGID, opts.alg_id); 
+ if (strlen(opts.pass)) + addattr_l(&req.n, sizeof(req), SEG6_ATTR_SECRET, + opts.pass, strlen(opts.pass)); + break; + } + case SEG6_CMD_SET_TUNSRC: + addattr_l(&req.n, sizeof(req), SEG6_ATTR_DST, &opts.addr, + sizeof(struct in6_addr)); + break; + case SEG6_CMD_DUMPHMAC: + dump = 1; + break; + case SEG6_CMD_GET_TUNSRC: + repl = 1; + break; + } + + if (!repl && !dump) { + if (rtnl_talk(&grth, &req.n, NULL, 0) < 0) + return -1; + } else if (repl) { + if (rtnl_talk(&grth, &req.n, &req.n, sizeof(req)) < 0) + return -2; + if (process_msg(NULL, &req.n, stdout) < 0) { + fprintf(stderr, "Error parsing reply\n"); + exit(1); + } + } else { + req.n.nlmsg_flags |= NLM_F_DUMP; + req.n.nlmsg_seq = grth.dump = ++grth.seq; + if (rtnl_send(&grth, &req, req.n.nlmsg_len) < 0) { + perror("Failed to send dump request"); + exit(1); + } + + if (rtnl_dump_filter(&grth, process_msg, stdout) < 0) { + fprintf(stderr, "Dump terminated\n"); + exit(1); + } + } + + return 0; +} + +int do_seg6(int argc, char **argv) +{ + if (argc < 1 || matches(*argv, "help") == 0) + usage(); + + memset(&opts, 0, sizeof(opts)); + + if (matches(*argv, "hmac") == 0) { + NEXT_ARG(); + if (matches(*argv, "show") == 0) { + opts.cmd = SEG6_CMD_DUMPHMAC; + } else if (matches(*argv, "set") == 0) { + NEXT_ARG(); + if (get_u32(&opts.keyid, *argv, 0) || opts.keyid == 0) + invarg("hmac KEYID value is invalid", *argv); + NEXT_ARG(); + if (strcmp(*argv, "sha1") == 0) { + opts.alg_id = SEG6_HMAC_ALGO_SHA1; + } else if (strcmp(*argv, "sha256") == 0) { + opts.alg_id = SEG6_HMAC_ALGO_SHA256; + } else { + invarg("hmac ALGO value is invalid", *argv); + } + opts.cmd = SEG6_CMD_SETHMAC; + opts.pass = getpass(HMAC_KEY_PROMPT); + } else { + invarg("unknown", *argv); + } + } else if (matches(*argv, "tunsrc") == 0) { + NEXT_ARG(); + if (matches(*argv, "show") == 0) { + opts.cmd = SEG6_CMD_GET_TUNSRC; + } else if (matches(*argv, "set") == 0) { + NEXT_ARG(); + opts.cmd = SEG6_CMD_SET_TUNSRC; + if (!inet_get_addr(*argv, NULL, &opts.addr)) + 
invarg("tunsrc ADDRESS value is invalid", + *argv); + } else { + invarg("unknown", *argv); + } + } else { + invarg("unknown", *argv); + } + + return seg6_do_cmd(); +} From e8493916a8ede9970732e33ea52d30b83071f401 Mon Sep 17 00:00:00 2001 From: David Lebrun Date: Sat, 15 Apr 2017 12:17:16 +0200 Subject: [PATCH 15/22] iproute: add support for SR-IPv6 lwtunnel encapsulation This patch adds support for SEG6 encapsulation type ("ip route add ... encap seg6 ..."). Signed-off-by: David Lebrun --- ip/iproute.c | 6 +- ip/iproute_lwtunnel.c | 154 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 158 insertions(+), 2 deletions(-) diff --git a/ip/iproute.c b/ip/iproute.c index eedcc2f6..b4ca2911 100644 --- a/ip/iproute.c +++ b/ip/iproute.c @@ -99,8 +99,10 @@ static void usage(void) fprintf(stderr, "TIME := NUMBER[s|ms]\n"); fprintf(stderr, "BOOL := [1|0]\n"); fprintf(stderr, "FEATURES := ecn\n"); - fprintf(stderr, "ENCAPTYPE := [ mpls | ip | ip6 ]\n"); - fprintf(stderr, "ENCAPHDR := [ MPLSLABEL ]\n"); + fprintf(stderr, "ENCAPTYPE := [ mpls | ip | ip6 | seg6 ]\n"); + fprintf(stderr, "ENCAPHDR := [ MPLSLABEL | SEG6HDR ]\n"); + fprintf(stderr, "SEG6HDR := [ mode SEGMODE ] segs ADDR1,ADDRi,ADDRn [hmac HMACKEYID] [cleanup]\n"); + fprintf(stderr, "SEGMODE := [ encap | inline ]\n"); exit(-1); } diff --git a/ip/iproute_lwtunnel.c b/ip/iproute_lwtunnel.c index 845a115e..1395f033 100644 --- a/ip/iproute_lwtunnel.c +++ b/ip/iproute_lwtunnel.c @@ -19,6 +19,13 @@ #include #include #include + +#ifndef __USE_KERNEL_IPV6_DEFS +#define __USE_KERNEL_IPV6_DEFS +#endif +#include +#include +#include #include #include "rt_names.h" @@ -39,6 +46,8 @@ static const char *format_encap_type(int type) return "ila"; case LWTUNNEL_ENCAP_BPF: return "bpf"; + case LWTUNNEL_ENCAP_SEG6: + return "seg6"; default: return "unknown"; } @@ -69,12 +78,49 @@ static int read_encap_type(const char *name) return LWTUNNEL_ENCAP_ILA; else if (strcmp(name, "bpf") == 0) return LWTUNNEL_ENCAP_BPF; + else if 
(strcmp(name, "seg6") == 0) + return LWTUNNEL_ENCAP_SEG6; else if (strcmp(name, "help") == 0) encap_type_usage(); return LWTUNNEL_ENCAP_NONE; } +static void print_encap_seg6(FILE *fp, struct rtattr *encap) +{ + struct rtattr *tb[SEG6_IPTUNNEL_MAX+1]; + struct seg6_iptunnel_encap *tuninfo; + struct ipv6_sr_hdr *srh; + int i; + + parse_rtattr_nested(tb, SEG6_IPTUNNEL_MAX, encap); + + if (!tb[SEG6_IPTUNNEL_SRH]) + return; + + tuninfo = RTA_DATA(tb[SEG6_IPTUNNEL_SRH]); + fprintf(fp, "mode %s ", + (tuninfo->mode == SEG6_IPTUN_MODE_ENCAP) ? "encap" : "inline"); + + srh = tuninfo->srh; + + fprintf(fp, "segs %d [ ", srh->first_segment + 1); + + for (i = srh->first_segment; i >= 0; i--) + fprintf(fp, "%s ", + rt_addr_n2a(AF_INET6, 16, &srh->segments[i])); + + fprintf(fp, "] "); + + if (sr_has_hmac(srh)) { + unsigned int offset = ((srh->hdrlen + 1) << 3) - 40; + struct sr6_tlv_hmac *tlv; + + tlv = (struct sr6_tlv_hmac *)((char *)srh + offset); + fprintf(fp, "hmac 0x%X ", ntohl(tlv->hmackeyid)); + } +} + static void print_encap_mpls(FILE *fp, struct rtattr *encap) { struct rtattr *tb[MPLS_IPTUNNEL_MAX+1]; @@ -241,9 +287,114 @@ void lwt_print_encap(FILE *fp, struct rtattr *encap_type, case LWTUNNEL_ENCAP_BPF: print_encap_bpf(fp, encap); break; + case LWTUNNEL_ENCAP_SEG6: + print_encap_seg6(fp, encap); + break; } } +static int parse_encap_seg6(struct rtattr *rta, size_t len, int *argcp, + char ***argvp) +{ + int mode_ok = 0, segs_ok = 0, hmac_ok = 0; + struct seg6_iptunnel_encap *tuninfo; + struct ipv6_sr_hdr *srh; + char **argv = *argvp; + char segbuf[1024]; + int argc = *argcp; + int encap = -1; + __u32 hmac = 0; + int nsegs = 0; + int srhlen; + char *s; + int i; + + while (argc > 0) { + if (strcmp(*argv, "mode") == 0) { + NEXT_ARG(); + if (mode_ok++) + duparg2("mode", *argv); + if (strcmp(*argv, "encap") == 0) + encap = 1; + else if (strcmp(*argv, "inline") == 0) + encap = 0; + else + invarg("\"mode\" value is invalid\n", *argv); + } else if (strcmp(*argv, "segs") == 0) { + 
NEXT_ARG(); + if (segs_ok++) + duparg2("segs", *argv); + if (encap == -1) + invarg("\"segs\" provided before \"mode\"\n", + *argv); + + strncpy(segbuf, *argv, 1024); + segbuf[1023] = 0; + } else if (strcmp(*argv, "hmac") == 0) { + NEXT_ARG(); + if (hmac_ok++) + duparg2("hmac", *argv); + get_u32(&hmac, *argv, 0); + } else { + break; + } + argc--; argv++; + } + + s = segbuf; + for (i = 0; *s; *s++ == ',' ? i++ : *s); + nsegs = i + 1; + + if (!encap) + nsegs++; + + srhlen = 8 + 16*nsegs; + + if (hmac) + srhlen += 40; + + tuninfo = malloc(sizeof(*tuninfo) + srhlen); + memset(tuninfo, 0, sizeof(*tuninfo) + srhlen); + + if (encap) + tuninfo->mode = SEG6_IPTUN_MODE_ENCAP; + else + tuninfo->mode = SEG6_IPTUN_MODE_INLINE; + + srh = tuninfo->srh; + srh->hdrlen = (srhlen >> 3) - 1; + srh->type = 4; + srh->segments_left = nsegs - 1; + srh->first_segment = nsegs - 1; + + if (hmac) + srh->flags |= SR6_FLAG1_HMAC; + + i = srh->first_segment; + for (s = strtok(segbuf, ","); s; s = strtok(NULL, ",")) { + inet_get_addr(s, NULL, &srh->segments[i]); + i--; + } + + if (hmac) { + struct sr6_tlv_hmac *tlv; + + tlv = (struct sr6_tlv_hmac *)((char *)srh + srhlen - 40); + tlv->tlvhdr.type = SR6_TLV_HMAC; + tlv->tlvhdr.len = 38; + tlv->hmackeyid = htonl(hmac); + } + + rta_addattr_l(rta, len, SEG6_IPTUNNEL_SRH, tuninfo, + sizeof(*tuninfo) + srhlen); + free(tuninfo); + + *argcp = argc + 1; + *argvp = argv - 1; + + return 0; +} + static int parse_encap_mpls(struct rtattr *rta, size_t len, int *argcp, char ***argvp) { @@ -600,6 +751,9 @@ int lwt_parse_encap(struct rtattr *rta, size_t len, int *argcp, char ***argvp) if (parse_encap_bpf(rta, len, &argc, &argv) < 0) exit(-1); break; + case LWTUNNEL_ENCAP_SEG6: + parse_encap_seg6(rta, len, &argc, &argv); + break; default: fprintf(stderr, "Error: unsupported encap type\n"); break; From e1b7f883e50de51b7db3d1527b159bf81567fb64 Mon Sep 17 00:00:00 2001 From: David Lebrun Date: Sat, 15 Apr 2017 12:17:17 +0200 Subject: [PATCH 16/22] man: add 
documentation for IPv6 SR commands This patch adds information about seg6 encapsulation in the ip-route manual, as well as the ip-sr manual page. Signed-off-by: David Lebrun --- man/man8/ip-route.8.in | 39 ++++++++++++++++++++++++++-- man/man8/ip-sr.8 | 58 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 95 insertions(+), 2 deletions(-) create mode 100644 man/man8/ip-sr.8 diff --git a/man/man8/ip-route.8.in b/man/man8/ip-route.8.in index d2a44acf..c8eb38a7 100644 --- a/man/man8/ip-route.8.in +++ b/man/man8/ip-route.8.in @@ -176,7 +176,7 @@ throw " | " unreachable " | " prohibit " | " blackhole " | " nat " ]" .ti -8 .IR ENCAP " := [ " -.IR MPLS " | " IP | " BPF " ] " +.IR MPLS " | " IP " | " BPF " | " SEG6 " ] " .ti -8 .IR ENCAP_MPLS " := " @@ -197,7 +197,6 @@ throw " | " unreachable " | " prohibit " | " blackhole " | " nat " ]" .B ttl .IR TTL " ]" - .ti -8 .IR ENCAP_BPF " := " .BR bpf " [ " @@ -210,6 +209,16 @@ throw " | " unreachable " | " prohibit " | " blackhole " | " nat " ]" .B headroom .IR SIZE " ]" +.ti -8 +.IR ENCAP_SEG6 " := " +.B seg6 +.BR mode " [ " +.BR encap " | " inline " ] " +.B segs +.IR SEGMENTS " [ " +.B hmac +.IR KEYID " ]" + .SH DESCRIPTION .B ip route is used to manipulate entries in the kernel routing tables. @@ -656,6 +665,8 @@ is a string specifying the supported encapsulation type. Namely: .BI bpf - Execution of BPF program .sp +.BI seg6 +- encapsulation type IPv6 Segment Routing .in -8 .I ENCAPHDR @@ -712,6 +723,25 @@ is a set of encapsulation attributes specific to the .in -2 .sp +.B seg6 +.in +2 +.B mode inline +- Directly insert Segment Routing Header after IPv6 header +.sp + +.B mode encap +- Encapsulate packet in an outer IPv6 header with SRH +.sp + +.I SEGMENTS +- List of comma-separated IPv6 addresses +.sp + +.I KEYID +- Numerical value in decimal representation. See \fBip-sr\fR(8). 
+.in -2 +.sp + .in -8 .TP @@ -1001,6 +1031,11 @@ ip route add 10.1.1.0/30 encap mpls 200/300 via 10.1.1.1 dev eth0 .RS 4 Adds an ipv4 route with mpls encapsulation attributes attached to it. .RE +.PP +ip -6 route add 2001:db8:1::/64 encap seg6 mode encap segs 2001:db8:42::1,2001:db8:ffff::2 dev eth0 +.RS 4 +Adds an IPv6 route with SRv6 encapsulation and two segments attached. +.RE .SH SEE ALSO .br .BR ip (8) diff --git a/man/man8/ip-sr.8 b/man/man8/ip-sr.8 new file mode 100644 index 00000000..6be1cc54 --- /dev/null +++ b/man/man8/ip-sr.8 @@ -0,0 +1,58 @@ +.TH IP\-SR 8 "14 Apr 2017" "iproute2" "Linux" +.SH "NAME" +ip-sr \- IPv6 Segment Routing management +.SH SYNOPSIS +.sp +.ad l +.in +8 +.ti -8 +.B ip sr +.RI " { " COMMAND " | " +.BR help " }" +.sp +.ti -8 + +.ti -8 +.B ip sr hmac show + +.ti -8 +.B ip sr hmac set +.I KEYID ALGO + +.ti -8 +.B ip sr tunsrc show + +.ti -8 +.B ip sr tunsrc set +.I ADDRESS + +.SH DESCRIPTION +The \fBip sr\fR command is used to configure IPv6 Segment Routing (SRv6) +internal parameters. +.PP +Those parameters include the mapping between an HMAC key ID and its associated +hashing algorithm and secret, and the IPv6 address to use as source for encapsulated +packets. +.PP +The \fBip sr hmac set\fR command prompts for a passphrase that will be used as the +HMAC secret for the corresponding key ID. A blank passphrase removes the mapping. +The currently supported algorithms for \fIALGO\fR are \fBsha1\fR and \fBsha256\fR. +.PP +If the tunnel source is set to the address :: (which is the default), then an address +of the egress interface will be selected. As this operation may hinder performances, +it is recommended to set a non-default address. 
+ +.SH EXAMPLES +.PP +.SS Configure an HMAC mapping for key ID 42 and hashing algorithm SHA-256 +.nf +# ip sr hmac set 42 sha256 +.PP +.SS Set the tunnel source address to 2001:db8::1 +.nf +# ip sr tunsrc set 2001:db8::1 +.SH SEE ALSO +.br +.BR ip-route (8) +.SH AUTHOR +David Lebrun From 5b0aa8873709f29bed1c106090c32be24f1b2424 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Fri, 21 Apr 2017 17:41:33 -0700 Subject: [PATCH 17/22] update kernel headers from net-next Signed-off-by: Stephen Hemminger --- include/linux/bpf.h | 1 + include/linux/if_tunnel.h | 3 +++ include/linux/xfrm.h | 8 ++++++++ 3 files changed, 12 insertions(+) diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 16dc501f..eedf24d6 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -603,6 +603,7 @@ struct __sk_buff { __u32 tc_classid; __u32 data; __u32 data_end; + __u32 napi_id; }; struct bpf_tunnel_key { diff --git a/include/linux/if_tunnel.h b/include/linux/if_tunnel.h index 4f975f57..7375335a 100644 --- a/include/linux/if_tunnel.h +++ b/include/linux/if_tunnel.h @@ -75,6 +75,7 @@ enum { IFLA_IPTUN_ENCAP_SPORT, IFLA_IPTUN_ENCAP_DPORT, IFLA_IPTUN_COLLECT_METADATA, + IFLA_IPTUN_FWMARK, __IFLA_IPTUN_MAX, }; #define IFLA_IPTUN_MAX (__IFLA_IPTUN_MAX - 1) @@ -132,6 +133,7 @@ enum { IFLA_GRE_ENCAP_DPORT, IFLA_GRE_COLLECT_METADATA, IFLA_GRE_IGNORE_DF, + IFLA_GRE_FWMARK, __IFLA_GRE_MAX, }; @@ -147,6 +149,7 @@ enum { IFLA_VTI_OKEY, IFLA_VTI_LOCAL, IFLA_VTI_REMOTE, + IFLA_VTI_FWMARK, __IFLA_VTI_MAX, }; diff --git a/include/linux/xfrm.h b/include/linux/xfrm.h index d2dd1fd6..3af99e02 100644 --- a/include/linux/xfrm.h +++ b/include/linux/xfrm.h @@ -303,6 +303,7 @@ enum xfrm_attr_type_t { XFRMA_PROTO, /* __u8 */ XFRMA_ADDRESS_FILTER, /* struct xfrm_address_filter */ XFRMA_PAD, + XFRMA_OFFLOAD_DEV, /* struct xfrm_state_offload */ __XFRMA_MAX #define XFRMA_MAX (__XFRMA_MAX - 1) @@ -494,6 +495,13 @@ struct xfrm_address_filter { __u8 dplen; }; +struct xfrm_user_offload { + int ifindex; + 
__u8 flags; +}; +#define XFRM_OFFLOAD_IPV6 1 +#define XFRM_OFFLOAD_INBOUND 2 + /* backwards compatibility for userspace */ #define XFRMGRP_ACQUIRE 1 #define XFRMGRP_EXPIRE 2 From 0e3cdd9ce0a482d218d06b9dbb20765aa32849bc Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Fri, 21 Apr 2017 17:47:30 -0700 Subject: [PATCH 18/22] remove unused header file sysctl.h Not referred to in current source tree. Signed-off-by: Stephen Hemminger --- include/linux/sysctl.h | 933 ----------------------------------------- 1 file changed, 933 deletions(-) delete mode 100644 include/linux/sysctl.h diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h deleted file mode 100644 index 9b8de52d..00000000 --- a/include/linux/sysctl.h +++ /dev/null @@ -1,933 +0,0 @@ -/* - * sysctl.h: General linux system control interface - * - * Begun 24 March 1995, Stephen Tweedie - * - **************************************************************** - **************************************************************** - ** - ** WARNING: - ** The values in this file are exported to user space via - ** the sysctl() binary interface. Do *NOT* change the - ** numbering of any existing values here, and do not change - ** any numbers within any one set of values. If you have to - ** redefine an existing interface, use a new number for it. - ** The kernel will then return -ENOTDIR to any application using - ** the old binary interface. - ** - **************************************************************** - **************************************************************** - */ - -#ifndef _LINUX_SYSCTL_H -#define _LINUX_SYSCTL_H - -#include -#include - - -#define CTL_MAXNAME 10 /* how many path components do we allow in a - call to sysctl? In other words, what is - the largest acceptable value for the nlen - member of a struct __sysctl_args to have? 
*/ - -struct __sysctl_args { - int *name; - int nlen; - void *oldval; - size_t *oldlenp; - void *newval; - size_t newlen; - unsigned long __unused[4]; -}; - -/* Define sysctl names first */ - -/* Top-level names: */ - -enum -{ - CTL_KERN=1, /* General kernel info and control */ - CTL_VM=2, /* VM management */ - CTL_NET=3, /* Networking */ - CTL_PROC=4, /* removal breaks strace(1) compilation */ - CTL_FS=5, /* Filesystems */ - CTL_DEBUG=6, /* Debugging */ - CTL_DEV=7, /* Devices */ - CTL_BUS=8, /* Busses */ - CTL_ABI=9, /* Binary emulation */ - CTL_CPU=10, /* CPU stuff (speed scaling, etc) */ - CTL_ARLAN=254, /* arlan wireless driver */ - CTL_S390DBF=5677, /* s390 debug */ - CTL_SUNRPC=7249, /* sunrpc debug */ - CTL_PM=9899, /* frv power management */ - CTL_FRV=9898, /* frv specific sysctls */ -}; - -/* CTL_BUS names: */ -enum -{ - CTL_BUS_ISA=1 /* ISA */ -}; - -/* /proc/sys/fs/inotify/ */ -enum -{ - INOTIFY_MAX_USER_INSTANCES=1, /* max instances per user */ - INOTIFY_MAX_USER_WATCHES=2, /* max watches per user */ - INOTIFY_MAX_QUEUED_EVENTS=3 /* max queued events per instance */ -}; - -/* CTL_KERN names: */ -enum -{ - KERN_OSTYPE=1, /* string: system version */ - KERN_OSRELEASE=2, /* string: system release */ - KERN_OSREV=3, /* int: system revision */ - KERN_VERSION=4, /* string: compile time info */ - KERN_SECUREMASK=5, /* struct: maximum rights mask */ - KERN_PROF=6, /* table: profiling information */ - KERN_NODENAME=7, /* string: hostname */ - KERN_DOMAINNAME=8, /* string: domainname */ - - KERN_PANIC=15, /* int: panic timeout */ - KERN_REALROOTDEV=16, /* real root device to mount after initrd */ - - KERN_SPARC_REBOOT=21, /* reboot command on Sparc */ - KERN_CTLALTDEL=22, /* int: allow ctl-alt-del to reboot */ - KERN_PRINTK=23, /* struct: control printk logging parameters */ - KERN_NAMETRANS=24, /* Name translation */ - KERN_PPC_HTABRECLAIM=25, /* turn htab reclaimation on/off on PPC */ - KERN_PPC_ZEROPAGED=26, /* turn idle page zeroing on/off on PPC */ - 
KERN_PPC_POWERSAVE_NAP=27, /* use nap mode for power saving */ - KERN_MODPROBE=28, /* string: modprobe path */ - KERN_SG_BIG_BUFF=29, /* int: sg driver reserved buffer size */ - KERN_ACCT=30, /* BSD process accounting parameters */ - KERN_PPC_L2CR=31, /* l2cr register on PPC */ - - KERN_RTSIGNR=32, /* Number of rt sigs queued */ - KERN_RTSIGMAX=33, /* Max queuable */ - - KERN_SHMMAX=34, /* long: Maximum shared memory segment */ - KERN_MSGMAX=35, /* int: Maximum size of a messege */ - KERN_MSGMNB=36, /* int: Maximum message queue size */ - KERN_MSGPOOL=37, /* int: Maximum system message pool size */ - KERN_SYSRQ=38, /* int: Sysreq enable */ - KERN_MAX_THREADS=39, /* int: Maximum nr of threads in the system */ - KERN_RANDOM=40, /* Random driver */ - KERN_SHMALL=41, /* int: Maximum size of shared memory */ - KERN_MSGMNI=42, /* int: msg queue identifiers */ - KERN_SEM=43, /* struct: sysv semaphore limits */ - KERN_SPARC_STOP_A=44, /* int: Sparc Stop-A enable */ - KERN_SHMMNI=45, /* int: shm array identifiers */ - KERN_OVERFLOWUID=46, /* int: overflow UID */ - KERN_OVERFLOWGID=47, /* int: overflow GID */ - KERN_SHMPATH=48, /* string: path to shm fs */ - KERN_HOTPLUG=49, /* string: path to uevent helper (deprecated) */ - KERN_IEEE_EMULATION_WARNINGS=50, /* int: unimplemented ieee instructions */ - KERN_S390_USER_DEBUG_LOGGING=51, /* int: dumps of user faults */ - KERN_CORE_USES_PID=52, /* int: use core or core.%pid */ - KERN_TAINTED=53, /* int: various kernel tainted flags */ - KERN_CADPID=54, /* int: PID of the process to notify on CAD */ - KERN_PIDMAX=55, /* int: PID # limit */ - KERN_CORE_PATTERN=56, /* string: pattern for core-file names */ - KERN_PANIC_ON_OOPS=57, /* int: whether we will panic on an oops */ - KERN_HPPA_PWRSW=58, /* int: hppa soft-power enable */ - KERN_HPPA_UNALIGNED=59, /* int: hppa unaligned-trap enable */ - KERN_PRINTK_RATELIMIT=60, /* int: tune printk ratelimiting */ - KERN_PRINTK_RATELIMIT_BURST=61, /* int: tune printk ratelimiting */ - 
KERN_PTY=62, /* dir: pty driver */ - KERN_NGROUPS_MAX=63, /* int: NGROUPS_MAX */ - KERN_SPARC_SCONS_PWROFF=64, /* int: serial console power-off halt */ - KERN_HZ_TIMER=65, /* int: hz timer on or off */ - KERN_UNKNOWN_NMI_PANIC=66, /* int: unknown nmi panic flag */ - KERN_BOOTLOADER_TYPE=67, /* int: boot loader type */ - KERN_RANDOMIZE=68, /* int: randomize virtual address space */ - KERN_SETUID_DUMPABLE=69, /* int: behaviour of dumps for setuid core */ - KERN_SPIN_RETRY=70, /* int: number of spinlock retries */ - KERN_ACPI_VIDEO_FLAGS=71, /* int: flags for setting up video after ACPI sleep */ - KERN_IA64_UNALIGNED=72, /* int: ia64 unaligned userland trap enable */ - KERN_COMPAT_LOG=73, /* int: print compat layer messages */ - KERN_MAX_LOCK_DEPTH=74, /* int: rtmutex's maximum lock depth */ - KERN_NMI_WATCHDOG=75, /* int: enable/disable nmi watchdog */ - KERN_PANIC_ON_NMI=76, /* int: whether we will panic on an unrecovered */ - KERN_PANIC_ON_WARN=77, /* int: call panic() in WARN() functions */ -}; - - - -/* CTL_VM names: */ -enum -{ - VM_UNUSED1=1, /* was: struct: Set vm swapping control */ - VM_UNUSED2=2, /* was; int: Linear or sqrt() swapout for hogs */ - VM_UNUSED3=3, /* was: struct: Set free page thresholds */ - VM_UNUSED4=4, /* Spare */ - VM_OVERCOMMIT_MEMORY=5, /* Turn off the virtual memory safety limit */ - VM_UNUSED5=6, /* was: struct: Set buffer memory thresholds */ - VM_UNUSED7=7, /* was: struct: Set cache memory thresholds */ - VM_UNUSED8=8, /* was: struct: Control kswapd behaviour */ - VM_UNUSED9=9, /* was: struct: Set page table cache parameters */ - VM_PAGE_CLUSTER=10, /* int: set number of pages to swap together */ - VM_DIRTY_BACKGROUND=11, /* dirty_background_ratio */ - VM_DIRTY_RATIO=12, /* dirty_ratio */ - VM_DIRTY_WB_CS=13, /* dirty_writeback_centisecs */ - VM_DIRTY_EXPIRE_CS=14, /* dirty_expire_centisecs */ - VM_NR_PDFLUSH_THREADS=15, /* nr_pdflush_threads */ - VM_OVERCOMMIT_RATIO=16, /* percent of RAM to allow overcommit in */ - VM_PAGEBUF=17, 
/* struct: Control pagebuf parameters */ - VM_HUGETLB_PAGES=18, /* int: Number of available Huge Pages */ - VM_SWAPPINESS=19, /* Tendency to steal mapped memory */ - VM_LOWMEM_RESERVE_RATIO=20,/* reservation ratio for lower memory zones */ - VM_MIN_FREE_KBYTES=21, /* Minimum free kilobytes to maintain */ - VM_MAX_MAP_COUNT=22, /* int: Maximum number of mmaps/address-space */ - VM_LAPTOP_MODE=23, /* vm laptop mode */ - VM_BLOCK_DUMP=24, /* block dump mode */ - VM_HUGETLB_GROUP=25, /* permitted hugetlb group */ - VM_VFS_CACHE_PRESSURE=26, /* dcache/icache reclaim pressure */ - VM_LEGACY_VA_LAYOUT=27, /* legacy/compatibility virtual address space layout */ - VM_SWAP_TOKEN_TIMEOUT=28, /* default time for token time out */ - VM_DROP_PAGECACHE=29, /* int: nuke lots of pagecache */ - VM_PERCPU_PAGELIST_FRACTION=30,/* int: fraction of pages in each percpu_pagelist */ - VM_ZONE_RECLAIM_MODE=31, /* reclaim local zone memory before going off node */ - VM_MIN_UNMAPPED=32, /* Set min percent of unmapped pages */ - VM_PANIC_ON_OOM=33, /* panic at out-of-memory */ - VM_VDSO_ENABLED=34, /* map VDSO into new processes? 
*/ - VM_MIN_SLAB=35, /* Percent pages ignored by zone reclaim */ -}; - - -/* CTL_NET names: */ -enum -{ - NET_CORE=1, - NET_ETHER=2, - NET_802=3, - NET_UNIX=4, - NET_IPV4=5, - NET_IPX=6, - NET_ATALK=7, - NET_NETROM=8, - NET_AX25=9, - NET_BRIDGE=10, - NET_ROSE=11, - NET_IPV6=12, - NET_X25=13, - NET_TR=14, - NET_DECNET=15, - NET_ECONET=16, - NET_SCTP=17, - NET_LLC=18, - NET_NETFILTER=19, - NET_DCCP=20, - NET_IRDA=412, -}; - -/* /proc/sys/kernel/random */ -enum -{ - RANDOM_POOLSIZE=1, - RANDOM_ENTROPY_COUNT=2, - RANDOM_READ_THRESH=3, - RANDOM_WRITE_THRESH=4, - RANDOM_BOOT_ID=5, - RANDOM_UUID=6 -}; - -/* /proc/sys/kernel/pty */ -enum -{ - PTY_MAX=1, - PTY_NR=2 -}; - -/* /proc/sys/bus/isa */ -enum -{ - BUS_ISA_MEM_BASE=1, - BUS_ISA_PORT_BASE=2, - BUS_ISA_PORT_SHIFT=3 -}; - -/* /proc/sys/net/core */ -enum -{ - NET_CORE_WMEM_MAX=1, - NET_CORE_RMEM_MAX=2, - NET_CORE_WMEM_DEFAULT=3, - NET_CORE_RMEM_DEFAULT=4, -/* was NET_CORE_DESTROY_DELAY */ - NET_CORE_MAX_BACKLOG=6, - NET_CORE_FASTROUTE=7, - NET_CORE_MSG_COST=8, - NET_CORE_MSG_BURST=9, - NET_CORE_OPTMEM_MAX=10, - NET_CORE_HOT_LIST_LENGTH=11, - NET_CORE_DIVERT_VERSION=12, - NET_CORE_NO_CONG_THRESH=13, - NET_CORE_NO_CONG=14, - NET_CORE_LO_CONG=15, - NET_CORE_MOD_CONG=16, - NET_CORE_DEV_WEIGHT=17, - NET_CORE_SOMAXCONN=18, - NET_CORE_BUDGET=19, - NET_CORE_AEVENT_ETIME=20, - NET_CORE_AEVENT_RSEQTH=21, - NET_CORE_WARNINGS=22, -}; - -/* /proc/sys/net/ethernet */ - -/* /proc/sys/net/802 */ - -/* /proc/sys/net/unix */ - -enum -{ - NET_UNIX_DESTROY_DELAY=1, - NET_UNIX_DELETE_DELAY=2, - NET_UNIX_MAX_DGRAM_QLEN=3, -}; - -/* /proc/sys/net/netfilter */ -enum -{ - NET_NF_CONNTRACK_MAX=1, - NET_NF_CONNTRACK_TCP_TIMEOUT_SYN_SENT=2, - NET_NF_CONNTRACK_TCP_TIMEOUT_SYN_RECV=3, - NET_NF_CONNTRACK_TCP_TIMEOUT_ESTABLISHED=4, - NET_NF_CONNTRACK_TCP_TIMEOUT_FIN_WAIT=5, - NET_NF_CONNTRACK_TCP_TIMEOUT_CLOSE_WAIT=6, - NET_NF_CONNTRACK_TCP_TIMEOUT_LAST_ACK=7, - NET_NF_CONNTRACK_TCP_TIMEOUT_TIME_WAIT=8, - NET_NF_CONNTRACK_TCP_TIMEOUT_CLOSE=9, - 
NET_NF_CONNTRACK_UDP_TIMEOUT=10, - NET_NF_CONNTRACK_UDP_TIMEOUT_STREAM=11, - NET_NF_CONNTRACK_ICMP_TIMEOUT=12, - NET_NF_CONNTRACK_GENERIC_TIMEOUT=13, - NET_NF_CONNTRACK_BUCKETS=14, - NET_NF_CONNTRACK_LOG_INVALID=15, - NET_NF_CONNTRACK_TCP_TIMEOUT_MAX_RETRANS=16, - NET_NF_CONNTRACK_TCP_LOOSE=17, - NET_NF_CONNTRACK_TCP_BE_LIBERAL=18, - NET_NF_CONNTRACK_TCP_MAX_RETRANS=19, - NET_NF_CONNTRACK_SCTP_TIMEOUT_CLOSED=20, - NET_NF_CONNTRACK_SCTP_TIMEOUT_COOKIE_WAIT=21, - NET_NF_CONNTRACK_SCTP_TIMEOUT_COOKIE_ECHOED=22, - NET_NF_CONNTRACK_SCTP_TIMEOUT_ESTABLISHED=23, - NET_NF_CONNTRACK_SCTP_TIMEOUT_SHUTDOWN_SENT=24, - NET_NF_CONNTRACK_SCTP_TIMEOUT_SHUTDOWN_RECD=25, - NET_NF_CONNTRACK_SCTP_TIMEOUT_SHUTDOWN_ACK_SENT=26, - NET_NF_CONNTRACK_COUNT=27, - NET_NF_CONNTRACK_ICMPV6_TIMEOUT=28, - NET_NF_CONNTRACK_FRAG6_TIMEOUT=29, - NET_NF_CONNTRACK_FRAG6_LOW_THRESH=30, - NET_NF_CONNTRACK_FRAG6_HIGH_THRESH=31, - NET_NF_CONNTRACK_CHECKSUM=32, -}; - -/* /proc/sys/net/ipv4 */ -enum -{ - /* v2.0 compatibile variables */ - NET_IPV4_FORWARD=8, - NET_IPV4_DYNADDR=9, - - NET_IPV4_CONF=16, - NET_IPV4_NEIGH=17, - NET_IPV4_ROUTE=18, - NET_IPV4_FIB_HASH=19, - NET_IPV4_NETFILTER=20, - - NET_IPV4_TCP_TIMESTAMPS=33, - NET_IPV4_TCP_WINDOW_SCALING=34, - NET_IPV4_TCP_SACK=35, - NET_IPV4_TCP_RETRANS_COLLAPSE=36, - NET_IPV4_DEFAULT_TTL=37, - NET_IPV4_AUTOCONFIG=38, - NET_IPV4_NO_PMTU_DISC=39, - NET_IPV4_TCP_SYN_RETRIES=40, - NET_IPV4_IPFRAG_HIGH_THRESH=41, - NET_IPV4_IPFRAG_LOW_THRESH=42, - NET_IPV4_IPFRAG_TIME=43, - NET_IPV4_TCP_MAX_KA_PROBES=44, - NET_IPV4_TCP_KEEPALIVE_TIME=45, - NET_IPV4_TCP_KEEPALIVE_PROBES=46, - NET_IPV4_TCP_RETRIES1=47, - NET_IPV4_TCP_RETRIES2=48, - NET_IPV4_TCP_FIN_TIMEOUT=49, - NET_IPV4_IP_MASQ_DEBUG=50, - NET_TCP_SYNCOOKIES=51, - NET_TCP_STDURG=52, - NET_TCP_RFC1337=53, - NET_TCP_SYN_TAILDROP=54, - NET_TCP_MAX_SYN_BACKLOG=55, - NET_IPV4_LOCAL_PORT_RANGE=56, - NET_IPV4_ICMP_ECHO_IGNORE_ALL=57, - NET_IPV4_ICMP_ECHO_IGNORE_BROADCASTS=58, - NET_IPV4_ICMP_SOURCEQUENCH_RATE=59, - 
NET_IPV4_ICMP_DESTUNREACH_RATE=60, - NET_IPV4_ICMP_TIMEEXCEED_RATE=61, - NET_IPV4_ICMP_PARAMPROB_RATE=62, - NET_IPV4_ICMP_ECHOREPLY_RATE=63, - NET_IPV4_ICMP_IGNORE_BOGUS_ERROR_RESPONSES=64, - NET_IPV4_IGMP_MAX_MEMBERSHIPS=65, - NET_TCP_TW_RECYCLE=66, - NET_IPV4_ALWAYS_DEFRAG=67, - NET_IPV4_TCP_KEEPALIVE_INTVL=68, - NET_IPV4_INET_PEER_THRESHOLD=69, - NET_IPV4_INET_PEER_MINTTL=70, - NET_IPV4_INET_PEER_MAXTTL=71, - NET_IPV4_INET_PEER_GC_MINTIME=72, - NET_IPV4_INET_PEER_GC_MAXTIME=73, - NET_TCP_ORPHAN_RETRIES=74, - NET_TCP_ABORT_ON_OVERFLOW=75, - NET_TCP_SYNACK_RETRIES=76, - NET_TCP_MAX_ORPHANS=77, - NET_TCP_MAX_TW_BUCKETS=78, - NET_TCP_FACK=79, - NET_TCP_REORDERING=80, - NET_TCP_ECN=81, - NET_TCP_DSACK=82, - NET_TCP_MEM=83, - NET_TCP_WMEM=84, - NET_TCP_RMEM=85, - NET_TCP_APP_WIN=86, - NET_TCP_ADV_WIN_SCALE=87, - NET_IPV4_NONLOCAL_BIND=88, - NET_IPV4_ICMP_RATELIMIT=89, - NET_IPV4_ICMP_RATEMASK=90, - NET_TCP_TW_REUSE=91, - NET_TCP_FRTO=92, - NET_TCP_LOW_LATENCY=93, - NET_IPV4_IPFRAG_SECRET_INTERVAL=94, - NET_IPV4_IGMP_MAX_MSF=96, - NET_TCP_NO_METRICS_SAVE=97, - NET_TCP_DEFAULT_WIN_SCALE=105, - NET_TCP_MODERATE_RCVBUF=106, - NET_TCP_TSO_WIN_DIVISOR=107, - NET_TCP_BIC_BETA=108, - NET_IPV4_ICMP_ERRORS_USE_INBOUND_IFADDR=109, - NET_TCP_CONG_CONTROL=110, - NET_TCP_ABC=111, - NET_IPV4_IPFRAG_MAX_DIST=112, - NET_TCP_MTU_PROBING=113, - NET_TCP_BASE_MSS=114, - NET_IPV4_TCP_WORKAROUND_SIGNED_WINDOWS=115, - NET_TCP_DMA_COPYBREAK=116, - NET_TCP_SLOW_START_AFTER_IDLE=117, - NET_CIPSOV4_CACHE_ENABLE=118, - NET_CIPSOV4_CACHE_BUCKET_SIZE=119, - NET_CIPSOV4_RBM_OPTFMT=120, - NET_CIPSOV4_RBM_STRICTVALID=121, - NET_TCP_AVAIL_CONG_CONTROL=122, - NET_TCP_ALLOWED_CONG_CONTROL=123, - NET_TCP_MAX_SSTHRESH=124, - NET_TCP_FRTO_RESPONSE=125, -}; - -enum { - NET_IPV4_ROUTE_FLUSH=1, - NET_IPV4_ROUTE_MIN_DELAY=2, /* obsolete since 2.6.25 */ - NET_IPV4_ROUTE_MAX_DELAY=3, /* obsolete since 2.6.25 */ - NET_IPV4_ROUTE_GC_THRESH=4, - NET_IPV4_ROUTE_MAX_SIZE=5, - NET_IPV4_ROUTE_GC_MIN_INTERVAL=6, - 
NET_IPV4_ROUTE_GC_TIMEOUT=7, - NET_IPV4_ROUTE_GC_INTERVAL=8, /* obsolete since 2.6.38 */ - NET_IPV4_ROUTE_REDIRECT_LOAD=9, - NET_IPV4_ROUTE_REDIRECT_NUMBER=10, - NET_IPV4_ROUTE_REDIRECT_SILENCE=11, - NET_IPV4_ROUTE_ERROR_COST=12, - NET_IPV4_ROUTE_ERROR_BURST=13, - NET_IPV4_ROUTE_GC_ELASTICITY=14, - NET_IPV4_ROUTE_MTU_EXPIRES=15, - NET_IPV4_ROUTE_MIN_PMTU=16, - NET_IPV4_ROUTE_MIN_ADVMSS=17, - NET_IPV4_ROUTE_SECRET_INTERVAL=18, - NET_IPV4_ROUTE_GC_MIN_INTERVAL_MS=19, -}; - -enum -{ - NET_PROTO_CONF_ALL=-2, - NET_PROTO_CONF_DEFAULT=-3 - - /* And device ifindices ... */ -}; - -enum -{ - NET_IPV4_CONF_FORWARDING=1, - NET_IPV4_CONF_MC_FORWARDING=2, - NET_IPV4_CONF_PROXY_ARP=3, - NET_IPV4_CONF_ACCEPT_REDIRECTS=4, - NET_IPV4_CONF_SECURE_REDIRECTS=5, - NET_IPV4_CONF_SEND_REDIRECTS=6, - NET_IPV4_CONF_SHARED_MEDIA=7, - NET_IPV4_CONF_RP_FILTER=8, - NET_IPV4_CONF_ACCEPT_SOURCE_ROUTE=9, - NET_IPV4_CONF_BOOTP_RELAY=10, - NET_IPV4_CONF_LOG_MARTIANS=11, - NET_IPV4_CONF_TAG=12, - NET_IPV4_CONF_ARPFILTER=13, - NET_IPV4_CONF_MEDIUM_ID=14, - NET_IPV4_CONF_NOXFRM=15, - NET_IPV4_CONF_NOPOLICY=16, - NET_IPV4_CONF_FORCE_IGMP_VERSION=17, - NET_IPV4_CONF_ARP_ANNOUNCE=18, - NET_IPV4_CONF_ARP_IGNORE=19, - NET_IPV4_CONF_PROMOTE_SECONDARIES=20, - NET_IPV4_CONF_ARP_ACCEPT=21, - NET_IPV4_CONF_ARP_NOTIFY=22, -}; - -/* /proc/sys/net/ipv4/netfilter */ -enum -{ - NET_IPV4_NF_CONNTRACK_MAX=1, - NET_IPV4_NF_CONNTRACK_TCP_TIMEOUT_SYN_SENT=2, - NET_IPV4_NF_CONNTRACK_TCP_TIMEOUT_SYN_RECV=3, - NET_IPV4_NF_CONNTRACK_TCP_TIMEOUT_ESTABLISHED=4, - NET_IPV4_NF_CONNTRACK_TCP_TIMEOUT_FIN_WAIT=5, - NET_IPV4_NF_CONNTRACK_TCP_TIMEOUT_CLOSE_WAIT=6, - NET_IPV4_NF_CONNTRACK_TCP_TIMEOUT_LAST_ACK=7, - NET_IPV4_NF_CONNTRACK_TCP_TIMEOUT_TIME_WAIT=8, - NET_IPV4_NF_CONNTRACK_TCP_TIMEOUT_CLOSE=9, - NET_IPV4_NF_CONNTRACK_UDP_TIMEOUT=10, - NET_IPV4_NF_CONNTRACK_UDP_TIMEOUT_STREAM=11, - NET_IPV4_NF_CONNTRACK_ICMP_TIMEOUT=12, - NET_IPV4_NF_CONNTRACK_GENERIC_TIMEOUT=13, - NET_IPV4_NF_CONNTRACK_BUCKETS=14, - 
NET_IPV4_NF_CONNTRACK_LOG_INVALID=15, - NET_IPV4_NF_CONNTRACK_TCP_TIMEOUT_MAX_RETRANS=16, - NET_IPV4_NF_CONNTRACK_TCP_LOOSE=17, - NET_IPV4_NF_CONNTRACK_TCP_BE_LIBERAL=18, - NET_IPV4_NF_CONNTRACK_TCP_MAX_RETRANS=19, - NET_IPV4_NF_CONNTRACK_SCTP_TIMEOUT_CLOSED=20, - NET_IPV4_NF_CONNTRACK_SCTP_TIMEOUT_COOKIE_WAIT=21, - NET_IPV4_NF_CONNTRACK_SCTP_TIMEOUT_COOKIE_ECHOED=22, - NET_IPV4_NF_CONNTRACK_SCTP_TIMEOUT_ESTABLISHED=23, - NET_IPV4_NF_CONNTRACK_SCTP_TIMEOUT_SHUTDOWN_SENT=24, - NET_IPV4_NF_CONNTRACK_SCTP_TIMEOUT_SHUTDOWN_RECD=25, - NET_IPV4_NF_CONNTRACK_SCTP_TIMEOUT_SHUTDOWN_ACK_SENT=26, - NET_IPV4_NF_CONNTRACK_COUNT=27, - NET_IPV4_NF_CONNTRACK_CHECKSUM=28, -}; - -/* /proc/sys/net/ipv6 */ -enum { - NET_IPV6_CONF=16, - NET_IPV6_NEIGH=17, - NET_IPV6_ROUTE=18, - NET_IPV6_ICMP=19, - NET_IPV6_BINDV6ONLY=20, - NET_IPV6_IP6FRAG_HIGH_THRESH=21, - NET_IPV6_IP6FRAG_LOW_THRESH=22, - NET_IPV6_IP6FRAG_TIME=23, - NET_IPV6_IP6FRAG_SECRET_INTERVAL=24, - NET_IPV6_MLD_MAX_MSF=25, -}; - -enum { - NET_IPV6_ROUTE_FLUSH=1, - NET_IPV6_ROUTE_GC_THRESH=2, - NET_IPV6_ROUTE_MAX_SIZE=3, - NET_IPV6_ROUTE_GC_MIN_INTERVAL=4, - NET_IPV6_ROUTE_GC_TIMEOUT=5, - NET_IPV6_ROUTE_GC_INTERVAL=6, - NET_IPV6_ROUTE_GC_ELASTICITY=7, - NET_IPV6_ROUTE_MTU_EXPIRES=8, - NET_IPV6_ROUTE_MIN_ADVMSS=9, - NET_IPV6_ROUTE_GC_MIN_INTERVAL_MS=10 -}; - -enum { - NET_IPV6_FORWARDING=1, - NET_IPV6_HOP_LIMIT=2, - NET_IPV6_MTU=3, - NET_IPV6_ACCEPT_RA=4, - NET_IPV6_ACCEPT_REDIRECTS=5, - NET_IPV6_AUTOCONF=6, - NET_IPV6_DAD_TRANSMITS=7, - NET_IPV6_RTR_SOLICITS=8, - NET_IPV6_RTR_SOLICIT_INTERVAL=9, - NET_IPV6_RTR_SOLICIT_DELAY=10, - NET_IPV6_USE_TEMPADDR=11, - NET_IPV6_TEMP_VALID_LFT=12, - NET_IPV6_TEMP_PREFERED_LFT=13, - NET_IPV6_REGEN_MAX_RETRY=14, - NET_IPV6_MAX_DESYNC_FACTOR=15, - NET_IPV6_MAX_ADDRESSES=16, - NET_IPV6_FORCE_MLD_VERSION=17, - NET_IPV6_ACCEPT_RA_DEFRTR=18, - NET_IPV6_ACCEPT_RA_PINFO=19, - NET_IPV6_ACCEPT_RA_RTR_PREF=20, - NET_IPV6_RTR_PROBE_INTERVAL=21, - NET_IPV6_ACCEPT_RA_RT_INFO_MAX_PLEN=22, - 
NET_IPV6_PROXY_NDP=23, - NET_IPV6_ACCEPT_SOURCE_ROUTE=25, - NET_IPV6_ACCEPT_RA_FROM_LOCAL=26, - NET_IPV6_ACCEPT_RA_RT_INFO_MIN_PLEN=27, - __NET_IPV6_MAX -}; - -/* /proc/sys/net/ipv6/icmp */ -enum { - NET_IPV6_ICMP_RATELIMIT=1 -}; - -/* /proc/sys/net//neigh/ */ -enum { - NET_NEIGH_MCAST_SOLICIT=1, - NET_NEIGH_UCAST_SOLICIT=2, - NET_NEIGH_APP_SOLICIT=3, - NET_NEIGH_RETRANS_TIME=4, - NET_NEIGH_REACHABLE_TIME=5, - NET_NEIGH_DELAY_PROBE_TIME=6, - NET_NEIGH_GC_STALE_TIME=7, - NET_NEIGH_UNRES_QLEN=8, - NET_NEIGH_PROXY_QLEN=9, - NET_NEIGH_ANYCAST_DELAY=10, - NET_NEIGH_PROXY_DELAY=11, - NET_NEIGH_LOCKTIME=12, - NET_NEIGH_GC_INTERVAL=13, - NET_NEIGH_GC_THRESH1=14, - NET_NEIGH_GC_THRESH2=15, - NET_NEIGH_GC_THRESH3=16, - NET_NEIGH_RETRANS_TIME_MS=17, - NET_NEIGH_REACHABLE_TIME_MS=18, -}; - -/* /proc/sys/net/dccp */ -enum { - NET_DCCP_DEFAULT=1, -}; - -/* /proc/sys/net/ipx */ -enum { - NET_IPX_PPROP_BROADCASTING=1, - NET_IPX_FORWARDING=2 -}; - -/* /proc/sys/net/llc */ -enum { - NET_LLC2=1, - NET_LLC_STATION=2, -}; - -/* /proc/sys/net/llc/llc2 */ -enum { - NET_LLC2_TIMEOUT=1, -}; - -/* /proc/sys/net/llc/station */ -enum { - NET_LLC_STATION_ACK_TIMEOUT=1, -}; - -/* /proc/sys/net/llc/llc2/timeout */ -enum { - NET_LLC2_ACK_TIMEOUT=1, - NET_LLC2_P_TIMEOUT=2, - NET_LLC2_REJ_TIMEOUT=3, - NET_LLC2_BUSY_TIMEOUT=4, -}; - -/* /proc/sys/net/appletalk */ -enum { - NET_ATALK_AARP_EXPIRY_TIME=1, - NET_ATALK_AARP_TICK_TIME=2, - NET_ATALK_AARP_RETRANSMIT_LIMIT=3, - NET_ATALK_AARP_RESOLVE_TIME=4 -}; - - -/* /proc/sys/net/netrom */ -enum { - NET_NETROM_DEFAULT_PATH_QUALITY=1, - NET_NETROM_OBSOLESCENCE_COUNT_INITIALISER=2, - NET_NETROM_NETWORK_TTL_INITIALISER=3, - NET_NETROM_TRANSPORT_TIMEOUT=4, - NET_NETROM_TRANSPORT_MAXIMUM_TRIES=5, - NET_NETROM_TRANSPORT_ACKNOWLEDGE_DELAY=6, - NET_NETROM_TRANSPORT_BUSY_DELAY=7, - NET_NETROM_TRANSPORT_REQUESTED_WINDOW_SIZE=8, - NET_NETROM_TRANSPORT_NO_ACTIVITY_TIMEOUT=9, - NET_NETROM_ROUTING_CONTROL=10, - NET_NETROM_LINK_FAILS_COUNT=11, - NET_NETROM_RESET=12 -}; 
- -/* /proc/sys/net/ax25 */ -enum { - NET_AX25_IP_DEFAULT_MODE=1, - NET_AX25_DEFAULT_MODE=2, - NET_AX25_BACKOFF_TYPE=3, - NET_AX25_CONNECT_MODE=4, - NET_AX25_STANDARD_WINDOW=5, - NET_AX25_EXTENDED_WINDOW=6, - NET_AX25_T1_TIMEOUT=7, - NET_AX25_T2_TIMEOUT=8, - NET_AX25_T3_TIMEOUT=9, - NET_AX25_IDLE_TIMEOUT=10, - NET_AX25_N2=11, - NET_AX25_PACLEN=12, - NET_AX25_PROTOCOL=13, - NET_AX25_DAMA_SLAVE_TIMEOUT=14 -}; - -/* /proc/sys/net/rose */ -enum { - NET_ROSE_RESTART_REQUEST_TIMEOUT=1, - NET_ROSE_CALL_REQUEST_TIMEOUT=2, - NET_ROSE_RESET_REQUEST_TIMEOUT=3, - NET_ROSE_CLEAR_REQUEST_TIMEOUT=4, - NET_ROSE_ACK_HOLD_BACK_TIMEOUT=5, - NET_ROSE_ROUTING_CONTROL=6, - NET_ROSE_LINK_FAIL_TIMEOUT=7, - NET_ROSE_MAX_VCS=8, - NET_ROSE_WINDOW_SIZE=9, - NET_ROSE_NO_ACTIVITY_TIMEOUT=10 -}; - -/* /proc/sys/net/x25 */ -enum { - NET_X25_RESTART_REQUEST_TIMEOUT=1, - NET_X25_CALL_REQUEST_TIMEOUT=2, - NET_X25_RESET_REQUEST_TIMEOUT=3, - NET_X25_CLEAR_REQUEST_TIMEOUT=4, - NET_X25_ACK_HOLD_BACK_TIMEOUT=5, - NET_X25_FORWARD=6 -}; - -/* /proc/sys/net/token-ring */ -enum -{ - NET_TR_RIF_TIMEOUT=1 -}; - -/* /proc/sys/net/decnet/ */ -enum { - NET_DECNET_NODE_TYPE = 1, - NET_DECNET_NODE_ADDRESS = 2, - NET_DECNET_NODE_NAME = 3, - NET_DECNET_DEFAULT_DEVICE = 4, - NET_DECNET_TIME_WAIT = 5, - NET_DECNET_DN_COUNT = 6, - NET_DECNET_DI_COUNT = 7, - NET_DECNET_DR_COUNT = 8, - NET_DECNET_DST_GC_INTERVAL = 9, - NET_DECNET_CONF = 10, - NET_DECNET_NO_FC_MAX_CWND = 11, - NET_DECNET_MEM = 12, - NET_DECNET_RMEM = 13, - NET_DECNET_WMEM = 14, - NET_DECNET_DEBUG_LEVEL = 255 -}; - -/* /proc/sys/net/decnet/conf/ */ -enum { - NET_DECNET_CONF_LOOPBACK = -2, - NET_DECNET_CONF_DDCMP = -3, - NET_DECNET_CONF_PPP = -4, - NET_DECNET_CONF_X25 = -5, - NET_DECNET_CONF_GRE = -6, - NET_DECNET_CONF_ETHER = -7 - - /* ... 
and ifindex of devices */ -}; - -/* /proc/sys/net/decnet/conf// */ -enum { - NET_DECNET_CONF_DEV_PRIORITY = 1, - NET_DECNET_CONF_DEV_T1 = 2, - NET_DECNET_CONF_DEV_T2 = 3, - NET_DECNET_CONF_DEV_T3 = 4, - NET_DECNET_CONF_DEV_FORWARDING = 5, - NET_DECNET_CONF_DEV_BLKSIZE = 6, - NET_DECNET_CONF_DEV_STATE = 7 -}; - -/* /proc/sys/net/sctp */ -enum { - NET_SCTP_RTO_INITIAL = 1, - NET_SCTP_RTO_MIN = 2, - NET_SCTP_RTO_MAX = 3, - NET_SCTP_RTO_ALPHA = 4, - NET_SCTP_RTO_BETA = 5, - NET_SCTP_VALID_COOKIE_LIFE = 6, - NET_SCTP_ASSOCIATION_MAX_RETRANS = 7, - NET_SCTP_PATH_MAX_RETRANS = 8, - NET_SCTP_MAX_INIT_RETRANSMITS = 9, - NET_SCTP_HB_INTERVAL = 10, - NET_SCTP_PRESERVE_ENABLE = 11, - NET_SCTP_MAX_BURST = 12, - NET_SCTP_ADDIP_ENABLE = 13, - NET_SCTP_PRSCTP_ENABLE = 14, - NET_SCTP_SNDBUF_POLICY = 15, - NET_SCTP_SACK_TIMEOUT = 16, - NET_SCTP_RCVBUF_POLICY = 17, -}; - -/* /proc/sys/net/bridge */ -enum { - NET_BRIDGE_NF_CALL_ARPTABLES = 1, - NET_BRIDGE_NF_CALL_IPTABLES = 2, - NET_BRIDGE_NF_CALL_IP6TABLES = 3, - NET_BRIDGE_NF_FILTER_VLAN_TAGGED = 4, - NET_BRIDGE_NF_FILTER_PPPOE_TAGGED = 5, -}; - -/* proc/sys/net/irda */ -enum { - NET_IRDA_DISCOVERY=1, - NET_IRDA_DEVNAME=2, - NET_IRDA_DEBUG=3, - NET_IRDA_FAST_POLL=4, - NET_IRDA_DISCOVERY_SLOTS=5, - NET_IRDA_DISCOVERY_TIMEOUT=6, - NET_IRDA_SLOT_TIMEOUT=7, - NET_IRDA_MAX_BAUD_RATE=8, - NET_IRDA_MIN_TX_TURN_TIME=9, - NET_IRDA_MAX_TX_DATA_SIZE=10, - NET_IRDA_MAX_TX_WINDOW=11, - NET_IRDA_MAX_NOREPLY_TIME=12, - NET_IRDA_WARN_NOREPLY_TIME=13, - NET_IRDA_LAP_KEEPALIVE_TIME=14, -}; - - -/* CTL_FS names: */ -enum -{ - FS_NRINODE=1, /* int:current number of allocated inodes */ - FS_STATINODE=2, - FS_MAXINODE=3, /* int:maximum number of inodes that can be allocated */ - FS_NRDQUOT=4, /* int:current number of allocated dquots */ - FS_MAXDQUOT=5, /* int:maximum number of dquots that can be allocated */ - FS_NRFILE=6, /* int:current number of allocated filedescriptors */ - FS_MAXFILE=7, /* int:maximum number of filedescriptors that can be allocated 
*/ - FS_DENTRY=8, - FS_NRSUPER=9, /* int:current number of allocated super_blocks */ - FS_MAXSUPER=10, /* int:maximum number of super_blocks that can be allocated */ - FS_OVERFLOWUID=11, /* int: overflow UID */ - FS_OVERFLOWGID=12, /* int: overflow GID */ - FS_LEASES=13, /* int: leases enabled */ - FS_DIR_NOTIFY=14, /* int: directory notification enabled */ - FS_LEASE_TIME=15, /* int: maximum time to wait for a lease break */ - FS_DQSTATS=16, /* disc quota usage statistics and control */ - FS_XFS=17, /* struct: control xfs parameters */ - FS_AIO_NR=18, /* current system-wide number of aio requests */ - FS_AIO_MAX_NR=19, /* system-wide maximum number of aio requests */ - FS_INOTIFY=20, /* inotify submenu */ - FS_OCFS2=988, /* ocfs2 */ -}; - -/* /proc/sys/fs/quota/ */ -enum { - FS_DQ_LOOKUPS = 1, - FS_DQ_DROPS = 2, - FS_DQ_READS = 3, - FS_DQ_WRITES = 4, - FS_DQ_CACHE_HITS = 5, - FS_DQ_ALLOCATED = 6, - FS_DQ_FREE = 7, - FS_DQ_SYNCS = 8, - FS_DQ_WARNINGS = 9, -}; - -/* CTL_DEBUG names: */ - -/* CTL_DEV names: */ -enum { - DEV_CDROM=1, - DEV_HWMON=2, - DEV_PARPORT=3, - DEV_RAID=4, - DEV_MAC_HID=5, - DEV_SCSI=6, - DEV_IPMI=7, -}; - -/* /proc/sys/dev/cdrom */ -enum { - DEV_CDROM_INFO=1, - DEV_CDROM_AUTOCLOSE=2, - DEV_CDROM_AUTOEJECT=3, - DEV_CDROM_DEBUG=4, - DEV_CDROM_LOCK=5, - DEV_CDROM_CHECK_MEDIA=6 -}; - -/* /proc/sys/dev/parport */ -enum { - DEV_PARPORT_DEFAULT=-3 -}; - -/* /proc/sys/dev/raid */ -enum { - DEV_RAID_SPEED_LIMIT_MIN=1, - DEV_RAID_SPEED_LIMIT_MAX=2 -}; - -/* /proc/sys/dev/parport/default */ -enum { - DEV_PARPORT_DEFAULT_TIMESLICE=1, - DEV_PARPORT_DEFAULT_SPINTIME=2 -}; - -/* /proc/sys/dev/parport/parport n */ -enum { - DEV_PARPORT_SPINTIME=1, - DEV_PARPORT_BASE_ADDR=2, - DEV_PARPORT_IRQ=3, - DEV_PARPORT_DMA=4, - DEV_PARPORT_MODES=5, - DEV_PARPORT_DEVICES=6, - DEV_PARPORT_AUTOPROBE=16 -}; - -/* /proc/sys/dev/parport/parport n/devices/ */ -enum { - DEV_PARPORT_DEVICES_ACTIVE=-3, -}; - -/* /proc/sys/dev/parport/parport n/devices/device n */ -enum { - 
DEV_PARPORT_DEVICE_TIMESLICE=1, -}; - -/* /proc/sys/dev/mac_hid */ -enum { - DEV_MAC_HID_KEYBOARD_SENDS_LINUX_KEYCODES=1, - DEV_MAC_HID_KEYBOARD_LOCK_KEYCODES=2, - DEV_MAC_HID_MOUSE_BUTTON_EMULATION=3, - DEV_MAC_HID_MOUSE_BUTTON2_KEYCODE=4, - DEV_MAC_HID_MOUSE_BUTTON3_KEYCODE=5, - DEV_MAC_HID_ADB_MOUSE_SENDS_KEYCODES=6 -}; - -/* /proc/sys/dev/scsi */ -enum { - DEV_SCSI_LOGGING_LEVEL=1, -}; - -/* /proc/sys/dev/ipmi */ -enum { - DEV_IPMI_POWEROFF_POWERCYCLE=1, -}; - -/* /proc/sys/abi */ -enum -{ - ABI_DEFHANDLER_COFF=1, /* default handler for coff binaries */ - ABI_DEFHANDLER_ELF=2, /* default handler for ELF binaries */ - ABI_DEFHANDLER_LCALL7=3,/* default handler for procs using lcall7 */ - ABI_DEFHANDLER_LIBCSO=4,/* default handler for an libc.so ELF interp */ - ABI_TRACE=5, /* tracing flags */ - ABI_FAKE_UTSNAME=6, /* fake target utsname information */ -}; - - -#endif /* _LINUX_SYSCTL_H */ From ad4b1425c31821d3dcd3484ce75114c4be703313 Mon Sep 17 00:00:00 2001 From: Craig Gallek Date: Fri, 21 Apr 2017 14:14:53 -0400 Subject: [PATCH 19/22] iplink: Expose IFLA_*_FWMARK attributes for supported link types This attribute allows the administrator to adjust the packet marking attribute of tunnels that support policy based routing. 
Signed-off-by: Craig Gallek --- ip/link_gre.c | 16 ++++++++++++++++ ip/link_gre6.c | 24 +++++++++++++++++++++++- ip/link_ip6tnl.c | 23 ++++++++++++++++++----- ip/link_iptnl.c | 16 ++++++++++++++++ ip/link_vti.c | 16 ++++++++++++++++ ip/link_vti6.c | 15 +++++++++++++++ 6 files changed, 104 insertions(+), 6 deletions(-) diff --git a/ip/link_gre.c b/ip/link_gre.c index 35d437a1..82df9006 100644 --- a/ip/link_gre.c +++ b/ip/link_gre.c @@ -42,11 +42,13 @@ static void print_usage(FILE *f) " [ [no]encap-csum ]\n" " [ [no]encap-csum6 ]\n" " [ [no]encap-remcsum ]\n" + " [ fwmark MARK ]\n" "\n" "Where: ADDR := { IP_ADDRESS | any }\n" " TOS := { NUMBER | inherit }\n" " TTL := { 1..255 | inherit }\n" " KEY := { DOTTED_QUAD | NUMBER }\n" + " MARK := { 0x0..0xffffffff }\n" ); } @@ -91,6 +93,7 @@ static int gre_parse_opt(struct link_util *lu, int argc, char **argv, __u16 encapsport = 0; __u16 encapdport = 0; __u8 metadata = 0; + __u32 fwmark = 0; if (!(n->nlmsg_flags & NLM_F_CREATE)) { if (rtnl_talk(&rth, &req.n, &req.n, sizeof(req)) < 0) { @@ -160,6 +163,9 @@ get_failed: if (greinfo[IFLA_GRE_COLLECT_METADATA]) metadata = 1; + + if (greinfo[IFLA_GRE_FWMARK]) + fwmark = rta_getattr_u32(greinfo[IFLA_GRE_FWMARK]); } while (argc > 0) { @@ -305,6 +311,10 @@ get_failed: encapflags |= ~TUNNEL_ENCAP_FLAG_REMCSUM; } else if (strcmp(*argv, "external") == 0) { metadata = 1; + } else if (strcmp(*argv, "fwmark") == 0) { + NEXT_ARG(); + if (get_u32(&fwmark, *argv, 0)) + invarg("invalid fwmark\n", *argv); } else usage(); argc--; argv++; @@ -335,6 +345,7 @@ get_failed: addattr32(n, 1024, IFLA_GRE_LINK, link); addattr_l(n, 1024, IFLA_GRE_TTL, &ttl, 1); addattr_l(n, 1024, IFLA_GRE_TOS, &tos, 1); + addattr32(n, 1024, IFLA_GRE_FWMARK, fwmark); } else { addattr_l(n, 1024, IFLA_GRE_COLLECT_METADATA, NULL, 0); } @@ -426,6 +437,11 @@ static void gre_print_direct_opt(FILE *f, struct rtattr *tb[]) fputs("icsum ", f); if (oflags & GRE_CSUM) fputs("ocsum ", f); + + if (tb[IFLA_GRE_FWMARK] && 
rta_getattr_u32(tb[IFLA_GRE_FWMARK])) { + fprintf(f, "fwmark 0x%x ", + rta_getattr_u32(tb[IFLA_GRE_FWMARK])); + } } static void gre_print_opt(struct link_util *lu, FILE *f, struct rtattr *tb[]) diff --git a/ip/link_gre6.c b/ip/link_gre6.c index 1b4fb051..205bada7 100644 --- a/ip/link_gre6.c +++ b/ip/link_gre6.c @@ -43,6 +43,7 @@ static void print_usage(FILE *f) " [ tclass TCLASS ]\n" " [ flowlabel FLOWLABEL ]\n" " [ dscp inherit ]\n" + " [ fwmark MARK ]\n" " [ dev PHYS_DEV ]\n" " [ noencap ]\n" " [ encap { fou | gue | none } ]\n" @@ -57,7 +58,8 @@ static void print_usage(FILE *f) " KEY := { DOTTED_QUAD | NUMBER }\n" " ELIM := { none | 0..255 }(default=%d)\n" " TCLASS := { 0x0..0xff | inherit }\n" - " FLOWLABEL := { 0x0..0xfffff | inherit }\n", + " FLOWLABEL := { 0x0..0xfffff | inherit }\n" + " MARK := { 0x0..0xffffffff | inherit }\n", DEFAULT_TNL_HOP_LIMIT, IPV6_DEFAULT_TNL_ENCAP_LIMIT ); } @@ -103,6 +105,7 @@ static int gre_parse_opt(struct link_util *lu, int argc, char **argv, __u16 encapsport = 0; __u16 encapdport = 0; int len; + __u32 fwmark = 0; if (!(n->nlmsg_flags & NLM_F_CREATE)) { if (rtnl_talk(&rth, &req.n, &req.n, sizeof(req)) < 0) { @@ -174,6 +177,9 @@ get_failed: if (greinfo[IFLA_GRE_ENCAP_DPORT]) encapdport = rta_getattr_u16(greinfo[IFLA_GRE_ENCAP_DPORT]); + + if (greinfo[IFLA_GRE_FWMARK]) + fwmark = rta_getattr_u32(greinfo[IFLA_GRE_FWMARK]); } while (argc > 0) { @@ -339,6 +345,16 @@ get_failed: encapflags |= TUNNEL_ENCAP_FLAG_REMCSUM; } else if (strcmp(*argv, "noencap-remcsum") == 0) { encapflags &= ~TUNNEL_ENCAP_FLAG_REMCSUM; + } else if (strcmp(*argv, "fwmark") == 0) { + NEXT_ARG(); + if (strcmp(*argv, "inherit") == 0) { + flags |= IP6_TNL_F_USE_ORIG_FWMARK; + fwmark = 0; + } else { + if (get_u32(&fwmark, *argv, 0)) + invarg("invalid fwmark\n", *argv); + flags &= ~IP6_TNL_F_USE_ORIG_FWMARK; + } } else usage(); argc--; argv++; @@ -356,6 +372,7 @@ get_failed: addattr_l(n, 1024, IFLA_GRE_ENCAP_LIMIT, &encap_limit, 1); addattr_l(n, 1024, 
IFLA_GRE_FLOWINFO, &flowinfo, 4); addattr32(n, 1024, IFLA_GRE_FLAGS, flags); + addattr32(n, 1024, IFLA_GRE_FWMARK, fwmark); addattr16(n, 1024, IFLA_GRE_ENCAP_TYPE, encaptype); addattr16(n, 1024, IFLA_GRE_ENCAP_FLAGS, encapflags); @@ -461,6 +478,11 @@ static void gre_print_opt(struct link_util *lu, FILE *f, struct rtattr *tb[]) if (oflags & GRE_CSUM) fputs("ocsum ", f); + if (flags & IP6_TNL_F_USE_ORIG_FWMARK) + fprintf(f, "fwmark inherit "); + else if (tb[IFLA_GRE_FWMARK] && rta_getattr_u32(tb[IFLA_GRE_FWMARK])) + fprintf(f, "fwmark 0x%x ", rta_getattr_u32(tb[IFLA_GRE_FWMARK])); + if (tb[IFLA_GRE_ENCAP_TYPE] && rta_getattr_u16(tb[IFLA_GRE_ENCAP_TYPE]) != TUNNEL_ENCAP_NONE) { __u16 type = rta_getattr_u16(tb[IFLA_GRE_ENCAP_TYPE]); diff --git a/ip/link_ip6tnl.c b/ip/link_ip6tnl.c index 6bb968d3..505fb476 100644 --- a/ip/link_ip6tnl.c +++ b/ip/link_ip6tnl.c @@ -41,7 +41,7 @@ static void print_usage(FILE *f) " [ tclass TCLASS ]\n" " [ flowlabel FLOWLABEL ]\n" " [ dscp inherit ]\n" - " [ fwmark inherit ]\n" + " [ fwmark MARK ]\n" " [ noencap ]\n" " [ encap { fou | gue | none } ]\n" " [ encap-sport PORT ]\n" @@ -55,7 +55,8 @@ static void print_usage(FILE *f) " ELIM := { none | 0..255 }(default=%d)\n" " HLIM := 0..255 (default=%d)\n" " TCLASS := { 0x0..0xff | inherit }\n" - " FLOWLABEL := { 0x0..0xfffff | inherit }\n", + " FLOWLABEL := { 0x0..0xfffff | inherit }\n" + " MARK := { 0x0..0xffffffff | inherit }\n", IPV6_DEFAULT_TNL_ENCAP_LIMIT, DEFAULT_TNL_HOP_LIMIT ); } @@ -99,6 +100,7 @@ static int ip6tunnel_parse_opt(struct link_util *lu, int argc, char **argv, __u16 encapsport = 0; __u16 encapdport = 0; __u8 metadata = 0; + __u32 fwmark = 0; if (!(n->nlmsg_flags & NLM_F_CREATE)) { if (rtnl_talk(&rth, &req.n, &req.n, sizeof(req)) < 0) { @@ -153,6 +155,9 @@ get_failed: proto = rta_getattr_u8(iptuninfo[IFLA_IPTUN_PROTO]); if (iptuninfo[IFLA_IPTUN_COLLECT_METADATA]) metadata = 1; + + if (iptuninfo[IFLA_IPTUN_FWMARK]) + fwmark = rta_getattr_u32(iptuninfo[IFLA_IPTUN_FWMARK]); } 
while (argc > 0) { @@ -252,9 +257,14 @@ get_failed: flags |= IP6_TNL_F_RCV_DSCP_COPY; } else if (strcmp(*argv, "fwmark") == 0) { NEXT_ARG(); - if (strcmp(*argv, "inherit") != 0) - invarg("not inherit", *argv); - flags |= IP6_TNL_F_USE_ORIG_FWMARK; + if (strcmp(*argv, "inherit") == 0) { + flags |= IP6_TNL_F_USE_ORIG_FWMARK; + fwmark = 0; + } else { + if (get_u32(&fwmark, *argv, 0)) + invarg("invalid fwmark\n", *argv); + flags &= ~IP6_TNL_F_USE_ORIG_FWMARK; + } } else if (strcmp(*argv, "noencap") == 0) { encaptype = TUNNEL_ENCAP_NONE; } else if (strcmp(*argv, "encap") == 0) { @@ -308,6 +318,7 @@ get_failed: addattr32(n, 1024, IFLA_IPTUN_FLOWINFO, flowinfo); addattr32(n, 1024, IFLA_IPTUN_FLAGS, flags); addattr32(n, 1024, IFLA_IPTUN_LINK, link); + addattr32(n, 1024, IFLA_IPTUN_FWMARK, fwmark); addattr16(n, 1024, IFLA_IPTUN_ENCAP_TYPE, encaptype); addattr16(n, 1024, IFLA_IPTUN_ENCAP_FLAGS, encapflags); @@ -398,6 +409,8 @@ static void ip6tunnel_print_opt(struct link_util *lu, FILE *f, struct rtattr *tb if (flags & IP6_TNL_F_USE_ORIG_FWMARK) fprintf(f, "fwmark inherit "); + else if (tb[IFLA_IPTUN_FWMARK] && rta_getattr_u32(tb[IFLA_IPTUN_FWMARK])) + fprintf(f, "fwmark 0x%x ", rta_getattr_u32(tb[IFLA_IPTUN_FWMARK])); if (tb[IFLA_IPTUN_ENCAP_TYPE] && rta_getattr_u16(tb[IFLA_IPTUN_ENCAP_TYPE]) != diff --git a/ip/link_iptnl.c b/ip/link_iptnl.c index f180b921..2f74d9b7 100644 --- a/ip/link_iptnl.c +++ b/ip/link_iptnl.c @@ -52,10 +52,12 @@ static void print_usage(FILE *f, int sit) " [ isatap ]\n"); } fprintf(f, " [ external ]\n"); + fprintf(f, " [ fwmark MARK ]\n"); fprintf(f, "\n"); fprintf(f, "Where: ADDR := { IP_ADDRESS | any }\n"); fprintf(f, " TOS := { NUMBER | inherit }\n"); fprintf(f, " TTL := { 1..255 | inherit }\n"); + fprintf(f, " MARK := { 0x0..0xffffffff }\n"); } static void usage(int sit) __attribute__((noreturn)); @@ -101,6 +103,7 @@ static int iptunnel_parse_opt(struct link_util *lu, int argc, char **argv, __u16 encapsport = 0; __u16 encapdport = 0; __u8 metadata 
= 0; + __u32 fwmark = 0; if (!(n->nlmsg_flags & NLM_F_CREATE)) { if (rtnl_talk(&rth, &req.n, &req.n, sizeof(req)) < 0) { @@ -179,6 +182,10 @@ get_failed: rta_getattr_u16(iptuninfo[IFLA_IPTUN_6RD_RELAY_PREFIXLEN]); if (iptuninfo[IFLA_IPTUN_COLLECT_METADATA]) metadata = 1; + + if (iptuninfo[IFLA_IPTUN_FWMARK]) + fwmark = rta_getattr_u32(iptuninfo[IFLA_IPTUN_FWMARK]); + } while (argc > 0) { @@ -301,6 +308,10 @@ get_failed: ip6rdprefixlen = 16; ip6rdrelayprefix = 0; ip6rdrelayprefixlen = 0; + } else if (strcmp(*argv, "fwmark") == 0) { + NEXT_ARG(); + if (get_u32(&fwmark, *argv, 0)) + invarg("invalid fwmark\n", *argv); } else usage(strcmp(lu->id, "sit") == 0); argc--, argv++; @@ -322,6 +333,7 @@ get_failed: addattr8(n, 1024, IFLA_IPTUN_TTL, ttl); addattr8(n, 1024, IFLA_IPTUN_TOS, tos); addattr8(n, 1024, IFLA_IPTUN_PMTUDISC, pmtudisc); + addattr32(n, 1024, IFLA_IPTUN_FWMARK, fwmark); addattr16(n, 1024, IFLA_IPTUN_ENCAP_TYPE, encaptype); addattr16(n, 1024, IFLA_IPTUN_ENCAP_FLAGS, encapflags); @@ -471,6 +483,10 @@ static void iptunnel_print_opt(struct link_util *lu, FILE *f, struct rtattr *tb[ else fputs("noencap-remcsum ", f); } + + if (tb[IFLA_IPTUN_FWMARK] && rta_getattr_u32(tb[IFLA_IPTUN_FWMARK])) + fprintf(f, "fwmark 0x%x ", + rta_getattr_u32(tb[IFLA_IPTUN_FWMARK])); } static void iptunnel_print_help(struct link_util *lu, int argc, char **argv, diff --git a/ip/link_vti.c b/ip/link_vti.c index 95bc23e9..d5242ac7 100644 --- a/ip/link_vti.c +++ b/ip/link_vti.c @@ -31,9 +31,11 @@ static void print_usage(FILE *f) " [ local ADDR ]\n" " [ [i|o]key KEY ]\n" " [ dev PHYS_DEV ]\n" + " [ fwmark MARK ]\n" "\n" "Where: ADDR := { IP_ADDRESS }\n" " KEY := { DOTTED_QUAD | NUMBER }\n" + " MARK := { 0x0..0xffffffff }\n" ); } @@ -67,6 +69,7 @@ static int vti_parse_opt(struct link_util *lu, int argc, char **argv, unsigned int saddr = 0; unsigned int daddr = 0; unsigned int link = 0; + unsigned int fwmark = 0; int len; if (!(n->nlmsg_flags & NLM_F_CREATE)) { @@ -109,6 +112,9 @@ 
get_failed: if (vtiinfo[IFLA_VTI_LINK]) link = rta_getattr_u8(vtiinfo[IFLA_VTI_LINK]); + + if (vtiinfo[IFLA_VTI_FWMARK]) + fwmark = rta_getattr_u32(vtiinfo[IFLA_VTI_FWMARK]); } while (argc > 0) { @@ -180,6 +186,10 @@ get_failed: *argv); exit(-1); } + } else if (strcmp(*argv, "fwmark") == 0) { + NEXT_ARG(); + if (get_u32(&fwmark, *argv, 0)) + invarg("invalid fwmark\n", *argv); } else usage(); argc--; argv++; @@ -189,6 +199,7 @@ get_failed: addattr32(n, 1024, IFLA_VTI_OKEY, okey); addattr_l(n, 1024, IFLA_VTI_LOCAL, &saddr, 4); addattr_l(n, 1024, IFLA_VTI_REMOTE, &daddr, 4); + addattr32(n, 1024, IFLA_VTI_FWMARK, fwmark); if (link) addattr32(n, 1024, IFLA_VTI_LINK, link); @@ -242,6 +253,11 @@ static void vti_print_opt(struct link_util *lu, FILE *f, struct rtattr *tb[]) inet_ntop(AF_INET, RTA_DATA(tb[IFLA_VTI_OKEY]), s2, sizeof(s2)); fprintf(f, "okey %s ", s2); } + + if (tb[IFLA_VTI_FWMARK] && rta_getattr_u32(tb[IFLA_VTI_FWMARK])) { + fprintf(f, "fwmark 0x%x ", + rta_getattr_u32(tb[IFLA_VTI_FWMARK])); + } } static void vti_print_help(struct link_util *lu, int argc, char **argv, diff --git a/ip/link_vti6.c b/ip/link_vti6.c index 9ca127af..be4e33ce 100644 --- a/ip/link_vti6.c +++ b/ip/link_vti6.c @@ -32,10 +32,12 @@ static void usage(void) fprintf(stderr, " type { vti6 } [ remote ADDR ] [ local ADDR ]\n"); fprintf(stderr, " [ [i|o]key KEY ]\n"); fprintf(stderr, " [ dev PHYS_DEV ]\n"); + fprintf(stderr, " [ fwmark MARK ]\n"); fprintf(stderr, "\n"); fprintf(stderr, "Where: NAME := STRING\n"); fprintf(stderr, " ADDR := { IPV6_ADDRESS }\n"); fprintf(stderr, " KEY := { DOTTED_QUAD | NUMBER }\n"); + fprintf(stderr, " MARK := { 0x0..0xffffffff }\n"); exit(-1); } @@ -62,6 +64,7 @@ static int vti6_parse_opt(struct link_util *lu, int argc, char **argv, unsigned int ikey = 0; unsigned int okey = 0; unsigned int link = 0; + __u32 fwmark = 0; int len; if (!(n->nlmsg_flags & NLM_F_CREATE)) { @@ -104,6 +107,9 @@ get_failed: if (vtiinfo[IFLA_VTI_LINK]) link = 
rta_getattr_u8(vtiinfo[IFLA_VTI_LINK]); + + if (vtiinfo[IFLA_VTI_FWMARK]) + fwmark = rta_getattr_u32(vtiinfo[IFLA_VTI_FWMARK]); } while (argc > 0) { @@ -178,6 +184,10 @@ get_failed: link = if_nametoindex(*argv); if (link == 0) exit(-1); + } else if (strcmp(*argv, "fwmark") == 0) { + NEXT_ARG(); + if (get_u32(&fwmark, *argv, 0)) + invarg("invalid fwmark\n", *argv); } else usage(); argc--; argv++; @@ -187,6 +197,7 @@ get_failed: addattr32(n, 1024, IFLA_VTI_OKEY, okey); addattr_l(n, 1024, IFLA_VTI_LOCAL, &saddr, sizeof(saddr)); addattr_l(n, 1024, IFLA_VTI_REMOTE, &daddr, sizeof(daddr)); + addattr32(n, 1024, IFLA_VTI_FWMARK, fwmark); if (link) addattr32(n, 1024, IFLA_VTI_LINK, link); @@ -239,6 +250,10 @@ static void vti6_print_opt(struct link_util *lu, FILE *f, struct rtattr *tb[]) inet_ntop(AF_INET, RTA_DATA(tb[IFLA_VTI_OKEY]), s2, sizeof(s2)); fprintf(f, "okey %s ", s2); } + + if (tb[IFLA_VTI_FWMARK] && rta_getattr_u32(tb[IFLA_VTI_FWMARK])) { + fprintf(f, "fwmark 0x%x ", rta_getattr_u32(tb[IFLA_VTI_FWMARK])); + } } struct link_util vti6_link_util = { From 7ff1fce5490b8c5c13de614529f82fd1a89b868f Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Mon, 1 May 2017 09:27:33 -0700 Subject: [PATCH 20/22] update headers to 4.11 net-next Signed-off-by: Stephen Hemminger --- include/linux/bpf.h | 3 +-- include/linux/devlink.h | 7 +++++++ include/linux/if_arp.h | 1 + include/linux/if_link.h | 5 ++++- include/linux/if_packet.h | 1 + include/linux/pkt_cls.h | 5 +++++ 6 files changed, 19 insertions(+), 3 deletions(-) diff --git a/include/linux/bpf.h b/include/linux/bpf.h index eedf24d6..6b491a81 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -481,8 +481,7 @@ union bpf_attr { * u32 bpf_get_socket_uid(skb) * Get the owner uid of the socket stored inside sk_buff. * @skb: pointer to skb - * Return: uid of the socket owner on success or 0 if the socket pointer - * inside sk_buff is NULL + * Return: uid of the socket owner on success or overflowuid if failed. 
*/ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ diff --git a/include/linux/devlink.h b/include/linux/devlink.h index 0c8af618..76440050 100644 --- a/include/linux/devlink.h +++ b/include/linux/devlink.h @@ -119,6 +119,11 @@ enum devlink_eswitch_inline_mode { DEVLINK_ESWITCH_INLINE_MODE_TRANSPORT, }; +enum devlink_eswitch_encap_mode { + DEVLINK_ESWITCH_ENCAP_MODE_NONE, + DEVLINK_ESWITCH_ENCAP_MODE_BASIC, +}; + enum devlink_attr { /* don't change the order or add anything between, this is ABI! */ DEVLINK_ATTR_UNSPEC, @@ -195,6 +200,8 @@ enum devlink_attr { DEVLINK_ATTR_PAD, + DEVLINK_ATTR_ESWITCH_ENCAP_MODE, /* u8 */ + /* add new attributes above here, update the policy in devlink.c */ __DEVLINK_ATTR_MAX, diff --git a/include/linux/if_arp.h b/include/linux/if_arp.h index d001bdb2..8ce598b9 100644 --- a/include/linux/if_arp.h +++ b/include/linux/if_arp.h @@ -95,6 +95,7 @@ #define ARPHRD_IP6GRE 823 /* GRE over IPv6 */ #define ARPHRD_NETLINK 824 /* Netlink header */ #define ARPHRD_6LOWPAN 825 /* IPv6 over LoWPAN */ +#define ARPHRD_VSOCKMON 826 /* Vsock monitor header */ #define ARPHRD_VOID 0xFFFF /* Void type, nothing is known */ #define ARPHRD_NONE 0xFFFE /* zero header length */ diff --git a/include/linux/if_link.h b/include/linux/if_link.h index 2bb46f08..2dfb6387 100644 --- a/include/linux/if_link.h +++ b/include/linux/if_link.h @@ -321,6 +321,7 @@ enum { IFLA_BRPORT_MCAST_FLOOD, IFLA_BRPORT_MCAST_TO_UCAST, IFLA_BRPORT_VLAN_TUNNEL, + IFLA_BRPORT_BCAST_FLOOD, __IFLA_BRPORT_MAX }; #define IFLA_BRPORT_MAX (__IFLA_BRPORT_MAX - 1) @@ -885,7 +886,9 @@ enum { /* XDP section */ #define XDP_FLAGS_UPDATE_IF_NOEXIST (1U << 0) -#define XDP_FLAGS_MASK (XDP_FLAGS_UPDATE_IF_NOEXIST) +#define XDP_FLAGS_SKB_MODE (2U << 0) +#define XDP_FLAGS_MASK (XDP_FLAGS_UPDATE_IF_NOEXIST | \ + XDP_FLAGS_SKB_MODE) enum { IFLA_XDP_UNSPEC, diff --git a/include/linux/if_packet.h b/include/linux/if_packet.h index 9e7edfd8..4df96a7d 100644 --- a/include/linux/if_packet.h +++ 
b/include/linux/if_packet.h @@ -66,6 +66,7 @@ struct sockaddr_ll { #define PACKET_FANOUT_CBPF 6 #define PACKET_FANOUT_EBPF 7 #define PACKET_FANOUT_FLAG_ROLLOVER 0x1000 +#define PACKET_FANOUT_FLAG_UNIQUEID 0x2000 #define PACKET_FANOUT_FLAG_DEFRAG 0x8000 struct tpacket_stats { diff --git a/include/linux/pkt_cls.h b/include/linux/pkt_cls.h index 7a69f2a4..f1129e38 100644 --- a/include/linux/pkt_cls.h +++ b/include/linux/pkt_cls.h @@ -432,6 +432,11 @@ enum { TCA_FLOWER_KEY_ARP_THA, /* ETH_ALEN */ TCA_FLOWER_KEY_ARP_THA_MASK, /* ETH_ALEN */ + TCA_FLOWER_KEY_MPLS_TTL, /* u8 - 8 bits */ + TCA_FLOWER_KEY_MPLS_BOS, /* u8 - 1 bit */ + TCA_FLOWER_KEY_MPLS_TC, /* u8 - 3 bits */ + TCA_FLOWER_KEY_MPLS_LABEL, /* be32 - 20 bits */ + __TCA_FLOWER_MAX, }; From a872b870a51135cde29e595a4782d910a9d39393 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Fri, 28 Apr 2017 15:44:29 +0200 Subject: [PATCH 21/22] bpf: add support for generic xdp Follow-up to commit c7272ca72009 ("bpf: add initial support for attaching xdp progs") to also support generic XDP. This adds an indicator for loaded generic XDP programs when programs are loaded as shown in c7272ca72009, but the driver still lacks native XDP support. # ip link [...] 3: eno1: mtu 1500 xdpgeneric qdisc [...] link/ether 0c:c4:7a:03:f9:25 brd ff:ff:ff:ff:ff:ff [...] In case the driver does support native XDP, but the user wants to load the program as generic XDP (e.g. for testing purposes), then this can be done with the same semantics as in c7272ca72009, but with 'xdpgeneric' instead of 'xdp' command for loading: # ip -force link set dev eno1 xdpgeneric obj xdp.o Signed-off-by: Daniel Borkmann Acked-by: David S. 
Miller --- ip/iplink.c | 7 +++++-- ip/iplink_xdp.c | 46 +++++++++++++++++++++++++++++++------------ ip/xdp.h | 2 +- man/man8/ip-link.8.in | 19 ++++++++++++++++-- 4 files changed, 56 insertions(+), 18 deletions(-) diff --git a/ip/iplink.c b/ip/iplink.c index da3f9a77..ae1c70eb 100644 --- a/ip/iplink.c +++ b/ip/iplink.c @@ -612,9 +612,12 @@ int iplink_parse(int argc, char **argv, struct iplink_req *req, if (get_integer(&mtu, *argv, 0)) invarg("Invalid \"mtu\" value\n", *argv); addattr_l(&req->n, sizeof(*req), IFLA_MTU, &mtu, 4); - } else if (strcmp(*argv, "xdp") == 0) { + } else if (strcmp(*argv, "xdpgeneric") == 0 || + strcmp(*argv, "xdp") == 0) { + bool generic = strcmp(*argv, "xdpgeneric") == 0; + NEXT_ARG(); - if (xdp_parse(&argc, &argv, req)) + if (xdp_parse(&argc, &argv, req, generic)) exit(-1); } else if (strcmp(*argv, "netns") == 0) { NEXT_ARG(); diff --git a/ip/iplink_xdp.c b/ip/iplink_xdp.c index a81ed971..a1380eec 100644 --- a/ip/iplink_xdp.c +++ b/ip/iplink_xdp.c @@ -19,41 +19,56 @@ extern int force; +struct xdp_req { + struct iplink_req *req; + __u32 flags; +}; + static void xdp_ebpf_cb(void *raw, int fd, const char *annotation) { - __u32 flags = !force ? 
XDP_FLAGS_UPDATE_IF_NOEXIST : 0; - struct iplink_req *req = raw; - struct rtattr *xdp; + struct xdp_req *xdp = raw; + struct iplink_req *req = xdp->req; + struct rtattr *xdp_attr; - xdp = addattr_nest(&req->n, sizeof(*req), IFLA_XDP); + xdp_attr = addattr_nest(&req->n, sizeof(*req), IFLA_XDP); addattr32(&req->n, sizeof(*req), IFLA_XDP_FD, fd); - addattr32(&req->n, sizeof(*req), IFLA_XDP_FLAGS, flags); - addattr_nest_end(&req->n, xdp); + if (xdp->flags) + addattr32(&req->n, sizeof(*req), IFLA_XDP_FLAGS, xdp->flags); + addattr_nest_end(&req->n, xdp_attr); } static const struct bpf_cfg_ops bpf_cb_ops = { .ebpf_cb = xdp_ebpf_cb, }; -static int xdp_delete(struct iplink_req *req) +static int xdp_delete(struct xdp_req *xdp) { - xdp_ebpf_cb(req, -1, NULL); + xdp_ebpf_cb(xdp, -1, NULL); return 0; } -int xdp_parse(int *argc, char ***argv, struct iplink_req *req) +int xdp_parse(int *argc, char ***argv, struct iplink_req *req, bool generic) { struct bpf_cfg_in cfg = { .argc = *argc, .argv = *argv, }; + struct xdp_req xdp = { + .req = req, + }; + + if (!force) + xdp.flags |= XDP_FLAGS_UPDATE_IF_NOEXIST; + if (generic) + xdp.flags |= XDP_FLAGS_SKB_MODE; if (*argc == 1) { if (strcmp(**argv, "none") == 0 || strcmp(**argv, "off") == 0) - return xdp_delete(req); + return xdp_delete(&xdp); } - if (bpf_parse_common(BPF_PROG_TYPE_XDP, &cfg, &bpf_cb_ops, req)) + + if (bpf_parse_common(BPF_PROG_TYPE_XDP, &cfg, &bpf_cb_ops, &xdp)) return -1; *argc = cfg.argc; @@ -64,12 +79,17 @@ int xdp_parse(int *argc, char ***argv, struct iplink_req *req) void xdp_dump(FILE *fp, struct rtattr *xdp) { struct rtattr *tb[IFLA_XDP_MAX + 1]; + __u32 flags = 0; parse_rtattr_nested(tb, IFLA_XDP_MAX, xdp); + if (!tb[IFLA_XDP_ATTACHED] || !rta_getattr_u8(tb[IFLA_XDP_ATTACHED])) return; - fprintf(fp, "xdp "); - /* More to come here in future for 'ip -d link' (digest, etc) ... */ + if (tb[IFLA_XDP_FLAGS]) + flags = rta_getattr_u32(tb[IFLA_XDP_FLAGS]); + + fprintf(fp, "xdp%s ", + flags & XDP_FLAGS_SKB_MODE ? 
"generic" : ""); } diff --git a/ip/xdp.h b/ip/xdp.h index bc696458..1b95e0f6 100644 --- a/ip/xdp.h +++ b/ip/xdp.h @@ -3,7 +3,7 @@ #include "utils.h" -int xdp_parse(int *argc, char ***argv, struct iplink_req *req); +int xdp_parse(int *argc, char ***argv, struct iplink_req *req, bool generic); void xdp_dump(FILE *fp, struct rtattr *tb); #endif /* __XDP__ */ diff --git a/man/man8/ip-link.8.in b/man/man8/ip-link.8.in index a5ddfe7a..52571b72 100644 --- a/man/man8/ip-link.8.in +++ b/man/man8/ip-link.8.in @@ -126,7 +126,7 @@ ip-link \- network device configuration .RB "[ " port_guid " eui64 ] ]" .br .in -9 -.RB "[ " xdp " { " off " | " +.RB "[ { " xdp " | " xdpgeneric " } { " off " | " .br .in +8 .BR object @@ -1572,8 +1572,23 @@ which may impact security and/or performance. (e.g. VF multicast promiscuous mod .TP .B xdp object "|" pinned "|" off -set (or unset) a XDP ("express data path") BPF program to run on every +set (or unset) a XDP ("eXpress Data Path") BPF program to run on every packet at driver level. +.B ip link +output will indicate a +.B xdp +flag for the networking device. If the driver does not have native XDP +support, the kernel will fall back to a slower, driver-independent "generic" +XDP variant. The +.B ip link +output will in that case indicate +.B xdpgeneric +instead of +.B xdp +only. If the driver does have native XDP support, but the program is +loaded under +.B xdpgeneric object "|" pinned +then the kernel will use the generic XDP variant instead of the native one. .B off (or From cfd2e727f074d16bb2ef820406be32ee6e2a04e3 Mon Sep 17 00:00:00 2001 From: Boris Pismenny Date: Sun, 30 Apr 2017 17:16:02 +0300 Subject: [PATCH 22/22] ip xfrm: Add xfrm state crypto offload syntax: ip xfrm state .... offload dev DEV dir DIR Example to add inbound offload: ip xfrm state .... offload dev mlx0 dir in Example to add outbound offload: ip xfrm state .... 
offload dev mlx0 dir out Signed-off-by: Boris Pismenny Signed-off-by: Ilan Tayari --- ip/ipxfrm.c | 19 +++++++++++++++++++ ip/xfrm_state.c | 49 +++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 68 insertions(+) diff --git a/ip/ipxfrm.c b/ip/ipxfrm.c index b0cfac17..d5eb22e2 100644 --- a/ip/ipxfrm.c +++ b/ip/ipxfrm.c @@ -862,6 +862,25 @@ void xfrm_xfrma_print(struct rtattr *tb[], __u16 family, } fprintf(fp, "%s", _SL_); } + if (tb[XFRMA_OFFLOAD_DEV]) { + struct xfrm_user_offload *xuo; + + if (prefix) + fputs(prefix, fp); + fprintf(fp, "crypto offload parameters: "); + + if (RTA_PAYLOAD(tb[XFRMA_OFFLOAD_DEV]) < sizeof(*xuo)) { + fprintf(fp, "(ERROR truncated)"); + fprintf(fp, "%s", _SL_); + return; + } + + xuo = (struct xfrm_user_offload *) + RTA_DATA(tb[XFRMA_OFFLOAD_DEV]); + fprintf(fp, "dev %s dir %s", ll_index_to_name(xuo->ifindex), + (xuo->flags & XFRM_OFFLOAD_INBOUND) ? "in" : "out"); + fprintf(fp, "%s", _SL_); + } } static int xfrm_selector_iszero(struct xfrm_selector *s) diff --git a/ip/xfrm_state.c b/ip/xfrm_state.c index ea7d4f34..e11c93bf 100644 --- a/ip/xfrm_state.c +++ b/ip/xfrm_state.c @@ -60,6 +60,7 @@ static void usage(void) fprintf(stderr, " [ replay-seq-hi SEQ ] [ replay-oseq-hi SEQ ]\n"); fprintf(stderr, " [ flag FLAG-LIST ] [ sel SELECTOR ] [ LIMIT-LIST ] [ encap ENCAP ]\n"); fprintf(stderr, " [ coa ADDR[/PLEN] ] [ ctx CTX ] [ extra-flag EXTRA-FLAG-LIST ]\n"); + fprintf(stderr, " [ offload [dev DEV] dir DIR ]\n"); fprintf(stderr, "Usage: ip xfrm state allocspi ID [ mode MODE ] [ mark MARK [ mask MASK ] ]\n"); fprintf(stderr, " [ reqid REQID ] [ seq SEQ ] [ min SPI max SPI ]\n"); fprintf(stderr, "Usage: ip xfrm state { delete | get } ID [ mark MARK [ mask MASK ] ]\n"); @@ -108,6 +109,7 @@ static void usage(void) fprintf(stderr, "LIMIT := { time-soft | time-hard | time-use-soft | time-use-hard } SECONDS |\n"); fprintf(stderr, " { byte-soft | byte-hard } SIZE | { packet-soft | packet-hard } COUNT\n"); fprintf(stderr, "ENCAP := { 
espinudp | espinudp-nonike } SPORT DPORT OADDR\n"); + fprintf(stderr, "DIR := in | out\n"); exit(-1); } @@ -264,6 +266,24 @@ static int xfrm_state_extra_flag_parse(__u32 *extra_flags, int *argcp, char ***a return 0; } +static int xfrm_offload_dir_parse(__u8 *dir, int *argcp, char ***argvp) +{ + int argc = *argcp; + char **argv = *argvp; + + if (strcmp(*argv, "in") == 0) + *dir = XFRM_OFFLOAD_INBOUND; + else if (strcmp(*argv, "out") == 0) + *dir = 0; + else + invarg("DIR value is invalid", *argv); + + *argcp = argc; + *argvp = argv; + + return 0; +} + static int xfrm_state_modify(int cmd, unsigned int flags, int argc, char **argv) { struct rtnl_handle rth; @@ -283,6 +303,10 @@ static int xfrm_state_modify(int cmd, unsigned int flags, int argc, char **argv) }; struct xfrm_replay_state replay = {}; struct xfrm_replay_state_esn replay_esn = {}; + struct xfrm_user_offload xuo = {}; + unsigned int ifindex = 0; + __u8 dir = 0; + bool is_offload = false; __u32 replay_window = 0; __u32 seq = 0, oseq = 0, seq_hi = 0, oseq_hi = 0; char *idp = NULL; @@ -394,6 +418,25 @@ static int xfrm_state_modify(int cmd, unsigned int flags, int argc, char **argv) xfrm_sctx_parse((char *)&ctx.str, context, &ctx.sctx); addattr_l(&req.n, sizeof(req.buf), XFRMA_SEC_CTX, (void *)&ctx, ctx.sctx.len); + } else if (strcmp(*argv, "offload") == 0) { + is_offload = true; + NEXT_ARG(); + if (strcmp(*argv, "dev") == 0) { + NEXT_ARG(); + ifindex = ll_name_to_index(*argv); + if (!ifindex) { + invarg("value after \"offload dev\" is invalid", *argv); + is_offload = false; + } + NEXT_ARG(); + } + if (strcmp(*argv, "dir") == 0) { + NEXT_ARG(); + xfrm_offload_dir_parse(&dir, &argc, &argv); + } else { + invarg("value after \"offload dir\" is invalid", *argv); + is_offload = false; + } } else { /* try to assume ALGO */ int type = xfrm_algotype_getbyname(*argv); @@ -531,6 +574,12 @@ static int xfrm_state_modify(int cmd, unsigned int flags, int argc, char **argv) exit(-1); } + if (is_offload) { + xuo.ifindex = 
ifindex; + xuo.flags = dir; + addattr_l(&req.n, sizeof(req.buf), XFRMA_OFFLOAD_DEV, &xuo, + sizeof(xuo)); + } if (req.xsinfo.flags & XFRM_STATE_ESN || replay_window > (sizeof(replay.bitmap) * 8)) { replay_esn.seq = seq;