diff --git a/include/linux/if_alg.h b/include/linux/if_alg.h new file mode 100644 index 00000000..f2acd2fd --- /dev/null +++ b/include/linux/if_alg.h @@ -0,0 +1,42 @@ +/* + * if_alg: User-space algorithm interface + * + * Copyright (c) 2010 Herbert Xu + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + * + */ + +#ifndef _LINUX_IF_ALG_H +#define _LINUX_IF_ALG_H + +#include + +struct sockaddr_alg { + __u16 salg_family; + __u8 salg_type[14]; + __u32 salg_feat; + __u32 salg_mask; + __u8 salg_name[64]; +}; + +struct af_alg_iv { + __u32 ivlen; + __u8 iv[0]; +}; + +/* Socket options */ +#define ALG_SET_KEY 1 +#define ALG_SET_IV 2 +#define ALG_SET_OP 3 +#define ALG_SET_AEAD_ASSOCLEN 4 +#define ALG_SET_AEAD_AUTHSIZE 5 + +/* Operations */ +#define ALG_OP_DECRYPT 0 +#define ALG_OP_ENCRYPT 1 + +#endif /* _LINUX_IF_ALG_H */ diff --git a/include/linux/if_bonding.h b/include/linux/if_bonding.h new file mode 100644 index 00000000..9635a62f --- /dev/null +++ b/include/linux/if_bonding.h @@ -0,0 +1,130 @@ +/* + * Bond several ethernet interfaces into a Cisco, running 'Etherchannel'. + * + * + * Portions are (c) Copyright 1995 Simon "Guru Aleph-Null" Janes + * NCM: Network and Communications Management, Inc. + * + * BUT, I'm the one who modified it for ethernet, so: + * (c) Copyright 1999, Thomas Davis, tadavis@lbl.gov + * + * This software may be used and distributed according to the terms + * of the GNU Public License, incorporated herein by reference. + * + * 2003/03/18 - Amir Noam + * - Added support for getting slave's speed and duplex via ethtool. + * Needed for 802.3ad and other future modes. + * + * 2003/03/18 - Tsippy Mendelson and + * Shmulik Hen + * - Enable support of modes that need to use the unique mac address of + * each slave. + * + * 2003/03/18 - Tsippy Mendelson and + * Amir Noam + * - Moved driver's private data types to bonding.h + * + * 2003/03/18 - Amir Noam , + * Tsippy Mendelson and + * Shmulik Hen + * - Added support for IEEE 802.3ad Dynamic link aggregation mode. + * + * 2003/05/01 - Amir Noam + * - Added ABI version control to restore compatibility between + * new/old ifenslave and new/old bonding. + * + * 2003/12/01 - Shmulik Hen + * - Code cleanup and style changes + * + * 2005/05/05 - Jason Gabler + * - added definitions for various XOR hashing policies + */ + +#ifndef _LINUX_IF_BONDING_H +#define _LINUX_IF_BONDING_H + +#include +#include +#include + +/* userland - kernel ABI version (2003/05/08) */ +#define BOND_ABI_VERSION 2 + +/* + * We can remove these ioctl definitions in 2.5. People should use the + * SIOC*** versions of them instead + */ +#define BOND_ENSLAVE_OLD (SIOCDEVPRIVATE) +#define BOND_RELEASE_OLD (SIOCDEVPRIVATE + 1) +#define BOND_SETHWADDR_OLD (SIOCDEVPRIVATE + 2) +#define BOND_SLAVE_INFO_QUERY_OLD (SIOCDEVPRIVATE + 11) +#define BOND_INFO_QUERY_OLD (SIOCDEVPRIVATE + 12) +#define BOND_CHANGE_ACTIVE_OLD (SIOCDEVPRIVATE + 13) + +#define BOND_CHECK_MII_STATUS (SIOCGMIIPHY) + +#define BOND_MODE_ROUNDROBIN 0 +#define BOND_MODE_ACTIVEBACKUP 1 +#define BOND_MODE_XOR 2 +#define BOND_MODE_BROADCAST 3 +#define BOND_MODE_8023AD 4 +#define BOND_MODE_TLB 5 +#define BOND_MODE_ALB 6 /* TLB + RLB (receive load balancing) */ + +/* each slave's link has 4 states */ +#define BOND_LINK_UP 0 /* link is up and running */ +#define BOND_LINK_FAIL 1 /* link has just gone down */ +#define BOND_LINK_DOWN 2 /* link has been down for too long time */ +#define BOND_LINK_BACK 3 /* link is going back */ + +/* each slave has several states */ +#define BOND_STATE_ACTIVE 0 /* link is active */ +#define BOND_STATE_BACKUP 1 /* link is backup */ + +#define BOND_DEFAULT_MAX_BONDS 1 /* Default maximum number of devices to support */ + +#define BOND_DEFAULT_TX_QUEUES 16 /* Default number of tx queues per device */ + +#define BOND_DEFAULT_RESEND_IGMP 1 /* Default number of IGMP membership reports */ + +/* hashing types */ +#define BOND_XMIT_POLICY_LAYER2 0 /* layer 2 (MAC only), default */ +#define BOND_XMIT_POLICY_LAYER34 1 /* layer 3+4 (IP ^ (TCP || UDP)) */ +#define BOND_XMIT_POLICY_LAYER23 2 /* layer 2+3 (IP ^ MAC) */ +#define BOND_XMIT_POLICY_ENCAP23 3 /* encapsulated layer 2+3 */ +#define BOND_XMIT_POLICY_ENCAP34 4 /* encapsulated layer 3+4 */ + +typedef struct ifbond { + __s32 bond_mode; + __s32 num_slaves; + __s32 miimon; +} ifbond; + +typedef struct ifslave { + __s32 slave_id; /* Used as an IN param to the BOND_SLAVE_INFO_QUERY ioctl */ + char slave_name[IFNAMSIZ]; + __s8 link; + __s8 state; + __u32 link_failure_count; +} ifslave; + +struct ad_info { + __u16 aggregator_id; + __u16 ports; + __u16 actor_key; + __u16 partner_key; + __u8 partner_system[ETH_ALEN]; +}; + +#endif /* _LINUX_IF_BONDING_H */ + +/* + * Local variables: + * version-control: t + * kept-new-versions: 5 + * c-indent-level: 8 + * c-basic-offset: 8 + * tab-width: 8 + * End: + */ + diff --git a/include/linux/if_packet.h b/include/linux/if_packet.h new file mode 100644 index 00000000..9e7edfd8 --- /dev/null +++ b/include/linux/if_packet.h @@ -0,0 +1,301 @@ +#ifndef __LINUX_IF_PACKET_H +#define __LINUX_IF_PACKET_H + +#include + +struct sockaddr_pkt { + unsigned short spkt_family; + unsigned char spkt_device[14]; + __be16 spkt_protocol; +}; + +struct sockaddr_ll { + unsigned short sll_family; + __be16 sll_protocol; + int sll_ifindex; + unsigned short sll_hatype; + unsigned char sll_pkttype; + unsigned char sll_halen; + unsigned char sll_addr[8]; +}; + +/* Packet types */ + +#define PACKET_HOST 0 /* To us */ +#define PACKET_BROADCAST 1 /* To all */ +#define PACKET_MULTICAST 2 /* To group */ +#define PACKET_OTHERHOST 3 /* To someone else */ +#define PACKET_OUTGOING 4 /* Outgoing of any type */ +#define PACKET_LOOPBACK 5 /* MC/BRD frame looped back */ +#define PACKET_USER 6 /* To user space */ +#define PACKET_KERNEL 7 /* To kernel space */ +/* Unused, PACKET_FASTROUTE and PACKET_LOOPBACK are invisible to user space */ +#define PACKET_FASTROUTE 6 /* Fastrouted frame */ + +/* Packet socket options */ + +#define PACKET_ADD_MEMBERSHIP 1 +#define PACKET_DROP_MEMBERSHIP 2 +#define PACKET_RECV_OUTPUT 3 +/* Value 4 is still used by obsolete turbo-packet. */ +#define PACKET_RX_RING 5 +#define PACKET_STATISTICS 6 +#define PACKET_COPY_THRESH 7 +#define PACKET_AUXDATA 8 +#define PACKET_ORIGDEV 9 +#define PACKET_VERSION 10 +#define PACKET_HDRLEN 11 +#define PACKET_RESERVE 12 +#define PACKET_TX_RING 13 +#define PACKET_LOSS 14 +#define PACKET_VNET_HDR 15 +#define PACKET_TX_TIMESTAMP 16 +#define PACKET_TIMESTAMP 17 +#define PACKET_FANOUT 18 +#define PACKET_TX_HAS_OFF 19 +#define PACKET_QDISC_BYPASS 20 +#define PACKET_ROLLOVER_STATS 21 +#define PACKET_FANOUT_DATA 22 + +#define PACKET_FANOUT_HASH 0 +#define PACKET_FANOUT_LB 1 +#define PACKET_FANOUT_CPU 2 +#define PACKET_FANOUT_ROLLOVER 3 +#define PACKET_FANOUT_RND 4 +#define PACKET_FANOUT_QM 5 +#define PACKET_FANOUT_CBPF 6 +#define PACKET_FANOUT_EBPF 7 +#define PACKET_FANOUT_FLAG_ROLLOVER 0x1000 +#define PACKET_FANOUT_FLAG_DEFRAG 0x8000 + +struct tpacket_stats { + unsigned int tp_packets; + unsigned int tp_drops; +}; + +struct tpacket_stats_v3 { + unsigned int tp_packets; + unsigned int tp_drops; + unsigned int tp_freeze_q_cnt; +}; + +struct tpacket_rollover_stats { + __aligned_u64 tp_all; + __aligned_u64 tp_huge; + __aligned_u64 tp_failed; +}; + +union tpacket_stats_u { + struct tpacket_stats stats1; + struct tpacket_stats_v3 stats3; +}; + +struct tpacket_auxdata { + __u32 tp_status; + __u32 tp_len; + __u32 tp_snaplen; + __u16 tp_mac; + __u16 tp_net; + __u16 tp_vlan_tci; + __u16 tp_vlan_tpid; +}; + +/* Rx ring - header status */ +#define TP_STATUS_KERNEL 0 +#define TP_STATUS_USER (1 << 0) +#define TP_STATUS_COPY (1 << 1) +#define TP_STATUS_LOSING (1 << 2) +#define TP_STATUS_CSUMNOTREADY (1 << 3) +#define TP_STATUS_VLAN_VALID (1 << 4) /* auxdata has valid tp_vlan_tci */ +#define TP_STATUS_BLK_TMO (1 << 5) +#define TP_STATUS_VLAN_TPID_VALID (1 << 6) /* auxdata has valid tp_vlan_tpid */ +#define TP_STATUS_CSUM_VALID (1 << 7) + +/* Tx ring - header status */ +#define TP_STATUS_AVAILABLE 0 +#define TP_STATUS_SEND_REQUEST (1 << 0) +#define TP_STATUS_SENDING (1 << 1) +#define TP_STATUS_WRONG_FORMAT (1 << 2) + +/* Rx and Tx ring - header status */ +#define TP_STATUS_TS_SOFTWARE (1 << 29) +#define TP_STATUS_TS_SYS_HARDWARE (1 << 30) /* deprecated, never set */ +#define TP_STATUS_TS_RAW_HARDWARE (1 << 31) + +/* Rx ring - feature request bits */ +#define TP_FT_REQ_FILL_RXHASH 0x1 + +struct tpacket_hdr { + unsigned long tp_status; + unsigned int tp_len; + unsigned int tp_snaplen; + unsigned short tp_mac; + unsigned short tp_net; + unsigned int tp_sec; + unsigned int tp_usec; +}; + +#define TPACKET_ALIGNMENT 16 +#define TPACKET_ALIGN(x) (((x)+TPACKET_ALIGNMENT-1)&~(TPACKET_ALIGNMENT-1)) +#define TPACKET_HDRLEN (TPACKET_ALIGN(sizeof(struct tpacket_hdr)) + sizeof(struct sockaddr_ll)) + +struct tpacket2_hdr { + __u32 tp_status; + __u32 tp_len; + __u32 tp_snaplen; + __u16 tp_mac; + __u16 tp_net; + __u32 tp_sec; + __u32 tp_nsec; + __u16 tp_vlan_tci; + __u16 tp_vlan_tpid; + __u8 tp_padding[4]; +}; + +struct tpacket_hdr_variant1 { + __u32 tp_rxhash; + __u32 tp_vlan_tci; + __u16 tp_vlan_tpid; + __u16 tp_padding; +}; + +struct tpacket3_hdr { + __u32 tp_next_offset; + __u32 tp_sec; + __u32 tp_nsec; + __u32 tp_snaplen; + __u32 tp_len; + __u32 tp_status; + __u16 tp_mac; + __u16 tp_net; + /* pkt_hdr variants */ + union { + struct tpacket_hdr_variant1 hv1; + }; + __u8 tp_padding[8]; +}; + +struct tpacket_bd_ts { + unsigned int ts_sec; + union { + unsigned int ts_usec; + unsigned int ts_nsec; + }; +}; + +struct tpacket_hdr_v1 { + __u32 block_status; + __u32 num_pkts; + __u32 offset_to_first_pkt; + + /* Number of valid bytes (including padding) + * blk_len <= tp_block_size + */ + __u32 blk_len; + + /* + * Quite a few uses of sequence number: + * 1. Make sure cache flush etc worked. + * Well, one can argue - why not use the increasing ts below? + * But look at 2. below first. + * 2. When you pass around blocks to other user space decoders, + * you can see which blk[s] is[are] outstanding etc. + * 3. Validate kernel code. + */ + __aligned_u64 seq_num; + + /* + * ts_last_pkt: + * + * Case 1. Block has 'N'(N >=1) packets and TMO'd(timed out) + * ts_last_pkt == 'time-stamp of last packet' and NOT the + * time when the timer fired and the block was closed. + * By providing the ts of the last packet we can absolutely + * guarantee that time-stamp wise, the first packet in the + * next block will never precede the last packet of the + * previous block. + * Case 2. Block has zero packets and TMO'd + * ts_last_pkt = time when the timer fired and the block + * was closed. + * Case 3. Block has 'N' packets and NO TMO. + * ts_last_pkt = time-stamp of the last pkt in the block. + * + * ts_first_pkt: + * Is always the time-stamp when the block was opened. + * Case a) ZERO packets + * No packets to deal with but atleast you know the + * time-interval of this block. + * Case b) Non-zero packets + * Use the ts of the first packet in the block. + * + */ + struct tpacket_bd_ts ts_first_pkt, ts_last_pkt; +}; + +union tpacket_bd_header_u { + struct tpacket_hdr_v1 bh1; +}; + +struct tpacket_block_desc { + __u32 version; + __u32 offset_to_priv; + union tpacket_bd_header_u hdr; +}; + +#define TPACKET2_HDRLEN (TPACKET_ALIGN(sizeof(struct tpacket2_hdr)) + sizeof(struct sockaddr_ll)) +#define TPACKET3_HDRLEN (TPACKET_ALIGN(sizeof(struct tpacket3_hdr)) + sizeof(struct sockaddr_ll)) + +enum tpacket_versions { + TPACKET_V1, + TPACKET_V2, + TPACKET_V3 +}; + +/* + Frame structure: + + - Start. Frame must be aligned to TPACKET_ALIGNMENT=16 + - struct tpacket_hdr + - pad to TPACKET_ALIGNMENT=16 + - struct sockaddr_ll + - Gap, chosen so that packet data (Start+tp_net) alignes to TPACKET_ALIGNMENT=16 + - Start+tp_mac: [ Optional MAC header ] + - Start+tp_net: Packet data, aligned to TPACKET_ALIGNMENT=16. + - Pad to align to TPACKET_ALIGNMENT=16 + */ + +struct tpacket_req { + unsigned int tp_block_size; /* Minimal size of contiguous block */ + unsigned int tp_block_nr; /* Number of blocks */ + unsigned int tp_frame_size; /* Size of frame */ + unsigned int tp_frame_nr; /* Total number of frames */ +}; + +struct tpacket_req3 { + unsigned int tp_block_size; /* Minimal size of contiguous block */ + unsigned int tp_block_nr; /* Number of blocks */ + unsigned int tp_frame_size; /* Size of frame */ + unsigned int tp_frame_nr; /* Total number of frames */ + unsigned int tp_retire_blk_tov; /* timeout in msecs */ + unsigned int tp_sizeof_priv; /* offset to private data area */ + unsigned int tp_feature_req_word; +}; + +union tpacket_req_u { + struct tpacket_req req; + struct tpacket_req3 req3; +}; + +struct packet_mreq { + int mr_ifindex; + unsigned short mr_type; + unsigned short mr_alen; + unsigned char mr_address[8]; +}; + +#define PACKET_MR_MULTICAST 0 +#define PACKET_MR_PROMISC 1 +#define PACKET_MR_ALLMULTI 2 +#define PACKET_MR_UNICAST 3 + +#endif diff --git a/ip/iplink.c b/ip/iplink.c index d2e586b6..68e5faea 100644 --- a/ip/iplink.c +++ b/ip/iplink.c @@ -237,6 +237,36 @@ struct iplink_req { char buf[1024]; }; +static int nl_get_ll_addr_len(unsigned int dev_index) +{ + int len; + struct iplink_req req = { + .n = { + .nlmsg_len = NLMSG_LENGTH(sizeof(struct ifinfomsg)), + .nlmsg_type = RTM_GETLINK, + .nlmsg_flags = NLM_F_REQUEST + }, + .i = { + .ifi_family = preferred_family, + .ifi_index = dev_index, + } + }; + struct rtattr *tb[IFLA_MAX+1]; + + if (rtnl_talk(&rth, &req.n, &req.n, sizeof(req)) < 0) + return -1; + + len = req.n.nlmsg_len - NLMSG_LENGTH(sizeof(req.i)); + if (len < 0) + return -1; + + parse_rtattr_flags(tb, IFLA_MAX, IFLA_RTA(&req.i), len, NLA_F_NESTED); + if (!tb[IFLA_ADDRESS]) + return -1; + + return RTA_PAYLOAD(tb[IFLA_ADDRESS]); +} + static int iplink_parse_vf(int vf, int *argcp, char ***argvp, struct iplink_req *req, int dev_index) { @@ -273,13 +303,20 @@ static int iplink_parse_vf(int vf, int *argcp, char ***argvp, while (NEXT_ARG_OK()) { NEXT_ARG(); if (matches(*argv, "mac") == 0) { - struct ifla_vf_mac ivm; + struct ifla_vf_mac ivm = { 0 }; + int halen = nl_get_ll_addr_len(dev_index); NEXT_ARG(); ivm.vf = vf; len = ll_addr_a2n((char *)ivm.mac, 32, *argv); if (len < 0) return -1; + if (halen > 0 && len != halen) { + fprintf(stderr, + "Invalid address length %d - must be %d bytes\n", + len, halen); + return -1; + } addattr_l(&req->n, sizeof(*req), IFLA_VF_MAC, &ivm, sizeof(ivm)); } else if (matches(*argv, "vlan") == 0) { struct ifla_vf_vlan ivv; @@ -428,6 +465,7 @@ int iplink_parse(int argc, char **argv, struct iplink_req *req, int numrxqueues = -1; int dev_index = 0; int link_netnsid = -1; + int addr_len = 0; *group = -1; ret = argc; @@ -452,10 +490,10 @@ int iplink_parse(int argc, char **argv, struct iplink_req *req, *link = *argv; } else if (matches(*argv, "address") == 0) { NEXT_ARG(); - len = ll_addr_a2n(abuf, sizeof(abuf), *argv); + addr_len = ll_addr_a2n(abuf, sizeof(abuf), *argv); if (len < 0) return -1; - addattr_l(&req->n, sizeof(*req), IFLA_ADDRESS, abuf, len); + addattr_l(&req->n, sizeof(*req), IFLA_ADDRESS, abuf, addr_len); } else if (matches(*argv, "broadcast") == 0 || strcmp(*argv, "brd") == 0) { NEXT_ARG(); @@ -677,6 +715,16 @@ int iplink_parse(int argc, char **argv, struct iplink_req *req, argc--; argv++; } + if (dev_index && addr_len) { + int halen = nl_get_ll_addr_len(dev_index); + if (halen >= 0 && halen != addr_len) { + fprintf(stderr, + "Invalid address length %d - must be %d bytes\n", + addr_len, halen); + return -1; + } + } + return ret - argc; } diff --git a/misc/ss.c b/misc/ss.c index 9c456d4f..02be7e74 100644 --- a/misc/ss.c +++ b/misc/ss.c @@ -766,6 +766,8 @@ struct tcpstat { unsigned long long bytes_received; unsigned int segs_out; unsigned int segs_in; + unsigned int data_segs_out; + unsigned int data_segs_in; unsigned int unacked; unsigned int retrans; unsigned int retrans_total; @@ -1716,6 +1718,10 @@ static void tcp_stats_print(struct tcpstat *s) printf(" segs_out:%u", s->segs_out); if (s->segs_in) printf(" segs_in:%u", s->segs_in); + if (s->data_segs_out) + printf(" data_segs_out:%u", s->data_segs_out); + if (s->data_segs_in) + printf(" data_segs_in:%u", s->data_segs_in); if (s->dctcp && s->dctcp->enabled) { struct dctcpstat *dctcp = s->dctcp; @@ -2022,6 +2028,8 @@ static void tcp_show_info(const struct nlmsghdr *nlh, struct inet_diag_msg *r, s.bytes_received = info->tcpi_bytes_received; s.segs_out = info->tcpi_segs_out; s.segs_in = info->tcpi_segs_in; + s.data_segs_out = info->tcpi_data_segs_out; + s.data_segs_in = info->tcpi_data_segs_in; s.not_sent = info->tcpi_notsent_bytes; if (info->tcpi_min_rtt && info->tcpi_min_rtt != ~0U) s.min_rtt = (double) info->tcpi_min_rtt / 1000;