mirror of
				https://github.com/torvalds/linux.git
				synced 2025-11-04 10:40:15 +02:00 
			
		
		
		
	rds: add type of service(tos) infrastructure
RDS Service type (TOS) is user-defined and needs to be configured via the RDS IOCTL interface. It must be set before initiating any traffic, and once set, the TOS cannot be changed. All outgoing traffic from the socket will be associated with its TOS.

Reviewed-by: Sowmini Varadhan <sowmini.varadhan@oracle.com>
Signed-off-by: Santosh Shilimkar <santosh.shilimkar@oracle.com>
[yanjun.zhu@oracle.com: Adapted original patch with ipv6 changes]
Signed-off-by: Zhu Yanjun <yanjun.zhu@oracle.com>
This commit is contained in:
		
							parent
							
								
									d021fabf52
								
							
						
					
					
						commit
						3eb450367d
					
				
					 11 changed files with 72 additions and 17 deletions
				
			
		| 
						 | 
				
			
			@ -69,6 +69,12 @@
 | 
			
		|||
#define RDS_TRANS_COUNT	3
 | 
			
		||||
#define	RDS_TRANS_NONE	(~0)
 | 
			
		||||
 | 
			
		||||
/* IOCTLS commands for SOL_RDS */
 | 
			
		||||
#define SIOCRDSSETTOS		(SIOCPROTOPRIVATE)
 | 
			
		||||
#define SIOCRDSGETTOS		(SIOCPROTOPRIVATE + 1)
 | 
			
		||||
 | 
			
		||||
typedef __u8	rds_tos_t;
 | 
			
		||||
 | 
			
		||||
/*
 | 
			
		||||
 * Control message types for SOL_RDS.
 | 
			
		||||
 *
 | 
			
		||||
| 
						 | 
				
			
			@ -149,6 +155,7 @@ struct rds_info_connection {
 | 
			
		|||
	__be32		faddr;
 | 
			
		||||
	__u8		transport[TRANSNAMSIZ];		/* null term ascii */
 | 
			
		||||
	__u8		flags;
 | 
			
		||||
	__u8		tos;
 | 
			
		||||
} __attribute__((packed));
 | 
			
		||||
 | 
			
		||||
struct rds6_info_connection {
 | 
			
		||||
| 
						 | 
				
			
			@ -171,6 +178,7 @@ struct rds_info_message {
 | 
			
		|||
	__be16		lport;
 | 
			
		||||
	__be16		fport;
 | 
			
		||||
	__u8		flags;
 | 
			
		||||
	__u8		tos;
 | 
			
		||||
} __attribute__((packed));
 | 
			
		||||
 | 
			
		||||
struct rds6_info_message {
 | 
			
		||||
| 
						 | 
				
			
			@ -214,6 +222,7 @@ struct rds_info_tcp_socket {
 | 
			
		|||
	__u32           last_sent_nxt;
 | 
			
		||||
	__u32           last_expected_una;
 | 
			
		||||
	__u32           last_seen_una;
 | 
			
		||||
	__u8		tos;
 | 
			
		||||
} __attribute__((packed));
 | 
			
		||||
 | 
			
		||||
struct rds6_info_tcp_socket {
 | 
			
		||||
| 
						 | 
				
			
			@ -240,6 +249,7 @@ struct rds_info_rdma_connection {
 | 
			
		|||
	__u32		max_send_sge;
 | 
			
		||||
	__u32		rdma_mr_max;
 | 
			
		||||
	__u32		rdma_mr_size;
 | 
			
		||||
	__u8		tos;
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
struct rds6_info_rdma_connection {
 | 
			
		||||
| 
						 | 
				
			
			@ -253,6 +263,7 @@ struct rds6_info_rdma_connection {
 | 
			
		|||
	__u32		max_send_sge;
 | 
			
		||||
	__u32		rdma_mr_max;
 | 
			
		||||
	__u32		rdma_mr_size;
 | 
			
		||||
	__u8		tos;
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
/* RDS message Receive Path Latency points */
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -254,7 +254,38 @@ static __poll_t rds_poll(struct file *file, struct socket *sock,
 | 
			
		|||
 | 
			
		||||
static int rds_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
 | 
			
		||||
{
 | 
			
		||||
	return -ENOIOCTLCMD;
 | 
			
		||||
	struct rds_sock *rs = rds_sk_to_rs(sock->sk);
 | 
			
		||||
	rds_tos_t tos;
 | 
			
		||||
 | 
			
		||||
	switch (cmd) {
 | 
			
		||||
	case SIOCRDSSETTOS:
 | 
			
		||||
		if (get_user(tos, (rds_tos_t __user *)arg))
 | 
			
		||||
			return -EFAULT;
 | 
			
		||||
 | 
			
		||||
		if (rs->rs_transport &&
 | 
			
		||||
		    rs->rs_transport->t_type == RDS_TRANS_TCP)
 | 
			
		||||
			tos = 0;
 | 
			
		||||
 | 
			
		||||
		spin_lock_bh(&rds_sock_lock);
 | 
			
		||||
		if (rs->rs_tos || rs->rs_conn) {
 | 
			
		||||
			spin_unlock_bh(&rds_sock_lock);
 | 
			
		||||
			return -EINVAL;
 | 
			
		||||
		}
 | 
			
		||||
		rs->rs_tos = tos;
 | 
			
		||||
		spin_unlock_bh(&rds_sock_lock);
 | 
			
		||||
		break;
 | 
			
		||||
	case SIOCRDSGETTOS:
 | 
			
		||||
		spin_lock_bh(&rds_sock_lock);
 | 
			
		||||
		tos = rs->rs_tos;
 | 
			
		||||
		spin_unlock_bh(&rds_sock_lock);
 | 
			
		||||
		if (put_user(tos, (rds_tos_t __user *)arg))
 | 
			
		||||
			return -EFAULT;
 | 
			
		||||
		break;
 | 
			
		||||
	default:
 | 
			
		||||
		return -ENOIOCTLCMD;
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	return 0;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
static int rds_cancel_sent_to(struct rds_sock *rs, char __user *optval,
 | 
			
		||||
| 
						 | 
				
			
			@ -650,6 +681,8 @@ static int __rds_create(struct socket *sock, struct sock *sk, int protocol)
 | 
			
		|||
	spin_lock_init(&rs->rs_rdma_lock);
 | 
			
		||||
	rs->rs_rdma_keys = RB_ROOT;
 | 
			
		||||
	rs->rs_rx_traces = 0;
 | 
			
		||||
	rs->rs_tos = 0;
 | 
			
		||||
	rs->rs_conn = NULL;
 | 
			
		||||
 | 
			
		||||
	spin_lock_bh(&rds_sock_lock);
 | 
			
		||||
	list_add_tail(&rs->rs_item, &rds_sock_list);
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -84,7 +84,7 @@ static struct rds_connection *rds_conn_lookup(struct net *net,
 | 
			
		|||
					      const struct in6_addr *laddr,
 | 
			
		||||
					      const struct in6_addr *faddr,
 | 
			
		||||
					      struct rds_transport *trans,
 | 
			
		||||
					      int dev_if)
 | 
			
		||||
					      u8 tos, int dev_if)
 | 
			
		||||
{
 | 
			
		||||
	struct rds_connection *conn, *ret = NULL;
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			@ -92,6 +92,7 @@ static struct rds_connection *rds_conn_lookup(struct net *net,
 | 
			
		|||
		if (ipv6_addr_equal(&conn->c_faddr, faddr) &&
 | 
			
		||||
		    ipv6_addr_equal(&conn->c_laddr, laddr) &&
 | 
			
		||||
		    conn->c_trans == trans &&
 | 
			
		||||
		    conn->c_tos == tos &&
 | 
			
		||||
		    net == rds_conn_net(conn) &&
 | 
			
		||||
		    conn->c_dev_if == dev_if) {
 | 
			
		||||
			ret = conn;
 | 
			
		||||
| 
						 | 
				
			
			@ -160,7 +161,7 @@ static struct rds_connection *__rds_conn_create(struct net *net,
 | 
			
		|||
						const struct in6_addr *laddr,
 | 
			
		||||
						const struct in6_addr *faddr,
 | 
			
		||||
						struct rds_transport *trans,
 | 
			
		||||
						gfp_t gfp,
 | 
			
		||||
						gfp_t gfp, u8 tos,
 | 
			
		||||
						int is_outgoing,
 | 
			
		||||
						int dev_if)
 | 
			
		||||
{
 | 
			
		||||
| 
						 | 
				
			
			@ -172,7 +173,7 @@ static struct rds_connection *__rds_conn_create(struct net *net,
 | 
			
		|||
	int npaths = (trans->t_mp_capable ? RDS_MPATH_WORKERS : 1);
 | 
			
		||||
 | 
			
		||||
	rcu_read_lock();
 | 
			
		||||
	conn = rds_conn_lookup(net, head, laddr, faddr, trans, dev_if);
 | 
			
		||||
	conn = rds_conn_lookup(net, head, laddr, faddr, trans, tos, dev_if);
 | 
			
		||||
	if (conn &&
 | 
			
		||||
	    conn->c_loopback &&
 | 
			
		||||
	    conn->c_trans != &rds_loop_transport &&
 | 
			
		||||
| 
						 | 
				
			
			@ -206,6 +207,7 @@ static struct rds_connection *__rds_conn_create(struct net *net,
 | 
			
		|||
	conn->c_isv6 = !ipv6_addr_v4mapped(laddr);
 | 
			
		||||
	conn->c_faddr = *faddr;
 | 
			
		||||
	conn->c_dev_if = dev_if;
 | 
			
		||||
	conn->c_tos = tos;
 | 
			
		||||
 | 
			
		||||
#if IS_ENABLED(CONFIG_IPV6)
 | 
			
		||||
	/* If the local address is link local, set c_bound_if to be the
 | 
			
		||||
| 
						 | 
				
			
			@ -298,7 +300,7 @@ static struct rds_connection *__rds_conn_create(struct net *net,
 | 
			
		|||
		struct rds_connection *found;
 | 
			
		||||
 | 
			
		||||
		found = rds_conn_lookup(net, head, laddr, faddr, trans,
 | 
			
		||||
					dev_if);
 | 
			
		||||
					tos, dev_if);
 | 
			
		||||
		if (found) {
 | 
			
		||||
			struct rds_conn_path *cp;
 | 
			
		||||
			int i;
 | 
			
		||||
| 
						 | 
				
			
			@ -333,10 +335,10 @@ static struct rds_connection *__rds_conn_create(struct net *net,
 | 
			
		|||
struct rds_connection *rds_conn_create(struct net *net,
 | 
			
		||||
				       const struct in6_addr *laddr,
 | 
			
		||||
				       const struct in6_addr *faddr,
 | 
			
		||||
				       struct rds_transport *trans, gfp_t gfp,
 | 
			
		||||
				       int dev_if)
 | 
			
		||||
				       struct rds_transport *trans, u8 tos,
 | 
			
		||||
				       gfp_t gfp, int dev_if)
 | 
			
		||||
{
 | 
			
		||||
	return __rds_conn_create(net, laddr, faddr, trans, gfp, 0, dev_if);
 | 
			
		||||
	return __rds_conn_create(net, laddr, faddr, trans, gfp, tos, 0, dev_if);
 | 
			
		||||
}
 | 
			
		||||
EXPORT_SYMBOL_GPL(rds_conn_create);
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			@ -344,9 +346,9 @@ struct rds_connection *rds_conn_create_outgoing(struct net *net,
 | 
			
		|||
						const struct in6_addr *laddr,
 | 
			
		||||
						const struct in6_addr *faddr,
 | 
			
		||||
						struct rds_transport *trans,
 | 
			
		||||
						gfp_t gfp, int dev_if)
 | 
			
		||||
						u8 tos, gfp_t gfp, int dev_if)
 | 
			
		||||
{
 | 
			
		||||
	return __rds_conn_create(net, laddr, faddr, trans, gfp, 1, dev_if);
 | 
			
		||||
	return __rds_conn_create(net, laddr, faddr, trans, gfp, tos, 1, dev_if);
 | 
			
		||||
}
 | 
			
		||||
EXPORT_SYMBOL_GPL(rds_conn_create_outgoing);
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -301,6 +301,7 @@ static int rds_ib_conn_info_visitor(struct rds_connection *conn,
 | 
			
		|||
 | 
			
		||||
	iinfo->src_addr = conn->c_laddr.s6_addr32[3];
 | 
			
		||||
	iinfo->dst_addr = conn->c_faddr.s6_addr32[3];
 | 
			
		||||
	iinfo->tos = conn->c_tos;
 | 
			
		||||
 | 
			
		||||
	memset(&iinfo->src_gid, 0, sizeof(iinfo->src_gid));
 | 
			
		||||
	memset(&iinfo->dst_gid, 0, sizeof(iinfo->dst_gid));
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -786,7 +786,7 @@ int rds_ib_cm_handle_connect(struct rdma_cm_id *cm_id,
 | 
			
		|||
 | 
			
		||||
	/* RDS/IB is not currently netns aware, thus init_net */
 | 
			
		||||
	conn = rds_conn_create(&init_net, daddr6, saddr6,
 | 
			
		||||
			       &rds_ib_transport, GFP_KERNEL, ifindex);
 | 
			
		||||
			       &rds_ib_transport, 0, GFP_KERNEL, ifindex);
 | 
			
		||||
	if (IS_ERR(conn)) {
 | 
			
		||||
		rdsdebug("rds_conn_create failed (%ld)\n", PTR_ERR(conn));
 | 
			
		||||
		conn = NULL;
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -115,6 +115,7 @@ static int rds_rdma_cm_event_handler_cmn(struct rdma_cm_id *cm_id,
 | 
			
		|||
			pr_warn("RDS/RDMA: conn <%pI6c, %pI6c> rejected, dropping connection\n",
 | 
			
		||||
				&conn->c_laddr, &conn->c_faddr);
 | 
			
		||||
			conn->c_proposed_version = RDS_PROTOCOL_COMPAT_VERSION;
 | 
			
		||||
			conn->c_tos = 0;
 | 
			
		||||
			rds_conn_drop(conn);
 | 
			
		||||
		}
 | 
			
		||||
		rdsdebug("Connection rejected: %s\n",
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -158,6 +158,9 @@ struct rds_connection {
 | 
			
		|||
	unsigned int		c_version;
 | 
			
		||||
	possible_net_t		c_net;
 | 
			
		||||
 | 
			
		||||
	/* TOS */
 | 
			
		||||
	u8			c_tos;
 | 
			
		||||
 | 
			
		||||
	struct list_head	c_map_item;
 | 
			
		||||
	unsigned long		c_map_queued;
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			@ -652,6 +655,7 @@ struct rds_sock {
 | 
			
		|||
	u8			rs_rx_traces;
 | 
			
		||||
	u8			rs_rx_trace[RDS_MSG_RX_DGRAM_TRACE_MAX];
 | 
			
		||||
	struct rds_msg_zcopy_queue rs_zcookie_queue;
 | 
			
		||||
	u8			rs_tos;
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
static inline struct rds_sock *rds_sk_to_rs(const struct sock *sk)
 | 
			
		||||
| 
						 | 
				
			
			@ -760,13 +764,14 @@ void rds_conn_exit(void);
 | 
			
		|||
struct rds_connection *rds_conn_create(struct net *net,
 | 
			
		||||
				       const struct in6_addr *laddr,
 | 
			
		||||
				       const struct in6_addr *faddr,
 | 
			
		||||
				       struct rds_transport *trans, gfp_t gfp,
 | 
			
		||||
				       struct rds_transport *trans,
 | 
			
		||||
				       u8 tos, gfp_t gfp,
 | 
			
		||||
				       int dev_if);
 | 
			
		||||
struct rds_connection *rds_conn_create_outgoing(struct net *net,
 | 
			
		||||
						const struct in6_addr *laddr,
 | 
			
		||||
						const struct in6_addr *faddr,
 | 
			
		||||
						struct rds_transport *trans,
 | 
			
		||||
						gfp_t gfp, int dev_if);
 | 
			
		||||
						u8 tos, gfp_t gfp, int dev_if);
 | 
			
		||||
void rds_conn_shutdown(struct rds_conn_path *cpath);
 | 
			
		||||
void rds_conn_destroy(struct rds_connection *conn);
 | 
			
		||||
void rds_conn_drop(struct rds_connection *conn);
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -782,6 +782,7 @@ void rds_inc_info_copy(struct rds_incoming *inc,
 | 
			
		|||
 | 
			
		||||
	minfo.seq = be64_to_cpu(inc->i_hdr.h_sequence);
 | 
			
		||||
	minfo.len = be32_to_cpu(inc->i_hdr.h_len);
 | 
			
		||||
	minfo.tos = inc->i_conn->c_tos;
 | 
			
		||||
 | 
			
		||||
	if (flip) {
 | 
			
		||||
		minfo.laddr = daddr;
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -1277,12 +1277,12 @@ int rds_sendmsg(struct socket *sock, struct msghdr *msg, size_t payload_len)
 | 
			
		|||
 | 
			
		||||
	/* rds_conn_create has a spinlock that runs with IRQ off.
 | 
			
		||||
	 * Caching the conn in the socket helps a lot. */
 | 
			
		||||
	if (rs->rs_conn && ipv6_addr_equal(&rs->rs_conn->c_faddr, &daddr))
 | 
			
		||||
	if (rs->rs_conn && ipv6_addr_equal(&rs->rs_conn->c_faddr, &daddr)) {
 | 
			
		||||
		conn = rs->rs_conn;
 | 
			
		||||
	else {
 | 
			
		||||
	} else {
 | 
			
		||||
		conn = rds_conn_create_outgoing(sock_net(sock->sk),
 | 
			
		||||
						&rs->rs_bound_addr, &daddr,
 | 
			
		||||
						rs->rs_transport,
 | 
			
		||||
						rs->rs_transport, 0,
 | 
			
		||||
						sock->sk->sk_allocation,
 | 
			
		||||
						scope_id);
 | 
			
		||||
		if (IS_ERR(conn)) {
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -267,6 +267,7 @@ static void rds_tcp_tc_info(struct socket *rds_sock, unsigned int len,
 | 
			
		|||
		tsinfo.last_sent_nxt = tc->t_last_sent_nxt;
 | 
			
		||||
		tsinfo.last_expected_una = tc->t_last_expected_una;
 | 
			
		||||
		tsinfo.last_seen_una = tc->t_last_seen_una;
 | 
			
		||||
		tsinfo.tos = tc->t_cpath->cp_conn->c_tos;
 | 
			
		||||
 | 
			
		||||
		rds_info_copy(iter, &tsinfo, sizeof(tsinfo));
 | 
			
		||||
	}
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -200,7 +200,7 @@ int rds_tcp_accept_one(struct socket *sock)
 | 
			
		|||
 | 
			
		||||
	conn = rds_conn_create(sock_net(sock->sk),
 | 
			
		||||
			       my_addr, peer_addr,
 | 
			
		||||
			       &rds_tcp_transport, GFP_KERNEL, dev_if);
 | 
			
		||||
			       &rds_tcp_transport, 0, GFP_KERNEL, dev_if);
 | 
			
		||||
 | 
			
		||||
	if (IS_ERR(conn)) {
 | 
			
		||||
		ret = PTR_ERR(conn);
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
		Loading…
	
		Reference in a new issue