mirror of
				https://github.com/torvalds/linux.git
				synced 2025-11-04 02:30:34 +02:00 
			
		
		
		
	rds: Enable RDS IPv6 support
This patch enables RDS to use IPv6 addresses. For RDS/TCP, the
listener is now an IPv6 endpoint which accepts both IPv4 and IPv6
connection requests.  RDS/RDMA/IB uses a private data (struct
rds_ib_connect_private) exchange between endpoints at RDS connection
establishment time to support RDMA. This private data exchange uses a
32 bit integer to represent an IP address. This needs to be changed in
order to support IPv6. A new private data struct
rds6_ib_connect_private is introduced to handle this. To ensure
backward compatibility, an IPv6 capable RDS stack uses another RDMA
listener port (RDS_CM_PORT) to accept IPv6 connections. And it
continues to use the original RDS_PORT for IPv4 RDS connections. When
it needs to communicate with an IPv6 peer, it uses the RDS_CM_PORT to
send the connection set up request.
v5: Fixed syntax problem (David Miller).
v4: Changed port history comments in rds.h (Sowmini Varadhan).
v3: Added support to set up IPv4 connection using mapped address
    (David Miller).
    Added support to set up connection between link local and non-link
    addresses.
    Various review comments from Santosh Shilimkar and Sowmini Varadhan.
v2: Fixed bound and peer address scope mismatched issue.
    Added back rds_connect() IPv6 changes.
Signed-off-by: Ka-Cheong Poon <ka-cheong.poon@oracle.com>
Acked-by: Santosh Shilimkar <santosh.shilimkar@oracle.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
			
			
This commit is contained in:
		
							parent
							
								
									eee2fa6ab3
								
							
						
					
					
						commit
						1e2b44e78e
					
				
					 14 changed files with 459 additions and 114 deletions
				
			
		| 
						 | 
				
			
			@ -142,15 +142,32 @@ static int rds_getname(struct socket *sock, struct sockaddr *uaddr,
 | 
			
		|||
			uaddr_len = sizeof(*sin6);
 | 
			
		||||
		}
 | 
			
		||||
	} else {
 | 
			
		||||
		/* If socket is not yet bound, set the return address family
 | 
			
		||||
		 * to be AF_UNSPEC (value 0) and the address size to be that
 | 
			
		||||
		 * of an IPv4 address.
 | 
			
		||||
		/* If socket is not yet bound and the socket is connected,
 | 
			
		||||
		 * set the return address family to be the same as the
 | 
			
		||||
		 * connected address, but with 0 address value.  If it is not
 | 
			
		||||
		 * connected, set the family to be AF_UNSPEC (value 0) and
 | 
			
		||||
		 * the address size to be that of an IPv4 address.
 | 
			
		||||
		 */
 | 
			
		||||
		if (ipv6_addr_any(&rs->rs_bound_addr)) {
 | 
			
		||||
			sin = (struct sockaddr_in *)uaddr;
 | 
			
		||||
			memset(sin, 0, sizeof(*sin));
 | 
			
		||||
			sin->sin_family = AF_UNSPEC;
 | 
			
		||||
			return sizeof(*sin);
 | 
			
		||||
			if (ipv6_addr_any(&rs->rs_conn_addr)) {
 | 
			
		||||
				sin = (struct sockaddr_in *)uaddr;
 | 
			
		||||
				memset(sin, 0, sizeof(*sin));
 | 
			
		||||
				sin->sin_family = AF_UNSPEC;
 | 
			
		||||
				return sizeof(*sin);
 | 
			
		||||
			}
 | 
			
		||||
 | 
			
		||||
			if (ipv6_addr_type(&rs->rs_conn_addr) &
 | 
			
		||||
			    IPV6_ADDR_MAPPED) {
 | 
			
		||||
				sin = (struct sockaddr_in *)uaddr;
 | 
			
		||||
				memset(sin, 0, sizeof(*sin));
 | 
			
		||||
				sin->sin_family = AF_INET;
 | 
			
		||||
				return sizeof(*sin);
 | 
			
		||||
			}
 | 
			
		||||
 | 
			
		||||
			sin6 = (struct sockaddr_in6 *)uaddr;
 | 
			
		||||
			memset(sin6, 0, sizeof(*sin6));
 | 
			
		||||
			sin6->sin6_family = AF_INET6;
 | 
			
		||||
			return sizeof(*sin6);
 | 
			
		||||
		}
 | 
			
		||||
		if (ipv6_addr_v4mapped(&rs->rs_bound_addr)) {
 | 
			
		||||
			sin = (struct sockaddr_in *)uaddr;
 | 
			
		||||
| 
						 | 
				
			
			@ -484,16 +501,18 @@ static int rds_connect(struct socket *sock, struct sockaddr *uaddr,
 | 
			
		|||
{
 | 
			
		||||
	struct sock *sk = sock->sk;
 | 
			
		||||
	struct sockaddr_in *sin;
 | 
			
		||||
	struct sockaddr_in6 *sin6;
 | 
			
		||||
	struct rds_sock *rs = rds_sk_to_rs(sk);
 | 
			
		||||
	int addr_type;
 | 
			
		||||
	int ret = 0;
 | 
			
		||||
 | 
			
		||||
	lock_sock(sk);
 | 
			
		||||
 | 
			
		||||
	switch (addr_len) {
 | 
			
		||||
	case sizeof(struct sockaddr_in):
 | 
			
		||||
	switch (uaddr->sa_family) {
 | 
			
		||||
	case AF_INET:
 | 
			
		||||
		sin = (struct sockaddr_in *)uaddr;
 | 
			
		||||
		if (sin->sin_family != AF_INET) {
 | 
			
		||||
			ret = -EAFNOSUPPORT;
 | 
			
		||||
		if (addr_len < sizeof(struct sockaddr_in)) {
 | 
			
		||||
			ret = -EINVAL;
 | 
			
		||||
			break;
 | 
			
		||||
		}
 | 
			
		||||
		if (sin->sin_addr.s_addr == htonl(INADDR_ANY)) {
 | 
			
		||||
| 
						 | 
				
			
			@ -509,12 +528,56 @@ static int rds_connect(struct socket *sock, struct sockaddr *uaddr,
 | 
			
		|||
		rs->rs_conn_port = sin->sin_port;
 | 
			
		||||
		break;
 | 
			
		||||
 | 
			
		||||
	case sizeof(struct sockaddr_in6):
 | 
			
		||||
		ret = -EPROTONOSUPPORT;
 | 
			
		||||
	case AF_INET6:
 | 
			
		||||
		sin6 = (struct sockaddr_in6 *)uaddr;
 | 
			
		||||
		if (addr_len < sizeof(struct sockaddr_in6)) {
 | 
			
		||||
			ret = -EINVAL;
 | 
			
		||||
			break;
 | 
			
		||||
		}
 | 
			
		||||
		addr_type = ipv6_addr_type(&sin6->sin6_addr);
 | 
			
		||||
		if (!(addr_type & IPV6_ADDR_UNICAST)) {
 | 
			
		||||
			__be32 addr4;
 | 
			
		||||
 | 
			
		||||
			if (!(addr_type & IPV6_ADDR_MAPPED)) {
 | 
			
		||||
				ret = -EPROTOTYPE;
 | 
			
		||||
				break;
 | 
			
		||||
			}
 | 
			
		||||
 | 
			
		||||
			/* It is a mapped address.  Need to do some sanity
 | 
			
		||||
			 * checks.
 | 
			
		||||
			 */
 | 
			
		||||
			addr4 = sin6->sin6_addr.s6_addr32[3];
 | 
			
		||||
			if (addr4 == htonl(INADDR_ANY) ||
 | 
			
		||||
			    addr4 == htonl(INADDR_BROADCAST) ||
 | 
			
		||||
			    IN_MULTICAST(ntohl(addr4))) {
 | 
			
		||||
				ret = -EPROTOTYPE;
 | 
			
		||||
				break;
 | 
			
		||||
			}
 | 
			
		||||
		}
 | 
			
		||||
 | 
			
		||||
		if (addr_type & IPV6_ADDR_LINKLOCAL) {
 | 
			
		||||
			/* If socket is already bound to a link local address,
 | 
			
		||||
			 * the peer address must be on the same link.
 | 
			
		||||
			 */
 | 
			
		||||
			if (sin6->sin6_scope_id == 0 ||
 | 
			
		||||
			    (!ipv6_addr_any(&rs->rs_bound_addr) &&
 | 
			
		||||
			     rs->rs_bound_scope_id &&
 | 
			
		||||
			     sin6->sin6_scope_id != rs->rs_bound_scope_id)) {
 | 
			
		||||
				ret = -EINVAL;
 | 
			
		||||
				break;
 | 
			
		||||
			}
 | 
			
		||||
			/* Remember the connected address scope ID.  It will
 | 
			
		||||
			 * be checked against the binding local address when
 | 
			
		||||
			 * the socket is bound.
 | 
			
		||||
			 */
 | 
			
		||||
			rs->rs_bound_scope_id = sin6->sin6_scope_id;
 | 
			
		||||
		}
 | 
			
		||||
		rs->rs_conn_addr = sin6->sin6_addr;
 | 
			
		||||
		rs->rs_conn_port = sin6->sin6_port;
 | 
			
		||||
		break;
 | 
			
		||||
 | 
			
		||||
	default:
 | 
			
		||||
		ret = -EINVAL;
 | 
			
		||||
		ret = -EAFNOSUPPORT;
 | 
			
		||||
		break;
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -127,9 +127,10 @@ static int rds_add_bound(struct rds_sock *rs, const struct in6_addr *addr,
 | 
			
		|||
		if (!rhashtable_insert_fast(&bind_hash_table,
 | 
			
		||||
					    &rs->rs_bound_node, ht_parms)) {
 | 
			
		||||
			*port = rs->rs_bound_port;
 | 
			
		||||
			rs->rs_bound_scope_id = scope_id;
 | 
			
		||||
			ret = 0;
 | 
			
		||||
			rdsdebug("rs %p binding to %pI4:%d\n",
 | 
			
		||||
			  rs, &addr, (int)ntohs(*port));
 | 
			
		||||
			rdsdebug("rs %p binding to %pI6c:%d\n",
 | 
			
		||||
				 rs, addr, (int)ntohs(*port));
 | 
			
		||||
			break;
 | 
			
		||||
		} else {
 | 
			
		||||
			rs->rs_bound_addr = in6addr_any;
 | 
			
		||||
| 
						 | 
				
			
			@ -164,23 +165,53 @@ int rds_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
 | 
			
		|||
	struct in6_addr v6addr, *binding_addr;
 | 
			
		||||
	struct rds_transport *trans;
 | 
			
		||||
	__u32 scope_id = 0;
 | 
			
		||||
	int addr_type;
 | 
			
		||||
	int ret = 0;
 | 
			
		||||
	__be16 port;
 | 
			
		||||
 | 
			
		||||
	/* We only allow an RDS socket to be bound to an IPv4 address. IPv6
 | 
			
		||||
	 * address support will be added later.
 | 
			
		||||
	/* We allow an RDS socket to be bound to either IPv4 or IPv6
 | 
			
		||||
	 * address.
 | 
			
		||||
	 */
 | 
			
		||||
	if (addr_len == sizeof(struct sockaddr_in)) {
 | 
			
		||||
	if (uaddr->sa_family == AF_INET) {
 | 
			
		||||
		struct sockaddr_in *sin = (struct sockaddr_in *)uaddr;
 | 
			
		||||
 | 
			
		||||
		if (sin->sin_family != AF_INET ||
 | 
			
		||||
		    sin->sin_addr.s_addr == htonl(INADDR_ANY))
 | 
			
		||||
		if (addr_len < sizeof(struct sockaddr_in) ||
 | 
			
		||||
		    sin->sin_addr.s_addr == htonl(INADDR_ANY) ||
 | 
			
		||||
		    sin->sin_addr.s_addr == htonl(INADDR_BROADCAST) ||
 | 
			
		||||
		    IN_MULTICAST(ntohl(sin->sin_addr.s_addr)))
 | 
			
		||||
			return -EINVAL;
 | 
			
		||||
		ipv6_addr_set_v4mapped(sin->sin_addr.s_addr, &v6addr);
 | 
			
		||||
		binding_addr = &v6addr;
 | 
			
		||||
		port = sin->sin_port;
 | 
			
		||||
	} else if (addr_len == sizeof(struct sockaddr_in6)) {
 | 
			
		||||
		return -EPROTONOSUPPORT;
 | 
			
		||||
	} else if (uaddr->sa_family == AF_INET6) {
 | 
			
		||||
		struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)uaddr;
 | 
			
		||||
 | 
			
		||||
		if (addr_len < sizeof(struct sockaddr_in6))
 | 
			
		||||
			return -EINVAL;
 | 
			
		||||
		addr_type = ipv6_addr_type(&sin6->sin6_addr);
 | 
			
		||||
		if (!(addr_type & IPV6_ADDR_UNICAST)) {
 | 
			
		||||
			__be32 addr4;
 | 
			
		||||
 | 
			
		||||
			if (!(addr_type & IPV6_ADDR_MAPPED))
 | 
			
		||||
				return -EINVAL;
 | 
			
		||||
 | 
			
		||||
			/* It is a mapped address.  Need to do some sanity
 | 
			
		||||
			 * checks.
 | 
			
		||||
			 */
 | 
			
		||||
			addr4 = sin6->sin6_addr.s6_addr32[3];
 | 
			
		||||
			if (addr4 == htonl(INADDR_ANY) ||
 | 
			
		||||
			    addr4 == htonl(INADDR_BROADCAST) ||
 | 
			
		||||
			    IN_MULTICAST(ntohl(addr4)))
 | 
			
		||||
				return -EINVAL;
 | 
			
		||||
		}
 | 
			
		||||
		/* The scope ID must be specified for link local address. */
 | 
			
		||||
		if (addr_type & IPV6_ADDR_LINKLOCAL) {
 | 
			
		||||
			if (sin6->sin6_scope_id == 0)
 | 
			
		||||
				return -EINVAL;
 | 
			
		||||
			scope_id = sin6->sin6_scope_id;
 | 
			
		||||
		}
 | 
			
		||||
		binding_addr = &sin6->sin6_addr;
 | 
			
		||||
		port = sin6->sin6_port;
 | 
			
		||||
	} else {
 | 
			
		||||
		return -EINVAL;
 | 
			
		||||
	}
 | 
			
		||||
| 
						 | 
				
			
			@ -191,6 +222,16 @@ int rds_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
 | 
			
		|||
		ret = -EINVAL;
 | 
			
		||||
		goto out;
 | 
			
		||||
	}
 | 
			
		||||
	/* Socket is connected.  The binding address should have the same
 | 
			
		||||
	 * scope ID as the connected address, except the case when one is
 | 
			
		||||
	 * non-link local address (scope_id is 0).
 | 
			
		||||
	 */
 | 
			
		||||
	if (!ipv6_addr_any(&rs->rs_conn_addr) && scope_id &&
 | 
			
		||||
	    rs->rs_bound_scope_id &&
 | 
			
		||||
	    scope_id != rs->rs_bound_scope_id) {
 | 
			
		||||
		ret = -EINVAL;
 | 
			
		||||
		goto out;
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	ret = rds_add_bound(rs, binding_addr, &port, scope_id);
 | 
			
		||||
	if (ret)
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -1,5 +1,5 @@
 | 
			
		|||
/*
 | 
			
		||||
 * Copyright (c) 2006, 2017 Oracle and/or its affiliates. All rights reserved.
 | 
			
		||||
 * Copyright (c) 2006, 2018 Oracle and/or its affiliates. All rights reserved.
 | 
			
		||||
 *
 | 
			
		||||
 * This software is available to you under a choice of one of two
 | 
			
		||||
 * licenses.  You may choose to be licensed under the terms of the GNU
 | 
			
		||||
| 
						 | 
				
			
			@ -36,6 +36,7 @@
 | 
			
		|||
#include <linux/export.h>
 | 
			
		||||
#include <net/ipv6.h>
 | 
			
		||||
#include <net/inet6_hashtables.h>
 | 
			
		||||
#include <net/addrconf.h>
 | 
			
		||||
 | 
			
		||||
#include "rds.h"
 | 
			
		||||
#include "loop.h"
 | 
			
		||||
| 
						 | 
				
			
			@ -200,6 +201,15 @@ static struct rds_connection *__rds_conn_create(struct net *net,
 | 
			
		|||
	conn->c_isv6 = !ipv6_addr_v4mapped(laddr);
 | 
			
		||||
	conn->c_faddr = *faddr;
 | 
			
		||||
	conn->c_dev_if = dev_if;
 | 
			
		||||
	/* If the local address is link local, set c_bound_if to be the
 | 
			
		||||
	 * index used for this connection.  Otherwise, set it to 0 as
 | 
			
		||||
	 * the socket is not bound to an interface.  c_bound_if is used
 | 
			
		||||
	 * to look up a socket when a packet is received
 | 
			
		||||
	 */
 | 
			
		||||
	if (ipv6_addr_type(laddr) & IPV6_ADDR_LINKLOCAL)
 | 
			
		||||
		conn->c_bound_if = dev_if;
 | 
			
		||||
	else
 | 
			
		||||
		conn->c_bound_if = 0;
 | 
			
		||||
 | 
			
		||||
	rds_conn_net_set(conn, net);
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			@ -486,10 +496,18 @@ void rds_conn_destroy(struct rds_connection *conn)
 | 
			
		|||
}
 | 
			
		||||
EXPORT_SYMBOL_GPL(rds_conn_destroy);
 | 
			
		||||
 | 
			
		||||
static void rds_conn_message_info(struct socket *sock, unsigned int len,
 | 
			
		||||
				  struct rds_info_iterator *iter,
 | 
			
		||||
				  struct rds_info_lengths *lens,
 | 
			
		||||
				  int want_send)
 | 
			
		||||
static void __rds_inc_msg_cp(struct rds_incoming *inc,
 | 
			
		||||
			     struct rds_info_iterator *iter,
 | 
			
		||||
			     void *saddr, void *daddr, int flip)
 | 
			
		||||
{
 | 
			
		||||
	rds_inc_info_copy(inc, iter, *(__be32 *)saddr,
 | 
			
		||||
			  *(__be32 *)daddr, flip);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
static void rds_conn_message_info_cmn(struct socket *sock, unsigned int len,
 | 
			
		||||
				      struct rds_info_iterator *iter,
 | 
			
		||||
				      struct rds_info_lengths *lens,
 | 
			
		||||
				      int want_send)
 | 
			
		||||
{
 | 
			
		||||
	struct hlist_head *head;
 | 
			
		||||
	struct list_head *list;
 | 
			
		||||
| 
						 | 
				
			
			@ -524,18 +542,13 @@ static void rds_conn_message_info(struct socket *sock, unsigned int len,
 | 
			
		|||
 | 
			
		||||
				/* XXX too lazy to maintain counts.. */
 | 
			
		||||
				list_for_each_entry(rm, list, m_conn_item) {
 | 
			
		||||
					__be32 laddr;
 | 
			
		||||
					__be32 faddr;
 | 
			
		||||
 | 
			
		||||
					total++;
 | 
			
		||||
					laddr = conn->c_laddr.s6_addr32[3];
 | 
			
		||||
					faddr = conn->c_faddr.s6_addr32[3];
 | 
			
		||||
					if (total <= len)
 | 
			
		||||
						rds_inc_info_copy(&rm->m_inc,
 | 
			
		||||
								  iter,
 | 
			
		||||
								  laddr,
 | 
			
		||||
								  faddr,
 | 
			
		||||
								  0);
 | 
			
		||||
						__rds_inc_msg_cp(&rm->m_inc,
 | 
			
		||||
								 iter,
 | 
			
		||||
								 &conn->c_laddr,
 | 
			
		||||
								 &conn->c_faddr,
 | 
			
		||||
								 0);
 | 
			
		||||
				}
 | 
			
		||||
 | 
			
		||||
				spin_unlock_irqrestore(&cp->cp_lock, flags);
 | 
			
		||||
| 
						 | 
				
			
			@ -548,6 +561,14 @@ static void rds_conn_message_info(struct socket *sock, unsigned int len,
 | 
			
		|||
	lens->each = sizeof(struct rds_info_message);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
static void rds_conn_message_info(struct socket *sock, unsigned int len,
 | 
			
		||||
				  struct rds_info_iterator *iter,
 | 
			
		||||
				  struct rds_info_lengths *lens,
 | 
			
		||||
				  int want_send)
 | 
			
		||||
{
 | 
			
		||||
	rds_conn_message_info_cmn(sock, len, iter, lens, want_send);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
static void rds_conn_message_info_send(struct socket *sock, unsigned int len,
 | 
			
		||||
				       struct rds_info_iterator *iter,
 | 
			
		||||
				       struct rds_info_lengths *lens)
 | 
			
		||||
| 
						 | 
				
			
			@ -655,6 +676,9 @@ static int rds_conn_info_visitor(struct rds_conn_path *cp, void *buffer)
 | 
			
		|||
	struct rds_info_connection *cinfo = buffer;
 | 
			
		||||
	struct rds_connection *conn = cp->cp_conn;
 | 
			
		||||
 | 
			
		||||
	if (conn->c_isv6)
 | 
			
		||||
		return 0;
 | 
			
		||||
 | 
			
		||||
	cinfo->next_tx_seq = cp->cp_next_tx_seq;
 | 
			
		||||
	cinfo->next_rx_seq = cp->cp_next_rx_seq;
 | 
			
		||||
	cinfo->laddr = conn->c_laddr.s6_addr32[3];
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
							
								
								
									
										55
									
								
								net/rds/ib.c
									
									
									
									
									
								
							
							
						
						
									
										55
									
								
								net/rds/ib.c
									
									
									
									
									
								
							| 
						 | 
				
			
			@ -1,5 +1,5 @@
 | 
			
		|||
/*
 | 
			
		||||
 * Copyright (c) 2006, 2017 Oracle and/or its affiliates. All rights reserved.
 | 
			
		||||
 * Copyright (c) 2006, 2018 Oracle and/or its affiliates. All rights reserved.
 | 
			
		||||
 *
 | 
			
		||||
 * This software is available to you under a choice of one of two
 | 
			
		||||
 * licenses.  You may choose to be licensed under the terms of the GNU
 | 
			
		||||
| 
						 | 
				
			
			@ -39,6 +39,7 @@
 | 
			
		|||
#include <linux/delay.h>
 | 
			
		||||
#include <linux/slab.h>
 | 
			
		||||
#include <linux/module.h>
 | 
			
		||||
#include <net/addrconf.h>
 | 
			
		||||
 | 
			
		||||
#include "rds_single_path.h"
 | 
			
		||||
#include "rds.h"
 | 
			
		||||
| 
						 | 
				
			
			@ -295,6 +296,8 @@ static int rds_ib_conn_info_visitor(struct rds_connection *conn,
 | 
			
		|||
	/* We will only ever look at IB transports */
 | 
			
		||||
	if (conn->c_trans != &rds_ib_transport)
 | 
			
		||||
		return 0;
 | 
			
		||||
	if (conn->c_isv6)
 | 
			
		||||
		return 0;
 | 
			
		||||
 | 
			
		||||
	iinfo->src_addr = conn->c_laddr.s6_addr32[3];
 | 
			
		||||
	iinfo->dst_addr = conn->c_faddr.s6_addr32[3];
 | 
			
		||||
| 
						 | 
				
			
			@ -330,7 +333,6 @@ static void rds_ib_ic_info(struct socket *sock, unsigned int len,
 | 
			
		|||
				sizeof(struct rds_info_rdma_connection));
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
/*
 | 
			
		||||
 * Early RDS/IB was built to only bind to an address if there is an IPoIB
 | 
			
		||||
 * device with that address set.
 | 
			
		||||
| 
						 | 
				
			
			@ -346,8 +348,12 @@ static int rds_ib_laddr_check(struct net *net, const struct in6_addr *addr,
 | 
			
		|||
{
 | 
			
		||||
	int ret;
 | 
			
		||||
	struct rdma_cm_id *cm_id;
 | 
			
		||||
	struct sockaddr_in6 sin6;
 | 
			
		||||
	struct sockaddr_in sin;
 | 
			
		||||
	struct sockaddr *sa;
 | 
			
		||||
	bool isv4;
 | 
			
		||||
 | 
			
		||||
	isv4 = ipv6_addr_v4mapped(addr);
 | 
			
		||||
	/* Create a CMA ID and try to bind it. This catches both
 | 
			
		||||
	 * IB and iWARP capable NICs.
 | 
			
		||||
	 */
 | 
			
		||||
| 
						 | 
				
			
			@ -356,20 +362,53 @@ static int rds_ib_laddr_check(struct net *net, const struct in6_addr *addr,
 | 
			
		|||
	if (IS_ERR(cm_id))
 | 
			
		||||
		return PTR_ERR(cm_id);
 | 
			
		||||
 | 
			
		||||
	memset(&sin, 0, sizeof(sin));
 | 
			
		||||
	sin.sin_family = AF_INET;
 | 
			
		||||
	sin.sin_addr.s_addr = addr->s6_addr32[3];
 | 
			
		||||
	if (isv4) {
 | 
			
		||||
		memset(&sin, 0, sizeof(sin));
 | 
			
		||||
		sin.sin_family = AF_INET;
 | 
			
		||||
		sin.sin_addr.s_addr = addr->s6_addr32[3];
 | 
			
		||||
		sa = (struct sockaddr *)&sin;
 | 
			
		||||
	} else {
 | 
			
		||||
		memset(&sin6, 0, sizeof(sin6));
 | 
			
		||||
		sin6.sin6_family = AF_INET6;
 | 
			
		||||
		sin6.sin6_addr = *addr;
 | 
			
		||||
		sin6.sin6_scope_id = scope_id;
 | 
			
		||||
		sa = (struct sockaddr *)&sin6;
 | 
			
		||||
 | 
			
		||||
		/* XXX Do a special IPv6 link local address check here.  The
 | 
			
		||||
		 * reason is that rdma_bind_addr() always succeeds with IPv6
 | 
			
		||||
		 * link local address whether or not it is configured in a
 | 
			
		||||
		 * system.
 | 
			
		||||
		 */
 | 
			
		||||
		if (ipv6_addr_type(addr) & IPV6_ADDR_LINKLOCAL) {
 | 
			
		||||
			struct net_device *dev;
 | 
			
		||||
 | 
			
		||||
			if (scope_id == 0)
 | 
			
		||||
				return -EADDRNOTAVAIL;
 | 
			
		||||
 | 
			
		||||
			/* Use init_net for now as RDS is not network
 | 
			
		||||
			 * name space aware.
 | 
			
		||||
			 */
 | 
			
		||||
			dev = dev_get_by_index(&init_net, scope_id);
 | 
			
		||||
			if (!dev)
 | 
			
		||||
				return -EADDRNOTAVAIL;
 | 
			
		||||
			if (!ipv6_chk_addr(&init_net, addr, dev, 1)) {
 | 
			
		||||
				dev_put(dev);
 | 
			
		||||
				return -EADDRNOTAVAIL;
 | 
			
		||||
			}
 | 
			
		||||
			dev_put(dev);
 | 
			
		||||
		}
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	/* rdma_bind_addr will only succeed for IB & iWARP devices */
 | 
			
		||||
	ret = rdma_bind_addr(cm_id, (struct sockaddr *)&sin);
 | 
			
		||||
	ret = rdma_bind_addr(cm_id, sa);
 | 
			
		||||
	/* due to this, we will claim to support iWARP devices unless we
 | 
			
		||||
	   check node_type. */
 | 
			
		||||
	if (ret || !cm_id->device ||
 | 
			
		||||
	    cm_id->device->node_type != RDMA_NODE_IB_CA)
 | 
			
		||||
		ret = -EADDRNOTAVAIL;
 | 
			
		||||
 | 
			
		||||
	rdsdebug("addr %pI6c ret %d node type %d\n",
 | 
			
		||||
		 addr, ret,
 | 
			
		||||
	rdsdebug("addr %pI6c%%%u ret %d node type %d\n",
 | 
			
		||||
		 addr, scope_id, ret,
 | 
			
		||||
		 cm_id->device ? cm_id->device->node_type : -1);
 | 
			
		||||
 | 
			
		||||
	rdma_destroy_id(cm_id);
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -678,7 +678,7 @@ static u32 rds_ib_protocol_compatible(struct rdma_cm_event *event, bool isv6)
 | 
			
		|||
	return version;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/* Given an IPv6 address, find the IB net_device which hosts that address and
 | 
			
		||||
/* Given an IPv6 address, find the net_device which hosts that address and
 | 
			
		||||
 * return its index.  This is used by the rds_ib_cm_handle_connect() code to
 | 
			
		||||
 * find the interface index of where an incoming request comes from when
 | 
			
		||||
 * the request is using a link local address.
 | 
			
		||||
| 
						 | 
				
			
			@ -695,8 +695,7 @@ static u32 __rds_find_ifindex(struct net *net, const struct in6_addr *addr)
 | 
			
		|||
 | 
			
		||||
	rcu_read_lock();
 | 
			
		||||
	for_each_netdev_rcu(net, dev) {
 | 
			
		||||
		if (dev->type == ARPHRD_INFINIBAND &&
 | 
			
		||||
		    ipv6_chk_addr(net, addr, dev, 0)) {
 | 
			
		||||
		if (ipv6_chk_addr(net, addr, dev, 1)) {
 | 
			
		||||
			idx = dev->ifindex;
 | 
			
		||||
			break;
 | 
			
		||||
		}
 | 
			
		||||
| 
						 | 
				
			
			@ -736,7 +735,7 @@ int rds_ib_cm_handle_connect(struct rdma_cm_id *cm_id,
 | 
			
		|||
		dp_cmn = &dp->ricp_v6.dp_cmn;
 | 
			
		||||
		saddr6 = &dp->ricp_v6.dp_saddr;
 | 
			
		||||
		daddr6 = &dp->ricp_v6.dp_daddr;
 | 
			
		||||
		/* If the local address is link local, need to find the
 | 
			
		||||
		/* If either address is link local, need to find the
 | 
			
		||||
		 * interface index in order to create a proper RDS
 | 
			
		||||
		 * connection.
 | 
			
		||||
		 */
 | 
			
		||||
| 
						 | 
				
			
			@ -748,6 +747,14 @@ int rds_ib_cm_handle_connect(struct rdma_cm_id *cm_id,
 | 
			
		|||
				err = -EOPNOTSUPP;
 | 
			
		||||
				goto out;
 | 
			
		||||
			}
 | 
			
		||||
		} else if (ipv6_addr_type(saddr6) & IPV6_ADDR_LINKLOCAL) {
 | 
			
		||||
			/* Use our address to find the correct index. */
 | 
			
		||||
			ifindex = __rds_find_ifindex(&init_net, daddr6);
 | 
			
		||||
			/* No index found...  Need to bail out. */
 | 
			
		||||
			if (ifindex == 0) {
 | 
			
		||||
				err = -EOPNOTSUPP;
 | 
			
		||||
				goto out;
 | 
			
		||||
			}
 | 
			
		||||
		}
 | 
			
		||||
	} else {
 | 
			
		||||
		dp_cmn = &dp->ricp_v4.dp_cmn;
 | 
			
		||||
| 
						 | 
				
			
			@ -886,7 +893,10 @@ int rds_ib_conn_path_connect(struct rds_conn_path *cp)
 | 
			
		|||
 | 
			
		||||
	/* XXX I wonder what effect the port space has */
 | 
			
		||||
	/* delegate cm event handler to rdma_transport */
 | 
			
		||||
	handler = rds_rdma_cm_event_handler;
 | 
			
		||||
	if (conn->c_isv6)
 | 
			
		||||
		handler = rds6_rdma_cm_event_handler;
 | 
			
		||||
	else
 | 
			
		||||
		handler = rds_rdma_cm_event_handler;
 | 
			
		||||
	ic->i_cm_id = rdma_create_id(&init_net, handler, conn,
 | 
			
		||||
				     RDMA_PS_TCP, IB_QPT_RC);
 | 
			
		||||
	if (IS_ERR(ic->i_cm_id)) {
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -37,7 +37,9 @@
 | 
			
		|||
#include "rdma_transport.h"
 | 
			
		||||
#include "ib.h"
 | 
			
		||||
 | 
			
		||||
/* Global IPv4 and IPv6 RDS RDMA listener cm_id */
 | 
			
		||||
static struct rdma_cm_id *rds_rdma_listen_id;
 | 
			
		||||
static struct rdma_cm_id *rds6_rdma_listen_id;
 | 
			
		||||
 | 
			
		||||
static int rds_rdma_cm_event_handler_cmn(struct rdma_cm_id *cm_id,
 | 
			
		||||
					 struct rdma_cm_event *event,
 | 
			
		||||
| 
						 | 
				
			
			@ -153,6 +155,12 @@ int rds_rdma_cm_event_handler(struct rdma_cm_id *cm_id,
 | 
			
		|||
	return rds_rdma_cm_event_handler_cmn(cm_id, event, false);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
int rds6_rdma_cm_event_handler(struct rdma_cm_id *cm_id,
 | 
			
		||||
			       struct rdma_cm_event *event)
 | 
			
		||||
{
 | 
			
		||||
	return rds_rdma_cm_event_handler_cmn(cm_id, event, true);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
static int rds_rdma_listen_init_common(rdma_cm_event_handler handler,
 | 
			
		||||
				       struct sockaddr *sa,
 | 
			
		||||
				       struct rdma_cm_id **ret_cm_id)
 | 
			
		||||
| 
						 | 
				
			
			@ -206,6 +214,7 @@ static int rds_rdma_listen_init_common(rdma_cm_event_handler handler,
 | 
			
		|||
static int rds_rdma_listen_init(void)
 | 
			
		||||
{
 | 
			
		||||
	int ret;
 | 
			
		||||
	struct sockaddr_in6 sin6;
 | 
			
		||||
	struct sockaddr_in sin;
 | 
			
		||||
 | 
			
		||||
	sin.sin_family = PF_INET;
 | 
			
		||||
| 
						 | 
				
			
			@ -214,7 +223,21 @@ static int rds_rdma_listen_init(void)
 | 
			
		|||
	ret = rds_rdma_listen_init_common(rds_rdma_cm_event_handler,
 | 
			
		||||
					  (struct sockaddr *)&sin,
 | 
			
		||||
					  &rds_rdma_listen_id);
 | 
			
		||||
	return ret;
 | 
			
		||||
	if (ret != 0)
 | 
			
		||||
		return ret;
 | 
			
		||||
 | 
			
		||||
	sin6.sin6_family = PF_INET6;
 | 
			
		||||
	sin6.sin6_addr = in6addr_any;
 | 
			
		||||
	sin6.sin6_port = htons(RDS_CM_PORT);
 | 
			
		||||
	sin6.sin6_scope_id = 0;
 | 
			
		||||
	sin6.sin6_flowinfo = 0;
 | 
			
		||||
	ret = rds_rdma_listen_init_common(rds6_rdma_cm_event_handler,
 | 
			
		||||
					  (struct sockaddr *)&sin6,
 | 
			
		||||
					  &rds6_rdma_listen_id);
 | 
			
		||||
	/* Keep going even when IPv6 is not enabled in the system. */
 | 
			
		||||
	if (ret != 0)
 | 
			
		||||
		rdsdebug("Cannot set up IPv6 RDMA listener\n");
 | 
			
		||||
	return 0;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
static void rds_rdma_listen_stop(void)
 | 
			
		||||
| 
						 | 
				
			
			@ -224,6 +247,11 @@ static void rds_rdma_listen_stop(void)
 | 
			
		|||
		rdma_destroy_id(rds_rdma_listen_id);
 | 
			
		||||
		rds_rdma_listen_id = NULL;
 | 
			
		||||
	}
 | 
			
		||||
	if (rds6_rdma_listen_id) {
 | 
			
		||||
		rdsdebug("cm %p\n", rds6_rdma_listen_id);
 | 
			
		||||
		rdma_destroy_id(rds6_rdma_listen_id);
 | 
			
		||||
		rds6_rdma_listen_id = NULL;
 | 
			
		||||
	}
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
static int rds_rdma_init(void)
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -6,11 +6,16 @@
 | 
			
		|||
#include <rdma/rdma_cm.h>
 | 
			
		||||
#include "rds.h"
 | 
			
		||||
 | 
			
		||||
/* RDMA_CM also uses 16385 as the listener port. */
 | 
			
		||||
#define RDS_CM_PORT	16385
 | 
			
		||||
 | 
			
		||||
#define RDS_RDMA_RESOLVE_TIMEOUT_MS     5000
 | 
			
		||||
 | 
			
		||||
int rds_rdma_conn_connect(struct rds_connection *conn);
 | 
			
		||||
int rds_rdma_cm_event_handler(struct rdma_cm_id *cm_id,
 | 
			
		||||
			      struct rdma_cm_event *event);
 | 
			
		||||
int rds6_rdma_cm_event_handler(struct rdma_cm_id *cm_id,
 | 
			
		||||
			       struct rdma_cm_event *event);
 | 
			
		||||
 | 
			
		||||
/* from ib.c */
 | 
			
		||||
extern struct rds_transport rds_ib_transport;
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -24,14 +24,15 @@
 | 
			
		|||
#define RDS_PROTOCOL_MINOR(v)	((v) & 255)
 | 
			
		||||
#define RDS_PROTOCOL(maj, min)	(((maj) << 8) | min)
 | 
			
		||||
 | 
			
		||||
/*
 | 
			
		||||
 * XXX randomly chosen, but at least seems to be unused:
 | 
			
		||||
 * #               18464-18768 Unassigned
 | 
			
		||||
 * We should do better.  We want a reserved port to discourage unpriv'ed
 | 
			
		||||
 * userspace from listening.
 | 
			
		||||
/* The following ports, 16385, 18634, 18635, are registered with IANA as
 | 
			
		||||
 * the ports to be used for RDS over TCP and UDP.  Currently, only RDS over
 | 
			
		||||
 * TCP and RDS over IB/RDMA are implemented.  18634 is the historical value
 | 
			
		||||
 * used for the RDMA_CM listener port.  RDS/TCP uses port 16385.  After
 | 
			
		||||
 * IPv6 work, RDMA_CM also uses 16385 as the listener port.  18634 is kept
 | 
			
		||||
 * to ensure compatibility with older RDS modules.  Those ports are defined
 | 
			
		||||
 * in each transport's header file.
 | 
			
		||||
 */
 | 
			
		||||
#define RDS_PORT	18634
 | 
			
		||||
#define RDS_CM_PORT	16385
 | 
			
		||||
 | 
			
		||||
#ifdef ATOMIC64_INIT
 | 
			
		||||
#define KERNEL_HAS_ATOMIC64
 | 
			
		||||
| 
						 | 
				
			
			@ -140,7 +141,8 @@ struct rds_connection {
 | 
			
		|||
	struct hlist_node	c_hash_node;
 | 
			
		||||
	struct in6_addr		c_laddr;
 | 
			
		||||
	struct in6_addr		c_faddr;
 | 
			
		||||
	int			c_dev_if; /* c_laddrs's interface index */
 | 
			
		||||
	int			c_dev_if; /* ifindex used for this conn */
 | 
			
		||||
	int			c_bound_if; /* ifindex of c_laddr */
 | 
			
		||||
	unsigned int		c_loopback:1,
 | 
			
		||||
				c_isv6:1,
 | 
			
		||||
				c_ping_triggered:1,
 | 
			
		||||
| 
						 | 
				
			
			@ -736,7 +738,7 @@ void rds_cong_remove_socket(struct rds_sock *);
 | 
			
		|||
void rds_cong_exit(void);
 | 
			
		||||
struct rds_message *rds_cong_update_alloc(struct rds_connection *conn);
 | 
			
		||||
 | 
			
		||||
/* conn.c */
 | 
			
		||||
/* connection.c */
 | 
			
		||||
extern u32 rds_gen_num;
 | 
			
		||||
int rds_conn_init(void);
 | 
			
		||||
void rds_conn_exit(void);
 | 
			
		||||
| 
						 | 
				
			
			@ -874,6 +876,10 @@ int rds_notify_queue_get(struct rds_sock *rs, struct msghdr *msg);
 | 
			
		|||
void rds_inc_info_copy(struct rds_incoming *inc,
 | 
			
		||||
		       struct rds_info_iterator *iter,
 | 
			
		||||
		       __be32 saddr, __be32 daddr, int flip);
 | 
			
		||||
void rds6_inc_info_copy(struct rds_incoming *inc,
 | 
			
		||||
			struct rds_info_iterator *iter,
 | 
			
		||||
			struct in6_addr *saddr, struct in6_addr *daddr,
 | 
			
		||||
			int flip);
 | 
			
		||||
 | 
			
		||||
/* send.c */
 | 
			
		||||
int rds_sendmsg(struct socket *sock, struct msghdr *msg, size_t payload_len);
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -364,7 +364,7 @@ void rds_recv_incoming(struct rds_connection *conn, struct in6_addr *saddr,
 | 
			
		|||
		goto out;
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	rs = rds_find_bound(daddr, inc->i_hdr.h_dport, conn->c_dev_if);
 | 
			
		||||
	rs = rds_find_bound(daddr, inc->i_hdr.h_dport, conn->c_bound_if);
 | 
			
		||||
	if (!rs) {
 | 
			
		||||
		rds_stats_inc(s_recv_drop_no_sock);
 | 
			
		||||
		goto out;
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -1091,10 +1091,9 @@ int rds_sendmsg(struct socket *sock, struct msghdr *msg, size_t payload_len)
 | 
			
		|||
			ret = -EINVAL;
 | 
			
		||||
			goto out;
 | 
			
		||||
		}
 | 
			
		||||
		switch (namelen) {
 | 
			
		||||
		case sizeof(*usin):
 | 
			
		||||
			if (usin->sin_family != AF_INET ||
 | 
			
		||||
			    usin->sin_addr.s_addr == htonl(INADDR_ANY) ||
 | 
			
		||||
		switch (usin->sin_family) {
 | 
			
		||||
		case AF_INET:
 | 
			
		||||
			if (usin->sin_addr.s_addr == htonl(INADDR_ANY) ||
 | 
			
		||||
			    usin->sin_addr.s_addr == htonl(INADDR_BROADCAST) ||
 | 
			
		||||
			    IN_MULTICAST(ntohl(usin->sin_addr.s_addr))) {
 | 
			
		||||
				ret = -EINVAL;
 | 
			
		||||
| 
						 | 
				
			
			@ -1104,9 +1103,44 @@ int rds_sendmsg(struct socket *sock, struct msghdr *msg, size_t payload_len)
 | 
			
		|||
			dport = usin->sin_port;
 | 
			
		||||
			break;
 | 
			
		||||
 | 
			
		||||
		case sizeof(*sin6): {
 | 
			
		||||
			ret = -EPROTONOSUPPORT;
 | 
			
		||||
			goto out;
 | 
			
		||||
		case AF_INET6: {
 | 
			
		||||
			int addr_type;
 | 
			
		||||
 | 
			
		||||
			if (namelen < sizeof(*sin6)) {
 | 
			
		||||
				ret = -EINVAL;
 | 
			
		||||
				goto out;
 | 
			
		||||
			}
 | 
			
		||||
			addr_type = ipv6_addr_type(&sin6->sin6_addr);
 | 
			
		||||
			if (!(addr_type & IPV6_ADDR_UNICAST)) {
 | 
			
		||||
				__be32 addr4;
 | 
			
		||||
 | 
			
		||||
				if (!(addr_type & IPV6_ADDR_MAPPED)) {
 | 
			
		||||
					ret = -EINVAL;
 | 
			
		||||
					goto out;
 | 
			
		||||
				}
 | 
			
		||||
 | 
			
		||||
				/* It is a mapped address.  Need to do some
 | 
			
		||||
				 * sanity checks.
 | 
			
		||||
				 */
 | 
			
		||||
				addr4 = sin6->sin6_addr.s6_addr32[3];
 | 
			
		||||
				if (addr4 == htonl(INADDR_ANY) ||
 | 
			
		||||
				    addr4 == htonl(INADDR_BROADCAST) ||
 | 
			
		||||
				    IN_MULTICAST(ntohl(addr4))) {
 | 
			
		||||
					return -EINVAL;
 | 
			
		||||
					goto out;
 | 
			
		||||
				}
 | 
			
		||||
			}
 | 
			
		||||
			if (addr_type & IPV6_ADDR_LINKLOCAL) {
 | 
			
		||||
				if (sin6->sin6_scope_id == 0) {
 | 
			
		||||
					ret = -EINVAL;
 | 
			
		||||
					goto out;
 | 
			
		||||
				}
 | 
			
		||||
				scope_id = sin6->sin6_scope_id;
 | 
			
		||||
			}
 | 
			
		||||
 | 
			
		||||
			daddr = sin6->sin6_addr;
 | 
			
		||||
			dport = sin6->sin6_port;
 | 
			
		||||
			break;
 | 
			
		||||
		}
 | 
			
		||||
 | 
			
		||||
		default:
 | 
			
		||||
| 
						 | 
				
			
			@ -1138,6 +1172,19 @@ int rds_sendmsg(struct socket *sock, struct msghdr *msg, size_t payload_len)
 | 
			
		|||
			ret = -EOPNOTSUPP;
 | 
			
		||||
			goto out;
 | 
			
		||||
		}
 | 
			
		||||
		/* If the socket is already bound to a link local address,
 | 
			
		||||
		 * it can only send to peers on the same link.  But allow
 | 
			
		||||
		 * communicating between link local and non-link local address.
 | 
			
		||||
		 */
 | 
			
		||||
		if (scope_id != rs->rs_bound_scope_id) {
 | 
			
		||||
			if (!scope_id) {
 | 
			
		||||
				scope_id = rs->rs_bound_scope_id;
 | 
			
		||||
			} else if (rs->rs_bound_scope_id) {
 | 
			
		||||
				release_sock(sk);
 | 
			
		||||
				ret = -EINVAL;
 | 
			
		||||
				goto out;
 | 
			
		||||
			}
 | 
			
		||||
		}
 | 
			
		||||
	}
 | 
			
		||||
	release_sock(sk);
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -1,5 +1,5 @@
 | 
			
		|||
/*
 | 
			
		||||
 * Copyright (c) 2006, 2017 Oracle and/or its affiliates. All rights reserved.
 | 
			
		||||
 * Copyright (c) 2006, 2018 Oracle and/or its affiliates. All rights reserved.
 | 
			
		||||
 *
 | 
			
		||||
 * This software is available to you under a choice of one of two
 | 
			
		||||
 * licenses.  You may choose to be licensed under the terms of the GNU
 | 
			
		||||
| 
						 | 
				
			
			@ -46,7 +46,12 @@
 | 
			
		|||
/* only for info exporting */
 | 
			
		||||
static DEFINE_SPINLOCK(rds_tcp_tc_list_lock);
 | 
			
		||||
static LIST_HEAD(rds_tcp_tc_list);
 | 
			
		||||
 | 
			
		||||
/* rds_tcp_tc_count counts only IPv4 connections.
 | 
			
		||||
 * rds6_tcp_tc_count counts both IPv4 and IPv6 connections.
 | 
			
		||||
 */
 | 
			
		||||
static unsigned int rds_tcp_tc_count;
 | 
			
		||||
static unsigned int rds6_tcp_tc_count;
 | 
			
		||||
 | 
			
		||||
/* Track rds_tcp_connection structs so they can be cleaned up */
 | 
			
		||||
static DEFINE_SPINLOCK(rds_tcp_conn_lock);
 | 
			
		||||
| 
						 | 
				
			
			@ -113,7 +118,9 @@ void rds_tcp_restore_callbacks(struct socket *sock,
 | 
			
		|||
	/* done under the callback_lock to serialize with write_space */
 | 
			
		||||
	spin_lock(&rds_tcp_tc_list_lock);
 | 
			
		||||
	list_del_init(&tc->t_list_item);
 | 
			
		||||
	rds_tcp_tc_count--;
 | 
			
		||||
	rds6_tcp_tc_count--;
 | 
			
		||||
	if (!tc->t_cpath->cp_conn->c_isv6)
 | 
			
		||||
		rds_tcp_tc_count--;
 | 
			
		||||
	spin_unlock(&rds_tcp_tc_list_lock);
 | 
			
		||||
 | 
			
		||||
	tc->t_sock = NULL;
 | 
			
		||||
| 
						 | 
				
			
			@ -200,7 +207,9 @@ void rds_tcp_set_callbacks(struct socket *sock, struct rds_conn_path *cp)
 | 
			
		|||
	/* done under the callback_lock to serialize with write_space */
 | 
			
		||||
	spin_lock(&rds_tcp_tc_list_lock);
 | 
			
		||||
	list_add_tail(&tc->t_list_item, &rds_tcp_tc_list);
 | 
			
		||||
	rds_tcp_tc_count++;
 | 
			
		||||
	rds6_tcp_tc_count++;
 | 
			
		||||
	if (!tc->t_cpath->cp_conn->c_isv6)
 | 
			
		||||
		rds_tcp_tc_count++;
 | 
			
		||||
	spin_unlock(&rds_tcp_tc_list_lock);
 | 
			
		||||
 | 
			
		||||
	/* accepted sockets need our listen data ready undone */
 | 
			
		||||
| 
						 | 
				
			
			@ -221,6 +230,9 @@ void rds_tcp_set_callbacks(struct socket *sock, struct rds_conn_path *cp)
 | 
			
		|||
	write_unlock_bh(&sock->sk->sk_callback_lock);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/* Handle RDS_INFO_TCP_SOCKETS socket option.  It only returns IPv4
 | 
			
		||||
 * connections for backward compatibility.
 | 
			
		||||
 */
 | 
			
		||||
static void rds_tcp_tc_info(struct socket *rds_sock, unsigned int len,
 | 
			
		||||
			    struct rds_info_iterator *iter,
 | 
			
		||||
			    struct rds_info_lengths *lens)
 | 
			
		||||
| 
						 | 
				
			
			@ -228,8 +240,6 @@ static void rds_tcp_tc_info(struct socket *rds_sock, unsigned int len,
 | 
			
		|||
	struct rds_info_tcp_socket tsinfo;
 | 
			
		||||
	struct rds_tcp_connection *tc;
 | 
			
		||||
	unsigned long flags;
 | 
			
		||||
	struct sockaddr_in sin;
 | 
			
		||||
	struct socket *sock;
 | 
			
		||||
 | 
			
		||||
	spin_lock_irqsave(&rds_tcp_tc_list_lock, flags);
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			@ -237,16 +247,15 @@ static void rds_tcp_tc_info(struct socket *rds_sock, unsigned int len,
 | 
			
		|||
		goto out;
 | 
			
		||||
 | 
			
		||||
	list_for_each_entry(tc, &rds_tcp_tc_list, t_list_item) {
 | 
			
		||||
		struct inet_sock *inet = inet_sk(tc->t_sock->sk);
 | 
			
		||||
 | 
			
		||||
		sock = tc->t_sock;
 | 
			
		||||
		if (sock) {
 | 
			
		||||
			sock->ops->getname(sock, (struct sockaddr *)&sin, 0);
 | 
			
		||||
			tsinfo.local_addr = sin.sin_addr.s_addr;
 | 
			
		||||
			tsinfo.local_port = sin.sin_port;
 | 
			
		||||
			sock->ops->getname(sock, (struct sockaddr *)&sin, 1);
 | 
			
		||||
			tsinfo.peer_addr = sin.sin_addr.s_addr;
 | 
			
		||||
			tsinfo.peer_port = sin.sin_port;
 | 
			
		||||
		}
 | 
			
		||||
		if (tc->t_cpath->cp_conn->c_isv6)
 | 
			
		||||
			continue;
 | 
			
		||||
 | 
			
		||||
		tsinfo.local_addr = inet->inet_saddr;
 | 
			
		||||
		tsinfo.local_port = inet->inet_sport;
 | 
			
		||||
		tsinfo.peer_addr = inet->inet_daddr;
 | 
			
		||||
		tsinfo.peer_port = inet->inet_dport;
 | 
			
		||||
 | 
			
		||||
		tsinfo.hdr_rem = tc->t_tinc_hdr_rem;
 | 
			
		||||
		tsinfo.data_rem = tc->t_tinc_data_rem;
 | 
			
		||||
| 
						 | 
				
			
			@ -494,13 +503,18 @@ static __net_init int rds_tcp_init_net(struct net *net)
 | 
			
		|||
		err = -ENOMEM;
 | 
			
		||||
		goto fail;
 | 
			
		||||
	}
 | 
			
		||||
	rtn->rds_tcp_listen_sock = rds_tcp_listen_init(net);
 | 
			
		||||
	rtn->rds_tcp_listen_sock = rds_tcp_listen_init(net, true);
 | 
			
		||||
	if (!rtn->rds_tcp_listen_sock) {
 | 
			
		||||
		pr_warn("could not set up listen sock\n");
 | 
			
		||||
		unregister_net_sysctl_table(rtn->rds_tcp_sysctl);
 | 
			
		||||
		rtn->rds_tcp_sysctl = NULL;
 | 
			
		||||
		err = -EAFNOSUPPORT;
 | 
			
		||||
		goto fail;
 | 
			
		||||
		pr_warn("could not set up IPv6 listen sock\n");
 | 
			
		||||
 | 
			
		||||
		/* Try IPv4 as some systems disable IPv6 */
 | 
			
		||||
		rtn->rds_tcp_listen_sock = rds_tcp_listen_init(net, false);
 | 
			
		||||
		if (!rtn->rds_tcp_listen_sock) {
 | 
			
		||||
			unregister_net_sysctl_table(rtn->rds_tcp_sysctl);
 | 
			
		||||
			rtn->rds_tcp_sysctl = NULL;
 | 
			
		||||
			err = -EAFNOSUPPORT;
 | 
			
		||||
			goto fail;
 | 
			
		||||
		}
 | 
			
		||||
	}
 | 
			
		||||
	INIT_WORK(&rtn->rds_tcp_accept_w, rds_tcp_accept_worker);
 | 
			
		||||
	return 0;
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -67,7 +67,7 @@ void rds_tcp_conn_path_shutdown(struct rds_conn_path *conn);
 | 
			
		|||
void rds_tcp_state_change(struct sock *sk);
 | 
			
		||||
 | 
			
		||||
/* tcp_listen.c */
 | 
			
		||||
struct socket *rds_tcp_listen_init(struct net *);
 | 
			
		||||
struct socket *rds_tcp_listen_init(struct net *net, bool isv6);
 | 
			
		||||
void rds_tcp_listen_stop(struct socket *sock, struct work_struct *acceptor);
 | 
			
		||||
void rds_tcp_listen_data_ready(struct sock *sk);
 | 
			
		||||
int rds_tcp_accept_one(struct socket *sock);
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -89,9 +89,11 @@ void rds_tcp_state_change(struct sock *sk)
 | 
			
		|||
int rds_tcp_conn_path_connect(struct rds_conn_path *cp)
 | 
			
		||||
{
 | 
			
		||||
	struct socket *sock = NULL;
 | 
			
		||||
	struct sockaddr_in6 sin6;
 | 
			
		||||
	struct sockaddr_in sin;
 | 
			
		||||
	struct sockaddr *addr;
 | 
			
		||||
	int addrlen;
 | 
			
		||||
	bool isv6;
 | 
			
		||||
	int ret;
 | 
			
		||||
	struct rds_connection *conn = cp->cp_conn;
 | 
			
		||||
	struct rds_tcp_connection *tc = cp->cp_transport_data;
 | 
			
		||||
| 
						 | 
				
			
			@ -108,18 +110,36 @@ int rds_tcp_conn_path_connect(struct rds_conn_path *cp)
 | 
			
		|||
		mutex_unlock(&tc->t_conn_path_lock);
 | 
			
		||||
		return 0;
 | 
			
		||||
	}
 | 
			
		||||
	ret = sock_create_kern(rds_conn_net(conn), PF_INET,
 | 
			
		||||
			       SOCK_STREAM, IPPROTO_TCP, &sock);
 | 
			
		||||
	if (ipv6_addr_v4mapped(&conn->c_laddr)) {
 | 
			
		||||
		ret = sock_create_kern(rds_conn_net(conn), PF_INET,
 | 
			
		||||
				       SOCK_STREAM, IPPROTO_TCP, &sock);
 | 
			
		||||
		isv6 = false;
 | 
			
		||||
	} else {
 | 
			
		||||
		ret = sock_create_kern(rds_conn_net(conn), PF_INET6,
 | 
			
		||||
				       SOCK_STREAM, IPPROTO_TCP, &sock);
 | 
			
		||||
		isv6 = true;
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	if (ret < 0)
 | 
			
		||||
		goto out;
 | 
			
		||||
 | 
			
		||||
	rds_tcp_tune(sock);
 | 
			
		||||
 | 
			
		||||
	sin.sin_family = AF_INET;
 | 
			
		||||
	sin.sin_addr.s_addr = conn->c_laddr.s6_addr32[3];
 | 
			
		||||
	sin.sin_port = 0;
 | 
			
		||||
	addr = (struct sockaddr *)&sin;
 | 
			
		||||
	addrlen = sizeof(sin);
 | 
			
		||||
	if (isv6) {
 | 
			
		||||
		sin6.sin6_family = AF_INET6;
 | 
			
		||||
		sin6.sin6_addr = conn->c_laddr;
 | 
			
		||||
		sin6.sin6_port = 0;
 | 
			
		||||
		sin6.sin6_flowinfo = 0;
 | 
			
		||||
		sin6.sin6_scope_id = conn->c_dev_if;
 | 
			
		||||
		addr = (struct sockaddr *)&sin6;
 | 
			
		||||
		addrlen = sizeof(sin6);
 | 
			
		||||
	} else {
 | 
			
		||||
		sin.sin_family = AF_INET;
 | 
			
		||||
		sin.sin_addr.s_addr = conn->c_laddr.s6_addr32[3];
 | 
			
		||||
		sin.sin_port = 0;
 | 
			
		||||
		addr = (struct sockaddr *)&sin;
 | 
			
		||||
		addrlen = sizeof(sin);
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	ret = sock->ops->bind(sock, addr, addrlen);
 | 
			
		||||
	if (ret) {
 | 
			
		||||
| 
						 | 
				
			
			@ -128,11 +148,21 @@ int rds_tcp_conn_path_connect(struct rds_conn_path *cp)
 | 
			
		|||
		goto out;
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	sin.sin_family = AF_INET;
 | 
			
		||||
	sin.sin_addr.s_addr = conn->c_faddr.s6_addr32[3];
 | 
			
		||||
	sin.sin_port = htons(RDS_TCP_PORT);
 | 
			
		||||
	addr = (struct sockaddr *)&sin;
 | 
			
		||||
	addrlen = sizeof(sin);
 | 
			
		||||
	if (isv6) {
 | 
			
		||||
		sin6.sin6_family = AF_INET6;
 | 
			
		||||
		sin6.sin6_addr = conn->c_faddr;
 | 
			
		||||
		sin6.sin6_port = htons(RDS_TCP_PORT);
 | 
			
		||||
		sin6.sin6_flowinfo = 0;
 | 
			
		||||
		sin6.sin6_scope_id = conn->c_dev_if;
 | 
			
		||||
		addr = (struct sockaddr *)&sin6;
 | 
			
		||||
		addrlen = sizeof(sin6);
 | 
			
		||||
	} else {
 | 
			
		||||
		sin.sin_family = AF_INET;
 | 
			
		||||
		sin.sin_addr.s_addr = conn->c_faddr.s6_addr32[3];
 | 
			
		||||
		sin.sin_port = htons(RDS_TCP_PORT);
 | 
			
		||||
		addr = (struct sockaddr *)&sin;
 | 
			
		||||
		addrlen = sizeof(sin);
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	/*
 | 
			
		||||
	 * once we call connect() we can start getting callbacks and they
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -131,6 +131,8 @@ int rds_tcp_accept_one(struct socket *sock)
 | 
			
		|||
	struct rds_tcp_connection *rs_tcp = NULL;
 | 
			
		||||
	int conn_state;
 | 
			
		||||
	struct rds_conn_path *cp;
 | 
			
		||||
	struct in6_addr *my_addr, *peer_addr;
 | 
			
		||||
	int dev_if;
 | 
			
		||||
 | 
			
		||||
	if (!sock) /* module unload or netns delete in progress */
 | 
			
		||||
		return -ENETUNREACH;
 | 
			
		||||
| 
						 | 
				
			
			@ -163,15 +165,29 @@ int rds_tcp_accept_one(struct socket *sock)
 | 
			
		|||
 | 
			
		||||
	inet = inet_sk(new_sock->sk);
 | 
			
		||||
 | 
			
		||||
	my_addr = &new_sock->sk->sk_v6_rcv_saddr;
 | 
			
		||||
	peer_addr = &new_sock->sk->sk_v6_daddr;
 | 
			
		||||
	rdsdebug("accepted tcp %pI6c:%u -> %pI6c:%u\n",
 | 
			
		||||
		 &new_sock->sk->sk_v6_rcv_saddr, ntohs(inet->inet_sport),
 | 
			
		||||
		 &new_sock->sk->sk_v6_daddr, ntohs(inet->inet_dport));
 | 
			
		||||
		 my_addr, ntohs(inet->inet_sport),
 | 
			
		||||
		 peer_addr, ntohs(inet->inet_dport));
 | 
			
		||||
 | 
			
		||||
	/* sk_bound_dev_if is not set if the peer address is not link local
 | 
			
		||||
	 * address.  In this case, it happens that mcast_oif is set.  So
 | 
			
		||||
	 * just use it.
 | 
			
		||||
	 */
 | 
			
		||||
	if ((ipv6_addr_type(my_addr) & IPV6_ADDR_LINKLOCAL) &&
 | 
			
		||||
	    !(ipv6_addr_type(peer_addr) & IPV6_ADDR_LINKLOCAL)) {
 | 
			
		||||
		struct ipv6_pinfo *inet6;
 | 
			
		||||
 | 
			
		||||
		inet6 = inet6_sk(new_sock->sk);
 | 
			
		||||
		dev_if = inet6->mcast_oif;
 | 
			
		||||
	} else {
 | 
			
		||||
		dev_if = new_sock->sk->sk_bound_dev_if;
 | 
			
		||||
	}
 | 
			
		||||
	conn = rds_conn_create(sock_net(sock->sk),
 | 
			
		||||
			       &new_sock->sk->sk_v6_rcv_saddr,
 | 
			
		||||
			       &new_sock->sk->sk_v6_daddr,
 | 
			
		||||
			       &rds_tcp_transport, GFP_KERNEL,
 | 
			
		||||
			       new_sock->sk->sk_bound_dev_if);
 | 
			
		||||
			       &rds_tcp_transport, GFP_KERNEL, dev_if);
 | 
			
		||||
 | 
			
		||||
	if (IS_ERR(conn)) {
 | 
			
		||||
		ret = PTR_ERR(conn);
 | 
			
		||||
| 
						 | 
				
			
			@ -256,15 +272,22 @@ void rds_tcp_listen_data_ready(struct sock *sk)
 | 
			
		|||
		ready(sk);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
struct socket *rds_tcp_listen_init(struct net *net)
 | 
			
		||||
struct socket *rds_tcp_listen_init(struct net *net, bool isv6)
 | 
			
		||||
{
 | 
			
		||||
	struct sockaddr_in sin;
 | 
			
		||||
	struct socket *sock = NULL;
 | 
			
		||||
	struct sockaddr_storage ss;
 | 
			
		||||
	struct sockaddr_in6 *sin6;
 | 
			
		||||
	struct sockaddr_in *sin;
 | 
			
		||||
	int addr_len;
 | 
			
		||||
	int ret;
 | 
			
		||||
 | 
			
		||||
	ret = sock_create_kern(net, PF_INET, SOCK_STREAM, IPPROTO_TCP, &sock);
 | 
			
		||||
	if (ret < 0)
 | 
			
		||||
	ret = sock_create_kern(net, isv6 ? PF_INET6 : PF_INET, SOCK_STREAM,
 | 
			
		||||
			       IPPROTO_TCP, &sock);
 | 
			
		||||
	if (ret < 0) {
 | 
			
		||||
		rdsdebug("could not create %s listener socket: %d\n",
 | 
			
		||||
			 isv6 ? "IPv6" : "IPv4", ret);
 | 
			
		||||
		goto out;
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	sock->sk->sk_reuse = SK_CAN_REUSE;
 | 
			
		||||
	rds_tcp_nonagle(sock);
 | 
			
		||||
| 
						 | 
				
			
			@ -274,13 +297,28 @@ struct socket *rds_tcp_listen_init(struct net *net)
 | 
			
		|||
	sock->sk->sk_data_ready = rds_tcp_listen_data_ready;
 | 
			
		||||
	write_unlock_bh(&sock->sk->sk_callback_lock);
 | 
			
		||||
 | 
			
		||||
	sin.sin_family = PF_INET;
 | 
			
		||||
	sin.sin_addr.s_addr = (__force u32)htonl(INADDR_ANY);
 | 
			
		||||
	sin.sin_port = (__force u16)htons(RDS_TCP_PORT);
 | 
			
		||||
	if (isv6) {
 | 
			
		||||
		sin6 = (struct sockaddr_in6 *)&ss;
 | 
			
		||||
		sin6->sin6_family = PF_INET6;
 | 
			
		||||
		sin6->sin6_addr = in6addr_any;
 | 
			
		||||
		sin6->sin6_port = (__force u16)htons(RDS_TCP_PORT);
 | 
			
		||||
		sin6->sin6_scope_id = 0;
 | 
			
		||||
		sin6->sin6_flowinfo = 0;
 | 
			
		||||
		addr_len = sizeof(*sin6);
 | 
			
		||||
	} else {
 | 
			
		||||
		sin = (struct sockaddr_in *)&ss;
 | 
			
		||||
		sin->sin_family = PF_INET;
 | 
			
		||||
		sin->sin_addr.s_addr = INADDR_ANY;
 | 
			
		||||
		sin->sin_port = (__force u16)htons(RDS_TCP_PORT);
 | 
			
		||||
		addr_len = sizeof(*sin);
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	ret = sock->ops->bind(sock, (struct sockaddr *)&sin, sizeof(sin));
 | 
			
		||||
	if (ret < 0)
 | 
			
		||||
	ret = sock->ops->bind(sock, (struct sockaddr *)&ss, addr_len);
 | 
			
		||||
	if (ret < 0) {
 | 
			
		||||
		rdsdebug("could not bind %s listener socket: %d\n",
 | 
			
		||||
			 isv6 ? "IPv6" : "IPv4", ret);
 | 
			
		||||
		goto out;
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	ret = sock->ops->listen(sock, 64);
 | 
			
		||||
	if (ret < 0)
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
		Loading…
	
		Reference in a new issue