forked from mirrors/linux
		
	tipc: change socket buffer overflow control to respect sk_rcvbuf
As per feedback from the netdev community, we change the buffer overflow protection algorithm in receiving sockets so that it always respects the nominal upper limit set in sk_rcvbuf.

Instead of scaling up from a small sk_rcvbuf value, which leads to violation of the configured sk_rcvbuf limit, we now calculate the weighted per-message limit by scaling down from a much bigger value, still in the same field, according to the importance priority of the received message.

To allow for administrative tunability of the socket receive buffer size, we create a tipc_rmem sysctl variable to allow the user to configure an even bigger value via sysctl command. It is a vector of three values (min/default/max) to be consistent with things like tcp_rmem.

By default, the value initialized in tipc_rmem[1] is equal to the receive socket size needed by a TIPC_CRITICAL_IMPORTANCE message. This value is also set as the default value of sk_rcvbuf.

Originally-by: Jon Maloy <jon.maloy@ericsson.com>
Cc: Neil Horman <nhorman@tuxdriver.com>
Cc: Jon Maloy <jon.maloy@ericsson.com>
[Ying: added sysctl variation to Jon's original patch]
Signed-off-by: Ying Xue <ying.xue@windriver.com>
[PG: don't compile sysctl.c if not config'd; add Documentation]
Signed-off-by: Paul Gortmaker <paul.gortmaker@windriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
		
							parent
							
								
									8941bbcd57
								
							
						
					
					
						commit
						cc79dd1ba9
					
				
					 7 changed files with 112 additions and 12 deletions
				
			
		|  | @ -26,7 +26,7 @@ Table : Subdirectories in /proc/sys/net | ||||||
|  ipv4      IP version 4        x25        X.25 protocol |  ipv4      IP version 4        x25        X.25 protocol | ||||||
|  ipx       IPX                 token-ring IBM token ring |  ipx       IPX                 token-ring IBM token ring | ||||||
|  bridge    Bridging            decnet     DEC net |  bridge    Bridging            decnet     DEC net | ||||||
|  ipv6      IP version 6 |  ipv6      IP version 6        tipc       TIPC | ||||||
| .............................................................................. | .............................................................................. | ||||||
| 
 | 
 | ||||||
| 1. /proc/sys/net/core - Network core options | 1. /proc/sys/net/core - Network core options | ||||||
|  | @ -207,3 +207,18 @@ IPX. | ||||||
| The /proc/net/ipx_route  table  holds  a list of IPX routes. For each route it | The /proc/net/ipx_route  table  holds  a list of IPX routes. For each route it | ||||||
| gives the  destination  network, the router node (or Directly) and the network | gives the  destination  network, the router node (or Directly) and the network | ||||||
| address of the router (or Connected) for internal networks. | address of the router (or Connected) for internal networks. | ||||||
|  | 
 | ||||||
|  | 6. TIPC | ||||||
|  | ------------------------------------------------------- | ||||||
|  | 
 | ||||||
|  | The TIPC protocol now has a tunable for the receive memory, similar to | ||||||
|  | tcp_rmem, i.e. a vector of 3 INTEGERs: (min, default, max). | ||||||
|  | 
 | ||||||
|  |     # cat /proc/sys/net/tipc/tipc_rmem | ||||||
|  |     4252725 34021800        68043600 | ||||||
|  |     # | ||||||
|  | 
 | ||||||
|  | The max value is set to CONN_OVERLOAD_LIMIT, and the default and min values | ||||||
|  | are scaled (shifted) versions of that same value.  Note that the min value | ||||||
|  | is not currently used in any meaningful way, but the triplet is preserved | ||||||
|  | in order to be consistent with things like tcp_rmem. | ||||||
|  |  | ||||||
|  | @ -11,3 +11,4 @@ tipc-y	+= addr.o bcast.o bearer.o config.o \ | ||||||
| 	   socket.o log.o eth_media.o | 	   socket.o log.o eth_media.o | ||||||
| 
 | 
 | ||||||
| tipc-$(CONFIG_TIPC_MEDIA_IB)	+= ib_media.o | tipc-$(CONFIG_TIPC_MEDIA_IB)	+= ib_media.o | ||||||
|  | tipc-$(CONFIG_SYSCTL)		+= sysctl.o | ||||||
|  |  | ||||||
|  | @ -39,6 +39,7 @@ | ||||||
| #include "name_table.h" | #include "name_table.h" | ||||||
| #include "subscr.h" | #include "subscr.h" | ||||||
| #include "config.h" | #include "config.h" | ||||||
|  | #include "port.h" | ||||||
| 
 | 
 | ||||||
| #include <linux/module.h> | #include <linux/module.h> | ||||||
| 
 | 
 | ||||||
|  | @ -50,7 +51,7 @@ u32 tipc_own_addr __read_mostly; | ||||||
| int tipc_max_ports __read_mostly; | int tipc_max_ports __read_mostly; | ||||||
| int tipc_net_id __read_mostly; | int tipc_net_id __read_mostly; | ||||||
| int tipc_remote_management __read_mostly; | int tipc_remote_management __read_mostly; | ||||||
| 
 | int sysctl_tipc_rmem[3] __read_mostly;	/* min/default/max */ | ||||||
| 
 | 
 | ||||||
| /**
 | /**
 | ||||||
|  * tipc_buf_acquire - creates a TIPC message buffer |  * tipc_buf_acquire - creates a TIPC message buffer | ||||||
|  | @ -118,6 +119,7 @@ static void tipc_core_stop(void) | ||||||
| 	tipc_nametbl_stop(); | 	tipc_nametbl_stop(); | ||||||
| 	tipc_ref_table_stop(); | 	tipc_ref_table_stop(); | ||||||
| 	tipc_socket_stop(); | 	tipc_socket_stop(); | ||||||
|  | 	tipc_unregister_sysctl(); | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| /**
 | /**
 | ||||||
|  | @ -142,13 +144,14 @@ static int tipc_core_start(void) | ||||||
| 		res = tipc_netlink_start(); | 		res = tipc_netlink_start(); | ||||||
| 	if (!res) | 	if (!res) | ||||||
| 		res = tipc_socket_init(); | 		res = tipc_socket_init(); | ||||||
|  | 	if (!res) | ||||||
|  | 		res = tipc_register_sysctl(); | ||||||
| 	if (res) | 	if (res) | ||||||
| 		tipc_core_stop(); | 		tipc_core_stop(); | ||||||
| 
 | 
 | ||||||
| 	return res; | 	return res; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| 
 |  | ||||||
| static int __init tipc_init(void) | static int __init tipc_init(void) | ||||||
| { | { | ||||||
| 	int res; | 	int res; | ||||||
|  | @ -160,6 +163,11 @@ static int __init tipc_init(void) | ||||||
| 	tipc_max_ports = CONFIG_TIPC_PORTS; | 	tipc_max_ports = CONFIG_TIPC_PORTS; | ||||||
| 	tipc_net_id = 4711; | 	tipc_net_id = 4711; | ||||||
| 
 | 
 | ||||||
|  | 	sysctl_tipc_rmem[0] = CONN_OVERLOAD_LIMIT >> 4 << TIPC_LOW_IMPORTANCE; | ||||||
|  | 	sysctl_tipc_rmem[1] = CONN_OVERLOAD_LIMIT >> 4 << | ||||||
|  | 			      TIPC_CRITICAL_IMPORTANCE; | ||||||
|  | 	sysctl_tipc_rmem[2] = CONN_OVERLOAD_LIMIT; | ||||||
|  | 
 | ||||||
| 	res = tipc_core_start(); | 	res = tipc_core_start(); | ||||||
| 	if (res) | 	if (res) | ||||||
| 		pr_err("Unable to start in single node mode\n"); | 		pr_err("Unable to start in single node mode\n"); | ||||||
|  |  | ||||||
|  | @ -80,6 +80,7 @@ extern u32 tipc_own_addr __read_mostly; | ||||||
| extern int tipc_max_ports __read_mostly; | extern int tipc_max_ports __read_mostly; | ||||||
| extern int tipc_net_id __read_mostly; | extern int tipc_net_id __read_mostly; | ||||||
| extern int tipc_remote_management __read_mostly; | extern int tipc_remote_management __read_mostly; | ||||||
|  | extern int sysctl_tipc_rmem[3] __read_mostly; | ||||||
| 
 | 
 | ||||||
| /*
 | /*
 | ||||||
|  * Other global variables |  * Other global variables | ||||||
|  | @ -97,6 +98,14 @@ extern void tipc_netlink_stop(void); | ||||||
| extern int  tipc_socket_init(void); | extern int  tipc_socket_init(void); | ||||||
| extern void tipc_socket_stop(void); | extern void tipc_socket_stop(void); | ||||||
| 
 | 
 | ||||||
|  | #ifdef CONFIG_SYSCTL | ||||||
|  | extern int tipc_register_sysctl(void); | ||||||
|  | extern void tipc_unregister_sysctl(void); | ||||||
|  | #else | ||||||
|  | #define tipc_register_sysctl() 0 | ||||||
|  | #define tipc_unregister_sysctl() | ||||||
|  | #endif | ||||||
|  | 
 | ||||||
| /*
 | /*
 | ||||||
|  * TIPC timer and signal code |  * TIPC timer and signal code | ||||||
|  */ |  */ | ||||||
|  |  | ||||||
|  | @ -43,6 +43,8 @@ | ||||||
| #include "node_subscr.h" | #include "node_subscr.h" | ||||||
| 
 | 
 | ||||||
| #define TIPC_FLOW_CONTROL_WIN 512 | #define TIPC_FLOW_CONTROL_WIN 512 | ||||||
|  | #define CONN_OVERLOAD_LIMIT	((TIPC_FLOW_CONTROL_WIN * 2 + 1) * \ | ||||||
|  | 				SKB_TRUESIZE(TIPC_MAX_USER_MSG_SIZE)) | ||||||
| 
 | 
 | ||||||
| typedef void (*tipc_msg_err_event) (void *usr_handle, u32 portref, | typedef void (*tipc_msg_err_event) (void *usr_handle, u32 portref, | ||||||
| 		struct sk_buff **buf, unsigned char const *data, | 		struct sk_buff **buf, unsigned char const *data, | ||||||
|  |  | ||||||
|  | @ -43,8 +43,6 @@ | ||||||
| #define SS_LISTENING	-1	/* socket is listening */ | #define SS_LISTENING	-1	/* socket is listening */ | ||||||
| #define SS_READY	-2	/* socket is connectionless */ | #define SS_READY	-2	/* socket is connectionless */ | ||||||
| 
 | 
 | ||||||
| #define CONN_OVERLOAD_LIMIT	((TIPC_FLOW_CONTROL_WIN * 2 + 1) * \ |  | ||||||
| 				SKB_TRUESIZE(TIPC_MAX_USER_MSG_SIZE)) |  | ||||||
| #define CONN_TIMEOUT_DEFAULT	8000	/* default connect timeout = 8s */ | #define CONN_TIMEOUT_DEFAULT	8000	/* default connect timeout = 8s */ | ||||||
| 
 | 
 | ||||||
| struct tipc_sock { | struct tipc_sock { | ||||||
|  | @ -203,6 +201,7 @@ static int tipc_create(struct net *net, struct socket *sock, int protocol, | ||||||
| 
 | 
 | ||||||
| 	sock_init_data(sock, sk); | 	sock_init_data(sock, sk); | ||||||
| 	sk->sk_backlog_rcv = backlog_rcv; | 	sk->sk_backlog_rcv = backlog_rcv; | ||||||
|  | 	sk->sk_rcvbuf = sysctl_tipc_rmem[1]; | ||||||
| 	sk->sk_data_ready = tipc_data_ready; | 	sk->sk_data_ready = tipc_data_ready; | ||||||
| 	sk->sk_write_space = tipc_write_space; | 	sk->sk_write_space = tipc_write_space; | ||||||
| 	tipc_sk(sk)->p = tp_ptr; | 	tipc_sk(sk)->p = tp_ptr; | ||||||
|  | @ -1233,10 +1232,10 @@ static u32 filter_connect(struct tipc_sock *tsock, struct sk_buff **buf) | ||||||
|  * For all connectionless messages, by default new queue limits are |  * For all connectionless messages, by default new queue limits are | ||||||
|  * as belows: |  * as belows: | ||||||
|  * |  * | ||||||
|  * TIPC_LOW_IMPORTANCE       (5MB) |  * TIPC_LOW_IMPORTANCE       (4 MB) | ||||||
|  * TIPC_MEDIUM_IMPORTANCE    (10MB) |  * TIPC_MEDIUM_IMPORTANCE    (8 MB) | ||||||
|  * TIPC_HIGH_IMPORTANCE      (20MB) |  * TIPC_HIGH_IMPORTANCE      (16 MB) | ||||||
|  * TIPC_CRITICAL_IMPORTANCE  (40MB) |  * TIPC_CRITICAL_IMPORTANCE  (32 MB) | ||||||
|  * |  * | ||||||
|  * Returns overload limit according to corresponding message importance |  * Returns overload limit according to corresponding message importance | ||||||
|  */ |  */ | ||||||
|  | @ -1246,9 +1245,10 @@ static unsigned int rcvbuf_limit(struct sock *sk, struct sk_buff *buf) | ||||||
| 	unsigned int limit; | 	unsigned int limit; | ||||||
| 
 | 
 | ||||||
| 	if (msg_connected(msg)) | 	if (msg_connected(msg)) | ||||||
| 		limit = CONN_OVERLOAD_LIMIT; | 		limit = sysctl_tipc_rmem[2]; | ||||||
| 	else | 	else | ||||||
| 		limit = sk->sk_rcvbuf << (msg_importance(msg) + 5); | 		limit = sk->sk_rcvbuf >> TIPC_CRITICAL_IMPORTANCE << | ||||||
|  | 			msg_importance(msg); | ||||||
| 	return limit; | 	return limit; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  | @ -1847,7 +1847,8 @@ static const struct net_proto_family tipc_family_ops = { | ||||||
| static struct proto tipc_proto = { | static struct proto tipc_proto = { | ||||||
| 	.name		= "TIPC", | 	.name		= "TIPC", | ||||||
| 	.owner		= THIS_MODULE, | 	.owner		= THIS_MODULE, | ||||||
| 	.obj_size	= sizeof(struct tipc_sock) | 	.obj_size	= sizeof(struct tipc_sock), | ||||||
|  | 	.sysctl_rmem	= sysctl_tipc_rmem | ||||||
| }; | }; | ||||||
| 
 | 
 | ||||||
| /**
 | /**
 | ||||||
|  |  | ||||||
							
								
								
									
										64
									
								
								net/tipc/sysctl.c
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										64
									
								
								net/tipc/sysctl.c
									
									
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,64 @@ | ||||||
|  | /*
 | ||||||
|  |  * net/tipc/sysctl.c: sysctl interface to TIPC subsystem | ||||||
|  |  * | ||||||
|  |  * Copyright (c) 2013, Wind River Systems | ||||||
|  |  * All rights reserved. | ||||||
|  |  * | ||||||
|  |  * Redistribution and use in source and binary forms, with or without | ||||||
|  |  * modification, are permitted provided that the following conditions are met: | ||||||
|  |  * | ||||||
|  |  * 1. Redistributions of source code must retain the above copyright | ||||||
|  |  *    notice, this list of conditions and the following disclaimer. | ||||||
|  |  * 2. Redistributions in binary form must reproduce the above copyright | ||||||
|  |  *    notice, this list of conditions and the following disclaimer in the | ||||||
|  |  *    documentation and/or other materials provided with the distribution. | ||||||
|  |  * 3. Neither the names of the copyright holders nor the names of its | ||||||
|  |  *    contributors may be used to endorse or promote products derived from | ||||||
|  |  *    this software without specific prior written permission. | ||||||
|  |  * | ||||||
|  |  * Alternatively, this software may be distributed under the terms of the | ||||||
|  |  * GNU General Public License ("GPL") version 2 as published by the Free | ||||||
|  |  * Software Foundation. | ||||||
|  |  * | ||||||
|  |  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | ||||||
|  |  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | ||||||
|  |  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | ||||||
|  |  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE | ||||||
|  |  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR | ||||||
|  |  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF | ||||||
|  |  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS | ||||||
|  |  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN | ||||||
|  |  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) | ||||||
|  |  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE | ||||||
|  |  * POSSIBILITY OF SUCH DAMAGE. | ||||||
|  |  */ | ||||||
|  | 
 | ||||||
|  | #include "core.h" | ||||||
|  | 
 | ||||||
|  | #include <linux/sysctl.h> | ||||||
|  | 
 | ||||||
|  | static struct ctl_table_header *tipc_ctl_hdr; | ||||||
|  | 
 | ||||||
|  | static struct ctl_table tipc_table[] = { | ||||||
|  | 	{ | ||||||
|  | 		.procname	= "tipc_rmem", | ||||||
|  | 		.data		= &sysctl_tipc_rmem, | ||||||
|  | 		.maxlen		= sizeof(sysctl_tipc_rmem), | ||||||
|  | 		.mode		= 0644, | ||||||
|  | 		.proc_handler	= proc_dointvec, | ||||||
|  | 	}, | ||||||
|  | 	{} | ||||||
|  | }; | ||||||
|  | 
 | ||||||
|  | int tipc_register_sysctl(void) | ||||||
|  | { | ||||||
|  | 	tipc_ctl_hdr = register_net_sysctl(&init_net, "net/tipc", tipc_table); | ||||||
|  | 	if (tipc_ctl_hdr == NULL) | ||||||
|  | 		return -ENOMEM; | ||||||
|  | 	return 0; | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | void tipc_unregister_sysctl(void) | ||||||
|  | { | ||||||
|  | 	unregister_net_sysctl_table(tipc_ctl_hdr); | ||||||
|  | } | ||||||
		Loading…
	
		Reference in a new issue
	
	 Ying Xue
						Ying Xue