forked from mirrors/linux
		
	net: don't hold rtnl mutex during netlink dump callbacks
Four years ago, Patrick made a change to hold rtnl mutex during netlink dump callbacks. I believe it was a wrong move. This slows down concurrent dumps, making good old /proc/net/ files faster than rtnetlink in some situations. This occurred to me because one "ip link show dev ..." was _very_ slow on a workload adding/removing network devices in background. All dump callbacks are able to use RCU locking now, so this patch does roughly a revert of commits: 1c2d670f36: [RTNETLINK]: Hold rtnl_mutex during netlink dump callbacks; 6313c1e099: [RTNETLINK]: Remove unnecessary locking in dump callbacks. This lets writers fight for rtnl mutex and readers go full speed. It also takes care of phonet: phonet_route_get() is now called from rcu read section. I renamed it to phonet_route_get_rcu(). Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com> Cc: Patrick McHardy <kaber@trash.net> Cc: Remi Denis-Courmont <remi.denis-courmont@nokia.com> Acked-by: Stephen Hemminger <shemminger@vyatta.com> Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
		
							parent
							
								
									dcfd9cdc12
								
							
						
					
					
						commit
						e67f88dd12
					
				
					 8 changed files with 25 additions and 23 deletions
				
			
		|  | @ -51,7 +51,7 @@ void phonet_address_notify(int event, struct net_device *dev, u8 addr); | ||||||
| int phonet_route_add(struct net_device *dev, u8 daddr); | int phonet_route_add(struct net_device *dev, u8 daddr); | ||||||
| int phonet_route_del(struct net_device *dev, u8 daddr); | int phonet_route_del(struct net_device *dev, u8 daddr); | ||||||
| void rtm_phonet_notify(int event, struct net_device *dev, u8 dst); | void rtm_phonet_notify(int event, struct net_device *dev, u8 dst); | ||||||
| struct net_device *phonet_route_get(struct net *net, u8 daddr); | struct net_device *phonet_route_get_rcu(struct net *net, u8 daddr); | ||||||
| struct net_device *phonet_route_output(struct net *net, u8 daddr); | struct net_device *phonet_route_output(struct net *net, u8 daddr); | ||||||
| 
 | 
 | ||||||
| #define PN_NO_ADDR	0xff | #define PN_NO_ADDR	0xff | ||||||
|  |  | ||||||
|  | @ -120,8 +120,9 @@ static int br_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb) | ||||||
| 	int idx; | 	int idx; | ||||||
| 
 | 
 | ||||||
| 	idx = 0; | 	idx = 0; | ||||||
| 	for_each_netdev(net, dev) { | 	rcu_read_lock(); | ||||||
| 		struct net_bridge_port *port = br_port_get_rtnl(dev); | 	for_each_netdev_rcu(net, dev) { | ||||||
|  | 		struct net_bridge_port *port = br_port_get_rcu(dev); | ||||||
| 
 | 
 | ||||||
| 		/* not a bridge port */ | 		/* not a bridge port */ | ||||||
| 		if (!port || idx < cb->args[0]) | 		if (!port || idx < cb->args[0]) | ||||||
|  | @ -135,7 +136,7 @@ static int br_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb) | ||||||
| skip: | skip: | ||||||
| 		++idx; | 		++idx; | ||||||
| 	} | 	} | ||||||
| 
 | 	rcu_read_unlock(); | ||||||
| 	cb->args[0] = idx; | 	cb->args[0] = idx; | ||||||
| 
 | 
 | ||||||
| 	return skb->len; | 	return skb->len; | ||||||
|  |  | ||||||
|  | @ -590,7 +590,8 @@ static int dump_rules(struct sk_buff *skb, struct netlink_callback *cb, | ||||||
| 	int idx = 0; | 	int idx = 0; | ||||||
| 	struct fib_rule *rule; | 	struct fib_rule *rule; | ||||||
| 
 | 
 | ||||||
| 	list_for_each_entry(rule, &ops->rules_list, list) { | 	rcu_read_lock(); | ||||||
|  | 	list_for_each_entry_rcu(rule, &ops->rules_list, list) { | ||||||
| 		if (idx < cb->args[1]) | 		if (idx < cb->args[1]) | ||||||
| 			goto skip; | 			goto skip; | ||||||
| 
 | 
 | ||||||
|  |  | ||||||
|  | @ -1007,10 +1007,11 @@ static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb) | ||||||
| 	s_h = cb->args[0]; | 	s_h = cb->args[0]; | ||||||
| 	s_idx = cb->args[1]; | 	s_idx = cb->args[1]; | ||||||
| 
 | 
 | ||||||
|  | 	rcu_read_lock(); | ||||||
| 	for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) { | 	for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) { | ||||||
| 		idx = 0; | 		idx = 0; | ||||||
| 		head = &net->dev_index_head[h]; | 		head = &net->dev_index_head[h]; | ||||||
| 		hlist_for_each_entry(dev, node, head, index_hlist) { | 		hlist_for_each_entry_rcu(dev, node, head, index_hlist) { | ||||||
| 			if (idx < s_idx) | 			if (idx < s_idx) | ||||||
| 				goto cont; | 				goto cont; | ||||||
| 			if (rtnl_fill_ifinfo(skb, dev, RTM_NEWLINK, | 			if (rtnl_fill_ifinfo(skb, dev, RTM_NEWLINK, | ||||||
|  | @ -1023,6 +1024,7 @@ static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb) | ||||||
| 		} | 		} | ||||||
| 	} | 	} | ||||||
| out: | out: | ||||||
|  | 	rcu_read_unlock(); | ||||||
| 	cb->args[1] = idx; | 	cb->args[1] = idx; | ||||||
| 	cb->args[0] = h; | 	cb->args[0] = h; | ||||||
| 
 | 
 | ||||||
|  | @ -1879,7 +1881,6 @@ static int rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) | ||||||
| 	int min_len; | 	int min_len; | ||||||
| 	int family; | 	int family; | ||||||
| 	int type; | 	int type; | ||||||
| 	int err; |  | ||||||
| 
 | 
 | ||||||
| 	type = nlh->nlmsg_type; | 	type = nlh->nlmsg_type; | ||||||
| 	if (type > RTM_MAX) | 	if (type > RTM_MAX) | ||||||
|  | @ -1906,11 +1907,8 @@ static int rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) | ||||||
| 		if (dumpit == NULL) | 		if (dumpit == NULL) | ||||||
| 			return -EOPNOTSUPP; | 			return -EOPNOTSUPP; | ||||||
| 
 | 
 | ||||||
| 		__rtnl_unlock(); |  | ||||||
| 		rtnl = net->rtnl; | 		rtnl = net->rtnl; | ||||||
| 		err = netlink_dump_start(rtnl, skb, nlh, dumpit, NULL); | 		return netlink_dump_start(rtnl, skb, nlh, dumpit, NULL); | ||||||
| 		rtnl_lock(); |  | ||||||
| 		return err; |  | ||||||
| 	} | 	} | ||||||
| 
 | 
 | ||||||
| 	memset(rta_buf, 0, (rtattr_max * sizeof(struct rtattr *))); | 	memset(rta_buf, 0, (rtattr_max * sizeof(struct rtattr *))); | ||||||
|  | @ -1980,7 +1978,7 @@ static int __net_init rtnetlink_net_init(struct net *net) | ||||||
| { | { | ||||||
| 	struct sock *sk; | 	struct sock *sk; | ||||||
| 	sk = netlink_kernel_create(net, NETLINK_ROUTE, RTNLGRP_MAX, | 	sk = netlink_kernel_create(net, NETLINK_ROUTE, RTNLGRP_MAX, | ||||||
| 				   rtnetlink_rcv, &rtnl_mutex, THIS_MODULE); | 				   rtnetlink_rcv, NULL, THIS_MODULE); | ||||||
| 	if (!sk) | 	if (!sk) | ||||||
| 		return -ENOMEM; | 		return -ENOMEM; | ||||||
| 	net->rtnl = sk; | 	net->rtnl = sk; | ||||||
|  |  | ||||||
|  | @ -752,7 +752,8 @@ static int dn_nl_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb) | ||||||
| 	skip_naddr = cb->args[1]; | 	skip_naddr = cb->args[1]; | ||||||
| 
 | 
 | ||||||
| 	idx = 0; | 	idx = 0; | ||||||
| 	for_each_netdev(&init_net, dev) { | 	rcu_read_lock(); | ||||||
|  | 	for_each_netdev_rcu(&init_net, dev) { | ||||||
| 		if (idx < skip_ndevs) | 		if (idx < skip_ndevs) | ||||||
| 			goto cont; | 			goto cont; | ||||||
| 		else if (idx > skip_ndevs) { | 		else if (idx > skip_ndevs) { | ||||||
|  | @ -761,11 +762,11 @@ static int dn_nl_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb) | ||||||
| 			skip_naddr = 0; | 			skip_naddr = 0; | ||||||
| 		} | 		} | ||||||
| 
 | 
 | ||||||
| 		if ((dn_db = rtnl_dereference(dev->dn_ptr)) == NULL) | 		if ((dn_db = rcu_dereference(dev->dn_ptr)) == NULL) | ||||||
| 			goto cont; | 			goto cont; | ||||||
| 
 | 
 | ||||||
| 		for (ifa = rtnl_dereference(dn_db->ifa_list), dn_idx = 0; ifa; | 		for (ifa = rcu_dereference(dn_db->ifa_list), dn_idx = 0; ifa; | ||||||
| 		     ifa = rtnl_dereference(ifa->ifa_next), dn_idx++) { | 		     ifa = rcu_dereference(ifa->ifa_next), dn_idx++) { | ||||||
| 			if (dn_idx < skip_naddr) | 			if (dn_idx < skip_naddr) | ||||||
| 				continue; | 				continue; | ||||||
| 
 | 
 | ||||||
|  | @ -778,6 +779,7 @@ static int dn_nl_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb) | ||||||
| 		idx++; | 		idx++; | ||||||
| 	} | 	} | ||||||
| done: | done: | ||||||
|  | 	rcu_read_unlock(); | ||||||
| 	cb->args[0] = idx; | 	cb->args[0] = idx; | ||||||
| 	cb->args[1] = dn_idx; | 	cb->args[1] = dn_idx; | ||||||
| 
 | 
 | ||||||
|  |  | ||||||
|  | @ -394,10 +394,11 @@ static int inet6_dump_fib(struct sk_buff *skb, struct netlink_callback *cb) | ||||||
| 	arg.net = net; | 	arg.net = net; | ||||||
| 	w->args = &arg; | 	w->args = &arg; | ||||||
| 
 | 
 | ||||||
|  | 	rcu_read_lock(); | ||||||
| 	for (h = s_h; h < FIB6_TABLE_HASHSZ; h++, s_e = 0) { | 	for (h = s_h; h < FIB6_TABLE_HASHSZ; h++, s_e = 0) { | ||||||
| 		e = 0; | 		e = 0; | ||||||
| 		head = &net->ipv6.fib_table_hash[h]; | 		head = &net->ipv6.fib_table_hash[h]; | ||||||
| 		hlist_for_each_entry(tb, node, head, tb6_hlist) { | 		hlist_for_each_entry_rcu(tb, node, head, tb6_hlist) { | ||||||
| 			if (e < s_e) | 			if (e < s_e) | ||||||
| 				goto next; | 				goto next; | ||||||
| 			res = fib6_dump_table(tb, skb, cb); | 			res = fib6_dump_table(tb, skb, cb); | ||||||
|  | @ -408,6 +409,7 @@ static int inet6_dump_fib(struct sk_buff *skb, struct netlink_callback *cb) | ||||||
| 		} | 		} | ||||||
| 	} | 	} | ||||||
| out: | out: | ||||||
|  | 	rcu_read_unlock(); | ||||||
| 	cb->args[1] = e; | 	cb->args[1] = e; | ||||||
| 	cb->args[0] = h; | 	cb->args[0] = h; | ||||||
| 
 | 
 | ||||||
|  |  | ||||||
|  | @ -426,18 +426,14 @@ int phonet_route_del(struct net_device *dev, u8 daddr) | ||||||
| 	return 0; | 	return 0; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| struct net_device *phonet_route_get(struct net *net, u8 daddr) | struct net_device *phonet_route_get_rcu(struct net *net, u8 daddr) | ||||||
| { | { | ||||||
| 	struct phonet_net *pnn = phonet_pernet(net); | 	struct phonet_net *pnn = phonet_pernet(net); | ||||||
| 	struct phonet_routes *routes = &pnn->routes; | 	struct phonet_routes *routes = &pnn->routes; | ||||||
| 	struct net_device *dev; | 	struct net_device *dev; | ||||||
| 
 | 
 | ||||||
| 	ASSERT_RTNL(); /* no need to hold the device */ |  | ||||||
| 
 |  | ||||||
| 	daddr >>= 2; | 	daddr >>= 2; | ||||||
| 	rcu_read_lock(); |  | ||||||
| 	dev = rcu_dereference(routes->table[daddr]); | 	dev = rcu_dereference(routes->table[daddr]); | ||||||
| 	rcu_read_unlock(); |  | ||||||
| 	return dev; | 	return dev; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  |  | ||||||
|  | @ -264,10 +264,11 @@ static int route_dumpit(struct sk_buff *skb, struct netlink_callback *cb) | ||||||
| 	struct net *net = sock_net(skb->sk); | 	struct net *net = sock_net(skb->sk); | ||||||
| 	u8 addr, addr_idx = 0, addr_start_idx = cb->args[0]; | 	u8 addr, addr_idx = 0, addr_start_idx = cb->args[0]; | ||||||
| 
 | 
 | ||||||
|  | 	rcu_read_lock(); | ||||||
| 	for (addr = 0; addr < 64; addr++) { | 	for (addr = 0; addr < 64; addr++) { | ||||||
| 		struct net_device *dev; | 		struct net_device *dev; | ||||||
| 
 | 
 | ||||||
| 		dev = phonet_route_get(net, addr << 2); | 		dev = phonet_route_get_rcu(net, addr << 2); | ||||||
| 		if (!dev) | 		if (!dev) | ||||||
| 			continue; | 			continue; | ||||||
| 
 | 
 | ||||||
|  | @ -279,6 +280,7 @@ static int route_dumpit(struct sk_buff *skb, struct netlink_callback *cb) | ||||||
| 	} | 	} | ||||||
| 
 | 
 | ||||||
| out: | out: | ||||||
|  | 	rcu_read_unlock(); | ||||||
| 	cb->args[0] = addr_idx; | 	cb->args[0] = addr_idx; | ||||||
| 	cb->args[1] = 0; | 	cb->args[1] = 0; | ||||||
| 
 | 
 | ||||||
|  |  | ||||||
		Loading…
	
		Reference in a new issue
	
	 Eric Dumazet
						Eric Dumazet