forked from mirrors/linux
		
	netfilter: ipset: fix race condition between swap/destroy and kernel side add/del/test
Linkui Xiao reported that there's a race condition when ipset swap and destroy is
called, which can lead to crash in add/del/test element operations. Swap then
destroy are usual operations to replace a set with another one in a production
system. The issue can in some cases be reproduced with the script:
ipset create hash_ip1 hash:net family inet hashsize 1024 maxelem 1048576
ipset add hash_ip1 172.20.0.0/16
ipset add hash_ip1 192.168.0.0/16
iptables -A INPUT -m set --match-set hash_ip1 src -j ACCEPT
while [ 1 ]
do
	# ... Ongoing traffic...
        ipset create hash_ip2 hash:net family inet hashsize 1024 maxelem 1048576
        ipset add hash_ip2 172.20.0.0/16
        ipset swap hash_ip1 hash_ip2
        ipset destroy hash_ip2
        sleep 0.05
done
In the race case the possible order of the operations are
	CPU0			CPU1
	ip_set_test
				ipset swap hash_ip1 hash_ip2
				ipset destroy hash_ip2
	hash_net_kadt
Swap replaces hash_ip1 with hash_ip2 and then destroy removes hash_ip2 which
is the original hash_ip1. ip_set_test was called on hash_ip1 and because destroy
removed it, hash_net_kadt crashes.
The fix is to force ip_set_swap() to wait for all readers to finish accessing the
old set pointers by calling synchronize_rcu().
The first version of the patch was written by Linkui Xiao <xiaolinkui@kylinos.cn>.
v2: synchronize_rcu() is moved into ip_set_swap() in order not to burden
    ip_set_destroy() unnecessarily when all sets are destroyed.
v3: Florian Westphal pointed out that all netfilter hooks run with rcu_read_lock() held
    and em_ipset.c wraps the entire ip_set_test() in rcu read lock/unlock pair.
    So there's no need to extend the rcu read locked area in ipset itself.
Closes: https://lore.kernel.org/all/69e7963b-e7f8-3ad0-210-7b86eebf7f78@netfilter.org/
Reported by: Linkui Xiao <xiaolinkui@kylinos.cn>
Signed-off-by: Jozsef Kadlecsik <kadlec@netfilter.org>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
			
			
This commit is contained in:
		
							parent
							
								
									a7d5a955bf
								
							
						
					
					
						commit
						28628fa952
					
				
					 1 changed files with 7 additions and 7 deletions
				
			
		|  | @ -61,6 +61,8 @@ MODULE_ALIAS_NFNL_SUBSYS(NFNL_SUBSYS_IPSET); | |||
| 	ip_set_dereference((inst)->ip_set_list)[id] | ||||
| #define ip_set_ref_netlink(inst,id)	\ | ||||
| 	rcu_dereference_raw((inst)->ip_set_list)[id] | ||||
| #define ip_set_dereference_nfnl(p)	\ | ||||
| 	rcu_dereference_check(p, lockdep_nfnl_is_held(NFNL_SUBSYS_IPSET)) | ||||
| 
 | ||||
| /* The set types are implemented in modules and registered set types
 | ||||
|  * can be found in ip_set_type_list. Adding/deleting types is | ||||
|  | @ -708,15 +710,10 @@ __ip_set_put_netlink(struct ip_set *set) | |||
| static struct ip_set * | ||||
| ip_set_rcu_get(struct net *net, ip_set_id_t index) | ||||
| { | ||||
| 	struct ip_set *set; | ||||
| 	struct ip_set_net *inst = ip_set_pernet(net); | ||||
| 
 | ||||
| 	rcu_read_lock(); | ||||
| 	/* ip_set_list itself needs to be protected */ | ||||
| 	set = rcu_dereference(inst->ip_set_list)[index]; | ||||
| 	rcu_read_unlock(); | ||||
| 
 | ||||
| 	return set; | ||||
| 	/* ip_set_list and the set pointer need to be protected */ | ||||
| 	return ip_set_dereference_nfnl(inst->ip_set_list)[index]; | ||||
| } | ||||
| 
 | ||||
| static inline void | ||||
|  | @ -1397,6 +1394,9 @@ static int ip_set_swap(struct sk_buff *skb, const struct nfnl_info *info, | |||
| 	ip_set(inst, to_id) = from; | ||||
| 	write_unlock_bh(&ip_set_ref_lock); | ||||
| 
 | ||||
| 	/* Make sure all readers of the old set pointers are completed. */ | ||||
| 	synchronize_rcu(); | ||||
| 
 | ||||
| 	return 0; | ||||
| } | ||||
| 
 | ||||
|  |  | |||
		Loading…
	
		Reference in a new issue
	
	 Jozsef Kadlecsik
						Jozsef Kadlecsik