forked from mirrors/linux
		
	pfcp_newlink() links the device to a list in dev_net(dev) instead
of net, where a udp tunnel socket is created.
Even when net is removed, the device stays alive on dev_net(dev).
Then, removing net triggers the splat below. [0]
In this example, pfcp0 is created in ns2, but the udp socket is
created in ns1.
  ip netns add ns1
  ip netns add ns2
  ip -n ns1 link add netns ns2 name pfcp0 type pfcp
  ip netns del ns1
Let's link the device to the socket's netns instead.
Now, pfcp_net_exit() needs another netdev iteration to remove
all pfcp devices in the netns.
pfcp_dev_list is not used under RCU, so the list API is converted
to the non-RCU variant.
pfcp_net_exit() can be converted to .exit_batch_rtnl() in net-next.
[0]:
ref_tracker: net notrefcnt@00000000128b34dc has 1/1 users at
     sk_alloc (./include/net/net_namespace.h:345 net/core/sock.c:2236)
     inet_create (net/ipv4/af_inet.c:326 net/ipv4/af_inet.c:252)
     __sock_create (net/socket.c:1558)
     udp_sock_create4 (net/ipv4/udp_tunnel_core.c:18)
     pfcp_create_sock (drivers/net/pfcp.c:168)
     pfcp_newlink (drivers/net/pfcp.c:182 drivers/net/pfcp.c:197)
     rtnl_newlink (net/core/rtnetlink.c:3786 net/core/rtnetlink.c:3897 net/core/rtnetlink.c:4012)
     rtnetlink_rcv_msg (net/core/rtnetlink.c:6922)
     netlink_rcv_skb (net/netlink/af_netlink.c:2542)
     netlink_unicast (net/netlink/af_netlink.c:1321 net/netlink/af_netlink.c:1347)
     netlink_sendmsg (net/netlink/af_netlink.c:1891)
     ____sys_sendmsg (net/socket.c:711 net/socket.c:726 net/socket.c:2583)
     ___sys_sendmsg (net/socket.c:2639)
     __sys_sendmsg (net/socket.c:2669)
     do_syscall_64 (arch/x86/entry/common.c:52 arch/x86/entry/common.c:83)
     entry_SYSCALL_64_after_hwframe (arch/x86/entry/entry_64.S:130)
WARNING: CPU: 1 PID: 11 at lib/ref_tracker.c:179 ref_tracker_dir_exit (lib/ref_tracker.c:179)
Modules linked in:
CPU: 1 UID: 0 PID: 11 Comm: kworker/u16:0 Not tainted 6.13.0-rc5-00147-g4c1224501e9d #5
Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.16.0-0-gd239552ce722-prebuilt.qemu.org 04/01/2014
Workqueue: netns cleanup_net
RIP: 0010:ref_tracker_dir_exit (lib/ref_tracker.c:179)
Code: 00 00 00 fc ff df 4d 8b 26 49 bd 00 01 00 00 00 00 ad de 4c 39 f5 0f 85 df 00 00 00 48 8b 74 24 08 48 89 df e8 a5 cc 12 02 90 <0f> 0b 90 48 8d 6b 44 be 04 00 00 00 48 89 ef e8 80 de 67 ff 48 89
RSP: 0018:ff11000007f3fb60 EFLAGS: 00010286
RAX: 00000000000020ef RBX: ff1100000d6481e0 RCX: 1ffffffff0e40d82
RDX: 0000000000000000 RSI: 0000000000000000 RDI: ffffffff8423ee3c
RBP: ff1100000d648230 R08: 0000000000000001 R09: fffffbfff0e395af
R10: 0000000000000001 R11: 0000000000000000 R12: ff1100000d648230
R13: dead000000000100 R14: ff1100000d648230 R15: dffffc0000000000
FS:  0000000000000000(0000) GS:ff1100006ce80000(0000) knlGS:0000000000000000
CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
CR2: 00005620e1363990 CR3: 000000000eeb2002 CR4: 0000000000771ef0
DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
DR3: 0000000000000000 DR6: 00000000fffe07f0 DR7: 0000000000000400
PKRU: 55555554
Call Trace:
 <TASK>
 ? __warn (kernel/panic.c:748)
 ? ref_tracker_dir_exit (lib/ref_tracker.c:179)
 ? report_bug (lib/bug.c:201 lib/bug.c:219)
 ? handle_bug (arch/x86/kernel/traps.c:285)
 ? exc_invalid_op (arch/x86/kernel/traps.c:309 (discriminator 1))
 ? asm_exc_invalid_op (./arch/x86/include/asm/idtentry.h:621)
 ? _raw_spin_unlock_irqrestore (./arch/x86/include/asm/irqflags.h:42 ./arch/x86/include/asm/irqflags.h:97 ./arch/x86/include/asm/irqflags.h:155 ./include/linux/spinlock_api_smp.h:151 kernel/locking/spinlock.c:194)
 ? ref_tracker_dir_exit (lib/ref_tracker.c:179)
 ? __pfx_ref_tracker_dir_exit (lib/ref_tracker.c:158)
 ? kfree (mm/slub.c:4613 mm/slub.c:4761)
 net_free (net/core/net_namespace.c:476 net/core/net_namespace.c:467)
 cleanup_net (net/core/net_namespace.c:664 (discriminator 3))
 process_one_work (kernel/workqueue.c:3229)
 worker_thread (kernel/workqueue.c:3304 kernel/workqueue.c:3391)
 kthread (kernel/kthread.c:389)
 ret_from_fork (arch/x86/kernel/process.c:147)
 ret_from_fork_asm (arch/x86/entry/entry_64.S:257)
  </TASK>
Fixes: 76c8764ef3 ("pfcp: add PFCP module")
Reported-by: Xiao Liang <shaw.leon@gmail.com>
Closes: https://lore.kernel.org/netdev/20250104125732.17335-1-shaw.leon@gmail.com/
Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
		
	
			
		
			
				
	
	
		
			306 lines
		
	
	
	
		
			6.4 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
			
		
		
	
	
			306 lines
		
	
	
	
		
			6.4 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
// SPDX-License-Identifier: GPL-2.0-or-later
 | 
						|
/*
 | 
						|
 * PFCP according to 3GPP TS 29.244
 | 
						|
 *
 | 
						|
 * Copyright (C) 2022, Intel Corporation.
 | 
						|
 */
 | 
						|
 | 
						|
#include <linux/module.h>
 | 
						|
#include <linux/netdevice.h>
 | 
						|
#include <linux/rculist.h>
 | 
						|
#include <linux/skbuff.h>
 | 
						|
#include <linux/types.h>
 | 
						|
 | 
						|
#include <net/udp.h>
 | 
						|
#include <net/udp_tunnel.h>
 | 
						|
#include <net/pfcp.h>
 | 
						|
 | 
						|
struct pfcp_dev {
 | 
						|
	struct list_head	list;
 | 
						|
 | 
						|
	struct socket		*sock;
 | 
						|
	struct net_device	*dev;
 | 
						|
	struct net		*net;
 | 
						|
 | 
						|
	struct gro_cells	gro_cells;
 | 
						|
};
 | 
						|
 | 
						|
/* Per-netns storage id: registered through pfcp_net_ops.id and used with
 * net_generic() to look up the struct pfcp_net of a namespace.
 */
static unsigned int pfcp_net_id __read_mostly;
 | 
						|
 | 
						|
struct pfcp_net {
 | 
						|
	struct list_head	pfcp_dev_list;
 | 
						|
};
 | 
						|
 | 
						|
static void
 | 
						|
pfcp_session_recv(struct pfcp_dev *pfcp, struct sk_buff *skb,
 | 
						|
		  struct pfcp_metadata *md)
 | 
						|
{
 | 
						|
	struct pfcphdr_session *unparsed = pfcp_hdr_session(skb);
 | 
						|
 | 
						|
	md->seid = unparsed->seid;
 | 
						|
	md->type = PFCP_TYPE_SESSION;
 | 
						|
}
 | 
						|
 | 
						|
static void
 | 
						|
pfcp_node_recv(struct pfcp_dev *pfcp, struct sk_buff *skb,
 | 
						|
	       struct pfcp_metadata *md)
 | 
						|
{
 | 
						|
	md->type = PFCP_TYPE_NODE;
 | 
						|
}
 | 
						|
 | 
						|
static int pfcp_encap_recv(struct sock *sk, struct sk_buff *skb)
 | 
						|
{
 | 
						|
	IP_TUNNEL_DECLARE_FLAGS(flags) = { };
 | 
						|
	struct metadata_dst *tun_dst;
 | 
						|
	struct pfcp_metadata *md;
 | 
						|
	struct pfcphdr *unparsed;
 | 
						|
	struct pfcp_dev *pfcp;
 | 
						|
 | 
						|
	if (unlikely(!pskb_may_pull(skb, PFCP_HLEN)))
 | 
						|
		goto drop;
 | 
						|
 | 
						|
	pfcp = rcu_dereference_sk_user_data(sk);
 | 
						|
	if (unlikely(!pfcp))
 | 
						|
		goto drop;
 | 
						|
 | 
						|
	unparsed = pfcp_hdr(skb);
 | 
						|
 | 
						|
	ip_tunnel_flags_zero(flags);
 | 
						|
	tun_dst = udp_tun_rx_dst(skb, sk->sk_family, flags, 0,
 | 
						|
				 sizeof(*md));
 | 
						|
	if (unlikely(!tun_dst))
 | 
						|
		goto drop;
 | 
						|
 | 
						|
	md = ip_tunnel_info_opts(&tun_dst->u.tun_info);
 | 
						|
	if (unlikely(!md))
 | 
						|
		goto drop;
 | 
						|
 | 
						|
	if (unparsed->flags & PFCP_SEID_FLAG)
 | 
						|
		pfcp_session_recv(pfcp, skb, md);
 | 
						|
	else
 | 
						|
		pfcp_node_recv(pfcp, skb, md);
 | 
						|
 | 
						|
	__set_bit(IP_TUNNEL_PFCP_OPT_BIT, tun_dst->u.tun_info.key.tun_flags);
 | 
						|
	tun_dst->u.tun_info.options_len = sizeof(*md);
 | 
						|
 | 
						|
	if (unlikely(iptunnel_pull_header(skb, PFCP_HLEN, skb->protocol,
 | 
						|
					  !net_eq(sock_net(sk),
 | 
						|
					  dev_net(pfcp->dev)))))
 | 
						|
		goto drop;
 | 
						|
 | 
						|
	skb_dst_set(skb, (struct dst_entry *)tun_dst);
 | 
						|
 | 
						|
	skb_reset_network_header(skb);
 | 
						|
	skb_reset_mac_header(skb);
 | 
						|
	skb->dev = pfcp->dev;
 | 
						|
 | 
						|
	gro_cells_receive(&pfcp->gro_cells, skb);
 | 
						|
 | 
						|
	return 0;
 | 
						|
drop:
 | 
						|
	kfree_skb(skb);
 | 
						|
	return 0;
 | 
						|
}
 | 
						|
 | 
						|
static void pfcp_del_sock(struct pfcp_dev *pfcp)
 | 
						|
{
 | 
						|
	udp_tunnel_sock_release(pfcp->sock);
 | 
						|
	pfcp->sock = NULL;
 | 
						|
}
 | 
						|
 | 
						|
static void pfcp_dev_uninit(struct net_device *dev)
 | 
						|
{
 | 
						|
	struct pfcp_dev *pfcp = netdev_priv(dev);
 | 
						|
 | 
						|
	gro_cells_destroy(&pfcp->gro_cells);
 | 
						|
	pfcp_del_sock(pfcp);
 | 
						|
}
 | 
						|
 | 
						|
static int pfcp_dev_init(struct net_device *dev)
 | 
						|
{
 | 
						|
	struct pfcp_dev *pfcp = netdev_priv(dev);
 | 
						|
 | 
						|
	pfcp->dev = dev;
 | 
						|
 | 
						|
	return gro_cells_init(&pfcp->gro_cells, dev);
 | 
						|
}
 | 
						|
 | 
						|
static const struct net_device_ops pfcp_netdev_ops = {
 | 
						|
	.ndo_init		= pfcp_dev_init,
 | 
						|
	.ndo_uninit		= pfcp_dev_uninit,
 | 
						|
	.ndo_get_stats64	= dev_get_tstats64,
 | 
						|
};
 | 
						|
 | 
						|
static const struct device_type pfcp_type = {
 | 
						|
	.name = "pfcp",
 | 
						|
};
 | 
						|
 | 
						|
static void pfcp_link_setup(struct net_device *dev)
 | 
						|
{
 | 
						|
	dev->netdev_ops = &pfcp_netdev_ops;
 | 
						|
	dev->needs_free_netdev = true;
 | 
						|
	SET_NETDEV_DEVTYPE(dev, &pfcp_type);
 | 
						|
 | 
						|
	dev->hard_header_len = 0;
 | 
						|
	dev->addr_len = 0;
 | 
						|
 | 
						|
	dev->type = ARPHRD_NONE;
 | 
						|
	dev->flags = IFF_POINTOPOINT | IFF_NOARP | IFF_MULTICAST;
 | 
						|
	dev->priv_flags |= IFF_NO_QUEUE;
 | 
						|
 | 
						|
	netif_keep_dst(dev);
 | 
						|
}
 | 
						|
 | 
						|
static struct socket *pfcp_create_sock(struct pfcp_dev *pfcp)
 | 
						|
{
 | 
						|
	struct udp_tunnel_sock_cfg tuncfg = {};
 | 
						|
	struct udp_port_cfg udp_conf = {
 | 
						|
		.local_ip.s_addr	= htonl(INADDR_ANY),
 | 
						|
		.family			= AF_INET,
 | 
						|
	};
 | 
						|
	struct net *net = pfcp->net;
 | 
						|
	struct socket *sock;
 | 
						|
	int err;
 | 
						|
 | 
						|
	udp_conf.local_udp_port = htons(PFCP_PORT);
 | 
						|
 | 
						|
	err = udp_sock_create(net, &udp_conf, &sock);
 | 
						|
	if (err)
 | 
						|
		return ERR_PTR(err);
 | 
						|
 | 
						|
	tuncfg.sk_user_data = pfcp;
 | 
						|
	tuncfg.encap_rcv = pfcp_encap_recv;
 | 
						|
	tuncfg.encap_type = 1;
 | 
						|
 | 
						|
	setup_udp_tunnel_sock(net, sock, &tuncfg);
 | 
						|
 | 
						|
	return sock;
 | 
						|
}
 | 
						|
 | 
						|
static int pfcp_add_sock(struct pfcp_dev *pfcp)
 | 
						|
{
 | 
						|
	pfcp->sock = pfcp_create_sock(pfcp);
 | 
						|
 | 
						|
	return PTR_ERR_OR_ZERO(pfcp->sock);
 | 
						|
}
 | 
						|
 | 
						|
static int pfcp_newlink(struct net *net, struct net_device *dev,
 | 
						|
			struct nlattr *tb[], struct nlattr *data[],
 | 
						|
			struct netlink_ext_ack *extack)
 | 
						|
{
 | 
						|
	struct pfcp_dev *pfcp = netdev_priv(dev);
 | 
						|
	struct pfcp_net *pn;
 | 
						|
	int err;
 | 
						|
 | 
						|
	pfcp->net = net;
 | 
						|
 | 
						|
	err = pfcp_add_sock(pfcp);
 | 
						|
	if (err) {
 | 
						|
		netdev_dbg(dev, "failed to add pfcp socket %d\n", err);
 | 
						|
		goto exit_err;
 | 
						|
	}
 | 
						|
 | 
						|
	err = register_netdevice(dev);
 | 
						|
	if (err) {
 | 
						|
		netdev_dbg(dev, "failed to register pfcp netdev %d\n", err);
 | 
						|
		goto exit_del_pfcp_sock;
 | 
						|
	}
 | 
						|
 | 
						|
	pn = net_generic(net, pfcp_net_id);
 | 
						|
	list_add(&pfcp->list, &pn->pfcp_dev_list);
 | 
						|
 | 
						|
	netdev_dbg(dev, "registered new PFCP interface\n");
 | 
						|
 | 
						|
	return 0;
 | 
						|
 | 
						|
exit_del_pfcp_sock:
 | 
						|
	pfcp_del_sock(pfcp);
 | 
						|
exit_err:
 | 
						|
	pfcp->net = NULL;
 | 
						|
	return err;
 | 
						|
}
 | 
						|
 | 
						|
static void pfcp_dellink(struct net_device *dev, struct list_head *head)
 | 
						|
{
 | 
						|
	struct pfcp_dev *pfcp = netdev_priv(dev);
 | 
						|
 | 
						|
	list_del(&pfcp->list);
 | 
						|
	unregister_netdevice_queue(dev, head);
 | 
						|
}
 | 
						|
 | 
						|
/* rtnetlink glue for "ip link add ... type pfcp". */
static struct rtnl_link_ops pfcp_link_ops __read_mostly = {
	.kind		= "pfcp",
	.priv_size	= sizeof(struct pfcp_dev),
	.setup		= pfcp_link_setup,
	.newlink	= pfcp_newlink,
	.dellink	= pfcp_dellink,
};
 | 
						|
 | 
						|
/* Per-netns init: start with an empty device list. Always returns 0. */
static int __net_init pfcp_net_init(struct net *net)
{
	struct pfcp_net *pernet = net_generic(net, pfcp_net_id);

	INIT_LIST_HEAD(&pernet->pfcp_dev_list);

	return 0;
}
 | 
						|
 | 
						|
/* Per-netns exit: unregister every PFCP device tied to @net.
 *
 * Two passes are needed because a device is listed under the netns given
 * to pfcp_newlink(), which may differ from the netns the device lives in:
 *  1) PFCP devices that currently live in @net;
 *  2) devices still on @net's pfcp_dev_list.
 * All of them are collected and unregistered in one batch under RTNL.
 */
static void __net_exit pfcp_net_exit(struct net *net)
{
	struct pfcp_net *pernet = net_generic(net, pfcp_net_id);
	struct pfcp_dev *cur, *tmp;
	struct net_device *dev;
	LIST_HEAD(kill_list);

	rtnl_lock();

	for_each_netdev(net, dev) {
		if (dev->rtnl_link_ops != &pfcp_link_ops)
			continue;

		pfcp_dellink(dev, &kill_list);
	}

	/* pfcp_dellink() removes the entry, hence the _safe iterator. */
	list_for_each_entry_safe(cur, tmp, &pernet->pfcp_dev_list, list) {
		pfcp_dellink(cur->dev, &kill_list);
	}

	unregister_netdevice_many(&kill_list);

	rtnl_unlock();
}
 | 
						|
 | 
						|
static struct pernet_operations pfcp_net_ops = {
 | 
						|
	.init	= pfcp_net_init,
 | 
						|
	.exit	= pfcp_net_exit,
 | 
						|
	.id	= &pfcp_net_id,
 | 
						|
	.size	= sizeof(struct pfcp_net),
 | 
						|
};
 | 
						|
 | 
						|
/* Module init: register the per-netns ops, then the rtnl link ops.
 * If the second step fails the first is undone. Any failure is logged
 * and its errno returned.
 */
static int __init pfcp_init(void)
{
	int err;

	err = register_pernet_subsys(&pfcp_net_ops);
	if (err)
		goto fail;

	err = rtnl_link_register(&pfcp_link_ops);
	if (!err)
		return 0;

	unregister_pernet_subsys(&pfcp_net_ops);
fail:
	pr_err("loading PFCP module failed: err %d\n", err);
	return err;
}
late_initcall(pfcp_init);
 | 
						|
 | 
						|
/* Module exit: undo pfcp_init() in reverse order of registration. */
static void __exit pfcp_exit(void)
{
	rtnl_link_unregister(&pfcp_link_ops);
	unregister_pernet_subsys(&pfcp_net_ops);

	pr_info("PFCP module unloaded\n");
}
module_exit(pfcp_exit);
 | 
						|
 | 
						|
/* Module metadata; the rtnl-link alias matches pfcp_link_ops.kind. */
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Wojciech Drewek <wojciech.drewek@intel.com>");
MODULE_DESCRIPTION("Interface driver for PFCP encapsulated traffic");
MODULE_ALIAS_RTNL_LINK("pfcp");
 |