mirror of
				https://github.com/torvalds/linux.git
				synced 2025-11-04 02:30:34 +02:00 
			
		
		
		
	VXLAN FDB entries can point to either a remote destination or an FDB
nexthop group. The latter is usually used in EVPN deployments where
learning is disabled.
However, when learning is enabled, an incoming packet might try to
refresh an FDB entry that points to an FDB nexthop group and therefore
does not have a remote. Such packets should be dropped, but they are
only dropped after dereferencing the non-existent remote, resulting in a
NULL pointer dereference (NPD) [1] which can be reproduced using [2].
Fix by dropping such packets earlier. Remove the misleading comment from
first_remote_rcu().
[1]
BUG: kernel NULL pointer dereference, address: 0000000000000000
[...]
CPU: 13 UID: 0 PID: 361 Comm: mausezahn Not tainted 6.17.0-rc1-virtme-g9f6b606b6b37 #1 PREEMPT(voluntary)
Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.17.0-4.fc41 04/01/2014
RIP: 0010:vxlan_snoop+0x98/0x1e0
[...]
Call Trace:
 <TASK>
 vxlan_encap_bypass+0x209/0x240
 encap_bypass_if_local+0xb1/0x100
 vxlan_xmit_one+0x1375/0x17e0
 vxlan_xmit+0x6b4/0x15f0
 dev_hard_start_xmit+0x5d/0x1c0
 __dev_queue_xmit+0x246/0xfd0
 packet_sendmsg+0x113a/0x1850
 __sock_sendmsg+0x38/0x70
 __sys_sendto+0x126/0x180
 __x64_sys_sendto+0x24/0x30
 do_syscall_64+0xa4/0x260
 entry_SYSCALL_64_after_hwframe+0x4b/0x53
[2]
 #!/bin/bash
 ip address add 192.0.2.1/32 dev lo
 ip address add 192.0.2.2/32 dev lo
 ip nexthop add id 1 via 192.0.2.3 fdb
 ip nexthop add id 10 group 1 fdb
 ip link add name vx0 up type vxlan id 10010 local 192.0.2.1 dstport 12345 localbypass
 ip link add name vx1 up type vxlan id 10020 local 192.0.2.2 dstport 54321 learning
 bridge fdb add 00:11:22:33:44:55 dev vx0 self static dst 192.0.2.2 port 54321 vni 10020
 bridge fdb add 00:aa:bb:cc:dd:ee dev vx1 self static nhid 10
 mausezahn vx0 -a 00:aa:bb:cc:dd:ee -b 00:11:22:33:44:55 -c 1 -q
Fixes: 1274e1cc42 ("vxlan: ecmp support for mac fdb entries")
Reported-by: Marlin Cremers <mcremers@cloudbear.nl>
Reviewed-by: Petr Machata <petrm@nvidia.com>
Signed-off-by: Ido Schimmel <idosch@nvidia.com>
Reviewed-by: Nikolay Aleksandrov <razor@blackwall.org>
Link: https://patch.msgid.link/20250901065035.159644-2-idosch@nvidia.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
		
	
			
		
			
				
	
	
		
			253 lines
		
	
	
	
		
			7.5 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
			
		
		
	
	
			253 lines
		
	
	
	
		
			7.5 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
/* SPDX-License-Identifier: GPL-2.0 */
 | 
						|
/*
 | 
						|
 *	Vxlan private header file
 | 
						|
 *
 | 
						|
 */
 | 
						|
 | 
						|
#ifndef _VXLAN_PRIVATE_H
 | 
						|
#define _VXLAN_PRIVATE_H
 | 
						|
 | 
						|
#include <linux/rhashtable.h>
 | 
						|
 | 
						|
extern unsigned int vxlan_net_id;
 | 
						|
extern const u8 all_zeros_mac[ETH_ALEN + 2];
 | 
						|
extern const struct rhashtable_params vxlan_vni_rht_params;
 | 
						|
 | 
						|
/*
 * Number of hash bits and resulting bucket count of the per-namespace
 * socket table (struct vxlan_net::sock_list), which vs_head() indexes by
 * a hash of the port.
 */
#define PORT_HASH_BITS	8
#define PORT_HASH_SIZE	(1 << PORT_HASH_BITS)
 | 
						|
 | 
						|
/* per-network namespace private data for this module */
 | 
						|
struct vxlan_net {
 | 
						|
	struct list_head  vxlan_list;
 | 
						|
	/* sock_list is protected by rtnl lock */
 | 
						|
	struct hlist_head sock_list[PORT_HASH_SIZE];
 | 
						|
	struct notifier_block nexthop_notifier_block;
 | 
						|
};
 | 
						|
 | 
						|
struct vxlan_fdb_key {
 | 
						|
	u8 eth_addr[ETH_ALEN];
 | 
						|
	__be32 vni;
 | 
						|
};
 | 
						|
 | 
						|
/* Forwarding table entry */
 | 
						|
struct vxlan_fdb {
 | 
						|
	struct rhash_head rhnode;
 | 
						|
	struct rcu_head	  rcu;
 | 
						|
	unsigned long	  updated;	/* jiffies */
 | 
						|
	unsigned long	  used;
 | 
						|
	struct list_head  remotes;
 | 
						|
	struct vxlan_fdb_key key;
 | 
						|
	u16		  state;	/* see ndm_state */
 | 
						|
	u16		  flags;	/* see ndm_flags and below */
 | 
						|
	struct list_head  nh_list;
 | 
						|
	struct hlist_node fdb_node;
 | 
						|
	struct nexthop __rcu *nh;
 | 
						|
	struct vxlan_dev  __rcu *vdev;
 | 
						|
};
 | 
						|
 | 
						|
/*
 * Driver-private flag bit; struct vxlan_fdb::flags is documented as holding
 * ndm_flags plus the flags defined here.
 * NOTE(review): presumably marks entries created by user space - confirm.
 */
#define NTF_VXLAN_ADDED_BY_USER 0x100
 | 
						|
 | 
						|
/* Virtual Network hash table head */
 | 
						|
static inline struct hlist_head *vni_head(struct vxlan_sock *vs, __be32 vni)
 | 
						|
{
 | 
						|
	return &vs->vni_list[hash_32((__force u32)vni, VNI_HASH_BITS)];
 | 
						|
}
 | 
						|
 | 
						|
/* Socket hash table head */
 | 
						|
static inline struct hlist_head *vs_head(struct net *net, __be16 port)
 | 
						|
{
 | 
						|
	struct vxlan_net *vn = net_generic(net, vxlan_net_id);
 | 
						|
 | 
						|
	return &vn->sock_list[hash_32(ntohs(port), PORT_HASH_BITS)];
 | 
						|
}
 | 
						|
 | 
						|
/* First remote destination for a forwarding entry. */
 | 
						|
static inline struct vxlan_rdst *first_remote_rcu(struct vxlan_fdb *fdb)
 | 
						|
{
 | 
						|
	if (rcu_access_pointer(fdb->nh))
 | 
						|
		return NULL;
 | 
						|
	return list_entry_rcu(fdb->remotes.next, struct vxlan_rdst, list);
 | 
						|
}
 | 
						|
 | 
						|
static inline struct vxlan_rdst *first_remote_rtnl(struct vxlan_fdb *fdb)
 | 
						|
{
 | 
						|
	if (rcu_access_pointer(fdb->nh))
 | 
						|
		return NULL;
 | 
						|
	return list_first_entry(&fdb->remotes, struct vxlan_rdst, list);
 | 
						|
}
 | 
						|
 | 
						|
#if IS_ENABLED(CONFIG_IPV6)
 | 
						|
static inline
 | 
						|
bool vxlan_addr_equal(const union vxlan_addr *a, const union vxlan_addr *b)
 | 
						|
{
 | 
						|
	if (a->sa.sa_family != b->sa.sa_family)
 | 
						|
		return false;
 | 
						|
	if (a->sa.sa_family == AF_INET6)
 | 
						|
		return ipv6_addr_equal(&a->sin6.sin6_addr, &b->sin6.sin6_addr);
 | 
						|
	else
 | 
						|
		return a->sin.sin_addr.s_addr == b->sin.sin_addr.s_addr;
 | 
						|
}
 | 
						|
 | 
						|
static inline int vxlan_nla_get_addr(union vxlan_addr *ip,
 | 
						|
				     const struct nlattr *nla)
 | 
						|
{
 | 
						|
	if (nla_len(nla) >= sizeof(struct in6_addr)) {
 | 
						|
		ip->sin6.sin6_addr = nla_get_in6_addr(nla);
 | 
						|
		ip->sa.sa_family = AF_INET6;
 | 
						|
		return 0;
 | 
						|
	} else if (nla_len(nla) >= sizeof(__be32)) {
 | 
						|
		ip->sin.sin_addr.s_addr = nla_get_in_addr(nla);
 | 
						|
		ip->sa.sa_family = AF_INET;
 | 
						|
		return 0;
 | 
						|
	} else {
 | 
						|
		return -EAFNOSUPPORT;
 | 
						|
	}
 | 
						|
}
 | 
						|
 | 
						|
static inline int vxlan_nla_put_addr(struct sk_buff *skb, int attr,
 | 
						|
				     const union vxlan_addr *ip)
 | 
						|
{
 | 
						|
	if (ip->sa.sa_family == AF_INET6)
 | 
						|
		return nla_put_in6_addr(skb, attr, &ip->sin6.sin6_addr);
 | 
						|
	else
 | 
						|
		return nla_put_in_addr(skb, attr, ip->sin.sin_addr.s_addr);
 | 
						|
}
 | 
						|
 | 
						|
static inline bool vxlan_addr_is_multicast(const union vxlan_addr *ip)
 | 
						|
{
 | 
						|
	if (ip->sa.sa_family == AF_INET6)
 | 
						|
		return ipv6_addr_is_multicast(&ip->sin6.sin6_addr);
 | 
						|
	else
 | 
						|
		return ipv4_is_multicast(ip->sin.sin_addr.s_addr);
 | 
						|
}
 | 
						|
 | 
						|
#else /* !CONFIG_IPV6 */
 | 
						|
 | 
						|
static inline
 | 
						|
bool vxlan_addr_equal(const union vxlan_addr *a, const union vxlan_addr *b)
 | 
						|
{
 | 
						|
	return a->sin.sin_addr.s_addr == b->sin.sin_addr.s_addr;
 | 
						|
}
 | 
						|
 | 
						|
static inline int vxlan_nla_get_addr(union vxlan_addr *ip,
 | 
						|
				     const struct nlattr *nla)
 | 
						|
{
 | 
						|
	if (nla_len(nla) >= sizeof(struct in6_addr)) {
 | 
						|
		return -EAFNOSUPPORT;
 | 
						|
	} else if (nla_len(nla) >= sizeof(__be32)) {
 | 
						|
		ip->sin.sin_addr.s_addr = nla_get_in_addr(nla);
 | 
						|
		ip->sa.sa_family = AF_INET;
 | 
						|
		return 0;
 | 
						|
	} else {
 | 
						|
		return -EAFNOSUPPORT;
 | 
						|
	}
 | 
						|
}
 | 
						|
 | 
						|
static inline int vxlan_nla_put_addr(struct sk_buff *skb, int attr,
 | 
						|
				     const union vxlan_addr *ip)
 | 
						|
{
 | 
						|
	return nla_put_in_addr(skb, attr, ip->sin.sin_addr.s_addr);
 | 
						|
}
 | 
						|
 | 
						|
static inline bool vxlan_addr_is_multicast(const union vxlan_addr *ip)
 | 
						|
{
 | 
						|
	return ipv4_is_multicast(ip->sin.sin_addr.s_addr);
 | 
						|
}
 | 
						|
 | 
						|
#endif
 | 
						|
 | 
						|
static inline size_t vxlan_addr_size(const union vxlan_addr *ip)
 | 
						|
{
 | 
						|
	if (ip->sa.sa_family == AF_INET6)
 | 
						|
		return sizeof(struct in6_addr);
 | 
						|
	else
 | 
						|
		return sizeof(__be32);
 | 
						|
}
 | 
						|
 | 
						|
static inline struct vxlan_vni_node *
 | 
						|
vxlan_vnifilter_lookup(struct vxlan_dev *vxlan, __be32 vni)
 | 
						|
{
 | 
						|
	struct vxlan_vni_group *vg;
 | 
						|
 | 
						|
	vg = rcu_dereference_rtnl(vxlan->vnigrp);
 | 
						|
	if (!vg)
 | 
						|
		return NULL;
 | 
						|
 | 
						|
	return rhashtable_lookup_fast(&vg->vni_hash, &vni,
 | 
						|
				      vxlan_vni_rht_params);
 | 
						|
}
 | 
						|
 | 
						|
/* vxlan_core.c */
 | 
						|
int vxlan_fdb_create(struct vxlan_dev *vxlan,
 | 
						|
		     const u8 *mac, union vxlan_addr *ip,
 | 
						|
		     __u16 state, __be16 port, __be32 src_vni,
 | 
						|
		     __be32 vni, __u32 ifindex, __u16 ndm_flags,
 | 
						|
		     u32 nhid, struct vxlan_fdb **fdb,
 | 
						|
		     struct netlink_ext_ack *extack);
 | 
						|
int __vxlan_fdb_delete(struct vxlan_dev *vxlan,
 | 
						|
		       const unsigned char *addr, union vxlan_addr ip,
 | 
						|
		       __be16 port, __be32 src_vni, __be32 vni,
 | 
						|
		       u32 ifindex, bool swdev_notify);
 | 
						|
u32 eth_vni_hash(const unsigned char *addr, __be32 vni);
 | 
						|
u32 fdb_head_index(struct vxlan_dev *vxlan, const u8 *mac, __be32 vni);
 | 
						|
int vxlan_fdb_update(struct vxlan_dev *vxlan,
 | 
						|
		     const u8 *mac, union vxlan_addr *ip,
 | 
						|
		     __u16 state, __u16 flags,
 | 
						|
		     __be16 port, __be32 src_vni, __be32 vni,
 | 
						|
		     __u32 ifindex, __u16 ndm_flags, u32 nhid,
 | 
						|
		     bool swdev_notify, struct netlink_ext_ack *extack);
 | 
						|
void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
 | 
						|
		    __be32 default_vni, struct vxlan_rdst *rdst, bool did_rsc);
 | 
						|
int vxlan_vni_in_use(struct net *src_net, struct vxlan_dev *vxlan,
 | 
						|
		     struct vxlan_config *conf, __be32 vni);
 | 
						|
 | 
						|
/* vxlan_vnifilter.c */
 | 
						|
int vxlan_vnigroup_init(struct vxlan_dev *vxlan);
 | 
						|
void vxlan_vnigroup_uninit(struct vxlan_dev *vxlan);
 | 
						|
 | 
						|
int vxlan_vnifilter_init(void);
 | 
						|
void vxlan_vnifilter_uninit(void);
 | 
						|
void vxlan_vnifilter_count(struct vxlan_dev *vxlan, __be32 vni,
 | 
						|
			   struct vxlan_vni_node *vninode,
 | 
						|
			   int type, unsigned int len);
 | 
						|
 | 
						|
void vxlan_vs_add_vnigrp(struct vxlan_dev *vxlan,
 | 
						|
			 struct vxlan_sock *vs,
 | 
						|
			 bool ipv6);
 | 
						|
void vxlan_vs_del_vnigrp(struct vxlan_dev *vxlan);
 | 
						|
int vxlan_vnilist_update_group(struct vxlan_dev *vxlan,
 | 
						|
			       union vxlan_addr *old_remote_ip,
 | 
						|
			       union vxlan_addr *new_remote_ip,
 | 
						|
			       struct netlink_ext_ack *extack);
 | 
						|
 | 
						|
 | 
						|
/* vxlan_multicast.c */
 | 
						|
int vxlan_multicast_join(struct vxlan_dev *vxlan);
 | 
						|
int vxlan_multicast_leave(struct vxlan_dev *vxlan);
 | 
						|
bool vxlan_group_used(struct vxlan_net *vn, struct vxlan_dev *dev,
 | 
						|
		      __be32 vni, union vxlan_addr *rip, int rifindex);
 | 
						|
int vxlan_igmp_join(struct vxlan_dev *vxlan, union vxlan_addr *rip,
 | 
						|
		    int rifindex);
 | 
						|
int vxlan_igmp_leave(struct vxlan_dev *vxlan, union vxlan_addr *rip,
 | 
						|
		     int rifindex);
 | 
						|
 | 
						|
/* vxlan_mdb.c */
 | 
						|
int vxlan_mdb_dump(struct net_device *dev, struct sk_buff *skb,
 | 
						|
		   struct netlink_callback *cb);
 | 
						|
int vxlan_mdb_add(struct net_device *dev, struct nlattr *tb[], u16 nlmsg_flags,
 | 
						|
		  struct netlink_ext_ack *extack);
 | 
						|
int vxlan_mdb_del(struct net_device *dev, struct nlattr *tb[],
 | 
						|
		  struct netlink_ext_ack *extack);
 | 
						|
int vxlan_mdb_del_bulk(struct net_device *dev, struct nlattr *tb[],
 | 
						|
		       struct netlink_ext_ack *extack);
 | 
						|
int vxlan_mdb_get(struct net_device *dev, struct nlattr *tb[], u32 portid,
 | 
						|
		  u32 seq, struct netlink_ext_ack *extack);
 | 
						|
struct vxlan_mdb_entry *vxlan_mdb_entry_skb_get(struct vxlan_dev *vxlan,
 | 
						|
						struct sk_buff *skb,
 | 
						|
						__be32 src_vni);
 | 
						|
netdev_tx_t vxlan_mdb_xmit(struct vxlan_dev *vxlan,
 | 
						|
			   const struct vxlan_mdb_entry *mdb_entry,
 | 
						|
			   struct sk_buff *skb);
 | 
						|
int vxlan_mdb_init(struct vxlan_dev *vxlan);
 | 
						|
void vxlan_mdb_fini(struct vxlan_dev *vxlan);
 | 
						|
#endif
 |