forked from mirrors/linux
		
	 2d300ce0b7
			
		
	
	
		2d300ce0b7
		
	
	
	
	
		
			
			Before commit 40867d74c3 ("net: Add l3mdev index to flow struct and avoid oif reset for port devices") it was possible to use FIB rules to match on a L3 domain. This was done by having a FIB rule match on iif / oif being a L3 master device. It worked because prior to the FIB rule lookup the iif / oif fields in the flow structure were reset to the index of the L3 master device to which the input / output device was enslaved. The above scheme made it impossible to match on the original input / output device. Therefore, cited commit stopped overwriting the iif / oif fields in the flow structure and instead stored the index of the enslaving L3 master device in a new field ('flowi_l3mdev') in the flow structure. While the change enabled new use cases, it broke the original use case of matching on a L3 domain. Fix this by interpreting the iif / oif matching on a L3 master device as a match against the L3 domain. In other words, if the iif / oif in the FIB rule points to a L3 master device, compare the provided index against 'flowi_l3mdev' rather than 'flowi_{i,o}if'. Before cited commit, a FIB rule that matched on 'iif vrf1' would only match incoming traffic from devices enslaved to 'vrf1'. With the proposed change (i.e., comparing against 'flowi_l3mdev'), the rule would also match traffic originating from a socket bound to 'vrf1'. Avoid that by adding a new flow flag ('FLOWI_FLAG_L3MDEV_OIF') that indicates if the L3 domain was derived from the output interface or the input interface (when not set) and take this flag into account when evaluating the FIB rule against the flow structure. Avoid unnecessary checks in the data path by detecting that a rule matches on a L3 master device when the rule is installed and marking it as such. Tested using the following script [1]. 
Output before 40867d74c3 (v5.4.291): default dev dummy1 table 100 scope link default dev dummy1 table 200 scope link Output after 40867d74c3: default dev dummy1 table 300 scope link default dev dummy1 table 300 scope link Output with this patch: default dev dummy1 table 100 scope link default dev dummy1 table 200 scope link [1] #!/bin/bash ip link add name vrf1 up type vrf table 10 ip link add name dummy1 up master vrf1 type dummy sysctl -wq net.ipv4.conf.all.forwarding=1 sysctl -wq net.ipv4.conf.all.rp_filter=0 ip route add table 100 default dev dummy1 ip route add table 200 default dev dummy1 ip route add table 300 default dev dummy1 ip rule add prio 0 oif vrf1 table 100 ip rule add prio 1 iif vrf1 table 200 ip rule add prio 2 table 300 ip route get 192.0.2.1 oif dummy1 fibmatch ip route get 192.0.2.1 iif dummy1 from 198.51.100.1 fibmatch Fixes: 40867d74c3 ("net: Add l3mdev index to flow struct and avoid oif reset for port devices") Reported-by: hanhuihui <hanhuihui5@huawei.com> Closes: https://lore.kernel.org/netdev/ec671c4f821a4d63904d0da15d604b75@huawei.com/ Signed-off-by: Ido Schimmel <idosch@nvidia.com> Acked-by: David Ahern <dsahern@kernel.org> Link: https://patch.msgid.link/20250414172022.242991-2-idosch@nvidia.com Signed-off-by: Jakub Kicinski <kuba@kernel.org>
		
			
				
	
	
		
			196 lines
		
	
	
	
		
			5 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
			
		
		
	
	
			196 lines
		
	
	
	
		
			5 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
| /* SPDX-License-Identifier: GPL-2.0 */
 | |
| /*
 | |
|  *
 | |
|  *	Generic internet FLOW.
 | |
|  *
 | |
|  */
 | |
| 
 | |
| #ifndef _NET_FLOW_H
 | |
| #define _NET_FLOW_H
 | |
| 
 | |
| #include <linux/in6.h>
 | |
| #include <linux/atomic.h>
 | |
| #include <linux/container_of.h>
 | |
| #include <linux/uidgid.h>
 | |
| 
 | |
| struct flow_keys;
 | |
| 
 | |
| /*
 | |
|  * ifindex generation is per-net namespace, and loopback is
 | |
|  * always the 1st device in ns (see net_dev_init), thus any
 | |
|  * loopback device should get ifindex 1
 | |
|  */
 | |
| 
 | |
| #define LOOPBACK_IFINDEX	1
 | |
| 
 | |
| struct flowi_tunnel {
 | |
| 	__be64			tun_id;
 | |
| };
 | |
| 
 | |
| struct flowi_common {
 | |
| 	int	flowic_oif;
 | |
| 	int	flowic_iif;
 | |
| 	int     flowic_l3mdev;
 | |
| 	__u32	flowic_mark;
 | |
| 	__u8	flowic_tos;
 | |
| 	__u8	flowic_scope;
 | |
| 	__u8	flowic_proto;
 | |
| 	__u8	flowic_flags;
 | |
| #define FLOWI_FLAG_ANYSRC		0x01
 | |
| #define FLOWI_FLAG_KNOWN_NH		0x02
 | |
| #define FLOWI_FLAG_L3MDEV_OIF		0x04
 | |
| 	__u32	flowic_secid;
 | |
| 	kuid_t  flowic_uid;
 | |
| 	__u32		flowic_multipath_hash;
 | |
| 	struct flowi_tunnel flowic_tun_key;
 | |
| };
 | |
| 
 | |
| union flowi_uli {
 | |
| 	struct {
 | |
| 		__be16	dport;
 | |
| 		__be16	sport;
 | |
| 	} ports;
 | |
| 
 | |
| 	struct {
 | |
| 		__u8	type;
 | |
| 		__u8	code;
 | |
| 	} icmpt;
 | |
| 
 | |
| 	__be32		gre_key;
 | |
| 
 | |
| 	struct {
 | |
| 		__u8	type;
 | |
| 	} mht;
 | |
| };
 | |
| 
 | |
| struct flowi4 {
 | |
| 	struct flowi_common	__fl_common;
 | |
| #define flowi4_oif		__fl_common.flowic_oif
 | |
| #define flowi4_iif		__fl_common.flowic_iif
 | |
| #define flowi4_l3mdev		__fl_common.flowic_l3mdev
 | |
| #define flowi4_mark		__fl_common.flowic_mark
 | |
| #define flowi4_tos		__fl_common.flowic_tos
 | |
| #define flowi4_scope		__fl_common.flowic_scope
 | |
| #define flowi4_proto		__fl_common.flowic_proto
 | |
| #define flowi4_flags		__fl_common.flowic_flags
 | |
| #define flowi4_secid		__fl_common.flowic_secid
 | |
| #define flowi4_tun_key		__fl_common.flowic_tun_key
 | |
| #define flowi4_uid		__fl_common.flowic_uid
 | |
| #define flowi4_multipath_hash	__fl_common.flowic_multipath_hash
 | |
| 
 | |
| 	/* (saddr,daddr) must be grouped, same order as in IP header */
 | |
| 	__be32			saddr;
 | |
| 	__be32			daddr;
 | |
| 
 | |
| 	union flowi_uli		uli;
 | |
| #define fl4_sport		uli.ports.sport
 | |
| #define fl4_dport		uli.ports.dport
 | |
| #define fl4_icmp_type		uli.icmpt.type
 | |
| #define fl4_icmp_code		uli.icmpt.code
 | |
| #define fl4_mh_type		uli.mht.type
 | |
| #define fl4_gre_key		uli.gre_key
 | |
| } __attribute__((__aligned__(BITS_PER_LONG/8)));
 | |
| 
 | |
| static inline void flowi4_init_output(struct flowi4 *fl4, int oif,
 | |
| 				      __u32 mark, __u8 tos, __u8 scope,
 | |
| 				      __u8 proto, __u8 flags,
 | |
| 				      __be32 daddr, __be32 saddr,
 | |
| 				      __be16 dport, __be16 sport,
 | |
| 				      kuid_t uid)
 | |
| {
 | |
| 	fl4->flowi4_oif = oif;
 | |
| 	fl4->flowi4_iif = LOOPBACK_IFINDEX;
 | |
| 	fl4->flowi4_l3mdev = 0;
 | |
| 	fl4->flowi4_mark = mark;
 | |
| 	fl4->flowi4_tos = tos;
 | |
| 	fl4->flowi4_scope = scope;
 | |
| 	fl4->flowi4_proto = proto;
 | |
| 	fl4->flowi4_flags = flags;
 | |
| 	fl4->flowi4_secid = 0;
 | |
| 	fl4->flowi4_tun_key.tun_id = 0;
 | |
| 	fl4->flowi4_uid = uid;
 | |
| 	fl4->daddr = daddr;
 | |
| 	fl4->saddr = saddr;
 | |
| 	fl4->fl4_dport = dport;
 | |
| 	fl4->fl4_sport = sport;
 | |
| 	fl4->flowi4_multipath_hash = 0;
 | |
| }
 | |
| 
 | |
| /* Reset some input parameters after previous lookup */
 | |
| static inline void flowi4_update_output(struct flowi4 *fl4, int oif,
 | |
| 					__be32 daddr, __be32 saddr)
 | |
| {
 | |
| 	fl4->flowi4_oif = oif;
 | |
| 	fl4->daddr = daddr;
 | |
| 	fl4->saddr = saddr;
 | |
| }
 | |
| 
 | |
| 
 | |
| struct flowi6 {
 | |
| 	struct flowi_common	__fl_common;
 | |
| #define flowi6_oif		__fl_common.flowic_oif
 | |
| #define flowi6_iif		__fl_common.flowic_iif
 | |
| #define flowi6_l3mdev		__fl_common.flowic_l3mdev
 | |
| #define flowi6_mark		__fl_common.flowic_mark
 | |
| #define flowi6_scope		__fl_common.flowic_scope
 | |
| #define flowi6_proto		__fl_common.flowic_proto
 | |
| #define flowi6_flags		__fl_common.flowic_flags
 | |
| #define flowi6_secid		__fl_common.flowic_secid
 | |
| #define flowi6_tun_key		__fl_common.flowic_tun_key
 | |
| #define flowi6_uid		__fl_common.flowic_uid
 | |
| 	struct in6_addr		daddr;
 | |
| 	struct in6_addr		saddr;
 | |
| 	/* Note: flowi6_tos is encoded in flowlabel, too. */
 | |
| 	__be32			flowlabel;
 | |
| 	union flowi_uli		uli;
 | |
| #define fl6_sport		uli.ports.sport
 | |
| #define fl6_dport		uli.ports.dport
 | |
| #define fl6_icmp_type		uli.icmpt.type
 | |
| #define fl6_icmp_code		uli.icmpt.code
 | |
| #define fl6_mh_type		uli.mht.type
 | |
| #define fl6_gre_key		uli.gre_key
 | |
| 	__u32			mp_hash;
 | |
| } __attribute__((__aligned__(BITS_PER_LONG/8)));
 | |
| 
 | |
| struct flowi {
 | |
| 	union {
 | |
| 		struct flowi_common	__fl_common;
 | |
| 		struct flowi4		ip4;
 | |
| 		struct flowi6		ip6;
 | |
| 	} u;
 | |
| #define flowi_oif	u.__fl_common.flowic_oif
 | |
| #define flowi_iif	u.__fl_common.flowic_iif
 | |
| #define flowi_l3mdev	u.__fl_common.flowic_l3mdev
 | |
| #define flowi_mark	u.__fl_common.flowic_mark
 | |
| #define flowi_tos	u.__fl_common.flowic_tos
 | |
| #define flowi_scope	u.__fl_common.flowic_scope
 | |
| #define flowi_proto	u.__fl_common.flowic_proto
 | |
| #define flowi_flags	u.__fl_common.flowic_flags
 | |
| #define flowi_secid	u.__fl_common.flowic_secid
 | |
| #define flowi_tun_key	u.__fl_common.flowic_tun_key
 | |
| #define flowi_uid	u.__fl_common.flowic_uid
 | |
| } __attribute__((__aligned__(BITS_PER_LONG/8)));
 | |
| 
 | |
| static inline struct flowi *flowi4_to_flowi(struct flowi4 *fl4)
 | |
| {
 | |
| 	return container_of(fl4, struct flowi, u.ip4);
 | |
| }
 | |
| 
 | |
| static inline struct flowi_common *flowi4_to_flowi_common(struct flowi4 *fl4)
 | |
| {
 | |
| 	return &(fl4->__fl_common);
 | |
| }
 | |
| 
 | |
| static inline struct flowi *flowi6_to_flowi(struct flowi6 *fl6)
 | |
| {
 | |
| 	return container_of(fl6, struct flowi, u.ip6);
 | |
| }
 | |
| 
 | |
| static inline struct flowi_common *flowi6_to_flowi_common(struct flowi6 *fl6)
 | |
| {
 | |
| 	return &(fl6->__fl_common);
 | |
| }
 | |
| 
 | |
| __u32 __get_hash_from_flowi6(const struct flowi6 *fl6, struct flow_keys *keys); /* Declaration only; defined outside this header. NOTE(review): presumably derives a 32-bit hash from @fl6, using @keys as working storage - confirm at the definition. */
 | |
| 
 | |
| #endif
 |