mirror of
				https://github.com/torvalds/linux.git
				synced 2025-11-04 02:30:34 +02:00 
			
		
		
		
	net: add a sysctl to reflect the fwmark on replies
Kernel-originated IP packets that have no user socket associated with them (e.g., ICMP errors and echo replies, TCP RSTs, etc.) are emitted with a mark of zero. Add a sysctl to make them have the same mark as the packet they are replying to.

This allows an administrator that wishes to do so to use mark-based routing, firewalling, etc. for these replies by marking the original packets inbound.

Tested using user-mode linux:
- ICMP/ICMPv6 echo replies and errors.
- TCP RST packets (IPv4 and IPv6).

Signed-off-by: Lorenzo Colitti <lorenzo@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
		
							parent
							
								
									87e067cda6
								
							
						
					
					
						commit
						e110861f86
					
				
					 10 changed files with 41 additions and 3 deletions
				
			
		| 
						 | 
					@ -231,6 +231,9 @@ void ipfrag_init(void);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
void ip_static_sysctl_init(void);
 | 
					void ip_static_sysctl_init(void);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#define IP4_REPLY_MARK(net, mark) \
 | 
				
			||||||
 | 
						((net)->ipv4.sysctl_fwmark_reflect ? (mark) : 0)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
static inline bool ip_is_fragment(const struct iphdr *iph)
 | 
					static inline bool ip_is_fragment(const struct iphdr *iph)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
	return (iph->frag_off & htons(IP_MF | IP_OFFSET)) != 0;
 | 
						return (iph->frag_off & htons(IP_MF | IP_OFFSET)) != 0;
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -113,6 +113,9 @@ struct frag_hdr {
 | 
				
			||||||
#define	IP6_MF		0x0001
 | 
					#define	IP6_MF		0x0001
 | 
				
			||||||
#define	IP6_OFFSET	0xFFF8
 | 
					#define	IP6_OFFSET	0xFFF8
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#define IP6_REPLY_MARK(net, mark) \
 | 
				
			||||||
 | 
						((net)->ipv6.sysctl.fwmark_reflect ? (mark) : 0)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#include <net/sock.h>
 | 
					#include <net/sock.h>
 | 
				
			||||||
 | 
					
 | 
				
			||||||
/* sysctls */
 | 
					/* sysctls */
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -77,6 +77,8 @@ struct netns_ipv4 {
 | 
				
			||||||
	int sysctl_ip_no_pmtu_disc;
 | 
						int sysctl_ip_no_pmtu_disc;
 | 
				
			||||||
	int sysctl_ip_fwd_use_pmtu;
 | 
						int sysctl_ip_fwd_use_pmtu;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						int sysctl_fwmark_reflect;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	struct ping_group_range ping_group_range;
 | 
						struct ping_group_range ping_group_range;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	atomic_t dev_addr_genid;
 | 
						atomic_t dev_addr_genid;
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -30,6 +30,7 @@ struct netns_sysctl_ipv6 {
 | 
				
			||||||
	int flowlabel_consistency;
 | 
						int flowlabel_consistency;
 | 
				
			||||||
	int icmpv6_time;
 | 
						int icmpv6_time;
 | 
				
			||||||
	int anycast_src_echo_reply;
 | 
						int anycast_src_echo_reply;
 | 
				
			||||||
 | 
						int fwmark_reflect;
 | 
				
			||||||
};
 | 
					};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
struct netns_ipv6 {
 | 
					struct netns_ipv6 {
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -337,6 +337,7 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb)
 | 
				
			||||||
	struct sock *sk;
 | 
						struct sock *sk;
 | 
				
			||||||
	struct inet_sock *inet;
 | 
						struct inet_sock *inet;
 | 
				
			||||||
	__be32 daddr, saddr;
 | 
						__be32 daddr, saddr;
 | 
				
			||||||
 | 
						u32 mark = IP4_REPLY_MARK(net, skb->mark);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	if (ip_options_echo(&icmp_param->replyopts.opt.opt, skb))
 | 
						if (ip_options_echo(&icmp_param->replyopts.opt.opt, skb))
 | 
				
			||||||
		return;
 | 
							return;
 | 
				
			||||||
| 
						 | 
					@ -349,6 +350,7 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb)
 | 
				
			||||||
	icmp_param->data.icmph.checksum = 0;
 | 
						icmp_param->data.icmph.checksum = 0;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	inet->tos = ip_hdr(skb)->tos;
 | 
						inet->tos = ip_hdr(skb)->tos;
 | 
				
			||||||
 | 
						sk->sk_mark = mark;
 | 
				
			||||||
	daddr = ipc.addr = ip_hdr(skb)->saddr;
 | 
						daddr = ipc.addr = ip_hdr(skb)->saddr;
 | 
				
			||||||
	saddr = fib_compute_spec_dst(skb);
 | 
						saddr = fib_compute_spec_dst(skb);
 | 
				
			||||||
	ipc.opt = NULL;
 | 
						ipc.opt = NULL;
 | 
				
			||||||
| 
						 | 
					@ -364,6 +366,7 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb)
 | 
				
			||||||
	memset(&fl4, 0, sizeof(fl4));
 | 
						memset(&fl4, 0, sizeof(fl4));
 | 
				
			||||||
	fl4.daddr = daddr;
 | 
						fl4.daddr = daddr;
 | 
				
			||||||
	fl4.saddr = saddr;
 | 
						fl4.saddr = saddr;
 | 
				
			||||||
 | 
						fl4.flowi4_mark = mark;
 | 
				
			||||||
	fl4.flowi4_tos = RT_TOS(ip_hdr(skb)->tos);
 | 
						fl4.flowi4_tos = RT_TOS(ip_hdr(skb)->tos);
 | 
				
			||||||
	fl4.flowi4_proto = IPPROTO_ICMP;
 | 
						fl4.flowi4_proto = IPPROTO_ICMP;
 | 
				
			||||||
	security_skb_classify_flow(skb, flowi4_to_flowi(&fl4));
 | 
						security_skb_classify_flow(skb, flowi4_to_flowi(&fl4));
 | 
				
			||||||
| 
						 | 
					@ -382,7 +385,7 @@ static struct rtable *icmp_route_lookup(struct net *net,
 | 
				
			||||||
					struct flowi4 *fl4,
 | 
										struct flowi4 *fl4,
 | 
				
			||||||
					struct sk_buff *skb_in,
 | 
										struct sk_buff *skb_in,
 | 
				
			||||||
					const struct iphdr *iph,
 | 
										const struct iphdr *iph,
 | 
				
			||||||
					__be32 saddr, u8 tos,
 | 
										__be32 saddr, u8 tos, u32 mark,
 | 
				
			||||||
					int type, int code,
 | 
										int type, int code,
 | 
				
			||||||
					struct icmp_bxm *param)
 | 
										struct icmp_bxm *param)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
| 
						 | 
					@ -394,6 +397,7 @@ static struct rtable *icmp_route_lookup(struct net *net,
 | 
				
			||||||
	fl4->daddr = (param->replyopts.opt.opt.srr ?
 | 
						fl4->daddr = (param->replyopts.opt.opt.srr ?
 | 
				
			||||||
		      param->replyopts.opt.opt.faddr : iph->saddr);
 | 
							      param->replyopts.opt.opt.faddr : iph->saddr);
 | 
				
			||||||
	fl4->saddr = saddr;
 | 
						fl4->saddr = saddr;
 | 
				
			||||||
 | 
						fl4->flowi4_mark = mark;
 | 
				
			||||||
	fl4->flowi4_tos = RT_TOS(tos);
 | 
						fl4->flowi4_tos = RT_TOS(tos);
 | 
				
			||||||
	fl4->flowi4_proto = IPPROTO_ICMP;
 | 
						fl4->flowi4_proto = IPPROTO_ICMP;
 | 
				
			||||||
	fl4->fl4_icmp_type = type;
 | 
						fl4->fl4_icmp_type = type;
 | 
				
			||||||
| 
						 | 
					@ -491,6 +495,7 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info)
 | 
				
			||||||
	struct flowi4 fl4;
 | 
						struct flowi4 fl4;
 | 
				
			||||||
	__be32 saddr;
 | 
						__be32 saddr;
 | 
				
			||||||
	u8  tos;
 | 
						u8  tos;
 | 
				
			||||||
 | 
						u32 mark;
 | 
				
			||||||
	struct net *net;
 | 
						struct net *net;
 | 
				
			||||||
	struct sock *sk;
 | 
						struct sock *sk;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					@ -592,6 +597,7 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info)
 | 
				
			||||||
	tos = icmp_pointers[type].error ? ((iph->tos & IPTOS_TOS_MASK) |
 | 
						tos = icmp_pointers[type].error ? ((iph->tos & IPTOS_TOS_MASK) |
 | 
				
			||||||
					   IPTOS_PREC_INTERNETCONTROL) :
 | 
										   IPTOS_PREC_INTERNETCONTROL) :
 | 
				
			||||||
					  iph->tos;
 | 
										  iph->tos;
 | 
				
			||||||
 | 
						mark = IP4_REPLY_MARK(net, skb_in->mark);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	if (ip_options_echo(&icmp_param->replyopts.opt.opt, skb_in))
 | 
						if (ip_options_echo(&icmp_param->replyopts.opt.opt, skb_in))
 | 
				
			||||||
		goto out_unlock;
 | 
							goto out_unlock;
 | 
				
			||||||
| 
						 | 
					@ -608,13 +614,14 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info)
 | 
				
			||||||
	icmp_param->skb	  = skb_in;
 | 
						icmp_param->skb	  = skb_in;
 | 
				
			||||||
	icmp_param->offset = skb_network_offset(skb_in);
 | 
						icmp_param->offset = skb_network_offset(skb_in);
 | 
				
			||||||
	inet_sk(sk)->tos = tos;
 | 
						inet_sk(sk)->tos = tos;
 | 
				
			||||||
 | 
						sk->sk_mark = mark;
 | 
				
			||||||
	ipc.addr = iph->saddr;
 | 
						ipc.addr = iph->saddr;
 | 
				
			||||||
	ipc.opt = &icmp_param->replyopts.opt;
 | 
						ipc.opt = &icmp_param->replyopts.opt;
 | 
				
			||||||
	ipc.tx_flags = 0;
 | 
						ipc.tx_flags = 0;
 | 
				
			||||||
	ipc.ttl = 0;
 | 
						ipc.ttl = 0;
 | 
				
			||||||
	ipc.tos = -1;
 | 
						ipc.tos = -1;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	rt = icmp_route_lookup(net, &fl4, skb_in, iph, saddr, tos,
 | 
						rt = icmp_route_lookup(net, &fl4, skb_in, iph, saddr, tos, mark,
 | 
				
			||||||
			       type, code, icmp_param);
 | 
								       type, code, icmp_param);
 | 
				
			||||||
	if (IS_ERR(rt))
 | 
						if (IS_ERR(rt))
 | 
				
			||||||
		goto out_unlock;
 | 
							goto out_unlock;
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -1546,7 +1546,8 @@ void ip_send_unicast_reply(struct net *net, struct sk_buff *skb, __be32 daddr,
 | 
				
			||||||
			daddr = replyopts.opt.opt.faddr;
 | 
								daddr = replyopts.opt.opt.faddr;
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	flowi4_init_output(&fl4, arg->bound_dev_if, 0,
 | 
						flowi4_init_output(&fl4, arg->bound_dev_if,
 | 
				
			||||||
 | 
								   IP4_REPLY_MARK(net, skb->mark),
 | 
				
			||||||
			   RT_TOS(arg->tos),
 | 
								   RT_TOS(arg->tos),
 | 
				
			||||||
			   RT_SCOPE_UNIVERSE, ip_hdr(skb)->protocol,
 | 
								   RT_SCOPE_UNIVERSE, ip_hdr(skb)->protocol,
 | 
				
			||||||
			   ip_reply_arg_flowi_flags(arg),
 | 
								   ip_reply_arg_flowi_flags(arg),
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -838,6 +838,13 @@ static struct ctl_table ipv4_net_table[] = {
 | 
				
			||||||
		.mode		= 0644,
 | 
							.mode		= 0644,
 | 
				
			||||||
		.proc_handler	= proc_dointvec,
 | 
							.proc_handler	= proc_dointvec,
 | 
				
			||||||
	},
 | 
						},
 | 
				
			||||||
 | 
						{
 | 
				
			||||||
 | 
							.procname	= "fwmark_reflect",
 | 
				
			||||||
 | 
							.data		= &init_net.ipv4.sysctl_fwmark_reflect,
 | 
				
			||||||
 | 
							.maxlen		= sizeof(int),
 | 
				
			||||||
 | 
							.mode		= 0644,
 | 
				
			||||||
 | 
							.proc_handler	= proc_dointvec,
 | 
				
			||||||
 | 
						},
 | 
				
			||||||
	{ }
 | 
						{ }
 | 
				
			||||||
};
 | 
					};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -400,6 +400,7 @@ static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info)
 | 
				
			||||||
	int len;
 | 
						int len;
 | 
				
			||||||
	int hlimit;
 | 
						int hlimit;
 | 
				
			||||||
	int err = 0;
 | 
						int err = 0;
 | 
				
			||||||
 | 
						u32 mark = IP6_REPLY_MARK(net, skb->mark);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	if ((u8 *)hdr < skb->head ||
 | 
						if ((u8 *)hdr < skb->head ||
 | 
				
			||||||
	    (skb_network_header(skb) + sizeof(*hdr)) > skb_tail_pointer(skb))
 | 
						    (skb_network_header(skb) + sizeof(*hdr)) > skb_tail_pointer(skb))
 | 
				
			||||||
| 
						 | 
					@ -466,6 +467,7 @@ static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info)
 | 
				
			||||||
	fl6.daddr = hdr->saddr;
 | 
						fl6.daddr = hdr->saddr;
 | 
				
			||||||
	if (saddr)
 | 
						if (saddr)
 | 
				
			||||||
		fl6.saddr = *saddr;
 | 
							fl6.saddr = *saddr;
 | 
				
			||||||
 | 
						fl6.flowi6_mark = mark;
 | 
				
			||||||
	fl6.flowi6_oif = iif;
 | 
						fl6.flowi6_oif = iif;
 | 
				
			||||||
	fl6.fl6_icmp_type = type;
 | 
						fl6.fl6_icmp_type = type;
 | 
				
			||||||
	fl6.fl6_icmp_code = code;
 | 
						fl6.fl6_icmp_code = code;
 | 
				
			||||||
| 
						 | 
					@ -474,6 +476,7 @@ static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info)
 | 
				
			||||||
	sk = icmpv6_xmit_lock(net);
 | 
						sk = icmpv6_xmit_lock(net);
 | 
				
			||||||
	if (sk == NULL)
 | 
						if (sk == NULL)
 | 
				
			||||||
		return;
 | 
							return;
 | 
				
			||||||
 | 
						sk->sk_mark = mark;
 | 
				
			||||||
	np = inet6_sk(sk);
 | 
						np = inet6_sk(sk);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	if (!icmpv6_xrlim_allow(sk, type, &fl6))
 | 
						if (!icmpv6_xrlim_allow(sk, type, &fl6))
 | 
				
			||||||
| 
						 | 
					@ -551,6 +554,7 @@ static void icmpv6_echo_reply(struct sk_buff *skb)
 | 
				
			||||||
	int err = 0;
 | 
						int err = 0;
 | 
				
			||||||
	int hlimit;
 | 
						int hlimit;
 | 
				
			||||||
	u8 tclass;
 | 
						u8 tclass;
 | 
				
			||||||
 | 
						u32 mark = IP6_REPLY_MARK(net, skb->mark);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	saddr = &ipv6_hdr(skb)->daddr;
 | 
						saddr = &ipv6_hdr(skb)->daddr;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					@ -569,11 +573,13 @@ static void icmpv6_echo_reply(struct sk_buff *skb)
 | 
				
			||||||
		fl6.saddr = *saddr;
 | 
							fl6.saddr = *saddr;
 | 
				
			||||||
	fl6.flowi6_oif = skb->dev->ifindex;
 | 
						fl6.flowi6_oif = skb->dev->ifindex;
 | 
				
			||||||
	fl6.fl6_icmp_type = ICMPV6_ECHO_REPLY;
 | 
						fl6.fl6_icmp_type = ICMPV6_ECHO_REPLY;
 | 
				
			||||||
 | 
						fl6.flowi6_mark = mark;
 | 
				
			||||||
	security_skb_classify_flow(skb, flowi6_to_flowi(&fl6));
 | 
						security_skb_classify_flow(skb, flowi6_to_flowi(&fl6));
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	sk = icmpv6_xmit_lock(net);
 | 
						sk = icmpv6_xmit_lock(net);
 | 
				
			||||||
	if (sk == NULL)
 | 
						if (sk == NULL)
 | 
				
			||||||
		return;
 | 
							return;
 | 
				
			||||||
 | 
						sk->sk_mark = mark;
 | 
				
			||||||
	np = inet6_sk(sk);
 | 
						np = inet6_sk(sk);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	if (!fl6.flowi6_oif && ipv6_addr_is_multicast(&fl6.daddr))
 | 
						if (!fl6.flowi6_oif && ipv6_addr_is_multicast(&fl6.daddr))
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -38,6 +38,13 @@ static struct ctl_table ipv6_table_template[] = {
 | 
				
			||||||
		.mode		= 0644,
 | 
							.mode		= 0644,
 | 
				
			||||||
		.proc_handler	= proc_dointvec
 | 
							.proc_handler	= proc_dointvec
 | 
				
			||||||
	},
 | 
						},
 | 
				
			||||||
 | 
						{
 | 
				
			||||||
 | 
							.procname	= "fwmark_reflect",
 | 
				
			||||||
 | 
							.data		= &init_net.ipv6.sysctl.fwmark_reflect,
 | 
				
			||||||
 | 
							.maxlen		= sizeof(int),
 | 
				
			||||||
 | 
							.mode		= 0644,
 | 
				
			||||||
 | 
							.proc_handler	= proc_dointvec
 | 
				
			||||||
 | 
						},
 | 
				
			||||||
	{ }
 | 
						{ }
 | 
				
			||||||
};
 | 
					};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -812,6 +812,7 @@ static void tcp_v6_send_response(struct sk_buff *skb, u32 seq, u32 ack, u32 win,
 | 
				
			||||||
		fl6.flowi6_oif = inet6_iif(skb);
 | 
							fl6.flowi6_oif = inet6_iif(skb);
 | 
				
			||||||
	else
 | 
						else
 | 
				
			||||||
		fl6.flowi6_oif = oif;
 | 
							fl6.flowi6_oif = oif;
 | 
				
			||||||
 | 
						fl6.flowi6_mark = IP6_REPLY_MARK(net, skb->mark);
 | 
				
			||||||
	fl6.fl6_dport = t1->dest;
 | 
						fl6.fl6_dport = t1->dest;
 | 
				
			||||||
	fl6.fl6_sport = t1->source;
 | 
						fl6.fl6_sport = t1->source;
 | 
				
			||||||
	security_skb_classify_flow(skb, flowi6_to_flowi(&fl6));
 | 
						security_skb_classify_flow(skb, flowi6_to_flowi(&fl6));
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
		Loading…
	
		Reference in a new issue