forked from mirrors/linux
		
	net: add a sysctl to reflect the fwmark on replies
Kernel-originated IP packets that have no user socket associated with
them (e.g., ICMP errors and echo replies, TCP RSTs, etc.) are emitted
with a mark of zero. Add a sysctl to make them have the same mark as
the packet they are replying to.

This allows an administrator that wishes to do so to use mark-based
routing, firewalling, etc. for these replies by marking the original
packets inbound.

Tested using user-mode linux:
- ICMP/ICMPv6 echo replies and errors.
- TCP RST packets (IPv4 and IPv6).

Signed-off-by: Lorenzo Colitti <lorenzo@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
		
							parent
							
								
									87e067cda6
								
							
						
					
					
						commit
						e110861f86
					
				
					 10 changed files with 41 additions and 3 deletions
				
			
		|  | @ -231,6 +231,9 @@ void ipfrag_init(void); | |||
| 
 | ||||
| void ip_static_sysctl_init(void); | ||||
| 
 | ||||
| #define IP4_REPLY_MARK(net, mark) \ | ||||
| 	((net)->ipv4.sysctl_fwmark_reflect ? (mark) : 0) | ||||
| 
 | ||||
| static inline bool ip_is_fragment(const struct iphdr *iph) | ||||
| { | ||||
| 	return (iph->frag_off & htons(IP_MF | IP_OFFSET)) != 0; | ||||
|  |  | |||
|  | @ -113,6 +113,9 @@ struct frag_hdr { | |||
| #define	IP6_MF		0x0001 | ||||
| #define	IP6_OFFSET	0xFFF8 | ||||
| 
 | ||||
| #define IP6_REPLY_MARK(net, mark) \ | ||||
| 	((net)->ipv6.sysctl.fwmark_reflect ? (mark) : 0) | ||||
| 
 | ||||
| #include <net/sock.h> | ||||
| 
 | ||||
| /* sysctls */ | ||||
|  |  | |||
|  | @ -77,6 +77,8 @@ struct netns_ipv4 { | |||
| 	int sysctl_ip_no_pmtu_disc; | ||||
| 	int sysctl_ip_fwd_use_pmtu; | ||||
| 
 | ||||
| 	int sysctl_fwmark_reflect; | ||||
| 
 | ||||
| 	struct ping_group_range ping_group_range; | ||||
| 
 | ||||
| 	atomic_t dev_addr_genid; | ||||
|  |  | |||
|  | @ -30,6 +30,7 @@ struct netns_sysctl_ipv6 { | |||
| 	int flowlabel_consistency; | ||||
| 	int icmpv6_time; | ||||
| 	int anycast_src_echo_reply; | ||||
| 	int fwmark_reflect; | ||||
| }; | ||||
| 
 | ||||
| struct netns_ipv6 { | ||||
|  |  | |||
|  | @ -337,6 +337,7 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb) | |||
| 	struct sock *sk; | ||||
| 	struct inet_sock *inet; | ||||
| 	__be32 daddr, saddr; | ||||
| 	u32 mark = IP4_REPLY_MARK(net, skb->mark); | ||||
| 
 | ||||
| 	if (ip_options_echo(&icmp_param->replyopts.opt.opt, skb)) | ||||
| 		return; | ||||
|  | @ -349,6 +350,7 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb) | |||
| 	icmp_param->data.icmph.checksum = 0; | ||||
| 
 | ||||
| 	inet->tos = ip_hdr(skb)->tos; | ||||
| 	sk->sk_mark = mark; | ||||
| 	daddr = ipc.addr = ip_hdr(skb)->saddr; | ||||
| 	saddr = fib_compute_spec_dst(skb); | ||||
| 	ipc.opt = NULL; | ||||
|  | @ -364,6 +366,7 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb) | |||
| 	memset(&fl4, 0, sizeof(fl4)); | ||||
| 	fl4.daddr = daddr; | ||||
| 	fl4.saddr = saddr; | ||||
| 	fl4.flowi4_mark = mark; | ||||
| 	fl4.flowi4_tos = RT_TOS(ip_hdr(skb)->tos); | ||||
| 	fl4.flowi4_proto = IPPROTO_ICMP; | ||||
| 	security_skb_classify_flow(skb, flowi4_to_flowi(&fl4)); | ||||
|  | @ -382,7 +385,7 @@ static struct rtable *icmp_route_lookup(struct net *net, | |||
| 					struct flowi4 *fl4, | ||||
| 					struct sk_buff *skb_in, | ||||
| 					const struct iphdr *iph, | ||||
| 					__be32 saddr, u8 tos, | ||||
| 					__be32 saddr, u8 tos, u32 mark, | ||||
| 					int type, int code, | ||||
| 					struct icmp_bxm *param) | ||||
| { | ||||
|  | @ -394,6 +397,7 @@ static struct rtable *icmp_route_lookup(struct net *net, | |||
| 	fl4->daddr = (param->replyopts.opt.opt.srr ? | ||||
| 		      param->replyopts.opt.opt.faddr : iph->saddr); | ||||
| 	fl4->saddr = saddr; | ||||
| 	fl4->flowi4_mark = mark; | ||||
| 	fl4->flowi4_tos = RT_TOS(tos); | ||||
| 	fl4->flowi4_proto = IPPROTO_ICMP; | ||||
| 	fl4->fl4_icmp_type = type; | ||||
|  | @ -491,6 +495,7 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info) | |||
| 	struct flowi4 fl4; | ||||
| 	__be32 saddr; | ||||
| 	u8  tos; | ||||
| 	u32 mark; | ||||
| 	struct net *net; | ||||
| 	struct sock *sk; | ||||
| 
 | ||||
|  | @ -592,6 +597,7 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info) | |||
| 	tos = icmp_pointers[type].error ? ((iph->tos & IPTOS_TOS_MASK) | | ||||
| 					   IPTOS_PREC_INTERNETCONTROL) : | ||||
| 					  iph->tos; | ||||
| 	mark = IP4_REPLY_MARK(net, skb_in->mark); | ||||
| 
 | ||||
| 	if (ip_options_echo(&icmp_param->replyopts.opt.opt, skb_in)) | ||||
| 		goto out_unlock; | ||||
|  | @ -608,13 +614,14 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info) | |||
| 	icmp_param->skb	  = skb_in; | ||||
| 	icmp_param->offset = skb_network_offset(skb_in); | ||||
| 	inet_sk(sk)->tos = tos; | ||||
| 	sk->sk_mark = mark; | ||||
| 	ipc.addr = iph->saddr; | ||||
| 	ipc.opt = &icmp_param->replyopts.opt; | ||||
| 	ipc.tx_flags = 0; | ||||
| 	ipc.ttl = 0; | ||||
| 	ipc.tos = -1; | ||||
| 
 | ||||
| 	rt = icmp_route_lookup(net, &fl4, skb_in, iph, saddr, tos, | ||||
| 	rt = icmp_route_lookup(net, &fl4, skb_in, iph, saddr, tos, mark, | ||||
| 			       type, code, icmp_param); | ||||
| 	if (IS_ERR(rt)) | ||||
| 		goto out_unlock; | ||||
|  |  | |||
|  | @ -1546,7 +1546,8 @@ void ip_send_unicast_reply(struct net *net, struct sk_buff *skb, __be32 daddr, | |||
| 			daddr = replyopts.opt.opt.faddr; | ||||
| 	} | ||||
| 
 | ||||
| 	flowi4_init_output(&fl4, arg->bound_dev_if, 0, | ||||
| 	flowi4_init_output(&fl4, arg->bound_dev_if, | ||||
| 			   IP4_REPLY_MARK(net, skb->mark), | ||||
| 			   RT_TOS(arg->tos), | ||||
| 			   RT_SCOPE_UNIVERSE, ip_hdr(skb)->protocol, | ||||
| 			   ip_reply_arg_flowi_flags(arg), | ||||
|  |  | |||
|  | @ -838,6 +838,13 @@ static struct ctl_table ipv4_net_table[] = { | |||
| 		.mode		= 0644, | ||||
| 		.proc_handler	= proc_dointvec, | ||||
| 	}, | ||||
| 	{ | ||||
| 		.procname	= "fwmark_reflect", | ||||
| 		.data		= &init_net.ipv4.sysctl_fwmark_reflect, | ||||
| 		.maxlen		= sizeof(int), | ||||
| 		.mode		= 0644, | ||||
| 		.proc_handler	= proc_dointvec, | ||||
| 	}, | ||||
| 	{ } | ||||
| }; | ||||
| 
 | ||||
|  |  | |||
|  | @ -400,6 +400,7 @@ static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info) | |||
| 	int len; | ||||
| 	int hlimit; | ||||
| 	int err = 0; | ||||
| 	u32 mark = IP6_REPLY_MARK(net, skb->mark); | ||||
| 
 | ||||
| 	if ((u8 *)hdr < skb->head || | ||||
| 	    (skb_network_header(skb) + sizeof(*hdr)) > skb_tail_pointer(skb)) | ||||
|  | @ -466,6 +467,7 @@ static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info) | |||
| 	fl6.daddr = hdr->saddr; | ||||
| 	if (saddr) | ||||
| 		fl6.saddr = *saddr; | ||||
| 	fl6.flowi6_mark = mark; | ||||
| 	fl6.flowi6_oif = iif; | ||||
| 	fl6.fl6_icmp_type = type; | ||||
| 	fl6.fl6_icmp_code = code; | ||||
|  | @ -474,6 +476,7 @@ static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info) | |||
| 	sk = icmpv6_xmit_lock(net); | ||||
| 	if (sk == NULL) | ||||
| 		return; | ||||
| 	sk->sk_mark = mark; | ||||
| 	np = inet6_sk(sk); | ||||
| 
 | ||||
| 	if (!icmpv6_xrlim_allow(sk, type, &fl6)) | ||||
|  | @ -551,6 +554,7 @@ static void icmpv6_echo_reply(struct sk_buff *skb) | |||
| 	int err = 0; | ||||
| 	int hlimit; | ||||
| 	u8 tclass; | ||||
| 	u32 mark = IP6_REPLY_MARK(net, skb->mark); | ||||
| 
 | ||||
| 	saddr = &ipv6_hdr(skb)->daddr; | ||||
| 
 | ||||
|  | @ -569,11 +573,13 @@ static void icmpv6_echo_reply(struct sk_buff *skb) | |||
| 		fl6.saddr = *saddr; | ||||
| 	fl6.flowi6_oif = skb->dev->ifindex; | ||||
| 	fl6.fl6_icmp_type = ICMPV6_ECHO_REPLY; | ||||
| 	fl6.flowi6_mark = mark; | ||||
| 	security_skb_classify_flow(skb, flowi6_to_flowi(&fl6)); | ||||
| 
 | ||||
| 	sk = icmpv6_xmit_lock(net); | ||||
| 	if (sk == NULL) | ||||
| 		return; | ||||
| 	sk->sk_mark = mark; | ||||
| 	np = inet6_sk(sk); | ||||
| 
 | ||||
| 	if (!fl6.flowi6_oif && ipv6_addr_is_multicast(&fl6.daddr)) | ||||
|  |  | |||
|  | @ -38,6 +38,13 @@ static struct ctl_table ipv6_table_template[] = { | |||
| 		.mode		= 0644, | ||||
| 		.proc_handler	= proc_dointvec | ||||
| 	}, | ||||
| 	{ | ||||
| 		.procname	= "fwmark_reflect", | ||||
| 		.data		= &init_net.ipv6.sysctl.fwmark_reflect, | ||||
| 		.maxlen		= sizeof(int), | ||||
| 		.mode		= 0644, | ||||
| 		.proc_handler	= proc_dointvec | ||||
| 	}, | ||||
| 	{ } | ||||
| }; | ||||
| 
 | ||||
|  |  | |||
|  | @ -812,6 +812,7 @@ static void tcp_v6_send_response(struct sk_buff *skb, u32 seq, u32 ack, u32 win, | |||
| 		fl6.flowi6_oif = inet6_iif(skb); | ||||
| 	else | ||||
| 		fl6.flowi6_oif = oif; | ||||
| 	fl6.flowi6_mark = IP6_REPLY_MARK(net, skb->mark); | ||||
| 	fl6.fl6_dport = t1->dest; | ||||
| 	fl6.fl6_sport = t1->source; | ||||
| 	security_skb_classify_flow(skb, flowi6_to_flowi(&fl6)); | ||||
|  |  | |||
		Loading…
	
		Reference in a new issue
	
	 Lorenzo Colitti
						Lorenzo Colitti