forked from mirrors/linux
		
	inetpeer: get rid of ip_id_count
Ideally, we would need to generate IP IDs using a per-destination IP generator. Linux kernels used the inet_peer cache for this purpose, but this had a huge cost on servers disabling MTU discovery: 1) each inet_peer struct consumes 192 bytes; 2) the inetpeer cache uses a binary tree of inet_peer structs, with a nominal size of ~66000 elements under load; 3) lookups in this tree hit a lot of cache lines, as the tree depth is about 20; 4) if a server deals with many TCP flows, we have a high probability of not finding the inet_peer, allocating a fresh one, and inserting it in the tree with the same initial ip_id_count (cf. secure_ip_id()); 5) we garbage collect inet_peer aggressively. IP ID generation does not have to be 'perfect'. The goal is to avoid duplicates in a short period of time, so that reassembly units have a chance to complete reassembly of fragments belonging to one message before receiving other fragments with a recycled ID. We simply use an array of generators, and a Jenkins hash using the dst IP as a key. ipv6_select_ident() is put back into net/ipv6/ip6_output.c where it belongs (it is only used from this file). secure_ip_id() and secure_ipv6_id() are no longer needed. Rename ip_select_ident_more() to ip_select_ident_segs() to avoid unnecessary decrement/increment of the number of segments. Signed-off-by: Eric Dumazet <edumazet@google.com> Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
		
							parent
							
								
									e067ee336a
								
							
						
					
					
						commit
						73f156a6e8
					
				
					 17 changed files with 66 additions and 156 deletions
				
			
		| 
						 | 
					@ -281,7 +281,7 @@ static int pptp_xmit(struct ppp_channel *chan, struct sk_buff *skb)
 | 
				
			||||||
	nf_reset(skb);
 | 
						nf_reset(skb);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	skb->ip_summed = CHECKSUM_NONE;
 | 
						skb->ip_summed = CHECKSUM_NONE;
 | 
				
			||||||
	ip_select_ident(skb, &rt->dst, NULL);
 | 
						ip_select_ident(skb, NULL);
 | 
				
			||||||
	ip_send_check(iph);
 | 
						ip_send_check(iph);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	ip_local_out(skb);
 | 
						ip_local_out(skb);
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -41,14 +41,13 @@ struct inet_peer {
 | 
				
			||||||
		struct rcu_head     gc_rcu;
 | 
							struct rcu_head     gc_rcu;
 | 
				
			||||||
	};
 | 
						};
 | 
				
			||||||
	/*
 | 
						/*
 | 
				
			||||||
	 * Once inet_peer is queued for deletion (refcnt == -1), following fields
 | 
						 * Once inet_peer is queued for deletion (refcnt == -1), following field
 | 
				
			||||||
	 * are not available: rid, ip_id_count
 | 
						 * is not available: rid
 | 
				
			||||||
	 * We can share memory with rcu_head to help keep inet_peer small.
 | 
						 * We can share memory with rcu_head to help keep inet_peer small.
 | 
				
			||||||
	 */
 | 
						 */
 | 
				
			||||||
	union {
 | 
						union {
 | 
				
			||||||
		struct {
 | 
							struct {
 | 
				
			||||||
			atomic_t			rid;		/* Frag reception counter */
 | 
								atomic_t			rid;		/* Frag reception counter */
 | 
				
			||||||
			atomic_t			ip_id_count;	/* IP ID for the next packet */
 | 
					 | 
				
			||||||
		};
 | 
							};
 | 
				
			||||||
		struct rcu_head         rcu;
 | 
							struct rcu_head         rcu;
 | 
				
			||||||
		struct inet_peer	*gc_next;
 | 
							struct inet_peer	*gc_next;
 | 
				
			||||||
| 
						 | 
					@ -165,7 +164,7 @@ bool inet_peer_xrlim_allow(struct inet_peer *peer, int timeout);
 | 
				
			||||||
void inetpeer_invalidate_tree(struct inet_peer_base *);
 | 
					void inetpeer_invalidate_tree(struct inet_peer_base *);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
/*
 | 
					/*
 | 
				
			||||||
 * temporary check to make sure we dont access rid, ip_id_count, tcp_ts,
 | 
					 * temporary check to make sure we dont access rid, tcp_ts,
 | 
				
			||||||
 * tcp_ts_stamp if no refcount is taken on inet_peer
 | 
					 * tcp_ts_stamp if no refcount is taken on inet_peer
 | 
				
			||||||
 */
 | 
					 */
 | 
				
			||||||
static inline void inet_peer_refcheck(const struct inet_peer *p)
 | 
					static inline void inet_peer_refcheck(const struct inet_peer *p)
 | 
				
			||||||
| 
						 | 
					@ -173,20 +172,4 @@ static inline void inet_peer_refcheck(const struct inet_peer *p)
 | 
				
			||||||
	WARN_ON_ONCE(atomic_read(&p->refcnt) <= 0);
 | 
						WARN_ON_ONCE(atomic_read(&p->refcnt) <= 0);
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					 | 
				
			||||||
/* can be called with or without local BH being disabled */
 | 
					 | 
				
			||||||
static inline int inet_getid(struct inet_peer *p, int more)
 | 
					 | 
				
			||||||
{
 | 
					 | 
				
			||||||
	int old, new;
 | 
					 | 
				
			||||||
	more++;
 | 
					 | 
				
			||||||
	inet_peer_refcheck(p);
 | 
					 | 
				
			||||||
	do {
 | 
					 | 
				
			||||||
		old = atomic_read(&p->ip_id_count);
 | 
					 | 
				
			||||||
		new = old + more;
 | 
					 | 
				
			||||||
		if (!new)
 | 
					 | 
				
			||||||
			new = 1;
 | 
					 | 
				
			||||||
	} while (atomic_cmpxchg(&p->ip_id_count, old, new) != old);
 | 
					 | 
				
			||||||
	return new;
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
#endif /* _NET_INETPEER_H */
 | 
					#endif /* _NET_INETPEER_H */
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -309,9 +309,19 @@ static inline unsigned int ip_skb_dst_mtu(const struct sk_buff *skb)
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
void __ip_select_ident(struct iphdr *iph, struct dst_entry *dst, int more);
 | 
					#define IP_IDENTS_SZ 2048u
 | 
				
			||||||
 | 
					extern atomic_t *ip_idents;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
static inline void ip_select_ident(struct sk_buff *skb, struct dst_entry *dst, struct sock *sk)
 | 
					static inline u32 ip_idents_reserve(u32 hash, int segs)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
						atomic_t *id_ptr = ip_idents + hash % IP_IDENTS_SZ;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						return atomic_add_return(segs, id_ptr) - segs;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					void __ip_select_ident(struct iphdr *iph, int segs);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					static inline void ip_select_ident_segs(struct sk_buff *skb, struct sock *sk, int segs)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
	struct iphdr *iph = ip_hdr(skb);
 | 
						struct iphdr *iph = ip_hdr(skb);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					@ -321,24 +331,20 @@ static inline void ip_select_ident(struct sk_buff *skb, struct dst_entry *dst, s
 | 
				
			||||||
		 * does not change, they drop every other packet in
 | 
							 * does not change, they drop every other packet in
 | 
				
			||||||
		 * a TCP stream using header compression.
 | 
							 * a TCP stream using header compression.
 | 
				
			||||||
		 */
 | 
							 */
 | 
				
			||||||
		iph->id = (sk && inet_sk(sk)->inet_daddr) ?
 | 
					 | 
				
			||||||
					htons(inet_sk(sk)->inet_id++) : 0;
 | 
					 | 
				
			||||||
	} else
 | 
					 | 
				
			||||||
		__ip_select_ident(iph, dst, 0);
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
static inline void ip_select_ident_more(struct sk_buff *skb, struct dst_entry *dst, struct sock *sk, int more)
 | 
					 | 
				
			||||||
{
 | 
					 | 
				
			||||||
	struct iphdr *iph = ip_hdr(skb);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
	if ((iph->frag_off & htons(IP_DF)) && !skb->ignore_df) {
 | 
					 | 
				
			||||||
		if (sk && inet_sk(sk)->inet_daddr) {
 | 
							if (sk && inet_sk(sk)->inet_daddr) {
 | 
				
			||||||
			iph->id = htons(inet_sk(sk)->inet_id);
 | 
								iph->id = htons(inet_sk(sk)->inet_id);
 | 
				
			||||||
			inet_sk(sk)->inet_id += 1 + more;
 | 
								inet_sk(sk)->inet_id += segs;
 | 
				
			||||||
		} else
 | 
							} else {
 | 
				
			||||||
			iph->id = 0;
 | 
								iph->id = 0;
 | 
				
			||||||
	} else
 | 
							}
 | 
				
			||||||
		__ip_select_ident(iph, dst, more);
 | 
						} else {
 | 
				
			||||||
 | 
							__ip_select_ident(iph, segs);
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					static inline void ip_select_ident(struct sk_buff *skb, struct sock *sk)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
						ip_select_ident_segs(skb, sk, 1);
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
static inline __wsum inet_compute_pseudo(struct sk_buff *skb, int proto)
 | 
					static inline __wsum inet_compute_pseudo(struct sk_buff *skb, int proto)
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -668,8 +668,6 @@ static inline int ipv6_addr_diff(const struct in6_addr *a1, const struct in6_add
 | 
				
			||||||
	return __ipv6_addr_diff(a1, a2, sizeof(struct in6_addr));
 | 
						return __ipv6_addr_diff(a1, a2, sizeof(struct in6_addr));
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
void ipv6_select_ident(struct frag_hdr *fhdr, struct rt6_info *rt);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
int ip6_dst_hoplimit(struct dst_entry *dst);
 | 
					int ip6_dst_hoplimit(struct dst_entry *dst);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
static inline int ip6_sk_dst_hoplimit(struct ipv6_pinfo *np, struct flowi6 *fl6,
 | 
					static inline int ip6_sk_dst_hoplimit(struct ipv6_pinfo *np, struct flowi6 *fl6,
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -3,8 +3,6 @@
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#include <linux/types.h>
 | 
					#include <linux/types.h>
 | 
				
			||||||
 | 
					
 | 
				
			||||||
__u32 secure_ip_id(__be32 daddr);
 | 
					 | 
				
			||||||
__u32 secure_ipv6_id(const __be32 daddr[4]);
 | 
					 | 
				
			||||||
u32 secure_ipv4_port_ephemeral(__be32 saddr, __be32 daddr, __be16 dport);
 | 
					u32 secure_ipv4_port_ephemeral(__be32 saddr, __be32 daddr, __be16 dport);
 | 
				
			||||||
u32 secure_ipv6_port_ephemeral(const __be32 *saddr, const __be32 *daddr,
 | 
					u32 secure_ipv6_port_ephemeral(const __be32 *saddr, const __be32 *daddr,
 | 
				
			||||||
			       __be16 dport);
 | 
								       __be16 dport);
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -85,31 +85,6 @@ EXPORT_SYMBOL(secure_ipv6_port_ephemeral);
 | 
				
			||||||
#endif
 | 
					#endif
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#ifdef CONFIG_INET
 | 
					#ifdef CONFIG_INET
 | 
				
			||||||
__u32 secure_ip_id(__be32 daddr)
 | 
					 | 
				
			||||||
{
 | 
					 | 
				
			||||||
	u32 hash[MD5_DIGEST_WORDS];
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
	net_secret_init();
 | 
					 | 
				
			||||||
	hash[0] = (__force __u32) daddr;
 | 
					 | 
				
			||||||
	hash[1] = net_secret[13];
 | 
					 | 
				
			||||||
	hash[2] = net_secret[14];
 | 
					 | 
				
			||||||
	hash[3] = net_secret[15];
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
	md5_transform(hash, net_secret);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
	return hash[0];
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
__u32 secure_ipv6_id(const __be32 daddr[4])
 | 
					 | 
				
			||||||
{
 | 
					 | 
				
			||||||
	__u32 hash[4];
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
	net_secret_init();
 | 
					 | 
				
			||||||
	memcpy(hash, daddr, 16);
 | 
					 | 
				
			||||||
	md5_transform(hash, net_secret);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
	return hash[0];
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
__u32 secure_tcp_sequence_number(__be32 saddr, __be32 daddr,
 | 
					__u32 secure_tcp_sequence_number(__be32 saddr, __be32 daddr,
 | 
				
			||||||
				 __be16 sport, __be16 dport)
 | 
									 __be16 sport, __be16 dport)
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -369,7 +369,7 @@ static struct sk_buff *igmpv3_newpack(struct net_device *dev, int size)
 | 
				
			||||||
	pip->saddr    = fl4.saddr;
 | 
						pip->saddr    = fl4.saddr;
 | 
				
			||||||
	pip->protocol = IPPROTO_IGMP;
 | 
						pip->protocol = IPPROTO_IGMP;
 | 
				
			||||||
	pip->tot_len  = 0;	/* filled in later */
 | 
						pip->tot_len  = 0;	/* filled in later */
 | 
				
			||||||
	ip_select_ident(skb, &rt->dst, NULL);
 | 
						ip_select_ident(skb, NULL);
 | 
				
			||||||
	((u8 *)&pip[1])[0] = IPOPT_RA;
 | 
						((u8 *)&pip[1])[0] = IPOPT_RA;
 | 
				
			||||||
	((u8 *)&pip[1])[1] = 4;
 | 
						((u8 *)&pip[1])[1] = 4;
 | 
				
			||||||
	((u8 *)&pip[1])[2] = 0;
 | 
						((u8 *)&pip[1])[2] = 0;
 | 
				
			||||||
| 
						 | 
					@ -714,7 +714,7 @@ static int igmp_send_report(struct in_device *in_dev, struct ip_mc_list *pmc,
 | 
				
			||||||
	iph->daddr    = dst;
 | 
						iph->daddr    = dst;
 | 
				
			||||||
	iph->saddr    = fl4.saddr;
 | 
						iph->saddr    = fl4.saddr;
 | 
				
			||||||
	iph->protocol = IPPROTO_IGMP;
 | 
						iph->protocol = IPPROTO_IGMP;
 | 
				
			||||||
	ip_select_ident(skb, &rt->dst, NULL);
 | 
						ip_select_ident(skb, NULL);
 | 
				
			||||||
	((u8 *)&iph[1])[0] = IPOPT_RA;
 | 
						((u8 *)&iph[1])[0] = IPOPT_RA;
 | 
				
			||||||
	((u8 *)&iph[1])[1] = 4;
 | 
						((u8 *)&iph[1])[1] = 4;
 | 
				
			||||||
	((u8 *)&iph[1])[2] = 0;
 | 
						((u8 *)&iph[1])[2] = 0;
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -26,20 +26,7 @@
 | 
				
			||||||
 *  Theory of operations.
 | 
					 *  Theory of operations.
 | 
				
			||||||
 *  We keep one entry for each peer IP address.  The nodes contains long-living
 | 
					 *  We keep one entry for each peer IP address.  The nodes contains long-living
 | 
				
			||||||
 *  information about the peer which doesn't depend on routes.
 | 
					 *  information about the peer which doesn't depend on routes.
 | 
				
			||||||
 *  At this moment this information consists only of ID field for the next
 | 
					 | 
				
			||||||
 *  outgoing IP packet.  This field is incremented with each packet as encoded
 | 
					 | 
				
			||||||
 *  in inet_getid() function (include/net/inetpeer.h).
 | 
					 | 
				
			||||||
 *  At the moment of writing this notes identifier of IP packets is generated
 | 
					 | 
				
			||||||
 *  to be unpredictable using this code only for packets subjected
 | 
					 | 
				
			||||||
 *  (actually or potentially) to defragmentation.  I.e. DF packets less than
 | 
					 | 
				
			||||||
 *  PMTU in size when local fragmentation is disabled use a constant ID and do
 | 
					 | 
				
			||||||
 *  not use this code (see ip_select_ident() in include/net/ip.h).
 | 
					 | 
				
			||||||
 *
 | 
					 *
 | 
				
			||||||
 *  Route cache entries hold references to our nodes.
 | 
					 | 
				
			||||||
 *  New cache entries get references via lookup by destination IP address in
 | 
					 | 
				
			||||||
 *  the avl tree.  The reference is grabbed only when it's needed i.e. only
 | 
					 | 
				
			||||||
 *  when we try to output IP packet which needs an unpredictable ID (see
 | 
					 | 
				
			||||||
 *  __ip_select_ident() in net/ipv4/route.c).
 | 
					 | 
				
			||||||
 *  Nodes are removed only when reference counter goes to 0.
 | 
					 *  Nodes are removed only when reference counter goes to 0.
 | 
				
			||||||
 *  When it's happened the node may be removed when a sufficient amount of
 | 
					 *  When it's happened the node may be removed when a sufficient amount of
 | 
				
			||||||
 *  time has been passed since its last use.  The less-recently-used entry can
 | 
					 *  time has been passed since its last use.  The less-recently-used entry can
 | 
				
			||||||
| 
						 | 
					@ -62,7 +49,6 @@
 | 
				
			||||||
 *		refcnt: atomically against modifications on other CPU;
 | 
					 *		refcnt: atomically against modifications on other CPU;
 | 
				
			||||||
 *		   usually under some other lock to prevent node disappearing
 | 
					 *		   usually under some other lock to prevent node disappearing
 | 
				
			||||||
 *		daddr: unchangeable
 | 
					 *		daddr: unchangeable
 | 
				
			||||||
 *		ip_id_count: atomic value (no lock needed)
 | 
					 | 
				
			||||||
 */
 | 
					 */
 | 
				
			||||||
 | 
					
 | 
				
			||||||
static struct kmem_cache *peer_cachep __read_mostly;
 | 
					static struct kmem_cache *peer_cachep __read_mostly;
 | 
				
			||||||
| 
						 | 
					@ -497,10 +483,6 @@ struct inet_peer *inet_getpeer(struct inet_peer_base *base,
 | 
				
			||||||
		p->daddr = *daddr;
 | 
							p->daddr = *daddr;
 | 
				
			||||||
		atomic_set(&p->refcnt, 1);
 | 
							atomic_set(&p->refcnt, 1);
 | 
				
			||||||
		atomic_set(&p->rid, 0);
 | 
							atomic_set(&p->rid, 0);
 | 
				
			||||||
		atomic_set(&p->ip_id_count,
 | 
					 | 
				
			||||||
				(daddr->family == AF_INET) ?
 | 
					 | 
				
			||||||
					secure_ip_id(daddr->addr.a4) :
 | 
					 | 
				
			||||||
					secure_ipv6_id(daddr->addr.a6));
 | 
					 | 
				
			||||||
		p->metrics[RTAX_LOCK-1] = INETPEER_METRICS_NEW;
 | 
							p->metrics[RTAX_LOCK-1] = INETPEER_METRICS_NEW;
 | 
				
			||||||
		p->rate_tokens = 0;
 | 
							p->rate_tokens = 0;
 | 
				
			||||||
		/* 60*HZ is arbitrary, but chosen enough high so that the first
 | 
							/* 60*HZ is arbitrary, but chosen enough high so that the first
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -148,7 +148,7 @@ int ip_build_and_send_pkt(struct sk_buff *skb, struct sock *sk,
 | 
				
			||||||
	iph->daddr    = (opt && opt->opt.srr ? opt->opt.faddr : daddr);
 | 
						iph->daddr    = (opt && opt->opt.srr ? opt->opt.faddr : daddr);
 | 
				
			||||||
	iph->saddr    = saddr;
 | 
						iph->saddr    = saddr;
 | 
				
			||||||
	iph->protocol = sk->sk_protocol;
 | 
						iph->protocol = sk->sk_protocol;
 | 
				
			||||||
	ip_select_ident(skb, &rt->dst, sk);
 | 
						ip_select_ident(skb, sk);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	if (opt && opt->opt.optlen) {
 | 
						if (opt && opt->opt.optlen) {
 | 
				
			||||||
		iph->ihl += opt->opt.optlen>>2;
 | 
							iph->ihl += opt->opt.optlen>>2;
 | 
				
			||||||
| 
						 | 
					@ -430,8 +430,7 @@ int ip_queue_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl)
 | 
				
			||||||
		ip_options_build(skb, &inet_opt->opt, inet->inet_daddr, rt, 0);
 | 
							ip_options_build(skb, &inet_opt->opt, inet->inet_daddr, rt, 0);
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	ip_select_ident_more(skb, &rt->dst, sk,
 | 
						ip_select_ident_segs(skb, sk, skb_shinfo(skb)->gso_segs ?: 1);
 | 
				
			||||||
			     (skb_shinfo(skb)->gso_segs ?: 1) - 1);
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
	/* TODO : should we use skb->sk here instead of sk ? */
 | 
						/* TODO : should we use skb->sk here instead of sk ? */
 | 
				
			||||||
	skb->priority = sk->sk_priority;
 | 
						skb->priority = sk->sk_priority;
 | 
				
			||||||
| 
						 | 
					@ -1379,7 +1378,7 @@ struct sk_buff *__ip_make_skb(struct sock *sk,
 | 
				
			||||||
	iph->ttl = ttl;
 | 
						iph->ttl = ttl;
 | 
				
			||||||
	iph->protocol = sk->sk_protocol;
 | 
						iph->protocol = sk->sk_protocol;
 | 
				
			||||||
	ip_copy_addrs(iph, fl4);
 | 
						ip_copy_addrs(iph, fl4);
 | 
				
			||||||
	ip_select_ident(skb, &rt->dst, sk);
 | 
						ip_select_ident(skb, sk);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	if (opt) {
 | 
						if (opt) {
 | 
				
			||||||
		iph->ihl += opt->optlen>>2;
 | 
							iph->ihl += opt->optlen>>2;
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -74,7 +74,7 @@ int iptunnel_xmit(struct sock *sk, struct rtable *rt, struct sk_buff *skb,
 | 
				
			||||||
	iph->daddr	=	dst;
 | 
						iph->daddr	=	dst;
 | 
				
			||||||
	iph->saddr	=	src;
 | 
						iph->saddr	=	src;
 | 
				
			||||||
	iph->ttl	=	ttl;
 | 
						iph->ttl	=	ttl;
 | 
				
			||||||
	__ip_select_ident(iph, &rt->dst, (skb_shinfo(skb)->gso_segs ?: 1) - 1);
 | 
						__ip_select_ident(iph, skb_shinfo(skb)->gso_segs ?: 1);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	err = ip_local_out_sk(sk, skb);
 | 
						err = ip_local_out_sk(sk, skb);
 | 
				
			||||||
	if (unlikely(net_xmit_eval(err)))
 | 
						if (unlikely(net_xmit_eval(err)))
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -1663,7 +1663,7 @@ static void ip_encap(struct sk_buff *skb, __be32 saddr, __be32 daddr)
 | 
				
			||||||
	iph->protocol	=	IPPROTO_IPIP;
 | 
						iph->protocol	=	IPPROTO_IPIP;
 | 
				
			||||||
	iph->ihl	=	5;
 | 
						iph->ihl	=	5;
 | 
				
			||||||
	iph->tot_len	=	htons(skb->len);
 | 
						iph->tot_len	=	htons(skb->len);
 | 
				
			||||||
	ip_select_ident(skb, skb_dst(skb), NULL);
 | 
						ip_select_ident(skb, NULL);
 | 
				
			||||||
	ip_send_check(iph);
 | 
						ip_send_check(iph);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
 | 
						memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -389,7 +389,7 @@ static int raw_send_hdrinc(struct sock *sk, struct flowi4 *fl4,
 | 
				
			||||||
		iph->check   = 0;
 | 
							iph->check   = 0;
 | 
				
			||||||
		iph->tot_len = htons(length);
 | 
							iph->tot_len = htons(length);
 | 
				
			||||||
		if (!iph->id)
 | 
							if (!iph->id)
 | 
				
			||||||
			ip_select_ident(skb, &rt->dst, NULL);
 | 
								ip_select_ident(skb, NULL);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
		iph->check = ip_fast_csum((unsigned char *)iph, iph->ihl);
 | 
							iph->check = ip_fast_csum((unsigned char *)iph, iph->ihl);
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -89,6 +89,7 @@
 | 
				
			||||||
#include <linux/rcupdate.h>
 | 
					#include <linux/rcupdate.h>
 | 
				
			||||||
#include <linux/times.h>
 | 
					#include <linux/times.h>
 | 
				
			||||||
#include <linux/slab.h>
 | 
					#include <linux/slab.h>
 | 
				
			||||||
 | 
					#include <linux/jhash.h>
 | 
				
			||||||
#include <net/dst.h>
 | 
					#include <net/dst.h>
 | 
				
			||||||
#include <net/net_namespace.h>
 | 
					#include <net/net_namespace.h>
 | 
				
			||||||
#include <net/protocol.h>
 | 
					#include <net/protocol.h>
 | 
				
			||||||
| 
						 | 
					@ -456,39 +457,19 @@ static struct neighbour *ipv4_neigh_lookup(const struct dst_entry *dst,
 | 
				
			||||||
	return neigh_create(&arp_tbl, pkey, dev);
 | 
						return neigh_create(&arp_tbl, pkey, dev);
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
/*
 | 
					atomic_t *ip_idents __read_mostly;
 | 
				
			||||||
 * Peer allocation may fail only in serious out-of-memory conditions.  However
 | 
					EXPORT_SYMBOL(ip_idents);
 | 
				
			||||||
 * we still can generate some output.
 | 
					
 | 
				
			||||||
 * Random ID selection looks a bit dangerous because we have no chances to
 | 
					void __ip_select_ident(struct iphdr *iph, int segs)
 | 
				
			||||||
 * select ID being unique in a reasonable period of time.
 | 
					 | 
				
			||||||
 * But broken packet identifier may be better than no packet at all.
 | 
					 | 
				
			||||||
 */
 | 
					 | 
				
			||||||
static void ip_select_fb_ident(struct iphdr *iph)
 | 
					 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
	static DEFINE_SPINLOCK(ip_fb_id_lock);
 | 
						static u32 ip_idents_hashrnd __read_mostly;
 | 
				
			||||||
	static u32 ip_fallback_id;
 | 
						u32 hash, id;
 | 
				
			||||||
	u32 salt;
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
	spin_lock_bh(&ip_fb_id_lock);
 | 
						net_get_random_once(&ip_idents_hashrnd, sizeof(ip_idents_hashrnd));
 | 
				
			||||||
	salt = secure_ip_id((__force __be32)ip_fallback_id ^ iph->daddr);
 | 
					 | 
				
			||||||
	iph->id = htons(salt & 0xFFFF);
 | 
					 | 
				
			||||||
	ip_fallback_id = salt;
 | 
					 | 
				
			||||||
	spin_unlock_bh(&ip_fb_id_lock);
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
void __ip_select_ident(struct iphdr *iph, struct dst_entry *dst, int more)
 | 
						hash = jhash_1word((__force u32)iph->daddr, ip_idents_hashrnd);
 | 
				
			||||||
{
 | 
						id = ip_idents_reserve(hash, segs);
 | 
				
			||||||
	struct net *net = dev_net(dst->dev);
 | 
						iph->id = htons(id);
 | 
				
			||||||
	struct inet_peer *peer;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
	peer = inet_getpeer_v4(net->ipv4.peers, iph->daddr, 1);
 | 
					 | 
				
			||||||
	if (peer) {
 | 
					 | 
				
			||||||
		iph->id = htons(inet_getid(peer, more));
 | 
					 | 
				
			||||||
		inet_putpeer(peer);
 | 
					 | 
				
			||||||
		return;
 | 
					 | 
				
			||||||
	}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
	ip_select_fb_ident(iph);
 | 
					 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
EXPORT_SYMBOL(__ip_select_ident);
 | 
					EXPORT_SYMBOL(__ip_select_ident);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					@ -2711,6 +2692,12 @@ int __init ip_rt_init(void)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
	int rc = 0;
 | 
						int rc = 0;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						ip_idents = kmalloc(IP_IDENTS_SZ * sizeof(*ip_idents), GFP_KERNEL);
 | 
				
			||||||
 | 
						if (!ip_idents)
 | 
				
			||||||
 | 
							panic("IP: failed to allocate ip_idents\n");
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						prandom_bytes(ip_idents, IP_IDENTS_SZ * sizeof(*ip_idents));
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#ifdef CONFIG_IP_ROUTE_CLASSID
 | 
					#ifdef CONFIG_IP_ROUTE_CLASSID
 | 
				
			||||||
	ip_rt_acct = __alloc_percpu(256 * sizeof(struct ip_rt_acct), __alignof__(struct ip_rt_acct));
 | 
						ip_rt_acct = __alloc_percpu(256 * sizeof(struct ip_rt_acct), __alignof__(struct ip_rt_acct));
 | 
				
			||||||
	if (!ip_rt_acct)
 | 
						if (!ip_rt_acct)
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -58,12 +58,12 @@ static int xfrm4_mode_tunnel_output(struct xfrm_state *x, struct sk_buff *skb)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	top_iph->frag_off = (flags & XFRM_STATE_NOPMTUDISC) ?
 | 
						top_iph->frag_off = (flags & XFRM_STATE_NOPMTUDISC) ?
 | 
				
			||||||
		0 : (XFRM_MODE_SKB_CB(skb)->frag_off & htons(IP_DF));
 | 
							0 : (XFRM_MODE_SKB_CB(skb)->frag_off & htons(IP_DF));
 | 
				
			||||||
	ip_select_ident(skb, dst->child, NULL);
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
	top_iph->ttl = ip4_dst_hoplimit(dst->child);
 | 
						top_iph->ttl = ip4_dst_hoplimit(dst->child);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	top_iph->saddr = x->props.saddr.a4;
 | 
						top_iph->saddr = x->props.saddr.a4;
 | 
				
			||||||
	top_iph->daddr = x->id.daddr.a4;
 | 
						top_iph->daddr = x->id.daddr.a4;
 | 
				
			||||||
 | 
						ip_select_ident(skb, NULL);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	return 0;
 | 
						return 0;
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -537,6 +537,18 @@ static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from)
 | 
				
			||||||
	skb_copy_secmark(to, from);
 | 
						skb_copy_secmark(to, from);
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					static void ipv6_select_ident(struct frag_hdr *fhdr, struct rt6_info *rt)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
						static u32 ip6_idents_hashrnd __read_mostly;
 | 
				
			||||||
 | 
						u32 hash, id;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						net_get_random_once(&ip6_idents_hashrnd, sizeof(ip6_idents_hashrnd));
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						hash = __ipv6_addr_jhash(&rt->rt6i_dst.addr, ip6_idents_hashrnd);
 | 
				
			||||||
 | 
						id = ip_idents_reserve(hash, 1);
 | 
				
			||||||
 | 
						fhdr->identification = htonl(id);
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
 | 
					int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
	struct sk_buff *frag;
 | 
						struct sk_buff *frag;
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -8,36 +8,6 @@
 | 
				
			||||||
#include <net/addrconf.h>
 | 
					#include <net/addrconf.h>
 | 
				
			||||||
#include <net/secure_seq.h>
 | 
					#include <net/secure_seq.h>
 | 
				
			||||||
 | 
					
 | 
				
			||||||
void ipv6_select_ident(struct frag_hdr *fhdr, struct rt6_info *rt)
 | 
					 | 
				
			||||||
{
 | 
					 | 
				
			||||||
	static atomic_t ipv6_fragmentation_id;
 | 
					 | 
				
			||||||
	struct in6_addr addr;
 | 
					 | 
				
			||||||
	int old, new;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
#if IS_ENABLED(CONFIG_IPV6)
 | 
					 | 
				
			||||||
	struct inet_peer *peer;
 | 
					 | 
				
			||||||
	struct net *net;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
	net = dev_net(rt->dst.dev);
 | 
					 | 
				
			||||||
	peer = inet_getpeer_v6(net->ipv6.peers, &rt->rt6i_dst.addr, 1);
 | 
					 | 
				
			||||||
	if (peer) {
 | 
					 | 
				
			||||||
		fhdr->identification = htonl(inet_getid(peer, 0));
 | 
					 | 
				
			||||||
		inet_putpeer(peer);
 | 
					 | 
				
			||||||
		return;
 | 
					 | 
				
			||||||
	}
 | 
					 | 
				
			||||||
#endif
 | 
					 | 
				
			||||||
	do {
 | 
					 | 
				
			||||||
		old = atomic_read(&ipv6_fragmentation_id);
 | 
					 | 
				
			||||||
		new = old + 1;
 | 
					 | 
				
			||||||
		if (!new)
 | 
					 | 
				
			||||||
			new = 1;
 | 
					 | 
				
			||||||
	} while (atomic_cmpxchg(&ipv6_fragmentation_id, old, new) != old);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
	addr = rt->rt6i_dst.addr;
 | 
					 | 
				
			||||||
	addr.s6_addr32[0] ^= (__force __be32)new;
 | 
					 | 
				
			||||||
	fhdr->identification = htonl(secure_ipv6_id(addr.s6_addr32));
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
EXPORT_SYMBOL(ipv6_select_ident);
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
int ip6_find_1stfragopt(struct sk_buff *skb, u8 **nexthdr)
 | 
					int ip6_find_1stfragopt(struct sk_buff *skb, u8 **nexthdr)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -883,7 +883,7 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
 | 
				
			||||||
	iph->daddr		=	cp->daddr.ip;
 | 
						iph->daddr		=	cp->daddr.ip;
 | 
				
			||||||
	iph->saddr		=	saddr;
 | 
						iph->saddr		=	saddr;
 | 
				
			||||||
	iph->ttl		=	old_iph->ttl;
 | 
						iph->ttl		=	old_iph->ttl;
 | 
				
			||||||
	ip_select_ident(skb, &rt->dst, NULL);
 | 
						ip_select_ident(skb, NULL);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	/* Another hack: avoid icmp_send in ip_fragment */
 | 
						/* Another hack: avoid icmp_send in ip_fragment */
 | 
				
			||||||
	skb->ignore_df = 1;
 | 
						skb->ignore_df = 1;
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
		Loading…
	
		Reference in a new issue