forked from mirrors/linux
		
	ipip: percpu stats accounting
Maintain per-CPU tx_bytes, tx_packets, rx_bytes and rx_packets counters. Other, seldom-used fields are kept in the netdev->stats structure, where updates are possibly unsafe.

This is preliminary work to support a lockless transmit path and to correct the RX stats, which are already unsafe.

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
		
							parent
							
								
									e985aad723
								
							
						
					
					
						commit
						3c97af99a5
					
				
					 1 changed file with 93 additions and 34 deletions
				
			
		
							
								
								
									
										127
									
								
								net/ipv4/ipip.c
									
									
									
									
									
								
							
							
						
						
									
										127
									
								
								net/ipv4/ipip.c
									
									
									
									
									
								
							| 
						 | 
					@ -131,8 +131,9 @@ struct ipip_net {
 | 
				
			||||||
	struct net_device *fb_tunnel_dev;
 | 
						struct net_device *fb_tunnel_dev;
 | 
				
			||||||
};
 | 
					};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
static void ipip_tunnel_init(struct net_device *dev);
 | 
					static int ipip_tunnel_init(struct net_device *dev);
 | 
				
			||||||
static void ipip_tunnel_setup(struct net_device *dev);
 | 
					static void ipip_tunnel_setup(struct net_device *dev);
 | 
				
			||||||
 | 
					static void ipip_dev_free(struct net_device *dev);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
/*
 | 
					/*
 | 
				
			||||||
 * Locking : hash tables are protected by RCU and RTNL
 | 
					 * Locking : hash tables are protected by RCU and RTNL
 | 
				
			||||||
| 
						 | 
					@ -141,6 +142,34 @@ static void ipip_tunnel_setup(struct net_device *dev);
 | 
				
			||||||
#define for_each_ip_tunnel_rcu(start) \
 | 
					#define for_each_ip_tunnel_rcu(start) \
 | 
				
			||||||
	for (t = rcu_dereference(start); t; t = rcu_dereference(t->next))
 | 
						for (t = rcu_dereference(start); t; t = rcu_dereference(t->next))
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/* often modified stats are per cpu, other are shared (netdev->stats) */
 | 
				
			||||||
 | 
					struct pcpu_tstats {
 | 
				
			||||||
 | 
						unsigned long	rx_packets;
 | 
				
			||||||
 | 
						unsigned long	rx_bytes;
 | 
				
			||||||
 | 
						unsigned long	tx_packets;
 | 
				
			||||||
 | 
						unsigned long	tx_bytes;
 | 
				
			||||||
 | 
					};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					static struct net_device_stats *ipip_get_stats(struct net_device *dev)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
						struct pcpu_tstats sum = { 0 };
 | 
				
			||||||
 | 
						int i;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						for_each_possible_cpu(i) {
 | 
				
			||||||
 | 
							const struct pcpu_tstats *tstats = per_cpu_ptr(dev->tstats, i);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
							sum.rx_packets += tstats->rx_packets;
 | 
				
			||||||
 | 
							sum.rx_bytes   += tstats->rx_bytes;
 | 
				
			||||||
 | 
							sum.tx_packets += tstats->tx_packets;
 | 
				
			||||||
 | 
							sum.tx_bytes   += tstats->tx_bytes;
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
						dev->stats.rx_packets = sum.rx_packets;
 | 
				
			||||||
 | 
						dev->stats.rx_bytes   = sum.rx_bytes;
 | 
				
			||||||
 | 
						dev->stats.tx_packets = sum.tx_packets;
 | 
				
			||||||
 | 
						dev->stats.tx_bytes   = sum.tx_bytes;
 | 
				
			||||||
 | 
						return &dev->stats;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
static struct ip_tunnel * ipip_tunnel_lookup(struct net *net,
 | 
					static struct ip_tunnel * ipip_tunnel_lookup(struct net *net,
 | 
				
			||||||
		__be32 remote, __be32 local)
 | 
							__be32 remote, __be32 local)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
| 
						 | 
					@ -239,7 +268,7 @@ static struct ip_tunnel * ipip_tunnel_locate(struct net *net,
 | 
				
			||||||
	if (parms->name[0])
 | 
						if (parms->name[0])
 | 
				
			||||||
		strlcpy(name, parms->name, IFNAMSIZ);
 | 
							strlcpy(name, parms->name, IFNAMSIZ);
 | 
				
			||||||
	else
 | 
						else
 | 
				
			||||||
		sprintf(name, "tunl%%d");
 | 
							strcpy(name, "tunl%d");
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	dev = alloc_netdev(sizeof(*t), name, ipip_tunnel_setup);
 | 
						dev = alloc_netdev(sizeof(*t), name, ipip_tunnel_setup);
 | 
				
			||||||
	if (dev == NULL)
 | 
						if (dev == NULL)
 | 
				
			||||||
| 
						 | 
					@ -255,7 +284,8 @@ static struct ip_tunnel * ipip_tunnel_locate(struct net *net,
 | 
				
			||||||
	nt = netdev_priv(dev);
 | 
						nt = netdev_priv(dev);
 | 
				
			||||||
	nt->parms = *parms;
 | 
						nt->parms = *parms;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	ipip_tunnel_init(dev);
 | 
						if (ipip_tunnel_init(dev) < 0)
 | 
				
			||||||
 | 
							goto failed_free;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	if (register_netdevice(dev) < 0)
 | 
						if (register_netdevice(dev) < 0)
 | 
				
			||||||
		goto failed_free;
 | 
							goto failed_free;
 | 
				
			||||||
| 
						 | 
					@ -265,7 +295,7 @@ static struct ip_tunnel * ipip_tunnel_locate(struct net *net,
 | 
				
			||||||
	return nt;
 | 
						return nt;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
failed_free:
 | 
					failed_free:
 | 
				
			||||||
	free_netdev(dev);
 | 
						ipip_dev_free(dev);
 | 
				
			||||||
	return NULL;
 | 
						return NULL;
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					@ -359,8 +389,10 @@ static int ipip_rcv(struct sk_buff *skb)
 | 
				
			||||||
	const struct iphdr *iph = ip_hdr(skb);
 | 
						const struct iphdr *iph = ip_hdr(skb);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	rcu_read_lock();
 | 
						rcu_read_lock();
 | 
				
			||||||
	if ((tunnel = ipip_tunnel_lookup(dev_net(skb->dev),
 | 
						tunnel = ipip_tunnel_lookup(dev_net(skb->dev), iph->saddr, iph->daddr);
 | 
				
			||||||
					iph->saddr, iph->daddr)) != NULL) {
 | 
						if (tunnel != NULL) {
 | 
				
			||||||
 | 
							struct pcpu_tstats *tstats;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
		if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) {
 | 
							if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) {
 | 
				
			||||||
			rcu_read_unlock();
 | 
								rcu_read_unlock();
 | 
				
			||||||
			kfree_skb(skb);
 | 
								kfree_skb(skb);
 | 
				
			||||||
| 
						 | 
					@ -374,7 +406,11 @@ static int ipip_rcv(struct sk_buff *skb)
 | 
				
			||||||
		skb->protocol = htons(ETH_P_IP);
 | 
							skb->protocol = htons(ETH_P_IP);
 | 
				
			||||||
		skb->pkt_type = PACKET_HOST;
 | 
							skb->pkt_type = PACKET_HOST;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
		skb_tunnel_rx(skb, tunnel->dev);
 | 
							tstats = this_cpu_ptr(tunnel->dev->tstats);
 | 
				
			||||||
 | 
							tstats->rx_packets++;
 | 
				
			||||||
 | 
							tstats->rx_bytes += skb->len;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
							__skb_tunnel_rx(skb, tunnel->dev);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
		ipip_ecn_decapsulate(iph, skb);
 | 
							ipip_ecn_decapsulate(iph, skb);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					@ -397,13 +433,12 @@ static int ipip_rcv(struct sk_buff *skb)
 | 
				
			||||||
static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
 | 
					static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
	struct ip_tunnel *tunnel = netdev_priv(dev);
 | 
						struct ip_tunnel *tunnel = netdev_priv(dev);
 | 
				
			||||||
	struct net_device_stats *stats = &dev->stats;
 | 
						struct pcpu_tstats *tstats;
 | 
				
			||||||
	struct netdev_queue *txq = netdev_get_tx_queue(dev, 0);
 | 
					 | 
				
			||||||
	struct iphdr  *tiph = &tunnel->parms.iph;
 | 
						struct iphdr  *tiph = &tunnel->parms.iph;
 | 
				
			||||||
	u8     tos = tunnel->parms.iph.tos;
 | 
						u8     tos = tunnel->parms.iph.tos;
 | 
				
			||||||
	__be16 df = tiph->frag_off;
 | 
						__be16 df = tiph->frag_off;
 | 
				
			||||||
	struct rtable *rt;     			/* Route to the other host */
 | 
						struct rtable *rt;     			/* Route to the other host */
 | 
				
			||||||
	struct net_device *tdev;			/* Device to other host */
 | 
						struct net_device *tdev;		/* Device to other host */
 | 
				
			||||||
	struct iphdr  *old_iph = ip_hdr(skb);
 | 
						struct iphdr  *old_iph = ip_hdr(skb);
 | 
				
			||||||
	struct iphdr  *iph;			/* Our new IP header */
 | 
						struct iphdr  *iph;			/* Our new IP header */
 | 
				
			||||||
	unsigned int max_headroom;		/* The extra header space needed */
 | 
						unsigned int max_headroom;		/* The extra header space needed */
 | 
				
			||||||
| 
						 | 
					@ -413,13 +448,13 @@ static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
 | 
				
			||||||
	if (skb->protocol != htons(ETH_P_IP))
 | 
						if (skb->protocol != htons(ETH_P_IP))
 | 
				
			||||||
		goto tx_error;
 | 
							goto tx_error;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	if (tos&1)
 | 
						if (tos & 1)
 | 
				
			||||||
		tos = old_iph->tos;
 | 
							tos = old_iph->tos;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	if (!dst) {
 | 
						if (!dst) {
 | 
				
			||||||
		/* NBMA tunnel */
 | 
							/* NBMA tunnel */
 | 
				
			||||||
		if ((rt = skb_rtable(skb)) == NULL) {
 | 
							if ((rt = skb_rtable(skb)) == NULL) {
 | 
				
			||||||
			stats->tx_fifo_errors++;
 | 
								dev->stats.tx_fifo_errors++;
 | 
				
			||||||
			goto tx_error;
 | 
								goto tx_error;
 | 
				
			||||||
		}
 | 
							}
 | 
				
			||||||
		if ((dst = rt->rt_gateway) == 0)
 | 
							if ((dst = rt->rt_gateway) == 0)
 | 
				
			||||||
| 
						 | 
					@ -427,14 +462,20 @@ static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	{
 | 
						{
 | 
				
			||||||
		struct flowi fl = { .oif = tunnel->parms.link,
 | 
							struct flowi fl = {
 | 
				
			||||||
				    .nl_u = { .ip4_u =
 | 
								.oif = tunnel->parms.link,
 | 
				
			||||||
					      { .daddr = dst,
 | 
								.nl_u = {
 | 
				
			||||||
						.saddr = tiph->saddr,
 | 
									.ip4_u = {
 | 
				
			||||||
						.tos = RT_TOS(tos) } },
 | 
										.daddr = dst,
 | 
				
			||||||
				    .proto = IPPROTO_IPIP };
 | 
										.saddr = tiph->saddr,
 | 
				
			||||||
 | 
										.tos = RT_TOS(tos)
 | 
				
			||||||
 | 
									}
 | 
				
			||||||
 | 
								},
 | 
				
			||||||
 | 
								.proto = IPPROTO_IPIP
 | 
				
			||||||
 | 
							};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
		if (ip_route_output_key(dev_net(dev), &rt, &fl)) {
 | 
							if (ip_route_output_key(dev_net(dev), &rt, &fl)) {
 | 
				
			||||||
			stats->tx_carrier_errors++;
 | 
								dev->stats.tx_carrier_errors++;
 | 
				
			||||||
			goto tx_error_icmp;
 | 
								goto tx_error_icmp;
 | 
				
			||||||
		}
 | 
							}
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
| 
						 | 
					@ -442,7 +483,7 @@ static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	if (tdev == dev) {
 | 
						if (tdev == dev) {
 | 
				
			||||||
		ip_rt_put(rt);
 | 
							ip_rt_put(rt);
 | 
				
			||||||
		stats->collisions++;
 | 
							dev->stats.collisions++;
 | 
				
			||||||
		goto tx_error;
 | 
							goto tx_error;
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					@ -452,7 +493,7 @@ static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
 | 
				
			||||||
		mtu = dst_mtu(&rt->dst) - sizeof(struct iphdr);
 | 
							mtu = dst_mtu(&rt->dst) - sizeof(struct iphdr);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
		if (mtu < 68) {
 | 
							if (mtu < 68) {
 | 
				
			||||||
			stats->collisions++;
 | 
								dev->stats.collisions++;
 | 
				
			||||||
			ip_rt_put(rt);
 | 
								ip_rt_put(rt);
 | 
				
			||||||
			goto tx_error;
 | 
								goto tx_error;
 | 
				
			||||||
		}
 | 
							}
 | 
				
			||||||
| 
						 | 
					@ -488,7 +529,7 @@ static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
 | 
				
			||||||
		struct sk_buff *new_skb = skb_realloc_headroom(skb, max_headroom);
 | 
							struct sk_buff *new_skb = skb_realloc_headroom(skb, max_headroom);
 | 
				
			||||||
		if (!new_skb) {
 | 
							if (!new_skb) {
 | 
				
			||||||
			ip_rt_put(rt);
 | 
								ip_rt_put(rt);
 | 
				
			||||||
			txq->tx_dropped++;
 | 
								dev->stats.tx_dropped++;
 | 
				
			||||||
			dev_kfree_skb(skb);
 | 
								dev_kfree_skb(skb);
 | 
				
			||||||
			return NETDEV_TX_OK;
 | 
								return NETDEV_TX_OK;
 | 
				
			||||||
		}
 | 
							}
 | 
				
			||||||
| 
						 | 
					@ -525,14 +566,14 @@ static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
 | 
				
			||||||
		iph->ttl	=	old_iph->ttl;
 | 
							iph->ttl	=	old_iph->ttl;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	nf_reset(skb);
 | 
						nf_reset(skb);
 | 
				
			||||||
 | 
						tstats = this_cpu_ptr(dev->tstats);
 | 
				
			||||||
	IPTUNNEL_XMIT();
 | 
						__IPTUNNEL_XMIT(tstats, &dev->stats);
 | 
				
			||||||
	return NETDEV_TX_OK;
 | 
						return NETDEV_TX_OK;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
tx_error_icmp:
 | 
					tx_error_icmp:
 | 
				
			||||||
	dst_link_failure(skb);
 | 
						dst_link_failure(skb);
 | 
				
			||||||
tx_error:
 | 
					tx_error:
 | 
				
			||||||
	stats->tx_errors++;
 | 
						dev->stats.tx_errors++;
 | 
				
			||||||
	dev_kfree_skb(skb);
 | 
						dev_kfree_skb(skb);
 | 
				
			||||||
	return NETDEV_TX_OK;
 | 
						return NETDEV_TX_OK;
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
| 
						 | 
					@ -547,13 +588,19 @@ static void ipip_tunnel_bind_dev(struct net_device *dev)
 | 
				
			||||||
	iph = &tunnel->parms.iph;
 | 
						iph = &tunnel->parms.iph;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	if (iph->daddr) {
 | 
						if (iph->daddr) {
 | 
				
			||||||
		struct flowi fl = { .oif = tunnel->parms.link,
 | 
							struct flowi fl = {
 | 
				
			||||||
				    .nl_u = { .ip4_u =
 | 
								.oif = tunnel->parms.link,
 | 
				
			||||||
					      { .daddr = iph->daddr,
 | 
								.nl_u = {
 | 
				
			||||||
						.saddr = iph->saddr,
 | 
									.ip4_u = {
 | 
				
			||||||
						.tos = RT_TOS(iph->tos) } },
 | 
										.daddr = iph->daddr,
 | 
				
			||||||
				    .proto = IPPROTO_IPIP };
 | 
										.saddr = iph->saddr,
 | 
				
			||||||
 | 
										.tos = RT_TOS(iph->tos)
 | 
				
			||||||
 | 
									}
 | 
				
			||||||
 | 
								},
 | 
				
			||||||
 | 
								.proto = IPPROTO_IPIP
 | 
				
			||||||
 | 
							};
 | 
				
			||||||
		struct rtable *rt;
 | 
							struct rtable *rt;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
		if (!ip_route_output_key(dev_net(dev), &rt, &fl)) {
 | 
							if (!ip_route_output_key(dev_net(dev), &rt, &fl)) {
 | 
				
			||||||
			tdev = rt->dst.dev;
 | 
								tdev = rt->dst.dev;
 | 
				
			||||||
			ip_rt_put(rt);
 | 
								ip_rt_put(rt);
 | 
				
			||||||
| 
						 | 
					@ -699,13 +746,19 @@ static const struct net_device_ops ipip_netdev_ops = {
 | 
				
			||||||
	.ndo_start_xmit	= ipip_tunnel_xmit,
 | 
						.ndo_start_xmit	= ipip_tunnel_xmit,
 | 
				
			||||||
	.ndo_do_ioctl	= ipip_tunnel_ioctl,
 | 
						.ndo_do_ioctl	= ipip_tunnel_ioctl,
 | 
				
			||||||
	.ndo_change_mtu	= ipip_tunnel_change_mtu,
 | 
						.ndo_change_mtu	= ipip_tunnel_change_mtu,
 | 
				
			||||||
 | 
						.ndo_get_stats  = ipip_get_stats,
 | 
				
			||||||
};
 | 
					};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					static void ipip_dev_free(struct net_device *dev)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
						free_percpu(dev->tstats);
 | 
				
			||||||
 | 
						free_netdev(dev);
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
static void ipip_tunnel_setup(struct net_device *dev)
 | 
					static void ipip_tunnel_setup(struct net_device *dev)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
	dev->netdev_ops		= &ipip_netdev_ops;
 | 
						dev->netdev_ops		= &ipip_netdev_ops;
 | 
				
			||||||
	dev->destructor		= free_netdev;
 | 
						dev->destructor		= ipip_dev_free;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	dev->type		= ARPHRD_TUNNEL;
 | 
						dev->type		= ARPHRD_TUNNEL;
 | 
				
			||||||
	dev->hard_header_len 	= LL_MAX_HEADER + sizeof(struct iphdr);
 | 
						dev->hard_header_len 	= LL_MAX_HEADER + sizeof(struct iphdr);
 | 
				
			||||||
| 
						 | 
					@ -717,7 +770,7 @@ static void ipip_tunnel_setup(struct net_device *dev)
 | 
				
			||||||
	dev->priv_flags		&= ~IFF_XMIT_DST_RELEASE;
 | 
						dev->priv_flags		&= ~IFF_XMIT_DST_RELEASE;
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
static void ipip_tunnel_init(struct net_device *dev)
 | 
					static int ipip_tunnel_init(struct net_device *dev)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
	struct ip_tunnel *tunnel = netdev_priv(dev);
 | 
						struct ip_tunnel *tunnel = netdev_priv(dev);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					@ -728,6 +781,12 @@ static void ipip_tunnel_init(struct net_device *dev)
 | 
				
			||||||
	memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4);
 | 
						memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	ipip_tunnel_bind_dev(dev);
 | 
						ipip_tunnel_bind_dev(dev);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						dev->tstats = alloc_percpu(struct pcpu_tstats);
 | 
				
			||||||
 | 
						if (!dev->tstats)
 | 
				
			||||||
 | 
							return -ENOMEM;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						return 0;
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
static void __net_init ipip_fb_tunnel_init(struct net_device *dev)
 | 
					static void __net_init ipip_fb_tunnel_init(struct net_device *dev)
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
		Loading…
	
		Reference in a new issue