unix_stream_sendmsg() currently uses order-2 allocations,
and we have had numerous reports that this can fail.
The __GFP_REPEAT flag present in sock_alloc_send_pskb() is
not helping.
This patch extends the work done in commit eb6a24816b
("af_unix: reduce high order page allocations") for
datagram sockets.
This opens the possibility of zero-copy IO (splice() and
friends).
The trick is to no longer use skb_pull() in the recvmsg() path,
and instead add a @consumed field to UNIXCB() to track the
amount of payload already read from the skb (see the sketch below).
There is a performance regression for large sends because of
the extra page allocations; it will be addressed in a follow-up
patch allowing sock_alloc_send_pskb() to attempt high-order
page allocations.
Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: David Rientjes <rientjes@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
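
For readers unfamiliar with the approach, here is a minimal sketch (not the
actual patch) of what the receive side can look like once skb_pull() is gone.
The unix_skb_len() and unix_stream_consume() helpers are hypothetical names
used only for illustration, assuming <linux/skbuff.h> and the UNIXCB()
definition from the header shown below.

/* Hedged sketch, not the actual patch: instead of skb_pull()ing bytes out
 * of the skb, keep the skb intact and remember how much of it the reader
 * has already consumed.
 */
static inline int unix_skb_len(const struct sk_buff *skb)
{
	return skb->len - UNIXCB(skb).consumed;
}

/* Copy up to @size unread bytes of @skb into @iov and advance the per-skb
 * consumed counter; returns the number of bytes copied or a negative error.
 * (Hypothetical helper for illustration only.)
 */
static int unix_stream_consume(struct sk_buff *skb, struct iovec *iov,
			       size_t size)
{
	int chunk = min_t(int, unix_skb_len(skb), size);

	if (skb_copy_datagram_iovec(skb, UNIXCB(skb).consumed, iov, chunk))
		return -EFAULT;

	UNIXCB(skb).consumed += chunk;
	return chunk;
}

The caller would unlink and free the skb only once unix_skb_len() reaches
zero; because the skb is never pulled, its payload pages stay intact, which
is what opens the door to zero-copy consumers such as splice().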
#ifndef __LINUX_NET_AFUNIX_H
#define __LINUX_NET_AFUNIX_H

#include <linux/socket.h>
#include <linux/un.h>
#include <linux/mutex.h>
#include <net/sock.h>

void unix_inflight(struct file *fp);
void unix_notinflight(struct file *fp);
void unix_gc(void);
void wait_for_unix_gc(void);
struct sock *unix_get_socket(struct file *filp);
struct sock *unix_peer_get(struct sock *);

#define UNIX_HASH_SIZE	256
#define UNIX_HASH_BITS	8

extern unsigned int unix_tot_inflight;
extern spinlock_t unix_table_lock;
extern struct hlist_head unix_socket_table[2 * UNIX_HASH_SIZE];

struct unix_address {
	atomic_t	refcnt;
	int		len;
	unsigned int	hash;
	struct sockaddr_un name[0];
};

struct unix_skb_parms {
	struct pid		*pid;		/* Skb credentials	*/
	kuid_t			uid;
	kgid_t			gid;
	struct scm_fp_list	*fp;		/* Passed files		*/
#ifdef CONFIG_SECURITY_NETWORK
	u32			secid;		/* Security ID		*/
#endif
	u32			consumed;	/* Payload bytes already read	*/
};

#define UNIXCB(skb) 	(*(struct unix_skb_parms *)&((skb)->cb))
#define UNIXSID(skb)	(&UNIXCB((skb)).secid)

#define unix_state_lock(s)	spin_lock(&unix_sk(s)->lock)
#define unix_state_unlock(s)	spin_unlock(&unix_sk(s)->lock)
#define unix_state_lock_nested(s) \
				spin_lock_nested(&unix_sk(s)->lock, \
				SINGLE_DEPTH_NESTING)

/* The AF_UNIX socket */
struct unix_sock {
	/* WARNING: sk has to be the first member */
	struct sock		sk;
	struct unix_address     *addr;
	struct path		path;
	struct mutex		readlock;
	struct sock		*peer;
	struct list_head	link;
	atomic_long_t		inflight;
	spinlock_t		lock;
	unsigned char		recursion_level;
	unsigned long		gc_flags;
#define UNIX_GC_CANDIDATE	0
#define UNIX_GC_MAYBE_CYCLE	1
	struct socket_wq	peer_wq;
};
#define unix_sk(__sk) ((struct unix_sock *)__sk)

#define peer_wait peer_wq.wait

long unix_inq_len(struct sock *sk);
long unix_outq_len(struct sock *sk);

#ifdef CONFIG_SYSCTL
int unix_sysctl_register(struct net *net);
void unix_sysctl_unregister(struct net *net);
#else
static inline int unix_sysctl_register(struct net *net) { return 0; }
static inline void unix_sysctl_unregister(struct net *net) {}
#endif
#endif