forked from mirrors/linux
		
	poll: avoid extra wakeups in select/poll
After the introduction of keyed wakeups that Davide Libenzi did for epoll, we are able to avoid spurious wakeups in the poll()/select() code too. For example, a typical use of poll()/select() is to wait for incoming network frames on many sockets. But TX completion for UDP/TCP frames calls sock_wfree(), which in turn schedules the thread. When scheduled, the thread does a full scan of all polled fds and can sleep again, because nothing is really available. If the number of fds is large, this causes significant load. This patch makes select()/poll() aware of keyed wakeups so that useless wakeups are avoided. This reduces the number of context switches by about 50% on some setups, as well as the work performed by softirq handlers. Signed-off-by: Eric Dumazet <dada1@cosmosbay.com> Acked-by: David S. Miller <davem@davemloft.net> Acked-by: Andi Kleen <ak@linux.intel.com> Acked-by: Ingo Molnar <mingo@elte.hu> Acked-by: Davide Libenzi <davidel@xmailserver.org> Cc: Christoph Lameter <cl@linux-foundation.org> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
This commit is contained in:
		
							parent
							
								
									02d5341ae5
								
							
						
					
					
						commit
						4938d7e023
					
				
					 2 changed files with 39 additions and 4 deletions
				
			
		
							
								
								
									
										40
									
								
								fs/select.c
									
									
									
									
									
								
							
							
						
						
									
										40
									
								
								fs/select.c
									
									
									
									
									
								
							| 
						 | 
					@ -168,7 +168,7 @@ static struct poll_table_entry *poll_get_entry(struct poll_wqueues *p)
 | 
				
			||||||
	return table->entry++;
 | 
						return table->entry++;
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
static int pollwake(wait_queue_t *wait, unsigned mode, int sync, void *key)
 | 
					static int __pollwake(wait_queue_t *wait, unsigned mode, int sync, void *key)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
	struct poll_wqueues *pwq = wait->private;
 | 
						struct poll_wqueues *pwq = wait->private;
 | 
				
			||||||
	DECLARE_WAITQUEUE(dummy_wait, pwq->polling_task);
 | 
						DECLARE_WAITQUEUE(dummy_wait, pwq->polling_task);
 | 
				
			||||||
| 
						 | 
					@ -194,6 +194,16 @@ static int pollwake(wait_queue_t *wait, unsigned mode, int sync, void *key)
 | 
				
			||||||
	return default_wake_function(&dummy_wait, mode, sync, key);
 | 
						return default_wake_function(&dummy_wait, mode, sync, key);
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					static int pollwake(wait_queue_t *wait, unsigned mode, int sync, void *key)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
						struct poll_table_entry *entry;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						entry = container_of(wait, struct poll_table_entry, wait);
 | 
				
			||||||
 | 
						if (key && !((unsigned long)key & entry->key))
 | 
				
			||||||
 | 
							return 0;
 | 
				
			||||||
 | 
						return __pollwake(wait, mode, sync, key);
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
/* Add a new entry */
 | 
					/* Add a new entry */
 | 
				
			||||||
static void __pollwait(struct file *filp, wait_queue_head_t *wait_address,
 | 
					static void __pollwait(struct file *filp, wait_queue_head_t *wait_address,
 | 
				
			||||||
				poll_table *p)
 | 
									poll_table *p)
 | 
				
			||||||
| 
						 | 
					@ -205,6 +215,7 @@ static void __pollwait(struct file *filp, wait_queue_head_t *wait_address,
 | 
				
			||||||
	get_file(filp);
 | 
						get_file(filp);
 | 
				
			||||||
	entry->filp = filp;
 | 
						entry->filp = filp;
 | 
				
			||||||
	entry->wait_address = wait_address;
 | 
						entry->wait_address = wait_address;
 | 
				
			||||||
 | 
						entry->key = p->key;
 | 
				
			||||||
	init_waitqueue_func_entry(&entry->wait, pollwake);
 | 
						init_waitqueue_func_entry(&entry->wait, pollwake);
 | 
				
			||||||
	entry->wait.private = pwq;
 | 
						entry->wait.private = pwq;
 | 
				
			||||||
	add_wait_queue(wait_address, &entry->wait);
 | 
						add_wait_queue(wait_address, &entry->wait);
 | 
				
			||||||
| 
						 | 
					@ -362,6 +373,18 @@ static int max_select_fd(unsigned long n, fd_set_bits *fds)
 | 
				
			||||||
#define POLLOUT_SET (POLLWRBAND | POLLWRNORM | POLLOUT | POLLERR)
 | 
					#define POLLOUT_SET (POLLWRBAND | POLLWRNORM | POLLOUT | POLLERR)
 | 
				
			||||||
#define POLLEX_SET (POLLPRI)
 | 
					#define POLLEX_SET (POLLPRI)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					static inline void wait_key_set(poll_table *wait, unsigned long in,
 | 
				
			||||||
 | 
									unsigned long out, unsigned long bit)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
						if (wait) {
 | 
				
			||||||
 | 
							wait->key = POLLEX_SET;
 | 
				
			||||||
 | 
							if (in & bit)
 | 
				
			||||||
 | 
								wait->key |= POLLIN_SET;
 | 
				
			||||||
 | 
							if (out & bit)
 | 
				
			||||||
 | 
								wait->key |= POLLOUT_SET;
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
int do_select(int n, fd_set_bits *fds, struct timespec *end_time)
 | 
					int do_select(int n, fd_set_bits *fds, struct timespec *end_time)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
	ktime_t expire, *to = NULL;
 | 
						ktime_t expire, *to = NULL;
 | 
				
			||||||
| 
						 | 
					@ -418,20 +441,25 @@ int do_select(int n, fd_set_bits *fds, struct timespec *end_time)
 | 
				
			||||||
				if (file) {
 | 
									if (file) {
 | 
				
			||||||
					f_op = file->f_op;
 | 
										f_op = file->f_op;
 | 
				
			||||||
					mask = DEFAULT_POLLMASK;
 | 
										mask = DEFAULT_POLLMASK;
 | 
				
			||||||
					if (f_op && f_op->poll)
 | 
										if (f_op && f_op->poll) {
 | 
				
			||||||
						mask = (*f_op->poll)(file, retval ? NULL : wait);
 | 
											wait_key_set(wait, in, out, bit);
 | 
				
			||||||
 | 
											mask = (*f_op->poll)(file, wait);
 | 
				
			||||||
 | 
										}
 | 
				
			||||||
					fput_light(file, fput_needed);
 | 
										fput_light(file, fput_needed);
 | 
				
			||||||
					if ((mask & POLLIN_SET) && (in & bit)) {
 | 
										if ((mask & POLLIN_SET) && (in & bit)) {
 | 
				
			||||||
						res_in |= bit;
 | 
											res_in |= bit;
 | 
				
			||||||
						retval++;
 | 
											retval++;
 | 
				
			||||||
 | 
											wait = NULL;
 | 
				
			||||||
					}
 | 
										}
 | 
				
			||||||
					if ((mask & POLLOUT_SET) && (out & bit)) {
 | 
										if ((mask & POLLOUT_SET) && (out & bit)) {
 | 
				
			||||||
						res_out |= bit;
 | 
											res_out |= bit;
 | 
				
			||||||
						retval++;
 | 
											retval++;
 | 
				
			||||||
 | 
											wait = NULL;
 | 
				
			||||||
					}
 | 
										}
 | 
				
			||||||
					if ((mask & POLLEX_SET) && (ex & bit)) {
 | 
										if ((mask & POLLEX_SET) && (ex & bit)) {
 | 
				
			||||||
						res_ex |= bit;
 | 
											res_ex |= bit;
 | 
				
			||||||
						retval++;
 | 
											retval++;
 | 
				
			||||||
 | 
											wait = NULL;
 | 
				
			||||||
					}
 | 
										}
 | 
				
			||||||
				}
 | 
									}
 | 
				
			||||||
			}
 | 
								}
 | 
				
			||||||
| 
						 | 
					@ -685,8 +713,12 @@ static inline unsigned int do_pollfd(struct pollfd *pollfd, poll_table *pwait)
 | 
				
			||||||
		mask = POLLNVAL;
 | 
							mask = POLLNVAL;
 | 
				
			||||||
		if (file != NULL) {
 | 
							if (file != NULL) {
 | 
				
			||||||
			mask = DEFAULT_POLLMASK;
 | 
								mask = DEFAULT_POLLMASK;
 | 
				
			||||||
			if (file->f_op && file->f_op->poll)
 | 
								if (file->f_op && file->f_op->poll) {
 | 
				
			||||||
 | 
									if (pwait)
 | 
				
			||||||
 | 
										pwait->key = pollfd->events |
 | 
				
			||||||
 | 
												POLLERR | POLLHUP;
 | 
				
			||||||
				mask = file->f_op->poll(file, pwait);
 | 
									mask = file->f_op->poll(file, pwait);
 | 
				
			||||||
 | 
								}
 | 
				
			||||||
			/* Mask out unneeded events. */
 | 
								/* Mask out unneeded events. */
 | 
				
			||||||
			mask &= pollfd->events | POLLERR | POLLHUP;
 | 
								mask &= pollfd->events | POLLERR | POLLHUP;
 | 
				
			||||||
			fput_light(file, fput_needed);
 | 
								fput_light(file, fput_needed);
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -32,6 +32,7 @@ typedef void (*poll_queue_proc)(struct file *, wait_queue_head_t *, struct poll_
 | 
				
			||||||
 | 
					
 | 
				
			||||||
typedef struct poll_table_struct {
 | 
					typedef struct poll_table_struct {
 | 
				
			||||||
	poll_queue_proc qproc;
 | 
						poll_queue_proc qproc;
 | 
				
			||||||
 | 
						unsigned long key;
 | 
				
			||||||
} poll_table;
 | 
					} poll_table;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
static inline void poll_wait(struct file * filp, wait_queue_head_t * wait_address, poll_table *p)
 | 
					static inline void poll_wait(struct file * filp, wait_queue_head_t * wait_address, poll_table *p)
 | 
				
			||||||
| 
						 | 
					@ -43,10 +44,12 @@ static inline void poll_wait(struct file * filp, wait_queue_head_t * wait_addres
 | 
				
			||||||
static inline void init_poll_funcptr(poll_table *pt, poll_queue_proc qproc)
 | 
					static inline void init_poll_funcptr(poll_table *pt, poll_queue_proc qproc)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
	pt->qproc = qproc;
 | 
						pt->qproc = qproc;
 | 
				
			||||||
 | 
						pt->key   = ~0UL; /* all events enabled */
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
struct poll_table_entry {
 | 
					struct poll_table_entry {
 | 
				
			||||||
	struct file *filp;
 | 
						struct file *filp;
 | 
				
			||||||
 | 
						unsigned long key;
 | 
				
			||||||
	wait_queue_t wait;
 | 
						wait_queue_t wait;
 | 
				
			||||||
	wait_queue_head_t *wait_address;
 | 
						wait_queue_head_t *wait_address;
 | 
				
			||||||
};
 | 
					};
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
		Loading…
	
		Reference in a new issue