mirror of
				https://github.com/torvalds/linux.git
				synced 2025-11-04 10:40:15 +02:00 
			
		
		
		
	cgroup: Merge branch 'memcg_event' into for-3.14
Merge v3.12 based patch series to move cgroup_event implementation to memcg into for-3.14. The following two commits cause a conflict in kernel/cgroup.c: 2ff2a7d03b ("cgroup: kill css_id") and 79bd9814e5 ("cgroup, memcg: move cgroup_event implementation to memcg"). Each patch removes a struct definition from kernel/cgroup.c. As the two are adjacent, they cause a context conflict. Easily resolved by removing both structs. Signed-off-by: Tejun Heo <tj@kernel.org>
This commit is contained in:
		
						commit
						edab95103d
					
				
					 7 changed files with 335 additions and 360 deletions
				
			
		| 
						 | 
				
			
			@ -24,7 +24,6 @@ CONTENTS:
 | 
			
		|||
  2.1 Basic Usage
 | 
			
		||||
  2.2 Attaching processes
 | 
			
		||||
  2.3 Mounting hierarchies by name
 | 
			
		||||
  2.4 Notification API
 | 
			
		||||
3. Kernel API
 | 
			
		||||
  3.1 Overview
 | 
			
		||||
  3.2 Synchronization
 | 
			
		||||
| 
						 | 
				
			
			@ -472,25 +471,6 @@ you give a subsystem a name.
 | 
			
		|||
The name of the subsystem appears as part of the hierarchy description
 | 
			
		||||
in /proc/mounts and /proc/<pid>/cgroups.
 | 
			
		||||
 | 
			
		||||
2.4 Notification API
 | 
			
		||||
--------------------
 | 
			
		||||
 | 
			
		||||
There is mechanism which allows to get notifications about changing
 | 
			
		||||
status of a cgroup.
 | 
			
		||||
 | 
			
		||||
To register a new notification handler you need to:
 | 
			
		||||
 - create a file descriptor for event notification using eventfd(2);
 | 
			
		||||
 - open a control file to be monitored (e.g. memory.usage_in_bytes);
 | 
			
		||||
 - write "<event_fd> <control_fd> <args>" to cgroup.event_control.
 | 
			
		||||
   Interpretation of args is defined by control file implementation;
 | 
			
		||||
 | 
			
		||||
eventfd will be woken up by control file implementation or when the
 | 
			
		||||
cgroup is removed.
 | 
			
		||||
 | 
			
		||||
To unregister a notification handler just close eventfd.
 | 
			
		||||
 | 
			
		||||
NOTE: Support of notifications should be implemented for the control
 | 
			
		||||
file. See documentation for the subsystem.
 | 
			
		||||
 | 
			
		||||
3. Kernel API
 | 
			
		||||
=============
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -29,7 +29,6 @@ struct cgroup_subsys;
 | 
			
		|||
struct inode;
 | 
			
		||||
struct cgroup;
 | 
			
		||||
struct css_id;
 | 
			
		||||
struct eventfd_ctx;
 | 
			
		||||
 | 
			
		||||
extern int cgroup_init_early(void);
 | 
			
		||||
extern int cgroup_init(void);
 | 
			
		||||
| 
						 | 
				
			
			@ -239,10 +238,6 @@ struct cgroup {
 | 
			
		|||
	struct rcu_head rcu_head;
 | 
			
		||||
	struct work_struct destroy_work;
 | 
			
		||||
 | 
			
		||||
	/* List of events which userspace want to receive */
 | 
			
		||||
	struct list_head event_list;
 | 
			
		||||
	spinlock_t event_list_lock;
 | 
			
		||||
 | 
			
		||||
	/* directory xattrs */
 | 
			
		||||
	struct simple_xattrs xattrs;
 | 
			
		||||
};
 | 
			
		||||
| 
						 | 
				
			
			@ -506,25 +501,6 @@ struct cftype {
 | 
			
		|||
	int (*trigger)(struct cgroup_subsys_state *css, unsigned int event);
 | 
			
		||||
 | 
			
		||||
	int (*release)(struct inode *inode, struct file *file);
 | 
			
		||||
 | 
			
		||||
	/*
 | 
			
		||||
	 * register_event() callback will be used to add new userspace
 | 
			
		||||
	 * waiter for changes related to the cftype. Implement it if
 | 
			
		||||
	 * you want to provide this functionality. Use eventfd_signal()
 | 
			
		||||
	 * on eventfd to send notification to userspace.
 | 
			
		||||
	 */
 | 
			
		||||
	int (*register_event)(struct cgroup_subsys_state *css,
 | 
			
		||||
			      struct cftype *cft, struct eventfd_ctx *eventfd,
 | 
			
		||||
			      const char *args);
 | 
			
		||||
	/*
 | 
			
		||||
	 * unregister_event() callback will be called when userspace
 | 
			
		||||
	 * closes the eventfd or on cgroup removing.
 | 
			
		||||
	 * This callback must be implemented, if you want provide
 | 
			
		||||
	 * notification functionality.
 | 
			
		||||
	 */
 | 
			
		||||
	void (*unregister_event)(struct cgroup_subsys_state *css,
 | 
			
		||||
				 struct cftype *cft,
 | 
			
		||||
				 struct eventfd_ctx *eventfd);
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
/*
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -7,6 +7,7 @@
 | 
			
		|||
#include <linux/gfp.h>
 | 
			
		||||
#include <linux/types.h>
 | 
			
		||||
#include <linux/cgroup.h>
 | 
			
		||||
#include <linux/eventfd.h>
 | 
			
		||||
 | 
			
		||||
struct vmpressure {
 | 
			
		||||
	unsigned long scanned;
 | 
			
		||||
| 
						 | 
				
			
			@ -33,13 +34,10 @@ extern void vmpressure_init(struct vmpressure *vmpr);
 | 
			
		|||
extern void vmpressure_cleanup(struct vmpressure *vmpr);
 | 
			
		||||
extern struct vmpressure *memcg_to_vmpressure(struct mem_cgroup *memcg);
 | 
			
		||||
extern struct cgroup_subsys_state *vmpressure_to_css(struct vmpressure *vmpr);
 | 
			
		||||
extern struct vmpressure *css_to_vmpressure(struct cgroup_subsys_state *css);
 | 
			
		||||
extern int vmpressure_register_event(struct cgroup_subsys_state *css,
 | 
			
		||||
				     struct cftype *cft,
 | 
			
		||||
extern int vmpressure_register_event(struct mem_cgroup *memcg,
 | 
			
		||||
				     struct eventfd_ctx *eventfd,
 | 
			
		||||
				     const char *args);
 | 
			
		||||
extern void vmpressure_unregister_event(struct cgroup_subsys_state *css,
 | 
			
		||||
					struct cftype *cft,
 | 
			
		||||
extern void vmpressure_unregister_event(struct mem_cgroup *memcg,
 | 
			
		||||
					struct eventfd_ctx *eventfd);
 | 
			
		||||
#else
 | 
			
		||||
static inline void vmpressure(gfp_t gfp, struct mem_cgroup *memcg,
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -848,7 +848,6 @@ config NUMA_BALANCING
 | 
			
		|||
 | 
			
		||||
menuconfig CGROUPS
 | 
			
		||||
	boolean "Control Group support"
 | 
			
		||||
	depends on EVENTFD
 | 
			
		||||
	help
 | 
			
		||||
	  This option adds support for grouping sets of processes together, for
 | 
			
		||||
	  use with process control subsystems such as Cpusets, CFS, memory
 | 
			
		||||
| 
						 | 
				
			
			@ -915,6 +914,7 @@ config MEMCG
 | 
			
		|||
	bool "Memory Resource Controller for Control Groups"
 | 
			
		||||
	depends on RESOURCE_COUNTERS
 | 
			
		||||
	select MM_OWNER
 | 
			
		||||
	select EVENTFD
 | 
			
		||||
	help
 | 
			
		||||
	  Provides a memory resource controller that manages both anonymous
 | 
			
		||||
	  memory and page cache. (See Documentation/cgroups/memory.txt)
 | 
			
		||||
| 
						 | 
				
			
			@ -1154,7 +1154,6 @@ config UIDGID_STRICT_TYPE_CHECKS
 | 
			
		|||
 | 
			
		||||
config SCHED_AUTOGROUP
 | 
			
		||||
	bool "Automatic process group scheduling"
 | 
			
		||||
	select EVENTFD
 | 
			
		||||
	select CGROUPS
 | 
			
		||||
	select CGROUP_SCHED
 | 
			
		||||
	select FAIR_GROUP_SCHED
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
							
								
								
									
										259
									
								
								kernel/cgroup.c
									
									
									
									
									
								
							
							
						
						
									
										259
									
								
								kernel/cgroup.c
									
									
									
									
									
								
							| 
						 | 
				
			
			@ -56,11 +56,8 @@
 | 
			
		|||
#include <linux/pid_namespace.h>
 | 
			
		||||
#include <linux/idr.h>
 | 
			
		||||
#include <linux/vmalloc.h> /* TODO: replace with more sophisticated array */
 | 
			
		||||
#include <linux/eventfd.h>
 | 
			
		||||
#include <linux/poll.h>
 | 
			
		||||
#include <linux/flex_array.h> /* used in cgroup_attach_task */
 | 
			
		||||
#include <linux/kthread.h>
 | 
			
		||||
#include <linux/file.h>
 | 
			
		||||
 | 
			
		||||
#include <linux/atomic.h>
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			@ -132,36 +129,6 @@ struct cfent {
 | 
			
		|||
	struct simple_xattrs		xattrs;
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
/*
 | 
			
		||||
 * cgroup_event represents events which userspace want to receive.
 | 
			
		||||
 */
 | 
			
		||||
struct cgroup_event {
 | 
			
		||||
	/*
 | 
			
		||||
	 * css which the event belongs to.
 | 
			
		||||
	 */
 | 
			
		||||
	struct cgroup_subsys_state *css;
 | 
			
		||||
	/*
 | 
			
		||||
	 * Control file which the event associated.
 | 
			
		||||
	 */
 | 
			
		||||
	struct cftype *cft;
 | 
			
		||||
	/*
 | 
			
		||||
	 * eventfd to signal userspace about the event.
 | 
			
		||||
	 */
 | 
			
		||||
	struct eventfd_ctx *eventfd;
 | 
			
		||||
	/*
 | 
			
		||||
	 * Each of these stored in a list by the cgroup.
 | 
			
		||||
	 */
 | 
			
		||||
	struct list_head list;
 | 
			
		||||
	/*
 | 
			
		||||
	 * All fields below needed to unregister event when
 | 
			
		||||
	 * userspace closes eventfd.
 | 
			
		||||
	 */
 | 
			
		||||
	poll_table pt;
 | 
			
		||||
	wait_queue_head_t *wqh;
 | 
			
		||||
	wait_queue_t wait;
 | 
			
		||||
	struct work_struct remove;
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
/* The list of hierarchy roots */
 | 
			
		||||
 | 
			
		||||
static LIST_HEAD(cgroup_roots);
 | 
			
		||||
| 
						 | 
				
			
			@ -1351,8 +1318,6 @@ static void init_cgroup_housekeeping(struct cgroup *cgrp)
 | 
			
		|||
	INIT_LIST_HEAD(&cgrp->pidlists);
 | 
			
		||||
	mutex_init(&cgrp->pidlist_mutex);
 | 
			
		||||
	cgrp->dummy_css.cgroup = cgrp;
 | 
			
		||||
	INIT_LIST_HEAD(&cgrp->event_list);
 | 
			
		||||
	spin_lock_init(&cgrp->event_list_lock);
 | 
			
		||||
	simple_xattrs_init(&cgrp->xattrs);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			@ -2626,16 +2591,6 @@ static const struct inode_operations cgroup_dir_inode_operations = {
 | 
			
		|||
	.removexattr = cgroup_removexattr,
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
/*
 | 
			
		||||
 * Check if a file is a control file
 | 
			
		||||
 */
 | 
			
		||||
static inline struct cftype *__file_cft(struct file *file)
 | 
			
		||||
{
 | 
			
		||||
	if (file_inode(file)->i_fop != &cgroup_file_operations)
 | 
			
		||||
		return ERR_PTR(-EINVAL);
 | 
			
		||||
	return __d_cft(file->f_dentry);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
static int cgroup_create_file(struct dentry *dentry, umode_t mode,
 | 
			
		||||
				struct super_block *sb)
 | 
			
		||||
{
 | 
			
		||||
| 
						 | 
				
			
			@ -3915,202 +3870,6 @@ static void cgroup_dput(struct cgroup *cgrp)
 | 
			
		|||
	deactivate_super(sb);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/*
 | 
			
		||||
 * Unregister event and free resources.
 | 
			
		||||
 *
 | 
			
		||||
 * Gets called from workqueue.
 | 
			
		||||
 */
 | 
			
		||||
static void cgroup_event_remove(struct work_struct *work)
 | 
			
		||||
{
 | 
			
		||||
	struct cgroup_event *event = container_of(work, struct cgroup_event,
 | 
			
		||||
			remove);
 | 
			
		||||
	struct cgroup_subsys_state *css = event->css;
 | 
			
		||||
 | 
			
		||||
	remove_wait_queue(event->wqh, &event->wait);
 | 
			
		||||
 | 
			
		||||
	event->cft->unregister_event(css, event->cft, event->eventfd);
 | 
			
		||||
 | 
			
		||||
	/* Notify userspace the event is going away. */
 | 
			
		||||
	eventfd_signal(event->eventfd, 1);
 | 
			
		||||
 | 
			
		||||
	eventfd_ctx_put(event->eventfd);
 | 
			
		||||
	kfree(event);
 | 
			
		||||
	css_put(css);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/*
 | 
			
		||||
 * Gets called on POLLHUP on eventfd when user closes it.
 | 
			
		||||
 *
 | 
			
		||||
 * Called with wqh->lock held and interrupts disabled.
 | 
			
		||||
 */
 | 
			
		||||
static int cgroup_event_wake(wait_queue_t *wait, unsigned mode,
 | 
			
		||||
		int sync, void *key)
 | 
			
		||||
{
 | 
			
		||||
	struct cgroup_event *event = container_of(wait,
 | 
			
		||||
			struct cgroup_event, wait);
 | 
			
		||||
	struct cgroup *cgrp = event->css->cgroup;
 | 
			
		||||
	unsigned long flags = (unsigned long)key;
 | 
			
		||||
 | 
			
		||||
	if (flags & POLLHUP) {
 | 
			
		||||
		/*
 | 
			
		||||
		 * If the event has been detached at cgroup removal, we
 | 
			
		||||
		 * can simply return knowing the other side will cleanup
 | 
			
		||||
		 * for us.
 | 
			
		||||
		 *
 | 
			
		||||
		 * We can't race against event freeing since the other
 | 
			
		||||
		 * side will require wqh->lock via remove_wait_queue(),
 | 
			
		||||
		 * which we hold.
 | 
			
		||||
		 */
 | 
			
		||||
		spin_lock(&cgrp->event_list_lock);
 | 
			
		||||
		if (!list_empty(&event->list)) {
 | 
			
		||||
			list_del_init(&event->list);
 | 
			
		||||
			/*
 | 
			
		||||
			 * We are in atomic context, but cgroup_event_remove()
 | 
			
		||||
			 * may sleep, so we have to call it in workqueue.
 | 
			
		||||
			 */
 | 
			
		||||
			schedule_work(&event->remove);
 | 
			
		||||
		}
 | 
			
		||||
		spin_unlock(&cgrp->event_list_lock);
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	return 0;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
static void cgroup_event_ptable_queue_proc(struct file *file,
 | 
			
		||||
		wait_queue_head_t *wqh, poll_table *pt)
 | 
			
		||||
{
 | 
			
		||||
	struct cgroup_event *event = container_of(pt,
 | 
			
		||||
			struct cgroup_event, pt);
 | 
			
		||||
 | 
			
		||||
	event->wqh = wqh;
 | 
			
		||||
	add_wait_queue(wqh, &event->wait);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/*
 | 
			
		||||
 * Parse input and register new cgroup event handler.
 | 
			
		||||
 *
 | 
			
		||||
 * Input must be in format '<event_fd> <control_fd> <args>'.
 | 
			
		||||
 * Interpretation of args is defined by control file implementation.
 | 
			
		||||
 */
 | 
			
		||||
static int cgroup_write_event_control(struct cgroup_subsys_state *dummy_css,
 | 
			
		||||
				      struct cftype *cft, const char *buffer)
 | 
			
		||||
{
 | 
			
		||||
	struct cgroup *cgrp = dummy_css->cgroup;
 | 
			
		||||
	struct cgroup_event *event;
 | 
			
		||||
	struct cgroup_subsys_state *cfile_css;
 | 
			
		||||
	unsigned int efd, cfd;
 | 
			
		||||
	struct fd efile;
 | 
			
		||||
	struct fd cfile;
 | 
			
		||||
	char *endp;
 | 
			
		||||
	int ret;
 | 
			
		||||
 | 
			
		||||
	efd = simple_strtoul(buffer, &endp, 10);
 | 
			
		||||
	if (*endp != ' ')
 | 
			
		||||
		return -EINVAL;
 | 
			
		||||
	buffer = endp + 1;
 | 
			
		||||
 | 
			
		||||
	cfd = simple_strtoul(buffer, &endp, 10);
 | 
			
		||||
	if ((*endp != ' ') && (*endp != '\0'))
 | 
			
		||||
		return -EINVAL;
 | 
			
		||||
	buffer = endp + 1;
 | 
			
		||||
 | 
			
		||||
	event = kzalloc(sizeof(*event), GFP_KERNEL);
 | 
			
		||||
	if (!event)
 | 
			
		||||
		return -ENOMEM;
 | 
			
		||||
 | 
			
		||||
	INIT_LIST_HEAD(&event->list);
 | 
			
		||||
	init_poll_funcptr(&event->pt, cgroup_event_ptable_queue_proc);
 | 
			
		||||
	init_waitqueue_func_entry(&event->wait, cgroup_event_wake);
 | 
			
		||||
	INIT_WORK(&event->remove, cgroup_event_remove);
 | 
			
		||||
 | 
			
		||||
	efile = fdget(efd);
 | 
			
		||||
	if (!efile.file) {
 | 
			
		||||
		ret = -EBADF;
 | 
			
		||||
		goto out_kfree;
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	event->eventfd = eventfd_ctx_fileget(efile.file);
 | 
			
		||||
	if (IS_ERR(event->eventfd)) {
 | 
			
		||||
		ret = PTR_ERR(event->eventfd);
 | 
			
		||||
		goto out_put_efile;
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	cfile = fdget(cfd);
 | 
			
		||||
	if (!cfile.file) {
 | 
			
		||||
		ret = -EBADF;
 | 
			
		||||
		goto out_put_eventfd;
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	/* the process need read permission on control file */
 | 
			
		||||
	/* AV: shouldn't we check that it's been opened for read instead? */
 | 
			
		||||
	ret = inode_permission(file_inode(cfile.file), MAY_READ);
 | 
			
		||||
	if (ret < 0)
 | 
			
		||||
		goto out_put_cfile;
 | 
			
		||||
 | 
			
		||||
	event->cft = __file_cft(cfile.file);
 | 
			
		||||
	if (IS_ERR(event->cft)) {
 | 
			
		||||
		ret = PTR_ERR(event->cft);
 | 
			
		||||
		goto out_put_cfile;
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	if (!event->cft->ss) {
 | 
			
		||||
		ret = -EBADF;
 | 
			
		||||
		goto out_put_cfile;
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	/*
 | 
			
		||||
	 * Determine the css of @cfile, verify it belongs to the same
 | 
			
		||||
	 * cgroup as cgroup.event_control, and associate @event with it.
 | 
			
		||||
	 * Remaining events are automatically removed on cgroup destruction
 | 
			
		||||
	 * but the removal is asynchronous, so take an extra ref.
 | 
			
		||||
	 */
 | 
			
		||||
	rcu_read_lock();
 | 
			
		||||
 | 
			
		||||
	ret = -EINVAL;
 | 
			
		||||
	event->css = cgroup_css(cgrp, event->cft->ss);
 | 
			
		||||
	cfile_css = css_from_dir(cfile.file->f_dentry->d_parent, event->cft->ss);
 | 
			
		||||
	if (event->css && event->css == cfile_css && css_tryget(event->css))
 | 
			
		||||
		ret = 0;
 | 
			
		||||
 | 
			
		||||
	rcu_read_unlock();
 | 
			
		||||
	if (ret)
 | 
			
		||||
		goto out_put_cfile;
 | 
			
		||||
 | 
			
		||||
	if (!event->cft->register_event || !event->cft->unregister_event) {
 | 
			
		||||
		ret = -EINVAL;
 | 
			
		||||
		goto out_put_css;
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	ret = event->cft->register_event(event->css, event->cft,
 | 
			
		||||
			event->eventfd, buffer);
 | 
			
		||||
	if (ret)
 | 
			
		||||
		goto out_put_css;
 | 
			
		||||
 | 
			
		||||
	efile.file->f_op->poll(efile.file, &event->pt);
 | 
			
		||||
 | 
			
		||||
	spin_lock(&cgrp->event_list_lock);
 | 
			
		||||
	list_add(&event->list, &cgrp->event_list);
 | 
			
		||||
	spin_unlock(&cgrp->event_list_lock);
 | 
			
		||||
 | 
			
		||||
	fdput(cfile);
 | 
			
		||||
	fdput(efile);
 | 
			
		||||
 | 
			
		||||
	return 0;
 | 
			
		||||
 | 
			
		||||
out_put_css:
 | 
			
		||||
	css_put(event->css);
 | 
			
		||||
out_put_cfile:
 | 
			
		||||
	fdput(cfile);
 | 
			
		||||
out_put_eventfd:
 | 
			
		||||
	eventfd_ctx_put(event->eventfd);
 | 
			
		||||
out_put_efile:
 | 
			
		||||
	fdput(efile);
 | 
			
		||||
out_kfree:
 | 
			
		||||
	kfree(event);
 | 
			
		||||
 | 
			
		||||
	return ret;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
static u64 cgroup_clone_children_read(struct cgroup_subsys_state *css,
 | 
			
		||||
				      struct cftype *cft)
 | 
			
		||||
{
 | 
			
		||||
| 
						 | 
				
			
			@ -4135,11 +3894,6 @@ static struct cftype cgroup_base_files[] = {
 | 
			
		|||
		.release = cgroup_pidlist_release,
 | 
			
		||||
		.mode = S_IRUGO | S_IWUSR,
 | 
			
		||||
	},
 | 
			
		||||
	{
 | 
			
		||||
		.name = "cgroup.event_control",
 | 
			
		||||
		.write_string = cgroup_write_event_control,
 | 
			
		||||
		.mode = S_IWUGO,
 | 
			
		||||
	},
 | 
			
		||||
	{
 | 
			
		||||
		.name = "cgroup.clone_children",
 | 
			
		||||
		.flags = CFTYPE_INSANE,
 | 
			
		||||
| 
						 | 
				
			
			@ -4610,7 +4364,6 @@ static int cgroup_destroy_locked(struct cgroup *cgrp)
 | 
			
		|||
	__releases(&cgroup_mutex) __acquires(&cgroup_mutex)
 | 
			
		||||
{
 | 
			
		||||
	struct dentry *d = cgrp->dentry;
 | 
			
		||||
	struct cgroup_event *event, *tmp;
 | 
			
		||||
	struct cgroup_subsys *ss;
 | 
			
		||||
	struct cgroup *child;
 | 
			
		||||
	bool empty;
 | 
			
		||||
| 
						 | 
				
			
			@ -4685,18 +4438,6 @@ static int cgroup_destroy_locked(struct cgroup *cgrp)
 | 
			
		|||
	dget(d);
 | 
			
		||||
	cgroup_d_remove_dir(d);
 | 
			
		||||
 | 
			
		||||
	/*
 | 
			
		||||
	 * Unregister events and notify userspace.
 | 
			
		||||
	 * Notify userspace about cgroup removing only after rmdir of cgroup
 | 
			
		||||
	 * directory to avoid race between userspace and kernelspace.
 | 
			
		||||
	 */
 | 
			
		||||
	spin_lock(&cgrp->event_list_lock);
 | 
			
		||||
	list_for_each_entry_safe(event, tmp, &cgrp->event_list, list) {
 | 
			
		||||
		list_del_init(&event->list);
 | 
			
		||||
		schedule_work(&event->remove);
 | 
			
		||||
	}
 | 
			
		||||
	spin_unlock(&cgrp->event_list_lock);
 | 
			
		||||
 | 
			
		||||
	return 0;
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
							
								
								
									
										355
									
								
								mm/memcontrol.c
									
									
									
									
									
								
							
							
						
						
									
										355
									
								
								mm/memcontrol.c
									
									
									
									
									
								
							| 
						 | 
				
			
			@ -45,6 +45,7 @@
 | 
			
		|||
#include <linux/swapops.h>
 | 
			
		||||
#include <linux/spinlock.h>
 | 
			
		||||
#include <linux/eventfd.h>
 | 
			
		||||
#include <linux/poll.h>
 | 
			
		||||
#include <linux/sort.h>
 | 
			
		||||
#include <linux/fs.h>
 | 
			
		||||
#include <linux/seq_file.h>
 | 
			
		||||
| 
						 | 
				
			
			@ -55,6 +56,7 @@
 | 
			
		|||
#include <linux/cpu.h>
 | 
			
		||||
#include <linux/oom.h>
 | 
			
		||||
#include <linux/lockdep.h>
 | 
			
		||||
#include <linux/file.h>
 | 
			
		||||
#include "internal.h"
 | 
			
		||||
#include <net/sock.h>
 | 
			
		||||
#include <net/ip.h>
 | 
			
		||||
| 
						 | 
				
			
			@ -227,6 +229,46 @@ struct mem_cgroup_eventfd_list {
 | 
			
		|||
	struct eventfd_ctx *eventfd;
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
/*
 | 
			
		||||
 * cgroup_event represents events which userspace want to receive.
 | 
			
		||||
 */
 | 
			
		||||
struct mem_cgroup_event {
 | 
			
		||||
	/*
 | 
			
		||||
	 * memcg which the event belongs to.
 | 
			
		||||
	 */
 | 
			
		||||
	struct mem_cgroup *memcg;
 | 
			
		||||
	/*
 | 
			
		||||
	 * eventfd to signal userspace about the event.
 | 
			
		||||
	 */
 | 
			
		||||
	struct eventfd_ctx *eventfd;
 | 
			
		||||
	/*
 | 
			
		||||
	 * Each of these stored in a list by the cgroup.
 | 
			
		||||
	 */
 | 
			
		||||
	struct list_head list;
 | 
			
		||||
	/*
 | 
			
		||||
	 * register_event() callback will be used to add new userspace
 | 
			
		||||
	 * waiter for changes related to this event.  Use eventfd_signal()
 | 
			
		||||
	 * on eventfd to send notification to userspace.
 | 
			
		||||
	 */
 | 
			
		||||
	int (*register_event)(struct mem_cgroup *memcg,
 | 
			
		||||
			      struct eventfd_ctx *eventfd, const char *args);
 | 
			
		||||
	/*
 | 
			
		||||
	 * unregister_event() callback will be called when userspace closes
 | 
			
		||||
	 * the eventfd or on cgroup removing.  This callback must be set,
 | 
			
		||||
	 * if you want provide notification functionality.
 | 
			
		||||
	 */
 | 
			
		||||
	void (*unregister_event)(struct mem_cgroup *memcg,
 | 
			
		||||
				 struct eventfd_ctx *eventfd);
 | 
			
		||||
	/*
 | 
			
		||||
	 * All fields below needed to unregister event when
 | 
			
		||||
	 * userspace closes eventfd.
 | 
			
		||||
	 */
 | 
			
		||||
	poll_table pt;
 | 
			
		||||
	wait_queue_head_t *wqh;
 | 
			
		||||
	wait_queue_t wait;
 | 
			
		||||
	struct work_struct remove;
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
static void mem_cgroup_threshold(struct mem_cgroup *memcg);
 | 
			
		||||
static void mem_cgroup_oom_notify(struct mem_cgroup *memcg);
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			@ -331,6 +373,10 @@ struct mem_cgroup {
 | 
			
		|||
	atomic_t	numainfo_updating;
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
	/* List of events which userspace want to receive */
 | 
			
		||||
	struct list_head event_list;
 | 
			
		||||
	spinlock_t event_list_lock;
 | 
			
		||||
 | 
			
		||||
	struct mem_cgroup_per_node *nodeinfo[0];
 | 
			
		||||
	/* WARNING: nodeinfo must be the last member here */
 | 
			
		||||
};
 | 
			
		||||
| 
						 | 
				
			
			@ -490,11 +536,6 @@ struct cgroup_subsys_state *vmpressure_to_css(struct vmpressure *vmpr)
 | 
			
		|||
	return &container_of(vmpr, struct mem_cgroup, vmpressure)->css;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
struct vmpressure *css_to_vmpressure(struct cgroup_subsys_state *css)
 | 
			
		||||
{
 | 
			
		||||
	return &mem_cgroup_from_css(css)->vmpressure;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
static inline bool mem_cgroup_is_root(struct mem_cgroup *memcg)
 | 
			
		||||
{
 | 
			
		||||
	return (memcg == root_mem_cgroup);
 | 
			
		||||
| 
						 | 
				
			
			@ -5648,13 +5689,11 @@ static void mem_cgroup_oom_notify(struct mem_cgroup *memcg)
 | 
			
		|||
		mem_cgroup_oom_notify_cb(iter);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
static int mem_cgroup_usage_register_event(struct cgroup_subsys_state *css,
 | 
			
		||||
	struct cftype *cft, struct eventfd_ctx *eventfd, const char *args)
 | 
			
		||||
static int __mem_cgroup_usage_register_event(struct mem_cgroup *memcg,
 | 
			
		||||
	struct eventfd_ctx *eventfd, const char *args, enum res_type type)
 | 
			
		||||
{
 | 
			
		||||
	struct mem_cgroup *memcg = mem_cgroup_from_css(css);
 | 
			
		||||
	struct mem_cgroup_thresholds *thresholds;
 | 
			
		||||
	struct mem_cgroup_threshold_ary *new;
 | 
			
		||||
	enum res_type type = MEMFILE_TYPE(cft->private);
 | 
			
		||||
	u64 threshold, usage;
 | 
			
		||||
	int i, size, ret;
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			@ -5731,13 +5770,23 @@ static int mem_cgroup_usage_register_event(struct cgroup_subsys_state *css,
 | 
			
		|||
	return ret;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
static void mem_cgroup_usage_unregister_event(struct cgroup_subsys_state *css,
 | 
			
		||||
	struct cftype *cft, struct eventfd_ctx *eventfd)
 | 
			
		||||
static int mem_cgroup_usage_register_event(struct mem_cgroup *memcg,
 | 
			
		||||
	struct eventfd_ctx *eventfd, const char *args)
 | 
			
		||||
{
 | 
			
		||||
	return __mem_cgroup_usage_register_event(memcg, eventfd, args, _MEM);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
static int memsw_cgroup_usage_register_event(struct mem_cgroup *memcg,
 | 
			
		||||
	struct eventfd_ctx *eventfd, const char *args)
 | 
			
		||||
{
 | 
			
		||||
	return __mem_cgroup_usage_register_event(memcg, eventfd, args, _MEMSWAP);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
static void __mem_cgroup_usage_unregister_event(struct mem_cgroup *memcg,
 | 
			
		||||
	struct eventfd_ctx *eventfd, enum res_type type)
 | 
			
		||||
{
 | 
			
		||||
	struct mem_cgroup *memcg = mem_cgroup_from_css(css);
 | 
			
		||||
	struct mem_cgroup_thresholds *thresholds;
 | 
			
		||||
	struct mem_cgroup_threshold_ary *new;
 | 
			
		||||
	enum res_type type = MEMFILE_TYPE(cft->private);
 | 
			
		||||
	u64 usage;
 | 
			
		||||
	int i, j, size;
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			@ -5810,14 +5859,23 @@ static void mem_cgroup_usage_unregister_event(struct cgroup_subsys_state *css,
 | 
			
		|||
	mutex_unlock(&memcg->thresholds_lock);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
static int mem_cgroup_oom_register_event(struct cgroup_subsys_state *css,
 | 
			
		||||
	struct cftype *cft, struct eventfd_ctx *eventfd, const char *args)
 | 
			
		||||
static void mem_cgroup_usage_unregister_event(struct mem_cgroup *memcg,
 | 
			
		||||
	struct eventfd_ctx *eventfd)
 | 
			
		||||
{
 | 
			
		||||
	struct mem_cgroup *memcg = mem_cgroup_from_css(css);
 | 
			
		||||
	struct mem_cgroup_eventfd_list *event;
 | 
			
		||||
	enum res_type type = MEMFILE_TYPE(cft->private);
 | 
			
		||||
	return __mem_cgroup_usage_unregister_event(memcg, eventfd, _MEM);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
static void memsw_cgroup_usage_unregister_event(struct mem_cgroup *memcg,
 | 
			
		||||
	struct eventfd_ctx *eventfd)
 | 
			
		||||
{
 | 
			
		||||
	return __mem_cgroup_usage_unregister_event(memcg, eventfd, _MEMSWAP);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
static int mem_cgroup_oom_register_event(struct mem_cgroup *memcg,
 | 
			
		||||
	struct eventfd_ctx *eventfd, const char *args)
 | 
			
		||||
{
 | 
			
		||||
	struct mem_cgroup_eventfd_list *event;
 | 
			
		||||
 | 
			
		||||
	BUG_ON(type != _OOM_TYPE);
 | 
			
		||||
	event = kmalloc(sizeof(*event),	GFP_KERNEL);
 | 
			
		||||
	if (!event)
 | 
			
		||||
		return -ENOMEM;
 | 
			
		||||
| 
						 | 
				
			
			@ -5835,14 +5893,10 @@ static int mem_cgroup_oom_register_event(struct cgroup_subsys_state *css,
 | 
			
		|||
	return 0;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
static void mem_cgroup_oom_unregister_event(struct cgroup_subsys_state *css,
 | 
			
		||||
	struct cftype *cft, struct eventfd_ctx *eventfd)
 | 
			
		||||
static void mem_cgroup_oom_unregister_event(struct mem_cgroup *memcg,
 | 
			
		||||
	struct eventfd_ctx *eventfd)
 | 
			
		||||
{
 | 
			
		||||
	struct mem_cgroup *memcg = mem_cgroup_from_css(css);
 | 
			
		||||
	struct mem_cgroup_eventfd_list *ev, *tmp;
 | 
			
		||||
	enum res_type type = MEMFILE_TYPE(cft->private);
 | 
			
		||||
 | 
			
		||||
	BUG_ON(type != _OOM_TYPE);
 | 
			
		||||
 | 
			
		||||
	spin_lock(&memcg_oom_lock);
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			@ -5959,13 +6013,233 @@ static void kmem_cgroup_css_offline(struct mem_cgroup *memcg)
 | 
			
		|||
}
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
/*
 | 
			
		||||
 * DO NOT USE IN NEW FILES.
 | 
			
		||||
 *
 | 
			
		||||
 * "cgroup.event_control" implementation.
 | 
			
		||||
 *
 | 
			
		||||
 * This is way over-engineered.  It tries to support fully configurable
 | 
			
		||||
 * events for each user.  Such level of flexibility is completely
 | 
			
		||||
 * unnecessary especially in the light of the planned unified hierarchy.
 | 
			
		||||
 *
 | 
			
		||||
 * Please deprecate this and replace with something simpler if at all
 | 
			
		||||
 * possible.
 | 
			
		||||
 */
 | 
			
		||||
 | 
			
		||||
/*
 | 
			
		||||
 * Unregister event and free resources.
 | 
			
		||||
 *
 | 
			
		||||
 * Gets called from workqueue.
 | 
			
		||||
 */
 | 
			
		||||
static void memcg_event_remove(struct work_struct *work)
 | 
			
		||||
{
 | 
			
		||||
	struct mem_cgroup_event *event =
 | 
			
		||||
		container_of(work, struct mem_cgroup_event, remove);
 | 
			
		||||
	struct mem_cgroup *memcg = event->memcg;
 | 
			
		||||
 | 
			
		||||
	remove_wait_queue(event->wqh, &event->wait);
 | 
			
		||||
 | 
			
		||||
	event->unregister_event(memcg, event->eventfd);
 | 
			
		||||
 | 
			
		||||
	/* Notify userspace the event is going away. */
 | 
			
		||||
	eventfd_signal(event->eventfd, 1);
 | 
			
		||||
 | 
			
		||||
	eventfd_ctx_put(event->eventfd);
 | 
			
		||||
	kfree(event);
 | 
			
		||||
	css_put(&memcg->css);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/*
 | 
			
		||||
 * Gets called on POLLHUP on eventfd when user closes it.
 | 
			
		||||
 *
 | 
			
		||||
 * Called with wqh->lock held and interrupts disabled.
 | 
			
		||||
 */
 | 
			
		||||
static int memcg_event_wake(wait_queue_t *wait, unsigned mode,
 | 
			
		||||
			    int sync, void *key)
 | 
			
		||||
{
 | 
			
		||||
	struct mem_cgroup_event *event =
 | 
			
		||||
		container_of(wait, struct mem_cgroup_event, wait);
 | 
			
		||||
	struct mem_cgroup *memcg = event->memcg;
 | 
			
		||||
	unsigned long flags = (unsigned long)key;
 | 
			
		||||
 | 
			
		||||
	if (flags & POLLHUP) {
 | 
			
		||||
		/*
 | 
			
		||||
		 * If the event has been detached at cgroup removal, we
 | 
			
		||||
		 * can simply return knowing the other side will cleanup
 | 
			
		||||
		 * for us.
 | 
			
		||||
		 *
 | 
			
		||||
		 * We can't race against event freeing since the other
 | 
			
		||||
		 * side will require wqh->lock via remove_wait_queue(),
 | 
			
		||||
		 * which we hold.
 | 
			
		||||
		 */
 | 
			
		||||
		spin_lock(&memcg->event_list_lock);
 | 
			
		||||
		if (!list_empty(&event->list)) {
 | 
			
		||||
			list_del_init(&event->list);
 | 
			
		||||
			/*
 | 
			
		||||
			 * We are in atomic context, but cgroup_event_remove()
 | 
			
		||||
			 * may sleep, so we have to call it in workqueue.
 | 
			
		||||
			 */
 | 
			
		||||
			schedule_work(&event->remove);
 | 
			
		||||
		}
 | 
			
		||||
		spin_unlock(&memcg->event_list_lock);
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	return 0;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
static void memcg_event_ptable_queue_proc(struct file *file,
 | 
			
		||||
		wait_queue_head_t *wqh, poll_table *pt)
 | 
			
		||||
{
 | 
			
		||||
	struct mem_cgroup_event *event =
 | 
			
		||||
		container_of(pt, struct mem_cgroup_event, pt);
 | 
			
		||||
 | 
			
		||||
	event->wqh = wqh;
 | 
			
		||||
	add_wait_queue(wqh, &event->wait);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/*
 | 
			
		||||
 * DO NOT USE IN NEW FILES.
 | 
			
		||||
 *
 | 
			
		||||
 * Parse input and register new cgroup event handler.
 | 
			
		||||
 *
 | 
			
		||||
 * Input must be in format '<event_fd> <control_fd> <args>'.
 | 
			
		||||
 * Interpretation of args is defined by control file implementation.
 | 
			
		||||
 */
 | 
			
		||||
static int memcg_write_event_control(struct cgroup_subsys_state *css,
 | 
			
		||||
				     struct cftype *cft, const char *buffer)
 | 
			
		||||
{
 | 
			
		||||
	struct mem_cgroup *memcg = mem_cgroup_from_css(css);
 | 
			
		||||
	struct mem_cgroup_event *event;
 | 
			
		||||
	struct cgroup_subsys_state *cfile_css;
 | 
			
		||||
	unsigned int efd, cfd;
 | 
			
		||||
	struct fd efile;
 | 
			
		||||
	struct fd cfile;
 | 
			
		||||
	const char *name;
 | 
			
		||||
	char *endp;
 | 
			
		||||
	int ret;
 | 
			
		||||
 | 
			
		||||
	efd = simple_strtoul(buffer, &endp, 10);
 | 
			
		||||
	if (*endp != ' ')
 | 
			
		||||
		return -EINVAL;
 | 
			
		||||
	buffer = endp + 1;
 | 
			
		||||
 | 
			
		||||
	cfd = simple_strtoul(buffer, &endp, 10);
 | 
			
		||||
	if ((*endp != ' ') && (*endp != '\0'))
 | 
			
		||||
		return -EINVAL;
 | 
			
		||||
	buffer = endp + 1;
 | 
			
		||||
 | 
			
		||||
	event = kzalloc(sizeof(*event), GFP_KERNEL);
 | 
			
		||||
	if (!event)
 | 
			
		||||
		return -ENOMEM;
 | 
			
		||||
 | 
			
		||||
	event->memcg = memcg;
 | 
			
		||||
	INIT_LIST_HEAD(&event->list);
 | 
			
		||||
	init_poll_funcptr(&event->pt, memcg_event_ptable_queue_proc);
 | 
			
		||||
	init_waitqueue_func_entry(&event->wait, memcg_event_wake);
 | 
			
		||||
	INIT_WORK(&event->remove, memcg_event_remove);
 | 
			
		||||
 | 
			
		||||
	efile = fdget(efd);
 | 
			
		||||
	if (!efile.file) {
 | 
			
		||||
		ret = -EBADF;
 | 
			
		||||
		goto out_kfree;
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	event->eventfd = eventfd_ctx_fileget(efile.file);
 | 
			
		||||
	if (IS_ERR(event->eventfd)) {
 | 
			
		||||
		ret = PTR_ERR(event->eventfd);
 | 
			
		||||
		goto out_put_efile;
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	cfile = fdget(cfd);
 | 
			
		||||
	if (!cfile.file) {
 | 
			
		||||
		ret = -EBADF;
 | 
			
		||||
		goto out_put_eventfd;
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	/* the process need read permission on control file */
 | 
			
		||||
	/* AV: shouldn't we check that it's been opened for read instead? */
 | 
			
		||||
	ret = inode_permission(file_inode(cfile.file), MAY_READ);
 | 
			
		||||
	if (ret < 0)
 | 
			
		||||
		goto out_put_cfile;
 | 
			
		||||
 | 
			
		||||
	/*
 | 
			
		||||
	 * Determine the event callbacks and set them in @event.  This used
 | 
			
		||||
	 * to be done via struct cftype but cgroup core no longer knows
 | 
			
		||||
	 * about these events.  The following is crude but the whole thing
 | 
			
		||||
	 * is for compatibility anyway.
 | 
			
		||||
	 *
 | 
			
		||||
	 * DO NOT ADD NEW FILES.
 | 
			
		||||
	 */
 | 
			
		||||
	name = cfile.file->f_dentry->d_name.name;
 | 
			
		||||
 | 
			
		||||
	if (!strcmp(name, "memory.usage_in_bytes")) {
 | 
			
		||||
		event->register_event = mem_cgroup_usage_register_event;
 | 
			
		||||
		event->unregister_event = mem_cgroup_usage_unregister_event;
 | 
			
		||||
	} else if (!strcmp(name, "memory.oom_control")) {
 | 
			
		||||
		event->register_event = mem_cgroup_oom_register_event;
 | 
			
		||||
		event->unregister_event = mem_cgroup_oom_unregister_event;
 | 
			
		||||
	} else if (!strcmp(name, "memory.pressure_level")) {
 | 
			
		||||
		event->register_event = vmpressure_register_event;
 | 
			
		||||
		event->unregister_event = vmpressure_unregister_event;
 | 
			
		||||
	} else if (!strcmp(name, "memory.memsw.usage_in_bytes")) {
 | 
			
		||||
		event->register_event = memsw_cgroup_usage_register_event;
 | 
			
		||||
		event->unregister_event = memsw_cgroup_usage_unregister_event;
 | 
			
		||||
	} else {
 | 
			
		||||
		ret = -EINVAL;
 | 
			
		||||
		goto out_put_cfile;
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	/*
 | 
			
		||||
	 * Verify @cfile should belong to @css.  Also, remaining events are
 | 
			
		||||
	 * automatically removed on cgroup destruction but the removal is
 | 
			
		||||
	 * asynchronous, so take an extra ref on @css.
 | 
			
		||||
	 */
 | 
			
		||||
	rcu_read_lock();
 | 
			
		||||
 | 
			
		||||
	ret = -EINVAL;
 | 
			
		||||
	cfile_css = css_from_dir(cfile.file->f_dentry->d_parent,
 | 
			
		||||
				 &mem_cgroup_subsys);
 | 
			
		||||
	if (cfile_css == css && css_tryget(css))
 | 
			
		||||
		ret = 0;
 | 
			
		||||
 | 
			
		||||
	rcu_read_unlock();
 | 
			
		||||
	if (ret)
 | 
			
		||||
		goto out_put_cfile;
 | 
			
		||||
 | 
			
		||||
	ret = event->register_event(memcg, event->eventfd, buffer);
 | 
			
		||||
	if (ret)
 | 
			
		||||
		goto out_put_css;
 | 
			
		||||
 | 
			
		||||
	efile.file->f_op->poll(efile.file, &event->pt);
 | 
			
		||||
 | 
			
		||||
	spin_lock(&memcg->event_list_lock);
 | 
			
		||||
	list_add(&event->list, &memcg->event_list);
 | 
			
		||||
	spin_unlock(&memcg->event_list_lock);
 | 
			
		||||
 | 
			
		||||
	fdput(cfile);
 | 
			
		||||
	fdput(efile);
 | 
			
		||||
 | 
			
		||||
	return 0;
 | 
			
		||||
 | 
			
		||||
out_put_css:
 | 
			
		||||
	css_put(css);
 | 
			
		||||
out_put_cfile:
 | 
			
		||||
	fdput(cfile);
 | 
			
		||||
out_put_eventfd:
 | 
			
		||||
	eventfd_ctx_put(event->eventfd);
 | 
			
		||||
out_put_efile:
 | 
			
		||||
	fdput(efile);
 | 
			
		||||
out_kfree:
 | 
			
		||||
	kfree(event);
 | 
			
		||||
 | 
			
		||||
	return ret;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
static struct cftype mem_cgroup_files[] = {
 | 
			
		||||
	{
 | 
			
		||||
		.name = "usage_in_bytes",
 | 
			
		||||
		.private = MEMFILE_PRIVATE(_MEM, RES_USAGE),
 | 
			
		||||
		.read = mem_cgroup_read,
 | 
			
		||||
		.register_event = mem_cgroup_usage_register_event,
 | 
			
		||||
		.unregister_event = mem_cgroup_usage_unregister_event,
 | 
			
		||||
	},
 | 
			
		||||
	{
 | 
			
		||||
		.name = "max_usage_in_bytes",
 | 
			
		||||
| 
						 | 
				
			
			@ -6005,6 +6279,12 @@ static struct cftype mem_cgroup_files[] = {
 | 
			
		|||
		.write_u64 = mem_cgroup_hierarchy_write,
 | 
			
		||||
		.read_u64 = mem_cgroup_hierarchy_read,
 | 
			
		||||
	},
 | 
			
		||||
	{
 | 
			
		||||
		.name = "cgroup.event_control",		/* XXX: for compat */
 | 
			
		||||
		.write_string = memcg_write_event_control,
 | 
			
		||||
		.flags = CFTYPE_NO_PREFIX,
 | 
			
		||||
		.mode = S_IWUGO,
 | 
			
		||||
	},
 | 
			
		||||
	{
 | 
			
		||||
		.name = "swappiness",
 | 
			
		||||
		.read_u64 = mem_cgroup_swappiness_read,
 | 
			
		||||
| 
						 | 
				
			
			@ -6019,14 +6299,10 @@ static struct cftype mem_cgroup_files[] = {
 | 
			
		|||
		.name = "oom_control",
 | 
			
		||||
		.read_map = mem_cgroup_oom_control_read,
 | 
			
		||||
		.write_u64 = mem_cgroup_oom_control_write,
 | 
			
		||||
		.register_event = mem_cgroup_oom_register_event,
 | 
			
		||||
		.unregister_event = mem_cgroup_oom_unregister_event,
 | 
			
		||||
		.private = MEMFILE_PRIVATE(_OOM_TYPE, OOM_CONTROL),
 | 
			
		||||
	},
 | 
			
		||||
	{
 | 
			
		||||
		.name = "pressure_level",
 | 
			
		||||
		.register_event = vmpressure_register_event,
 | 
			
		||||
		.unregister_event = vmpressure_unregister_event,
 | 
			
		||||
	},
 | 
			
		||||
#ifdef CONFIG_NUMA
 | 
			
		||||
	{
 | 
			
		||||
| 
						 | 
				
			
			@ -6074,8 +6350,6 @@ static struct cftype memsw_cgroup_files[] = {
 | 
			
		|||
		.name = "memsw.usage_in_bytes",
 | 
			
		||||
		.private = MEMFILE_PRIVATE(_MEMSWAP, RES_USAGE),
 | 
			
		||||
		.read = mem_cgroup_read,
 | 
			
		||||
		.register_event = mem_cgroup_usage_register_event,
 | 
			
		||||
		.unregister_event = mem_cgroup_usage_unregister_event,
 | 
			
		||||
	},
 | 
			
		||||
	{
 | 
			
		||||
		.name = "memsw.max_usage_in_bytes",
 | 
			
		||||
| 
						 | 
				
			
			@ -6265,6 +6539,8 @@ mem_cgroup_css_alloc(struct cgroup_subsys_state *parent_css)
 | 
			
		|||
	mutex_init(&memcg->thresholds_lock);
 | 
			
		||||
	spin_lock_init(&memcg->move_lock);
 | 
			
		||||
	vmpressure_init(&memcg->vmpressure);
 | 
			
		||||
	INIT_LIST_HEAD(&memcg->event_list);
 | 
			
		||||
	spin_lock_init(&memcg->event_list_lock);
 | 
			
		||||
 | 
			
		||||
	return &memcg->css;
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			@ -6340,6 +6616,19 @@ static void mem_cgroup_invalidate_reclaim_iterators(struct mem_cgroup *memcg)
 | 
			
		|||
static void mem_cgroup_css_offline(struct cgroup_subsys_state *css)
 | 
			
		||||
{
 | 
			
		||||
	struct mem_cgroup *memcg = mem_cgroup_from_css(css);
 | 
			
		||||
	struct mem_cgroup_event *event, *tmp;
 | 
			
		||||
 | 
			
		||||
	/*
 | 
			
		||||
	 * Unregister events and notify userspace.
 | 
			
		||||
	 * Notify userspace about cgroup removing only after rmdir of cgroup
 | 
			
		||||
	 * directory to avoid race between userspace and kernelspace.
 | 
			
		||||
	 */
 | 
			
		||||
	spin_lock(&memcg->event_list_lock);
 | 
			
		||||
	list_for_each_entry_safe(event, tmp, &memcg->event_list, list) {
 | 
			
		||||
		list_del_init(&event->list);
 | 
			
		||||
		schedule_work(&event->remove);
 | 
			
		||||
	}
 | 
			
		||||
	spin_unlock(&memcg->event_list_lock);
 | 
			
		||||
 | 
			
		||||
	kmem_cgroup_css_offline(memcg);
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -278,8 +278,7 @@ void vmpressure_prio(gfp_t gfp, struct mem_cgroup *memcg, int prio)
 | 
			
		|||
 | 
			
		||||
/**
 | 
			
		||||
 * vmpressure_register_event() - Bind vmpressure notifications to an eventfd
 | 
			
		||||
 * @css:	css that is interested in vmpressure notifications
 | 
			
		||||
 * @cft:	cgroup control files handle
 | 
			
		||||
 * @memcg:	memcg that is interested in vmpressure notifications
 | 
			
		||||
 * @eventfd:	eventfd context to link notifications with
 | 
			
		||||
 * @args:	event arguments (used to set up a pressure level threshold)
 | 
			
		||||
 *
 | 
			
		||||
| 
						 | 
				
			
			@ -289,15 +288,12 @@ void vmpressure_prio(gfp_t gfp, struct mem_cgroup *memcg, int prio)
 | 
			
		|||
 * threshold (one of vmpressure_str_levels, i.e. "low", "medium", or
 | 
			
		||||
 * "critical").
 | 
			
		||||
 *
 | 
			
		||||
 * This function should not be used directly, just pass it to (struct
 | 
			
		||||
 * cftype).register_event, and then cgroup core will handle everything by
 | 
			
		||||
 * itself.
 | 
			
		||||
 * To be used as memcg event method.
 | 
			
		||||
 */
 | 
			
		||||
int vmpressure_register_event(struct cgroup_subsys_state *css,
 | 
			
		||||
			      struct cftype *cft, struct eventfd_ctx *eventfd,
 | 
			
		||||
			      const char *args)
 | 
			
		||||
int vmpressure_register_event(struct mem_cgroup *memcg,
 | 
			
		||||
			      struct eventfd_ctx *eventfd, const char *args)
 | 
			
		||||
{
 | 
			
		||||
	struct vmpressure *vmpr = css_to_vmpressure(css);
 | 
			
		||||
	struct vmpressure *vmpr = memcg_to_vmpressure(memcg);
 | 
			
		||||
	struct vmpressure_event *ev;
 | 
			
		||||
	int level;
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			@ -325,23 +321,19 @@ int vmpressure_register_event(struct cgroup_subsys_state *css,
 | 
			
		|||
 | 
			
		||||
/**
 | 
			
		||||
 * vmpressure_unregister_event() - Unbind eventfd from vmpressure
 | 
			
		||||
 * @css:	css handle
 | 
			
		||||
 * @cft:	cgroup control files handle
 | 
			
		||||
 * @memcg:	memcg handle
 | 
			
		||||
 * @eventfd:	eventfd context that was used to link vmpressure with the @memcg
 | 
			
		||||
 *
 | 
			
		||||
 * This function does internal manipulations to detach the @eventfd from
 | 
			
		||||
 * the vmpressure notifications, and then frees internal resources
 | 
			
		||||
 * associated with the @eventfd (but the @eventfd itself is not freed).
 | 
			
		||||
 *
 | 
			
		||||
 * This function should not be used directly, just pass it to (struct
 | 
			
		||||
 * cftype).unregister_event, and then cgroup core will handle everything
 | 
			
		||||
 * by itself.
 | 
			
		||||
 * To be used as memcg event method.
 | 
			
		||||
 */
 | 
			
		||||
void vmpressure_unregister_event(struct cgroup_subsys_state *css,
 | 
			
		||||
				 struct cftype *cft,
 | 
			
		||||
void vmpressure_unregister_event(struct mem_cgroup *memcg,
 | 
			
		||||
				 struct eventfd_ctx *eventfd)
 | 
			
		||||
{
 | 
			
		||||
	struct vmpressure *vmpr = css_to_vmpressure(css);
 | 
			
		||||
	struct vmpressure *vmpr = memcg_to_vmpressure(memcg);
 | 
			
		||||
	struct vmpressure_event *ev;
 | 
			
		||||
 | 
			
		||||
	mutex_lock(&vmpr->events_lock);
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
		Loading…
	
		Reference in a new issue